Posted to commits@hbase.apache.org by ap...@apache.org on 2017/08/26 01:39:01 UTC
[01/41] hbase git commit: HBASE-18640 Move mapreduce out of hbase-server into separate module.
Repository: hbase
Updated Branches:
refs/heads/master 8d33949b8 -> 664b6be0e
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/util/LoadTestTool.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/LoadTestTool.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/LoadTestTool.java
deleted file mode 100644
index ad832e3..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/LoadTestTool.java
+++ /dev/null
@@ -1,968 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with this
- * work for additional information regarding copyright ownership. The ASF
- * licenses this file to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.hadoop.hbase.util;
-
-import java.io.IOException;
-import java.io.InterruptedIOException;
-import java.lang.reflect.Constructor;
-import java.net.InetAddress;
-import java.security.SecureRandom;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Locale;
-import java.util.Properties;
-import java.util.Random;
-import java.util.concurrent.atomic.AtomicReference;
-
-import javax.crypto.spec.SecretKeySpec;
-
-import org.apache.commons.cli.CommandLine;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.HBaseInterfaceAudience;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.HColumnDescriptor;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Admin;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Durability;
-import org.apache.hadoop.hbase.client.TableDescriptor;
-import org.apache.hadoop.hbase.io.compress.Compression;
-import org.apache.hadoop.hbase.io.crypto.Cipher;
-import org.apache.hadoop.hbase.io.crypto.Encryption;
-import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
-import org.apache.hadoop.hbase.regionserver.BloomType;
-import org.apache.hadoop.hbase.security.EncryptionUtil;
-import org.apache.hadoop.hbase.security.User;
-import org.apache.hadoop.hbase.security.access.AccessControlClient;
-import org.apache.hadoop.hbase.security.access.Permission;
-import org.apache.hadoop.hbase.util.test.LoadTestDataGenerator;
-import org.apache.hadoop.hbase.util.test.LoadTestDataGeneratorWithACL;
-import org.apache.hadoop.security.SecurityUtil;
-import org.apache.hadoop.security.UserGroupInformation;
-import org.apache.hadoop.util.ToolRunner;
-
-/**
- * A command-line utility that reads, writes, and verifies data. Unlike
- * {@link org.apache.hadoop.hbase.PerformanceEvaluation}, this tool validates the data written,
- * and supports simultaneously writing and reading the same set of keys.
- */
-@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
-public class LoadTestTool extends AbstractHBaseTool {
-
- private static final Log LOG = LogFactory.getLog(LoadTestTool.class);
- private static final String COLON = ":";
-
- /** Table name for the test */
- private TableName tableName;
-
- /** Column families for the test */
- private byte[][] families;
-
- /** Table name to use if not overridden on the command line */
- protected static final String DEFAULT_TABLE_NAME = "cluster_test";
-
- /** Column family used by the test */
- public static byte[] DEFAULT_COLUMN_FAMILY = Bytes.toBytes("test_cf");
-
- /** Column families used by the test */
- public static final byte[][] DEFAULT_COLUMN_FAMILIES = { DEFAULT_COLUMN_FAMILY };
-
- /** The default data size if not specified */
- protected static final int DEFAULT_DATA_SIZE = 64;
-
- /** The number of reader/writer threads if not specified */
- protected static final int DEFAULT_NUM_THREADS = 20;
-
- /** Usage string for the load option */
- protected static final String OPT_USAGE_LOAD =
- "<avg_cols_per_key>:<avg_data_size>" +
- "[:<#threads=" + DEFAULT_NUM_THREADS + ">]";
-
- /** Usage string for the read option */
- protected static final String OPT_USAGE_READ =
- "<verify_percent>[:<#threads=" + DEFAULT_NUM_THREADS + ">]";
-
- /** Usage string for the update option */
- protected static final String OPT_USAGE_UPDATE =
- "<update_percent>[:<#threads=" + DEFAULT_NUM_THREADS
- + ">][:<#whether to ignore nonce collisions=0>]";
-
- protected static final String OPT_USAGE_BLOOM = "Bloom filter type, one of " +
- Arrays.toString(BloomType.values());
-
- protected static final String OPT_USAGE_COMPRESSION = "Compression type, " +
- "one of " + Arrays.toString(Compression.Algorithm.values());
-
- public static final String OPT_DATA_BLOCK_ENCODING_USAGE =
- "Encoding algorithm (e.g. prefix "
- + "compression) to use for data blocks in the test column family, "
- + "one of " + Arrays.toString(DataBlockEncoding.values()) + ".";
-
- public static final String OPT_BLOOM = "bloom";
- public static final String OPT_COMPRESSION = "compression";
- public static final String OPT_DEFERRED_LOG_FLUSH = "deferredlogflush";
- public static final String OPT_DEFERRED_LOG_FLUSH_USAGE = "Enable deferred log flush.";
-
- public static final String OPT_DATA_BLOCK_ENCODING =
- HColumnDescriptor.DATA_BLOCK_ENCODING.toLowerCase(Locale.ROOT);
-
- public static final String OPT_INMEMORY = "in_memory";
- public static final String OPT_USAGE_IN_MEMORY = "Tries to keep the HFiles of the CF " +
- "in memory as much as possible. It is not guaranteed that reads are always served from memory";
-
- public static final String OPT_GENERATOR = "generator";
- public static final String OPT_GENERATOR_USAGE = "The class which generates load for the tool."
- + " Any arguments for this class can be passed, colon-separated, after the class name";
-
- public static final String OPT_WRITER = "writer";
- public static final String OPT_WRITER_USAGE = "The class for executing the write requests";
-
- public static final String OPT_UPDATER = "updater";
- public static final String OPT_UPDATER_USAGE = "The class for executing the update requests";
-
- public static final String OPT_READER = "reader";
- public static final String OPT_READER_USAGE = "The class for executing the read requests";
-
- protected static final String OPT_KEY_WINDOW = "key_window";
- protected static final String OPT_WRITE = "write";
- protected static final String OPT_MAX_READ_ERRORS = "max_read_errors";
- public static final String OPT_MULTIPUT = "multiput";
- public static final String OPT_MULTIGET = "multiget_batchsize";
- protected static final String OPT_NUM_KEYS = "num_keys";
- protected static final String OPT_READ = "read";
- protected static final String OPT_START_KEY = "start_key";
- public static final String OPT_TABLE_NAME = "tn";
- public static final String OPT_COLUMN_FAMILIES = "families";
- protected static final String OPT_ZK_QUORUM = "zk";
- protected static final String OPT_ZK_PARENT_NODE = "zk_root";
- protected static final String OPT_SKIP_INIT = "skip_init";
- protected static final String OPT_INIT_ONLY = "init_only";
- protected static final String NUM_TABLES = "num_tables";
- protected static final String OPT_REGIONS_PER_SERVER = "regions_per_server";
- protected static final String OPT_BATCHUPDATE = "batchupdate";
- protected static final String OPT_UPDATE = "update";
-
- public static final String OPT_ENCRYPTION = "encryption";
- protected static final String OPT_ENCRYPTION_USAGE =
- "Enables transparent encryption on the test table, one of " +
- Arrays.toString(Encryption.getSupportedCiphers());
-
- public static final String OPT_NUM_REGIONS_PER_SERVER = "num_regions_per_server";
- protected static final String OPT_NUM_REGIONS_PER_SERVER_USAGE
- = "Desired number of regions per region server. Defaults to 5.";
- public static int DEFAULT_NUM_REGIONS_PER_SERVER = 5;
-
- public static final String OPT_REGION_REPLICATION = "region_replication";
- protected static final String OPT_REGION_REPLICATION_USAGE =
- "Desired number of replicas per region";
-
- public static final String OPT_REGION_REPLICA_ID = "region_replica_id";
- protected static final String OPT_REGION_REPLICA_ID_USAGE =
- "Region replica id to do the reads from";
-
- public static final String OPT_MOB_THRESHOLD = "mob_threshold";
- protected static final String OPT_MOB_THRESHOLD_USAGE =
- "Desired cell size to exceed in bytes that will use the MOB write path";
-
- protected static final long DEFAULT_START_KEY = 0;
-
- /** This will be removed as we factor out the dependency on command line */
- protected CommandLine cmd;
-
- protected MultiThreadedWriter writerThreads = null;
- protected MultiThreadedReader readerThreads = null;
- protected MultiThreadedUpdater updaterThreads = null;
-
- protected long startKey, endKey;
-
- protected boolean isWrite, isRead, isUpdate;
- protected boolean deferredLogFlush;
-
- // Column family options
- protected DataBlockEncoding dataBlockEncodingAlgo;
- protected Compression.Algorithm compressAlgo;
- protected BloomType bloomType;
- private boolean inMemoryCF;
-
- private User userOwner;
- // Writer options
- protected int numWriterThreads = DEFAULT_NUM_THREADS;
- protected int minColsPerKey, maxColsPerKey;
- protected int minColDataSize = DEFAULT_DATA_SIZE, maxColDataSize = DEFAULT_DATA_SIZE;
- protected boolean isMultiPut;
-
- // Updater options
- protected int numUpdaterThreads = DEFAULT_NUM_THREADS;
- protected int updatePercent;
- protected boolean ignoreConflicts = false;
- protected boolean isBatchUpdate;
-
- // Reader options
- private int numReaderThreads = DEFAULT_NUM_THREADS;
- private int keyWindow = MultiThreadedReader.DEFAULT_KEY_WINDOW;
- private int multiGetBatchSize = MultiThreadedReader.DEFAULT_BATCH_SIZE;
- private int maxReadErrors = MultiThreadedReader.DEFAULT_MAX_ERRORS;
- private int verifyPercent;
-
- private int numTables = 1;
-
- private String superUser;
-
- private String userNames;
- //This file is used to read authentication information in secure clusters.
- private String authnFileName;
-
- private int numRegionsPerServer = DEFAULT_NUM_REGIONS_PER_SERVER;
- private int regionReplication = -1; // not set
- private int regionReplicaId = -1; // not set
-
- private int mobThreshold = -1; // not set
-
- // TODO: refactor LoadTestToolImpl somewhere to make the usage from tests less bad,
- // console tool itself should only be used from console.
- protected boolean isSkipInit = false;
- protected boolean isInitOnly = false;
-
- protected Cipher cipher = null;
-
- protected String[] splitColonSeparated(String option,
- int minNumCols, int maxNumCols) {
- String optVal = cmd.getOptionValue(option);
- String[] cols = optVal.split(COLON);
- if (cols.length < minNumCols || cols.length > maxNumCols) {
- throw new IllegalArgumentException("Expected at least "
- + minNumCols + " columns but no more than " + maxNumCols +
- " in the colon-separated value '" + optVal + "' of the " +
- "-" + option + " option");
- }
- return cols;
- }
-
- protected int getNumThreads(String numThreadsStr) {
- return parseInt(numThreadsStr, 1, Short.MAX_VALUE);
- }
-
- public byte[][] getColumnFamilies() {
- return families;
- }
-
- /**
- * Apply column family options such as Bloom filters, compression, and data
- * block encoding.
- */
- protected void applyColumnFamilyOptions(TableName tableName,
- byte[][] columnFamilies) throws IOException {
- try (Connection conn = ConnectionFactory.createConnection(conf);
- Admin admin = conn.getAdmin()) {
- TableDescriptor tableDesc = admin.getTableDescriptor(tableName);
- LOG.info("Disabling table " + tableName);
- admin.disableTable(tableName);
- for (byte[] cf : columnFamilies) {
- HColumnDescriptor columnDesc = (HColumnDescriptor) tableDesc.getColumnFamily(cf);
- boolean isNewCf = columnDesc == null;
- if (isNewCf) {
- columnDesc = new HColumnDescriptor(cf);
- }
- if (bloomType != null) {
- columnDesc.setBloomFilterType(bloomType);
- }
- if (compressAlgo != null) {
- columnDesc.setCompressionType(compressAlgo);
- }
- if (dataBlockEncodingAlgo != null) {
- columnDesc.setDataBlockEncoding(dataBlockEncodingAlgo);
- }
- if (inMemoryCF) {
- columnDesc.setInMemory(inMemoryCF);
- }
- if (cipher != null) {
- byte[] keyBytes = new byte[cipher.getKeyLength()];
- new SecureRandom().nextBytes(keyBytes);
- columnDesc.setEncryptionType(cipher.getName());
- columnDesc.setEncryptionKey(
- EncryptionUtil.wrapKey(conf,
- User.getCurrent().getShortName(),
- new SecretKeySpec(keyBytes,
- cipher.getName())));
- }
- if (mobThreshold >= 0) {
- columnDesc.setMobEnabled(true);
- columnDesc.setMobThreshold(mobThreshold);
- }
-
- if (isNewCf) {
- admin.addColumnFamily(tableName, columnDesc);
- } else {
- admin.modifyColumnFamily(tableName, columnDesc);
- }
- }
- LOG.info("Enabling table " + tableName);
- admin.enableTable(tableName);
- }
- }
-
- @Override
- protected void addOptions() {
- addOptWithArg(OPT_ZK_QUORUM, "ZK quorum as comma-separated host names " +
- "without port numbers");
- addOptWithArg(OPT_ZK_PARENT_NODE, "name of parent znode in zookeeper");
- addOptWithArg(OPT_TABLE_NAME, "The name of the table to read or write");
- addOptWithArg(OPT_COLUMN_FAMILIES, "The name of the column families to use separated by comma");
- addOptWithArg(OPT_WRITE, OPT_USAGE_LOAD);
- addOptWithArg(OPT_READ, OPT_USAGE_READ);
- addOptWithArg(OPT_UPDATE, OPT_USAGE_UPDATE);
- addOptNoArg(OPT_INIT_ONLY, "Initialize the test table only, don't do any loading");
- addOptWithArg(OPT_BLOOM, OPT_USAGE_BLOOM);
- addOptWithArg(OPT_COMPRESSION, OPT_USAGE_COMPRESSION);
- addOptWithArg(OPT_DATA_BLOCK_ENCODING, OPT_DATA_BLOCK_ENCODING_USAGE);
- addOptWithArg(OPT_MAX_READ_ERRORS, "The maximum number of read errors " +
- "to tolerate before terminating all reader threads. The default is " +
- MultiThreadedReader.DEFAULT_MAX_ERRORS + ".");
- addOptWithArg(OPT_MULTIGET, "Whether to use multi-gets as opposed to " +
- "separate gets for every column in a row");
- addOptWithArg(OPT_KEY_WINDOW, "The 'key window' to maintain between " +
- "reads and writes for concurrent write/read workload. The default " +
- "is " + MultiThreadedReader.DEFAULT_KEY_WINDOW + ".");
-
- addOptNoArg(OPT_MULTIPUT, "Whether to use multi-puts as opposed to " +
- "separate puts for every column in a row");
- addOptNoArg(OPT_BATCHUPDATE, "Whether to use batch as opposed to " +
- "separate updates for every column in a row");
- addOptNoArg(OPT_INMEMORY, OPT_USAGE_IN_MEMORY);
- addOptWithArg(OPT_GENERATOR, OPT_GENERATOR_USAGE);
- addOptWithArg(OPT_WRITER, OPT_WRITER_USAGE);
- addOptWithArg(OPT_UPDATER, OPT_UPDATER_USAGE);
- addOptWithArg(OPT_READER, OPT_READER_USAGE);
-
- addOptWithArg(OPT_NUM_KEYS, "The number of keys to read/write");
- addOptWithArg(OPT_START_KEY, "The first key to read/write " +
- "(a 0-based index). The default value is " +
- DEFAULT_START_KEY + ".");
- addOptNoArg(OPT_SKIP_INIT, "Skip the initialization; assume test table "
- + "already exists");
-
- addOptWithArg(NUM_TABLES,
- "A positive integer number. When a number n is speicfied, load test "
- + "tool will load n table parallely. -tn parameter value becomes "
- + "table name prefix. Each table name is in format <tn>_1...<tn>_n");
-
- addOptWithArg(OPT_REGIONS_PER_SERVER,
- "A positive integer number. When a number n is specified, load test "
- + "tool will create the test table with n regions per server");
-
- addOptWithArg(OPT_ENCRYPTION, OPT_ENCRYPTION_USAGE);
- addOptNoArg(OPT_DEFERRED_LOG_FLUSH, OPT_DEFERRED_LOG_FLUSH_USAGE);
- addOptWithArg(OPT_NUM_REGIONS_PER_SERVER, OPT_NUM_REGIONS_PER_SERVER_USAGE);
- addOptWithArg(OPT_REGION_REPLICATION, OPT_REGION_REPLICATION_USAGE);
- addOptWithArg(OPT_REGION_REPLICA_ID, OPT_REGION_REPLICA_ID_USAGE);
- addOptWithArg(OPT_MOB_THRESHOLD, OPT_MOB_THRESHOLD_USAGE);
- }
-
- @Override
- protected void processOptions(CommandLine cmd) {
- this.cmd = cmd;
-
- tableName = TableName.valueOf(cmd.getOptionValue(OPT_TABLE_NAME,
- DEFAULT_TABLE_NAME));
-
- if (cmd.hasOption(OPT_COLUMN_FAMILIES)) {
- String[] list = cmd.getOptionValue(OPT_COLUMN_FAMILIES).split(",");
- families = new byte[list.length][];
- for (int i = 0; i < list.length; i++) {
- families[i] = Bytes.toBytes(list[i]);
- }
- } else {
- families = DEFAULT_COLUMN_FAMILIES;
- }
-
- isWrite = cmd.hasOption(OPT_WRITE);
- isRead = cmd.hasOption(OPT_READ);
- isUpdate = cmd.hasOption(OPT_UPDATE);
- isInitOnly = cmd.hasOption(OPT_INIT_ONLY);
- deferredLogFlush = cmd.hasOption(OPT_DEFERRED_LOG_FLUSH);
-
- if (!isWrite && !isRead && !isUpdate && !isInitOnly) {
- throw new IllegalArgumentException("Either -" + OPT_WRITE + " or " +
- "-" + OPT_UPDATE + " or -" + OPT_READ + " has to be specified");
- }
-
- if (isInitOnly && (isRead || isWrite || isUpdate)) {
- throw new IllegalArgumentException(OPT_INIT_ONLY + " cannot be specified with"
- + " either -" + OPT_WRITE + " or -" + OPT_UPDATE + " or -" + OPT_READ);
- }
-
- if (!isInitOnly) {
- if (!cmd.hasOption(OPT_NUM_KEYS)) {
- throw new IllegalArgumentException(OPT_NUM_KEYS + " must be specified in "
- + "read or write mode");
- }
- startKey = parseLong(cmd.getOptionValue(OPT_START_KEY,
- String.valueOf(DEFAULT_START_KEY)), 0, Long.MAX_VALUE);
- long numKeys = parseLong(cmd.getOptionValue(OPT_NUM_KEYS), 1,
- Long.MAX_VALUE - startKey);
- endKey = startKey + numKeys;
- isSkipInit = cmd.hasOption(OPT_SKIP_INIT);
- System.out.println("Key range: [" + startKey + ".." + (endKey - 1) + "]");
- }
-
- parseColumnFamilyOptions(cmd);
-
- if (isWrite) {
- String[] writeOpts = splitColonSeparated(OPT_WRITE, 2, 3);
-
- int colIndex = 0;
- minColsPerKey = 1;
- maxColsPerKey = 2 * Integer.parseInt(writeOpts[colIndex++]);
- int avgColDataSize =
- parseInt(writeOpts[colIndex++], 1, Integer.MAX_VALUE);
- minColDataSize = avgColDataSize / 2;
- maxColDataSize = avgColDataSize * 3 / 2;
-
- if (colIndex < writeOpts.length) {
- numWriterThreads = getNumThreads(writeOpts[colIndex++]);
- }
-
- isMultiPut = cmd.hasOption(OPT_MULTIPUT);
-
- mobThreshold = -1;
- if (cmd.hasOption(OPT_MOB_THRESHOLD)) {
- mobThreshold = Integer.parseInt(cmd.getOptionValue(OPT_MOB_THRESHOLD));
- }
-
- System.out.println("Multi-puts: " + isMultiPut);
- System.out.println("Columns per key: " + minColsPerKey + ".."
- + maxColsPerKey);
- System.out.println("Data size per column: " + minColDataSize + ".."
- + maxColDataSize);
- }
-
- if (isUpdate) {
- String[] mutateOpts = splitColonSeparated(OPT_UPDATE, 1, 3);
- int colIndex = 0;
- updatePercent = parseInt(mutateOpts[colIndex++], 0, 100);
- if (colIndex < mutateOpts.length) {
- numUpdaterThreads = getNumThreads(mutateOpts[colIndex++]);
- }
- if (colIndex < mutateOpts.length) {
- ignoreConflicts = parseInt(mutateOpts[colIndex++], 0, 1) == 1;
- }
-
- isBatchUpdate = cmd.hasOption(OPT_BATCHUPDATE);
-
- System.out.println("Batch updates: " + isBatchUpdate);
- System.out.println("Percent of keys to update: " + updatePercent);
- System.out.println("Updater threads: " + numUpdaterThreads);
- System.out.println("Ignore nonce conflicts: " + ignoreConflicts);
- }
-
- if (isRead) {
- String[] readOpts = splitColonSeparated(OPT_READ, 1, 2);
- int colIndex = 0;
- verifyPercent = parseInt(readOpts[colIndex++], 0, 100);
- if (colIndex < readOpts.length) {
- numReaderThreads = getNumThreads(readOpts[colIndex++]);
- }
-
- if (cmd.hasOption(OPT_MAX_READ_ERRORS)) {
- maxReadErrors = parseInt(cmd.getOptionValue(OPT_MAX_READ_ERRORS),
- 0, Integer.MAX_VALUE);
- }
-
- if (cmd.hasOption(OPT_KEY_WINDOW)) {
- keyWindow = parseInt(cmd.getOptionValue(OPT_KEY_WINDOW),
- 0, Integer.MAX_VALUE);
- }
-
- if (cmd.hasOption(OPT_MULTIGET)) {
- multiGetBatchSize = parseInt(cmd.getOptionValue(OPT_MULTIGET),
- 0, Integer.MAX_VALUE);
- }
-
- System.out.println("Multi-gets (value of 1 means no multigets): " + multiGetBatchSize);
- System.out.println("Percent of keys to verify: " + verifyPercent);
- System.out.println("Reader threads: " + numReaderThreads);
- }
-
- numTables = 1;
- if (cmd.hasOption(NUM_TABLES)) {
- numTables = parseInt(cmd.getOptionValue(NUM_TABLES), 1, Short.MAX_VALUE);
- }
-
- numRegionsPerServer = DEFAULT_NUM_REGIONS_PER_SERVER;
- if (cmd.hasOption(OPT_NUM_REGIONS_PER_SERVER)) {
- numRegionsPerServer = Integer.parseInt(cmd.getOptionValue(OPT_NUM_REGIONS_PER_SERVER));
- }
-
- regionReplication = 1;
- if (cmd.hasOption(OPT_REGION_REPLICATION)) {
- regionReplication = Integer.parseInt(cmd.getOptionValue(OPT_REGION_REPLICATION));
- }
-
- regionReplicaId = -1;
- if (cmd.hasOption(OPT_REGION_REPLICA_ID)) {
- regionReplicaId = Integer.parseInt(cmd.getOptionValue(OPT_REGION_REPLICA_ID));
- }
- }
-
- private void parseColumnFamilyOptions(CommandLine cmd) {
- String dataBlockEncodingStr = cmd.getOptionValue(OPT_DATA_BLOCK_ENCODING);
- dataBlockEncodingAlgo = dataBlockEncodingStr == null ? null :
- DataBlockEncoding.valueOf(dataBlockEncodingStr);
-
- String compressStr = cmd.getOptionValue(OPT_COMPRESSION);
- compressAlgo = compressStr == null ? Compression.Algorithm.NONE :
- Compression.Algorithm.valueOf(compressStr);
-
- String bloomStr = cmd.getOptionValue(OPT_BLOOM);
- bloomType = bloomStr == null ? BloomType.ROW :
- BloomType.valueOf(bloomStr);
-
- inMemoryCF = cmd.hasOption(OPT_INMEMORY);
- if (cmd.hasOption(OPT_ENCRYPTION)) {
- cipher = Encryption.getCipher(conf, cmd.getOptionValue(OPT_ENCRYPTION));
- }
-
- }
-
- public void initTestTable() throws IOException {
- Durability durability = Durability.USE_DEFAULT;
- if (deferredLogFlush) {
- durability = Durability.ASYNC_WAL;
- }
-
- HBaseTestingUtility.createPreSplitLoadTestTable(conf, tableName,
- getColumnFamilies(), compressAlgo, dataBlockEncodingAlgo, numRegionsPerServer,
- regionReplication, durability);
- applyColumnFamilyOptions(tableName, getColumnFamilies());
- }
-
- @Override
- protected int doWork() throws IOException {
- if (numTables > 1) {
- return parallelLoadTables();
- } else {
- return loadTable();
- }
- }
-
- protected int loadTable() throws IOException {
- if (cmd.hasOption(OPT_ZK_QUORUM)) {
- conf.set(HConstants.ZOOKEEPER_QUORUM, cmd.getOptionValue(OPT_ZK_QUORUM));
- }
- if (cmd.hasOption(OPT_ZK_PARENT_NODE)) {
- conf.set(HConstants.ZOOKEEPER_ZNODE_PARENT, cmd.getOptionValue(OPT_ZK_PARENT_NODE));
- }
-
- if (isInitOnly) {
- LOG.info("Initializing only; no reads or writes");
- initTestTable();
- return 0;
- }
-
- if (!isSkipInit) {
- initTestTable();
- }
- LoadTestDataGenerator dataGen = null;
- if (cmd.hasOption(OPT_GENERATOR)) {
- String[] clazzAndArgs = cmd.getOptionValue(OPT_GENERATOR).split(COLON);
- dataGen = getLoadGeneratorInstance(clazzAndArgs[0]);
- String[] args;
- if (dataGen instanceof LoadTestDataGeneratorWithACL) {
- LOG.info("Using LoadTestDataGeneratorWithACL");
- if (User.isHBaseSecurityEnabled(conf)) {
- LOG.info("Security is enabled");
- authnFileName = clazzAndArgs[1];
- superUser = clazzAndArgs[2];
- userNames = clazzAndArgs[3];
- args = Arrays.copyOfRange(clazzAndArgs, 2, clazzAndArgs.length);
- Properties authConfig = new Properties();
- authConfig.load(this.getClass().getClassLoader().getResourceAsStream(authnFileName));
- try {
- addAuthInfoToConf(authConfig, conf, superUser, userNames);
- } catch (IOException exp) {
- LOG.error(exp);
- return EXIT_FAILURE;
- }
- userOwner = User.create(loginAndReturnUGI(conf, superUser));
- } else {
- superUser = clazzAndArgs[1];
- userNames = clazzAndArgs[2];
- args = Arrays.copyOfRange(clazzAndArgs, 1, clazzAndArgs.length);
- userOwner = User.createUserForTesting(conf, superUser, new String[0]);
- }
- } else {
- args = clazzAndArgs.length == 1 ? new String[0] : Arrays.copyOfRange(clazzAndArgs, 1,
- clazzAndArgs.length);
- }
- dataGen.initialize(args);
- } else {
- // Default DataGenerator is MultiThreadedAction.DefaultDataGenerator
- dataGen = new MultiThreadedAction.DefaultDataGenerator(minColDataSize, maxColDataSize,
- minColsPerKey, maxColsPerKey, families);
- }
-
- if (userOwner != null) {
- LOG.info("Granting permissions for user " + userOwner.getShortName());
- Permission.Action[] actions = {
- Permission.Action.ADMIN, Permission.Action.CREATE,
- Permission.Action.READ, Permission.Action.WRITE };
- try {
- AccessControlClient.grant(ConnectionFactory.createConnection(conf),
- tableName, userOwner.getShortName(), null, null, actions);
- } catch (Throwable e) {
- LOG.fatal("Error in granting permission for the user " + userOwner.getShortName(), e);
- return EXIT_FAILURE;
- }
- }
-
- if (userNames != null) {
- // This will be comma separated list of expressions.
- String users[] = userNames.split(",");
- User user = null;
- for (String userStr : users) {
- if (User.isHBaseSecurityEnabled(conf)) {
- user = User.create(loginAndReturnUGI(conf, userStr));
- } else {
- user = User.createUserForTesting(conf, userStr, new String[0]);
- }
- }
- }
-
- if (isWrite) {
- if (userOwner != null) {
- writerThreads = new MultiThreadedWriterWithACL(dataGen, conf, tableName, userOwner);
- } else {
- String writerClass = null;
- if (cmd.hasOption(OPT_WRITER)) {
- writerClass = cmd.getOptionValue(OPT_WRITER);
- } else {
- writerClass = MultiThreadedWriter.class.getCanonicalName();
- }
-
- writerThreads = getMultiThreadedWriterInstance(writerClass, dataGen);
- }
- writerThreads.setMultiPut(isMultiPut);
- }
-
- if (isUpdate) {
- if (userOwner != null) {
- updaterThreads = new MultiThreadedUpdaterWithACL(dataGen, conf, tableName, updatePercent,
- userOwner, userNames);
- } else {
- String updaterClass = null;
- if (cmd.hasOption(OPT_UPDATER)) {
- updaterClass = cmd.getOptionValue(OPT_UPDATER);
- } else {
- updaterClass = MultiThreadedUpdater.class.getCanonicalName();
- }
- updaterThreads = getMultiThreadedUpdaterInstance(updaterClass, dataGen);
- }
- updaterThreads.setBatchUpdate(isBatchUpdate);
- updaterThreads.setIgnoreNonceConflicts(ignoreConflicts);
- }
-
- if (isRead) {
- if (userOwner != null) {
- readerThreads = new MultiThreadedReaderWithACL(dataGen, conf, tableName, verifyPercent,
- userNames);
- } else {
- String readerClass = null;
- if (cmd.hasOption(OPT_READER)) {
- readerClass = cmd.getOptionValue(OPT_READER);
- } else {
- readerClass = MultiThreadedReader.class.getCanonicalName();
- }
- readerThreads = getMultiThreadedReaderInstance(readerClass, dataGen);
- }
- readerThreads.setMaxErrors(maxReadErrors);
- readerThreads.setKeyWindow(keyWindow);
- readerThreads.setMultiGetBatchSize(multiGetBatchSize);
- readerThreads.setRegionReplicaId(regionReplicaId);
- }
-
- if (isUpdate && isWrite) {
- LOG.info("Concurrent write/update workload: making updaters aware of the " +
- "write point");
- updaterThreads.linkToWriter(writerThreads);
- }
-
- if (isRead && (isUpdate || isWrite)) {
- LOG.info("Concurrent write/read workload: making readers aware of the " +
- "write point");
- readerThreads.linkToWriter(isUpdate ? updaterThreads : writerThreads);
- }
-
- if (isWrite) {
- System.out.println("Starting to write data...");
- writerThreads.start(startKey, endKey, numWriterThreads);
- }
-
- if (isUpdate) {
- LOG.info("Starting to mutate data...");
- System.out.println("Starting to mutate data...");
- // TODO : currently append and increment operations not tested with tags
- // Will update this after it is done
- updaterThreads.start(startKey, endKey, numUpdaterThreads);
- }
-
- if (isRead) {
- System.out.println("Starting to read data...");
- readerThreads.start(startKey, endKey, numReaderThreads);
- }
-
- if (isWrite) {
- writerThreads.waitForFinish();
- }
-
- if (isUpdate) {
- updaterThreads.waitForFinish();
- }
-
- if (isRead) {
- readerThreads.waitForFinish();
- }
-
- boolean success = true;
- if (isWrite) {
- success = success && writerThreads.getNumWriteFailures() == 0;
- }
- if (isUpdate) {
- success = success && updaterThreads.getNumWriteFailures() == 0;
- }
- if (isRead) {
- success = success && readerThreads.getNumReadErrors() == 0
- && readerThreads.getNumReadFailures() == 0;
- }
- return success ? EXIT_SUCCESS : EXIT_FAILURE;
- }
-
- private LoadTestDataGenerator getLoadGeneratorInstance(String clazzName) throws IOException {
- try {
- Class<?> clazz = Class.forName(clazzName);
- Constructor<?> constructor = clazz.getConstructor(int.class, int.class, int.class, int.class,
- byte[][].class);
- return (LoadTestDataGenerator) constructor.newInstance(minColDataSize, maxColDataSize,
- minColsPerKey, maxColsPerKey, families);
- } catch (Exception e) {
- throw new IOException(e);
- }
- }
-
- private MultiThreadedWriter getMultiThreadedWriterInstance(String clazzName
- , LoadTestDataGenerator dataGen) throws IOException {
- try {
- Class<?> clazz = Class.forName(clazzName);
- Constructor<?> constructor = clazz.getConstructor(
- LoadTestDataGenerator.class, Configuration.class, TableName.class);
- return (MultiThreadedWriter) constructor.newInstance(dataGen, conf, tableName);
- } catch (Exception e) {
- throw new IOException(e);
- }
- }
-
- private MultiThreadedUpdater getMultiThreadedUpdaterInstance(String clazzName
- , LoadTestDataGenerator dataGen) throws IOException {
- try {
- Class<?> clazz = Class.forName(clazzName);
- Constructor<?> constructor = clazz.getConstructor(
- LoadTestDataGenerator.class, Configuration.class, TableName.class, double.class);
- return (MultiThreadedUpdater) constructor.newInstance(
- dataGen, conf, tableName, updatePercent);
- } catch (Exception e) {
- throw new IOException(e);
- }
- }
-
- private MultiThreadedReader getMultiThreadedReaderInstance(String clazzName
- , LoadTestDataGenerator dataGen) throws IOException {
- try {
- Class<?> clazz = Class.forName(clazzName);
- Constructor<?> constructor = clazz.getConstructor(
- LoadTestDataGenerator.class, Configuration.class, TableName.class, double.class);
- return (MultiThreadedReader) constructor.newInstance(dataGen, conf, tableName, verifyPercent);
- } catch (Exception e) {
- throw new IOException(e);
- }
- }
-
- public static byte[] generateData(final Random r, int length) {
- byte [] b = new byte [length];
- int i = 0;
-
- for(i = 0; i < (length-8); i += 8) {
- b[i] = (byte) (65 + r.nextInt(26));
- b[i+1] = b[i];
- b[i+2] = b[i];
- b[i+3] = b[i];
- b[i+4] = b[i];
- b[i+5] = b[i];
- b[i+6] = b[i];
- b[i+7] = b[i];
- }
-
- byte a = (byte) (65 + r.nextInt(26));
- for(; i < length; i++) {
- b[i] = a;
- }
- return b;
- }
- public static void main(String[] args) {
- new LoadTestTool().doStaticMain(args);
- }
-
- /**
- * When NUM_TABLES is specified, the function starts multiple worker threads
- * which individually start a LoadTestTool instance to load a table. Each
- * table name is in the format <tn>_<index>. For example, with "-tn test -num_tables 2",
- * the table names will be "test_1" and "test_2".
- *
- * @throws IOException
- */
- private int parallelLoadTables()
- throws IOException {
- // create new command args
- String tableName = cmd.getOptionValue(OPT_TABLE_NAME, DEFAULT_TABLE_NAME);
- String[] newArgs = null;
- if (!cmd.hasOption(LoadTestTool.OPT_TABLE_NAME)) {
- newArgs = new String[cmdLineArgs.length + 2];
- newArgs[0] = "-" + LoadTestTool.OPT_TABLE_NAME;
- newArgs[1] = LoadTestTool.DEFAULT_TABLE_NAME;
- System.arraycopy(cmdLineArgs, 0, newArgs, 2, cmdLineArgs.length);
- } else {
- newArgs = cmdLineArgs;
- }
-
- int tableNameValueIndex = -1;
- for (int j = 0; j < newArgs.length; j++) {
- if (newArgs[j].endsWith(OPT_TABLE_NAME)) {
- tableNameValueIndex = j + 1;
- } else if (newArgs[j].endsWith(NUM_TABLES)) {
- // change NUM_TABLES to 1 so that each worker loads one table
- newArgs[j + 1] = "1";
- }
- }
-
- // starting to load multiple tables
- List<WorkerThread> workers = new ArrayList<>();
- for (int i = 0; i < numTables; i++) {
- String[] workerArgs = newArgs.clone();
- workerArgs[tableNameValueIndex] = tableName + "_" + (i+1);
- WorkerThread worker = new WorkerThread(i, workerArgs);
- workers.add(worker);
- LOG.info(worker + " starting");
- worker.start();
- }
-
- // wait for all workers to finish
- LOG.info("Waiting for worker threads to finish");
- for (WorkerThread t : workers) {
- try {
- t.join();
- } catch (InterruptedException ie) {
- IOException iie = new InterruptedIOException();
- iie.initCause(ie);
- throw iie;
- }
- checkForErrors();
- }
-
- return EXIT_SUCCESS;
- }
-
- // If an exception is thrown by one of worker threads, it will be
- // stored here.
- protected AtomicReference<Throwable> thrown = new AtomicReference<>();
-
- private void workerThreadError(Throwable t) {
- thrown.compareAndSet(null, t);
- }
-
- /**
- * Check for errors in the writer threads. If any is found, rethrow it.
- */
- private void checkForErrors() throws IOException {
- Throwable thrown = this.thrown.get();
- if (thrown == null) return;
- if (thrown instanceof IOException) {
- throw (IOException) thrown;
- } else {
- throw new RuntimeException(thrown);
- }
- }
-
- class WorkerThread extends Thread {
- private String[] workerArgs;
-
- WorkerThread(int i, String[] args) {
- super("WorkerThread-" + i);
- workerArgs = args;
- }
-
- @Override
- public void run() {
- try {
- int ret = ToolRunner.run(HBaseConfiguration.create(), new LoadTestTool(), workerArgs);
- if (ret != 0) {
- throw new RuntimeException("LoadTestTool exit with non-zero return code.");
- }
- } catch (Exception ex) {
- LOG.error("Error in worker thread", ex);
- workerThreadError(ex);
- }
- }
- }
-
- private void addAuthInfoToConf(Properties authConfig, Configuration conf, String owner,
- String userList) throws IOException {
- List<String> users = new ArrayList(Arrays.asList(userList.split(",")));
- users.add(owner);
- for (String user : users) {
- String keyTabFileConfKey = "hbase." + user + ".keytab.file";
- String principalConfKey = "hbase." + user + ".kerberos.principal";
- if (!authConfig.containsKey(keyTabFileConfKey) || !authConfig.containsKey(principalConfKey)) {
- throw new IOException("Authentication configs missing for user : " + user);
- }
- }
- for (String key : authConfig.stringPropertyNames()) {
- conf.set(key, authConfig.getProperty(key));
- }
- LOG.debug("Added authentication properties to config successfully.");
- }
-
- public static UserGroupInformation loginAndReturnUGI(Configuration conf, String username)
- throws IOException {
- String hostname = InetAddress.getLocalHost().getHostName();
- String keyTabFileConfKey = "hbase." + username + ".keytab.file";
- String keyTabFileLocation = conf.get(keyTabFileConfKey);
- String principalConfKey = "hbase." + username + ".kerberos.principal";
- String principal = SecurityUtil.getServerPrincipal(conf.get(principalConfKey), hostname);
- if (keyTabFileLocation == null || principal == null) {
- LOG.warn("Principal or key tab file null for : " + principalConfKey + ", "
- + keyTabFileConfKey);
- }
- UserGroupInformation ugi =
- UserGroupInformation.loginUserFromKeytabAndReturnUGI(principal, keyTabFileLocation);
- return ugi;
- }
-}
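
For reference, the WorkerThread.run() code in the file above already shows how the tool launches itself through ToolRunner. A minimal sketch of driving the relocated LoadTestTool the same way from user code follows; it assumes the class keeps the org.apache.hadoop.hbase.util package after the module move, and the option values are purely illustrative (-tn, -write, -read and -num_keys are the options declared in addOptions() above).

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.util.LoadTestTool;
import org.apache.hadoop.util.ToolRunner;

public class LoadTestToolLauncher {
  public static void main(String[] args) throws Exception {
    // Same pattern as WorkerThread.run() above: ToolRunner drives the tool.
    Configuration conf = HBaseConfiguration.create();
    // Example values only: 10 columns per key, 100-byte values, verify 100%
    // of reads with 20 reader threads, over one million keys.
    String[] toolArgs = {
        "-tn", "cluster_test",
        "-write", "10:100",
        "-read", "100:20",
        "-num_keys", "1000000"
    };
    int exitCode = ToolRunner.run(conf, new LoadTestTool(), toolArgs);
    System.exit(exitCode);
  }
}
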
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedAction.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedAction.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedAction.java
index 1d2e9a6..6550baa 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedAction.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedAction.java
@@ -49,7 +49,7 @@ import org.apache.hadoop.util.StringUtils;
/**
* Common base class for reader and writer parts of multi-thread HBase load
- * test ({@link LoadTestTool}).
+ * test (See LoadTestTool).
*/
public abstract class MultiThreadedAction {
private static final Log LOG = LogFactory.getLog(MultiThreadedAction.class);
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedReaderWithACL.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedReaderWithACL.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedReaderWithACL.java
index 1e7e341..e951175 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedReaderWithACL.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedReaderWithACL.java
@@ -28,6 +28,7 @@ import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.security.HBaseKerberosUtils;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.util.test.LoadTestDataGenerator;
import org.apache.hadoop.security.UserGroupInformation;
@@ -121,7 +122,7 @@ public class MultiThreadedReaderWithACL extends MultiThreadedReader {
UserGroupInformation realUserUgi;
if(!users.containsKey(userNames[mod])) {
if(User.isHBaseSecurityEnabled(conf)) {
- realUserUgi = LoadTestTool.loginAndReturnUGI(conf, userNames[mod]);
+ realUserUgi = HBaseKerberosUtils.loginAndReturnUGI(conf, userNames[mod]);
} else {
realUserUgi = UserGroupInformation.createRemoteUser(userNames[mod]);
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedUpdaterWithACL.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedUpdaterWithACL.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedUpdaterWithACL.java
index 40e23fb..9d9bb63 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedUpdaterWithACL.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedUpdaterWithACL.java
@@ -37,6 +37,7 @@ import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException;
import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.security.HBaseKerberosUtils;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.util.test.LoadTestDataGenerator;
import org.apache.hadoop.security.UserGroupInformation;
@@ -138,7 +139,7 @@ public class MultiThreadedUpdaterWithACL extends MultiThreadedUpdater {
try {
if (!users.containsKey(userNames[mod])) {
if (User.isHBaseSecurityEnabled(conf)) {
- realUserUgi = LoadTestTool.loginAndReturnUGI(conf, userNames[mod]);
+ realUserUgi = HBaseKerberosUtils.loginAndReturnUGI(conf, userNames[mod]);
} else {
realUserUgi = UserGroupInformation.createRemoteUser(userNames[mod]);
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/util/RestartMetaTest.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/RestartMetaTest.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/RestartMetaTest.java
index 6beb2e6..7972855 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/RestartMetaTest.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/RestartMetaTest.java
@@ -81,7 +81,7 @@ public class RestartMetaTest extends AbstractHBaseTool {
// start the writers
LoadTestDataGenerator dataGen = new MultiThreadedAction.DefaultDataGenerator(
minColDataSize, maxColDataSize, minColsPerKey, maxColsPerKey,
- LoadTestTool.DEFAULT_COLUMN_FAMILY);
+ HFileTestUtil.DEFAULT_COLUMN_FAMILY);
MultiThreadedWriter writer = new MultiThreadedWriter(dataGen, conf, TABLE_NAME);
writer.setMultiPut(true);
writer.start(startKey, endKey, numThreads);
@@ -101,7 +101,7 @@ public class RestartMetaTest extends AbstractHBaseTool {
// create tables if needed
HBaseTestingUtility.createPreSplitLoadTestTable(conf, TABLE_NAME,
- LoadTestTool.DEFAULT_COLUMN_FAMILY, Compression.Algorithm.NONE,
+ HFileTestUtil.DEFAULT_COLUMN_FAMILY, Compression.Algorithm.NONE,
DataBlockEncoding.NONE);
LOG.debug("Loading data....\n\n");
@@ -143,8 +143,8 @@ public class RestartMetaTest extends AbstractHBaseTool {
@Override
protected void addOptions() {
addOptWithArg(OPT_NUM_RS, "Number of Region Servers");
- addOptWithArg(LoadTestTool.OPT_DATA_BLOCK_ENCODING,
- LoadTestTool.OPT_DATA_BLOCK_ENCODING_USAGE);
+ addOptWithArg(HFileTestUtil.OPT_DATA_BLOCK_ENCODING,
+ HFileTestUtil.OPT_DATA_BLOCK_ENCODING_USAGE);
}
@Override
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestRegionSizeCalculator.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestRegionSizeCalculator.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestRegionSizeCalculator.java
deleted file mode 100644
index 51dc238..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestRegionSizeCalculator.java
+++ /dev/null
@@ -1,159 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.util;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.HRegionLocation;
-import org.apache.hadoop.hbase.RegionLoad;
-import org.apache.hadoop.hbase.ServerName;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Admin;
-import org.apache.hadoop.hbase.testclassification.MiscTests;
-import org.apache.hadoop.hbase.testclassification.SmallTests;
-import org.apache.hadoop.hbase.client.RegionLocator;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.mockito.Mockito;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.TreeMap;
-
-import static org.apache.hadoop.hbase.HConstants.DEFAULT_REGIONSERVER_PORT;
-import static org.junit.Assert.assertEquals;
-import static org.mockito.Mockito.when;
-
-@Category({MiscTests.class, SmallTests.class})
-public class TestRegionSizeCalculator {
-
- private Configuration configuration = new Configuration();
- private final long megabyte = 1024L * 1024L;
- private final ServerName sn = ServerName.valueOf("local-rs", DEFAULT_REGIONSERVER_PORT,
- ServerName.NON_STARTCODE);
-
- @Test
- public void testSimpleTestCase() throws Exception {
-
- RegionLocator regionLocator = mockRegionLocator("region1", "region2", "region3");
-
- Admin admin = mockAdmin(
- mockRegion("region1", 123),
- mockRegion("region3", 1232),
- mockRegion("region2", 54321)
- );
-
- RegionSizeCalculator calculator = new RegionSizeCalculator(regionLocator, admin);
-
- assertEquals(123 * megabyte, calculator.getRegionSize("region1".getBytes()));
- assertEquals(54321 * megabyte, calculator.getRegionSize("region2".getBytes()));
- assertEquals(1232 * megabyte, calculator.getRegionSize("region3".getBytes()));
- // if regionCalculator does not know about a region, it should return 0
- assertEquals(0 * megabyte, calculator.getRegionSize("otherTableRegion".getBytes()));
-
- assertEquals(3, calculator.getRegionSizeMap().size());
- }
-
-
- /**
- * When the size of a region in megabytes is larger than the largest possible integer, there
- * could be an error caused by loss of precision.
- * */
- @Test
- public void testLargeRegion() throws Exception {
-
- RegionLocator regionLocator = mockRegionLocator("largeRegion");
-
- Admin admin = mockAdmin(
- mockRegion("largeRegion", Integer.MAX_VALUE)
- );
-
- RegionSizeCalculator calculator = new RegionSizeCalculator(regionLocator, admin);
-
- assertEquals(((long) Integer.MAX_VALUE) * megabyte, calculator.getRegionSize("largeRegion".getBytes()));
- }
-
- /** When calculator is disabled, it should return 0 for each request.*/
- @Test
- public void testDisabled() throws Exception {
- String regionName = "cz.goout:/index.html";
- RegionLocator table = mockRegionLocator(regionName);
-
- Admin admin = mockAdmin(
- mockRegion(regionName, 999)
- );
-
- //first request on enabled calculator
- RegionSizeCalculator calculator = new RegionSizeCalculator(table, admin);
- assertEquals(999 * megabyte, calculator.getRegionSize(regionName.getBytes()));
-
- //then disabled calculator.
- configuration.setBoolean(RegionSizeCalculator.ENABLE_REGIONSIZECALCULATOR, false);
- RegionSizeCalculator disabledCalculator = new RegionSizeCalculator(table, admin);
- assertEquals(0 * megabyte, disabledCalculator.getRegionSize(regionName.getBytes()));
-
- assertEquals(0, disabledCalculator.getRegionSizeMap().size());
- }
-
- /**
- * Makes a mock table (RegionLocator) with the given region names.
- * */
- private RegionLocator mockRegionLocator(String... regionNames) throws IOException {
- RegionLocator mockedTable = Mockito.mock(RegionLocator.class);
- when(mockedTable.getName()).thenReturn(TableName.valueOf("sizeTestTable"));
- List<HRegionLocation> regionLocations = new ArrayList<>(regionNames.length);
- when(mockedTable.getAllRegionLocations()).thenReturn(regionLocations);
-
- for (String regionName : regionNames) {
- HRegionInfo info = Mockito.mock(HRegionInfo.class);
- when(info.getRegionName()).thenReturn(regionName.getBytes());
- regionLocations.add(new HRegionLocation(info, sn));
- }
-
- return mockedTable;
- }
-
- /**
- * Creates a mock Admin returning RegionLoad info for the given regions.
- */
- private Admin mockAdmin(RegionLoad... regionLoadArray) throws Exception {
- Admin mockAdmin = Mockito.mock(Admin.class);
- Map<byte[], RegionLoad> regionLoads = new TreeMap<>(Bytes.BYTES_COMPARATOR);
- for (RegionLoad regionLoad : regionLoadArray) {
- regionLoads.put(regionLoad.getName(), regionLoad);
- }
- when(mockAdmin.getConfiguration()).thenReturn(configuration);
- when(mockAdmin.getRegionLoad(sn, TableName.valueOf("sizeTestTable"))).thenReturn(regionLoads);
- return mockAdmin;
- }
-
- /**
- * Creates mock of region with given name and size.
- *
- * @param fileSizeMb number of megabytes occupied by the region in the file store
- * */
- private RegionLoad mockRegion(String regionName, int fileSizeMb) {
- RegionLoad region = Mockito.mock(RegionLoad.class);
- when(region.getName()).thenReturn(regionName.getBytes());
- when(region.getNameAsString()).thenReturn(regionName);
- when(region.getStorefileSizeMB()).thenReturn(fileSizeMb);
- return region;
- }
-}
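
The deleted test above documents the RegionSizeCalculator contract: construct it from a RegionLocator and an Admin, then ask for per-region sizes, with unknown regions reported as 0. A minimal sketch of that usage against a live cluster follows; the table name "my_table" is hypothetical, and the import assumes the pre-move org.apache.hadoop.hbase.util package (this commit relocates the class to the new module).

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.util.RegionSizeCalculator;

public class RegionSizeExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    try (Connection conn = ConnectionFactory.createConnection(conf);
         RegionLocator locator = conn.getRegionLocator(TableName.valueOf("my_table"));
         Admin admin = conn.getAdmin()) {
      // Constructor signature exercised by the test: (RegionLocator, Admin).
      RegionSizeCalculator calculator = new RegionSizeCalculator(locator, admin);
      // Per the test, a region the calculator does not know about yields 0.
      long unknown = calculator.getRegionSize("not_a_region".getBytes());
      System.out.println("Unknown region size: " + unknown);
      for (long sizeBytes : calculator.getRegionSizeMap().values()) {
        System.out.println(sizeBytes + " bytes");
      }
    }
  }
}
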
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/util/test/LoadTestDataGenerator.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/test/LoadTestDataGenerator.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/test/LoadTestDataGenerator.java
index bf7bf45..2deba00 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/test/LoadTestDataGenerator.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/test/LoadTestDataGenerator.java
@@ -17,6 +17,7 @@
package org.apache.hadoop.hbase.util.test;
import java.io.IOException;
+import java.util.Random;
import java.util.Set;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
@@ -56,6 +57,28 @@ public abstract class LoadTestDataGenerator {
this.kvGenerator = new LoadTestKVGenerator(minValueSize, maxValueSize);
}
+ public static byte[] generateData(final Random r, int length) {
+ byte [] b = new byte [length];
+ int i = 0;
+
+ for(i = 0; i < (length-8); i += 8) {
+ b[i] = (byte) (65 + r.nextInt(26));
+ b[i+1] = b[i];
+ b[i+2] = b[i];
+ b[i+3] = b[i];
+ b[i+4] = b[i];
+ b[i+5] = b[i];
+ b[i+6] = b[i];
+ b[i+7] = b[i];
+ }
+
+ byte a = (byte) (65 + r.nextInt(26));
+ for(; i < length; i++) {
+ b[i] = a;
+ }
+ return b;
+ }
+
/**
* initialize the LoadTestDataGenerator
*
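
The generateData(Random, int) helper added here (and removed from LoadTestTool earlier in this patch) fills the value in 8-byte runs of a single random uppercase ASCII letter, with any remaining tail bytes filled by one more letter. A small sketch of calling it, using an arbitrary seed and length:

import java.nio.charset.StandardCharsets;
import java.util.Random;

import org.apache.hadoop.hbase.util.test.LoadTestDataGenerator;

public class GenerateDataDemo {
  public static void main(String[] args) {
    // Seed (42) and length (20) are arbitrary example values.
    byte[] value = LoadTestDataGenerator.generateData(new Random(42), 20);
    // With length 20 this yields two 8-byte runs plus a 4-byte tail,
    // e.g. "AAAAAAAABBBBBBBBCCCC" (the actual letters depend on the seed).
    System.out.println(new String(value, StandardCharsets.US_ASCII));
  }
}
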
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/resources/org/apache/hadoop/hbase/PerformanceEvaluation_Counter.properties
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/resources/org/apache/hadoop/hbase/PerformanceEvaluation_Counter.properties b/hbase-server/src/test/resources/org/apache/hadoop/hbase/PerformanceEvaluation_Counter.properties
deleted file mode 100644
index 6fca96a..0000000
--- a/hbase-server/src/test/resources/org/apache/hadoop/hbase/PerformanceEvaluation_Counter.properties
+++ /dev/null
@@ -1,28 +0,0 @@
-# ResourceBundle properties file for Map-Reduce counters
-
-#/**
-# * Licensed to the Apache Software Foundation (ASF) under one
-# * or more contributor license agreements. See the NOTICE file
-# * distributed with this work for additional information
-# * regarding copyright ownership. The ASF licenses this file
-# * to you under the Apache License, Version 2.0 (the
-# * "License"); you may not use this file except in compliance
-# * with the License. You may obtain a copy of the License at
-# *
-# * http://www.apache.org/licenses/LICENSE-2.0
-# *
-# * Unless required by applicable law or agreed to in writing, software
-# * distributed under the License is distributed on an "AS IS" BASIS,
-# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# * See the License for the specific language governing permissions and
-# * limitations under the License.
-# */
-
-CounterGroupName= HBase Performance Evaluation
-ELAPSED_TIME.name= Elapsed time in milliseconds
-ROWS.name= Row count
-# ResourceBundle properties file for Map-Reduce counters
-
-CounterGroupName= HBase Performance Evaluation
-ELAPSED_TIME.name= Elapsed time in milliseconds
-ROWS.name= Row count
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/resources/org/apache/hadoop/hbase/mapreduce/exportedTableIn94Format
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/resources/org/apache/hadoop/hbase/mapreduce/exportedTableIn94Format b/hbase-server/src/test/resources/org/apache/hadoop/hbase/mapreduce/exportedTableIn94Format
deleted file mode 100755
index 762ddd7..0000000
Binary files a/hbase-server/src/test/resources/org/apache/hadoop/hbase/mapreduce/exportedTableIn94Format and /dev/null differ
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-spark/pom.xml
----------------------------------------------------------------------
diff --git a/hbase-spark/pom.xml b/hbase-spark/pom.xml
index 51e67fa..d4ffb6b 100644
--- a/hbase-spark/pom.xml
+++ b/hbase-spark/pom.xml
@@ -488,6 +488,10 @@
<type>test-jar</type>
</dependency>
<dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-mapreduce</artifactId>
+ </dependency>
+ <dependency>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
</dependency>
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-spark/src/test/java/org/apache/hadoop/hbase/spark/TestJavaHBaseContext.java
----------------------------------------------------------------------
diff --git a/hbase-spark/src/test/java/org/apache/hadoop/hbase/spark/TestJavaHBaseContext.java b/hbase-spark/src/test/java/org/apache/hadoop/hbase/spark/TestJavaHBaseContext.java
index 93cd939..bfacbe8 100644
--- a/hbase-spark/src/test/java/org/apache/hadoop/hbase/spark/TestJavaHBaseContext.java
+++ b/hbase-spark/src/test/java/org/apache/hadoop/hbase/spark/TestJavaHBaseContext.java
@@ -27,7 +27,6 @@ import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 464d42f..e610c22 100755
--- a/pom.xml
+++ b/pom.xml
@@ -64,6 +64,7 @@
<module>hbase-build-support</module>
<module>hbase-build-configuration</module>
<module>hbase-replication</module>
+ <module>hbase-mapreduce</module>
<module>hbase-resource-bundle</module>
<module>hbase-server</module>
<module>hbase-thrift</module>
@@ -1628,6 +1629,18 @@
<scope>test</scope>
</dependency>
<dependency>
+ <artifactId>hbase-mapreduce</artifactId>
+ <groupId>org.apache.hbase</groupId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <artifactId>hbase-mapreduce</artifactId>
+ <groupId>org.apache.hbase</groupId>
+ <version>${project.version}</version>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
<artifactId>hbase-endpoint</artifactId>
<groupId>org.apache.hbase</groupId>
<version>${project.version}</version>
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/src/main/asciidoc/_chapters/ops_mgt.adoc
----------------------------------------------------------------------
diff --git a/src/main/asciidoc/_chapters/ops_mgt.adoc b/src/main/asciidoc/_chapters/ops_mgt.adoc
index 6181b13..f96cd6c 100644
--- a/src/main/asciidoc/_chapters/ops_mgt.adoc
+++ b/src/main/asciidoc/_chapters/ops_mgt.adoc
@@ -2478,7 +2478,7 @@ void rename(Admin admin, String oldTableName, TableName newTableName) {
RegionServer Grouping (A.K.A `rsgroup`) is an advanced feature for
partitioning regionservers into distinctive groups for strict isolation. It
should only be used by users who are sophisticated enough to understand the
-full implications and have a sufficient background in managing HBase clusters.
+full implications and have a sufficient background in managing HBase clusters.
It was developed by Yahoo! and they run it at scale on their large grid cluster.
See link:http://www.slideshare.net/HBaseCon/keynote-apache-hbase-at-yahoo-scale[HBase at Yahoo! Scale].
@@ -2491,20 +2491,20 @@ rsgroup at a time. By default, all tables and regionservers belong to the
APIs. A custom balancer implementation tracks assignments per rsgroup and makes
sure to move regions to the relevant regionservers in that rsgroup. The rsgroup
information is stored in a regular HBase table, and a zookeeper-based read-only
-cache is used at cluster bootstrap time.
+cache is used at cluster bootstrap time.
-To enable, add the following to your hbase-site.xml and restart your Master:
+To enable, add the following to your hbase-site.xml and restart your Master:
[source,xml]
----
- <property>
- <name>hbase.coprocessor.master.classes</name>
- <value>org.apache.hadoop.hbase.rsgroup.RSGroupAdminEndpoint</value>
- </property>
- <property>
- <name>hbase.master.loadbalancer.class</name>
- <value>org.apache.hadoop.hbase.rsgroup.RSGroupBasedLoadBalancer</value>
- </property>
+ <property>
+ <name>hbase.coprocessor.master.classes</name>
+ <value>org.apache.hadoop.hbase.rsgroup.RSGroupAdminEndpoint</value>
+ </property>
+ <property>
+ <name>hbase.master.loadbalancer.class</name>
+ <value>org.apache.hadoop.hbase.rsgroup.RSGroupBasedLoadBalancer</value>
+ </property>
----
Then use the shell _rsgroup_ commands to create and manipulate RegionServer
@@ -2514,7 +2514,7 @@ rsgroup commands available in the hbase shell type:
[source, bash]
----
hbase(main):008:0> help ‘rsgroup’
- Took 0.5610 seconds
+ Took 0.5610 seconds
----
High level, you create a rsgroup that is other than the `default` group using
@@ -2531,8 +2531,8 @@ Here is example using a few of the rsgroup commands. To add a group, do as foll
[source, bash]
----
- hbase(main):008:0> add_rsgroup 'my_group'
- Took 0.5610 seconds
+ hbase(main):008:0> add_rsgroup 'my_group'
+ Took 0.5610 seconds
----
@@ -2556,11 +2556,11 @@ ERROR: org.apache.hadoop.hbase.exceptions.UnknownProtocolException: No registere
====
Add a server (specified by hostname + port) to the just-made group using the
-_move_servers_rsgroup_ command as follows:
+_move_servers_rsgroup_ command as follows:
[source, bash]
----
- hbase(main):010:0> move_servers_rsgroup 'my_group',['k.att.net:51129']
+ hbase(main):010:0> move_servers_rsgroup 'my_group',['k.att.net:51129']
----
.Hostname and Port vs ServerName
[11/41] hbase git commit: HBASE-18640 Move mapreduce out of
hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java
deleted file mode 100644
index e80410f..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java
+++ /dev/null
@@ -1,1111 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.snapshot;
-
-import java.io.BufferedInputStream;
-import java.io.FileNotFoundException;
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.LinkedList;
-import java.util.List;
-
-import org.apache.commons.cli.CommandLine;
-import org.apache.commons.cli.Option;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileChecksum;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.FileUtil;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.permission.FsPermission;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.io.FileLink;
-import org.apache.hadoop.hbase.io.HFileLink;
-import org.apache.hadoop.hbase.io.WALLink;
-import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
-import org.apache.hadoop.hbase.mob.MobUtils;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotFileInfo;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
-import org.apache.hadoop.hbase.util.AbstractHBaseTool;
-import org.apache.hadoop.hbase.util.FSUtils;
-import org.apache.hadoop.hbase.util.HFileArchiveUtil;
-import org.apache.hadoop.hbase.util.Pair;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.IOUtils;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.InputFormat;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
-import org.apache.hadoop.mapreduce.security.TokenCache;
-import org.apache.hadoop.hbase.io.hadoopbackport.ThrottledInputStream;
-import org.apache.hadoop.util.StringUtils;
-import org.apache.hadoop.util.Tool;
-
-/**
- * Export the specified snapshot to a given FileSystem.
- *
- * The .snapshot/name folder is copied to the destination cluster
- * and then all the hfiles/wals are copied using a Map-Reduce Job in the .archive/ location.
- * When everything is done, the second cluster can restore the snapshot.
- */
-@InterfaceAudience.Public
-public class ExportSnapshot extends AbstractHBaseTool implements Tool {
- public static final String NAME = "exportsnapshot";
- /** Configuration prefix for overrides for the source filesystem */
- public static final String CONF_SOURCE_PREFIX = NAME + ".from.";
- /** Configuration prefix for overrides for the destination filesystem */
- public static final String CONF_DEST_PREFIX = NAME + ".to.";
-
- private static final Log LOG = LogFactory.getLog(ExportSnapshot.class);
-
- private static final String MR_NUM_MAPS = "mapreduce.job.maps";
- private static final String CONF_NUM_SPLITS = "snapshot.export.format.splits";
- private static final String CONF_SNAPSHOT_NAME = "snapshot.export.format.snapshot.name";
- private static final String CONF_SNAPSHOT_DIR = "snapshot.export.format.snapshot.dir";
- private static final String CONF_FILES_USER = "snapshot.export.files.attributes.user";
- private static final String CONF_FILES_GROUP = "snapshot.export.files.attributes.group";
- private static final String CONF_FILES_MODE = "snapshot.export.files.attributes.mode";
- private static final String CONF_CHECKSUM_VERIFY = "snapshot.export.checksum.verify";
- private static final String CONF_OUTPUT_ROOT = "snapshot.export.output.root";
- private static final String CONF_INPUT_ROOT = "snapshot.export.input.root";
- private static final String CONF_BUFFER_SIZE = "snapshot.export.buffer.size";
- private static final String CONF_MAP_GROUP = "snapshot.export.default.map.group";
- private static final String CONF_BANDWIDTH_MB = "snapshot.export.map.bandwidth.mb";
- protected static final String CONF_SKIP_TMP = "snapshot.export.skip.tmp";
-
- static class Testing {
- static final String CONF_TEST_FAILURE = "test.snapshot.export.failure";
- static final String CONF_TEST_FAILURE_COUNT = "test.snapshot.export.failure.count";
- int failuresCountToInject = 0;
- int injectedFailureCount = 0;
- }
-
- // Command line options and defaults.
- static final class Options {
- static final Option SNAPSHOT = new Option(null, "snapshot", true, "Snapshot to restore.");
- static final Option TARGET_NAME = new Option(null, "target", true,
- "Target name for the snapshot.");
- static final Option COPY_TO = new Option(null, "copy-to", true, "Remote "
- + "destination hdfs://");
- static final Option COPY_FROM = new Option(null, "copy-from", true,
- "Input folder hdfs:// (default hbase.rootdir)");
- static final Option NO_CHECKSUM_VERIFY = new Option(null, "no-checksum-verify", false,
- "Do not verify checksum, use name+length only.");
- static final Option NO_TARGET_VERIFY = new Option(null, "no-target-verify", false,
- "Do not verify the integrity of the exported snapshot.");
- static final Option OVERWRITE = new Option(null, "overwrite", false,
- "Rewrite the snapshot manifest if already exists.");
- static final Option CHUSER = new Option(null, "chuser", true,
- "Change the owner of the files to the specified one.");
- static final Option CHGROUP = new Option(null, "chgroup", true,
- "Change the group of the files to the specified one.");
- static final Option CHMOD = new Option(null, "chmod", true,
- "Change the permission of the files to the specified one.");
- static final Option MAPPERS = new Option(null, "mappers", true,
- "Number of mappers to use during the copy (mapreduce.job.maps).");
- static final Option BANDWIDTH = new Option(null, "bandwidth", true,
- "Limit bandwidth to this value in MB/second.");
- }
-
- // Export Map-Reduce Counters, to keep track of the progress
- public enum Counter {
- MISSING_FILES, FILES_COPIED, FILES_SKIPPED, COPY_FAILED,
- BYTES_EXPECTED, BYTES_SKIPPED, BYTES_COPIED
- }
-
- private static class ExportMapper extends Mapper<BytesWritable, NullWritable,
- NullWritable, NullWritable> {
- private static final Log LOG = LogFactory.getLog(ExportMapper.class);
- final static int REPORT_SIZE = 1 * 1024 * 1024;
- final static int BUFFER_SIZE = 64 * 1024;
-
- private boolean verifyChecksum;
- private String filesGroup;
- private String filesUser;
- private short filesMode;
- private int bufferSize;
-
- private FileSystem outputFs;
- private Path outputArchive;
- private Path outputRoot;
-
- private FileSystem inputFs;
- private Path inputArchive;
- private Path inputRoot;
-
- private static Testing testing = new Testing();
-
- @Override
- public void setup(Context context) throws IOException {
- Configuration conf = context.getConfiguration();
-
- Configuration srcConf = HBaseConfiguration.createClusterConf(conf, null, CONF_SOURCE_PREFIX);
- Configuration destConf = HBaseConfiguration.createClusterConf(conf, null, CONF_DEST_PREFIX);
-
- verifyChecksum = conf.getBoolean(CONF_CHECKSUM_VERIFY, true);
-
- filesGroup = conf.get(CONF_FILES_GROUP);
- filesUser = conf.get(CONF_FILES_USER);
- filesMode = (short)conf.getInt(CONF_FILES_MODE, 0);
- outputRoot = new Path(conf.get(CONF_OUTPUT_ROOT));
- inputRoot = new Path(conf.get(CONF_INPUT_ROOT));
-
- inputArchive = new Path(inputRoot, HConstants.HFILE_ARCHIVE_DIRECTORY);
- outputArchive = new Path(outputRoot, HConstants.HFILE_ARCHIVE_DIRECTORY);
-
- try {
- srcConf.setBoolean("fs." + inputRoot.toUri().getScheme() + ".impl.disable.cache", true);
- inputFs = FileSystem.get(inputRoot.toUri(), srcConf);
- } catch (IOException e) {
- throw new IOException("Could not get the input FileSystem with root=" + inputRoot, e);
- }
-
- try {
- destConf.setBoolean("fs." + outputRoot.toUri().getScheme() + ".impl.disable.cache", true);
- outputFs = FileSystem.get(outputRoot.toUri(), destConf);
- } catch (IOException e) {
- throw new IOException("Could not get the output FileSystem with root="+ outputRoot, e);
- }
-
- // Use the default block size of the outputFs if bigger
- int defaultBlockSize = Math.max((int) outputFs.getDefaultBlockSize(outputRoot), BUFFER_SIZE);
- bufferSize = conf.getInt(CONF_BUFFER_SIZE, defaultBlockSize);
- LOG.info("Using bufferSize=" + StringUtils.humanReadableInt(bufferSize));
-
- for (Counter c : Counter.values()) {
- context.getCounter(c).increment(0);
- }
- if (context.getConfiguration().getBoolean(Testing.CONF_TEST_FAILURE, false)) {
- testing.failuresCountToInject = conf.getInt(Testing.CONF_TEST_FAILURE_COUNT, 0);
- // Get number of times we have already injected failure based on attempt number of this
- // task.
- testing.injectedFailureCount = context.getTaskAttemptID().getId();
- }
- }
-
- @Override
- protected void cleanup(Context context) {
- IOUtils.closeStream(inputFs);
- IOUtils.closeStream(outputFs);
- }
-
- @Override
- public void map(BytesWritable key, NullWritable value, Context context)
- throws InterruptedException, IOException {
- SnapshotFileInfo inputInfo = SnapshotFileInfo.parseFrom(key.copyBytes());
- Path outputPath = getOutputPath(inputInfo);
-
- copyFile(context, inputInfo, outputPath);
- }
-
- /**
- * Returns the location where the inputPath will be copied.
- */
- private Path getOutputPath(final SnapshotFileInfo inputInfo) throws IOException {
- Path path = null;
- switch (inputInfo.getType()) {
- case HFILE:
- Path inputPath = new Path(inputInfo.getHfile());
- String family = inputPath.getParent().getName();
- TableName table = HFileLink.getReferencedTableName(inputPath.getName());
- String region = HFileLink.getReferencedRegionName(inputPath.getName());
- String hfile = HFileLink.getReferencedHFileName(inputPath.getName());
- path = new Path(FSUtils.getTableDir(new Path("./"), table),
- new Path(region, new Path(family, hfile)));
- break;
- case WAL:
- LOG.warn("snapshot does not keeps WALs: " + inputInfo);
- break;
- default:
- throw new IOException("Invalid File Type: " + inputInfo.getType().toString());
- }
- return new Path(outputArchive, path);
- }
-
- /**
- * Used by TestExportSnapshot to test for retries when failures happen.
- * Failure is injected in {@link #copyFile(Context, SnapshotFileInfo, Path)}.
- */
- private void injectTestFailure(final Context context, final SnapshotFileInfo inputInfo)
- throws IOException {
- if (!context.getConfiguration().getBoolean(Testing.CONF_TEST_FAILURE, false)) return;
- if (testing.injectedFailureCount >= testing.failuresCountToInject) return;
- testing.injectedFailureCount++;
- context.getCounter(Counter.COPY_FAILED).increment(1);
- LOG.debug("Injecting failure. Count: " + testing.injectedFailureCount);
- throw new IOException(String.format("TEST FAILURE (%d of max %d): Unable to copy input=%s",
- testing.injectedFailureCount, testing.failuresCountToInject, inputInfo));
- }
-
- private void copyFile(final Context context, final SnapshotFileInfo inputInfo,
- final Path outputPath) throws IOException {
- // Get the file information
- FileStatus inputStat = getSourceFileStatus(context, inputInfo);
-
- // Verify if the output file exists and is the same that we want to copy
- if (outputFs.exists(outputPath)) {
- FileStatus outputStat = outputFs.getFileStatus(outputPath);
- if (outputStat != null && sameFile(inputStat, outputStat)) {
- LOG.info("Skip copy " + inputStat.getPath() + " to " + outputPath + ", same file.");
- context.getCounter(Counter.FILES_SKIPPED).increment(1);
- context.getCounter(Counter.BYTES_SKIPPED).increment(inputStat.getLen());
- return;
- }
- }
-
- InputStream in = openSourceFile(context, inputInfo);
- int bandwidthMB = context.getConfiguration().getInt(CONF_BANDWIDTH_MB, 100);
- if (Integer.MAX_VALUE != bandwidthMB) {
- in = new ThrottledInputStream(new BufferedInputStream(in), bandwidthMB * 1024 * 1024L);
- }
-
- try {
- context.getCounter(Counter.BYTES_EXPECTED).increment(inputStat.getLen());
-
- // Ensure that the output folder is there and copy the file
- createOutputPath(outputPath.getParent());
- FSDataOutputStream out = outputFs.create(outputPath, true);
- try {
- copyData(context, inputStat.getPath(), in, outputPath, out, inputStat.getLen());
- } finally {
- out.close();
- }
-
- // Try to Preserve attributes
- if (!preserveAttributes(outputPath, inputStat)) {
- LOG.warn("You may have to run manually chown on: " + outputPath);
- }
- } finally {
- in.close();
- injectTestFailure(context, inputInfo);
- }
- }
-
- /**
- * Create the output folder and optionally set ownership.
- */
- private void createOutputPath(final Path path) throws IOException {
- if (filesUser == null && filesGroup == null) {
- outputFs.mkdirs(path);
- } else {
- Path parent = path.getParent();
- if (!outputFs.exists(parent) && !parent.isRoot()) {
- createOutputPath(parent);
- }
- outputFs.mkdirs(path);
- if (filesUser != null || filesGroup != null) {
- // override the owner when non-null user/group is specified
- outputFs.setOwner(path, filesUser, filesGroup);
- }
- if (filesMode > 0) {
- outputFs.setPermission(path, new FsPermission(filesMode));
- }
- }
- }
-
- /**
- * Try to preserve the file attributes selected by the user, copying them from the source file.
- * This is only required when you are exporting as a different user than "hbase" or on a system
- * that doesn't have the "hbase" user.
- *
- * This is not considered a blocking failure since the user can force a chmod with a user
- * that is known to be available on the system.
- */
- private boolean preserveAttributes(final Path path, final FileStatus refStat) {
- FileStatus stat;
- try {
- stat = outputFs.getFileStatus(path);
- } catch (IOException e) {
- LOG.warn("Unable to get the status for file=" + path);
- return false;
- }
-
- try {
- if (filesMode > 0 && stat.getPermission().toShort() != filesMode) {
- outputFs.setPermission(path, new FsPermission(filesMode));
- } else if (refStat != null && !stat.getPermission().equals(refStat.getPermission())) {
- outputFs.setPermission(path, refStat.getPermission());
- }
- } catch (IOException e) {
- LOG.warn("Unable to set the permission for file="+ stat.getPath() +": "+ e.getMessage());
- return false;
- }
-
- boolean hasRefStat = (refStat != null);
- String user = stringIsNotEmpty(filesUser) || !hasRefStat ? filesUser : refStat.getOwner();
- String group = stringIsNotEmpty(filesGroup) || !hasRefStat ? filesGroup : refStat.getGroup();
- if (stringIsNotEmpty(user) || stringIsNotEmpty(group)) {
- try {
- if (!(user.equals(stat.getOwner()) && group.equals(stat.getGroup()))) {
- outputFs.setOwner(path, user, group);
- }
- } catch (IOException e) {
- LOG.warn("Unable to set the owner/group for file="+ stat.getPath() +": "+ e.getMessage());
- LOG.warn("The user/group may not exist on the destination cluster: user=" +
- user + " group=" + group);
- return false;
- }
- }
-
- return true;
- }
-
- private boolean stringIsNotEmpty(final String str) {
- return str != null && str.length() > 0;
- }
-
- private void copyData(final Context context,
- final Path inputPath, final InputStream in,
- final Path outputPath, final FSDataOutputStream out,
- final long inputFileSize)
- throws IOException {
- final String statusMessage = "copied %s/" + StringUtils.humanReadableInt(inputFileSize) +
- " (%.1f%%)";
-
- try {
- byte[] buffer = new byte[bufferSize];
- long totalBytesWritten = 0;
- int reportBytes = 0;
- int bytesRead;
-
- long stime = System.currentTimeMillis();
- while ((bytesRead = in.read(buffer)) > 0) {
- out.write(buffer, 0, bytesRead);
- totalBytesWritten += bytesRead;
- reportBytes += bytesRead;
-
- if (reportBytes >= REPORT_SIZE) {
- context.getCounter(Counter.BYTES_COPIED).increment(reportBytes);
- context.setStatus(String.format(statusMessage,
- StringUtils.humanReadableInt(totalBytesWritten),
- (totalBytesWritten/(float)inputFileSize) * 100.0f) +
- " from " + inputPath + " to " + outputPath);
- reportBytes = 0;
- }
- }
- long etime = System.currentTimeMillis();
-
- context.getCounter(Counter.BYTES_COPIED).increment(reportBytes);
- context.setStatus(String.format(statusMessage,
- StringUtils.humanReadableInt(totalBytesWritten),
- (totalBytesWritten/(float)inputFileSize) * 100.0f) +
- " from " + inputPath + " to " + outputPath);
-
- // Verify that the written size match
- if (totalBytesWritten != inputFileSize) {
- String msg = "number of bytes copied not matching copied=" + totalBytesWritten +
- " expected=" + inputFileSize + " for file=" + inputPath;
- throw new IOException(msg);
- }
-
- LOG.info("copy completed for input=" + inputPath + " output=" + outputPath);
- LOG.info("size=" + totalBytesWritten +
- " (" + StringUtils.humanReadableInt(totalBytesWritten) + ")" +
- " time=" + StringUtils.formatTimeDiff(etime, stime) +
- String.format(" %.3fM/sec", (totalBytesWritten / ((etime - stime)/1000.0))/1048576.0));
- context.getCounter(Counter.FILES_COPIED).increment(1);
- } catch (IOException e) {
- LOG.error("Error copying " + inputPath + " to " + outputPath, e);
- context.getCounter(Counter.COPY_FAILED).increment(1);
- throw e;
- }
- }
-
- /**
- * Try to open the "source" file.
- * Throws an IOException if the communication with the inputFs fails or
- * if the file is not found.
- */
- private FSDataInputStream openSourceFile(Context context, final SnapshotFileInfo fileInfo)
- throws IOException {
- try {
- Configuration conf = context.getConfiguration();
- FileLink link = null;
- switch (fileInfo.getType()) {
- case HFILE:
- Path inputPath = new Path(fileInfo.getHfile());
- link = getFileLink(inputPath, conf);
- break;
- case WAL:
- String serverName = fileInfo.getWalServer();
- String logName = fileInfo.getWalName();
- link = new WALLink(inputRoot, serverName, logName);
- break;
- default:
- throw new IOException("Invalid File Type: " + fileInfo.getType().toString());
- }
- return link.open(inputFs);
- } catch (IOException e) {
- context.getCounter(Counter.MISSING_FILES).increment(1);
- LOG.error("Unable to open source file=" + fileInfo.toString(), e);
- throw e;
- }
- }
-
- private FileStatus getSourceFileStatus(Context context, final SnapshotFileInfo fileInfo)
- throws IOException {
- try {
- Configuration conf = context.getConfiguration();
- FileLink link = null;
- switch (fileInfo.getType()) {
- case HFILE:
- Path inputPath = new Path(fileInfo.getHfile());
- link = getFileLink(inputPath, conf);
- break;
- case WAL:
- link = new WALLink(inputRoot, fileInfo.getWalServer(), fileInfo.getWalName());
- break;
- default:
- throw new IOException("Invalid File Type: " + fileInfo.getType().toString());
- }
- return link.getFileStatus(inputFs);
- } catch (FileNotFoundException e) {
- context.getCounter(Counter.MISSING_FILES).increment(1);
- LOG.error("Unable to get the status for source file=" + fileInfo.toString(), e);
- throw e;
- } catch (IOException e) {
- LOG.error("Unable to get the status for source file=" + fileInfo.toString(), e);
- throw e;
- }
- }
-
- private FileLink getFileLink(Path path, Configuration conf) throws IOException{
- String regionName = HFileLink.getReferencedRegionName(path.getName());
- TableName tableName = HFileLink.getReferencedTableName(path.getName());
- if(MobUtils.getMobRegionInfo(tableName).getEncodedName().equals(regionName)) {
- return HFileLink.buildFromHFileLinkPattern(MobUtils.getQualifiedMobRootDir(conf),
- HFileArchiveUtil.getArchivePath(conf), path);
- }
- return HFileLink.buildFromHFileLinkPattern(inputRoot, inputArchive, path);
- }
-
- private FileChecksum getFileChecksum(final FileSystem fs, final Path path) {
- try {
- return fs.getFileChecksum(path);
- } catch (IOException e) {
- LOG.warn("Unable to get checksum for file=" + path, e);
- return null;
- }
- }
-
- /**
- * Check if the two files are equal by looking at the file length,
- * and at the checksum (if user has specified the verifyChecksum flag).
- */
- private boolean sameFile(final FileStatus inputStat, final FileStatus outputStat) {
- // Not matching length
- if (inputStat.getLen() != outputStat.getLen()) return false;
-
- // Mark files as equals, since user asked for no checksum verification
- if (!verifyChecksum) return true;
-
- // If checksums are not available, files are not the same.
- FileChecksum inChecksum = getFileChecksum(inputFs, inputStat.getPath());
- if (inChecksum == null) return false;
-
- FileChecksum outChecksum = getFileChecksum(outputFs, outputStat.getPath());
- if (outChecksum == null) return false;
-
- return inChecksum.equals(outChecksum);
- }
- }
-
- // ==========================================================================
- // Input Format
- // ==========================================================================
-
- /**
- * Extract the list of files (HFiles/WALs) to copy using Map-Reduce.
- * @return list of files referenced by the snapshot (pair of path and size)
- */
- private static List<Pair<SnapshotFileInfo, Long>> getSnapshotFiles(final Configuration conf,
- final FileSystem fs, final Path snapshotDir) throws IOException {
- SnapshotDescription snapshotDesc = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
-
- final List<Pair<SnapshotFileInfo, Long>> files = new ArrayList<>();
- final TableName table = TableName.valueOf(snapshotDesc.getTable());
-
- // Get snapshot files
- LOG.info("Loading Snapshot '" + snapshotDesc.getName() + "' hfile list");
- SnapshotReferenceUtil.visitReferencedFiles(conf, fs, snapshotDir, snapshotDesc,
- new SnapshotReferenceUtil.SnapshotVisitor() {
- @Override
- public void storeFile(final HRegionInfo regionInfo, final String family,
- final SnapshotRegionManifest.StoreFile storeFile) throws IOException {
- // for storeFile.hasReference() case, copied as part of the manifest
- if (!storeFile.hasReference()) {
- String region = regionInfo.getEncodedName();
- String hfile = storeFile.getName();
- Path path = HFileLink.createPath(table, region, family, hfile);
-
- SnapshotFileInfo fileInfo = SnapshotFileInfo.newBuilder()
- .setType(SnapshotFileInfo.Type.HFILE)
- .setHfile(path.toString())
- .build();
-
- long size;
- if (storeFile.hasFileSize()) {
- size = storeFile.getFileSize();
- } else {
- size = HFileLink.buildFromHFileLinkPattern(conf, path).getFileStatus(fs).getLen();
- }
- files.add(new Pair<>(fileInfo, size));
- }
- }
- });
-
- return files;
- }
-
- /**
- * Given a list of file paths and sizes, create around ngroups in as balanced a way as possible.
- * The groups created will have similar amounts of bytes.
- * <p>
- * The algorithm used is pretty straightforward; the file list is sorted by size,
- * and then each group fetches the biggest file available, iterating through the groups
- * while alternating direction.
- */
- static List<List<Pair<SnapshotFileInfo, Long>>> getBalancedSplits(
- final List<Pair<SnapshotFileInfo, Long>> files, final int ngroups) {
- // Sort files by size, from small to big
- Collections.sort(files, new Comparator<Pair<SnapshotFileInfo, Long>>() {
- public int compare(Pair<SnapshotFileInfo, Long> a, Pair<SnapshotFileInfo, Long> b) {
- long r = a.getSecond() - b.getSecond();
- return (r < 0) ? -1 : ((r > 0) ? 1 : 0);
- }
- });
-
- // create balanced groups
- List<List<Pair<SnapshotFileInfo, Long>>> fileGroups = new LinkedList<>();
- long[] sizeGroups = new long[ngroups];
- int hi = files.size() - 1;
- int lo = 0;
-
- List<Pair<SnapshotFileInfo, Long>> group;
- int dir = 1;
- int g = 0;
-
- while (hi >= lo) {
- if (g == fileGroups.size()) {
- group = new LinkedList<>();
- fileGroups.add(group);
- } else {
- group = fileGroups.get(g);
- }
-
- Pair<SnapshotFileInfo, Long> fileInfo = files.get(hi--);
-
- // add the hi one
- sizeGroups[g] += fileInfo.getSecond();
- group.add(fileInfo);
-
- // change direction when at the end or the beginning
- g += dir;
- if (g == ngroups) {
- dir = -1;
- g = ngroups - 1;
- } else if (g < 0) {
- dir = 1;
- g = 0;
- }
- }
-
- if (LOG.isDebugEnabled()) {
- for (int i = 0; i < sizeGroups.length; ++i) {
- LOG.debug("export split=" + i + " size=" + StringUtils.humanReadableInt(sizeGroups[i]));
- }
- }
-
- return fileGroups;
- }
-
- private static class ExportSnapshotInputFormat extends InputFormat<BytesWritable, NullWritable> {
- @Override
- public RecordReader<BytesWritable, NullWritable> createRecordReader(InputSplit split,
- TaskAttemptContext tac) throws IOException, InterruptedException {
- return new ExportSnapshotRecordReader(((ExportSnapshotInputSplit)split).getSplitKeys());
- }
-
- @Override
- public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
- Configuration conf = context.getConfiguration();
- Path snapshotDir = new Path(conf.get(CONF_SNAPSHOT_DIR));
- FileSystem fs = FileSystem.get(snapshotDir.toUri(), conf);
-
- List<Pair<SnapshotFileInfo, Long>> snapshotFiles = getSnapshotFiles(conf, fs, snapshotDir);
- int mappers = conf.getInt(CONF_NUM_SPLITS, 0);
- if (mappers == 0 && snapshotFiles.size() > 0) {
- mappers = 1 + (snapshotFiles.size() / conf.getInt(CONF_MAP_GROUP, 10));
- mappers = Math.min(mappers, snapshotFiles.size());
- conf.setInt(CONF_NUM_SPLITS, mappers);
- conf.setInt(MR_NUM_MAPS, mappers);
- }
-
- List<List<Pair<SnapshotFileInfo, Long>>> groups = getBalancedSplits(snapshotFiles, mappers);
- List<InputSplit> splits = new ArrayList(groups.size());
- for (List<Pair<SnapshotFileInfo, Long>> files: groups) {
- splits.add(new ExportSnapshotInputSplit(files));
- }
- return splits;
- }
-
- private static class ExportSnapshotInputSplit extends InputSplit implements Writable {
- private List<Pair<BytesWritable, Long>> files;
- private long length;
-
- public ExportSnapshotInputSplit() {
- this.files = null;
- }
-
- public ExportSnapshotInputSplit(final List<Pair<SnapshotFileInfo, Long>> snapshotFiles) {
- this.files = new ArrayList(snapshotFiles.size());
- for (Pair<SnapshotFileInfo, Long> fileInfo: snapshotFiles) {
- this.files.add(new Pair<>(
- new BytesWritable(fileInfo.getFirst().toByteArray()), fileInfo.getSecond()));
- this.length += fileInfo.getSecond();
- }
- }
-
- private List<Pair<BytesWritable, Long>> getSplitKeys() {
- return files;
- }
-
- @Override
- public long getLength() throws IOException, InterruptedException {
- return length;
- }
-
- @Override
- public String[] getLocations() throws IOException, InterruptedException {
- return new String[] {};
- }
-
- @Override
- public void readFields(DataInput in) throws IOException {
- int count = in.readInt();
- files = new ArrayList<>(count);
- length = 0;
- for (int i = 0; i < count; ++i) {
- BytesWritable fileInfo = new BytesWritable();
- fileInfo.readFields(in);
- long size = in.readLong();
- files.add(new Pair<>(fileInfo, size));
- length += size;
- }
- }
-
- @Override
- public void write(DataOutput out) throws IOException {
- out.writeInt(files.size());
- for (final Pair<BytesWritable, Long> fileInfo: files) {
- fileInfo.getFirst().write(out);
- out.writeLong(fileInfo.getSecond());
- }
- }
- }
-
- private static class ExportSnapshotRecordReader
- extends RecordReader<BytesWritable, NullWritable> {
- private final List<Pair<BytesWritable, Long>> files;
- private long totalSize = 0;
- private long procSize = 0;
- private int index = -1;
-
- ExportSnapshotRecordReader(final List<Pair<BytesWritable, Long>> files) {
- this.files = files;
- for (Pair<BytesWritable, Long> fileInfo: files) {
- totalSize += fileInfo.getSecond();
- }
- }
-
- @Override
- public void close() { }
-
- @Override
- public BytesWritable getCurrentKey() { return files.get(index).getFirst(); }
-
- @Override
- public NullWritable getCurrentValue() { return NullWritable.get(); }
-
- @Override
- public float getProgress() { return (float)procSize / totalSize; }
-
- @Override
- public void initialize(InputSplit split, TaskAttemptContext tac) { }
-
- @Override
- public boolean nextKeyValue() {
- if (index >= 0) {
- procSize += files.get(index).getSecond();
- }
- return(++index < files.size());
- }
- }
- }
-
- // ==========================================================================
- // Tool
- // ==========================================================================
-
- /**
- * Run Map-Reduce Job to perform the files copy.
- */
- private void runCopyJob(final Path inputRoot, final Path outputRoot,
- final String snapshotName, final Path snapshotDir, final boolean verifyChecksum,
- final String filesUser, final String filesGroup, final int filesMode,
- final int mappers, final int bandwidthMB)
- throws IOException, InterruptedException, ClassNotFoundException {
- Configuration conf = getConf();
- if (filesGroup != null) conf.set(CONF_FILES_GROUP, filesGroup);
- if (filesUser != null) conf.set(CONF_FILES_USER, filesUser);
- if (mappers > 0) {
- conf.setInt(CONF_NUM_SPLITS, mappers);
- conf.setInt(MR_NUM_MAPS, mappers);
- }
- conf.setInt(CONF_FILES_MODE, filesMode);
- conf.setBoolean(CONF_CHECKSUM_VERIFY, verifyChecksum);
- conf.set(CONF_OUTPUT_ROOT, outputRoot.toString());
- conf.set(CONF_INPUT_ROOT, inputRoot.toString());
- conf.setInt(CONF_BANDWIDTH_MB, bandwidthMB);
- conf.set(CONF_SNAPSHOT_NAME, snapshotName);
- conf.set(CONF_SNAPSHOT_DIR, snapshotDir.toString());
-
- Job job = new Job(conf);
- job.setJobName("ExportSnapshot-" + snapshotName);
- job.setJarByClass(ExportSnapshot.class);
- TableMapReduceUtil.addDependencyJars(job);
- job.setMapperClass(ExportMapper.class);
- job.setInputFormatClass(ExportSnapshotInputFormat.class);
- job.setOutputFormatClass(NullOutputFormat.class);
- job.setMapSpeculativeExecution(false);
- job.setNumReduceTasks(0);
-
- // Acquire the delegation Tokens
- Configuration srcConf = HBaseConfiguration.createClusterConf(conf, null, CONF_SOURCE_PREFIX);
- TokenCache.obtainTokensForNamenodes(job.getCredentials(),
- new Path[] { inputRoot }, srcConf);
- Configuration destConf = HBaseConfiguration.createClusterConf(conf, null, CONF_DEST_PREFIX);
- TokenCache.obtainTokensForNamenodes(job.getCredentials(),
- new Path[] { outputRoot }, destConf);
-
- // Run the MR Job
- if (!job.waitForCompletion(true)) {
- // TODO: Replace the fixed string with job.getStatus().getFailureInfo()
- // when it will be available on all the supported versions.
- throw new ExportSnapshotException("Copy Files Map-Reduce Job failed");
- }
- }
-
- private void verifySnapshot(final Configuration baseConf,
- final FileSystem fs, final Path rootDir, final Path snapshotDir) throws IOException {
- // Update the conf with the current root dir, since may be a different cluster
- Configuration conf = new Configuration(baseConf);
- FSUtils.setRootDir(conf, rootDir);
- FSUtils.setFsDefault(conf, FSUtils.getRootDir(conf));
- SnapshotDescription snapshotDesc = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
- SnapshotReferenceUtil.verifySnapshot(conf, fs, snapshotDir, snapshotDesc);
- }
-
- /**
- * Set path ownership.
- */
- private void setOwner(final FileSystem fs, final Path path, final String user,
- final String group, final boolean recursive) throws IOException {
- if (user != null || group != null) {
- if (recursive && fs.isDirectory(path)) {
- for (FileStatus child : fs.listStatus(path)) {
- setOwner(fs, child.getPath(), user, group, recursive);
- }
- }
- fs.setOwner(path, user, group);
- }
- }
-
- /**
- * Set path permission.
- */
- private void setPermission(final FileSystem fs, final Path path, final short filesMode,
- final boolean recursive) throws IOException {
- if (filesMode > 0) {
- FsPermission perm = new FsPermission(filesMode);
- if (recursive && fs.isDirectory(path)) {
- for (FileStatus child : fs.listStatus(path)) {
- setPermission(fs, child.getPath(), filesMode, recursive);
- }
- }
- fs.setPermission(path, perm);
- }
- }
-
- private boolean verifyTarget = true;
- private boolean verifyChecksum = true;
- private String snapshotName = null;
- private String targetName = null;
- private boolean overwrite = false;
- private String filesGroup = null;
- private String filesUser = null;
- private Path outputRoot = null;
- private Path inputRoot = null;
- private int bandwidthMB = Integer.MAX_VALUE;
- private int filesMode = 0;
- private int mappers = 0;
-
- @Override
- protected void processOptions(CommandLine cmd) {
- snapshotName = cmd.getOptionValue(Options.SNAPSHOT.getLongOpt(), snapshotName);
- targetName = cmd.getOptionValue(Options.TARGET_NAME.getLongOpt(), targetName);
- if (cmd.hasOption(Options.COPY_TO.getLongOpt())) {
- outputRoot = new Path(cmd.getOptionValue(Options.COPY_TO.getLongOpt()));
- }
- if (cmd.hasOption(Options.COPY_FROM.getLongOpt())) {
- inputRoot = new Path(cmd.getOptionValue(Options.COPY_FROM.getLongOpt()));
- }
- mappers = getOptionAsInt(cmd, Options.MAPPERS.getLongOpt(), mappers);
- filesUser = cmd.getOptionValue(Options.CHUSER.getLongOpt(), filesUser);
- filesGroup = cmd.getOptionValue(Options.CHGROUP.getLongOpt(), filesGroup);
- filesMode = getOptionAsInt(cmd, Options.CHMOD.getLongOpt(), filesMode);
- bandwidthMB = getOptionAsInt(cmd, Options.BANDWIDTH.getLongOpt(), bandwidthMB);
- overwrite = cmd.hasOption(Options.OVERWRITE.getLongOpt());
- // And verifyChecksum and verifyTarget with values read from old args in processOldArgs(...).
- verifyChecksum = !cmd.hasOption(Options.NO_CHECKSUM_VERIFY.getLongOpt());
- verifyTarget = !cmd.hasOption(Options.NO_TARGET_VERIFY.getLongOpt());
- }
-
- /**
- * Execute the export snapshot by copying the snapshot metadata, hfiles and wals.
- * @return 0 on success, and != 0 upon failure.
- */
- @Override
- public int doWork() throws IOException {
- Configuration conf = getConf();
-
- // Check user options
- if (snapshotName == null) {
- System.err.println("Snapshot name not provided.");
- LOG.error("Use -h or --help for usage instructions.");
- return 0;
- }
-
- if (outputRoot == null) {
- System.err.println("Destination file-system (--" + Options.COPY_TO.getLongOpt()
- + ") not provided.");
- LOG.error("Use -h or --help for usage instructions.");
- return 0;
- }
-
- if (targetName == null) {
- targetName = snapshotName;
- }
- if (inputRoot == null) {
- inputRoot = FSUtils.getRootDir(conf);
- } else {
- FSUtils.setRootDir(conf, inputRoot);
- }
-
- Configuration srcConf = HBaseConfiguration.createClusterConf(conf, null, CONF_SOURCE_PREFIX);
- srcConf.setBoolean("fs." + inputRoot.toUri().getScheme() + ".impl.disable.cache", true);
- FileSystem inputFs = FileSystem.get(inputRoot.toUri(), srcConf);
- LOG.debug("inputFs=" + inputFs.getUri().toString() + " inputRoot=" + inputRoot);
- Configuration destConf = HBaseConfiguration.createClusterConf(conf, null, CONF_DEST_PREFIX);
- destConf.setBoolean("fs." + outputRoot.toUri().getScheme() + ".impl.disable.cache", true);
- FileSystem outputFs = FileSystem.get(outputRoot.toUri(), destConf);
- LOG.debug("outputFs=" + outputFs.getUri().toString() + " outputRoot=" + outputRoot.toString());
-
- boolean skipTmp = conf.getBoolean(CONF_SKIP_TMP, false);
-
- Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, inputRoot);
- Path snapshotTmpDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(targetName, outputRoot);
- Path outputSnapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(targetName, outputRoot);
- Path initialOutputSnapshotDir = skipTmp ? outputSnapshotDir : snapshotTmpDir;
-
- // Find the necessary directory which need to change owner and group
- Path needSetOwnerDir = SnapshotDescriptionUtils.getSnapshotRootDir(outputRoot);
- if (outputFs.exists(needSetOwnerDir)) {
- if (skipTmp) {
- needSetOwnerDir = outputSnapshotDir;
- } else {
- needSetOwnerDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(outputRoot);
- if (outputFs.exists(needSetOwnerDir)) {
- needSetOwnerDir = snapshotTmpDir;
- }
- }
- }
-
- // Check if the snapshot already exists
- if (outputFs.exists(outputSnapshotDir)) {
- if (overwrite) {
- if (!outputFs.delete(outputSnapshotDir, true)) {
- System.err.println("Unable to remove existing snapshot directory: " + outputSnapshotDir);
- return 1;
- }
- } else {
- System.err.println("The snapshot '" + targetName +
- "' already exists in the destination: " + outputSnapshotDir);
- return 1;
- }
- }
-
- if (!skipTmp) {
- // Check if the snapshot already in-progress
- if (outputFs.exists(snapshotTmpDir)) {
- if (overwrite) {
- if (!outputFs.delete(snapshotTmpDir, true)) {
- System.err.println("Unable to remove existing snapshot tmp directory: "+snapshotTmpDir);
- return 1;
- }
- } else {
- System.err.println("A snapshot with the same name '"+ targetName +"' may be in-progress");
- System.err.println("Please check "+snapshotTmpDir+". If the snapshot has completed, ");
- System.err.println("consider removing "+snapshotTmpDir+" by using the -overwrite option");
- return 1;
- }
- }
- }
-
- // Step 1 - Copy fs1:/.snapshot/<snapshot> to fs2:/.snapshot/.tmp/<snapshot>
- // The snapshot references must be copied before the hfiles otherwise the cleaner
- // will remove them because they are unreferenced.
- try {
- LOG.info("Copy Snapshot Manifest");
- FileUtil.copy(inputFs, snapshotDir, outputFs, initialOutputSnapshotDir, false, false, conf);
- } catch (IOException e) {
- throw new ExportSnapshotException("Failed to copy the snapshot directory: from=" +
- snapshotDir + " to=" + initialOutputSnapshotDir, e);
- } finally {
- if (filesUser != null || filesGroup != null) {
- LOG.warn((filesUser == null ? "" : "Change the owner of " + needSetOwnerDir + " to "
- + filesUser)
- + (filesGroup == null ? "" : ", Change the group of " + needSetOwnerDir + " to "
- + filesGroup));
- setOwner(outputFs, needSetOwnerDir, filesUser, filesGroup, true);
- }
- if (filesMode > 0) {
- LOG.warn("Change the permission of " + needSetOwnerDir + " to " + filesMode);
- setPermission(outputFs, needSetOwnerDir, (short)filesMode, true);
- }
- }
-
- // Write a new .snapshotinfo if the target name is different from the source name
- if (!targetName.equals(snapshotName)) {
- SnapshotDescription snapshotDesc =
- SnapshotDescriptionUtils.readSnapshotInfo(inputFs, snapshotDir)
- .toBuilder()
- .setName(targetName)
- .build();
- SnapshotDescriptionUtils.writeSnapshotInfo(snapshotDesc, initialOutputSnapshotDir, outputFs);
- if (filesUser != null || filesGroup != null) {
- outputFs.setOwner(new Path(initialOutputSnapshotDir,
- SnapshotDescriptionUtils.SNAPSHOTINFO_FILE), filesUser, filesGroup);
- }
- if (filesMode > 0) {
- outputFs.setPermission(new Path(initialOutputSnapshotDir,
- SnapshotDescriptionUtils.SNAPSHOTINFO_FILE), new FsPermission((short)filesMode));
- }
- }
-
- // Step 2 - Start MR Job to copy files
- // The snapshot references must be copied before the files otherwise the files gets removed
- // by the HFileArchiver, since they have no references.
- try {
- runCopyJob(inputRoot, outputRoot, snapshotName, snapshotDir, verifyChecksum,
- filesUser, filesGroup, filesMode, mappers, bandwidthMB);
-
- LOG.info("Finalize the Snapshot Export");
- if (!skipTmp) {
- // Step 3 - Rename fs2:/.snapshot/.tmp/<snapshot> fs2:/.snapshot/<snapshot>
- if (!outputFs.rename(snapshotTmpDir, outputSnapshotDir)) {
- throw new ExportSnapshotException("Unable to rename snapshot directory from=" +
- snapshotTmpDir + " to=" + outputSnapshotDir);
- }
- }
-
- // Step 4 - Verify snapshot integrity
- if (verifyTarget) {
- LOG.info("Verify snapshot integrity");
- verifySnapshot(destConf, outputFs, outputRoot, outputSnapshotDir);
- }
-
- LOG.info("Export Completed: " + targetName);
- return 0;
- } catch (Exception e) {
- LOG.error("Snapshot export failed", e);
- if (!skipTmp) {
- outputFs.delete(snapshotTmpDir, true);
- }
- outputFs.delete(outputSnapshotDir, true);
- return 1;
- } finally {
- IOUtils.closeStream(inputFs);
- IOUtils.closeStream(outputFs);
- }
- }
-
- @Override
- protected void printUsage() {
- super.printUsage();
- System.out.println("\n"
- + "Examples:\n"
- + " hbase snapshot export \\\n"
- + " --snapshot MySnapshot --copy-to hdfs://srv2:8082/hbase \\\n"
- + " --chuser MyUser --chgroup MyGroup --chmod 700 --mappers 16\n"
- + "\n"
- + " hbase snapshot export \\\n"
- + " --snapshot MySnapshot --copy-from hdfs://srv2:8082/hbase \\\n"
- + " --copy-to hdfs://srv1:50070/hbase");
- }
-
- @Override protected void addOptions() {
- addRequiredOption(Options.SNAPSHOT);
- addOption(Options.COPY_TO);
- addOption(Options.COPY_FROM);
- addOption(Options.TARGET_NAME);
- addOption(Options.NO_CHECKSUM_VERIFY);
- addOption(Options.NO_TARGET_VERIFY);
- addOption(Options.OVERWRITE);
- addOption(Options.CHUSER);
- addOption(Options.CHGROUP);
- addOption(Options.CHMOD);
- addOption(Options.MAPPERS);
- addOption(Options.BANDWIDTH);
- }
-
- public static void main(String[] args) {
- new ExportSnapshot().doStaticMain(args);
- }
-}
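ExportSnapshot implements Tool, so besides the hbase snapshot export invocation shown in printUsage() it can also be driven from a small Java launcher. A minimal sketch, reusing the option names defined above; the snapshot name and destination URI are placeholder values taken from the usage example:

    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.snapshot.ExportSnapshot;
    import org.apache.hadoop.util.ToolRunner;

    public class ExportSnapshotLauncher {
      public static void main(String[] args) throws Exception {
        // Option names match the Options block above; values are placeholders.
        String[] toolArgs = {
            "--snapshot", "MySnapshot",
            "--copy-to", "hdfs://srv2:8082/hbase",
            "--mappers", "16"
        };
        int exitCode = ToolRunner.run(HBaseConfiguration.create(), new ExportSnapshot(), toolArgs);
        System.exit(exitCode);
      }
    }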
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/util/MapreduceDependencyClasspathTool.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/MapreduceDependencyClasspathTool.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/MapreduceDependencyClasspathTool.java
deleted file mode 100644
index e8f073d..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/MapreduceDependencyClasspathTool.java
+++ /dev/null
@@ -1,73 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.util;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.HBaseInterfaceAudience;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-import org.apache.log4j.Level;
-import org.apache.log4j.Logger;
-
-/**
- * Generate a classpath string containing any jars required by mapreduce jobs. Specify
- * additional values by providing a comma-separated list of paths via -Dtmpjars.
- */
-@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
-public class MapreduceDependencyClasspathTool implements Tool {
-
- private Configuration conf;
-
- @Override
- public void setConf(Configuration conf) {
- this.conf = conf;
- }
-
- @Override
- public Configuration getConf() {
- return conf;
- }
-
- @Override
- public int run(String[] args) throws Exception {
- if (args.length > 0) {
- System.err.println("Usage: hbase mapredcp [-Dtmpjars=...]");
- System.err.println(" Construct a CLASSPATH containing dependency jars required to run a mapreduce");
- System.err.println(" job. By default, includes any jars detected by TableMapReduceUtils. Provide");
- System.err.println(" additional entries by specifying a comma-separated list in tmpjars.");
- return 0;
- }
-
- TableMapReduceUtil.addHBaseDependencyJars(getConf());
- System.out.println(TableMapReduceUtil.buildDependencyClasspath(getConf()));
- return 0;
- }
-
- public static void main(String[] argv) throws Exception {
- // Silence the usual noise. This is probably fragile...
- Logger logger = Logger.getLogger("org.apache.hadoop.hbase");
- if (logger != null) {
- logger.setLevel(Level.WARN);
- }
- System.exit(ToolRunner.run(
- HBaseConfiguration.create(), new MapreduceDependencyClasspathTool(), argv));
- }
-}
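The tool above is essentially a thin wrapper over two TableMapReduceUtil calls. A driver that wants the same dependency classpath without shelling out to "hbase mapredcp" could compute it directly; a short sketch using only the calls already present in the class:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;

    public class MapredCpSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // Same two calls the tool makes: register the HBase dependency jars,
        // then build the classpath string from whatever ended up in tmpjars.
        TableMapReduceUtil.addHBaseDependencyJars(conf);
        System.out.println(TableMapReduceUtil.buildDependencyClasspath(conf));
      }
    }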
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/util/RegionSizeCalculator.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/RegionSizeCalculator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/RegionSizeCalculator.java
deleted file mode 100644
index 99769b7..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/RegionSizeCalculator.java
+++ /dev/null
@@ -1,146 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.util;
-
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.TreeMap;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.Sets;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.HRegionLocation;
-import org.apache.hadoop.hbase.RegionLoad;
-import org.apache.hadoop.hbase.ServerName;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.classification.InterfaceStability;
-import org.apache.hadoop.hbase.client.Admin;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.RegionLocator;
-import org.apache.hadoop.hbase.client.Table;
-
-/**
- * Computes size of each region for given table and given column families.
- * The value is used by MapReduce for better scheduling.
- * */
-@InterfaceStability.Evolving
-@InterfaceAudience.Private
-public class RegionSizeCalculator {
-
- private static final Log LOG = LogFactory.getLog(RegionSizeCalculator.class);
-
- /**
- * Maps each region to its size in bytes.
- * */
- private final Map<byte[], Long> sizeMap = new TreeMap<>(Bytes.BYTES_COMPARATOR);
-
- static final String ENABLE_REGIONSIZECALCULATOR = "hbase.regionsizecalculator.enable";
- private static final long MEGABYTE = 1024L * 1024L;
-
- /**
- * Computes size of each region for table and given column families.
- *
- * @deprecated Use {@link #RegionSizeCalculator(RegionLocator, Admin)} instead.
- */
- @Deprecated
- public RegionSizeCalculator(Table table) throws IOException {
- try (Connection conn = ConnectionFactory.createConnection(table.getConfiguration());
- RegionLocator locator = conn.getRegionLocator(table.getName());
- Admin admin = conn.getAdmin()) {
- init(locator, admin);
- }
- }
-
- /**
- * Computes size of each region for table and given column families.
- * */
- public RegionSizeCalculator(RegionLocator regionLocator, Admin admin) throws IOException {
- init(regionLocator, admin);
- }
-
- private void init(RegionLocator regionLocator, Admin admin)
- throws IOException {
- if (!enabled(admin.getConfiguration())) {
- LOG.info("Region size calculation disabled.");
- return;
- }
-
- if (regionLocator.getName().isSystemTable()) {
- LOG.info("Region size calculation disabled for system tables.");
- return;
- }
-
- LOG.info("Calculating region sizes for table \"" + regionLocator.getName() + "\".");
-
- // Get the servers which host regions of the table
- Set<ServerName> tableServers = getRegionServersOfTable(regionLocator);
-
- for (ServerName tableServerName : tableServers) {
- Map<byte[], RegionLoad> regionLoads =
- admin.getRegionLoad(tableServerName, regionLocator.getName());
- for (RegionLoad regionLoad : regionLoads.values()) {
-
- byte[] regionId = regionLoad.getName();
- long regionSizeBytes = regionLoad.getStorefileSizeMB() * MEGABYTE;
- sizeMap.put(regionId, regionSizeBytes);
-
- if (LOG.isDebugEnabled()) {
- LOG.debug("Region " + regionLoad.getNameAsString() + " has size " + regionSizeBytes);
- }
- }
- }
- LOG.debug("Region sizes calculated");
- }
-
- private Set<ServerName> getRegionServersOfTable(RegionLocator regionLocator)
- throws IOException {
-
- Set<ServerName> tableServers = Sets.newHashSet();
- for (HRegionLocation regionLocation : regionLocator.getAllRegionLocations()) {
- tableServers.add(regionLocation.getServerName());
- }
- return tableServers;
- }
-
- boolean enabled(Configuration configuration) {
- return configuration.getBoolean(ENABLE_REGIONSIZECALCULATOR, true);
- }
-
- /**
- * Returns size of given region in bytes. Returns 0 if region was not found.
- * */
- public long getRegionSize(byte[] regionId) {
- Long size = sizeMap.get(regionId);
- if (size == null) {
- LOG.debug("Unknown region:" + Arrays.toString(regionId));
- return 0;
- } else {
- return size;
- }
- }
-
- public Map<byte[], Long> getRegionSizeMap() {
- return Collections.unmodifiableMap(sizeMap);
- }
-}
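The javadoc above notes that the computed sizes are consumed by MapReduce for split scheduling. A standalone usage sketch of the non-deprecated constructor follows; the table name is a placeholder and the import uses the pre-move package shown in this diff:

    import java.util.Map;

    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.TableName;
    import org.apache.hadoop.hbase.client.Admin;
    import org.apache.hadoop.hbase.client.Connection;
    import org.apache.hadoop.hbase.client.ConnectionFactory;
    import org.apache.hadoop.hbase.client.RegionLocator;
    import org.apache.hadoop.hbase.util.Bytes;
    import org.apache.hadoop.hbase.util.RegionSizeCalculator;

    public class RegionSizeSketch {
      public static void main(String[] args) throws Exception {
        try (Connection conn = ConnectionFactory.createConnection(HBaseConfiguration.create());
             RegionLocator locator = conn.getRegionLocator(TableName.valueOf("my_table"));
             Admin admin = conn.getAdmin()) {
          // Non-deprecated constructor from the class above; sizes are derived from RegionLoad.
          RegionSizeCalculator calc = new RegionSizeCalculator(locator, admin);
          for (Map.Entry<byte[], Long> e : calc.getRegionSizeMap().entrySet()) {
            System.out.println(Bytes.toStringBinary(e.getKey()) + " = " + e.getValue() + " bytes");
          }
        }
      }
    }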
[35/41] hbase git commit: HBASE-18640 Move mapreduce out of
hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/SyncTable.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/SyncTable.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/SyncTable.java
new file mode 100644
index 0000000..c72a0c3
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/SyncTable.java
@@ -0,0 +1,786 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.Collections;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellComparator;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Mutation;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapreduce.Counters;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.base.Throwables;
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.Iterators;
+
+public class SyncTable extends Configured implements Tool {
+
+ private static final Log LOG = LogFactory.getLog(SyncTable.class);
+
+ static final String SOURCE_HASH_DIR_CONF_KEY = "sync.table.source.hash.dir";
+ static final String SOURCE_TABLE_CONF_KEY = "sync.table.source.table.name";
+ static final String TARGET_TABLE_CONF_KEY = "sync.table.target.table.name";
+ static final String SOURCE_ZK_CLUSTER_CONF_KEY = "sync.table.source.zk.cluster";
+ static final String TARGET_ZK_CLUSTER_CONF_KEY = "sync.table.target.zk.cluster";
+ static final String DRY_RUN_CONF_KEY="sync.table.dry.run";
+
+ Path sourceHashDir;
+ String sourceTableName;
+ String targetTableName;
+
+ String sourceZkCluster;
+ String targetZkCluster;
+ boolean dryRun;
+
+ Counters counters;
+
+ public SyncTable(Configuration conf) {
+ super(conf);
+ }
+
+ public Job createSubmittableJob(String[] args) throws IOException {
+ FileSystem fs = sourceHashDir.getFileSystem(getConf());
+ if (!fs.exists(sourceHashDir)) {
+ throw new IOException("Source hash dir not found: " + sourceHashDir);
+ }
+
+ HashTable.TableHash tableHash = HashTable.TableHash.read(getConf(), sourceHashDir);
+ LOG.info("Read source hash manifest: " + tableHash);
+ LOG.info("Read " + tableHash.partitions.size() + " partition keys");
+ if (!tableHash.tableName.equals(sourceTableName)) {
+ LOG.warn("Table name mismatch - manifest indicates hash was taken from: "
+ + tableHash.tableName + " but job is reading from: " + sourceTableName);
+ }
+ if (tableHash.numHashFiles != tableHash.partitions.size() + 1) {
+ throw new RuntimeException("Hash data appears corrupt. The number of of hash files created"
+ + " should be 1 more than the number of partition keys. However, the manifest file "
+ + " says numHashFiles=" + tableHash.numHashFiles + " but the number of partition keys"
+ + " found in the partitions file is " + tableHash.partitions.size());
+ }
+
+ Path dataDir = new Path(sourceHashDir, HashTable.HASH_DATA_DIR);
+ int dataSubdirCount = 0;
+ for (FileStatus file : fs.listStatus(dataDir)) {
+ if (file.getPath().getName().startsWith(HashTable.OUTPUT_DATA_FILE_PREFIX)) {
+ dataSubdirCount++;
+ }
+ }
+
+ if (dataSubdirCount != tableHash.numHashFiles) {
+ throw new RuntimeException("Hash data appears corrupt. The number of of hash files created"
+ + " should be 1 more than the number of partition keys. However, the number of data dirs"
+ + " found is " + dataSubdirCount + " but the number of partition keys"
+ + " found in the partitions file is " + tableHash.partitions.size());
+ }
+
+ Job job = Job.getInstance(getConf(), getConf().get("mapreduce.job.name",
+ "syncTable_" + sourceTableName + "-" + targetTableName));
+ Configuration jobConf = job.getConfiguration();
+ job.setJarByClass(HashTable.class);
+ jobConf.set(SOURCE_HASH_DIR_CONF_KEY, sourceHashDir.toString());
+ jobConf.set(SOURCE_TABLE_CONF_KEY, sourceTableName);
+ jobConf.set(TARGET_TABLE_CONF_KEY, targetTableName);
+ if (sourceZkCluster != null) {
+ jobConf.set(SOURCE_ZK_CLUSTER_CONF_KEY, sourceZkCluster);
+ }
+ if (targetZkCluster != null) {
+ jobConf.set(TARGET_ZK_CLUSTER_CONF_KEY, targetZkCluster);
+ }
+ jobConf.setBoolean(DRY_RUN_CONF_KEY, dryRun);
+
+ TableMapReduceUtil.initTableMapperJob(targetTableName, tableHash.initScan(),
+ SyncMapper.class, null, null, job);
+
+ job.setNumReduceTasks(0);
+
+ if (dryRun) {
+ job.setOutputFormatClass(NullOutputFormat.class);
+ } else {
+ // No reducers. Just write straight to table. Call initTableReducerJob
+ // because it sets up the TableOutputFormat.
+ TableMapReduceUtil.initTableReducerJob(targetTableName, null, job, null,
+ targetZkCluster, null, null);
+
+ // would be nice to add an option for bulk load instead
+ }
+
+ // Obtain an authentication token, for the specified cluster, on behalf of the current user
+ if (sourceZkCluster != null) {
+ Configuration peerConf =
+ HBaseConfiguration.createClusterConf(job.getConfiguration(), sourceZkCluster);
+ TableMapReduceUtil.initCredentialsForCluster(job, peerConf);
+ }
+ return job;
+ }
+
+ public static class SyncMapper extends TableMapper<ImmutableBytesWritable, Mutation> {
+ Path sourceHashDir;
+
+ Connection sourceConnection;
+ Connection targetConnection;
+ Table sourceTable;
+ Table targetTable;
+ boolean dryRun;
+
+ HashTable.TableHash sourceTableHash;
+ HashTable.TableHash.Reader sourceHashReader;
+ ImmutableBytesWritable currentSourceHash;
+ ImmutableBytesWritable nextSourceKey;
+ HashTable.ResultHasher targetHasher;
+
+ Throwable mapperException;
+
+ public static enum Counter {BATCHES, HASHES_MATCHED, HASHES_NOT_MATCHED, SOURCEMISSINGROWS,
+ SOURCEMISSINGCELLS, TARGETMISSINGROWS, TARGETMISSINGCELLS, ROWSWITHDIFFS, DIFFERENTCELLVALUES,
+ MATCHINGROWS, MATCHINGCELLS, EMPTY_BATCHES, RANGESMATCHED, RANGESNOTMATCHED};
+
+ @Override
+ protected void setup(Context context) throws IOException {
+
+ Configuration conf = context.getConfiguration();
+ sourceHashDir = new Path(conf.get(SOURCE_HASH_DIR_CONF_KEY));
+ sourceConnection = openConnection(conf, SOURCE_ZK_CLUSTER_CONF_KEY, null);
+ targetConnection = openConnection(conf, TARGET_ZK_CLUSTER_CONF_KEY,
+ TableOutputFormat.OUTPUT_CONF_PREFIX);
+ sourceTable = openTable(sourceConnection, conf, SOURCE_TABLE_CONF_KEY);
+ targetTable = openTable(targetConnection, conf, TARGET_TABLE_CONF_KEY);
+ dryRun = conf.getBoolean(DRY_RUN_CONF_KEY, false);
+
+ sourceTableHash = HashTable.TableHash.read(conf, sourceHashDir);
+ LOG.info("Read source hash manifest: " + sourceTableHash);
+ LOG.info("Read " + sourceTableHash.partitions.size() + " partition keys");
+
+ TableSplit split = (TableSplit) context.getInputSplit();
+ ImmutableBytesWritable splitStartKey = new ImmutableBytesWritable(split.getStartRow());
+
+ sourceHashReader = sourceTableHash.newReader(conf, splitStartKey);
+ findNextKeyHashPair();
+
+ // create a hasher, but don't start it right away
+ // instead, find the first hash batch at or after the start row
+ // and skip any rows that come before. they will be caught by the previous task
+ targetHasher = new HashTable.ResultHasher();
+ }
+
+ private static Connection openConnection(Configuration conf, String zkClusterConfKey,
+ String configPrefix)
+ throws IOException {
+ String zkCluster = conf.get(zkClusterConfKey);
+ Configuration clusterConf = HBaseConfiguration.createClusterConf(conf,
+ zkCluster, configPrefix);
+ return ConnectionFactory.createConnection(clusterConf);
+ }
+
+ private static Table openTable(Connection connection, Configuration conf,
+ String tableNameConfKey) throws IOException {
+ return connection.getTable(TableName.valueOf(conf.get(tableNameConfKey)));
+ }
+
+ /**
+ * Attempt to read the next source key/hash pair.
+ * If there are no more, set nextSourceKey to null
+ */
+ private void findNextKeyHashPair() throws IOException {
+ boolean hasNext = sourceHashReader.next();
+ if (hasNext) {
+ nextSourceKey = sourceHashReader.getCurrentKey();
+ } else {
+ // no more keys - last hash goes to the end
+ nextSourceKey = null;
+ }
+ }
+
+ @Override
+ protected void map(ImmutableBytesWritable key, Result value, Context context)
+ throws IOException, InterruptedException {
+ try {
+ // first, finish any hash batches that end before the scanned row
+ while (nextSourceKey != null && key.compareTo(nextSourceKey) >= 0) {
+ moveToNextBatch(context);
+ }
+
+ // next, add the scanned row (as long as we've reached the first batch)
+ if (targetHasher.isBatchStarted()) {
+ targetHasher.hashResult(value);
+ }
+ } catch (Throwable t) {
+ mapperException = t;
+ Throwables.propagateIfInstanceOf(t, IOException.class);
+ Throwables.propagateIfInstanceOf(t, InterruptedException.class);
+ Throwables.propagate(t);
+ }
+ }
+
+ /**
+ * If there is an open hash batch, complete it and sync if there are diffs.
+ * Start a new batch, and seek to read the next source key/hash pair.
+ */
+ private void moveToNextBatch(Context context) throws IOException, InterruptedException {
+ if (targetHasher.isBatchStarted()) {
+ finishBatchAndCompareHashes(context);
+ }
+ targetHasher.startBatch(nextSourceKey);
+ currentSourceHash = sourceHashReader.getCurrentHash();
+
+ findNextKeyHashPair();
+ }
+
+ /**
+ * Finish the currently open hash batch.
+ * Compare the target hash to the given source hash.
+ * If they do not match, then sync the covered key range.
+ */
+ private void finishBatchAndCompareHashes(Context context)
+ throws IOException, InterruptedException {
+ targetHasher.finishBatch();
+ context.getCounter(Counter.BATCHES).increment(1);
+ if (targetHasher.getBatchSize() == 0) {
+ context.getCounter(Counter.EMPTY_BATCHES).increment(1);
+ }
+ ImmutableBytesWritable targetHash = targetHasher.getBatchHash();
+ if (targetHash.equals(currentSourceHash)) {
+ context.getCounter(Counter.HASHES_MATCHED).increment(1);
+ } else {
+ context.getCounter(Counter.HASHES_NOT_MATCHED).increment(1);
+
+ ImmutableBytesWritable stopRow = nextSourceKey == null
+ ? new ImmutableBytesWritable(sourceTableHash.stopRow)
+ : nextSourceKey;
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Hash mismatch. Key range: " + toHex(targetHasher.getBatchStartKey())
+ + " to " + toHex(stopRow)
+ + " sourceHash: " + toHex(currentSourceHash)
+ + " targetHash: " + toHex(targetHash));
+ }
+
+ syncRange(context, targetHasher.getBatchStartKey(), stopRow);
+ }
+ }
+
+ private static String toHex(ImmutableBytesWritable bytes) {
+ return Bytes.toHex(bytes.get(), bytes.getOffset(), bytes.getLength());
+ }
+
+ private static final CellScanner EMPTY_CELL_SCANNER
+ = new CellScanner(Collections.<Result>emptyIterator());
+
+ /**
+ * Rescan the given range directly from the source and target tables.
+ * Count and log differences, and if this is not a dry run, output Puts and Deletes
+ * to make the target table match the source table for this range
+ */
+ private void syncRange(Context context, ImmutableBytesWritable startRow,
+ ImmutableBytesWritable stopRow) throws IOException, InterruptedException {
+ Scan scan = sourceTableHash.initScan();
+ scan.setStartRow(startRow.copyBytes());
+ scan.setStopRow(stopRow.copyBytes());
+
+ ResultScanner sourceScanner = sourceTable.getScanner(scan);
+ CellScanner sourceCells = new CellScanner(sourceScanner.iterator());
+
+ ResultScanner targetScanner = targetTable.getScanner(new Scan(scan));
+ CellScanner targetCells = new CellScanner(targetScanner.iterator());
+
+ boolean rangeMatched = true;
+ byte[] nextSourceRow = sourceCells.nextRow();
+ byte[] nextTargetRow = targetCells.nextRow();
+ while(nextSourceRow != null || nextTargetRow != null) {
+ boolean rowMatched;
+ int rowComparison = compareRowKeys(nextSourceRow, nextTargetRow);
+ if (rowComparison < 0) {
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Target missing row: " + Bytes.toHex(nextSourceRow));
+ }
+ context.getCounter(Counter.TARGETMISSINGROWS).increment(1);
+
+ rowMatched = syncRowCells(context, nextSourceRow, sourceCells, EMPTY_CELL_SCANNER);
+ nextSourceRow = sourceCells.nextRow(); // advance only source to next row
+ } else if (rowComparison > 0) {
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Source missing row: " + Bytes.toHex(nextTargetRow));
+ }
+ context.getCounter(Counter.SOURCEMISSINGROWS).increment(1);
+
+ rowMatched = syncRowCells(context, nextTargetRow, EMPTY_CELL_SCANNER, targetCells);
+ nextTargetRow = targetCells.nextRow(); // advance only target to next row
+ } else {
+ // current row is the same on both sides, compare cell by cell
+ rowMatched = syncRowCells(context, nextSourceRow, sourceCells, targetCells);
+ nextSourceRow = sourceCells.nextRow();
+ nextTargetRow = targetCells.nextRow();
+ }
+
+ if (!rowMatched) {
+ rangeMatched = false;
+ }
+ }
+
+ sourceScanner.close();
+ targetScanner.close();
+
+ context.getCounter(rangeMatched ? Counter.RANGESMATCHED : Counter.RANGESNOTMATCHED)
+ .increment(1);
+ }
+
+ private static class CellScanner {
+ private final Iterator<Result> results;
+
+ private byte[] currentRow;
+ private Result currentRowResult;
+ private int nextCellInRow;
+
+ private Result nextRowResult;
+
+ public CellScanner(Iterator<Result> results) {
+ this.results = results;
+ }
+
+ /**
+ * Advance to the next row and return its row key.
+ * Returns null iff there are no more rows.
+ */
+ public byte[] nextRow() {
+ if (nextRowResult == null) {
+ // no cached row - check scanner for more
+ while (results.hasNext()) {
+ nextRowResult = results.next();
+ Cell nextCell = nextRowResult.rawCells()[0];
+ if (currentRow == null
+ || !Bytes.equals(currentRow, 0, currentRow.length, nextCell.getRowArray(),
+ nextCell.getRowOffset(), nextCell.getRowLength())) {
+ // found next row
+ break;
+ } else {
+ // found another result from current row, keep scanning
+ nextRowResult = null;
+ }
+ }
+
+ if (nextRowResult == null) {
+ // end of data, no more rows
+ currentRowResult = null;
+ currentRow = null;
+ return null;
+ }
+ }
+
+ // advance to cached result for next row
+ currentRowResult = nextRowResult;
+ nextCellInRow = 0;
+ currentRow = currentRowResult.getRow();
+ nextRowResult = null;
+ return currentRow;
+ }
+
+ /**
+ * Returns the next Cell in the current row or null iff none remain.
+ */
+ public Cell nextCellInRow() {
+ if (currentRowResult == null) {
+ // nothing left in current row
+ return null;
+ }
+
+ Cell nextCell = currentRowResult.rawCells()[nextCellInRow];
+ nextCellInRow++;
+ if (nextCellInRow == currentRowResult.size()) {
+ if (results.hasNext()) {
+ Result result = results.next();
+ Cell cell = result.rawCells()[0];
+ if (Bytes.equals(currentRow, 0, currentRow.length, cell.getRowArray(),
+ cell.getRowOffset(), cell.getRowLength())) {
+ // result is part of current row
+ currentRowResult = result;
+ nextCellInRow = 0;
+ } else {
+ // result is part of next row, cache it
+ nextRowResult = result;
+ // current row is complete
+ currentRowResult = null;
+ }
+ } else {
+ // end of data
+ currentRowResult = null;
+ }
+ }
+ return nextCell;
+ }
+ }
+
+ /**
+ * Compare the cells for the given row from the source and target tables.
+ * Count and log any differences.
+ * If not a dry run, output a Put and/or Delete needed to sync the target table
+ * to match the source table.
+ */
+ private boolean syncRowCells(Context context, byte[] rowKey, CellScanner sourceCells,
+ CellScanner targetCells) throws IOException, InterruptedException {
+ Put put = null;
+ Delete delete = null;
+ long matchingCells = 0;
+ boolean matchingRow = true;
+ Cell sourceCell = sourceCells.nextCellInRow();
+ Cell targetCell = targetCells.nextCellInRow();
+ while (sourceCell != null || targetCell != null) {
+
+ int cellKeyComparison = compareCellKeysWithinRow(sourceCell, targetCell);
+ if (cellKeyComparison < 0) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Target missing cell: " + sourceCell);
+ }
+ context.getCounter(Counter.TARGETMISSINGCELLS).increment(1);
+ matchingRow = false;
+
+ if (!dryRun) {
+ if (put == null) {
+ put = new Put(rowKey);
+ }
+ put.add(sourceCell);
+ }
+
+ sourceCell = sourceCells.nextCellInRow();
+ } else if (cellKeyComparison > 0) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Source missing cell: " + targetCell);
+ }
+ context.getCounter(Counter.SOURCEMISSINGCELLS).increment(1);
+ matchingRow = false;
+
+ if (!dryRun) {
+ if (delete == null) {
+ delete = new Delete(rowKey);
+ }
+ // add a tombstone to exactly match the target cell that is missing on the source
+ delete.addColumn(CellUtil.cloneFamily(targetCell),
+ CellUtil.cloneQualifier(targetCell), targetCell.getTimestamp());
+ }
+
+ targetCell = targetCells.nextCellInRow();
+ } else {
+ // the cell keys are equal, now check values
+ if (CellUtil.matchingValue(sourceCell, targetCell)) {
+ matchingCells++;
+ } else {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Different values: ");
+ LOG.debug(" source cell: " + sourceCell
+ + " value: " + Bytes.toHex(sourceCell.getValueArray(),
+ sourceCell.getValueOffset(), sourceCell.getValueLength()));
+ LOG.debug(" target cell: " + targetCell
+ + " value: " + Bytes.toHex(targetCell.getValueArray(),
+ targetCell.getValueOffset(), targetCell.getValueLength()));
+ }
+ context.getCounter(Counter.DIFFERENTCELLVALUES).increment(1);
+ matchingRow = false;
+
+ if (!dryRun) {
+ // overwrite target cell
+ if (put == null) {
+ put = new Put(rowKey);
+ }
+ put.add(sourceCell);
+ }
+ }
+ sourceCell = sourceCells.nextCellInRow();
+ targetCell = targetCells.nextCellInRow();
+ }
+
+ if (!dryRun && sourceTableHash.scanBatch > 0) {
+ if (put != null && put.size() >= sourceTableHash.scanBatch) {
+ context.write(new ImmutableBytesWritable(rowKey), put);
+ put = null;
+ }
+ if (delete != null && delete.size() >= sourceTableHash.scanBatch) {
+ context.write(new ImmutableBytesWritable(rowKey), delete);
+ delete = null;
+ }
+ }
+ }
+
+ if (!dryRun) {
+ if (put != null) {
+ context.write(new ImmutableBytesWritable(rowKey), put);
+ }
+ if (delete != null) {
+ context.write(new ImmutableBytesWritable(rowKey), delete);
+ }
+ }
+
+ if (matchingCells > 0) {
+ context.getCounter(Counter.MATCHINGCELLS).increment(matchingCells);
+ }
+ if (matchingRow) {
+ context.getCounter(Counter.MATCHINGROWS).increment(1);
+ return true;
+ } else {
+ context.getCounter(Counter.ROWSWITHDIFFS).increment(1);
+ return false;
+ }
+ }
+
+ /**
+ * Compare the given row keys.
+ * Null sorts after non-null.
+ */
+ private static int compareRowKeys(byte[] r1, byte[] r2) {
+ if (r1 == null) {
+ return 1; // source missing row
+ } else if (r2 == null) {
+ return -1; // target missing row
+ } else {
+ // SyncTable is never run against META tables, so we can compare plain row keys directly,
+ // doing what CellComparator does internally without ever dispatching to MetaCellComparator.
+ return Bytes.compareTo(r1, 0, r1.length, r2, 0, r2.length);
+ }
+ }
+
+ /**
+ * Compare families, qualifiers, and timestamps of the given Cells.
+ * They are assumed to be of the same row.
+ * Nulls are after non-nulls.
+ */
+ private static int compareCellKeysWithinRow(Cell c1, Cell c2) {
+ if (c1 == null) {
+ return 1; // source missing cell
+ }
+ if (c2 == null) {
+ return -1; // target missing cell
+ }
+
+ int result = CellComparator.compareFamilies(c1, c2);
+ if (result != 0) {
+ return result;
+ }
+
+ result = CellComparator.compareQualifiers(c1, c2);
+ if (result != 0) {
+ return result;
+ }
+
+ // note timestamp comparison is inverted - more recent cells first
+ return CellComparator.compareTimestamps(c1, c2);
+ }
+
+ @Override
+ protected void cleanup(Context context)
+ throws IOException, InterruptedException {
+ if (mapperException == null) {
+ try {
+ finishRemainingHashRanges(context);
+ } catch (Throwable t) {
+ mapperException = t;
+ }
+ }
+
+ try {
+ sourceTable.close();
+ targetTable.close();
+ sourceConnection.close();
+ targetConnection.close();
+ } catch (Throwable t) {
+ if (mapperException == null) {
+ mapperException = t;
+ } else {
+ LOG.error("Suppressing exception from closing tables", t);
+ }
+ }
+
+ // propagate first exception
+ if (mapperException != null) {
+ Throwables.propagateIfInstanceOf(mapperException, IOException.class);
+ Throwables.propagateIfInstanceOf(mapperException, InterruptedException.class);
+ Throwables.propagate(mapperException);
+ }
+ }
+
+ private void finishRemainingHashRanges(Context context) throws IOException,
+ InterruptedException {
+ TableSplit split = (TableSplit) context.getInputSplit();
+ byte[] splitEndRow = split.getEndRow();
+ boolean reachedEndOfTable = HashTable.isTableEndRow(splitEndRow);
+
+ // if there are more hash batches that begin before the end of this split move to them
+ while (nextSourceKey != null
+ && (nextSourceKey.compareTo(splitEndRow) < 0 || reachedEndOfTable)) {
+ moveToNextBatch(context);
+ }
+
+ if (targetHasher.isBatchStarted()) {
+ // need to complete the final open hash batch
+
+ if ((nextSourceKey != null && nextSourceKey.compareTo(splitEndRow) > 0)
+ || (nextSourceKey == null && !Bytes.equals(splitEndRow, sourceTableHash.stopRow))) {
+ // the open hash range continues past the end of this region
+ // add a scan to complete the current hash range
+ Scan scan = sourceTableHash.initScan();
+ scan.setStartRow(splitEndRow);
+ if (nextSourceKey == null) {
+ scan.setStopRow(sourceTableHash.stopRow);
+ } else {
+ scan.setStopRow(nextSourceKey.copyBytes());
+ }
+
+ ResultScanner targetScanner = null;
+ try {
+ targetScanner = targetTable.getScanner(scan);
+ for (Result row : targetScanner) {
+ targetHasher.hashResult(row);
+ }
+ } finally {
+ if (targetScanner != null) {
+ targetScanner.close();
+ }
+ }
+ } // else current batch ends exactly at split end row
+
+ finishBatchAndCompareHashes(context);
+ }
+ }
+ }
+
+ private static final int NUM_ARGS = 3;
+ private static void printUsage(final String errorMsg) {
+ if (errorMsg != null && errorMsg.length() > 0) {
+ System.err.println("ERROR: " + errorMsg);
+ System.err.println();
+ }
+ System.err.println("Usage: SyncTable [options] <sourcehashdir> <sourcetable> <targettable>");
+ System.err.println();
+ System.err.println("Options:");
+
+ System.err.println(" sourcezkcluster ZK cluster key of the source table");
+ System.err.println(" (defaults to cluster in classpath's config)");
+ System.err.println(" targetzkcluster ZK cluster key of the target table");
+ System.err.println(" (defaults to cluster in classpath's config)");
+ System.err.println(" dryrun if true, output counters but no writes");
+ System.err.println(" (defaults to false)");
+ System.err.println();
+ System.err.println("Args:");
+ System.err.println(" sourcehashdir path to HashTable output dir for source table");
+ System.err.println(" (see org.apache.hadoop.hbase.mapreduce.HashTable)");
+ System.err.println(" sourcetable Name of the source table to sync from");
+ System.err.println(" targettable Name of the target table to sync to");
+ System.err.println();
+ System.err.println("Examples:");
+ System.err.println(" For a dry run SyncTable of tableA from a remote source cluster");
+ System.err.println(" to a local target cluster:");
+ System.err.println(" $ hbase " +
+ "org.apache.hadoop.hbase.mapreduce.SyncTable --dryrun=true"
+ + " --sourcezkcluster=zk1.example.com,zk2.example.com,zk3.example.com:2181:/hbase"
+ + " hdfs://nn:9000/hashes/tableA tableA tableA");
+ }
+
+ private boolean doCommandLine(final String[] args) {
+ if (args.length < NUM_ARGS) {
+ printUsage(null);
+ return false;
+ }
+ try {
+ sourceHashDir = new Path(args[args.length - 3]);
+ sourceTableName = args[args.length - 2];
+ targetTableName = args[args.length - 1];
+
+ for (int i = 0; i < args.length - NUM_ARGS; i++) {
+ String cmd = args[i];
+ if (cmd.equals("-h") || cmd.startsWith("--h")) {
+ printUsage(null);
+ return false;
+ }
+
+ final String sourceZkClusterKey = "--sourcezkcluster=";
+ if (cmd.startsWith(sourceZkClusterKey)) {
+ sourceZkCluster = cmd.substring(sourceZkClusterKey.length());
+ continue;
+ }
+
+ final String targetZkClusterKey = "--targetzkcluster=";
+ if (cmd.startsWith(targetZkClusterKey)) {
+ targetZkCluster = cmd.substring(targetZkClusterKey.length());
+ continue;
+ }
+
+ final String dryRunKey = "--dryrun=";
+ if (cmd.startsWith(dryRunKey)) {
+ dryRun = Boolean.parseBoolean(cmd.substring(dryRunKey.length()));
+ continue;
+ }
+
+ printUsage("Invalid argument '" + cmd + "'");
+ return false;
+ }
+
+
+ } catch (Exception e) {
+ e.printStackTrace();
+ printUsage("Can't start because " + e.getMessage());
+ return false;
+ }
+ return true;
+ }
+
+ /**
+ * Main entry point.
+ */
+ public static void main(String[] args) throws Exception {
+ int ret = ToolRunner.run(new SyncTable(HBaseConfiguration.create()), args);
+ System.exit(ret);
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+ String[] otherArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs();
+ if (!doCommandLine(otherArgs)) {
+ return 1;
+ }
+
+ Job job = createSubmittableJob(otherArgs);
+ if (!job.waitForCompletion(true)) {
+ LOG.info("Map-reduce job failed!");
+ return 1;
+ }
+ counters = job.getCounters();
+ return 0;
+ }
+
+}
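For reference, a minimal sketch of driving the tool programmatically rather than from the shell, mirroring the main() above (the HDFS hash directory and table names are hypothetical placeholders taken from the usage example printed by printUsage):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.mapreduce.SyncTable;
    import org.apache.hadoop.util.ToolRunner;

    public class SyncTableDryRunExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // Argument layout: [options] <sourcehashdir> <sourcetable> <targettable>
        int exit = ToolRunner.run(conf, new SyncTable(conf), new String[] {
            "--dryrun=true",                    // report counters only, write nothing
            "hdfs://nn:9000/hashes/tableA",     // HashTable output dir (hypothetical)
            "tableA",                           // source table
            "tableA"                            // target table
        });
        System.exit(exit);
      }
    }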
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormat.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormat.java
new file mode 100644
index 0000000..63868da
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormat.java
@@ -0,0 +1,294 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+import java.util.Locale;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.util.StringUtils;
+
+/**
+ * Convert HBase tabular data into a format that is consumable by Map/Reduce.
+ */
+@InterfaceAudience.Public
+public class TableInputFormat extends TableInputFormatBase
+implements Configurable {
+
+ @SuppressWarnings("hiding")
+ private static final Log LOG = LogFactory.getLog(TableInputFormat.class);
+
+ /** Job parameter that specifies the input table. */
+ public static final String INPUT_TABLE = "hbase.mapreduce.inputtable";
+ /**
+ * If specified, use start keys of this table to split.
+ * This is useful when you are preparing data for bulkload.
+ */
+ private static final String SPLIT_TABLE = "hbase.mapreduce.splittable";
+ /** Base-64 encoded scanner. All other SCAN_ confs are ignored if this is specified.
+ * See {@link TableMapReduceUtil#convertScanToString(Scan)} for more details.
+ */
+ public static final String SCAN = "hbase.mapreduce.scan";
+ /** Scan start row */
+ public static final String SCAN_ROW_START = "hbase.mapreduce.scan.row.start";
+ /** Scan stop row */
+ public static final String SCAN_ROW_STOP = "hbase.mapreduce.scan.row.stop";
+ /** Column Family to Scan */
+ public static final String SCAN_COLUMN_FAMILY = "hbase.mapreduce.scan.column.family";
+ /** Space delimited list of columns and column families to scan. */
+ public static final String SCAN_COLUMNS = "hbase.mapreduce.scan.columns";
+ /** The timestamp used to filter columns with a specific timestamp. */
+ public static final String SCAN_TIMESTAMP = "hbase.mapreduce.scan.timestamp";
+ /** The starting timestamp used to filter columns with a specific range of versions. */
+ public static final String SCAN_TIMERANGE_START = "hbase.mapreduce.scan.timerange.start";
+ /** The ending timestamp used to filter columns with a specific range of versions. */
+ public static final String SCAN_TIMERANGE_END = "hbase.mapreduce.scan.timerange.end";
+ /** The maximum number of version to return. */
+ public static final String SCAN_MAXVERSIONS = "hbase.mapreduce.scan.maxversions";
+ /** Set to false to disable server-side caching of blocks for this scan. */
+ public static final String SCAN_CACHEBLOCKS = "hbase.mapreduce.scan.cacheblocks";
+ /** The number of rows for caching that will be passed to scanners. */
+ public static final String SCAN_CACHEDROWS = "hbase.mapreduce.scan.cachedrows";
+ /** Set the maximum number of values to return for each call to next(). */
+ public static final String SCAN_BATCHSIZE = "hbase.mapreduce.scan.batchsize";
+ /** Specify if we have to shuffle the map tasks. */
+ public static final String SHUFFLE_MAPS = "hbase.mapreduce.inputtable.shufflemaps";
+
+ /** The configuration. */
+ private Configuration conf = null;
+
+ /**
+ * Returns the current configuration.
+ *
+ * @return The current configuration.
+ * @see org.apache.hadoop.conf.Configurable#getConf()
+ */
+ @Override
+ public Configuration getConf() {
+ return conf;
+ }
+
+ /**
+ * Sets the configuration. This is used to set the details for the table to
+ * be scanned.
+ *
+ * @param configuration The configuration to set.
+ * @see org.apache.hadoop.conf.Configurable#setConf(
+ * org.apache.hadoop.conf.Configuration)
+ */
+ @Override
+ @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="REC_CATCH_EXCEPTION",
+ justification="Intentional")
+ public void setConf(Configuration configuration) {
+ this.conf = configuration;
+
+ Scan scan = null;
+
+ if (conf.get(SCAN) != null) {
+ try {
+ scan = TableMapReduceUtil.convertStringToScan(conf.get(SCAN));
+ } catch (IOException e) {
+ LOG.error("An error occurred.", e);
+ }
+ } else {
+ try {
+ scan = createScanFromConfiguration(conf);
+ } catch (Exception e) {
+ LOG.error(StringUtils.stringifyException(e));
+ }
+ }
+
+ setScan(scan);
+ }
+
+ /**
+ * Sets up a {@link Scan} instance, applying settings from the configuration property
+ * constants defined in {@code TableInputFormat}. This allows specifying things such as:
+ * <ul>
+ * <li>start and stop rows</li>
+ * <li>column qualifiers or families</li>
+ * <li>timestamps or timerange</li>
+ * <li>scanner caching and batch size</li>
+ * </ul>
+ */
+ public static Scan createScanFromConfiguration(Configuration conf) throws IOException {
+ Scan scan = new Scan();
+
+ if (conf.get(SCAN_ROW_START) != null) {
+ scan.setStartRow(Bytes.toBytesBinary(conf.get(SCAN_ROW_START)));
+ }
+
+ if (conf.get(SCAN_ROW_STOP) != null) {
+ scan.setStopRow(Bytes.toBytesBinary(conf.get(SCAN_ROW_STOP)));
+ }
+
+ if (conf.get(SCAN_COLUMNS) != null) {
+ addColumns(scan, conf.get(SCAN_COLUMNS));
+ }
+
+ for (String columnFamily : conf.getTrimmedStrings(SCAN_COLUMN_FAMILY)) {
+ scan.addFamily(Bytes.toBytes(columnFamily));
+ }
+
+ if (conf.get(SCAN_TIMESTAMP) != null) {
+ scan.setTimeStamp(Long.parseLong(conf.get(SCAN_TIMESTAMP)));
+ }
+
+ if (conf.get(SCAN_TIMERANGE_START) != null && conf.get(SCAN_TIMERANGE_END) != null) {
+ scan.setTimeRange(
+ Long.parseLong(conf.get(SCAN_TIMERANGE_START)),
+ Long.parseLong(conf.get(SCAN_TIMERANGE_END)));
+ }
+
+ if (conf.get(SCAN_MAXVERSIONS) != null) {
+ scan.setMaxVersions(Integer.parseInt(conf.get(SCAN_MAXVERSIONS)));
+ }
+
+ if (conf.get(SCAN_CACHEDROWS) != null) {
+ scan.setCaching(Integer.parseInt(conf.get(SCAN_CACHEDROWS)));
+ }
+
+ if (conf.get(SCAN_BATCHSIZE) != null) {
+ scan.setBatch(Integer.parseInt(conf.get(SCAN_BATCHSIZE)));
+ }
+
+ // false by default, full table scans generate too much BC churn
+ scan.setCacheBlocks((conf.getBoolean(SCAN_CACHEBLOCKS, false)));
+
+ return scan;
+ }
+
+ @Override
+ protected void initialize(JobContext context) throws IOException {
+ // Do we have to worry about mis-matches between the Configuration from setConf and the one
+ // in this context?
+ TableName tableName = TableName.valueOf(conf.get(INPUT_TABLE));
+ try {
+ initializeTable(ConnectionFactory.createConnection(new Configuration(conf)), tableName);
+ } catch (Exception e) {
+ LOG.error(StringUtils.stringifyException(e));
+ }
+ }
+
+ /**
+ * Parses a combined family and qualifier and adds either both or just the
+ * family in case there is no qualifier. This assumes the older colon
+ * divided notation, e.g. "family:qualifier".
+ *
+ * @param scan The Scan to update.
+ * @param familyAndQualifier family and qualifier
+ * @throws IllegalArgumentException When familyAndQualifier is invalid.
+ */
+ private static void addColumn(Scan scan, byte[] familyAndQualifier) {
+ byte [][] fq = KeyValue.parseColumn(familyAndQualifier);
+ if (fq.length == 1) {
+ scan.addFamily(fq[0]);
+ } else if (fq.length == 2) {
+ scan.addColumn(fq[0], fq[1]);
+ } else {
+ throw new IllegalArgumentException("Invalid familyAndQualifier provided.");
+ }
+ }
+
+ /**
+ * Adds an array of columns specified using old format, family:qualifier.
+ * <p>
+ * Overrides previous calls to {@link Scan#addColumn(byte[], byte[])} for any families in the
+ * input.
+ *
+ * @param scan The Scan to update.
+ * @param columns array of columns, formatted as <code>family:qualifier</code>
+ * @see Scan#addColumn(byte[], byte[])
+ */
+ public static void addColumns(Scan scan, byte [][] columns) {
+ for (byte[] column : columns) {
+ addColumn(scan, column);
+ }
+ }
+
+ /**
+ * Calculates the splits that will serve as input for the map tasks. The
+ * number of splits matches the number of regions in a table. Splits are shuffled if
+ * required.
+ * @param context The current job context.
+ * @return The list of input splits.
+ * @throws IOException When creating the list of splits fails.
+ * @see org.apache.hadoop.mapreduce.InputFormat#getSplits(
+ * org.apache.hadoop.mapreduce.JobContext)
+ */
+ @Override
+ public List<InputSplit> getSplits(JobContext context) throws IOException {
+ List<InputSplit> splits = super.getSplits(context);
+ if ((conf.get(SHUFFLE_MAPS) != null)
+ && "true".equals(conf.get(SHUFFLE_MAPS).toLowerCase(Locale.ROOT))) {
+ Collections.shuffle(splits);
+ }
+ return splits;
+ }
+
+ /**
+ * Convenience method to parse a string representation of an array of column specifiers.
+ *
+ * @param scan The Scan to update.
+ * @param columns The columns to parse.
+ */
+ private static void addColumns(Scan scan, String columns) {
+ String[] cols = columns.split(" ");
+ for (String col : cols) {
+ addColumn(scan, Bytes.toBytes(col));
+ }
+ }
+
+ @Override
+ protected Pair<byte[][], byte[][]> getStartEndKeys() throws IOException {
+ if (conf.get(SPLIT_TABLE) != null) {
+ TableName splitTableName = TableName.valueOf(conf.get(SPLIT_TABLE));
+ try (Connection conn = ConnectionFactory.createConnection(getConf())) {
+ try (RegionLocator rl = conn.getRegionLocator(splitTableName)) {
+ return rl.getStartEndKeys();
+ }
+ }
+ }
+
+ return super.getStartEndKeys();
+ }
+
+ /**
+ * Sets split table in map-reduce job.
+ */
+ public static void configureSplitTable(Job job, TableName tableName) {
+ job.getConfiguration().set(SPLIT_TABLE, tableName.getNameAsString());
+ }
+}
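As a rough illustration of the configuration keys defined above, the sketch below drives TableInputFormat purely through those properties (the table name "exampleTable", the column family "d", the row bounds, and the job name are hypothetical; a real job would normally set up a TableMapper via TableMapReduceUtil.initTableMapperJob rather than wiring these keys by hand):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;

    public class TableInputFormatConfigExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set(TableInputFormat.INPUT_TABLE, "exampleTable");   // required: table to scan
        conf.set(TableInputFormat.SCAN_COLUMN_FAMILY, "d");       // limit the scan to one family
        conf.set(TableInputFormat.SCAN_ROW_START, "row-000");     // optional start row
        conf.set(TableInputFormat.SCAN_ROW_STOP, "row-999");      // optional stop row (exclusive)
        conf.set(TableInputFormat.SCAN_CACHEDROWS, "500");        // scanner caching
        conf.set(TableInputFormat.SHUFFLE_MAPS, "true");          // shuffle splits across map tasks

        Job job = Job.getInstance(conf, "scan-exampleTable");
        job.setInputFormatClass(TableInputFormat.class);          // builds its Scan in setConf()
        job.setNumReduceTasks(0);
        job.setOutputFormatClass(NullOutputFormat.class);         // discard map output in this sketch
        System.exit(job.waitForCompletion(true) ? 0 : 1);
      }
    }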
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java
new file mode 100644
index 0000000..fb38ebe
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java
@@ -0,0 +1,652 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.net.InetAddress;
+import java.net.InetSocketAddress;
+import java.net.UnknownHostException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionLocation;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Addressing;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.hbase.util.Strings;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.net.DNS;
+import org.apache.hadoop.util.StringUtils;
+
+/**
+ * A base for {@link TableInputFormat}s. Receives a {@link Connection}, a {@link TableName},
+ * an {@link Scan} instance that defines the input columns etc. Subclasses may use
+ * other TableRecordReader implementations.
+ *
+ * Subclasses MUST ensure initializeTable(Connection, TableName) is called for an instance to
+ * function properly. Each of the entry points to this class used by the MapReduce framework,
+ * {@link #createRecordReader(InputSplit, TaskAttemptContext)} and {@link #getSplits(JobContext)},
+ * will call {@link #initialize(JobContext)} as a convenient centralized location to handle
+ * retrieving the necessary configuration information. If your subclass overrides either of these
+ * methods, either call the parent version or call initialize yourself.
+ *
+ * <p>
+ * An example of a subclass:
+ * <pre>
+ * class ExampleTIF extends TableInputFormatBase {
+ *
+ * {@literal @}Override
+ * protected void initialize(JobContext context) throws IOException {
+ * // We are responsible for the lifecycle of this connection until we hand it over in
+ * // initializeTable.
+ * Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create(
+ * job.getConfiguration()));
+ * TableName tableName = TableName.valueOf("exampleTable");
+ * // mandatory. once passed here, TableInputFormatBase will handle closing the connection.
+ * initializeTable(connection, tableName);
+ * byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
+ * Bytes.toBytes("columnB") };
+ * // optional, by default we'll get everything for the table.
+ * Scan scan = new Scan();
+ * for (byte[] family : inputColumns) {
+ * scan.addFamily(family);
+ * }
+ * Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*"));
+ * scan.setFilter(exampleFilter);
+ * setScan(scan);
+ * }
+ * }
+ * </pre>
+ */
+@InterfaceAudience.Public
+public abstract class TableInputFormatBase
+extends InputFormat<ImmutableBytesWritable, Result> {
+
+ /** Specify if we enable auto-balance for input in M/R jobs.*/
+ public static final String MAPREDUCE_INPUT_AUTOBALANCE = "hbase.mapreduce.input.autobalance";
+ /** Specify the maximum ratio of data skew tolerated in M/R jobs; it is used together with the
+ * hbase.mapreduce.input.autobalance property.*/
+ public static final String INPUT_AUTOBALANCE_MAXSKEWRATIO = "hbase.mapreduce.input.autobalance" +
+ ".maxskewratio";
+ /** Specify whether the row keys in the table are text (ASCII between 32 and 126);
+ * default is true. False means the table uses binary row keys.*/
+ public static final String TABLE_ROW_TEXTKEY = "hbase.table.row.textkey";
+
+ private static final Log LOG = LogFactory.getLog(TableInputFormatBase.class);
+
+ private static final String NOT_INITIALIZED = "The input format instance has not been properly " +
+ "initialized. Ensure you call initializeTable either in your constructor or initialize " +
+ "method";
+ private static final String INITIALIZATION_ERROR = "Cannot create a record reader because of a" +
+ " previous error. Please look at the previous logs lines from" +
+ " the task's full log for more details.";
+
+ /** Holds the details for the internal scanner.
+ *
+ * @see Scan */
+ private Scan scan = null;
+ /** The {@link Admin}. */
+ private Admin admin;
+ /** The {@link Table} to scan. */
+ private Table table;
+ /** The {@link RegionLocator} of the table. */
+ private RegionLocator regionLocator;
+ /** The reader scanning the table, can be a custom one. */
+ private TableRecordReader tableRecordReader = null;
+ /** The underlying {@link Connection} of the table. */
+ private Connection connection;
+
+
+ /** The reverse DNS lookup cache mapping: IPAddress => HostName */
+ private HashMap<InetAddress, String> reverseDNSCacheMap = new HashMap<>();
+
+ /**
+ * Builds a {@link TableRecordReader}. If no {@link TableRecordReader} was provided, uses
+ * the default.
+ *
+ * @param split The split to work with.
+ * @param context The current context.
+ * @return The newly created record reader.
+ * @throws IOException When creating the reader fails.
+ * @see org.apache.hadoop.mapreduce.InputFormat#createRecordReader(
+ * org.apache.hadoop.mapreduce.InputSplit,
+ * org.apache.hadoop.mapreduce.TaskAttemptContext)
+ */
+ @Override
+ public RecordReader<ImmutableBytesWritable, Result> createRecordReader(
+ InputSplit split, TaskAttemptContext context)
+ throws IOException {
+ // Just in case a subclass is relying on JobConfigurable magic.
+ if (table == null) {
+ initialize(context);
+ }
+ // null check in case our child overrides getTable to not throw.
+ try {
+ if (getTable() == null) {
+ // initialize() must not have been implemented in the subclass.
+ throw new IOException(INITIALIZATION_ERROR);
+ }
+ } catch (IllegalStateException exception) {
+ throw new IOException(INITIALIZATION_ERROR, exception);
+ }
+ TableSplit tSplit = (TableSplit) split;
+ LOG.info("Input split length: " + StringUtils.humanReadableInt(tSplit.getLength()) + " bytes.");
+ final TableRecordReader trr =
+ this.tableRecordReader != null ? this.tableRecordReader : new TableRecordReader();
+ Scan sc = new Scan(this.scan);
+ sc.setStartRow(tSplit.getStartRow());
+ sc.setStopRow(tSplit.getEndRow());
+ trr.setScan(sc);
+ trr.setTable(getTable());
+ return new RecordReader<ImmutableBytesWritable, Result>() {
+
+ @Override
+ public void close() throws IOException {
+ trr.close();
+ closeTable();
+ }
+
+ @Override
+ public ImmutableBytesWritable getCurrentKey() throws IOException, InterruptedException {
+ return trr.getCurrentKey();
+ }
+
+ @Override
+ public Result getCurrentValue() throws IOException, InterruptedException {
+ return trr.getCurrentValue();
+ }
+
+ @Override
+ public float getProgress() throws IOException, InterruptedException {
+ return trr.getProgress();
+ }
+
+ @Override
+ public void initialize(InputSplit inputsplit, TaskAttemptContext context) throws IOException,
+ InterruptedException {
+ trr.initialize(inputsplit, context);
+ }
+
+ @Override
+ public boolean nextKeyValue() throws IOException, InterruptedException {
+ return trr.nextKeyValue();
+ }
+ };
+ }
+
+ protected Pair<byte[][],byte[][]> getStartEndKeys() throws IOException {
+ return getRegionLocator().getStartEndKeys();
+ }
+
+ /**
+ * Calculates the splits that will serve as input for the map tasks. The
+ * number of splits matches the number of regions in a table.
+ *
+ * @param context The current job context.
+ * @return The list of input splits.
+ * @throws IOException When creating the list of splits fails.
+ * @see org.apache.hadoop.mapreduce.InputFormat#getSplits(
+ * org.apache.hadoop.mapreduce.JobContext)
+ */
+ @Override
+ public List<InputSplit> getSplits(JobContext context) throws IOException {
+ boolean closeOnFinish = false;
+
+ // Just in case a subclass is relying on JobConfigurable magic.
+ if (table == null) {
+ initialize(context);
+ closeOnFinish = true;
+ }
+
+ // null check in case our child overrides getTable to not throw.
+ try {
+ if (getTable() == null) {
+ // initialize() must not have been implemented in the subclass.
+ throw new IOException(INITIALIZATION_ERROR);
+ }
+ } catch (IllegalStateException exception) {
+ throw new IOException(INITIALIZATION_ERROR, exception);
+ }
+
+ try {
+ RegionSizeCalculator sizeCalculator =
+ new RegionSizeCalculator(getRegionLocator(), getAdmin());
+
+ TableName tableName = getTable().getName();
+
+ Pair<byte[][], byte[][]> keys = getStartEndKeys();
+ if (keys == null || keys.getFirst() == null ||
+ keys.getFirst().length == 0) {
+ HRegionLocation regLoc =
+ getRegionLocator().getRegionLocation(HConstants.EMPTY_BYTE_ARRAY, false);
+ if (null == regLoc) {
+ throw new IOException("Expecting at least one region.");
+ }
+ List<InputSplit> splits = new ArrayList<>(1);
+ long regionSize = sizeCalculator.getRegionSize(regLoc.getRegionInfo().getRegionName());
+ TableSplit split = new TableSplit(tableName, scan,
+ HConstants.EMPTY_BYTE_ARRAY, HConstants.EMPTY_BYTE_ARRAY, regLoc
+ .getHostnamePort().split(Addressing.HOSTNAME_PORT_SEPARATOR)[0], regionSize);
+ splits.add(split);
+ return splits;
+ }
+ List<InputSplit> splits = new ArrayList<>(keys.getFirst().length);
+ for (int i = 0; i < keys.getFirst().length; i++) {
+ if (!includeRegionInSplit(keys.getFirst()[i], keys.getSecond()[i])) {
+ continue;
+ }
+
+ byte[] startRow = scan.getStartRow();
+ byte[] stopRow = scan.getStopRow();
+ // determine if the given start and stop keys fall into the region
+ if ((startRow.length == 0 || keys.getSecond()[i].length == 0 ||
+ Bytes.compareTo(startRow, keys.getSecond()[i]) < 0) &&
+ (stopRow.length == 0 ||
+ Bytes.compareTo(stopRow, keys.getFirst()[i]) > 0)) {
+ byte[] splitStart = startRow.length == 0 ||
+ Bytes.compareTo(keys.getFirst()[i], startRow) >= 0 ?
+ keys.getFirst()[i] : startRow;
+ byte[] splitStop = (stopRow.length == 0 ||
+ Bytes.compareTo(keys.getSecond()[i], stopRow) <= 0) &&
+ keys.getSecond()[i].length > 0 ?
+ keys.getSecond()[i] : stopRow;
+
+ HRegionLocation location = getRegionLocator().getRegionLocation(keys.getFirst()[i], false);
+ // The below InetSocketAddress creation does a name resolution.
+ InetSocketAddress isa = new InetSocketAddress(location.getHostname(), location.getPort());
+ if (isa.isUnresolved()) {
+ LOG.warn("Failed resolve " + isa);
+ }
+ InetAddress regionAddress = isa.getAddress();
+ String regionLocation;
+ regionLocation = reverseDNS(regionAddress);
+
+ byte[] regionName = location.getRegionInfo().getRegionName();
+ String encodedRegionName = location.getRegionInfo().getEncodedName();
+ long regionSize = sizeCalculator.getRegionSize(regionName);
+ TableSplit split = new TableSplit(tableName, scan,
+ splitStart, splitStop, regionLocation, encodedRegionName, regionSize);
+ splits.add(split);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("getSplits: split -> " + i + " -> " + split);
+ }
+ }
+ }
+ //The default value of "hbase.mapreduce.input.autobalance" is false, which means not enabled.
+ boolean enableAutoBalance = context.getConfiguration()
+ .getBoolean(MAPREDUCE_INPUT_AUTOBALANCE, false);
+ if (enableAutoBalance) {
+ long totalRegionSize=0;
+ for (int i = 0; i < splits.size(); i++){
+ TableSplit ts = (TableSplit)splits.get(i);
+ totalRegionSize += ts.getLength();
+ }
+ long averageRegionSize = totalRegionSize / splits.size();
+ // the averageRegionSize must be positive.
+ if (averageRegionSize <= 0) {
+ LOG.warn("The averageRegionSize is not positive: "+ averageRegionSize + ", " +
+ "set it to 1.");
+ averageRegionSize = 1;
+ }
+ return calculateRebalancedSplits(splits, context, averageRegionSize);
+ } else {
+ return splits;
+ }
+ } finally {
+ if (closeOnFinish) {
+ closeTable();
+ }
+ }
+ }
+
+ String reverseDNS(InetAddress ipAddress) throws UnknownHostException {
+ String hostName = this.reverseDNSCacheMap.get(ipAddress);
+ if (hostName == null) {
+ String ipAddressString = null;
+ try {
+ ipAddressString = DNS.reverseDns(ipAddress, null);
+ } catch (Exception e) {
+ // We can use InetAddress in case the jndi failed to pull up the reverse DNS entry from the
+ // name service. Also, in case of ipv6, we need to use the InetAddress since resolving
+ // reverse DNS using jndi doesn't work well with ipv6 addresses.
+ ipAddressString = InetAddress.getByName(ipAddress.getHostAddress()).getHostName();
+ }
+ if (ipAddressString == null) throw new UnknownHostException("No host found for " + ipAddress);
+ hostName = Strings.domainNamePointerToHostName(ipAddressString);
+ this.reverseDNSCacheMap.put(ipAddress, hostName);
+ }
+ return hostName;
+ }
+
+ /**
+ * Calculates the number of MapReduce input splits for the map tasks. The number of
+ * MapReduce input splits depends on the average region size and the "data skew ratio" the user
+ * sets in the configuration.
+ *
+ * @param list The list of input splits before balance.
+ * @param context The current job context.
+ * @param average The average size of all regions .
+ * @return The list of input splits.
+ * @throws IOException When creating the list of splits fails.
+ * @see org.apache.hadoop.mapreduce.InputFormat#getSplits(
+ * org.apache.hadoop.mapreduce.JobContext)
+ */
+ private List<InputSplit> calculateRebalancedSplits(List<InputSplit> list, JobContext context,
+ long average) throws IOException {
+ List<InputSplit> resultList = new ArrayList<>();
+ Configuration conf = context.getConfiguration();
+ //The default data skew ratio is 3
+ long dataSkewRatio = conf.getLong(INPUT_AUTOBALANCE_MAXSKEWRATIO, 3);
+ //It determines which mode to use: text key mode or binary key mode. The default is text mode.
+ boolean isTextKey = context.getConfiguration().getBoolean(TABLE_ROW_TEXTKEY, true);
+ long dataSkewThreshold = dataSkewRatio * average;
+ int count = 0;
+ while (count < list.size()) {
+ TableSplit ts = (TableSplit)list.get(count);
+ TableName tableName = ts.getTable();
+ String regionLocation = ts.getRegionLocation();
+ String encodedRegionName = ts.getEncodedRegionName();
+ long regionSize = ts.getLength();
+ if (regionSize >= dataSkewThreshold) {
+ // if the current region size is larger than the data skew threshold,
+ // split the region into two MapReduce input splits.
+ byte[] splitKey = getSplitKey(ts.getStartRow(), ts.getEndRow(), isTextKey);
+ if (Arrays.equals(ts.getEndRow(), splitKey)) {
+ // Not splitting since the end key is the same as the split key
+ resultList.add(ts);
+ } else {
+ // Set the size of each child TableSplit to half of the region size. The exact size of the
+ // resulting MapReduce input splits is not far off.
+ TableSplit t1 = new TableSplit(tableName, scan, ts.getStartRow(), splitKey,
+ regionLocation, regionSize / 2);
+ TableSplit t2 = new TableSplit(tableName, scan, splitKey, ts.getEndRow(), regionLocation,
+ regionSize - regionSize / 2);
+ resultList.add(t1);
+ resultList.add(t2);
+ }
+ count++;
+ } else if (regionSize >= average) {
+ // if the region size is between the average size and the data skew threshold,
+ // make this region one MapReduce input split.
+ resultList.add(ts);
+ count++;
+ } else {
+ // if the total size of several small contiguous regions is less than the average region size,
+ // combine them into one MapReduce input split.
+ long totalSize = regionSize;
+ byte[] splitStartKey = ts.getStartRow();
+ byte[] splitEndKey = ts.getEndRow();
+ count++;
+ for (; count < list.size(); count++) {
+ TableSplit nextRegion = (TableSplit)list.get(count);
+ long nextRegionSize = nextRegion.getLength();
+ if (totalSize + nextRegionSize <= dataSkewThreshold) {
+ totalSize = totalSize + nextRegionSize;
+ splitEndKey = nextRegion.getEndRow();
+ } else {
+ break;
+ }
+ }
+ TableSplit t = new TableSplit(tableName, scan, splitStartKey, splitEndKey,
+ regionLocation, encodedRegionName, totalSize);
+ resultList.add(t);
+ }
+ }
+ return resultList;
+ }
+
+ /**
+ * Select a split point in the region. The selection of the split point is based on a uniform
+ * distribution assumption for the keys in the region.
+ * Here are some examples:
+ *
+ * <table>
+ * <tr>
+ * <th>start key</th>
+ * <th>end key</th>
+ * <th>is text</th>
+ * <th>split point</th>
+ * </tr>
+ * <tr>
+ * <td>'a', 'a', 'a', 'b', 'c', 'd', 'e', 'f', 'g'</td>
+ * <td>'a', 'a', 'a', 'f', 'f', 'f'</td>
+ * <td>true</td>
+ * <td>'a', 'a', 'a', 'd', 'd', -78, 50, -77, 51</td>
+ * </tr>
+ * <tr>
+ * <td>'1', '1', '1', '0', '0', '0'</td>
+ * <td>'1', '1', '2', '5', '7', '9', '0'</td>
+ * <td>true</td>
+ * <td>'1', '1', '1', -78, -77, -76, -104</td>
+ * </tr>
+ * <tr>
+ * <td>'1', '1', '1', '0'</td>
+ * <td>'1', '1', '2', '0'</td>
+ * <td>true</td>
+ * <td>'1', '1', '1', -80</td>
+ * </tr>
+ * <tr>
+ * <td>13, -19, 126, 127</td>
+ * <td>13, -19, 127, 0</td>
+ * <td>false</td>
+ * <td>13, -19, 126, -65</td>
+ * </tr>
+ * </table>
+ *
+ * Set this function as "public static", make it easier for test.
+ *
+ * @param start Start key of the region
+ * @param end End key of the region
+ * @param isText It determines to use text key mode or binary key mode
+ * @return The split point in the region.
+ */
+ @InterfaceAudience.Private
+ public static byte[] getSplitKey(byte[] start, byte[] end, boolean isText) {
+ byte upperLimitByte;
+ byte lowerLimitByte;
+ //Use text mode or binary mode.
+ if (isText) {
+ //The range of text char set in ASCII is [32,126], the lower limit is space and the upper
+ // limit is '~'.
+ upperLimitByte = '~';
+ lowerLimitByte = ' ';
+ } else {
+ upperLimitByte = -1;
+ lowerLimitByte = 0;
+ }
+ // Special cases:
+ // Example 1 : startkey=null, endkey="hhhqqqwww", splitKey="h"
+ // Example 2 (text key mode): startKey="ffffaaa", endKey=null, splitkey="f~~~~~~"
+ if (start.length == 0 && end.length == 0){
+ return new byte[]{(byte) ((lowerLimitByte + upperLimitByte) / 2)};
+ }
+ if (start.length == 0 && end.length != 0){
+ return new byte[]{ end[0] };
+ }
+ if (start.length != 0 && end.length == 0){
+ byte[] result =new byte[start.length];
+ result[0]=start[0];
+ for (int k = 1; k < start.length; k++){
+ result[k] = upperLimitByte;
+ }
+ return result;
+ }
+ return Bytes.split(start, end, false, 1)[1];
+ }
+
+ /**
+ * Test if the given region is to be included in the InputSplit while splitting
+ * the regions of a table.
+ * <p>
+ * This optimization is effective when there is a specific reason to exclude an entire
+ * region from the M-R job (and hence not contribute an InputSplit), given its start and
+ * end keys. <br>
+ * Useful when we need to remember the last-processed top record and continuously revisit
+ * the [last, current) interval for M-R processing. Besides reducing the number of
+ * InputSplits, this also reduces the load on the region server, due to the ordering of
+ * the keys. <br>
+ * <br>
+ * Note: it is possible that <code>endKey.length() == 0</code> for the last (most recent)
+ * region. <br>
+ * Override this method if you want to bulk exclude regions from M-R. By default, no
+ * region is excluded (i.e. all regions are included).
+ *
+ *
+ * @param startKey Start key of the region
+ * @param endKey End key of the region
+ * @return true, if this region needs to be included as part of the input (default).
+ *
+ */
+ protected boolean includeRegionInSplit(final byte[] startKey, final byte [] endKey) {
+ return true;
+ }
+
+ /**
+ * Allows subclasses to get the {@link RegionLocator}.
+ */
+ protected RegionLocator getRegionLocator() {
+ if (regionLocator == null) {
+ throw new IllegalStateException(NOT_INITIALIZED);
+ }
+ return regionLocator;
+ }
+
+ /**
+ * Allows subclasses to get the {@link Table}.
+ */
+ protected Table getTable() {
+ if (table == null) {
+ throw new IllegalStateException(NOT_INITIALIZED);
+ }
+ return table;
+ }
+
+ /**
+ * Allows subclasses to get the {@link Admin}.
+ */
+ protected Admin getAdmin() {
+ if (admin == null) {
+ throw new IllegalStateException(NOT_INITIALIZED);
+ }
+ return admin;
+ }
+
+ /**
+ * Allows subclasses to initialize the table information.
+ *
+ * @param connection The Connection to the HBase cluster. MUST be unmanaged. We will close.
+ * @param tableName The {@link TableName} of the table to process.
+ * @throws IOException
+ */
+ protected void initializeTable(Connection connection, TableName tableName) throws IOException {
+ if (this.table != null || this.connection != null) {
+ LOG.warn("initializeTable called multiple times. Overwriting connection and table " +
+ "reference; TableInputFormatBase will not close these old references when done.");
+ }
+ this.table = connection.getTable(tableName);
+ this.regionLocator = connection.getRegionLocator(tableName);
+ this.admin = connection.getAdmin();
+ this.connection = connection;
+ }
+
+ /**
+ * Gets the scan defining the actual details like columns etc.
+ *
+ * @return The internal scan instance.
+ */
+ public Scan getScan() {
+ if (this.scan == null) this.scan = new Scan();
+ return scan;
+ }
+
+ /**
+ * Sets the scan defining the actual details like columns etc.
+ *
+ * @param scan The scan to set.
+ */
+ public void setScan(Scan scan) {
+ this.scan = scan;
+ }
+
+ /**
+ * Allows subclasses to set the {@link TableRecordReader}.
+ *
+ * @param tableRecordReader A different {@link TableRecordReader}
+ * implementation.
+ */
+ protected void setTableRecordReader(TableRecordReader tableRecordReader) {
+ this.tableRecordReader = tableRecordReader;
+ }
+
+ /**
+ * Handle subclass specific set up.
+ * Each of the entry points used by the MapReduce framework,
+ * {@link #createRecordReader(InputSplit, TaskAttemptContext)} and {@link #getSplits(JobContext)},
+ * will call {@link #initialize(JobContext)} as a convenient centralized location to handle
+ * retrieving the necessary configuration information and calling
+ * {@link #initializeTable(Connection, TableName)}.
+ *
+ * Subclasses should implement their initialize call such that it is safe to call multiple times.
+ * The current TableInputFormatBase implementation relies on a non-null table reference to decide
+ * if an initialize call is needed, but this behavior may change in the future. In particular,
+ * it is critical that initializeTable not be called multiple times since this will leak
+ * Connection instances.
+ *
+ */
+ protected void initialize(JobContext context) throws IOException {
+ }
+
+ /**
+ * Close the Table and related objects that were initialized via
+ * {@link #initializeTable(Connection, TableName)}.
+ *
+ * @throws IOException
+ */
+ protected void closeTable() throws IOException {
+ close(admin, table, regionLocator, connection);
+ admin = null;
+ table = null;
+ regionLocator = null;
+ connection = null;
+ }
+
+ private void close(Closeable... closables) throws IOException {
+ for (Closeable c : closables) {
+ if(c != null) { c.close(); }
+ }
+ }
+
+}
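As a rough illustration of the contract described in the javadoc above, a subclass might wire up
initialize(JobContext) and includeRegionInSplit(byte[], byte[]) along the lines of the sketch below.
This is illustrative only and not part of this patch; the class name, the table name, and the
"last processed key" policy are assumptions.

    import java.io.IOException;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.HConstants;
    import org.apache.hadoop.hbase.TableName;
    import org.apache.hadoop.hbase.client.Connection;
    import org.apache.hadoop.hbase.client.ConnectionFactory;
    import org.apache.hadoop.hbase.client.Scan;
    import org.apache.hadoop.hbase.util.Bytes;
    import org.apache.hadoop.mapreduce.JobContext;

    public class LastKeyAwareTableInputFormat extends TableInputFormatBase {
      private Connection conn;
      // Key up to which rows were handled by a previous run; illustrative only.
      private byte[] lastProcessedKey = HConstants.EMPTY_BYTE_ARRAY;

      @Override
      protected void initialize(JobContext context) throws IOException {
        // Guard against repeated calls: initializeTable must only run once per instance,
        // otherwise Connection instances leak.
        if (conn == null) {
          conn = ConnectionFactory.createConnection(
              HBaseConfiguration.create(context.getConfiguration()));
          initializeTable(conn, TableName.valueOf("my_table"));
          setScan(new Scan());
        }
      }

      @Override
      protected boolean includeRegionInSplit(byte[] startKey, byte[] endKey) {
        // Skip regions that end at or before the last processed key; the last region
        // (endKey.length == 0) is always kept.
        return endKey.length == 0 || Bytes.compareTo(endKey, lastProcessedKey) > 0;
      }
    }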
[34/41] hbase git commit: HBASE-18640 Move mapreduce out of
hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java
new file mode 100644
index 0000000..ff458ff
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java
@@ -0,0 +1,1027 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.URL;
+import java.net.URLDecoder;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Enumeration;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipFile;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.MetaTableAccessor;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos;
+import org.apache.hadoop.hbase.security.User;
+import org.apache.hadoop.hbase.security.UserProvider;
+import org.apache.hadoop.hbase.security.token.TokenUtil;
+import org.apache.hadoop.hbase.util.Base64;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.zookeeper.ZKConfig;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.util.StringUtils;
+
+import com.codahale.metrics.MetricRegistry;
+
+/**
+ * Utility for {@link TableMapper} and {@link TableReducer}
+ */
+@SuppressWarnings({ "rawtypes", "unchecked" })
+@InterfaceAudience.Public
+public class TableMapReduceUtil {
+ private static final Log LOG = LogFactory.getLog(TableMapReduceUtil.class);
+
+ /**
+ * Use this before submitting a TableMap job. It will appropriately set up
+ * the job.
+ *
+ * @param table The table name to read from.
+ * @param scan The scan instance with the columns, time range etc.
+ * @param mapper The mapper class to use.
+ * @param outputKeyClass The class of the output key.
+ * @param outputValueClass The class of the output value.
+ * @param job The current job to adjust. Make sure the passed job is
+ * carrying all necessary HBase configuration.
+ * @throws IOException When setting up the details fails.
+ */
+ public static void initTableMapperJob(String table, Scan scan,
+ Class<? extends TableMapper> mapper,
+ Class<?> outputKeyClass,
+ Class<?> outputValueClass, Job job)
+ throws IOException {
+ initTableMapperJob(table, scan, mapper, outputKeyClass, outputValueClass,
+ job, true);
+ }
+
+
+ /**
+ * Use this before submitting a TableMap job. It will appropriately set up
+ * the job.
+ *
+ * @param table The table name to read from.
+ * @param scan The scan instance with the columns, time range etc.
+ * @param mapper The mapper class to use.
+ * @param outputKeyClass The class of the output key.
+ * @param outputValueClass The class of the output value.
+ * @param job The current job to adjust. Make sure the passed job is
+ * carrying all necessary HBase configuration.
+ * @throws IOException When setting up the details fails.
+ */
+ public static void initTableMapperJob(TableName table,
+ Scan scan,
+ Class<? extends TableMapper> mapper,
+ Class<?> outputKeyClass,
+ Class<?> outputValueClass,
+ Job job) throws IOException {
+ initTableMapperJob(table.getNameAsString(),
+ scan,
+ mapper,
+ outputKeyClass,
+ outputValueClass,
+ job,
+ true);
+ }
+
+ /**
+ * Use this before submitting a TableMap job. It will appropriately set up
+ * the job.
+ *
+ * @param table Binary representation of the table name to read from.
+ * @param scan The scan instance with the columns, time range etc.
+ * @param mapper The mapper class to use.
+ * @param outputKeyClass The class of the output key.
+ * @param outputValueClass The class of the output value.
+ * @param job The current job to adjust. Make sure the passed job is
+ * carrying all necessary HBase configuration.
+ * @throws IOException When setting up the details fails.
+ */
+ public static void initTableMapperJob(byte[] table, Scan scan,
+ Class<? extends TableMapper> mapper,
+ Class<?> outputKeyClass,
+ Class<?> outputValueClass, Job job)
+ throws IOException {
+ initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass, outputValueClass,
+ job, true);
+ }
+
+ /**
+ * Use this before submitting a TableMap job. It will appropriately set up
+ * the job.
+ *
+ * @param table The table name to read from.
+ * @param scan The scan instance with the columns, time range etc.
+ * @param mapper The mapper class to use.
+ * @param outputKeyClass The class of the output key.
+ * @param outputValueClass The class of the output value.
+ * @param job The current job to adjust. Make sure the passed job is
+ * carrying all necessary HBase configuration.
+ * @param addDependencyJars upload HBase jars and jars for any of the configured
+ * job classes via the distributed cache (tmpjars).
+ * @throws IOException When setting up the details fails.
+ */
+ public static void initTableMapperJob(String table, Scan scan,
+ Class<? extends TableMapper> mapper,
+ Class<?> outputKeyClass,
+ Class<?> outputValueClass, Job job,
+ boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass)
+ throws IOException {
+ initTableMapperJob(table, scan, mapper, outputKeyClass, outputValueClass, job,
+ addDependencyJars, true, inputFormatClass);
+ }
+
+
+ /**
+ * Use this before submitting a TableMap job. It will appropriately set up
+ * the job.
+ *
+ * @param table The table name to read from.
+ * @param scan The scan instance with the columns, time range etc.
+ * @param mapper The mapper class to use.
+ * @param outputKeyClass The class of the output key.
+ * @param outputValueClass The class of the output value.
+ * @param job The current job to adjust. Make sure the passed job is
+ * carrying all necessary HBase configuration.
+ * @param addDependencyJars upload HBase jars and jars for any of the configured
+ * job classes via the distributed cache (tmpjars).
+ * @param initCredentials whether to initialize hbase auth credentials for the job
+ * @param inputFormatClass the input format
+ * @throws IOException When setting up the details fails.
+ */
+ public static void initTableMapperJob(String table, Scan scan,
+ Class<? extends TableMapper> mapper,
+ Class<?> outputKeyClass,
+ Class<?> outputValueClass, Job job,
+ boolean addDependencyJars, boolean initCredentials,
+ Class<? extends InputFormat> inputFormatClass)
+ throws IOException {
+ job.setInputFormatClass(inputFormatClass);
+ if (outputValueClass != null) job.setMapOutputValueClass(outputValueClass);
+ if (outputKeyClass != null) job.setMapOutputKeyClass(outputKeyClass);
+ job.setMapperClass(mapper);
+ if (Put.class.equals(outputValueClass)) {
+ job.setCombinerClass(PutCombiner.class);
+ }
+ Configuration conf = job.getConfiguration();
+ HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
+ conf.set(TableInputFormat.INPUT_TABLE, table);
+ conf.set(TableInputFormat.SCAN, convertScanToString(scan));
+ conf.setStrings("io.serializations", conf.get("io.serializations"),
+ MutationSerialization.class.getName(), ResultSerialization.class.getName(),
+ KeyValueSerialization.class.getName());
+ if (addDependencyJars) {
+ addDependencyJars(job);
+ }
+ if (initCredentials) {
+ initCredentials(job);
+ }
+ }
+
+ /**
+ * Use this before submitting a TableMap job. It will appropriately set up
+ * the job.
+ *
+ * @param table Binary representation of the table name to read from.
+ * @param scan The scan instance with the columns, time range etc.
+ * @param mapper The mapper class to use.
+ * @param outputKeyClass The class of the output key.
+ * @param outputValueClass The class of the output value.
+ * @param job The current job to adjust. Make sure the passed job is
+ * carrying all necessary HBase configuration.
+ * @param addDependencyJars upload HBase jars and jars for any of the configured
+ * job classes via the distributed cache (tmpjars).
+ * @param inputFormatClass The class of the input format
+ * @throws IOException When setting up the details fails.
+ */
+ public static void initTableMapperJob(byte[] table, Scan scan,
+ Class<? extends TableMapper> mapper,
+ Class<?> outputKeyClass,
+ Class<?> outputValueClass, Job job,
+ boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass)
+ throws IOException {
+ initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass,
+ outputValueClass, job, addDependencyJars, inputFormatClass);
+ }
+
+ /**
+ * Use this before submitting a TableMap job. It will appropriately set up
+ * the job.
+ *
+ * @param table Binary representation of the table name to read from.
+ * @param scan The scan instance with the columns, time range etc.
+ * @param mapper The mapper class to use.
+ * @param outputKeyClass The class of the output key.
+ * @param outputValueClass The class of the output value.
+ * @param job The current job to adjust. Make sure the passed job is
+ * carrying all necessary HBase configuration.
+ * @param addDependencyJars upload HBase jars and jars for any of the configured
+ * job classes via the distributed cache (tmpjars).
+ * @throws IOException When setting up the details fails.
+ */
+ public static void initTableMapperJob(byte[] table, Scan scan,
+ Class<? extends TableMapper> mapper,
+ Class<?> outputKeyClass,
+ Class<?> outputValueClass, Job job,
+ boolean addDependencyJars)
+ throws IOException {
+ initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass,
+ outputValueClass, job, addDependencyJars, TableInputFormat.class);
+ }
+
+ /**
+ * Use this before submitting a TableMap job. It will appropriately set up
+ * the job.
+ *
+ * @param table The table name to read from.
+ * @param scan The scan instance with the columns, time range etc.
+ * @param mapper The mapper class to use.
+ * @param outputKeyClass The class of the output key.
+ * @param outputValueClass The class of the output value.
+ * @param job The current job to adjust. Make sure the passed job is
+ * carrying all necessary HBase configuration.
+ * @param addDependencyJars upload HBase jars and jars for any of the configured
+ * job classes via the distributed cache (tmpjars).
+ * @throws IOException When setting up the details fails.
+ */
+ public static void initTableMapperJob(String table, Scan scan,
+ Class<? extends TableMapper> mapper,
+ Class<?> outputKeyClass,
+ Class<?> outputValueClass, Job job,
+ boolean addDependencyJars)
+ throws IOException {
+ initTableMapperJob(table, scan, mapper, outputKeyClass,
+ outputValueClass, job, addDependencyJars, TableInputFormat.class);
+ }
+
+ /**
+ * Enable a basic on-heap cache for these jobs. Any BlockCache implementation based on
+ * direct memory will likely cause the map tasks to OOM when opening the region. This
+ * is done here instead of in TableSnapshotRegionRecordReader in case an advanced user
+ * wants to override this behavior in their job.
+ */
+ public static void resetCacheConfig(Configuration conf) {
+ conf.setFloat(
+ HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT);
+ conf.setFloat(HConstants.BUCKET_CACHE_SIZE_KEY, 0f);
+ conf.unset(HConstants.BUCKET_CACHE_IOENGINE_KEY);
+ }
+
+ /**
+ * Sets up the job for reading from one or more table snapshots, with one or more scans
+ * per snapshot.
+ * It bypasses HBase servers and reads directly from snapshot files.
+ *
+ * @param snapshotScans map of snapshot name to scans on that snapshot.
+ * @param mapper The mapper class to use.
+ * @param outputKeyClass The class of the output key.
+ * @param outputValueClass The class of the output value.
+ * @param job The current job to adjust. Make sure the passed job is
+ * carrying all necessary HBase configuration.
+ * @param addDependencyJars upload HBase jars and jars for any of the configured
+ * job classes via the distributed cache (tmpjars).
+ */
+ public static void initMultiTableSnapshotMapperJob(Map<String, Collection<Scan>> snapshotScans,
+ Class<? extends TableMapper> mapper, Class<?> outputKeyClass, Class<?> outputValueClass,
+ Job job, boolean addDependencyJars, Path tmpRestoreDir) throws IOException {
+ MultiTableSnapshotInputFormat.setInput(job.getConfiguration(), snapshotScans, tmpRestoreDir);
+
+ job.setInputFormatClass(MultiTableSnapshotInputFormat.class);
+ if (outputValueClass != null) {
+ job.setMapOutputValueClass(outputValueClass);
+ }
+ if (outputKeyClass != null) {
+ job.setMapOutputKeyClass(outputKeyClass);
+ }
+ job.setMapperClass(mapper);
+ Configuration conf = job.getConfiguration();
+ HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
+
+ if (addDependencyJars) {
+ addDependencyJars(job);
+ addDependencyJarsForClasses(job.getConfiguration(), MetricRegistry.class);
+ }
+
+ resetCacheConfig(job.getConfiguration());
+ }
+
+ /**
+ * Sets up the job for reading from a table snapshot. It bypasses HBase servers
+ * and reads directly from snapshot files.
+ *
+ * @param snapshotName The name of the snapshot (of a table) to read from.
+ * @param scan The scan instance with the columns, time range etc.
+ * @param mapper The mapper class to use.
+ * @param outputKeyClass The class of the output key.
+ * @param outputValueClass The class of the output value.
+ * @param job The current job to adjust. Make sure the passed job is
+ * carrying all necessary HBase configuration.
+ * @param addDependencyJars upload HBase jars and jars for any of the configured
+ * job classes via the distributed cache (tmpjars).
+ *
+ * @param tmpRestoreDir a temporary directory to copy the snapshot files into. The current user
+ * should have write permissions to this directory, and it should not be a subdirectory of rootdir.
+ * The restore directory can be deleted after the job is finished.
+ * @throws IOException When setting up the details fails.
+ * @see TableSnapshotInputFormat
+ */
+ public static void initTableSnapshotMapperJob(String snapshotName, Scan scan,
+ Class<? extends TableMapper> mapper,
+ Class<?> outputKeyClass,
+ Class<?> outputValueClass, Job job,
+ boolean addDependencyJars, Path tmpRestoreDir)
+ throws IOException {
+ TableSnapshotInputFormat.setInput(job, snapshotName, tmpRestoreDir);
+ initTableMapperJob(snapshotName, scan, mapper, outputKeyClass,
+ outputValueClass, job, addDependencyJars, false, TableSnapshotInputFormat.class);
+ resetCacheConfig(job.getConfiguration());
+ }
+
+ /**
+ * Use this before submitting a Multi TableMap job. It will appropriately set
+ * up the job.
+ *
+ * @param scans The list of {@link Scan} objects to read from.
+ * @param mapper The mapper class to use.
+ * @param outputKeyClass The class of the output key.
+ * @param outputValueClass The class of the output value.
+ * @param job The current job to adjust. Make sure the passed job is carrying
+ * all necessary HBase configuration.
+ * @throws IOException When setting up the details fails.
+ */
+ public static void initTableMapperJob(List<Scan> scans,
+ Class<? extends TableMapper> mapper,
+ Class<?> outputKeyClass,
+ Class<?> outputValueClass, Job job) throws IOException {
+ initTableMapperJob(scans, mapper, outputKeyClass, outputValueClass, job,
+ true);
+ }
+
+ /**
+ * Use this before submitting a Multi TableMap job. It will appropriately set
+ * up the job.
+ *
+ * @param scans The list of {@link Scan} objects to read from.
+ * @param mapper The mapper class to use.
+ * @param outputKeyClass The class of the output key.
+ * @param outputValueClass The class of the output value.
+ * @param job The current job to adjust. Make sure the passed job is carrying
+ * all necessary HBase configuration.
+ * @param addDependencyJars upload HBase jars and jars for any of the
+ * configured job classes via the distributed cache (tmpjars).
+ * @throws IOException When setting up the details fails.
+ */
+ public static void initTableMapperJob(List<Scan> scans,
+ Class<? extends TableMapper> mapper,
+ Class<?> outputKeyClass,
+ Class<?> outputValueClass, Job job,
+ boolean addDependencyJars) throws IOException {
+ initTableMapperJob(scans, mapper, outputKeyClass, outputValueClass, job,
+ addDependencyJars, true);
+ }
+
+ /**
+ * Use this before submitting a Multi TableMap job. It will appropriately set
+ * up the job.
+ *
+ * @param scans The list of {@link Scan} objects to read from.
+ * @param mapper The mapper class to use.
+ * @param outputKeyClass The class of the output key.
+ * @param outputValueClass The class of the output value.
+ * @param job The current job to adjust. Make sure the passed job is carrying
+ * all necessary HBase configuration.
+ * @param addDependencyJars upload HBase jars and jars for any of the
+ * configured job classes via the distributed cache (tmpjars).
+ * @param initCredentials whether to initialize hbase auth credentials for the job
+ * @throws IOException When setting up the details fails.
+ */
+ public static void initTableMapperJob(List<Scan> scans,
+ Class<? extends TableMapper> mapper,
+ Class<?> outputKeyClass,
+ Class<?> outputValueClass, Job job,
+ boolean addDependencyJars,
+ boolean initCredentials) throws IOException {
+ job.setInputFormatClass(MultiTableInputFormat.class);
+ if (outputValueClass != null) {
+ job.setMapOutputValueClass(outputValueClass);
+ }
+ if (outputKeyClass != null) {
+ job.setMapOutputKeyClass(outputKeyClass);
+ }
+ job.setMapperClass(mapper);
+ Configuration conf = job.getConfiguration();
+ HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
+ List<String> scanStrings = new ArrayList<>();
+
+ for (Scan scan : scans) {
+ scanStrings.add(convertScanToString(scan));
+ }
+ job.getConfiguration().setStrings(MultiTableInputFormat.SCANS,
+ scanStrings.toArray(new String[scanStrings.size()]));
+
+ if (addDependencyJars) {
+ addDependencyJars(job);
+ }
+
+ if (initCredentials) {
+ initCredentials(job);
+ }
+ }
+
+ public static void initCredentials(Job job) throws IOException {
+ UserProvider userProvider = UserProvider.instantiate(job.getConfiguration());
+ if (userProvider.isHadoopSecurityEnabled()) {
+ // propagate delegation related props from launcher job to MR job
+ if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
+ job.getConfiguration().set("mapreduce.job.credentials.binary",
+ System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
+ }
+ }
+
+ if (userProvider.isHBaseSecurityEnabled()) {
+ try {
+ // init credentials for remote cluster
+ String quorumAddress = job.getConfiguration().get(TableOutputFormat.QUORUM_ADDRESS);
+ User user = userProvider.getCurrent();
+ if (quorumAddress != null) {
+ Configuration peerConf = HBaseConfiguration.createClusterConf(job.getConfiguration(),
+ quorumAddress, TableOutputFormat.OUTPUT_CONF_PREFIX);
+ Connection peerConn = ConnectionFactory.createConnection(peerConf);
+ try {
+ TokenUtil.addTokenForJob(peerConn, user, job);
+ } finally {
+ peerConn.close();
+ }
+ }
+
+ Connection conn = ConnectionFactory.createConnection(job.getConfiguration());
+ try {
+ TokenUtil.addTokenForJob(conn, user, job);
+ } finally {
+ conn.close();
+ }
+ } catch (InterruptedException ie) {
+ LOG.info("Interrupted obtaining user authentication token");
+ Thread.currentThread().interrupt();
+ }
+ }
+ }
+
+ /**
+ * Obtain an authentication token, for the specified cluster, on behalf of the current user
+ * and add it to the credentials for the given map reduce job.
+ *
+ * The quorumAddress is the key to the ZK ensemble, which contains:
+ * hbase.zookeeper.quorum, hbase.zookeeper.client.port and
+ * zookeeper.znode.parent
+ *
+ * @param job The job that requires the permission.
+ * @param quorumAddress string that contains the 3 required configurations
+ * @throws IOException When the authentication token cannot be obtained.
+ * @deprecated Since 1.2.0, use {@link #initCredentialsForCluster(Job, Configuration)} instead.
+ */
+ @Deprecated
+ public static void initCredentialsForCluster(Job job, String quorumAddress)
+ throws IOException {
+ Configuration peerConf = HBaseConfiguration.createClusterConf(job.getConfiguration(),
+ quorumAddress);
+ initCredentialsForCluster(job, peerConf);
+ }
+
+ /**
+ * Obtain an authentication token, for the specified cluster, on behalf of the current user
+ * and add it to the credentials for the given map reduce job.
+ *
+ * @param job The job that requires the permission.
+ * @param conf The configuration to use in connecting to the peer cluster
+ * @throws IOException When the authentication token cannot be obtained.
+ */
+ public static void initCredentialsForCluster(Job job, Configuration conf)
+ throws IOException {
+ UserProvider userProvider = UserProvider.instantiate(job.getConfiguration());
+ if (userProvider.isHBaseSecurityEnabled()) {
+ try {
+ Connection peerConn = ConnectionFactory.createConnection(conf);
+ try {
+ TokenUtil.addTokenForJob(peerConn, userProvider.getCurrent(), job);
+ } finally {
+ peerConn.close();
+ }
+ } catch (InterruptedException e) {
+ LOG.info("Interrupted obtaining user authentication token");
+ Thread.interrupted();
+ }
+ }
+ }
+
+ /**
+ * Writes the given scan into a Base64 encoded string.
+ *
+ * @param scan The scan to write out.
+ * @return The scan saved in a Base64 encoded string.
+ * @throws IOException When writing the scan fails.
+ */
+ public static String convertScanToString(Scan scan) throws IOException {
+ ClientProtos.Scan proto = ProtobufUtil.toScan(scan);
+ return Base64.encodeBytes(proto.toByteArray());
+ }
+
+ /**
+ * Converts the given Base64 string back into a Scan instance.
+ *
+ * @param base64 The scan details.
+ * @return The newly created Scan instance.
+ * @throws IOException When reading the scan instance fails.
+ */
+ public static Scan convertStringToScan(String base64) throws IOException {
+ byte [] decoded = Base64.decode(base64);
+ return ProtobufUtil.toScan(ClientProtos.Scan.parseFrom(decoded));
+ }
+
+ /**
+ * Use this before submitting a TableReduce job. It will
+ * appropriately set up the JobConf.
+ *
+ * @param table The output table.
+ * @param reducer The reducer class to use.
+ * @param job The current job to adjust.
+ * @throws IOException When determining the region count fails.
+ */
+ public static void initTableReducerJob(String table,
+ Class<? extends TableReducer> reducer, Job job)
+ throws IOException {
+ initTableReducerJob(table, reducer, job, null);
+ }
+
+ /**
+ * Use this before submitting a TableReduce job. It will
+ * appropriately set up the JobConf.
+ *
+ * @param table The output table.
+ * @param reducer The reducer class to use.
+ * @param job The current job to adjust.
+ * @param partitioner Partitioner to use. Pass <code>null</code> to use
+ * default partitioner.
+ * @throws IOException When determining the region count fails.
+ */
+ public static void initTableReducerJob(String table,
+ Class<? extends TableReducer> reducer, Job job,
+ Class partitioner) throws IOException {
+ initTableReducerJob(table, reducer, job, partitioner, null, null, null);
+ }
+
+ /**
+ * Use this before submitting a TableReduce job. It will
+ * appropriately set up the JobConf.
+ *
+ * @param table The output table.
+ * @param reducer The reducer class to use.
+ * @param job The current job to adjust. Make sure the passed job is
+ * carrying all necessary HBase configuration.
+ * @param partitioner Partitioner to use. Pass <code>null</code> to use
+ * default partitioner.
+ * @param quorumAddress Distant cluster to write to; default is null for
+ * output to the cluster that is designated in <code>hbase-site.xml</code>.
+ * Set this String to the zookeeper ensemble of an alternate remote cluster
+ * when you want the reduce to write to a cluster other than the default;
+ * e.g. when copying tables between clusters, the source would be designated
+ * by <code>hbase-site.xml</code> and this param would carry the ensemble
+ * address of the remote cluster. The format to pass is particular.
+ * Pass <code> <hbase.zookeeper.quorum>:<
+ * hbase.zookeeper.client.port>:<zookeeper.znode.parent>
+ * </code> such as <code>server,server2,server3:2181:/hbase</code>.
+ * @param serverClass redefined hbase.regionserver.class
+ * @param serverImpl redefined hbase.regionserver.impl
+ * @throws IOException When determining the region count fails.
+ */
+ public static void initTableReducerJob(String table,
+ Class<? extends TableReducer> reducer, Job job,
+ Class partitioner, String quorumAddress, String serverClass,
+ String serverImpl) throws IOException {
+ initTableReducerJob(table, reducer, job, partitioner, quorumAddress,
+ serverClass, serverImpl, true);
+ }
+
+ /**
+ * Use this before submitting a TableReduce job. It will
+ * appropriately set up the JobConf.
+ *
+ * @param table The output table.
+ * @param reducer The reducer class to use.
+ * @param job The current job to adjust. Make sure the passed job is
+ * carrying all necessary HBase configuration.
+ * @param partitioner Partitioner to use. Pass <code>null</code> to use
+ * default partitioner.
+ * @param quorumAddress Distant cluster to write to; default is null for
+ * output to the cluster that is designated in <code>hbase-site.xml</code>.
+ * Set this String to the zookeeper ensemble of an alternate remote cluster
+ * when you want the reduce to write to a cluster other than the default;
+ * e.g. when copying tables between clusters, the source would be designated
+ * by <code>hbase-site.xml</code> and this param would carry the ensemble
+ * address of the remote cluster. The format to pass is particular.
+ * Pass <code> <hbase.zookeeper.quorum>:<
+ * hbase.zookeeper.client.port>:<zookeeper.znode.parent>
+ * </code> such as <code>server,server2,server3:2181:/hbase</code>.
+ * @param serverClass redefined hbase.regionserver.class
+ * @param serverImpl redefined hbase.regionserver.impl
+ * @param addDependencyJars upload HBase jars and jars for any of the configured
+ * job classes via the distributed cache (tmpjars).
+ * @throws IOException When determining the region count fails.
+ */
+ public static void initTableReducerJob(String table,
+ Class<? extends TableReducer> reducer, Job job,
+ Class partitioner, String quorumAddress, String serverClass,
+ String serverImpl, boolean addDependencyJars) throws IOException {
+
+ Configuration conf = job.getConfiguration();
+ HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
+ job.setOutputFormatClass(TableOutputFormat.class);
+ if (reducer != null) job.setReducerClass(reducer);
+ conf.set(TableOutputFormat.OUTPUT_TABLE, table);
+ conf.setStrings("io.serializations", conf.get("io.serializations"),
+ MutationSerialization.class.getName(), ResultSerialization.class.getName());
+ // If passed a quorum/ensemble address, pass it on to TableOutputFormat.
+ if (quorumAddress != null) {
+ // Calling this will validate the format
+ ZKConfig.validateClusterKey(quorumAddress);
+ conf.set(TableOutputFormat.QUORUM_ADDRESS, quorumAddress);
+ }
+ if (serverClass != null && serverImpl != null) {
+ conf.set(TableOutputFormat.REGION_SERVER_CLASS, serverClass);
+ conf.set(TableOutputFormat.REGION_SERVER_IMPL, serverImpl);
+ }
+ job.setOutputKeyClass(ImmutableBytesWritable.class);
+ job.setOutputValueClass(Writable.class);
+ if (partitioner == HRegionPartitioner.class) {
+ job.setPartitionerClass(HRegionPartitioner.class);
+ int regions = MetaTableAccessor.getRegionCount(conf, TableName.valueOf(table));
+ if (job.getNumReduceTasks() > regions) {
+ job.setNumReduceTasks(regions);
+ }
+ } else if (partitioner != null) {
+ job.setPartitionerClass(partitioner);
+ }
+
+ if (addDependencyJars) {
+ addDependencyJars(job);
+ }
+
+ initCredentials(job);
+ }
+
+ /**
+ * Ensures that the given number of reduce tasks for the given job
+ * configuration does not exceed the number of regions for the given table.
+ *
+ * @param table The table to get the region count for.
+ * @param job The current job to adjust.
+ * @throws IOException When retrieving the table details fails.
+ */
+ public static void limitNumReduceTasks(String table, Job job)
+ throws IOException {
+ int regions =
+ MetaTableAccessor.getRegionCount(job.getConfiguration(), TableName.valueOf(table));
+ if (job.getNumReduceTasks() > regions)
+ job.setNumReduceTasks(regions);
+ }
+
+ /**
+ * Sets the number of reduce tasks for the given job configuration to the
+ * number of regions the given table has.
+ *
+ * @param table The table to get the region count for.
+ * @param job The current job to adjust.
+ * @throws IOException When retrieving the table details fails.
+ */
+ public static void setNumReduceTasks(String table, Job job)
+ throws IOException {
+ job.setNumReduceTasks(MetaTableAccessor.getRegionCount(job.getConfiguration(),
+ TableName.valueOf(table)));
+ }
+
+ /**
+ * Sets the number of rows to return and cache with each scanner iteration.
+ * Higher caching values will enable faster mapreduce jobs at the expense of
+ * requiring more heap to contain the cached rows.
+ *
+ * @param job The current job to adjust.
+ * @param batchSize The number of rows to return in batch with each scanner
+ * iteration.
+ */
+ public static void setScannerCaching(Job job, int batchSize) {
+ job.getConfiguration().setInt("hbase.client.scanner.caching", batchSize);
+ }
+
+ /**
+ * Add HBase and its dependencies (only) to the job configuration.
+ * <p>
+ * This is intended as a low-level API, facilitating code reuse between this
+ * class and its mapred counterpart. It is also of use to external tools that
+ * need to build a MapReduce job that interacts with HBase but want
+ * fine-grained control over the jars shipped to the cluster.
+ * </p>
+ * @param conf The Configuration object to extend with dependencies.
+ * @see org.apache.hadoop.hbase.mapred.TableMapReduceUtil
+ * @see <a href="https://issues.apache.org/jira/browse/PIG-3285">PIG-3285</a>
+ */
+ public static void addHBaseDependencyJars(Configuration conf) throws IOException {
+
+ // PrefixTreeCodec is part of the hbase-prefix-tree module. If not included in MR jobs jar
+ // dependencies, MR jobs that write encoded hfiles will fail.
+ // We use reflection here to prevent a circular module dependency.
+ // TODO - if we extract the MR into a module, make it depend on hbase-prefix-tree.
+ Class prefixTreeCodecClass = null;
+ try {
+ prefixTreeCodecClass =
+ Class.forName("org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeCodec");
+ } catch (ClassNotFoundException e) {
+ // this will show up in unit tests but should not show in real deployments
+ LOG.warn("The hbase-prefix-tree module jar containing PrefixTreeCodec is not present." +
+ " Continuing without it.");
+ }
+
+ addDependencyJarsForClasses(conf,
+ // explicitly pull a class from each module
+ org.apache.hadoop.hbase.HConstants.class, // hbase-common
+ org.apache.hadoop.hbase.protobuf.generated.ClientProtos.class, // hbase-protocol
+ org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.class, // hbase-protocol-shaded
+ org.apache.hadoop.hbase.client.Put.class, // hbase-client
+ org.apache.hadoop.hbase.CompatibilityFactory.class, // hbase-hadoop-compat
+ org.apache.hadoop.hbase.mapreduce.JobUtil.class, // hbase-hadoop2-compat
+ org.apache.hadoop.hbase.mapreduce.TableMapper.class, // hbase-server
+ org.apache.hadoop.hbase.metrics.impl.FastLongHistogram.class, // hbase-metrics
+ org.apache.hadoop.hbase.metrics.Snapshot.class, // hbase-metrics-api
+ prefixTreeCodecClass, // hbase-prefix-tree (if null will be skipped)
+ // pull necessary dependencies
+ org.apache.zookeeper.ZooKeeper.class,
+ org.apache.hadoop.hbase.shaded.io.netty.channel.Channel.class,
+ com.google.protobuf.Message.class,
+ org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists.class,
+ org.apache.htrace.Trace.class,
+ com.codahale.metrics.MetricRegistry.class);
+ }
+
+ /**
+ * Returns a classpath string built from the content of the "tmpjars" value in {@code conf}.
+ * Also exposed to shell scripts via `bin/hbase mapredcp`.
+ */
+ public static String buildDependencyClasspath(Configuration conf) {
+ if (conf == null) {
+ throw new IllegalArgumentException("Must provide a configuration object.");
+ }
+ Set<String> paths = new HashSet<>(conf.getStringCollection("tmpjars"));
+ if (paths.isEmpty()) {
+ throw new IllegalArgumentException("Configuration contains no tmpjars.");
+ }
+ StringBuilder sb = new StringBuilder();
+ for (String s : paths) {
+ // entries can take the form 'file:/path/to/file.jar'.
+ int idx = s.indexOf(":");
+ if (idx != -1) s = s.substring(idx + 1);
+ if (sb.length() > 0) sb.append(File.pathSeparator);
+ sb.append(s);
+ }
+ return sb.toString();
+ }
+
+ /**
+ * Add the HBase dependency jars as well as jars for any of the configured
+ * job classes to the job configuration, so that JobClient will ship them
+ * to the cluster and add them to the DistributedCache.
+ */
+ public static void addDependencyJars(Job job) throws IOException {
+ addHBaseDependencyJars(job.getConfiguration());
+ try {
+ addDependencyJarsForClasses(job.getConfiguration(),
+ // when making changes here, consider also mapred.TableMapReduceUtil
+ // pull job classes
+ job.getMapOutputKeyClass(),
+ job.getMapOutputValueClass(),
+ job.getInputFormatClass(),
+ job.getOutputKeyClass(),
+ job.getOutputValueClass(),
+ job.getOutputFormatClass(),
+ job.getPartitionerClass(),
+ job.getCombinerClass());
+ } catch (ClassNotFoundException e) {
+ throw new IOException(e);
+ }
+ }
+
+ /**
+ * Add the jars containing the given classes to the job's configuration
+ * such that JobClient will ship them to the cluster and add them to
+ * the DistributedCache.
+ * @deprecated rely on {@link #addDependencyJars(Job)} instead.
+ */
+ @Deprecated
+ public static void addDependencyJars(Configuration conf,
+ Class<?>... classes) throws IOException {
+ LOG.warn("The addDependencyJars(Configuration, Class<?>...) method has been deprecated since it"
+ + " is easy to use incorrectly. Most users should rely on addDependencyJars(Job) " +
+ "instead. See HBASE-8386 for more details.");
+ addDependencyJarsForClasses(conf, classes);
+ }
+
+ /**
+ * Add the jars containing the given classes to the job's configuration
+ * such that JobClient will ship them to the cluster and add them to
+ * the DistributedCache.
+ *
+ * N.B. that this method at most adds one jar per class given. If there is more than one
+ * jar available containing a class with the same name as a given class, we don't define
+ * which of those jars might be chosen.
+ *
+ * @param conf The Hadoop Configuration to modify
+ * @param classes will add just those dependencies needed to find the given classes
+ * @throws IOException if an underlying library call fails.
+ */
+ @InterfaceAudience.Private
+ public static void addDependencyJarsForClasses(Configuration conf,
+ Class<?>... classes) throws IOException {
+
+ FileSystem localFs = FileSystem.getLocal(conf);
+ Set<String> jars = new HashSet<>();
+ // Add jars that are already in the tmpjars variable
+ jars.addAll(conf.getStringCollection("tmpjars"));
+
+ // add jars as we find them to a map of contents jar name so that we can avoid
+ // creating new jars for classes that have already been packaged.
+ Map<String, String> packagedClasses = new HashMap<>();
+
+ // Add jars containing the specified classes
+ for (Class<?> clazz : classes) {
+ if (clazz == null) continue;
+
+ Path path = findOrCreateJar(clazz, localFs, packagedClasses);
+ if (path == null) {
+ LOG.warn("Could not find jar for class " + clazz +
+ " in order to ship it to the cluster.");
+ continue;
+ }
+ if (!localFs.exists(path)) {
+ LOG.warn("Could not validate jar file " + path + " for class "
+ + clazz);
+ continue;
+ }
+ jars.add(path.toString());
+ }
+ if (jars.isEmpty()) return;
+
+ conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[jars.size()])));
+ }
+
+ /**
+ * Finds the Jar for a class or creates it if it doesn't exist. If the class is in
+ * a directory in the classpath, it creates a Jar on the fly with the
+ * contents of the directory and returns the path to that Jar. If a Jar is
+ * created, it is created in the system temporary directory. Otherwise,
+ * returns an existing jar that contains a class of the same name. Maintains
+ * a mapping from jar contents to the tmp jar created.
+ * @param my_class the class to find.
+ * @param fs the FileSystem with which to qualify the returned path.
+ * @param packagedClasses a map of class name to path.
+ * @return a jar file that contains the class.
+ * @throws IOException
+ */
+ private static Path findOrCreateJar(Class<?> my_class, FileSystem fs,
+ Map<String, String> packagedClasses)
+ throws IOException {
+ // attempt to locate an existing jar for the class.
+ String jar = findContainingJar(my_class, packagedClasses);
+ if (null == jar || jar.isEmpty()) {
+ jar = getJar(my_class);
+ updateMap(jar, packagedClasses);
+ }
+
+ if (null == jar || jar.isEmpty()) {
+ return null;
+ }
+
+ LOG.debug(String.format("For class %s, using jar %s", my_class.getName(), jar));
+ return new Path(jar).makeQualified(fs);
+ }
+
+ /**
+ * Add entries to <code>packagedClasses</code> corresponding to class files
+ * contained in <code>jar</code>.
+ * @param jar The jar whose content to list.
+ * @param packagedClasses map[class -> jar]
+ */
+ private static void updateMap(String jar, Map<String, String> packagedClasses) throws IOException {
+ if (null == jar || jar.isEmpty()) {
+ return;
+ }
+ ZipFile zip = null;
+ try {
+ zip = new ZipFile(jar);
+ for (Enumeration<? extends ZipEntry> iter = zip.entries(); iter.hasMoreElements();) {
+ ZipEntry entry = iter.nextElement();
+ if (entry.getName().endsWith("class")) {
+ packagedClasses.put(entry.getName(), jar);
+ }
+ }
+ } finally {
+ if (null != zip) zip.close();
+ }
+ }
+
+ /**
+ * Find a jar that contains a class of the same name, if any. It will return
+ * a jar file, even if that is not the first thing on the class path that
+ * has a class with the same name. Looks first on the classpath and then in
+ * the <code>packagedClasses</code> map.
+ * @param my_class the class to find.
+ * @return a jar file that contains the class, or null.
+ * @throws IOException
+ */
+ private static String findContainingJar(Class<?> my_class, Map<String, String> packagedClasses)
+ throws IOException {
+ ClassLoader loader = my_class.getClassLoader();
+
+ String class_file = my_class.getName().replaceAll("\\.", "/") + ".class";
+
+ if (loader != null) {
+ // first search the classpath
+ for (Enumeration<URL> itr = loader.getResources(class_file); itr.hasMoreElements();) {
+ URL url = itr.nextElement();
+ if ("jar".equals(url.getProtocol())) {
+ String toReturn = url.getPath();
+ if (toReturn.startsWith("file:")) {
+ toReturn = toReturn.substring("file:".length());
+ }
+ // URLDecoder is a misnamed class, since it actually decodes
+ // x-www-form-urlencoded MIME type rather than actual
+ // URL encoding (which the file path has). Therefore it would
+ // decode +s to ' 's which is incorrect (spaces are actually
+ // either unencoded or encoded as "%20"). Replace +s first, so
+ // that they are kept sacred during the decoding process.
+ toReturn = toReturn.replaceAll("\\+", "%2B");
+ toReturn = URLDecoder.decode(toReturn, "UTF-8");
+ return toReturn.replaceAll("!.*$", "");
+ }
+ }
+ }
+
+ // now look in any jars we've packaged using JarFinder. Returns null when
+ // no jar is found.
+ return packagedClasses.get(class_file);
+ }
+
+ /**
+ * Invoke 'getJar' on a custom JarFinder implementation. Useful for some job
+ * configuration contexts (HBASE-8140) and also for testing on MRv2;
+ * check whether HADOOP-9426 is available.
+ * @param my_class the class to find.
+ * @return a jar file that contains the class, or null.
+ */
+ private static String getJar(Class<?> my_class) {
+ String ret = null;
+ try {
+ ret = JarFinder.getJar(my_class);
+ } catch (Exception e) {
+ // rethrow any failure (typically reflection-related) as a RuntimeException
+ throw new RuntimeException("getJar invocation failed.", e);
+ }
+
+ return ret;
+ }
+}
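To make the helpers above concrete, a typical driver might be wired up as sketched below. This is
illustrative only and not part of this patch; the table names, the MyDriver and MyMapper class
names, and the caching value are assumptions.

    // Inside a driver main(String[]) that declares "throws Exception".
    Configuration conf = HBaseConfiguration.create();
    Job job = Job.getInstance(conf, "example-copy-job");
    job.setJarByClass(MyDriver.class);

    Scan scan = new Scan();
    scan.setCacheBlocks(false);                       // typical for full-table MR scans
    TableMapReduceUtil.setScannerCaching(job, 500);   // rows fetched per scanner RPC

    TableMapReduceUtil.initTableMapperJob("source_table", scan, MyMapper.class,
        ImmutableBytesWritable.class, Put.class, job);
    TableMapReduceUtil.initTableReducerJob("target_table", null, job); // null = no reducer class
    System.exit(job.waitForCompletion(true) ? 0 : 1);

Behind the scenes, initTableMapperJob serializes the Scan into the job configuration via
convertScanToString (Base64), and initCredentials obtains HBase delegation tokens when security is
enabled.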
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapper.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapper.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapper.java
new file mode 100644
index 0000000..9a7dcb7
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapper.java
@@ -0,0 +1,38 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.mapreduce.Mapper;
+
+/**
+ * Extends the base <code>Mapper</code> class to add the required input key
+ * and value classes.
+ *
+ * @param <KEYOUT> The type of the key.
+ * @param <VALUEOUT> The type of the value.
+ * @see org.apache.hadoop.mapreduce.Mapper
+ */
+@InterfaceAudience.Public
+public abstract class TableMapper<KEYOUT, VALUEOUT>
+extends Mapper<ImmutableBytesWritable, Result, KEYOUT, VALUEOUT> {
+
+}
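A minimal concrete subclass might look like the sketch below (illustrative only; the MyMapper name
and the column family/qualifier are assumptions). It reads one cell of each source row and emits a
Put keyed by the same row, matching the mapper used in the driver sketch above.

    import java.io.IOException;
    import org.apache.hadoop.hbase.client.Put;
    import org.apache.hadoop.hbase.client.Result;
    import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
    import org.apache.hadoop.hbase.util.Bytes;

    public class MyMapper extends TableMapper<ImmutableBytesWritable, Put> {
      @Override
      protected void map(ImmutableBytesWritable rowKey, Result columns, Context context)
          throws IOException, InterruptedException {
        // Copy one cell of the source row into a Put destined for the output table.
        byte[] value = columns.getValue(Bytes.toBytes("cf"), Bytes.toBytes("q"));
        if (value != null) {
          Put put = new Put(rowKey.get());
          put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q"), value);
          context.write(rowKey, put);
        }
      }
    }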
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableOutputCommitter.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableOutputCommitter.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableOutputCommitter.java
new file mode 100644
index 0000000..749fd85
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableOutputCommitter.java
@@ -0,0 +1,67 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.OutputCommitter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+/**
+ * Small committer class that does not do anything.
+ */
+@InterfaceAudience.Public
+public class TableOutputCommitter extends OutputCommitter {
+
+ @Override
+ public void abortTask(TaskAttemptContext arg0) throws IOException {
+ }
+
+ @Override
+ public void cleanupJob(JobContext arg0) throws IOException {
+ }
+
+ @Override
+ public void commitTask(TaskAttemptContext arg0) throws IOException {
+ }
+
+ @Override
+ public boolean needsTaskCommit(TaskAttemptContext arg0) throws IOException {
+ return false;
+ }
+
+ @Override
+ public void setupJob(JobContext arg0) throws IOException {
+ }
+
+ @Override
+ public void setupTask(TaskAttemptContext arg0) throws IOException {
+ }
+
+ public boolean isRecoverySupported() {
+ return true;
+ }
+
+ public void recoverTask(TaskAttemptContext taskContext)
+ throws IOException
+ {
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableOutputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableOutputFormat.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableOutputFormat.java
new file mode 100644
index 0000000..604ef00
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableOutputFormat.java
@@ -0,0 +1,239 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.TableNotEnabledException;
+import org.apache.hadoop.hbase.TableNotFoundException;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.BufferedMutator;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Mutation;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.OutputCommitter;
+import org.apache.hadoop.mapreduce.OutputFormat;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+/**
+ * Convert Map/Reduce output and write it to an HBase table. The KEY is ignored
+ * while the output value <u>must</u> be either a {@link Put} or a
+ * {@link Delete} instance.
+ */
+@InterfaceAudience.Public
+public class TableOutputFormat<KEY> extends OutputFormat<KEY, Mutation>
+implements Configurable {
+
+ private static final Log LOG = LogFactory.getLog(TableOutputFormat.class);
+
+ /** Job parameter that specifies the output table. */
+ public static final String OUTPUT_TABLE = "hbase.mapred.outputtable";
+
+ /**
+ * Prefix for configuration property overrides to apply in {@link #setConf(Configuration)}.
+ * For keys matching this prefix, the prefix is stripped, and the value is set in the
+ * configuration with the resulting key, i.e. the entry "hbase.mapred.output.key1 = value1"
+ * would be set in the configuration as "key1 = value1". Use this to set properties
+ * which should only be applied to the {@code TableOutputFormat} configuration and not the
+ * input configuration.
+ */
+ public static final String OUTPUT_CONF_PREFIX = "hbase.mapred.output.";
+
+ /**
+ * Optional job parameter to specify a peer cluster.
+ * Used to specify a remote cluster when copying between HBase clusters (the
+ * source is picked up from <code>hbase-site.xml</code>).
+ * @see TableMapReduceUtil#initTableReducerJob(String, Class, org.apache.hadoop.mapreduce.Job, Class, String, String, String)
+ */
+ public static final String QUORUM_ADDRESS = OUTPUT_CONF_PREFIX + "quorum";
+
+ /** Optional job parameter to specify peer cluster's ZK client port */
+ public static final String QUORUM_PORT = OUTPUT_CONF_PREFIX + "quorum.port";
+
+ /** Optional specification of the rs class name of the peer cluster */
+ public static final String
+ REGION_SERVER_CLASS = OUTPUT_CONF_PREFIX + "rs.class";
+ /** Optional specification of the rs impl name of the peer cluster */
+ public static final String
+ REGION_SERVER_IMPL = OUTPUT_CONF_PREFIX + "rs.impl";
+
+ /** The configuration. */
+ private Configuration conf = null;
+
+ /**
+ * Writes the reducer output to an HBase table.
+ */
+ protected class TableRecordWriter
+ extends RecordWriter<KEY, Mutation> {
+
+ private Connection connection;
+ private BufferedMutator mutator;
+
+ /**
+ * Creates a connection and a {@link BufferedMutator} for the configured output table.
+ *
+ * @throws IOException When the connection to the cluster or the mutator cannot be created.
+ */
+ public TableRecordWriter() throws IOException {
+ String tableName = conf.get(OUTPUT_TABLE);
+ this.connection = ConnectionFactory.createConnection(conf);
+ this.mutator = connection.getBufferedMutator(TableName.valueOf(tableName));
+ LOG.info("Created table instance for " + tableName);
+ }
+ /**
+ * Closes the writer, in this case flush table commits.
+ *
+ * @param context The context.
+ * @throws IOException When closing the writer fails.
+ * @see RecordWriter#close(TaskAttemptContext)
+ */
+ @Override
+ public void close(TaskAttemptContext context) throws IOException {
+ try {
+ if (mutator != null) {
+ mutator.close();
+ }
+ } finally {
+ if (connection != null) {
+ connection.close();
+ }
+ }
+ }
+
+ /**
+ * Writes a key/value pair into the table.
+ *
+ * @param key The key.
+ * @param value The value.
+ * @throws IOException When writing fails.
+ * @see RecordWriter#write(Object, Object)
+ */
+ @Override
+ public void write(KEY key, Mutation value)
+ throws IOException {
+ if (!(value instanceof Put) && !(value instanceof Delete)) {
+ throw new IOException("Pass a Delete or a Put");
+ }
+ mutator.mutate(value);
+ }
+ }
+
+ /**
+ * Creates a new record writer.
+ *
+ * Be aware that the baseline javadoc gives the impression that there is a single
+ * {@link RecordWriter} per job but in HBase, it is more natural if we give you a new
+ * RecordWriter per call of this method. You must close the returned RecordWriter when done.
+ * Failure to do so will drop writes.
+ *
+ * @param context The current task context.
+ * @return The newly created writer instance.
+ * @throws IOException When creating the writer fails.
+ * @throws InterruptedException When the job is cancelled.
+ */
+ @Override
+ public RecordWriter<KEY, Mutation> getRecordWriter(TaskAttemptContext context)
+ throws IOException, InterruptedException {
+ return new TableRecordWriter();
+ }
+
+ /**
+ * Checks if the output table exists and is enabled.
+ *
+ * @param context The current context.
+ * @throws IOException When the check fails.
+ * @throws InterruptedException When the job is aborted.
+ * @see OutputFormat#checkOutputSpecs(JobContext)
+ */
+ @Override
+ public void checkOutputSpecs(JobContext context) throws IOException,
+ InterruptedException {
+
+ try (Admin admin = ConnectionFactory.createConnection(getConf()).getAdmin()) {
+ TableName tableName = TableName.valueOf(this.conf.get(OUTPUT_TABLE));
+ if (!admin.tableExists(tableName)) {
+ throw new TableNotFoundException("Can't write, table does not exist:" +
+ tableName.getNameAsString());
+ }
+
+ if (!admin.isTableEnabled(tableName)) {
+ throw new TableNotEnabledException("Can't write, table is not enabled: " +
+ tableName.getNameAsString());
+ }
+ }
+ }
+
+ /**
+ * Returns the output committer.
+ *
+ * @param context The current context.
+ * @return The committer.
+ * @throws IOException When creating the committer fails.
+ * @throws InterruptedException When the job is aborted.
+ * @see OutputFormat#getOutputCommitter(TaskAttemptContext)
+ */
+ @Override
+ public OutputCommitter getOutputCommitter(TaskAttemptContext context)
+ throws IOException, InterruptedException {
+ return new TableOutputCommitter();
+ }
+
+ @Override
+ public Configuration getConf() {
+ return conf;
+ }
+
+ @Override
+ public void setConf(Configuration otherConf) {
+ String tableName = otherConf.get(OUTPUT_TABLE);
+ if (tableName == null || tableName.length() <= 0) {
+ throw new IllegalArgumentException("Must specify table name");
+ }
+
+ String address = otherConf.get(QUORUM_ADDRESS);
+ int zkClientPort = otherConf.getInt(QUORUM_PORT, 0);
+ String serverClass = otherConf.get(REGION_SERVER_CLASS);
+ String serverImpl = otherConf.get(REGION_SERVER_IMPL);
+
+ try {
+ this.conf = HBaseConfiguration.createClusterConf(otherConf, address, OUTPUT_CONF_PREFIX);
+
+ if (serverClass != null) {
+ this.conf.set(HConstants.REGION_SERVER_IMPL, serverImpl);
+ }
+ if (zkClientPort != 0) {
+ this.conf.setInt(HConstants.ZOOKEEPER_CLIENT_PORT, zkClientPort);
+ }
+ } catch(IOException e) {
+ LOG.error(e);
+ throw new RuntimeException(e);
+ }
+ }
+}
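The OUTPUT_CONF_PREFIX keys above can also be set directly when a job is not wired up through
TableMapReduceUtil.initTableReducerJob. The sketch below is illustrative only and not part of this
patch; the quorum string, table name, and the write-buffer override are assumptions, and an
existing Job instance is assumed.

    Configuration conf = job.getConfiguration();
    conf.set(TableOutputFormat.OUTPUT_TABLE, "target_table");
    // Write to a peer cluster: <hbase.zookeeper.quorum>:<client port>:<znode parent>
    conf.set(TableOutputFormat.QUORUM_ADDRESS, "zk1,zk2,zk3:2181:/hbase");
    // Prefixed keys are stripped and applied only to the output-side configuration in setConf().
    conf.set(TableOutputFormat.OUTPUT_CONF_PREFIX + "hbase.client.write.buffer", "4194304");
    job.setOutputFormatClass(TableOutputFormat.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Mutation.class);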
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableRecordReader.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableRecordReader.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableRecordReader.java
new file mode 100644
index 0000000..f66520b
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableRecordReader.java
@@ -0,0 +1,147 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+/**
+ * Iterates over HBase table data and returns (ImmutableBytesWritable, Result)
+ * pairs.
+ */
+@InterfaceAudience.Public
+public class TableRecordReader
+extends RecordReader<ImmutableBytesWritable, Result> {
+
+ private TableRecordReaderImpl recordReaderImpl = new TableRecordReaderImpl();
+
+ /**
+ * Restart from survivable exceptions by creating a new scanner.
+ *
+ * @param firstRow The first row to start at.
+ * @throws IOException When restarting fails.
+ */
+ public void restart(byte[] firstRow) throws IOException {
+ this.recordReaderImpl.restart(firstRow);
+ }
+
+ /**
+ * @param table the {@link Table} to scan.
+ */
+ public void setTable(Table table) {
+ this.recordReaderImpl.setHTable(table);
+ }
+
+ /**
+ * Sets the scan defining the actual details like columns etc.
+ *
+ * @param scan The scan to set.
+ */
+ public void setScan(Scan scan) {
+ this.recordReaderImpl.setScan(scan);
+ }
+
+ /**
+ * Closes the split.
+ *
+ * @see org.apache.hadoop.mapreduce.RecordReader#close()
+ */
+ @Override
+ public void close() {
+ this.recordReaderImpl.close();
+ }
+
+ /**
+ * Returns the current key.
+ *
+ * @return The current key.
+ * @throws IOException
+ * @throws InterruptedException When the job is aborted.
+ * @see org.apache.hadoop.mapreduce.RecordReader#getCurrentKey()
+ */
+ @Override
+ public ImmutableBytesWritable getCurrentKey() throws IOException,
+ InterruptedException {
+ return this.recordReaderImpl.getCurrentKey();
+ }
+
+ /**
+ * Returns the current value.
+ *
+ * @return The current value.
+ * @throws IOException When the value is faulty.
+ * @throws InterruptedException When the job is aborted.
+ * @see org.apache.hadoop.mapreduce.RecordReader#getCurrentValue()
+ */
+ @Override
+ public Result getCurrentValue() throws IOException, InterruptedException {
+ return this.recordReaderImpl.getCurrentValue();
+ }
+
+ /**
+ * Initializes the reader.
+ *
+ * @param inputsplit The split to work with.
+ * @param context The current task context.
+ * @throws IOException When setting up the reader fails.
+ * @throws InterruptedException When the job is aborted.
+ * @see org.apache.hadoop.mapreduce.RecordReader#initialize(
+ * org.apache.hadoop.mapreduce.InputSplit,
+ * org.apache.hadoop.mapreduce.TaskAttemptContext)
+ */
+ @Override
+ public void initialize(InputSplit inputsplit,
+ TaskAttemptContext context) throws IOException,
+ InterruptedException {
+ this.recordReaderImpl.initialize(inputsplit, context);
+ }
+
+ /**
+ * Positions the record reader to the next record.
+ *
+ * @return <code>true</code> if there was another record.
+ * @throws IOException When reading the record failed.
+ * @throws InterruptedException When the job was aborted.
+ * @see org.apache.hadoop.mapreduce.RecordReader#nextKeyValue()
+ */
+ @Override
+ public boolean nextKeyValue() throws IOException, InterruptedException {
+ return this.recordReaderImpl.nextKeyValue();
+ }
+
+ /**
+ * The current progress of the record reader through its data.
+ *
+ * @return A number between 0.0 and 1.0, the fraction of the data read.
+ * @see org.apache.hadoop.mapreduce.RecordReader#getProgress()
+ */
+ @Override
+ public float getProgress() {
+ return this.recordReaderImpl.getProgress();
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableRecordReaderImpl.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableRecordReaderImpl.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableRecordReaderImpl.java
new file mode 100644
index 0000000..5f85537
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableRecordReaderImpl.java
@@ -0,0 +1,315 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.lang.reflect.Method;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.ScannerCallable;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
+import org.apache.hadoop.hbase.DoNotRetryIOException;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapreduce.Counter;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.util.StringUtils;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.annotations.VisibleForTesting;
+
+/**
+ * Iterates over HBase table data and returns (ImmutableBytesWritable, Result)
+ * pairs.
+ */
+@InterfaceAudience.Public
+public class TableRecordReaderImpl {
+ public static final String LOG_PER_ROW_COUNT
+ = "hbase.mapreduce.log.scanner.rowcount";
+
+ private static final Log LOG = LogFactory.getLog(TableRecordReaderImpl.class);
+
+ // HBASE_COUNTER_GROUP_NAME is the name of mapreduce counter group for HBase
+ @VisibleForTesting
+ static final String HBASE_COUNTER_GROUP_NAME = "HBase Counters";
+ private ResultScanner scanner = null;
+ private Scan scan = null;
+ private Scan currentScan = null;
+ private Table htable = null;
+ private byte[] lastSuccessfulRow = null;
+ private ImmutableBytesWritable key = null;
+ private Result value = null;
+ private TaskAttemptContext context = null;
+ private Method getCounter = null;
+ private long numRestarts = 0;
+ private long numStale = 0;
+ private long timestamp;
+ private int rowcount;
+ private boolean logScannerActivity = false;
+ private int logPerRowCount = 100;
+
+ /**
+ * Restart from survivable exceptions by creating a new scanner.
+ *
+ * @param firstRow The first row to start at.
+ * @throws IOException When restarting fails.
+ */
+ public void restart(byte[] firstRow) throws IOException {
+ currentScan = new Scan(scan);
+ currentScan.withStartRow(firstRow);
+ currentScan.setScanMetricsEnabled(true);
+ if (this.scanner != null) {
+ if (logScannerActivity) {
+ LOG.info("Closing the previously opened scanner object.");
+ }
+ this.scanner.close();
+ }
+ this.scanner = this.htable.getScanner(currentScan);
+ if (logScannerActivity) {
+ LOG.info("Current scan=" + currentScan.toString());
+ timestamp = System.currentTimeMillis();
+ rowcount = 0;
+ }
+ }
+
+ /**
+ * In the new mapreduce APIs, TaskAttemptContext has two getCounter methods.
+ * This checks whether the getCounter(String, String) variant is available.
+ * @return The getCounter method or null if not available.
+ * @throws IOException
+ */
+ protected static Method retrieveGetCounterWithStringsParams(TaskAttemptContext context)
+ throws IOException {
+ Method m = null;
+ try {
+ m = context.getClass().getMethod("getCounter",
+ new Class [] {String.class, String.class});
+ } catch (SecurityException e) {
+ throw new IOException("Failed test for getCounter", e);
+ } catch (NoSuchMethodException e) {
+ // Ignore
+ }
+ return m;
+ }
+
+ /**
+ * Sets the HBase table.
+ *
+ * @param htable The {@link org.apache.hadoop.hbase.client.Table} to scan.
+ */
+ public void setHTable(Table htable) {
+ Configuration conf = htable.getConfiguration();
+ logScannerActivity = conf.getBoolean(
+ ScannerCallable.LOG_SCANNER_ACTIVITY, false);
+ logPerRowCount = conf.getInt(LOG_PER_ROW_COUNT, 100);
+ this.htable = htable;
+ }
+
+ /**
+ * Sets the scan defining the actual details like columns etc.
+ *
+ * @param scan The scan to set.
+ */
+ public void setScan(Scan scan) {
+ this.scan = scan;
+ }
+
+ /**
+ * Build the scanner. Not done in constructor to allow for extension.
+ *
+ * @throws IOException
+ * @throws InterruptedException
+ */
+ public void initialize(InputSplit inputsplit,
+ TaskAttemptContext context) throws IOException,
+ InterruptedException {
+ if (context != null) {
+ this.context = context;
+ getCounter = retrieveGetCounterWithStringsParams(context);
+ }
+ restart(scan.getStartRow());
+ }
+
+ /**
+ * Closes the split.
+ */
+ public void close() {
+ if (this.scanner != null) {
+ this.scanner.close();
+ }
+ try {
+ this.htable.close();
+ } catch (IOException ioe) {
+ LOG.warn("Error closing table", ioe);
+ }
+ }
+
+ /**
+ * Returns the current key.
+ *
+ * @return The current key.
+ * @throws IOException
+ * @throws InterruptedException When the job is aborted.
+ */
+ public ImmutableBytesWritable getCurrentKey() throws IOException,
+ InterruptedException {
+ return key;
+ }
+
+ /**
+ * Returns the current value.
+ *
+ * @return The current value.
+ * @throws IOException When the value is faulty.
+ * @throws InterruptedException When the job is aborted.
+ */
+ public Result getCurrentValue() throws IOException, InterruptedException {
+ return value;
+ }
+
+
+ /**
+ * Positions the record reader to the next record.
+ *
+ * @return <code>true</code> if there was another record.
+ * @throws IOException When reading the record failed.
+ * @throws InterruptedException When the job was aborted.
+ */
+ public boolean nextKeyValue() throws IOException, InterruptedException {
+ if (key == null) key = new ImmutableBytesWritable();
+ if (value == null) value = new Result();
+ try {
+ try {
+ value = this.scanner.next();
+ if (value != null && value.isStale()) numStale++;
+ if (logScannerActivity) {
+ rowcount ++;
+ if (rowcount >= logPerRowCount) {
+ long now = System.currentTimeMillis();
+ LOG.info("Mapper took " + (now-timestamp)
+ + "ms to process " + rowcount + " rows");
+ timestamp = now;
+ rowcount = 0;
+ }
+ }
+ } catch (IOException e) {
+ // do not retry if the exception tells us not to do so
+ if (e instanceof DoNotRetryIOException) {
+ throw e;
+ }
+ // try to handle all other IOExceptions by restarting
+ // the scanner, if the second call fails, it will be rethrown
+ LOG.info("recovered from " + StringUtils.stringifyException(e));
+ if (lastSuccessfulRow == null) {
+ LOG.warn("We are restarting the first next() invocation," +
+ " if your mapper has restarted a few other times like this" +
+ " then you should consider killing this job and investigate" +
+ " why it's taking so long.");
+ }
+ if (lastSuccessfulRow == null) {
+ restart(scan.getStartRow());
+ } else {
+ restart(lastSuccessfulRow);
+ scanner.next(); // skip presumed already mapped row
+ }
+ value = scanner.next();
+ if (value != null && value.isStale()) numStale++;
+ numRestarts++;
+ }
+ if (value != null && value.size() > 0) {
+ key.set(value.getRow());
+ lastSuccessfulRow = key.get();
+ return true;
+ }
+
+ updateCounters();
+ return false;
+ } catch (IOException ioe) {
+ if (logScannerActivity) {
+ long now = System.currentTimeMillis();
+ LOG.info("Mapper took " + (now-timestamp)
+ + "ms to process " + rowcount + " rows");
+ LOG.info(ioe);
+ String lastRow = lastSuccessfulRow == null ?
+ "null" : Bytes.toStringBinary(lastSuccessfulRow);
+ LOG.info("lastSuccessfulRow=" + lastRow);
+ }
+ throw ioe;
+ }
+ }
+
+ /**
+ * If HBase runs on a new version of mapreduce, the RecordReader has access to
+ * counters and can update them based on scanMetrics.
+ * If HBase runs on an old version of mapreduce, it won't be able to get
+ * access to counters and TableRecordReader can't update counter values.
+ * @throws IOException
+ */
+ private void updateCounters() throws IOException {
+ ScanMetrics scanMetrics = scanner.getScanMetrics();
+ if (scanMetrics == null) {
+ return;
+ }
+
+ updateCounters(scanMetrics, numRestarts, getCounter, context, numStale);
+ }
+
+ protected static void updateCounters(ScanMetrics scanMetrics, long numScannerRestarts,
+ Method getCounter, TaskAttemptContext context, long numStale) {
+ // we can get access to counters only if hbase uses new mapreduce APIs
+ if (getCounter == null) {
+ return;
+ }
+
+ try {
+ for (Map.Entry<String, Long> entry:scanMetrics.getMetricsMap().entrySet()) {
+ Counter ct = (Counter)getCounter.invoke(context,
+ HBASE_COUNTER_GROUP_NAME, entry.getKey());
+
+ ct.increment(entry.getValue());
+ }
+ ((Counter) getCounter.invoke(context, HBASE_COUNTER_GROUP_NAME,
+ "NUM_SCANNER_RESTARTS")).increment(numScannerRestarts);
+ ((Counter) getCounter.invoke(context, HBASE_COUNTER_GROUP_NAME,
+ "NUM_SCAN_RESULTS_STALE")).increment(numStale);
+ } catch (Exception e) {
+ LOG.debug("can't update counter." + StringUtils.stringifyException(e));
+ }
+ }
+
+ /**
+ * The current progress of the record reader through its data.
+ *
+ * @return A number between 0.0 and 1.0, the fraction of the data read.
+ */
+ public float getProgress() {
+ // Depends on the total number of tuples
+ return 0;
+ }
+
+}
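Since the reader above only logs scanner progress when scanner-activity logging is switched on, a short sketch of the relevant knobs may help; the values used here are illustrative.

    // Illustrative: enable per-scanner progress logging for TableRecordReaderImpl.
    Configuration conf = HBaseConfiguration.create();
    conf.setBoolean(ScannerCallable.LOG_SCANNER_ACTIVITY, true);
    // Log every 500 rows instead of the default 100.
    conf.setInt(TableRecordReaderImpl.LOG_PER_ROW_COUNT, 500);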
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableReducer.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableReducer.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableReducer.java
new file mode 100644
index 0000000..f0bfc74
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableReducer.java
@@ -0,0 +1,45 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Mutation;
+import org.apache.hadoop.mapreduce.Reducer;
+
+/**
+ * Extends the basic <code>Reducer</code> class to add the required key and
+ * value input/output classes. While the input key and value as well as the
+ * output key can be anything handed in from the previous map phase, the output
+ * value <u>must</u> be either a {@link org.apache.hadoop.hbase.client.Put Put}
+ * or a {@link org.apache.hadoop.hbase.client.Delete Delete} instance when
+ * using the {@link TableOutputFormat} class.
+ * <p>
+ * This class is extended by {@link IdentityTableReducer} but can also be
+ * subclassed to implement similar features or any custom code needed. It has
+ * the advantage of enforcing the output value to a specific basic type.
+ *
+ * @param <KEYIN> The type of the input key.
+ * @param <VALUEIN> The type of the input value.
+ * @param <KEYOUT> The type of the output key.
+ * @see org.apache.hadoop.mapreduce.Reducer
+ */
+@InterfaceAudience.Public
+public abstract class TableReducer<KEYIN, VALUEIN, KEYOUT>
+extends Reducer<KEYIN, VALUEIN, KEYOUT, Mutation> {
+}
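A hedged sketch of a concrete subclass, with purely illustrative column family and qualifier names, showing the Put-emitting pattern the javadoc describes:

    // Illustrative subclass: sums integer values per key and writes one Put per row.
    public class SumTableReducer extends TableReducer<Text, IntWritable, ImmutableBytesWritable> {
      private static final byte[] CF = Bytes.toBytes("cf");      // placeholder family
      private static final byte[] QUAL = Bytes.toBytes("sum");   // placeholder qualifier

      @Override
      protected void reduce(Text key, Iterable<IntWritable> values, Context context)
          throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable v : values) {
          sum += v.get();
        }
        Put put = new Put(Bytes.toBytes(key.toString()));
        put.addColumn(CF, QUAL, Bytes.toBytes(sum));
        // The output value must be a Put or Delete so TableOutputFormat can apply it.
        context.write(new ImmutableBytesWritable(put.getRow()), put);
      }
    }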
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormat.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormat.java
new file mode 100644
index 0000000..691f0c5
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormat.java
@@ -0,0 +1,209 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.annotations.VisibleForTesting;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.lang.reflect.Method;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * TableSnapshotInputFormat allows a MapReduce job to run over a table snapshot. The job
+ * bypasses HBase servers and directly accesses the underlying files (hfiles, recovered edits,
+ * WALs, etc.) to provide maximum performance. The snapshot is not required to be
+ * restored to the live cluster or cloned. This also allows running the mapreduce job from an
+ * online or offline hbase cluster. The snapshot files can be exported by using the
+ * {@link org.apache.hadoop.hbase.snapshot.ExportSnapshot} tool, to a pure-hdfs cluster,
+ * and this InputFormat can be used to run the mapreduce job directly over the snapshot files.
+ * The snapshot should not be deleted while there are jobs reading from snapshot files.
+ * <p>
+ * Usage is similar to TableInputFormat, and
+ * {@link TableMapReduceUtil#initTableSnapshotMapperJob(String, Scan, Class, Class, Class, Job, boolean, Path)}
+ * can be used to configure the job.
+ * <pre>{@code
+ * Job job = new Job(conf);
+ * Scan scan = new Scan();
+ * TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
+ * scan, MyTableMapper.class, MyMapKeyOutput.class,
+ * MyMapOutputValueWritable.class, job, true);
+ * }
+ * </pre>
+ * <p>
+ * Internally, this input format restores the snapshot into the given tmp directory. Similar to
+ * {@link TableInputFormat} an InputSplit is created per region. The region is opened for reading
+ * from each RecordReader. An internal RegionScanner is used to execute the
+ * {@link org.apache.hadoop.hbase.CellScanner} obtained from the user.
+ * <p>
+ * HBase owns all the data and snapshot files on the filesystem. Only the 'hbase' user can read from
+ * snapshot files and data files.
+ * To read snapshot files directly from the file system, the user running the MR job
+ * must have sufficient permissions to access snapshot and reference files.
+ * This means that to run mapreduce over snapshot files, the MR job has to be run as the HBase
+ * user or the user must have group or other privileges in the filesystem (See HBASE-8369).
+ * Note that granting other users read access to snapshot/data files will completely circumvent
+ * the access control enforced by HBase.
+ * @see org.apache.hadoop.hbase.client.TableSnapshotScanner
+ */
+@InterfaceAudience.Public
+public class TableSnapshotInputFormat extends InputFormat<ImmutableBytesWritable, Result> {
+
+ public static class TableSnapshotRegionSplit extends InputSplit implements Writable {
+ private TableSnapshotInputFormatImpl.InputSplit delegate;
+
+ // constructor for mapreduce framework / Writable
+ public TableSnapshotRegionSplit() {
+ this.delegate = new TableSnapshotInputFormatImpl.InputSplit();
+ }
+
+ public TableSnapshotRegionSplit(TableSnapshotInputFormatImpl.InputSplit delegate) {
+ this.delegate = delegate;
+ }
+
+ public TableSnapshotRegionSplit(HTableDescriptor htd, HRegionInfo regionInfo,
+ List<String> locations, Scan scan, Path restoreDir) {
+ this.delegate =
+ new TableSnapshotInputFormatImpl.InputSplit(htd, regionInfo, locations, scan, restoreDir);
+ }
+
+ @Override
+ public long getLength() throws IOException, InterruptedException {
+ return delegate.getLength();
+ }
+
+ @Override
+ public String[] getLocations() throws IOException, InterruptedException {
+ return delegate.getLocations();
+ }
+
+ @Override
+ public void write(DataOutput out) throws IOException {
+ delegate.write(out);
+ }
+
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ delegate.readFields(in);
+ }
+
+ public HRegionInfo getRegionInfo() {
+ return delegate.getRegionInfo();
+ }
+
+ }
+
+ @VisibleForTesting
+ static class TableSnapshotRegionRecordReader extends
+ RecordReader<ImmutableBytesWritable, Result> {
+ private TableSnapshotInputFormatImpl.RecordReader delegate =
+ new TableSnapshotInputFormatImpl.RecordReader();
+ private TaskAttemptContext context;
+ private Method getCounter;
+
+ @Override
+ public void initialize(InputSplit split, TaskAttemptContext context) throws IOException,
+ InterruptedException {
+ this.context = context;
+ getCounter = TableRecordReaderImpl.retrieveGetCounterWithStringsParams(context);
+ delegate.initialize(
+ ((TableSnapshotRegionSplit) split).delegate,
+ context.getConfiguration());
+ }
+
+ @Override
+ public boolean nextKeyValue() throws IOException, InterruptedException {
+ boolean result = delegate.nextKeyValue();
+ if (result) {
+ ScanMetrics scanMetrics = delegate.getScanner().getScanMetrics();
+ if (scanMetrics != null && context != null) {
+ TableRecordReaderImpl.updateCounters(scanMetrics, 0, getCounter, context, 0);
+ }
+ }
+ return result;
+ }
+
+ @Override
+ public ImmutableBytesWritable getCurrentKey() throws IOException, InterruptedException {
+ return delegate.getCurrentKey();
+ }
+
+ @Override
+ public Result getCurrentValue() throws IOException, InterruptedException {
+ return delegate.getCurrentValue();
+ }
+
+ @Override
+ public float getProgress() throws IOException, InterruptedException {
+ return delegate.getProgress();
+ }
+
+ @Override
+ public void close() throws IOException {
+ delegate.close();
+ }
+ }
+
+ @Override
+ public RecordReader<ImmutableBytesWritable, Result> createRecordReader(
+ InputSplit split, TaskAttemptContext context) throws IOException {
+ return new TableSnapshotRegionRecordReader();
+ }
+
+ @Override
+ public List<InputSplit> getSplits(JobContext job) throws IOException, InterruptedException {
+ List<InputSplit> results = new ArrayList<>();
+ for (TableSnapshotInputFormatImpl.InputSplit split :
+ TableSnapshotInputFormatImpl.getSplits(job.getConfiguration())) {
+ results.add(new TableSnapshotRegionSplit(split));
+ }
+ return results;
+ }
+
+ /**
+ * Configures the job to use TableSnapshotInputFormat to read from a snapshot.
+ * @param job the job to configure
+ * @param snapshotName the name of the snapshot to read from
+ * @param restoreDir a temporary directory to restore the snapshot into. Current user should
+ * have write permissions to this directory, and this should not be a subdirectory of rootdir.
+ * After the job is finished, restoreDir can be deleted.
+ * @throws IOException if an error occurs
+ */
+ public static void setInput(Job job, String snapshotName, Path restoreDir)
+ throws IOException {
+ TableSnapshotInputFormatImpl.setInput(job.getConfiguration(), snapshotName, restoreDir);
+ }
+}
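When a job is not configured through TableMapReduceUtil, the setInput() helper above can be called directly. A short, hedged sketch; the snapshot name and restore path below are placeholders:

    // Illustrative: point a job at snapshot "snap1", restored into a scratch directory.
    Job job = Job.getInstance(HBaseConfiguration.create(), "scan-snapshot");
    Path restoreDir = new Path("/tmp/snap1-restore");   // must not live under the HBase rootdir
    job.setInputFormatClass(TableSnapshotInputFormat.class);
    TableSnapshotInputFormat.setInput(job, "snap1", restoreDir);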
[28/41] hbase git commit: HBASE-18640 Move mapreduce out of
hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableOutputFormatConnectionExhaust.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableOutputFormatConnectionExhaust.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableOutputFormatConnectionExhaust.java
new file mode 100644
index 0000000..835117c
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableOutputFormatConnectionExhaust.java
@@ -0,0 +1,104 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordWriter;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import java.io.IOException;
+
+import static org.junit.Assert.fail;
+
+/**
+ * Spark creates many instances of TableOutputFormat within a single process. We need to make
+ * sure we can have many instances and not leak connections.
+ *
+ * This test creates a few TableOutputFormats and shouldn't fail due to ZK connection exhaustion.
+ */
+@Category(MediumTests.class)
+public class TestTableOutputFormatConnectionExhaust {
+
+ private static final Log LOG =
+ LogFactory.getLog(TestTableOutputFormatConnectionExhaust.class);
+
+ private final static HBaseTestingUtility UTIL = new HBaseTestingUtility();
+ static final String TABLE = "TestTableOutputFormatConnectionExhaust";
+ static final String FAMILY = "family";
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ // Default in ZookeeperMiniCluster is 1000, setting artificially low to trigger exhaustion.
+ // need min of 7 to properly start the default mini HBase cluster
+ UTIL.getConfiguration().setInt(HConstants.ZOOKEEPER_MAX_CLIENT_CNXNS, 10);
+ UTIL.startMiniCluster();
+ }
+
+ @AfterClass
+ public static void afterClass() throws Exception {
+ UTIL.shutdownMiniCluster();
+ }
+
+ @Before
+ public void before() throws IOException {
+ LOG.info("before");
+ UTIL.ensureSomeRegionServersAvailable(1);
+ LOG.info("before done");
+ }
+
+ /**
+ * Open and close a TableOutputFormat. Closing the RecordWriter should release HBase
+ * Connection (ZK) resources, and will throw an exception if they are exhausted.
+ */
+ static void openCloseTableOutputFormat(int iter) throws IOException {
+ LOG.info("Instantiating TableOutputFormat connection " + iter);
+ JobConf conf = new JobConf();
+ conf.addResource(UTIL.getConfiguration());
+ conf.set(TableOutputFormat.OUTPUT_TABLE, TABLE);
+ TableMapReduceUtil.initTableMapJob(TABLE, FAMILY, TableMap.class,
+ ImmutableBytesWritable.class, ImmutableBytesWritable.class, conf);
+ TableOutputFormat tof = new TableOutputFormat();
+ RecordWriter rw = tof.getRecordWriter(null, conf, TABLE, null);
+ rw.close(null);
+ }
+
+ @Test
+ public void testConnectionExhaustion() throws IOException {
+ int MAX_INSTANCES = 5; // fails on iteration 3 if zk connections leak
+ for (int i = 0; i < MAX_INSTANCES; i++) {
+ final int iter = i;
+ try {
+ openCloseTableOutputFormat(iter);
+ } catch (Exception e) {
+ LOG.error("Exception encountered", e);
+ fail("Failed on iteration " + i);
+ }
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableSnapshotInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableSnapshotInputFormat.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableSnapshotInputFormat.java
new file mode 100644
index 0000000..1c72f2a
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableSnapshotInputFormat.java
@@ -0,0 +1,271 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mapred;
+
+import static org.mockito.Mockito.mock;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormatTestBase;
+import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.RunningJob;
+import org.apache.hadoop.mapred.lib.NullOutputFormat;
+import org.junit.Assert;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+@Category({VerySlowMapReduceTests.class, LargeTests.class})
+public class TestTableSnapshotInputFormat extends TableSnapshotInputFormatTestBase {
+
+ private static final byte[] aaa = Bytes.toBytes("aaa");
+ private static final byte[] after_zzz = Bytes.toBytes("zz{"); // 'z' + 1 => '{'
+ private static final String COLUMNS =
+ Bytes.toString(FAMILIES[0]) + " " + Bytes.toString(FAMILIES[1]);
+
+ @Rule
+ public TestName name = new TestName();
+
+ @Override
+ protected byte[] getStartRow() {
+ return aaa;
+ }
+
+ @Override
+ protected byte[] getEndRow() {
+ return after_zzz;
+ }
+
+ static class TestTableSnapshotMapper extends MapReduceBase
+ implements TableMap<ImmutableBytesWritable, NullWritable> {
+ @Override
+ public void map(ImmutableBytesWritable key, Result value,
+ OutputCollector<ImmutableBytesWritable, NullWritable> collector, Reporter reporter)
+ throws IOException {
+ verifyRowFromMap(key, value);
+ collector.collect(key, NullWritable.get());
+ }
+ }
+
+ public static class TestTableSnapshotReducer extends MapReduceBase
+ implements Reducer<ImmutableBytesWritable, NullWritable, NullWritable, NullWritable> {
+ HBaseTestingUtility.SeenRowTracker rowTracker =
+ new HBaseTestingUtility.SeenRowTracker(aaa, after_zzz);
+
+ @Override
+ public void reduce(ImmutableBytesWritable key, Iterator<NullWritable> values,
+ OutputCollector<NullWritable, NullWritable> collector, Reporter reporter)
+ throws IOException {
+ rowTracker.addRow(key.get());
+ }
+
+ @Override
+ public void close() {
+ rowTracker.validate();
+ }
+ }
+
+ @Test
+ public void testInitTableSnapshotMapperJobConfig() throws Exception {
+ setupCluster();
+ final TableName tableName = TableName.valueOf(name.getMethodName());
+ String snapshotName = "foo";
+
+ try {
+ createTableAndSnapshot(UTIL, tableName, snapshotName, getStartRow(), getEndRow(), 1);
+ JobConf job = new JobConf(UTIL.getConfiguration());
+ Path tmpTableDir = UTIL.getDataTestDirOnTestFS(snapshotName);
+
+ TableMapReduceUtil.initTableSnapshotMapJob(snapshotName,
+ COLUMNS, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
+ NullWritable.class, job, false, tmpTableDir);
+
+ // TODO: would be better to examine directly the cache instance that results from this
+ // config. Currently this is not possible because BlockCache initialization is static.
+ Assert.assertEquals(
+ "Snapshot job should be configured for default LruBlockCache.",
+ HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT,
+ job.getFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, -1), 0.01);
+ Assert.assertEquals(
+ "Snapshot job should not use BucketCache.",
+ 0, job.getFloat("hbase.bucketcache.size", -1), 0.01);
+ } finally {
+ UTIL.getAdmin().deleteSnapshot(snapshotName);
+ UTIL.deleteTable(tableName);
+ tearDownCluster();
+ }
+ }
+
+ // TODO: mapred does not support limiting input range by startrow, endrow.
+ // Thus the following tests must override parameter verification.
+
+ @Test
+ @Override
+ public void testWithMockedMapReduceMultiRegion() throws Exception {
+ testWithMockedMapReduce(UTIL, "testWithMockedMapReduceMultiRegion", 10, 10);
+ }
+
+ @Test
+ @Override
+ public void testWithMapReduceMultiRegion() throws Exception {
+ testWithMapReduce(UTIL, "testWithMapReduceMultiRegion", 10, 10, false);
+ }
+
+ @Test
+ @Override
+ // run the MR job while HBase is offline
+ public void testWithMapReduceAndOfflineHBaseMultiRegion() throws Exception {
+ testWithMapReduce(UTIL, "testWithMapReduceAndOfflineHBaseMultiRegion", 10, 10, true);
+ }
+
+ @Override
+ public void testRestoreSnapshotDoesNotCreateBackRefLinksInit(TableName tableName,
+ String snapshotName, Path tmpTableDir) throws Exception {
+ JobConf job = new JobConf(UTIL.getConfiguration());
+ TableMapReduceUtil.initTableSnapshotMapJob(snapshotName,
+ COLUMNS, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
+ NullWritable.class, job, false, tmpTableDir);
+ }
+
+ @Override
+ protected void testWithMockedMapReduce(HBaseTestingUtility util, String snapshotName,
+ int numRegions, int expectedNumSplits) throws Exception {
+ setupCluster();
+ final TableName tableName = TableName.valueOf(name.getMethodName());
+ try {
+ createTableAndSnapshot(
+ util, tableName, snapshotName, getStartRow(), getEndRow(), numRegions);
+
+ JobConf job = new JobConf(util.getConfiguration());
+ Path tmpTableDir = util.getDataTestDirOnTestFS(snapshotName);
+
+ TableMapReduceUtil.initTableSnapshotMapJob(snapshotName,
+ COLUMNS, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
+ NullWritable.class, job, false, tmpTableDir);
+
+ // mapred doesn't support start and end keys? o.O
+ verifyWithMockedMapReduce(job, numRegions, expectedNumSplits, getStartRow(), getEndRow());
+
+ } finally {
+ util.getAdmin().deleteSnapshot(snapshotName);
+ util.deleteTable(tableName);
+ tearDownCluster();
+ }
+ }
+
+ private void verifyWithMockedMapReduce(JobConf job, int numRegions, int expectedNumSplits,
+ byte[] startRow, byte[] stopRow) throws IOException, InterruptedException {
+ TableSnapshotInputFormat tsif = new TableSnapshotInputFormat();
+ InputSplit[] splits = tsif.getSplits(job, 0);
+
+ Assert.assertEquals(expectedNumSplits, splits.length);
+
+ HBaseTestingUtility.SeenRowTracker rowTracker =
+ new HBaseTestingUtility.SeenRowTracker(startRow, stopRow);
+
+ for (int i = 0; i < splits.length; i++) {
+ // validate input split
+ InputSplit split = splits[i];
+ Assert.assertTrue(split instanceof TableSnapshotInputFormat.TableSnapshotRegionSplit);
+
+ // validate record reader
+ OutputCollector collector = mock(OutputCollector.class);
+ Reporter reporter = mock(Reporter.class);
+ RecordReader<ImmutableBytesWritable, Result> rr = tsif.getRecordReader(split, job, reporter);
+
+ // validate we can read all the data back
+ ImmutableBytesWritable key = rr.createKey();
+ Result value = rr.createValue();
+ while (rr.next(key, value)) {
+ verifyRowFromMap(key, value);
+ rowTracker.addRow(key.copyBytes());
+ }
+
+ rr.close();
+ }
+
+ // validate all rows are seen
+ rowTracker.validate();
+ }
+
+ @Override
+ protected void testWithMapReduceImpl(HBaseTestingUtility util, TableName tableName,
+ String snapshotName, Path tableDir, int numRegions, int expectedNumSplits,
+ boolean shutdownCluster) throws Exception {
+ doTestWithMapReduce(util, tableName, snapshotName, getStartRow(), getEndRow(), tableDir,
+ numRegions, expectedNumSplits, shutdownCluster);
+ }
+
+ // this is also called by the IntegrationTestTableSnapshotInputFormat
+ public static void doTestWithMapReduce(HBaseTestingUtility util, TableName tableName,
+ String snapshotName, byte[] startRow, byte[] endRow, Path tableDir, int numRegions,
+ int expectedNumSplits, boolean shutdownCluster) throws Exception {
+
+ //create the table and snapshot
+ createTableAndSnapshot(util, tableName, snapshotName, startRow, endRow, numRegions);
+
+ if (shutdownCluster) {
+ util.shutdownMiniHBaseCluster();
+ }
+
+ try {
+ // create the job
+ JobConf jobConf = new JobConf(util.getConfiguration());
+
+ jobConf.setJarByClass(util.getClass());
+ org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addDependencyJarsForClasses(jobConf,
+ TestTableSnapshotInputFormat.class);
+
+ TableMapReduceUtil.initTableSnapshotMapJob(snapshotName, COLUMNS,
+ TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
+ NullWritable.class, jobConf, true, tableDir);
+
+ jobConf.setReducerClass(TestTableSnapshotInputFormat.TestTableSnapshotReducer.class);
+ jobConf.setNumReduceTasks(1);
+ jobConf.setOutputFormat(NullOutputFormat.class);
+
+ RunningJob job = JobClient.runJob(jobConf);
+ Assert.assertTrue(job.isSuccessful());
+ } finally {
+ if (!shutdownCluster) {
+ util.getAdmin().deleteSnapshot(snapshotName);
+ util.deleteTable(tableName);
+ }
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/HadoopSecurityEnabledUserProviderForTesting.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/HadoopSecurityEnabledUserProviderForTesting.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/HadoopSecurityEnabledUserProviderForTesting.java
new file mode 100644
index 0000000..b342f64
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/HadoopSecurityEnabledUserProviderForTesting.java
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.hadoop.hbase.security.UserProvider;
+
+/**
+ * A {@link UserProvider} that always says hadoop security is enabled, regardless of the underlying
+ * configuration. HBase security is <i>not enabled</i> as this is used to determine if SASL is used
+ * to do the authentication, which requires a Kerberos ticket (which we currently don't have in
+ * tests).
+ * <p>
+ * This should only be used for <b>TESTING</b>.
+ */
+public class HadoopSecurityEnabledUserProviderForTesting extends UserProvider {
+
+ @Override
+ public boolean isHBaseSecurityEnabled() {
+ return false;
+ }
+
+ @Override
+ public boolean isHadoopSecurityEnabled() {
+ return true;
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatTestBase.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatTestBase.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatTestBase.java
new file mode 100644
index 0000000..c717fa9
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatTestBase.java
@@ -0,0 +1,277 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.CategoryBasedTimeout;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestRule;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.NavigableMap;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Base set of tests and setup for input formats touching multiple tables.
+ */
+public abstract class MultiTableInputFormatTestBase {
+ @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
+ withTimeout(this.getClass()).withLookingForStuckThread(true).build();
+ static final Log LOG = LogFactory.getLog(TestMultiTableInputFormat.class);
+ public static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+ static final String TABLE_NAME = "scantest";
+ static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
+ static final String KEY_STARTROW = "startRow";
+ static final String KEY_LASTROW = "stpRow";
+
+ static List<String> TABLES = Lists.newArrayList();
+
+ static {
+ for (int i = 0; i < 3; i++) {
+ TABLES.add(TABLE_NAME + String.valueOf(i));
+ }
+ }
+
+ @BeforeClass
+ public static void setUpBeforeClass() throws Exception {
+ // switch TIF to log at DEBUG level
+ TEST_UTIL.enableDebug(MultiTableInputFormatBase.class);
+ // start mini hbase cluster
+ TEST_UTIL.startMiniCluster(3);
+ // create and fill table
+ for (String tableName : TABLES) {
+ try (Table table =
+ TEST_UTIL.createMultiRegionTable(TableName.valueOf(tableName),
+ INPUT_FAMILY, 4)) {
+ TEST_UTIL.loadTable(table, INPUT_FAMILY, false);
+ }
+ }
+ }
+
+ @AfterClass
+ public static void tearDownAfterClass() throws Exception {
+ TEST_UTIL.shutdownMiniCluster();
+ }
+
+ @After
+ public void tearDown() throws Exception {
+ Configuration c = TEST_UTIL.getConfiguration();
+ FileUtil.fullyDelete(new File(c.get("hadoop.tmp.dir")));
+ }
+
+ /**
+ * Pass the key and value to reducer.
+ */
+ public static class ScanMapper extends
+ TableMapper<ImmutableBytesWritable, ImmutableBytesWritable> {
+ /**
+ * Pass the key and value to reduce.
+ *
+ * @param key The key, here "aaa", "aab" etc.
+ * @param value The value is the same as the key.
+ * @param context The task context.
+ * @throws IOException When reading the rows fails.
+ */
+ @Override
+ public void map(ImmutableBytesWritable key, Result value, Context context)
+ throws IOException, InterruptedException {
+ makeAssertions(key, value);
+ context.write(key, key);
+ }
+
+ public void makeAssertions(ImmutableBytesWritable key, Result value) throws IOException {
+ if (value.size() != 1) {
+ throw new IOException("There should only be one input column");
+ }
+ Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> cf =
+ value.getMap();
+ if (!cf.containsKey(INPUT_FAMILY)) {
+ throw new IOException("Wrong input columns. Missing: '" +
+ Bytes.toString(INPUT_FAMILY) + "'.");
+ }
+ String val = Bytes.toStringBinary(value.getValue(INPUT_FAMILY, null));
+ LOG.debug("map: key -> " + Bytes.toStringBinary(key.get()) +
+ ", value -> " + val);
+ }
+ }
+
+ /**
+ * Checks the last and first keys seen against the scanner boundaries.
+ */
+ public static class ScanReducer
+ extends
+ Reducer<ImmutableBytesWritable, ImmutableBytesWritable,
+ NullWritable, NullWritable> {
+ private String first = null;
+ private String last = null;
+
+ @Override
+ protected void reduce(ImmutableBytesWritable key,
+ Iterable<ImmutableBytesWritable> values, Context context)
+ throws IOException, InterruptedException {
+ makeAssertions(key, values);
+ }
+
+ protected void makeAssertions(ImmutableBytesWritable key,
+ Iterable<ImmutableBytesWritable> values) {
+ int count = 0;
+ for (ImmutableBytesWritable value : values) {
+ String val = Bytes.toStringBinary(value.get());
+ LOG.debug("reduce: key[" + count + "] -> " +
+ Bytes.toStringBinary(key.get()) + ", value -> " + val);
+ if (first == null) first = val;
+ last = val;
+ count++;
+ }
+ assertEquals(3, count);
+ }
+
+ @Override
+ protected void cleanup(Context context) throws IOException,
+ InterruptedException {
+ Configuration c = context.getConfiguration();
+ cleanup(c);
+ }
+
+ protected void cleanup(Configuration c) {
+ String startRow = c.get(KEY_STARTROW);
+ String lastRow = c.get(KEY_LASTROW);
+ LOG.info("cleanup: first -> \"" + first + "\", start row -> \"" +
+ startRow + "\"");
+ LOG.info("cleanup: last -> \"" + last + "\", last row -> \"" + lastRow +
+ "\"");
+ if (startRow != null && startRow.length() > 0) {
+ assertEquals(startRow, first);
+ }
+ if (lastRow != null && lastRow.length() > 0) {
+ assertEquals(lastRow, last);
+ }
+ }
+ }
+
+ @Test
+ public void testScanEmptyToEmpty() throws IOException, InterruptedException,
+ ClassNotFoundException {
+ testScan(null, null, null);
+ }
+
+ @Test
+ public void testScanEmptyToAPP() throws IOException, InterruptedException,
+ ClassNotFoundException {
+ testScan(null, "app", "apo");
+ }
+
+ @Test
+ public void testScanOBBToOPP() throws IOException, InterruptedException,
+ ClassNotFoundException {
+ testScan("obb", "opp", "opo");
+ }
+
+ @Test
+ public void testScanYZYToEmpty() throws IOException, InterruptedException,
+ ClassNotFoundException {
+ testScan("yzy", null, "zzz");
+ }
+
+ /**
+ * Tests a MR scan using specific start and stop rows.
+ *
+ * @throws IOException
+ * @throws ClassNotFoundException
+ * @throws InterruptedException
+ */
+ private void testScan(String start, String stop, String last)
+ throws IOException, InterruptedException, ClassNotFoundException {
+ String jobName =
+ "Scan" + (start != null ? start.toUpperCase(Locale.ROOT) : "Empty") + "To" +
+ (stop != null ? stop.toUpperCase(Locale.ROOT) : "Empty");
+ LOG.info("Before map/reduce startup - job " + jobName);
+ Configuration c = new Configuration(TEST_UTIL.getConfiguration());
+
+ c.set(KEY_STARTROW, start != null ? start : "");
+ c.set(KEY_LASTROW, last != null ? last : "");
+
+ List<Scan> scans = new ArrayList<>();
+
+ for (String tableName : TABLES) {
+ Scan scan = new Scan();
+
+ scan.addFamily(INPUT_FAMILY);
+ scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(tableName));
+
+ if (start != null) {
+ scan.setStartRow(Bytes.toBytes(start));
+ }
+ if (stop != null) {
+ scan.setStopRow(Bytes.toBytes(stop));
+ }
+
+ scans.add(scan);
+
+ LOG.info("scan before: " + scan);
+ }
+
+ runJob(jobName, c, scans);
+ }
+
+ protected void runJob(String jobName, Configuration c, List<Scan> scans)
+ throws IOException, InterruptedException, ClassNotFoundException {
+ Job job = new Job(c, jobName);
+
+ initJob(scans, job);
+ job.setReducerClass(ScanReducer.class);
+ job.setNumReduceTasks(1); // one to get final "first" and "last" key
+ FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
+ LOG.info("Started " + job.getJobName());
+ job.waitForCompletion(true);
+ assertTrue(job.isSuccessful());
+ LOG.info("After map/reduce completion - job " + jobName);
+ }
+
+ protected abstract void initJob(List<Scan> scans, Job job) throws IOException;
+
+
+}
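A concrete subclass only needs to supply initJob(). A plausible implementation, assuming the multi-scan variant of TableMapReduceUtil.initTableMapperJob, might look like this:

    // Illustrative initJob() implementation for a subclass of this base test.
    @Override
    protected void initJob(List<Scan> scans, Job job) throws IOException {
      TableMapReduceUtil.initTableMapperJob(scans, ScanMapper.class,
          ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
    }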
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/NMapInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/NMapInputFormat.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/NMapInputFormat.java
new file mode 100644
index 0000000..3203f0c
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/NMapInputFormat.java
@@ -0,0 +1,134 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+/**
+ * Input format that creates a configurable number of map tasks
+ * each provided with a single row of NullWritables. This can be
+ * useful when trying to write mappers which don't have any real
+ * input (e.g. when the mapper is simply producing random data as output).
+ */
+public class NMapInputFormat extends InputFormat<NullWritable, NullWritable> {
+ private static final String NMAPS_KEY = "nmapinputformat.num.maps";
+
+ @Override
+ public RecordReader<NullWritable, NullWritable> createRecordReader(
+ InputSplit split,
+ TaskAttemptContext tac) throws IOException, InterruptedException {
+ return new SingleRecordReader<>(NullWritable.get(), NullWritable.get());
+ }
+
+ @Override
+ public List<InputSplit> getSplits(JobContext context) throws IOException,
+ InterruptedException {
+ int count = getNumMapTasks(context.getConfiguration());
+ List<InputSplit> splits = new ArrayList<>(count);
+ for (int i = 0; i < count; i++) {
+ splits.add(new NullInputSplit());
+ }
+ return splits;
+ }
+
+ public static void setNumMapTasks(Configuration conf, int numTasks) {
+ conf.setInt(NMAPS_KEY, numTasks);
+ }
+
+ public static int getNumMapTasks(Configuration conf) {
+ return conf.getInt(NMAPS_KEY, 1);
+ }
+
+ private static class NullInputSplit extends InputSplit implements Writable {
+ @Override
+ public long getLength() throws IOException, InterruptedException {
+ return 0;
+ }
+
+ @Override
+ public String[] getLocations() throws IOException, InterruptedException {
+ return new String[] {};
+ }
+
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ }
+
+ @Override
+ public void write(DataOutput out) throws IOException {
+ }
+ }
+
+ private static class SingleRecordReader<K, V>
+ extends RecordReader<K, V> {
+
+ private final K key;
+ private final V value;
+ boolean providedKey = false;
+
+ SingleRecordReader(K key, V value) {
+ this.key = key;
+ this.value = value;
+ }
+
+ @Override
+ public void close() {
+ }
+
+ @Override
+ public K getCurrentKey() {
+ return key;
+ }
+
+ @Override
+ public V getCurrentValue(){
+ return value;
+ }
+
+ @Override
+ public float getProgress() {
+ return 0;
+ }
+
+ @Override
+ public void initialize(InputSplit split, TaskAttemptContext tac) {
+ }
+
+ @Override
+ public boolean nextKeyValue() {
+ if (providedKey) return false;
+ providedKey = true;
+ return true;
+ }
+
+ }
+}
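For context, a minimal usage sketch for NMapInputFormat follows; the job name and the WriteRandomDataMapper class are assumptions for illustration and are not part of this change:

    // Minimal sketch (assumed driver code, not part of this commit).
    // Imports from org.apache.hadoop.conf, org.apache.hadoop.mapreduce and
    // org.apache.hadoop.hbase are omitted for brevity.
    Configuration conf = HBaseConfiguration.create();
    NMapInputFormat.setNumMapTasks(conf, 8);            // launch 8 map tasks with no real input
    Job job = Job.getInstance(conf, "random-data-generator");
    job.setInputFormatClass(NMapInputFormat.class);     // each task receives one NullWritable record
    job.setMapperClass(WriteRandomDataMapper.class);    // hypothetical mapper that generates the output
    job.setNumReduceTasks(0);
    job.waitForCompletion(true);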
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatTestBase.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatTestBase.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatTestBase.java
new file mode 100644
index 0000000..fa47253
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatTestBase.java
@@ -0,0 +1,231 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.CategoryBasedTimeout;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellScanner;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.HFileLink;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
+import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
+import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.util.HFileArchiveUtil;
+import org.junit.Assert;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestRule;
+
+import static org.junit.Assert.assertFalse;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+public abstract class TableSnapshotInputFormatTestBase {
+ private static final Log LOG = LogFactory.getLog(TableSnapshotInputFormatTestBase.class);
+ @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
+ withTimeout(this.getClass()).withLookingForStuckThread(true).build();
+ protected final HBaseTestingUtility UTIL = new HBaseTestingUtility();
+ protected static final int NUM_REGION_SERVERS = 2;
+ protected static final byte[][] FAMILIES = {Bytes.toBytes("f1"), Bytes.toBytes("f2")};
+
+ protected FileSystem fs;
+ protected Path rootDir;
+
+ public void setupCluster() throws Exception {
+ setupConf(UTIL.getConfiguration());
+ UTIL.startMiniCluster(NUM_REGION_SERVERS, true);
+ rootDir = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir();
+ fs = rootDir.getFileSystem(UTIL.getConfiguration());
+ }
+
+ public void tearDownCluster() throws Exception {
+ UTIL.shutdownMiniCluster();
+ }
+
+ private static void setupConf(Configuration conf) {
+ // Enable snapshot
+ conf.setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true);
+ }
+
+ protected abstract void testWithMockedMapReduce(HBaseTestingUtility util, String snapshotName,
+ int numRegions, int expectedNumSplits) throws Exception;
+
+ protected abstract void testWithMapReduceImpl(HBaseTestingUtility util, TableName tableName,
+ String snapshotName, Path tableDir, int numRegions, int expectedNumSplits,
+ boolean shutdownCluster) throws Exception;
+
+ protected abstract byte[] getStartRow();
+
+ protected abstract byte[] getEndRow();
+
+ @Test
+ public void testWithMockedMapReduceSingleRegion() throws Exception {
+ testWithMockedMapReduce(UTIL, "testWithMockedMapReduceSingleRegion", 1, 1);
+ }
+
+ @Test
+ public void testWithMockedMapReduceMultiRegion() throws Exception {
+ testWithMockedMapReduce(UTIL, "testWithMockedMapReduceMultiRegion", 10, 8);
+ }
+
+ @Test
+ public void testWithMapReduceSingleRegion() throws Exception {
+ testWithMapReduce(UTIL, "testWithMapReduceSingleRegion", 1, 1, false);
+ }
+
+ @Test
+ public void testWithMapReduceMultiRegion() throws Exception {
+ testWithMapReduce(UTIL, "testWithMapReduceMultiRegion", 10, 8, false);
+ }
+
+ @Test
+ // run the MR job while HBase is offline
+ public void testWithMapReduceAndOfflineHBaseMultiRegion() throws Exception {
+ testWithMapReduce(UTIL, "testWithMapReduceAndOfflineHBaseMultiRegion", 10, 8, true);
+ }
+
+ // Test that snapshot restore does not create back references in the HBase root dir.
+ @Test
+ public void testRestoreSnapshotDoesNotCreateBackRefLinks() throws Exception {
+ setupCluster();
+ TableName tableName = TableName.valueOf("testRestoreSnapshotDoesNotCreateBackRefLinks");
+ String snapshotName = "foo";
+
+ try {
+ createTableAndSnapshot(UTIL, tableName, snapshotName, getStartRow(), getEndRow(), 1);
+
+ Path tmpTableDir = UTIL.getDataTestDirOnTestFS(snapshotName);
+
+ testRestoreSnapshotDoesNotCreateBackRefLinksInit(tableName, snapshotName,tmpTableDir);
+
+ Path rootDir = FSUtils.getRootDir(UTIL.getConfiguration());
+ for (Path regionDir : FSUtils.getRegionDirs(fs, FSUtils.getTableDir(rootDir, tableName))) {
+ for (Path storeDir : FSUtils.getFamilyDirs(fs, regionDir)) {
+ for (FileStatus status : fs.listStatus(storeDir)) {
+ System.out.println(status.getPath());
+ if (StoreFileInfo.isValid(status)) {
+ Path archiveStoreDir = HFileArchiveUtil.getStoreArchivePath(UTIL.getConfiguration(),
+ tableName, regionDir.getName(), storeDir.getName());
+
+ Path path = HFileLink.getBackReferencesDir(storeDir, status.getPath().getName());
+ // assert back references directory is empty
+ assertFalse("There is a back reference in " + path, fs.exists(path));
+
+ path = HFileLink.getBackReferencesDir(archiveStoreDir, status.getPath().getName());
+ // assert back references directory is empty
+ assertFalse("There is a back reference in " + path, fs.exists(path));
+ }
+ }
+ }
+ }
+ } finally {
+ UTIL.getAdmin().deleteSnapshot(snapshotName);
+ UTIL.deleteTable(tableName);
+ tearDownCluster();
+ }
+ }
+
+ public abstract void testRestoreSnapshotDoesNotCreateBackRefLinksInit(TableName tableName,
+ String snapshotName, Path tmpTableDir) throws Exception;
+
+ protected void testWithMapReduce(HBaseTestingUtility util, String snapshotName,
+ int numRegions, int expectedNumSplits, boolean shutdownCluster) throws Exception {
+ setupCluster();
+ try {
+ Path tableDir = util.getDataTestDirOnTestFS(snapshotName);
+ TableName tableName = TableName.valueOf("testWithMapReduce");
+ testWithMapReduceImpl(util, tableName, snapshotName, tableDir, numRegions,
+ expectedNumSplits, shutdownCluster);
+ } finally {
+ tearDownCluster();
+ }
+ }
+
+ protected static void verifyRowFromMap(ImmutableBytesWritable key, Result result)
+ throws IOException {
+ byte[] row = key.get();
+ CellScanner scanner = result.cellScanner();
+ while (scanner.advance()) {
+ Cell cell = scanner.current();
+
+ //assert that all Cells in the Result have the same key
+ Assert.assertEquals(0, Bytes.compareTo(row, 0, row.length,
+ cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()));
+ }
+
+ for (int j = 0; j < FAMILIES.length; j++) {
+ byte[] actual = result.getValue(FAMILIES[j], FAMILIES[j]);
+ Assert.assertArrayEquals("Row in snapshot does not match, expected:" + Bytes.toString(row)
+ + " ,actual:" + Bytes.toString(actual), row, actual);
+ }
+ }
+
+ protected static void createTableAndSnapshot(HBaseTestingUtility util, TableName tableName,
+ String snapshotName, byte[] startRow, byte[] endRow, int numRegions)
+ throws Exception {
+ try {
+ LOG.debug("Ensuring table doesn't exist.");
+ util.deleteTable(tableName);
+ } catch(Exception ex) {
+ // ignore
+ }
+
+ LOG.info("creating table '" + tableName + "'");
+ if (numRegions > 1) {
+ util.createTable(tableName, FAMILIES, 1, startRow, endRow, numRegions);
+ } else {
+ util.createTable(tableName, FAMILIES);
+ }
+ Admin admin = util.getAdmin();
+
+ LOG.info("put some stuff in the table");
+ Table table = util.getConnection().getTable(tableName);
+ util.loadTable(table, FAMILIES);
+
+ Path rootDir = FSUtils.getRootDir(util.getConfiguration());
+ FileSystem fs = rootDir.getFileSystem(util.getConfiguration());
+
+ LOG.info("snapshot");
+ SnapshotTestingUtils.createSnapshotAndValidate(admin, tableName,
+ Arrays.asList(FAMILIES), null, snapshotName, rootDir, fs, true);
+
+ LOG.info("load different values");
+ byte[] value = Bytes.toBytes("after_snapshot_value");
+ util.loadTable(table, FAMILIES, value);
+
+ LOG.info("cause flush to create new files in the region");
+ admin.flush(tableName);
+ table.close();
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCellCounter.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCellCounter.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCellCounter.java
new file mode 100644
index 0000000..ff623cb
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCellCounter.java
@@ -0,0 +1,376 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.LauncherSecurityManager;
+import org.apache.hadoop.util.ToolRunner;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+
+import java.io.*;
+
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
+
+@Category({MapReduceTests.class, LargeTests.class})
+public class TestCellCounter {
+ private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
+ private static final byte[] ROW1 = Bytes.toBytesBinary("\\x01row1");
+ private static final byte[] ROW2 = Bytes.toBytesBinary("\\x01row2");
+ private static final String FAMILY_A_STRING = "a";
+ private static final String FAMILY_B_STRING = "b";
+ private static final byte[] FAMILY_A = Bytes.toBytes(FAMILY_A_STRING);
+ private static final byte[] FAMILY_B = Bytes.toBytes(FAMILY_B_STRING);
+ private static final byte[] QUALIFIER = Bytes.toBytes("q");
+
+ private static Path FQ_OUTPUT_DIR;
+ private static final String OUTPUT_DIR = "target" + File.separator + "test-data" + File.separator
+ + "output";
+ private static long now = System.currentTimeMillis();
+
+ @Rule
+ public TestName name = new TestName();
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ UTIL.startMiniCluster();
+ FQ_OUTPUT_DIR = new Path(OUTPUT_DIR).makeQualified(new LocalFileSystem());
+ FileUtil.fullyDelete(new File(OUTPUT_DIR));
+ }
+
+ @AfterClass
+ public static void afterClass() throws Exception {
+ UTIL.shutdownMiniCluster();
+ }
+
+ /**
+ * Test CellCounter; all data should print to the output.
+ */
+ @Test (timeout=300000)
+ public void testCellCounter() throws Exception {
+ final TableName sourceTable = TableName.valueOf(name.getMethodName());
+ byte[][] families = { FAMILY_A, FAMILY_B };
+ Table t = UTIL.createTable(sourceTable, families);
+ try{
+ Put p = new Put(ROW1);
+ p.addColumn(FAMILY_A, QUALIFIER, now, Bytes.toBytes("Data11"));
+ p.addColumn(FAMILY_B, QUALIFIER, now + 1, Bytes.toBytes("Data12"));
+ p.addColumn(FAMILY_A, QUALIFIER, now + 2, Bytes.toBytes("Data13"));
+ t.put(p);
+ p = new Put(ROW2);
+ p.addColumn(FAMILY_B, QUALIFIER, now, Bytes.toBytes("Dat21"));
+ p.addColumn(FAMILY_A, QUALIFIER, now + 1, Bytes.toBytes("Data22"));
+ p.addColumn(FAMILY_B, QUALIFIER, now + 2, Bytes.toBytes("Data23"));
+ t.put(p);
+ String[] args = { sourceTable.getNameAsString(), FQ_OUTPUT_DIR.toString(), ";", "^row1" };
+ runCount(args);
+ FileInputStream inputStream = new FileInputStream(OUTPUT_DIR + File.separator +
+ "part-r-00000");
+ String data = IOUtils.toString(inputStream);
+ inputStream.close();
+ assertTrue(data.contains("Total Families Across all Rows" + "\t" + "2"));
+ assertTrue(data.contains("Total Qualifiers across all Rows" + "\t" + "2"));
+ assertTrue(data.contains("Total ROWS" + "\t" + "1"));
+ assertTrue(data.contains("b;q" + "\t" + "1"));
+ assertTrue(data.contains("a;q" + "\t" + "1"));
+ assertTrue(data.contains("row1;a;q_Versions" + "\t" + "1"));
+ assertTrue(data.contains("row1;b;q_Versions" + "\t" + "1"));
+ }finally{
+ t.close();
+ FileUtil.fullyDelete(new File(OUTPUT_DIR));
+ }
+ }
+
+ /**
+ * Test CellCounter with a row prefix; matching data should print to the output.
+ */
+ @Test(timeout = 300000)
+ public void testCellCounterPrefix() throws Exception {
+ final TableName sourceTable = TableName.valueOf(name.getMethodName());
+ byte[][] families = { FAMILY_A, FAMILY_B };
+ Table t = UTIL.createTable(sourceTable, families);
+ try {
+ Put p = new Put(ROW1);
+ p.addColumn(FAMILY_A, QUALIFIER, now, Bytes.toBytes("Data11"));
+ p.addColumn(FAMILY_B, QUALIFIER, now + 1, Bytes.toBytes("Data12"));
+ p.addColumn(FAMILY_A, QUALIFIER, now + 2, Bytes.toBytes("Data13"));
+ t.put(p);
+ p = new Put(ROW2);
+ p.addColumn(FAMILY_B, QUALIFIER, now, Bytes.toBytes("Dat21"));
+ p.addColumn(FAMILY_A, QUALIFIER, now + 1, Bytes.toBytes("Data22"));
+ p.addColumn(FAMILY_B, QUALIFIER, now + 2, Bytes.toBytes("Data23"));
+ t.put(p);
+ String[] args = { sourceTable.getNameAsString(), FQ_OUTPUT_DIR.toString(), ";", "\\x01row1" };
+ runCount(args);
+ FileInputStream inputStream =
+ new FileInputStream(OUTPUT_DIR + File.separator + "part-r-00000");
+ String data = IOUtils.toString(inputStream);
+ inputStream.close();
+ assertTrue(data.contains("Total Families Across all Rows" + "\t" + "2"));
+ assertTrue(data.contains("Total Qualifiers across all Rows" + "\t" + "2"));
+ assertTrue(data.contains("Total ROWS" + "\t" + "1"));
+ assertTrue(data.contains("b;q" + "\t" + "1"));
+ assertTrue(data.contains("a;q" + "\t" + "1"));
+ assertTrue(data.contains("row1;a;q_Versions" + "\t" + "1"));
+ assertTrue(data.contains("row1;b;q_Versions" + "\t" + "1"));
+ } finally {
+ t.close();
+ FileUtil.fullyDelete(new File(OUTPUT_DIR));
+ }
+ }
+
+ /**
+ * Test CellCounter with a start/end time range; matching data should print to the output.
+ */
+ @Test (timeout=300000)
+ public void testCellCounterStartTimeRange() throws Exception {
+ final TableName sourceTable = TableName.valueOf(name.getMethodName());
+ byte[][] families = { FAMILY_A, FAMILY_B };
+ Table t = UTIL.createTable(sourceTable, families);
+ try{
+ Put p = new Put(ROW1);
+ p.addColumn(FAMILY_A, QUALIFIER, now, Bytes.toBytes("Data11"));
+ p.addColumn(FAMILY_B, QUALIFIER, now + 1, Bytes.toBytes("Data12"));
+ p.addColumn(FAMILY_A, QUALIFIER, now + 2, Bytes.toBytes("Data13"));
+ t.put(p);
+ p = new Put(ROW2);
+ p.addColumn(FAMILY_B, QUALIFIER, now, Bytes.toBytes("Dat21"));
+ p.addColumn(FAMILY_A, QUALIFIER, now + 1, Bytes.toBytes("Data22"));
+ p.addColumn(FAMILY_B, QUALIFIER, now + 2, Bytes.toBytes("Data23"));
+ t.put(p);
+ String[] args = {
+ sourceTable.getNameAsString(), FQ_OUTPUT_DIR.toString(), ";", "^row1",
+ "--starttime=" + now,
+ "--endtime=" + now + 2 };
+ runCount(args);
+ FileInputStream inputStream = new FileInputStream(OUTPUT_DIR + File.separator +
+ "part-r-00000");
+ String data = IOUtils.toString(inputStream);
+ inputStream.close();
+ assertTrue(data.contains("Total Families Across all Rows" + "\t" + "2"));
+ assertTrue(data.contains("Total Qualifiers across all Rows" + "\t" + "2"));
+ assertTrue(data.contains("Total ROWS" + "\t" + "1"));
+ assertTrue(data.contains("b;q" + "\t" + "1"));
+ assertTrue(data.contains("a;q" + "\t" + "1"));
+ assertTrue(data.contains("row1;a;q_Versions" + "\t" + "1"));
+ assertTrue(data.contains("row1;b;q_Versions" + "\t" + "1"));
+ }finally{
+ t.close();
+ FileUtil.fullyDelete(new File(OUTPUT_DIR));
+ }
+ }
+
+ /**
+ * Test CellCounter with an end time range; matching data should print to the output.
+ */
+ @Test (timeout=300000)
+ public void testCellCounterEndTimeRange() throws Exception {
+ final TableName sourceTable = TableName.valueOf(name.getMethodName());
+ byte[][] families = { FAMILY_A, FAMILY_B };
+ Table t = UTIL.createTable(sourceTable, families);
+ try{
+ Put p = new Put(ROW1);
+ p.addColumn(FAMILY_A, QUALIFIER, now, Bytes.toBytes("Data11"));
+ p.addColumn(FAMILY_B, QUALIFIER, now + 1, Bytes.toBytes("Data12"));
+ p.addColumn(FAMILY_A, QUALIFIER, now + 2, Bytes.toBytes("Data13"));
+ t.put(p);
+ p = new Put(ROW2);
+ p.addColumn(FAMILY_B, QUALIFIER, now, Bytes.toBytes("Dat21"));
+ p.addColumn(FAMILY_A, QUALIFIER, now + 1, Bytes.toBytes("Data22"));
+ p.addColumn(FAMILY_B, QUALIFIER, now + 2, Bytes.toBytes("Data23"));
+ t.put(p);
+ String[] args = {
+ sourceTable.getNameAsString(), FQ_OUTPUT_DIR.toString(), ";", "^row1",
+ "--endtime=" + now + 1 };
+ runCount(args);
+ FileInputStream inputStream = new FileInputStream(OUTPUT_DIR + File.separator +
+ "part-r-00000");
+ String data = IOUtils.toString(inputStream);
+ inputStream.close();
+ assertTrue(data.contains("Total Families Across all Rows" + "\t" + "2"));
+ assertTrue(data.contains("Total Qualifiers across all Rows" + "\t" + "2"));
+ assertTrue(data.contains("Total ROWS" + "\t" + "1"));
+ assertTrue(data.contains("b;q" + "\t" + "1"));
+ assertTrue(data.contains("a;q" + "\t" + "1"));
+ assertTrue(data.contains("row1;a;q_Versions" + "\t" + "1"));
+ assertTrue(data.contains("row1;b;q_Versions" + "\t" + "1"));
+ }finally{
+ t.close();
+ FileUtil.fullyDelete(new File(OUTPUT_DIR));
+ }
+ }
+
+ /**
+ * Test CellCounter with a time range that matches no data; nothing should be emitted to the output.
+ */
+ @Test (timeout=300000)
+ public void testCellCounterOutOfTimeRange() throws Exception {
+ final TableName sourceTable = TableName.valueOf(name.getMethodName());
+ byte[][] families = { FAMILY_A, FAMILY_B };
+ Table t = UTIL.createTable(sourceTable, families);
+ try{
+ Put p = new Put(ROW1);
+ p.addColumn(FAMILY_A, QUALIFIER, now, Bytes.toBytes("Data11"));
+ p.addColumn(FAMILY_B, QUALIFIER, now + 1, Bytes.toBytes("Data12"));
+ p.addColumn(FAMILY_A, QUALIFIER, now + 2, Bytes.toBytes("Data13"));
+ t.put(p);
+ p = new Put(ROW2);
+ p.addColumn(FAMILY_B, QUALIFIER, now, Bytes.toBytes("Dat21"));
+ p.addColumn(FAMILY_A, QUALIFIER, now + 1, Bytes.toBytes("Data22"));
+ p.addColumn(FAMILY_B, QUALIFIER, now + 2, Bytes.toBytes("Data23"));
+ t.put(p);
+ String[] args = {
+ sourceTable.getNameAsString(), FQ_OUTPUT_DIR.toString(), ";", "--starttime=" + now + 1,
+ "--endtime=" + now + 2 };
+
+ runCount(args);
+ FileInputStream inputStream = new FileInputStream(OUTPUT_DIR + File.separator +
+ "part-r-00000");
+ String data = IOUtils.toString(inputStream);
+ inputStream.close();
+ // nothing should have been emitted to the reducer
+ assertTrue(data.isEmpty());
+ }finally{
+ t.close();
+ FileUtil.fullyDelete(new File(OUTPUT_DIR));
+ }
+ }
+
+
+ private boolean runCount(String[] args) throws Exception {
+ // need to make a copy of the configuration to make sure
+ // different temp dirs are used.
+ int status = ToolRunner.run(new Configuration(UTIL.getConfiguration()), new CellCounter(),
+ args);
+ return status == 0;
+ }
+
+ /**
+ * Test main method of CellCounter
+ */
+ @Test (timeout=300000)
+ public void testCellCounterMain() throws Exception {
+
+ PrintStream oldPrintStream = System.err;
+ SecurityManager SECURITY_MANAGER = System.getSecurityManager();
+ LauncherSecurityManager newSecurityManager= new LauncherSecurityManager();
+ System.setSecurityManager(newSecurityManager);
+ ByteArrayOutputStream data = new ByteArrayOutputStream();
+ String[] args = {};
+ System.setErr(new PrintStream(data));
+ try {
+ System.setErr(new PrintStream(data));
+
+ try {
+ CellCounter.main(args);
+ fail("should be SecurityException");
+ } catch (SecurityException e) {
+ assertEquals(-1, newSecurityManager.getExitCode());
+ assertTrue(data.toString().contains("ERROR: Wrong number of parameters:"));
+ // should be information about usage
+ assertTrue(data.toString().contains("Usage:"));
+ }
+
+ } finally {
+ System.setErr(oldPrintStream);
+ System.setSecurityManager(SECURITY_MANAGER);
+ }
+ }
+
+ /**
+ * Test CellCounter for a complete table; all data should print to the output.
+ */
+ @Test(timeout = 600000)
+ public void testCellCounterForCompleteTable() throws Exception {
+ final TableName sourceTable = TableName.valueOf(name.getMethodName());
+ String outputPath = OUTPUT_DIR + sourceTable;
+ LocalFileSystem localFileSystem = new LocalFileSystem();
+ Path outputDir =
+ new Path(outputPath).makeQualified(localFileSystem.getUri(),
+ localFileSystem.getWorkingDirectory());
+ byte[][] families = { FAMILY_A, FAMILY_B };
+ Table t = UTIL.createTable(sourceTable, families);
+ try {
+ Put p = new Put(ROW1);
+ p.addColumn(FAMILY_A, QUALIFIER, now, Bytes.toBytes("Data11"));
+ p.addColumn(FAMILY_B, QUALIFIER, now + 1, Bytes.toBytes("Data12"));
+ p.addColumn(FAMILY_A, QUALIFIER, now + 2, Bytes.toBytes("Data13"));
+ t.put(p);
+ p = new Put(ROW2);
+ p.addColumn(FAMILY_B, QUALIFIER, now, Bytes.toBytes("Dat21"));
+ p.addColumn(FAMILY_A, QUALIFIER, now + 1, Bytes.toBytes("Data22"));
+ p.addColumn(FAMILY_B, QUALIFIER, now + 2, Bytes.toBytes("Data23"));
+ t.put(p);
+ String[] args = { sourceTable.getNameAsString(), outputDir.toString(), ";" };
+ runCount(args);
+ FileInputStream inputStream =
+ new FileInputStream(outputPath + File.separator + "part-r-00000");
+ String data = IOUtils.toString(inputStream);
+ inputStream.close();
+ assertTrue(data.contains("Total Families Across all Rows" + "\t" + "2"));
+ assertTrue(data.contains("Total Qualifiers across all Rows" + "\t" + "4"));
+ assertTrue(data.contains("Total ROWS" + "\t" + "2"));
+ assertTrue(data.contains("b;q" + "\t" + "2"));
+ assertTrue(data.contains("a;q" + "\t" + "2"));
+ assertTrue(data.contains("row1;a;q_Versions" + "\t" + "1"));
+ assertTrue(data.contains("row1;b;q_Versions" + "\t" + "1"));
+ assertTrue(data.contains("row2;a;q_Versions" + "\t" + "1"));
+ assertTrue(data.contains("row2;b;q_Versions" + "\t" + "1"));
+
+ FileUtil.fullyDelete(new File(outputPath));
+ args = new String[] { "-D " + TableInputFormat.SCAN_COLUMN_FAMILY + "=a, b",
+ sourceTable.getNameAsString(), outputDir.toString(), ";"};
+ runCount(args);
+ inputStream = new FileInputStream(outputPath + File.separator + "part-r-00000");
+ String data2 = IOUtils.toString(inputStream);
+ inputStream.close();
+ assertEquals(data, data2);
+ } finally {
+ t.close();
+ localFileSystem.close();
+ FileUtil.fullyDelete(new File(outputPath));
+ }
+ }
+
+ @Test
+ public void testCellCounterWithoutOutputDir() throws Exception {
+ String[] args = new String[] { "tableName" };
+ assertEquals("CellCounter should exit with -1 as output directory is not specified.", -1,
+ ToolRunner.run(HBaseConfiguration.create(), new CellCounter(), args));
+ }
+}
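For reference, the argument order these tests exercise (table name, output directory, report separator, optional row regex or prefix, then optional --starttime/--endtime) can be driven the same way runCount() does; the table name and output path below are placeholders:

    // Hypothetical invocation mirroring runCount() above; values are placeholders.
    int exitCode = ToolRunner.run(HBaseConfiguration.create(), new CellCounter(),
        new String[] { "myTable", "/tmp/cellcounter-out", ";", "^row1" });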
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCopyTable.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCopyTable.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCopyTable.java
new file mode 100644
index 0000000..0bec03b
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCopyTable.java
@@ -0,0 +1,262 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.ByteArrayOutputStream;
+import java.io.PrintStream;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.LauncherSecurityManager;
+import org.apache.hadoop.util.ToolRunner;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+
+/**
+ * Basic test for the CopyTable M/R tool
+ */
+@Category({MapReduceTests.class, LargeTests.class})
+public class TestCopyTable {
+ private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+ private static final byte[] ROW1 = Bytes.toBytes("row1");
+ private static final byte[] ROW2 = Bytes.toBytes("row2");
+ private static final String FAMILY_A_STRING = "a";
+ private static final String FAMILY_B_STRING = "b";
+ private static final byte[] FAMILY_A = Bytes.toBytes(FAMILY_A_STRING);
+ private static final byte[] FAMILY_B = Bytes.toBytes(FAMILY_B_STRING);
+ private static final byte[] QUALIFIER = Bytes.toBytes("q");
+
+ @Rule
+ public TestName name = new TestName();
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ TEST_UTIL.startMiniCluster(3);
+ }
+
+ @AfterClass
+ public static void afterClass() throws Exception {
+ TEST_UTIL.shutdownMiniCluster();
+ }
+
+ private void doCopyTableTest(boolean bulkload) throws Exception {
+ final TableName tableName1 = TableName.valueOf(name.getMethodName() + "1");
+ final TableName tableName2 = TableName.valueOf(name.getMethodName() + "2");
+ final byte[] FAMILY = Bytes.toBytes("family");
+ final byte[] COLUMN1 = Bytes.toBytes("c1");
+
+ try (Table t1 = TEST_UTIL.createTable(tableName1, FAMILY);
+ Table t2 = TEST_UTIL.createTable(tableName2, FAMILY);) {
+ // put rows into the first table
+ for (int i = 0; i < 10; i++) {
+ Put p = new Put(Bytes.toBytes("row" + i));
+ p.addColumn(FAMILY, COLUMN1, COLUMN1);
+ t1.put(p);
+ }
+
+ CopyTable copy = new CopyTable();
+
+ int code;
+ if (bulkload) {
+ code = ToolRunner.run(new Configuration(TEST_UTIL.getConfiguration()),
+ copy, new String[] { "--new.name=" + tableName2.getNameAsString(),
+ "--bulkload", tableName1.getNameAsString() });
+ } else {
+ code = ToolRunner.run(new Configuration(TEST_UTIL.getConfiguration()),
+ copy, new String[] { "--new.name=" + tableName2.getNameAsString(),
+ tableName1.getNameAsString() });
+ }
+ assertEquals("copy job failed", 0, code);
+
+ // verify the data was copied into table 2
+ for (int i = 0; i < 10; i++) {
+ Get g = new Get(Bytes.toBytes("row" + i));
+ Result r = t2.get(g);
+ assertEquals(1, r.size());
+ assertTrue(CellUtil.matchingQualifier(r.rawCells()[0], COLUMN1));
+ }
+ } finally {
+ TEST_UTIL.deleteTable(tableName1);
+ TEST_UTIL.deleteTable(tableName2);
+ }
+ }
+
+ /**
+ * Simple end-to-end test
+ * @throws Exception
+ */
+ @Test
+ public void testCopyTable() throws Exception {
+ doCopyTableTest(false);
+ }
+
+ /**
+ * Simple end-to-end test with bulkload.
+ */
+ @Test
+ public void testCopyTableWithBulkload() throws Exception {
+ doCopyTableTest(true);
+ }
+
+ @Test
+ public void testStartStopRow() throws Exception {
+ final TableName tableName1 = TableName.valueOf(name.getMethodName() + "1");
+ final TableName tableName2 = TableName.valueOf(name.getMethodName() + "2");
+ final byte[] FAMILY = Bytes.toBytes("family");
+ final byte[] COLUMN1 = Bytes.toBytes("c1");
+ final byte[] ROW0 = Bytes.toBytesBinary("\\x01row0");
+ final byte[] ROW1 = Bytes.toBytesBinary("\\x01row1");
+ final byte[] ROW2 = Bytes.toBytesBinary("\\x01row2");
+
+ Table t1 = TEST_UTIL.createTable(tableName1, FAMILY);
+ Table t2 = TEST_UTIL.createTable(tableName2, FAMILY);
+
+ // put rows into the first table
+ Put p = new Put(ROW0);
+ p.addColumn(FAMILY, COLUMN1, COLUMN1);
+ t1.put(p);
+ p = new Put(ROW1);
+ p.addColumn(FAMILY, COLUMN1, COLUMN1);
+ t1.put(p);
+ p = new Put(ROW2);
+ p.addColumn(FAMILY, COLUMN1, COLUMN1);
+ t1.put(p);
+
+ CopyTable copy = new CopyTable();
+ assertEquals(
+ 0,
+ ToolRunner.run(new Configuration(TEST_UTIL.getConfiguration()),
+ copy, new String[] { "--new.name=" + tableName2, "--startrow=\\x01row1",
+ "--stoprow=\\x01row2", tableName1.getNameAsString() }));
+
+ // verify the data was copied into table 2
+ // row1 exist, row0, row2 do not exist
+ Get g = new Get(ROW1);
+ Result r = t2.get(g);
+ assertEquals(1, r.size());
+ assertTrue(CellUtil.matchingQualifier(r.rawCells()[0], COLUMN1));
+
+ g = new Get(ROW0);
+ r = t2.get(g);
+ assertEquals(0, r.size());
+
+ g = new Get(ROW2);
+ r = t2.get(g);
+ assertEquals(0, r.size());
+
+ t1.close();
+ t2.close();
+ TEST_UTIL.deleteTable(tableName1);
+ TEST_UTIL.deleteTable(tableName2);
+ }
+
+ /**
+ * Test a copy from sourceTable to targetTable of all rows from family a.
+ */
+ @Test
+ public void testRenameFamily() throws Exception {
+ final TableName sourceTable = TableName.valueOf(name.getMethodName() + "source");
+ final TableName targetTable = TableName.valueOf(name.getMethodName() + "-target");
+
+ byte[][] families = { FAMILY_A, FAMILY_B };
+
+ Table t = TEST_UTIL.createTable(sourceTable, families);
+ Table t2 = TEST_UTIL.createTable(targetTable, families);
+ Put p = new Put(ROW1);
+ p.addColumn(FAMILY_A, QUALIFIER, Bytes.toBytes("Data11"));
+ p.addColumn(FAMILY_B, QUALIFIER, Bytes.toBytes("Data12"));
+ p.addColumn(FAMILY_A, QUALIFIER, Bytes.toBytes("Data13"));
+ t.put(p);
+ p = new Put(ROW2);
+ p.addColumn(FAMILY_B, QUALIFIER, Bytes.toBytes("Dat21"));
+ p.addColumn(FAMILY_A, QUALIFIER, Bytes.toBytes("Data22"));
+ p.addColumn(FAMILY_B, QUALIFIER, Bytes.toBytes("Data23"));
+ t.put(p);
+
+ long currentTime = System.currentTimeMillis();
+ String[] args = new String[] { "--new.name=" + targetTable, "--families=a:b", "--all.cells",
+ "--starttime=" + (currentTime - 100000), "--endtime=" + (currentTime + 100000),
+ "--versions=1", sourceTable.getNameAsString() };
+ assertNull(t2.get(new Get(ROW1)).getRow());
+
+ assertTrue(runCopy(args));
+
+ assertNotNull(t2.get(new Get(ROW1)).getRow());
+ Result res = t2.get(new Get(ROW1));
+ byte[] b1 = res.getValue(FAMILY_B, QUALIFIER);
+ assertEquals("Data13", new String(b1));
+ assertNotNull(t2.get(new Get(ROW2)).getRow());
+ res = t2.get(new Get(ROW2));
+ b1 = res.getValue(FAMILY_A, QUALIFIER);
+ // Data from family B was not copied
+ assertNull(b1);
+
+ }
+
+ /**
+ * Test main method of CopyTable.
+ */
+ @Test
+ public void testMainMethod() throws Exception {
+ String[] emptyArgs = { "-h" };
+ PrintStream oldWriter = System.err;
+ ByteArrayOutputStream data = new ByteArrayOutputStream();
+ PrintStream writer = new PrintStream(data);
+ System.setErr(writer);
+ SecurityManager SECURITY_MANAGER = System.getSecurityManager();
+ LauncherSecurityManager newSecurityManager= new LauncherSecurityManager();
+ System.setSecurityManager(newSecurityManager);
+ try {
+ CopyTable.main(emptyArgs);
+ fail("should be exit");
+ } catch (SecurityException e) {
+ assertEquals(1, newSecurityManager.getExitCode());
+ } finally {
+ System.setErr(oldWriter);
+ System.setSecurityManager(SECURITY_MANAGER);
+ }
+ assertTrue(data.toString().contains("rs.class"));
+ // should print usage information
+ assertTrue(data.toString().contains("Usage:"));
+ }
+
+ private boolean runCopy(String[] args) throws Exception {
+ int status = ToolRunner.run(new Configuration(TEST_UTIL.getConfiguration()), new CopyTable(),
+ args);
+ return status == 0;
+ }
+}
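The same ToolRunner pattern used by runCopy() applies to the flags exercised above (--new.name, --startrow/--stoprow, --families, --all.cells, --bulkload); the table names in this sketch are placeholders:

    // Hypothetical invocation mirroring runCopy() above; table names are placeholders.
    int exitCode = ToolRunner.run(HBaseConfiguration.create(), new CopyTable(),
        new String[] { "--new.name=backupTable", "--families=a", "sourceTable" });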
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestGroupingTableMapper.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestGroupingTableMapper.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestGroupingTableMapper.java
new file mode 100644
index 0000000..7e36602
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestGroupingTableMapper.java
@@ -0,0 +1,68 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
+ * agreements. See the NOTICE file distributed with this work for additional information regarding
+ * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with the License. You may
+ * obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import static org.mockito.Mockito.*;
+
+@Category({MapReduceTests.class, SmallTests.class})
+public class TestGroupingTableMapper {
+
+ /**
+ * Test GroupingTableMapper class
+ */
+ @Test
+ public void testGroupingTableMapper() throws Exception {
+
+ GroupingTableMapper mapper = new GroupingTableMapper();
+ Configuration configuration = new Configuration();
+ configuration.set(GroupingTableMapper.GROUP_COLUMNS, "family1:clm family2:clm");
+ mapper.setConf(configuration);
+
+ Result result = mock(Result.class);
+ @SuppressWarnings("unchecked")
+ Mapper<ImmutableBytesWritable, Result, ImmutableBytesWritable, Result>.Context context =
+ mock(Mapper.Context.class);
+ context.write(any(ImmutableBytesWritable.class), any(Result.class));
+ List<Cell> keyValue = new ArrayList<>();
+ byte[] row = {};
+ keyValue.add(new KeyValue(row, Bytes.toBytes("family2"), Bytes.toBytes("clm"), Bytes
+ .toBytes("value1")));
+ keyValue.add(new KeyValue(row, Bytes.toBytes("family1"), Bytes.toBytes("clm"), Bytes
+ .toBytes("value2")));
+ when(result.listCells()).thenReturn(keyValue);
+ mapper.map(null, result, context);
+ // template data
+ byte[][] data = { Bytes.toBytes("value1"), Bytes.toBytes("value2") };
+ ImmutableBytesWritable ibw = mapper.createGroupKey(data);
+ verify(context).write(ibw, result);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableSnapshotInputFormatImpl.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableSnapshotInputFormatImpl.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableSnapshotInputFormatImpl.java
new file mode 100644
index 0000000..4331c0f
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableSnapshotInputFormatImpl.java
@@ -0,0 +1,252 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.Maps;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.classification.InterfaceStability;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper;
+import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
+import org.apache.hadoop.hbase.util.ConfigurationUtil;
+import org.apache.hadoop.hbase.util.FSUtils;
+
+import java.io.IOException;
+import java.util.AbstractMap;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.UUID;
+
+/**
+ * Shared implementation of mapreduce code over multiple table snapshots.
+ * Utilized by both the mapreduce
+ * ({@link org.apache.hadoop.hbase.mapreduce.MultiTableSnapshotInputFormat}) and mapred
+ * ({@link org.apache.hadoop.hbase.mapred.MultiTableSnapshotInputFormat}) implementations.
+ */
+@InterfaceAudience.LimitedPrivate({ "HBase" })
+@InterfaceStability.Evolving
+public class MultiTableSnapshotInputFormatImpl {
+
+ private static final Log LOG = LogFactory.getLog(MultiTableSnapshotInputFormatImpl.class);
+
+ public static final String RESTORE_DIRS_KEY =
+ "hbase.MultiTableSnapshotInputFormat.restore.snapshotDirMapping";
+ public static final String SNAPSHOT_TO_SCANS_KEY =
+ "hbase.MultiTableSnapshotInputFormat.snapshotsToScans";
+
+ /**
+ * Configure conf to read from snapshotScans, with snapshots restored to a subdirectory of
+ * restoreDir.
+ * Sets: {@link #RESTORE_DIRS_KEY}, {@link #SNAPSHOT_TO_SCANS_KEY}
+ *
+ * @param conf configuration to update
+ * @param snapshotScans map from snapshot name to the scans to run against it
+ * @param restoreDir base directory under which the snapshots will be restored
+ * @throws IOException if the snapshots cannot be restored
+ */
+ public void setInput(Configuration conf, Map<String, Collection<Scan>> snapshotScans,
+ Path restoreDir) throws IOException {
+ Path rootDir = FSUtils.getRootDir(conf);
+ FileSystem fs = rootDir.getFileSystem(conf);
+
+ setSnapshotToScans(conf, snapshotScans);
+ Map<String, Path> restoreDirs =
+ generateSnapshotToRestoreDirMapping(snapshotScans.keySet(), restoreDir);
+ setSnapshotDirs(conf, restoreDirs);
+ restoreSnapshots(conf, restoreDirs, fs);
+ }
+
+ /**
+ * Return the list of splits extracted from the scans/snapshots pushed to conf by
+ * {@link
+ * #setInput(org.apache.hadoop.conf.Configuration, java.util.Map, org.apache.hadoop.fs.Path)}
+ *
+ * @param conf Configuration to determine splits from
+ * @return Return the list of splits extracted from the scans/snapshots pushed to conf
+ * @throws IOException
+ */
+ public List<TableSnapshotInputFormatImpl.InputSplit> getSplits(Configuration conf)
+ throws IOException {
+ Path rootDir = FSUtils.getRootDir(conf);
+ FileSystem fs = rootDir.getFileSystem(conf);
+
+ List<TableSnapshotInputFormatImpl.InputSplit> rtn = Lists.newArrayList();
+
+ Map<String, Collection<Scan>> snapshotsToScans = getSnapshotsToScans(conf);
+ Map<String, Path> snapshotsToRestoreDirs = getSnapshotDirs(conf);
+ for (Map.Entry<String, Collection<Scan>> entry : snapshotsToScans.entrySet()) {
+ String snapshotName = entry.getKey();
+
+ Path restoreDir = snapshotsToRestoreDirs.get(snapshotName);
+
+ SnapshotManifest manifest =
+ TableSnapshotInputFormatImpl.getSnapshotManifest(conf, snapshotName, rootDir, fs);
+ List<HRegionInfo> regionInfos =
+ TableSnapshotInputFormatImpl.getRegionInfosFromManifest(manifest);
+
+ for (Scan scan : entry.getValue()) {
+ List<TableSnapshotInputFormatImpl.InputSplit> splits =
+ TableSnapshotInputFormatImpl.getSplits(scan, manifest, regionInfos, restoreDir, conf);
+ rtn.addAll(splits);
+ }
+ }
+ return rtn;
+ }
+
+ /**
+ * Retrieve the snapshot name -> list<scan> mapping pushed to configuration by
+ * {@link #setSnapshotToScans(org.apache.hadoop.conf.Configuration, java.util.Map)}
+ *
+ * @param conf Configuration to extract name -> list<scan> mappings from.
+ * @return the snapshot name -> list<scan> mapping pushed to configuration
+ * @throws IOException
+ */
+ public Map<String, Collection<Scan>> getSnapshotsToScans(Configuration conf) throws IOException {
+
+ Map<String, Collection<Scan>> rtn = Maps.newHashMap();
+
+ for (Map.Entry<String, String> entry : ConfigurationUtil
+ .getKeyValues(conf, SNAPSHOT_TO_SCANS_KEY)) {
+ String snapshotName = entry.getKey();
+ String scan = entry.getValue();
+
+ Collection<Scan> snapshotScans = rtn.get(snapshotName);
+ if (snapshotScans == null) {
+ snapshotScans = Lists.newArrayList();
+ rtn.put(snapshotName, snapshotScans);
+ }
+
+ snapshotScans.add(TableMapReduceUtil.convertStringToScan(scan));
+ }
+
+ return rtn;
+ }
+
+ /**
+ * Push snapshotScans to conf (under the key {@link #SNAPSHOT_TO_SCANS_KEY})
+ *
+ * @param conf configuration to update
+ * @param snapshotScans map from snapshot name to the scans to serialize into it
+ * @throws IOException if a scan cannot be serialized
+ */
+ public void setSnapshotToScans(Configuration conf, Map<String, Collection<Scan>> snapshotScans)
+ throws IOException {
+ // flatten out snapshotScans for serialization to the job conf
+ List<Map.Entry<String, String>> snapshotToSerializedScans = Lists.newArrayList();
+
+ for (Map.Entry<String, Collection<Scan>> entry : snapshotScans.entrySet()) {
+ String snapshotName = entry.getKey();
+ Collection<Scan> scans = entry.getValue();
+
+ // serialize all scans and map them to the appropriate snapshot
+ for (Scan scan : scans) {
+ snapshotToSerializedScans.add(new AbstractMap.SimpleImmutableEntry<>(snapshotName,
+ TableMapReduceUtil.convertScanToString(scan)));
+ }
+ }
+
+ ConfigurationUtil.setKeyValues(conf, SNAPSHOT_TO_SCANS_KEY, snapshotToSerializedScans);
+ }
+
+ /**
+ * Retrieve the directories into which snapshots have been restored from
+ * ({@link #RESTORE_DIRS_KEY})
+ *
+ * @param conf Configuration to extract restore directories from
+ * @return the directories into which snapshots have been restored from
+ * @throws IOException
+ */
+ public Map<String, Path> getSnapshotDirs(Configuration conf) throws IOException {
+ List<Map.Entry<String, String>> kvps = ConfigurationUtil.getKeyValues(conf, RESTORE_DIRS_KEY);
+ Map<String, Path> rtn = Maps.newHashMapWithExpectedSize(kvps.size());
+
+ for (Map.Entry<String, String> kvp : kvps) {
+ rtn.put(kvp.getKey(), new Path(kvp.getValue()));
+ }
+
+ return rtn;
+ }
+
+ public void setSnapshotDirs(Configuration conf, Map<String, Path> snapshotDirs) {
+ Map<String, String> toSet = Maps.newHashMap();
+
+ for (Map.Entry<String, Path> entry : snapshotDirs.entrySet()) {
+ toSet.put(entry.getKey(), entry.getValue().toString());
+ }
+
+ ConfigurationUtil.setKeyValues(conf, RESTORE_DIRS_KEY, toSet.entrySet());
+ }
+
+ /**
+ * Generate a random path underneath baseRestoreDir for each snapshot in snapshots and
+ * return a map from the snapshot to the restore directory.
+ *
+ * @param snapshots collection of snapshot names to restore
+ * @param baseRestoreDir base directory under which all snapshots in snapshots will be restored
+ * @return a mapping from snapshot name to the directory in which that snapshot has been restored
+ */
+ private Map<String, Path> generateSnapshotToRestoreDirMapping(Collection<String> snapshots,
+ Path baseRestoreDir) {
+ Map<String, Path> rtn = Maps.newHashMap();
+
+ for (String snapshotName : snapshots) {
+ Path restoreSnapshotDir =
+ new Path(baseRestoreDir, snapshotName + "__" + UUID.randomUUID().toString());
+ rtn.put(snapshotName, restoreSnapshotDir);
+ }
+
+ return rtn;
+ }
+
+ /**
+ * Restore each (snapshot name, restore directory) pair in snapshotToDir
+ *
+ * @param conf configuration to restore with
+ * @param snapshotToDir mapping from snapshot names to restore directories
+ * @param fs filesystem to do snapshot restoration on
+ * @throws IOException
+ */
+ public void restoreSnapshots(Configuration conf, Map<String, Path> snapshotToDir, FileSystem fs)
+ throws IOException {
+ // TODO: restore from record readers to parallelize.
+ Path rootDir = FSUtils.getRootDir(conf);
+
+ for (Map.Entry<String, Path> entry : snapshotToDir.entrySet()) {
+ String snapshotName = entry.getKey();
+ Path restoreDir = entry.getValue();
+ LOG.info("Restoring snapshot " + snapshotName + " into " + restoreDir
+ + " for MultiTableSnapshotInputFormat");
+ restoreSnapshot(conf, snapshotName, rootDir, restoreDir, fs);
+ }
+ }
+
+ void restoreSnapshot(Configuration conf, String snapshotName, Path rootDir, Path restoreDir,
+ FileSystem fs) throws IOException {
+ RestoreSnapshotHelper.copySnapshotForScanner(conf, fs, rootDir, restoreDir, snapshotName);
+ }
+
+}
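As a rough illustration of the setInput() flow implemented above, the sketch below pushes one snapshot/scan pair into a job configuration; the snapshot name and restore directory are made up, and real jobs would normally go through TableMapReduceUtil rather than calling the Impl class directly:

    // Hypothetical sketch of the configuration flow; names and paths are assumptions.
    Configuration conf = HBaseConfiguration.create();
    Map<String, Collection<Scan>> snapshotScans = new HashMap<>();
    snapshotScans.put("my_snapshot",                           // hypothetical snapshot name
        Collections.singletonList(new Scan().addFamily(Bytes.toBytes("f1"))));
    Path restoreDir = new Path("/tmp/snapshot-restore");       // hypothetical restore directory
    // Serializes the scans, records the per-snapshot restore dirs, and restores each snapshot.
    new MultiTableSnapshotInputFormatImpl().setInput(conf, snapshotScans, restoreDir);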
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultithreadedTableMapper.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultithreadedTableMapper.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultithreadedTableMapper.java
new file mode 100644
index 0000000..a505379
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultithreadedTableMapper.java
@@ -0,0 +1,301 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Method;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.mapreduce.Counter;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.MapContext;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.OutputCommitter;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.StatusReporter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.TaskAttemptID;
+import org.apache.hadoop.util.ReflectionUtils;
+
+
+/**
+ * Multithreaded implementation for {@link org.apache.hadoop.hbase.mapreduce.TableMapper}.
+ * <p>
+ * It can be used instead when the Map operation is not CPU
+ * bound in order to improve throughput.
+ * <p>
+ * Mapper implementations using this MapRunnable must be thread-safe.
+ * <p>
+ * The Map-Reduce job has to be configured with the mapper to use via
+ * {@link #setMapperClass} and the number of threads the thread pool can use with the
+ * {@link #setNumberOfThreads} method. The default value is 10 threads.
+ * <p>
+ */
+
+public class MultithreadedTableMapper<K2, V2> extends TableMapper<K2, V2> {
+ private static final Log LOG = LogFactory.getLog(MultithreadedTableMapper.class);
+ private Class<? extends Mapper<ImmutableBytesWritable, Result,K2,V2>> mapClass;
+ private Context outer;
+ private ExecutorService executor;
+ public static final String NUMBER_OF_THREADS = "hbase.mapreduce.multithreadedmapper.threads";
+ public static final String MAPPER_CLASS = "hbase.mapreduce.multithreadedmapper.mapclass";
+
+ /**
+ * The number of threads in the thread pool that will run the map function.
+ * @param job the job
+ * @return the number of threads
+ */
+ public static int getNumberOfThreads(JobContext job) {
+ return job.getConfiguration().
+ getInt(NUMBER_OF_THREADS, 10);
+ }
+
+ /**
+ * Set the number of threads in the pool for running maps.
+ * @param job the job to modify
+ * @param threads the new number of threads
+ */
+ public static void setNumberOfThreads(Job job, int threads) {
+ job.getConfiguration().setInt(NUMBER_OF_THREADS,
+ threads);
+ }
+
+ /**
+ * Get the application's mapper class.
+ * @param <K2> the map's output key type
+ * @param <V2> the map's output value type
+ * @param job the job
+ * @return the mapper class to run
+ */
+ @SuppressWarnings("unchecked")
+ public static <K2,V2>
+ Class<Mapper<ImmutableBytesWritable, Result,K2,V2>> getMapperClass(JobContext job) {
+ return (Class<Mapper<ImmutableBytesWritable, Result,K2,V2>>)
+ job.getConfiguration().getClass( MAPPER_CLASS,
+ Mapper.class);
+ }
+
+ /**
+ * Set the application's mapper class.
+ * @param <K2> the map output key type
+ * @param <V2> the map output value type
+ * @param job the job to modify
+ * @param cls the class to use as the mapper
+ */
+ public static <K2,V2>
+ void setMapperClass(Job job,
+ Class<? extends Mapper<ImmutableBytesWritable, Result,K2,V2>> cls) {
+ if (MultithreadedTableMapper.class.isAssignableFrom(cls)) {
+ throw new IllegalArgumentException("Can't have recursive " +
+ "MultithreadedTableMapper instances.");
+ }
+ job.getConfiguration().setClass(MAPPER_CLASS,
+ cls, Mapper.class);
+ }
+
+ /**
+ * Run the application's maps using a thread pool.
+ */
+ @Override
+ public void run(Context context) throws IOException, InterruptedException {
+ outer = context;
+ int numberOfThreads = getNumberOfThreads(context);
+ mapClass = getMapperClass(context);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Configuring multithread runner to use " + numberOfThreads +
+ " threads");
+ }
+ executor = Executors.newFixedThreadPool(numberOfThreads);
+ for(int i=0; i < numberOfThreads; ++i) {
+ MapRunner thread = new MapRunner(context);
+ executor.execute(thread);
+ }
+ executor.shutdown();
+ while (!executor.isTerminated()) {
+ // wait till all the threads are done
+ Thread.sleep(1000);
+ }
+ }
+
+ private class SubMapRecordReader
+ extends RecordReader<ImmutableBytesWritable, Result> {
+ private ImmutableBytesWritable key;
+ private Result value;
+ private Configuration conf;
+
+ @Override
+ public void close() throws IOException {
+ }
+
+ @Override
+ public float getProgress() throws IOException, InterruptedException {
+ return 0;
+ }
+
+ @Override
+ public void initialize(InputSplit split,
+ TaskAttemptContext context
+ ) throws IOException, InterruptedException {
+ conf = context.getConfiguration();
+ }
+
+ @Override
+ public boolean nextKeyValue() throws IOException, InterruptedException {
+ synchronized (outer) {
+ if (!outer.nextKeyValue()) {
+ return false;
+ }
+ key = ReflectionUtils.copy(outer.getConfiguration(),
+ outer.getCurrentKey(), key);
+ value = ReflectionUtils.copy(conf, outer.getCurrentValue(), value);
+ return true;
+ }
+ }
+
+ public ImmutableBytesWritable getCurrentKey() {
+ return key;
+ }
+
+ @Override
+ public Result getCurrentValue() {
+ return value;
+ }
+ }
+
+ private class SubMapRecordWriter extends RecordWriter<K2,V2> {
+
+ @Override
+ public void close(TaskAttemptContext context) throws IOException,
+ InterruptedException {
+ }
+
+ @Override
+ public void write(K2 key, V2 value) throws IOException,
+ InterruptedException {
+ synchronized (outer) {
+ outer.write(key, value);
+ }
+ }
+ }
+
+ private class SubMapStatusReporter extends StatusReporter {
+
+ @Override
+ public Counter getCounter(Enum<?> name) {
+ return outer.getCounter(name);
+ }
+
+ @Override
+ public Counter getCounter(String group, String name) {
+ return outer.getCounter(group, name);
+ }
+
+ @Override
+ public void progress() {
+ outer.progress();
+ }
+
+ @Override
+ public void setStatus(String status) {
+ outer.setStatus(status);
+ }
+
+ public float getProgress() {
+ return 0;
+ }
+ }
+
+ @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="REC_CATCH_EXCEPTION",
+ justification="Don't understand why FB is complaining about this one. We do throw exception")
+ private class MapRunner implements Runnable {
+ private Mapper<ImmutableBytesWritable, Result, K2,V2> mapper;
+ private Context subcontext;
+
+ @SuppressWarnings({ "rawtypes", "unchecked" })
+ MapRunner(Context context) throws IOException, InterruptedException {
+ mapper = ReflectionUtils.newInstance(mapClass,
+ context.getConfiguration());
+ try {
+ Constructor c = context.getClass().getConstructor(
+ Mapper.class,
+ Configuration.class,
+ TaskAttemptID.class,
+ RecordReader.class,
+ RecordWriter.class,
+ OutputCommitter.class,
+ StatusReporter.class,
+ InputSplit.class);
+ c.setAccessible(true);
+ subcontext = (Context) c.newInstance(
+ mapper,
+ outer.getConfiguration(),
+ outer.getTaskAttemptID(),
+ new SubMapRecordReader(),
+ new SubMapRecordWriter(),
+ context.getOutputCommitter(),
+ new SubMapStatusReporter(),
+ outer.getInputSplit());
+ } catch (Exception e) {
+ try {
+ Constructor c = Class.forName("org.apache.hadoop.mapreduce.task.MapContextImpl").getConstructor(
+ Configuration.class,
+ TaskAttemptID.class,
+ RecordReader.class,
+ RecordWriter.class,
+ OutputCommitter.class,
+ StatusReporter.class,
+ InputSplit.class);
+ c.setAccessible(true);
+ MapContext mc = (MapContext) c.newInstance(
+ outer.getConfiguration(),
+ outer.getTaskAttemptID(),
+ new SubMapRecordReader(),
+ new SubMapRecordWriter(),
+ context.getOutputCommitter(),
+ new SubMapStatusReporter(),
+ outer.getInputSplit());
+ Class<?> wrappedMapperClass = Class.forName("org.apache.hadoop.mapreduce.lib.map.WrappedMapper");
+ Method getMapContext = wrappedMapperClass.getMethod("getMapContext", MapContext.class);
+ subcontext = (Context) getMapContext.invoke(wrappedMapperClass.newInstance(), mc);
+ } catch (Exception ee) { // FindBugs: REC_CATCH_EXCEPTION
+ // rethrow as IOE
+ throw new IOException(e);
+ }
+ }
+ }
+
+ @Override
+ public void run() {
+ try {
+ mapper.run(subcontext);
+ } catch (Throwable ie) {
+ LOG.error("Problem in running map.", ie);
+ }
+ }
+ }
+}
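A minimal wiring sketch for the class above, assuming a driver with a Configuration named conf and a hypothetical application mapper MyMapper (a TableMapper emitting Text/Text); the thread count is read from the job configuration via getNumberOfThreads:

    Job job = Job.getInstance(conf, "multithreaded-scan");
    TableMapReduceUtil.initTableMapperJob("exampleTable", new Scan(),
        MultithreadedTableMapper.class, Text.class, Text.class, job);
    // Run MyMapper instances inside the thread pool managed by MultithreadedTableMapper.
    MultithreadedTableMapper.setMapperClass(job, MyMapper.class);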
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MutationSerialization.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MutationSerialization.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MutationSerialization.java
new file mode 100644
index 0000000..d5faab5
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MutationSerialization.java
@@ -0,0 +1,98 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Mutation;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.MutationProto;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.MutationProto.MutationType;
+import org.apache.hadoop.io.serializer.Deserializer;
+import org.apache.hadoop.io.serializer.Serialization;
+import org.apache.hadoop.io.serializer.Serializer;
+
+@InterfaceAudience.Public
+public class MutationSerialization implements Serialization<Mutation> {
+ @Override
+ public boolean accept(Class<?> c) {
+ return Mutation.class.isAssignableFrom(c);
+ }
+
+ @Override
+ public Deserializer<Mutation> getDeserializer(Class<Mutation> c) {
+ return new MutationDeserializer();
+ }
+
+ @Override
+ public Serializer<Mutation> getSerializer(Class<Mutation> c) {
+ return new MutationSerializer();
+ }
+
+ private static class MutationDeserializer implements Deserializer<Mutation> {
+ private InputStream in;
+
+ @Override
+ public void close() throws IOException {
+ in.close();
+ }
+
+ @Override
+ public Mutation deserialize(Mutation mutation) throws IOException {
+ MutationProto proto = MutationProto.parseDelimitedFrom(in);
+ return ProtobufUtil.toMutation(proto);
+ }
+
+ @Override
+ public void open(InputStream in) throws IOException {
+ this.in = in;
+ }
+
+ }
+ private static class MutationSerializer implements Serializer<Mutation> {
+ private OutputStream out;
+
+ @Override
+ public void close() throws IOException {
+ out.close();
+ }
+
+ @Override
+ public void open(OutputStream out) throws IOException {
+ this.out = out;
+ }
+
+ @Override
+ public void serialize(Mutation mutation) throws IOException {
+ MutationType type;
+ if (mutation instanceof Put) {
+ type = MutationType.PUT;
+ } else if (mutation instanceof Delete) {
+ type = MutationType.DELETE;
+ } else {
+ throw new IllegalArgumentException("Only Put and Delete are supported");
+ }
+ ProtobufUtil.toMutation(type, mutation).writeDelimitedTo(out);
+ }
+ }
+}
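MutationSerialization is registered on the job's io.serializations list so that Put and Delete values can be passed between map and reduce tasks, as the TableMapReduceUtil helpers below do. A hand-rolled sketch of the same registration, assuming an existing Job named job:

    Configuration conf = job.getConfiguration();
    // Append the HBase serializations to whatever Hadoop already registered.
    conf.setStrings("io.serializations", conf.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName());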
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/PutCombiner.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/PutCombiner.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/PutCombiner.java
new file mode 100644
index 0000000..f01e84f
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/PutCombiner.java
@@ -0,0 +1,98 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map.Entry;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.KeyValueUtil;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.mapreduce.Reducer;
+
+/**
+ * Combine Puts. Merges Put instances grouped by <code>K</code> into a single
+ * instance.
+ * @see TableMapReduceUtil
+ */
+@InterfaceAudience.Public
+public class PutCombiner<K> extends Reducer<K, Put, K, Put> {
+ private static final Log LOG = LogFactory.getLog(PutCombiner.class);
+
+ @Override
+ protected void reduce(K row, Iterable<Put> vals, Context context)
+ throws IOException, InterruptedException {
+ // Using HeapSize to create an upper bound on the memory size of
+ // the puts and flush some portion of the content while looping. This
+ // flush could result in multiple Puts for a single rowkey. That is
+ // acceptable because Combiner is run as an optimization and it's not
+ // critical that all Puts are grouped perfectly.
+ long threshold = context.getConfiguration().getLong(
+ "putcombiner.row.threshold", 1L * (1<<30));
+ int cnt = 0;
+ long curSize = 0;
+ Put put = null;
+ Map<byte[], List<Cell>> familyMap = null;
+ for (Put p : vals) {
+ cnt++;
+ if (put == null) {
+ put = p;
+ familyMap = put.getFamilyCellMap();
+ } else {
+ for (Entry<byte[], List<Cell>> entry : p.getFamilyCellMap()
+ .entrySet()) {
+ List<Cell> cells = familyMap.get(entry.getKey());
+ List<Cell> kvs = (cells != null) ? (List<Cell>) cells : null;
+ for (Cell cell : entry.getValue()) {
+ KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
+ curSize += kv.heapSize();
+ if (kvs != null) {
+ kvs.add(kv);
+ }
+ }
+ if (cells == null) {
+ familyMap.put(entry.getKey(), entry.getValue());
+ }
+ }
+ if (cnt % 10 == 0) context.setStatus("Combine " + cnt);
+ if (curSize > threshold) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(String.format("Combined %d Put(s) into %d.", cnt, 1));
+ }
+ context.write(row, put);
+ put = null;
+ curSize = 0;
+ cnt = 0;
+ }
+ }
+ }
+ if (put != null) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(String.format("Combined %d Put(s) into %d.", cnt, 1));
+ }
+ context.write(row, put);
+ }
+ }
+}
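A sketch of enabling the combiner on a job whose mapper emits Put values, assuming an existing Job named job; the flush threshold key and its 1 GB default come from the reduce() implementation above:

    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(Put.class);
    job.setCombinerClass(PutCombiner.class);
    // Optional: lower the per-row flush threshold from the 1 GB default.
    job.getConfiguration().setLong("putcombiner.row.threshold", 256L * 1024 * 1024);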
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/PutSortReducer.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/PutSortReducer.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/PutSortReducer.java
new file mode 100644
index 0000000..17ab9cb
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/PutSortReducer.java
@@ -0,0 +1,147 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.TreeSet;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.ArrayBackedTag;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellComparator;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.KeyValueUtil;
+import org.apache.hadoop.hbase.Tag;
+import org.apache.hadoop.hbase.TagType;
+import org.apache.hadoop.hbase.TagUtil;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.exceptions.DeserializationException;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.security.visibility.CellVisibility;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.util.StringUtils;
+
+/**
+ * Emits sorted KeyValues.
+ * Reads in all Puts from the passed Iterator, converts them to KeyValues,
+ * sorts them, and then emits the KeyValues in sorted order. Rows with many
+ * columns will use a lot of memory during the sort.
+ * @see HFileOutputFormat2
+ * @see KeyValueSortReducer
+ */
+@InterfaceAudience.Public
+public class PutSortReducer extends
+ Reducer<ImmutableBytesWritable, Put, ImmutableBytesWritable, KeyValue> {
+ // the cell creator
+ private CellCreator kvCreator;
+
+ @Override
+ protected void
+ setup(Reducer<ImmutableBytesWritable, Put, ImmutableBytesWritable, KeyValue>.Context context)
+ throws IOException, InterruptedException {
+ Configuration conf = context.getConfiguration();
+ this.kvCreator = new CellCreator(conf);
+ }
+
+ @Override
+ protected void reduce(
+ ImmutableBytesWritable row,
+ java.lang.Iterable<Put> puts,
+ Reducer<ImmutableBytesWritable, Put,
+ ImmutableBytesWritable, KeyValue>.Context context)
+ throws java.io.IOException, InterruptedException
+ {
+ // although reduce() is called per-row, handle pathological case
+ long threshold = context.getConfiguration().getLong(
+ "putsortreducer.row.threshold", 1L * (1<<30));
+ Iterator<Put> iter = puts.iterator();
+ while (iter.hasNext()) {
+ TreeSet<KeyValue> map = new TreeSet<>(CellComparator.COMPARATOR);
+ long curSize = 0;
+ // stop at the end or the RAM threshold
+ List<Tag> tags = new ArrayList<>();
+ while (iter.hasNext() && curSize < threshold) {
+ // clear the tags
+ tags.clear();
+ Put p = iter.next();
+ long t = p.getTTL();
+ if (t != Long.MAX_VALUE) {
+ // add TTL tag if found
+ tags.add(new ArrayBackedTag(TagType.TTL_TAG_TYPE, Bytes.toBytes(t)));
+ }
+ byte[] acl = p.getACL();
+ if (acl != null) {
+ // add ACL tag if found
+ tags.add(new ArrayBackedTag(TagType.ACL_TAG_TYPE, acl));
+ }
+ try {
+ CellVisibility cellVisibility = p.getCellVisibility();
+ if (cellVisibility != null) {
+ // add the visibility labels if any
+ tags.addAll(kvCreator.getVisibilityExpressionResolver()
+ .createVisibilityExpTags(cellVisibility.getExpression()));
+ }
+ } catch (DeserializationException e) {
+ // We just throw exception here. Should we allow other mutations to proceed by
+ // just ignoring the bad one?
+ throw new IOException("Invalid visibility expression found in mutation " + p, e);
+ }
+ for (List<Cell> cells: p.getFamilyCellMap().values()) {
+ for (Cell cell: cells) {
+ // Creating the KV which needs to be directly written to HFiles. Using the Facade
+ // KVCreator for creation of kvs.
+ KeyValue kv = null;
+ TagUtil.carryForwardTags(tags, cell);
+ if (!tags.isEmpty()) {
+ kv = (KeyValue) kvCreator.create(cell.getRowArray(), cell.getRowOffset(),
+ cell.getRowLength(), cell.getFamilyArray(), cell.getFamilyOffset(),
+ cell.getFamilyLength(), cell.getQualifierArray(), cell.getQualifierOffset(),
+ cell.getQualifierLength(), cell.getTimestamp(), cell.getValueArray(),
+ cell.getValueOffset(), cell.getValueLength(), tags);
+ } else {
+ kv = KeyValueUtil.ensureKeyValue(cell);
+ }
+ if (map.add(kv)) {// don't count duplicated kv into size
+ curSize += kv.heapSize();
+ }
+ }
+ }
+ }
+ context.setStatus("Read " + map.size() + " entries of " + map.getClass()
+ + "(" + StringUtils.humanReadableInt(curSize) + ")");
+ int index = 0;
+ for (KeyValue kv : map) {
+ context.write(row, kv);
+ if (++index % 100 == 0)
+ context.setStatus("Wrote " + index);
+ }
+
+ // if we have more entries to process
+ if (iter.hasNext()) {
+ // force flush because we cannot guarantee intra-row sorted order
+ context.write(null, null);
+ }
+ }
+ }
+}
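A hand-wired sketch for a bulk-load style job, assuming an existing Job named job; in practice HFileOutputFormat2's incremental-load setup typically selects this reducer for Put-emitting mappers:

    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(Put.class);
    job.setReducerClass(PutSortReducer.class);
    // Optional: tune the in-memory sort threshold used by reduce() above.
    job.getConfiguration().setLong("putsortreducer.row.threshold", 512L * 1024 * 1024);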
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RegionSizeCalculator.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RegionSizeCalculator.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RegionSizeCalculator.java
new file mode 100644
index 0000000..f14cd90
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RegionSizeCalculator.java
@@ -0,0 +1,127 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.Sets;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HRegionLocation;
+import org.apache.hadoop.hbase.RegionLoad;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.hadoop.hbase.util.Bytes;
+
+/**
+ * Computes the size of each region for the given table and column families.
+ * The value is used by MapReduce for better scheduling.
+ * */
+@InterfaceAudience.Private
+public class RegionSizeCalculator {
+
+ private static final Log LOG = LogFactory.getLog(RegionSizeCalculator.class);
+
+ /**
+ * Maps each region to its size in bytes.
+ * */
+ private final Map<byte[], Long> sizeMap = new TreeMap<>(Bytes.BYTES_COMPARATOR);
+
+ static final String ENABLE_REGIONSIZECALCULATOR = "hbase.regionsizecalculator.enable";
+ private static final long MEGABYTE = 1024L * 1024L;
+
+ /**
+ * Computes the size of each region for the given table and column families.
+ * */
+ public RegionSizeCalculator(RegionLocator regionLocator, Admin admin) throws IOException {
+ init(regionLocator, admin);
+ }
+
+ private void init(RegionLocator regionLocator, Admin admin)
+ throws IOException {
+ if (!enabled(admin.getConfiguration())) {
+ LOG.info("Region size calculation disabled.");
+ return;
+ }
+
+ if (regionLocator.getName().isSystemTable()) {
+ LOG.info("Region size calculation disabled for system tables.");
+ return;
+ }
+
+ LOG.info("Calculating region sizes for table \"" + regionLocator.getName() + "\".");
+
+ // Get the servers which host regions of the table
+ Set<ServerName> tableServers = getRegionServersOfTable(regionLocator);
+
+ for (ServerName tableServerName : tableServers) {
+ Map<byte[], RegionLoad> regionLoads =
+ admin.getRegionLoad(tableServerName, regionLocator.getName());
+ for (RegionLoad regionLoad : regionLoads.values()) {
+
+ byte[] regionId = regionLoad.getName();
+ long regionSizeBytes = regionLoad.getStorefileSizeMB() * MEGABYTE;
+ sizeMap.put(regionId, regionSizeBytes);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Region " + regionLoad.getNameAsString() + " has size " + regionSizeBytes);
+ }
+ }
+ }
+ LOG.debug("Region sizes calculated");
+ }
+
+ private Set<ServerName> getRegionServersOfTable(RegionLocator regionLocator)
+ throws IOException {
+
+ Set<ServerName> tableServers = Sets.newHashSet();
+ for (HRegionLocation regionLocation : regionLocator.getAllRegionLocations()) {
+ tableServers.add(regionLocation.getServerName());
+ }
+ return tableServers;
+ }
+
+ boolean enabled(Configuration configuration) {
+ return configuration.getBoolean(ENABLE_REGIONSIZECALCULATOR, true);
+ }
+
+ /**
+ * Returns size of given region in bytes. Returns 0 if region was not found.
+ * */
+ public long getRegionSize(byte[] regionId) {
+ Long size = sizeMap.get(regionId);
+ if (size == null) {
+ LOG.debug("Unknown region:" + Arrays.toString(regionId));
+ return 0;
+ } else {
+ return size;
+ }
+ }
+
+ public Map<byte[], Long> getRegionSizeMap() {
+ return Collections.unmodifiableMap(sizeMap);
+ }
+}
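A usage sketch, assuming a Configuration named conf and an illustrative table called exampleTable:

    try (Connection connection = ConnectionFactory.createConnection(conf);
         Admin admin = connection.getAdmin();
         RegionLocator locator = connection.getRegionLocator(TableName.valueOf("exampleTable"))) {
      RegionSizeCalculator calculator = new RegionSizeCalculator(locator, admin);
      // Print the storefile-derived size of every region of the table.
      for (Map.Entry<byte[], Long> entry : calculator.getRegionSizeMap().entrySet()) {
        System.out.println(Bytes.toStringBinary(entry.getKey()) + " => " + entry.getValue() + " bytes");
      }
    }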
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ResultSerialization.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ResultSerialization.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ResultSerialization.java
new file mode 100644
index 0000000..dff04b6
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ResultSerialization.java
@@ -0,0 +1,158 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.DataInput;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.serializer.Deserializer;
+import org.apache.hadoop.io.serializer.Serialization;
+import org.apache.hadoop.io.serializer.Serializer;
+
+@InterfaceAudience.Public
+public class ResultSerialization extends Configured implements Serialization<Result> {
+ private static final Log LOG = LogFactory.getLog(ResultSerialization.class);
+ // The following configuration property indicates import file format version.
+ public static final String IMPORT_FORMAT_VER = "hbase.import.version";
+
+ @Override
+ public boolean accept(Class<?> c) {
+ return Result.class.isAssignableFrom(c);
+ }
+
+ @Override
+ public Deserializer<Result> getDeserializer(Class<Result> c) {
+ // check input format version
+ Configuration conf = getConf();
+ if (conf != null) {
+ String inputVersion = conf.get(IMPORT_FORMAT_VER);
+ if (inputVersion != null && inputVersion.equals("0.94")) {
+ LOG.info("Load exported file using deserializer for HBase 0.94 format");
+ return new Result94Deserializer();
+ }
+ }
+
+ return new ResultDeserializer();
+ }
+
+ @Override
+ public Serializer<Result> getSerializer(Class<Result> c) {
+ return new ResultSerializer();
+ }
+
+ /**
+ * The following deserializer class is used to load exported file of 0.94
+ */
+ private static class Result94Deserializer implements Deserializer<Result> {
+ private DataInputStream in;
+
+ @Override
+ public void close() throws IOException {
+ in.close();
+ }
+
+ @Override
+ public Result deserialize(Result mutation) throws IOException {
+ int totalBuffer = in.readInt();
+ if (totalBuffer == 0) {
+ return Result.EMPTY_RESULT;
+ }
+ byte[] buf = new byte[totalBuffer];
+ readChunked(in, buf, 0, totalBuffer);
+ List<Cell> kvs = new ArrayList<>();
+ int offset = 0;
+ while (offset < totalBuffer) {
+ int keyLength = Bytes.toInt(buf, offset);
+ offset += Bytes.SIZEOF_INT;
+ kvs.add(new KeyValue(buf, offset, keyLength));
+ offset += keyLength;
+ }
+ return Result.create(kvs);
+ }
+
+ @Override
+ public void open(InputStream in) throws IOException {
+ if (!(in instanceof DataInputStream)) {
+ throw new IOException("Wrong input stream instance passed in");
+ }
+ this.in = (DataInputStream) in;
+ }
+
+ private void readChunked(final DataInput in, byte[] dest, int ofs, int len) throws IOException {
+ int maxRead = 8192;
+
+ for (; ofs < len; ofs += maxRead)
+ in.readFully(dest, ofs, Math.min(len - ofs, maxRead));
+ }
+ }
+
+ private static class ResultDeserializer implements Deserializer<Result> {
+ private InputStream in;
+
+ @Override
+ public void close() throws IOException {
+ in.close();
+ }
+
+ @Override
+ public Result deserialize(Result mutation) throws IOException {
+ ClientProtos.Result proto = ClientProtos.Result.parseDelimitedFrom(in);
+ return ProtobufUtil.toResult(proto);
+ }
+
+ @Override
+ public void open(InputStream in) throws IOException {
+ this.in = in;
+ }
+ }
+
+ private static class ResultSerializer implements Serializer<Result> {
+ private OutputStream out;
+
+ @Override
+ public void close() throws IOException {
+ out.close();
+ }
+
+ @Override
+ public void open(OutputStream out) throws IOException {
+ this.out = out;
+ }
+
+ @Override
+ public void serialize(Result result) throws IOException {
+ ProtobufUtil.toResult(result).writeDelimitedTo(out);
+ }
+ }
+}
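A sketch of reading files exported by HBase 0.94, assuming an existing Job named job; setting hbase.import.version selects the legacy deserializer above:

    Configuration conf = job.getConfiguration();
    conf.set(ResultSerialization.IMPORT_FORMAT_VER, "0.94");
    conf.setStrings("io.serializations", conf.get("io.serializations"),
        ResultSerialization.class.getName());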
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java
new file mode 100644
index 0000000..2e0591e
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java
@@ -0,0 +1,265 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.ArrayList;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.filter.FilterBase;
+import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
+import org.apache.hadoop.hbase.filter.MultiRowRangeFilter;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapreduce.Counter;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * A job with just a map phase to count rows. Map outputs table rows IF the
+ * input row has columns that have content.
+ */
+@InterfaceAudience.Public
+public class RowCounter extends Configured implements Tool {
+
+ private static final Log LOG = LogFactory.getLog(RowCounter.class);
+
+ /** Name of this 'program'. */
+ static final String NAME = "rowcounter";
+
+ private final static String JOB_NAME_CONF_KEY = "mapreduce.job.name";
+ private final static String EXPECTED_COUNT_KEY = RowCounter.class.getName() + ".expected_count";
+
+ /**
+ * Mapper that runs the count.
+ */
+ static class RowCounterMapper
+ extends TableMapper<ImmutableBytesWritable, Result> {
+
+ /** Counter enumeration to count the actual rows. */
+ public static enum Counters {ROWS}
+
+ /**
+ * Maps the data.
+ *
+ * @param row The current table row key.
+ * @param values The columns.
+ * @param context The current context.
+ * @throws IOException When something is broken with the data.
+ * @see org.apache.hadoop.mapreduce.Mapper#map(Object, Object, Context)
+ */
+ @Override
+ public void map(ImmutableBytesWritable row, Result values,
+ Context context)
+ throws IOException {
+ // Count every row containing data, whether it's in qualifiers or values
+ context.getCounter(Counters.ROWS).increment(1);
+ }
+ }
+
+ /**
+ * Sets up the actual job.
+ *
+ * @param conf The current configuration.
+ * @param args The command line parameters.
+ * @return The newly created job.
+ * @throws IOException When setting up the job fails.
+ */
+ public static Job createSubmittableJob(Configuration conf, String[] args)
+ throws IOException {
+ String tableName = args[0];
+ List<MultiRowRangeFilter.RowRange> rowRangeList = null;
+ long startTime = 0;
+ long endTime = 0;
+
+ StringBuilder sb = new StringBuilder();
+
+ final String rangeSwitch = "--range=";
+ final String startTimeArgKey = "--starttime=";
+ final String endTimeArgKey = "--endtime=";
+ final String expectedCountArg = "--expected-count=";
+
+ // First argument is table name, starting from second
+ for (int i = 1; i < args.length; i++) {
+ if (args[i].startsWith(rangeSwitch)) {
+ try {
+ rowRangeList = parseRowRangeParameter(args[i], rangeSwitch);
+ } catch (IllegalArgumentException e) {
+ return null;
+ }
+ continue;
+ }
+ if (args[i].startsWith(startTimeArgKey)) {
+ startTime = Long.parseLong(args[i].substring(startTimeArgKey.length()));
+ continue;
+ }
+ if (args[i].startsWith(endTimeArgKey)) {
+ endTime = Long.parseLong(args[i].substring(endTimeArgKey.length()));
+ continue;
+ }
+ if (args[i].startsWith(expectedCountArg)) {
+ conf.setLong(EXPECTED_COUNT_KEY,
+ Long.parseLong(args[i].substring(expectedCountArg.length())));
+ continue;
+ }
+ // if no switch, assume column names
+ sb.append(args[i]);
+ sb.append(" ");
+ }
+ if (endTime < startTime) {
+ printUsage("--endtime=" + endTime + " needs to be greater than --starttime=" + startTime);
+ return null;
+ }
+
+ Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
+ job.setJarByClass(RowCounter.class);
+ Scan scan = new Scan();
+ scan.setCacheBlocks(false);
+ setScanFilter(scan, rowRangeList);
+ if (sb.length() > 0) {
+ for (String columnName : sb.toString().trim().split(" ")) {
+ String family = StringUtils.substringBefore(columnName, ":");
+ String qualifier = StringUtils.substringAfter(columnName, ":");
+
+ if (StringUtils.isBlank(qualifier)) {
+ scan.addFamily(Bytes.toBytes(family));
+ }
+ else {
+ scan.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier));
+ }
+ }
+ }
+ scan.setTimeRange(startTime, endTime == 0 ? HConstants.LATEST_TIMESTAMP : endTime);
+ job.setOutputFormatClass(NullOutputFormat.class);
+ TableMapReduceUtil.initTableMapperJob(tableName, scan,
+ RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);
+ job.setNumReduceTasks(0);
+ return job;
+ }
+
+ private static List<MultiRowRangeFilter.RowRange> parseRowRangeParameter(
+ String arg, String rangeSwitch) {
+ final String[] ranges = arg.substring(rangeSwitch.length()).split(";");
+ final List<MultiRowRangeFilter.RowRange> rangeList = new ArrayList<>();
+ for (String range : ranges) {
+ String[] startEnd = range.split(",", 2);
+ if (startEnd.length != 2 || startEnd[1].contains(",")) {
+ printUsage("Please specify range in such format as \"--range=a,b\" " +
+ "or, with only one boundary, \"--range=,b\" or \"--range=a,\"");
+ throw new IllegalArgumentException("Wrong range specification: " + range);
+ }
+ String startKey = startEnd[0];
+ String endKey = startEnd[1];
+ rangeList.add(new MultiRowRangeFilter.RowRange(
+ Bytes.toBytesBinary(startKey), true,
+ Bytes.toBytesBinary(endKey), false));
+ }
+ return rangeList;
+ }
+
+ /**
+ * Sets a filter {@link FilterBase} on the {@link Scan} instance.
+ * If the provided rowRangeList contains more than one element,
+ * the filter is an instance of {@link MultiRowRangeFilter};
+ * otherwise it is an instance of {@link FirstKeyOnlyFilter}.
+ * If rowRangeList contains exactly one element, startRow and stopRow are also set on the scan.
+ * @param scan
+ * @param rowRangeList
+ */
+ private static void setScanFilter(Scan scan, List<MultiRowRangeFilter.RowRange> rowRangeList) {
+ final int size = rowRangeList == null ? 0 : rowRangeList.size();
+ if (size <= 1) {
+ scan.setFilter(new FirstKeyOnlyFilter());
+ }
+ if (size == 1) {
+ MultiRowRangeFilter.RowRange range = rowRangeList.get(0);
+ scan.setStartRow(range.getStartRow()); //inclusive
+ scan.setStopRow(range.getStopRow()); //exclusive
+ } else if (size > 1) {
+ scan.setFilter(new MultiRowRangeFilter(rowRangeList));
+ }
+ }
+
+ /*
+ * @param errorMessage Can attach a message when error occurs.
+ */
+ private static void printUsage(String errorMessage) {
+ System.err.println("ERROR: " + errorMessage);
+ printUsage();
+ }
+
+ /**
+ * Prints usage without error message.
+ * Note that we don't document --expected-count, because it's intended for test.
+ */
+ private static void printUsage() {
+ System.err.println("Usage: RowCounter [options] <tablename> " +
+ "[--starttime=[start] --endtime=[end] " +
+ "[--range=[startKey],[endKey][;[startKey],[endKey]...]] [<column1> <column2>...]");
+ System.err.println("For performance consider the following options:\n"
+ + "-Dhbase.client.scanner.caching=100\n"
+ + "-Dmapreduce.map.speculative=false");
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+ if (args.length < 1) {
+ printUsage("Wrong number of parameters: " + args.length);
+ return -1;
+ }
+ Job job = createSubmittableJob(getConf(), args);
+ if (job == null) {
+ return -1;
+ }
+ boolean success = job.waitForCompletion(true);
+ final long expectedCount = getConf().getLong(EXPECTED_COUNT_KEY, -1);
+ if (success && expectedCount != -1) {
+ final Counter counter = job.getCounters().findCounter(RowCounterMapper.Counters.ROWS);
+ success = expectedCount == counter.getValue();
+ if (!success) {
+ LOG.error("Failing job because count of '" + counter.getValue() +
+ "' does not match expected count of '" + expectedCount + "'");
+ }
+ }
+ return (success ? 0 : 1);
+ }
+
+ /**
+ * Main entry point.
+ * @param args The command line parameters.
+ * @throws Exception When running the job fails.
+ */
+ public static void main(String[] args) throws Exception {
+ int errCode = ToolRunner.run(HBaseConfiguration.create(), new RowCounter(), args);
+ System.exit(errCode);
+ }
+
+}
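A sketch of driving the tool programmatically with the same arguments the command line accepts (table name first, then optional switches and column specs); the table name, timestamps, range and column are illustrative values:

    String[] countArgs = new String[] {
        "usertable",
        "--starttime=1500000000000",
        "--endtime=1500086400000",
        "--range=row-a,row-z",
        "cf:qual"
    };
    int exitCode = ToolRunner.run(HBaseConfiguration.create(), new RowCounter(), countArgs);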
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/SimpleTotalOrderPartitioner.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/SimpleTotalOrderPartitioner.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/SimpleTotalOrderPartitioner.java
new file mode 100644
index 0000000..01a919c
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/SimpleTotalOrderPartitioner.java
@@ -0,0 +1,143 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Base64;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapreduce.Partitioner;
+
+/**
+ * A partitioner that takes start and end keys and uses BigDecimal to determine
+ * which reducer a key belongs to. Pass the start and end
+ * keys in the Configuration using <code>hbase.simpletotalorder.start</code>
+ * and <code>hbase.simpletotalorder.end</code>. The end key needs to be
+ * exclusive; i.e. one larger than the biggest key in your key space.
+ * You may be surprised at how this class partitions the space; it may not
+ * align with preconceptions; e.g. a start key of zero and an end key of 100
+ * divided in ten will not make regions whose range is 0-10, 10-20, and so on.
+ * Make your own partitioner if you need the region spacing to come out a
+ * particular way.
+ * @param <VALUE>
+ * @see #START
+ * @see #END
+ */
+@InterfaceAudience.Public
+public class SimpleTotalOrderPartitioner<VALUE> extends Partitioner<ImmutableBytesWritable, VALUE>
+implements Configurable {
+ private final static Log LOG = LogFactory.getLog(SimpleTotalOrderPartitioner.class);
+
+ @Deprecated
+ public static final String START = "hbase.simpletotalorder.start";
+ @Deprecated
+ public static final String END = "hbase.simpletotalorder.end";
+
+ static final String START_BASE64 = "hbase.simpletotalorder.start.base64";
+ static final String END_BASE64 = "hbase.simpletotalorder.end.base64";
+
+ private Configuration c;
+ private byte [] startkey;
+ private byte [] endkey;
+ private byte [][] splits;
+ private int lastReduces = -1;
+
+ public static void setStartKey(Configuration conf, byte[] startKey) {
+ conf.set(START_BASE64, Base64.encodeBytes(startKey));
+ }
+
+ public static void setEndKey(Configuration conf, byte[] endKey) {
+ conf.set(END_BASE64, Base64.encodeBytes(endKey));
+ }
+
+ @SuppressWarnings("deprecation")
+ static byte[] getStartKey(Configuration conf) {
+ return getKeyFromConf(conf, START_BASE64, START);
+ }
+
+ @SuppressWarnings("deprecation")
+ static byte[] getEndKey(Configuration conf) {
+ return getKeyFromConf(conf, END_BASE64, END);
+ }
+
+ private static byte[] getKeyFromConf(Configuration conf,
+ String base64Key, String deprecatedKey) {
+ String encoded = conf.get(base64Key);
+ if (encoded != null) {
+ return Base64.decode(encoded);
+ }
+ String oldStyleVal = conf.get(deprecatedKey);
+ if (oldStyleVal == null) {
+ return null;
+ }
+ LOG.warn("Using deprecated configuration " + deprecatedKey +
+ " - please use static accessor methods instead.");
+ return Bytes.toBytesBinary(oldStyleVal);
+ }
+
+ @Override
+ public int getPartition(final ImmutableBytesWritable key, final VALUE value,
+ final int reduces) {
+ if (reduces == 1) return 0;
+ if (this.lastReduces != reduces) {
+ this.splits = Bytes.split(this.startkey, this.endkey, reduces - 1);
+ for (int i = 0; i < splits.length; i++) {
+ LOG.info(Bytes.toStringBinary(splits[i]));
+ }
+ this.lastReduces = reduces;
+ }
+ int pos = Bytes.binarySearch(this.splits, key.get(), key.getOffset(),
+ key.getLength());
+ // Below code is from hfile index search.
+ if (pos < 0) {
+ pos++;
+ pos *= -1;
+ if (pos == 0) {
+ // falls before the beginning of the file.
+ throw new RuntimeException("Key outside start/stop range: " +
+ key.toString());
+ }
+ pos--;
+ }
+ return pos;
+ }
+
+ @Override
+ public Configuration getConf() {
+ return this.c;
+ }
+
+ @Override
+ public void setConf(Configuration conf) {
+ this.c = conf;
+ this.startkey = getStartKey(conf);
+ this.endkey = getEndKey(conf);
+ if (startkey == null || endkey == null) {
+ throw new RuntimeException(this.getClass() + " not configured");
+ }
+ LOG.info("startkey=" + Bytes.toStringBinary(startkey) +
+ ", endkey=" + Bytes.toStringBinary(endkey));
+ // Reset last reduces count on change of Start / End key
+ this.lastReduces = -1;
+ }
+}
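A configuration sketch, assuming an existing Job named job and illustrative row keys; the end key is exclusive, as described in the class comment:

    Configuration conf = job.getConfiguration();
    SimpleTotalOrderPartitioner.setStartKey(conf, Bytes.toBytes("row-aaa"));
    SimpleTotalOrderPartitioner.setEndKey(conf, Bytes.toBytes("row-zzz"));  // exclusive
    job.setPartitionerClass(SimpleTotalOrderPartitioner.class);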
[40/41] hbase git commit: HBASE-18640 Move mapreduce out of
hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableInputFormatBase.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableInputFormatBase.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableInputFormatBase.java
new file mode 100644
index 0000000..9811a97
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableInputFormatBase.java
@@ -0,0 +1,313 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import java.io.Closeable;
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.filter.Filter;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
+
+/**
+ * A base for {@link TableInputFormat}s. Receives a {@link Table}, a
+ * byte[][] of input columns and optionally a {@link Filter}.
+ * Subclasses may use other TableRecordReader implementations.
+ *
+ * Subclasses MUST ensure initializeTable(Connection, TableName) is called for an instance to
+ * function properly. Each of the entry points to this class used by the MapReduce framework,
+ * {@link #getRecordReader(InputSplit, JobConf, Reporter)} and {@link #getSplits(JobConf, int)},
+ * will call {@link #initialize(JobConf)} as a convenient centralized location to handle
+ * retrieving the necessary configuration information. If your subclass overrides either of these
+ * methods, either call the parent version or call initialize yourself.
+ *
+ * <p>
+ * An example of a subclass:
+ * <pre>
+ * class ExampleTIF extends TableInputFormatBase {
+ *
+ * {@literal @}Override
+ * protected void initialize(JobConf context) throws IOException {
+ * // We are responsible for the lifecycle of this connection until we hand it over in
+ * // initializeTable.
+ * Connection connection =
+ * ConnectionFactory.createConnection(HBaseConfiguration.create(job));
+ * TableName tableName = TableName.valueOf("exampleTable");
+ * // mandatory. once passed here, TableInputFormatBase will handle closing the connection.
+ * initializeTable(connection, tableName);
+ * byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
+ * Bytes.toBytes("columnB") };
+ * // mandatory
+ * setInputColumns(inputColumns);
+ * // optional, by default we'll get everything for the given columns.
+ * Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*"));
+ * setRowFilter(exampleFilter);
+ * }
+ * }
+ * </pre>
+ */
+
+@InterfaceAudience.Public
+public abstract class TableInputFormatBase
+implements InputFormat<ImmutableBytesWritable, Result> {
+ private static final Log LOG = LogFactory.getLog(TableInputFormatBase.class);
+ private byte [][] inputColumns;
+ private Table table;
+ private RegionLocator regionLocator;
+ private Connection connection;
+ private TableRecordReader tableRecordReader;
+ private Filter rowFilter;
+
+ private static final String NOT_INITIALIZED = "The input format instance has not been properly " +
+ "initialized. Ensure you call initializeTable either in your constructor or initialize " +
+ "method";
+ private static final String INITIALIZATION_ERROR = "Cannot create a record reader because of a" +
+ " previous error. Please look at the previous logs lines from" +
+ " the task's full log for more details.";
+
+ /**
+ * Builds a TableRecordReader. If no TableRecordReader was provided, uses
+ * the default.
+ *
+ * @see org.apache.hadoop.mapred.InputFormat#getRecordReader(InputSplit,
+ * JobConf, Reporter)
+ */
+ public RecordReader<ImmutableBytesWritable, Result> getRecordReader(
+ InputSplit split, JobConf job, Reporter reporter)
+ throws IOException {
+ // In case a subclass uses the deprecated approach or calls initializeTable directly
+ if (table == null) {
+ initialize(job);
+ }
+ // null check in case our child overrides getTable to not throw.
+ try {
+ if (getTable() == null) {
+ // initialize() must not have been implemented in the subclass.
+ throw new IOException(INITIALIZATION_ERROR);
+ }
+ } catch (IllegalStateException exception) {
+ throw new IOException(INITIALIZATION_ERROR, exception);
+ }
+
+ TableSplit tSplit = (TableSplit) split;
+ // if no table record reader was provided use default
+ final TableRecordReader trr = this.tableRecordReader == null ? new TableRecordReader() :
+ this.tableRecordReader;
+ trr.setStartRow(tSplit.getStartRow());
+ trr.setEndRow(tSplit.getEndRow());
+ trr.setHTable(this.table);
+ trr.setInputColumns(this.inputColumns);
+ trr.setRowFilter(this.rowFilter);
+ trr.init();
+ return new RecordReader<ImmutableBytesWritable, Result>() {
+
+ @Override
+ public void close() throws IOException {
+ trr.close();
+ closeTable();
+ }
+
+ @Override
+ public ImmutableBytesWritable createKey() {
+ return trr.createKey();
+ }
+
+ @Override
+ public Result createValue() {
+ return trr.createValue();
+ }
+
+ @Override
+ public long getPos() throws IOException {
+ return trr.getPos();
+ }
+
+ @Override
+ public float getProgress() throws IOException {
+ return trr.getProgress();
+ }
+
+ @Override
+ public boolean next(ImmutableBytesWritable key, Result value) throws IOException {
+ return trr.next(key, value);
+ }
+ };
+ }
+
+ /**
+ * Calculates the splits that will serve as input for the map tasks.
+ *
+ * Splits are created in a number equal to the smaller of numSplits and
+ * the number of {@link org.apache.hadoop.hbase.regionserver.HRegion}s in the table.
+ * If the number of splits is smaller than the number of
+ * {@link org.apache.hadoop.hbase.regionserver.HRegion}s then splits are spanned across
+ * multiple {@link org.apache.hadoop.hbase.regionserver.HRegion}s
+ * and are grouped as evenly as possible. When the
+ * splits are uneven, the bigger splits are placed first in the
+ * {@link InputSplit} array.
+ *
+ * @param job the map task {@link JobConf}
+ * @param numSplits a hint to calculate the number of splits (mapred.map.tasks).
+ *
+ * @return the input splits
+ *
+ * @see org.apache.hadoop.mapred.InputFormat#getSplits(org.apache.hadoop.mapred.JobConf, int)
+ */
+ public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
+ if (this.table == null) {
+ initialize(job);
+ }
+ // null check in case our child overrides getTable to not throw.
+ try {
+ if (getTable() == null) {
+ // initialize() must not have been implemented in the subclass.
+ throw new IOException(INITIALIZATION_ERROR);
+ }
+ } catch (IllegalStateException exception) {
+ throw new IOException(INITIALIZATION_ERROR, exception);
+ }
+
+ byte [][] startKeys = this.regionLocator.getStartKeys();
+ if (startKeys == null || startKeys.length == 0) {
+ throw new IOException("Expecting at least one region");
+ }
+ if (this.inputColumns == null || this.inputColumns.length == 0) {
+ throw new IOException("Expecting at least one column");
+ }
+ int realNumSplits = numSplits > startKeys.length? startKeys.length:
+ numSplits;
+ InputSplit[] splits = new InputSplit[realNumSplits];
+ int middle = startKeys.length / realNumSplits;
+ int startPos = 0;
+ for (int i = 0; i < realNumSplits; i++) {
+ int lastPos = startPos + middle;
+ lastPos = startKeys.length % realNumSplits > i ? lastPos + 1 : lastPos;
+ String regionLocation = regionLocator.getRegionLocation(startKeys[startPos]).
+ getHostname();
+ splits[i] = new TableSplit(this.table.getName(),
+ startKeys[startPos], ((i + 1) < realNumSplits) ? startKeys[lastPos]:
+ HConstants.EMPTY_START_ROW, regionLocation);
+ LOG.info("split: " + i + "->" + splits[i]);
+ startPos = lastPos;
+ }
+ return splits;
+ }
+
+ /**
+ * Allows subclasses to initialize the table information.
+ *
+ * @param connection The Connection to the HBase cluster. MUST be unmanaged. We will close.
+ * @param tableName The {@link TableName} of the table to process.
+ * @throws IOException
+ */
+ protected void initializeTable(Connection connection, TableName tableName) throws IOException {
+ if (this.table != null || this.connection != null) {
+ LOG.warn("initializeTable called multiple times. Overwriting connection and table " +
+ "reference; TableInputFormatBase will not close these old references when done.");
+ }
+ this.table = connection.getTable(tableName);
+ this.regionLocator = connection.getRegionLocator(tableName);
+ this.connection = connection;
+ }
+
+ /**
+ * @param inputColumns to be passed in {@link Result} to the map task.
+ */
+ protected void setInputColumns(byte [][] inputColumns) {
+ this.inputColumns = inputColumns;
+ }
+
+ /**
+ * Allows subclasses to get the {@link Table}.
+ */
+ protected Table getTable() {
+ if (table == null) {
+ throw new IllegalStateException(NOT_INITIALIZED);
+ }
+ return this.table;
+ }
+
+ /**
+ * Allows subclasses to set the {@link TableRecordReader}.
+ *
+ * @param tableRecordReader
+ * to provide other {@link TableRecordReader} implementations.
+ */
+ protected void setTableRecordReader(TableRecordReader tableRecordReader) {
+ this.tableRecordReader = tableRecordReader;
+ }
+
+ /**
+ * Allows subclasses to set the {@link Filter} to be used.
+ *
+ * @param rowFilter
+ */
+ protected void setRowFilter(Filter rowFilter) {
+ this.rowFilter = rowFilter;
+ }
+
+ /**
+ * Handle subclass specific set up.
+ * Each of the entry points used by the MapReduce framework,
+ * {@link #getRecordReader(InputSplit, JobConf, Reporter)} and {@link #getSplits(JobConf, int)},
+ * will call {@link #initialize(JobConf)} as a convenient centralized location to handle
+ * retrieving the necessary configuration information and calling
+ * {@link #initializeTable(Connection, TableName)}.
+ *
+ * Subclasses should implement their initialize call such that it is safe to call multiple times.
+ * The current TableInputFormatBase implementation relies on a non-null table reference to decide
+ * if an initialize call is needed, but this behavior may change in the future. In particular,
+ * it is critical that initializeTable not be called multiple times since this will leak
+ * Connection instances.
+ *
+ */
+ protected void initialize(JobConf job) throws IOException {
+ }
+
+ /**
+ * Close the Table and related objects that were initialized via
+ * {@link #initializeTable(Connection, TableName)}.
+ *
+ * @throws IOException
+ */
+ protected void closeTable() throws IOException {
+ close(table, connection);
+ table = null;
+ connection = null;
+ }
+
+ private void close(Closeable... closables) throws IOException {
+ for (Closeable c : closables) {
+ if(c != null) { c.close(); }
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableMap.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableMap.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableMap.java
new file mode 100644
index 0000000..a9f1e61
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableMap.java
@@ -0,0 +1,38 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapred.Mapper;
+
+/**
+ * Scan an HBase table to sort by a specified sort column.
+ * If the column does not exist, the record is not passed to Reduce.
+ *
+ * @param <K> WritableComparable key class
+ * @param <V> Writable value class
+ */
+@InterfaceAudience.Public
+public interface TableMap<K extends WritableComparable<? super K>, V>
+extends Mapper<ImmutableBytesWritable, Result, K, V> {
+
+}
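A minimal sketch of an old-API mapper implementing this interface, assuming an illustrative column family cf and qualifier q:

    public class ExampleTableMap extends MapReduceBase
        implements TableMap<ImmutableBytesWritable, ImmutableBytesWritable> {
      @Override
      public void map(ImmutableBytesWritable row, Result value,
          OutputCollector<ImmutableBytesWritable, ImmutableBytesWritable> output,
          Reporter reporter) throws IOException {
        // Emit the row key and the bytes stored under cf:q, if present.
        byte[] cell = value.getValue(Bytes.toBytes("cf"), Bytes.toBytes("q"));
        if (cell != null) {
          output.collect(row, new ImmutableBytesWritable(cell));
        }
      }
    }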
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableMapReduceUtil.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableMapReduceUtil.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableMapReduceUtil.java
new file mode 100644
index 0000000..63ec418
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableMapReduceUtil.java
@@ -0,0 +1,376 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.MetaTableAccessor;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.MutationSerialization;
+import org.apache.hadoop.hbase.mapreduce.ResultSerialization;
+import org.apache.hadoop.hbase.security.User;
+import org.apache.hadoop.hbase.security.UserProvider;
+import org.apache.hadoop.hbase.security.token.TokenUtil;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.OutputFormat;
+import org.apache.hadoop.mapred.TextInputFormat;
+import org.apache.hadoop.mapred.TextOutputFormat;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Map;
+
+/**
+ * Utility for {@link TableMap} and {@link TableReduce}
+ */
+@InterfaceAudience.Public
+@SuppressWarnings({ "rawtypes", "unchecked" })
+public class TableMapReduceUtil {
+
+ /**
+ * Use this before submitting a TableMap job. It will
+ * appropriately set up the JobConf.
+ *
+ * @param table The table name to read from.
+ * @param columns The columns to scan.
+ * @param mapper The mapper class to use.
+ * @param outputKeyClass The class of the output key.
+ * @param outputValueClass The class of the output value.
+ * @param job The current job configuration to adjust.
+ */
+ public static void initTableMapJob(String table, String columns,
+ Class<? extends TableMap> mapper,
+ Class<?> outputKeyClass,
+ Class<?> outputValueClass, JobConf job) {
+ initTableMapJob(table, columns, mapper, outputKeyClass, outputValueClass, job,
+ true, TableInputFormat.class);
+ }
+
+ public static void initTableMapJob(String table, String columns,
+ Class<? extends TableMap> mapper,
+ Class<?> outputKeyClass,
+ Class<?> outputValueClass, JobConf job, boolean addDependencyJars) {
+ initTableMapJob(table, columns, mapper, outputKeyClass, outputValueClass, job,
+ addDependencyJars, TableInputFormat.class);
+ }
+
+ /**
+ * Use this before submitting a TableMap job. It will
+ * appropriately set up the JobConf.
+ *
+ * @param table The table name to read from.
+ * @param columns The columns to scan.
+ * @param mapper The mapper class to use.
+ * @param outputKeyClass The class of the output key.
+ * @param outputValueClass The class of the output value.
+ * @param job The current job configuration to adjust.
+ * @param addDependencyJars upload HBase jars and jars for any of the configured
+ * job classes via the distributed cache (tmpjars).
+ */
+ public static void initTableMapJob(String table, String columns,
+ Class<? extends TableMap> mapper,
+ Class<?> outputKeyClass,
+ Class<?> outputValueClass, JobConf job, boolean addDependencyJars,
+ Class<? extends InputFormat> inputFormat) {
+
+ job.setInputFormat(inputFormat);
+ job.setMapOutputValueClass(outputValueClass);
+ job.setMapOutputKeyClass(outputKeyClass);
+ job.setMapperClass(mapper);
+ job.setStrings("io.serializations", job.get("io.serializations"),
+ MutationSerialization.class.getName(), ResultSerialization.class.getName());
+ FileInputFormat.addInputPaths(job, table);
+ job.set(TableInputFormat.COLUMN_LIST, columns);
+ if (addDependencyJars) {
+ try {
+ addDependencyJars(job);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+ try {
+ initCredentials(job);
+ } catch (IOException ioe) {
+ // just spit out the stack trace? really?
+ ioe.printStackTrace();
+ }
+ }
+
+ /**
+ * Sets up the job for reading from one or more table snapshots, with one or more scans
+ * per snapshot.
+ * It bypasses HBase servers and reads directly from snapshot files.
+ *
+ * @param snapshotScans map of snapshot name to scans on that snapshot.
+ * @param mapper The mapper class to use.
+ * @param outputKeyClass The class of the output key.
+ * @param outputValueClass The class of the output value.
+ * @param job The current job to adjust. Make sure the passed job is
+ * carrying all necessary HBase configuration.
+ * @param addDependencyJars upload HBase jars and jars for any of the configured
+ * job classes via the distributed cache (tmpjars).
+ */
+ public static void initMultiTableSnapshotMapperJob(Map<String, Collection<Scan>> snapshotScans,
+ Class<? extends TableMap> mapper, Class<?> outputKeyClass, Class<?> outputValueClass,
+ JobConf job, boolean addDependencyJars, Path tmpRestoreDir) throws IOException {
+ MultiTableSnapshotInputFormat.setInput(job, snapshotScans, tmpRestoreDir);
+
+ job.setInputFormat(MultiTableSnapshotInputFormat.class);
+ if (outputValueClass != null) {
+ job.setMapOutputValueClass(outputValueClass);
+ }
+ if (outputKeyClass != null) {
+ job.setMapOutputKeyClass(outputKeyClass);
+ }
+ job.setMapperClass(mapper);
+ if (addDependencyJars) {
+ addDependencyJars(job);
+ }
+
+ org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.resetCacheConfig(job);
+ }
+
+ /**
+ * Sets up the job for reading from a table snapshot. It bypasses HBase servers
+ * and reads directly from snapshot files.
+ *
+ * @param snapshotName The name of the snapshot (of a table) to read from.
+ * @param columns The columns to scan.
+ * @param mapper The mapper class to use.
+ * @param outputKeyClass The class of the output key.
+ * @param outputValueClass The class of the output value.
+ * @param job The current job to adjust. Make sure the passed job is
+ * carrying all necessary HBase configuration.
+ * @param addDependencyJars upload HBase jars and jars for any of the configured
+ * job classes via the distributed cache (tmpjars).
+ * @param tmpRestoreDir a temporary directory to copy the snapshot files into. The current user
+ * should have write permissions to this directory, and it should not be a subdirectory of
+ * rootdir. After the job is finished, the restore directory can be deleted.
+ * @throws IOException When setting up the details fails.
+ * @see TableSnapshotInputFormat
+ */
+ public static void initTableSnapshotMapJob(String snapshotName, String columns,
+ Class<? extends TableMap> mapper,
+ Class<?> outputKeyClass,
+ Class<?> outputValueClass, JobConf job,
+ boolean addDependencyJars, Path tmpRestoreDir)
+ throws IOException {
+ TableSnapshotInputFormat.setInput(job, snapshotName, tmpRestoreDir);
+ initTableMapJob(snapshotName, columns, mapper, outputKeyClass, outputValueClass, job,
+ addDependencyJars, TableSnapshotInputFormat.class);
+ org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.resetCacheConfig(job);
+ }
+
+ /**
+ * Use this before submitting a TableReduce job. It will
+ * appropriately set up the JobConf.
+ *
+ * @param table The output table.
+ * @param reducer The reducer class to use.
+ * @param job The current job configuration to adjust.
+ * @throws IOException When determining the region count fails.
+ */
+ public static void initTableReduceJob(String table,
+ Class<? extends TableReduce> reducer, JobConf job)
+ throws IOException {
+ initTableReduceJob(table, reducer, job, null);
+ }
+
+ /**
+ * Use this before submitting a TableReduce job. It will
+ * appropriately set up the JobConf.
+ *
+ * @param table The output table.
+ * @param reducer The reducer class to use.
+ * @param job The current job configuration to adjust.
+ * @param partitioner Partitioner to use. Pass <code>null</code> to use
+ * default partitioner.
+ * @throws IOException When determining the region count fails.
+ */
+ public static void initTableReduceJob(String table,
+ Class<? extends TableReduce> reducer, JobConf job, Class partitioner)
+ throws IOException {
+ initTableReduceJob(table, reducer, job, partitioner, true);
+ }
+
+ /**
+ * Use this before submitting a TableReduce job. It will
+ * appropriately set up the JobConf.
+ *
+ * @param table The output table.
+ * @param reducer The reducer class to use.
+ * @param job The current job configuration to adjust.
+ * @param partitioner Partitioner to use. Pass <code>null</code> to use
+ * default partitioner.
+ * @param addDependencyJars upload HBase jars and jars for any of the configured
+ * job classes via the distributed cache (tmpjars).
+ * @throws IOException When determining the region count fails.
+ */
+ public static void initTableReduceJob(String table,
+ Class<? extends TableReduce> reducer, JobConf job, Class partitioner,
+ boolean addDependencyJars) throws IOException {
+ job.setOutputFormat(TableOutputFormat.class);
+ job.setReducerClass(reducer);
+ job.set(TableOutputFormat.OUTPUT_TABLE, table);
+ job.setOutputKeyClass(ImmutableBytesWritable.class);
+ job.setOutputValueClass(Put.class);
+ job.setStrings("io.serializations", job.get("io.serializations"),
+ MutationSerialization.class.getName(), ResultSerialization.class.getName());
+ if (partitioner == HRegionPartitioner.class) {
+ job.setPartitionerClass(HRegionPartitioner.class);
+ int regions =
+ MetaTableAccessor.getRegionCount(HBaseConfiguration.create(job), TableName.valueOf(table));
+ if (job.getNumReduceTasks() > regions) {
+ job.setNumReduceTasks(regions);
+ }
+ } else if (partitioner != null) {
+ job.setPartitionerClass(partitioner);
+ }
+ if (addDependencyJars) {
+ addDependencyJars(job);
+ }
+ initCredentials(job);
+ }
+
+ public static void initCredentials(JobConf job) throws IOException {
+ UserProvider userProvider = UserProvider.instantiate(job);
+ if (userProvider.isHadoopSecurityEnabled()) {
+ // propagate delegation related props from launcher job to MR job
+ if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
+ job.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
+ }
+ }
+
+ if (userProvider.isHBaseSecurityEnabled()) {
+ Connection conn = ConnectionFactory.createConnection(job);
+ try {
+ // login the server principal (if using secure Hadoop)
+ User user = userProvider.getCurrent();
+ TokenUtil.addTokenForJob(conn, job, user);
+ } catch (InterruptedException ie) {
+ ie.printStackTrace();
+ Thread.currentThread().interrupt();
+ } finally {
+ conn.close();
+ }
+ }
+ }
+
+ /**
+ * Ensures that the given number of reduce tasks for the given job
+ * configuration does not exceed the number of regions for the given table.
+ *
+ * @param table The table to get the region count for.
+ * @param job The current job configuration to adjust.
+ * @throws IOException When retrieving the table details fails.
+ */
+ // Used by tests.
+ public static void limitNumReduceTasks(String table, JobConf job)
+ throws IOException {
+ int regions =
+ MetaTableAccessor.getRegionCount(HBaseConfiguration.create(job), TableName.valueOf(table));
+ if (job.getNumReduceTasks() > regions)
+ job.setNumReduceTasks(regions);
+ }
+
+ /**
+ * Ensures that the given number of map tasks for the given job
+ * configuration does not exceed the number of regions for the given table.
+ *
+ * @param table The table to get the region count for.
+ * @param job The current job configuration to adjust.
+ * @throws IOException When retrieving the table details fails.
+ */
+ // Used by tests.
+ public static void limitNumMapTasks(String table, JobConf job)
+ throws IOException {
+ int regions =
+ MetaTableAccessor.getRegionCount(HBaseConfiguration.create(job), TableName.valueOf(table));
+ if (job.getNumMapTasks() > regions)
+ job.setNumMapTasks(regions);
+ }
+
+ /**
+ * Sets the number of reduce tasks for the given job configuration to the
+ * number of regions the given table has.
+ *
+ * @param table The table to get the region count for.
+ * @param job The current job configuration to adjust.
+ * @throws IOException When retrieving the table details fails.
+ */
+ public static void setNumReduceTasks(String table, JobConf job)
+ throws IOException {
+ job.setNumReduceTasks(MetaTableAccessor.getRegionCount(HBaseConfiguration.create(job),
+ TableName.valueOf(table)));
+ }
+
+ /**
+ * Sets the number of map tasks for the given job configuration to the
+ * number of regions the given table has.
+ *
+ * @param table The table to get the region count for.
+ * @param job The current job configuration to adjust.
+ * @throws IOException When retrieving the table details fails.
+ */
+ public static void setNumMapTasks(String table, JobConf job)
+ throws IOException {
+ job.setNumMapTasks(MetaTableAccessor.getRegionCount(HBaseConfiguration.create(job),
+ TableName.valueOf(table)));
+ }
+
+ /**
+ * Sets the number of rows to return and cache with each scanner iteration.
+ * Higher caching values will enable faster mapreduce jobs at the expense of
+ * requiring more heap to contain the cached rows.
+ *
+ * @param job The current job configuration to adjust.
+ * @param batchSize The number of rows to return in batch with each scanner
+ * iteration.
+ */
+ public static void setScannerCaching(JobConf job, int batchSize) {
+ job.setInt("hbase.client.scanner.caching", batchSize);
+ }
+
+ /**
+ * @see org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil#addDependencyJars(org.apache.hadoop.mapreduce.Job)
+ */
+ public static void addDependencyJars(JobConf job) throws IOException {
+ org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addHBaseDependencyJars(job);
+ org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addDependencyJarsForClasses(
+ job,
+ // when making changes here, consider also mapreduce.TableMapReduceUtil
+ // pull job classes
+ job.getMapOutputKeyClass(),
+ job.getMapOutputValueClass(),
+ job.getOutputKeyClass(),
+ job.getOutputValueClass(),
+ job.getPartitionerClass(),
+ job.getClass("mapred.input.format.class", TextInputFormat.class, InputFormat.class),
+ job.getClass("mapred.output.format.class", TextOutputFormat.class, OutputFormat.class),
+ job.getCombinerClass());
+ }
+}
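
For illustration, a sketch of a driver that wires the utility methods above together; the table names "mytable" and "outtable", the column "f1:col", and the MyTableMap / MyTableReduce classes (hypothetical implementations of TableMap<Text, Text> and TableReduce<Text, Text>) are assumptions, not part of this class:

    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.mapred.TableMapReduceUtil;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.JobClient;
    import org.apache.hadoop.mapred.JobConf;

    public class ExampleDriver {
      public static void main(String[] args) throws Exception {
        JobConf job = new JobConf(HBaseConfiguration.create(), ExampleDriver.class);
        job.setJobName("example-table-copy");
        // Read column f1:col from "mytable" with the assumed MyTableMap mapper,
        // emitting Text keys and values from the map phase.
        TableMapReduceUtil.initTableMapJob("mytable", "f1:col",
            MyTableMap.class, Text.class, Text.class, job);
        // Write Puts produced by the assumed MyTableReduce reducer into "outtable".
        TableMapReduceUtil.initTableReduceJob("outtable", MyTableReduce.class, job);
        // Larger scanner caching trades mapper heap for fewer round trips.
        TableMapReduceUtil.setScannerCaching(job, 500);
        JobClient.runJob(job);
      }
    }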
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableOutputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableOutputFormat.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableOutputFormat.java
new file mode 100644
index 0000000..06b28ed
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableOutputFormat.java
@@ -0,0 +1,134 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import java.io.IOException;
+
+import org.apache.hadoop.fs.FileAlreadyExistsException;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.BufferedMutator;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.InvalidJobConfException;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordWriter;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.util.Progressable;
+
+/**
+ * Convert Map/Reduce output and write it to an HBase table
+ */
+@InterfaceAudience.Public
+public class TableOutputFormat extends FileOutputFormat<ImmutableBytesWritable, Put> {
+
+ /** JobConf parameter that specifies the output table */
+ public static final String OUTPUT_TABLE = "hbase.mapred.outputtable";
+
+ /**
+ * Write Reduce output (ImmutableBytesWritable, Put) pairs
+ * to an HBase table.
+ */
+ protected static class TableRecordWriter implements RecordWriter<ImmutableBytesWritable, Put> {
+ private BufferedMutator m_mutator;
+ private Connection conn;
+
+
+ /**
+ * Instantiate a TableRecordWriter with an externally supplied BufferedMutator for writing.
+ *
+ * @deprecated Please use {@code #TableRecordWriter(JobConf)}. This version does not clean up
+ * connections and will leak connections (removed in 2.0)
+ */
+ @Deprecated
+ public TableRecordWriter(final BufferedMutator mutator) throws IOException {
+ this.m_mutator = mutator;
+ this.conn = null;
+ }
+
+ /**
+ * Instantiate a TableRecordWriter with a BufferedMutator for batch writing.
+ */
+ public TableRecordWriter(JobConf job) throws IOException {
+ // expecting exactly one path
+ TableName tableName = TableName.valueOf(job.get(OUTPUT_TABLE));
+ try {
+ this.conn = ConnectionFactory.createConnection(job);
+ this.m_mutator = conn.getBufferedMutator(tableName);
+ } finally {
+ if (this.m_mutator == null) {
+ conn.close();
+ conn = null;
+ }
+ }
+ }
+
+ public void close(Reporter reporter) throws IOException {
+ try {
+ if (this.m_mutator != null) {
+ this.m_mutator.close();
+ }
+ } finally {
+ if (conn != null) {
+ this.conn.close();
+ }
+ }
+ }
+
+ public void write(ImmutableBytesWritable key, Put value) throws IOException {
+ m_mutator.mutate(new Put(value));
+ }
+ }
+
+ /**
+ * Creates a new record writer.
+ *
+ * Be aware that the baseline javadoc gives the impression that there is a single
+ * {@link RecordWriter} per job, but in HBase each call to this method returns a new
+ * RecordWriter. You must close the returned RecordWriter when done;
+ * failure to do so will drop writes.
+ *
+ * @param ignored Ignored filesystem
+ * @param job Current JobConf
+ * @param name Name of the job
+ * @param progress
+ * @return The newly created writer instance.
+ * @throws IOException When creating the writer fails.
+ */
+ @Override
+ public RecordWriter getRecordWriter(FileSystem ignored, JobConf job, String name,
+ Progressable progress)
+ throws IOException {
+ // Clear write buffer on fail is true by default so no need to reset it.
+ return new TableRecordWriter(job);
+ }
+
+ @Override
+ public void checkOutputSpecs(FileSystem ignored, JobConf job)
+ throws FileAlreadyExistsException, InvalidJobConfException, IOException {
+ String tableName = job.get(OUTPUT_TABLE);
+ if (tableName == null) {
+ throw new IOException("Must specify table name");
+ }
+ }
+}
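
The same output configuration that TableMapReduceUtil.initTableReduceJob applies can be set by hand, which is mainly useful for map-only jobs that write directly to a table. A sketch, assuming an existing output table named "outtable":

    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.Put;
    import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
    import org.apache.hadoop.hbase.mapred.TableOutputFormat;
    import org.apache.hadoop.mapred.JobConf;

    public class OutputFormatSetup {
      static JobConf configure() {
        JobConf job = new JobConf(HBaseConfiguration.create());
        job.setOutputFormat(TableOutputFormat.class);
        // checkOutputSpecs() rejects the job unless the output table is named here.
        job.set(TableOutputFormat.OUTPUT_TABLE, "outtable");   // assumed existing table
        job.setOutputKeyClass(ImmutableBytesWritable.class);
        job.setOutputValueClass(Put.class);
        return job;
      }
    }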
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableRecordReader.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableRecordReader.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableRecordReader.java
new file mode 100644
index 0000000..cecef7d
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableRecordReader.java
@@ -0,0 +1,139 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import java.io.IOException;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.filter.Filter;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.mapred.RecordReader;
+
+
+/**
+ * Iterate over HBase table data, returning (ImmutableBytesWritable, Result) pairs.
+ */
+@InterfaceAudience.Public
+public class TableRecordReader
+implements RecordReader<ImmutableBytesWritable, Result> {
+
+ private TableRecordReaderImpl recordReaderImpl = new TableRecordReaderImpl();
+
+ /**
+ * Restart from survivable exceptions by creating a new scanner.
+ *
+ * @param firstRow
+ * @throws IOException
+ */
+ public void restart(byte[] firstRow) throws IOException {
+ this.recordReaderImpl.restart(firstRow);
+ }
+
+ /**
+ * Build the scanner. Not done in constructor to allow for extension.
+ *
+ * @throws IOException
+ */
+ public void init() throws IOException {
+ this.recordReaderImpl.restart(this.recordReaderImpl.getStartRow());
+ }
+
+ /**
+ * @param htable the {@link org.apache.hadoop.hbase.client.Table} to scan.
+ */
+ public void setHTable(Table htable) {
+ this.recordReaderImpl.setHTable(htable);
+ }
+
+ /**
+ * @param inputColumns the columns to be placed in {@link Result}.
+ */
+ public void setInputColumns(final byte [][] inputColumns) {
+ this.recordReaderImpl.setInputColumns(inputColumns);
+ }
+
+ /**
+ * @param startRow the first row in the split
+ */
+ public void setStartRow(final byte [] startRow) {
+ this.recordReaderImpl.setStartRow(startRow);
+ }
+
+ /**
+ *
+ * @param endRow the last row in the split
+ */
+ public void setEndRow(final byte [] endRow) {
+ this.recordReaderImpl.setEndRow(endRow);
+ }
+
+ /**
+ * @param rowFilter the {@link Filter} to be used.
+ */
+ public void setRowFilter(Filter rowFilter) {
+ this.recordReaderImpl.setRowFilter(rowFilter);
+ }
+
+ public void close() {
+ this.recordReaderImpl.close();
+ }
+
+ /**
+ * @return ImmutableBytesWritable
+ *
+ * @see org.apache.hadoop.mapred.RecordReader#createKey()
+ */
+ public ImmutableBytesWritable createKey() {
+ return this.recordReaderImpl.createKey();
+ }
+
+ /**
+ * @return Result
+ *
+ * @see org.apache.hadoop.mapred.RecordReader#createValue()
+ */
+ public Result createValue() {
+ return this.recordReaderImpl.createValue();
+ }
+
+ public long getPos() {
+
+ // This should be the ordinal tuple in the range;
+ // not clear how to calculate...
+ return this.recordReaderImpl.getPos();
+ }
+
+ public float getProgress() {
+ // Depends on the total number of tuples and getPos
+ return this.recordReaderImpl.getPos();
+ }
+
+ /**
+ * @param key ImmutableBytesWritable to fill with the input key.
+ * @param value Result to fill with the input value.
+ * @return true if there was more data
+ * @throws IOException
+ */
+ public boolean next(ImmutableBytesWritable key, Result value)
+ throws IOException {
+ return this.recordReaderImpl.next(key, value);
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableRecordReaderImpl.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableRecordReaderImpl.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableRecordReaderImpl.java
new file mode 100644
index 0000000..f6b79c3
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableRecordReaderImpl.java
@@ -0,0 +1,259 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.ScannerCallable;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.DoNotRetryIOException;
+import org.apache.hadoop.hbase.filter.Filter;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.util.StringUtils;
+
+import static org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl.LOG_PER_ROW_COUNT;
+
+/**
+ * Iterate over HBase table data, returning (ImmutableBytesWritable, Result) pairs.
+ */
+@InterfaceAudience.Public
+public class TableRecordReaderImpl {
+ private static final Log LOG = LogFactory.getLog(TableRecordReaderImpl.class);
+
+ private byte [] startRow;
+ private byte [] endRow;
+ private byte [] lastSuccessfulRow;
+ private Filter trrRowFilter;
+ private ResultScanner scanner;
+ private Table htable;
+ private byte [][] trrInputColumns;
+ private long timestamp;
+ private int rowcount;
+ private boolean logScannerActivity = false;
+ private int logPerRowCount = 100;
+
+ /**
+ * Restart from survivable exceptions by creating a new scanner.
+ *
+ * @param firstRow
+ * @throws IOException
+ */
+ public void restart(byte[] firstRow) throws IOException {
+ Scan currentScan;
+ if ((endRow != null) && (endRow.length > 0)) {
+ if (trrRowFilter != null) {
+ Scan scan = new Scan(firstRow, endRow);
+ TableInputFormat.addColumns(scan, trrInputColumns);
+ scan.setFilter(trrRowFilter);
+ scan.setCacheBlocks(false);
+ this.scanner = this.htable.getScanner(scan);
+ currentScan = scan;
+ } else {
+ LOG.debug("TIFB.restart, firstRow: " +
+ Bytes.toStringBinary(firstRow) + ", endRow: " +
+ Bytes.toStringBinary(endRow));
+ Scan scan = new Scan(firstRow, endRow);
+ TableInputFormat.addColumns(scan, trrInputColumns);
+ this.scanner = this.htable.getScanner(scan);
+ currentScan = scan;
+ }
+ } else {
+ LOG.debug("TIFB.restart, firstRow: " +
+ Bytes.toStringBinary(firstRow) + ", no endRow");
+
+ Scan scan = new Scan(firstRow);
+ TableInputFormat.addColumns(scan, trrInputColumns);
+ scan.setFilter(trrRowFilter);
+ this.scanner = this.htable.getScanner(scan);
+ currentScan = scan;
+ }
+ if (logScannerActivity) {
+ LOG.info("Current scan=" + currentScan.toString());
+ timestamp = System.currentTimeMillis();
+ rowcount = 0;
+ }
+ }
+
+ /**
+ * Build the scanner. Not done in constructor to allow for extension.
+ *
+ * @throws IOException
+ */
+ public void init() throws IOException {
+ restart(startRow);
+ }
+
+ byte[] getStartRow() {
+ return this.startRow;
+ }
+ /**
+ * @param htable the {@link org.apache.hadoop.hbase.client.Table} to scan.
+ */
+ public void setHTable(Table htable) {
+ Configuration conf = htable.getConfiguration();
+ logScannerActivity = conf.getBoolean(
+ ScannerCallable.LOG_SCANNER_ACTIVITY, false);
+ logPerRowCount = conf.getInt(LOG_PER_ROW_COUNT, 100);
+ this.htable = htable;
+ }
+
+ /**
+ * @param inputColumns the columns to be placed in {@link Result}.
+ */
+ public void setInputColumns(final byte [][] inputColumns) {
+ this.trrInputColumns = inputColumns;
+ }
+
+ /**
+ * @param startRow the first row in the split
+ */
+ public void setStartRow(final byte [] startRow) {
+ this.startRow = startRow;
+ }
+
+ /**
+ *
+ * @param endRow the last row in the split
+ */
+ public void setEndRow(final byte [] endRow) {
+ this.endRow = endRow;
+ }
+
+ /**
+ * @param rowFilter the {@link Filter} to be used.
+ */
+ public void setRowFilter(Filter rowFilter) {
+ this.trrRowFilter = rowFilter;
+ }
+
+ public void close() {
+ if (this.scanner != null) {
+ this.scanner.close();
+ }
+ try {
+ this.htable.close();
+ } catch (IOException ioe) {
+ LOG.warn("Error closing table", ioe);
+ }
+ }
+
+ /**
+ * @return ImmutableBytesWritable
+ *
+ * @see org.apache.hadoop.mapred.RecordReader#createKey()
+ */
+ public ImmutableBytesWritable createKey() {
+ return new ImmutableBytesWritable();
+ }
+
+ /**
+ * @return Result
+ *
+ * @see org.apache.hadoop.mapred.RecordReader#createValue()
+ */
+ public Result createValue() {
+ return new Result();
+ }
+
+ public long getPos() {
+ // This should be the ordinal tuple in the range;
+ // not clear how to calculate...
+ return 0;
+ }
+
+ public float getProgress() {
+ // Depends on the total number of tuples and getPos
+ return 0;
+ }
+
+ /**
+ * @param key ImmutableBytesWritable to fill with the input key.
+ * @param value Result to fill with the input value.
+ * @return true if there was more data
+ * @throws IOException
+ */
+ public boolean next(ImmutableBytesWritable key, Result value)
+ throws IOException {
+ Result result;
+ try {
+ try {
+ result = this.scanner.next();
+ if (logScannerActivity) {
+ rowcount ++;
+ if (rowcount >= logPerRowCount) {
+ long now = System.currentTimeMillis();
+ LOG.info("Mapper took " + (now-timestamp)
+ + "ms to process " + rowcount + " rows");
+ timestamp = now;
+ rowcount = 0;
+ }
+ }
+ } catch (IOException e) {
+ // do not retry if the exception tells us not to do so
+ if (e instanceof DoNotRetryIOException) {
+ throw e;
+ }
+ // try to handle all other IOExceptions by restarting
+ // the scanner, if the second call fails, it will be rethrown
+ LOG.debug("recovered from " + StringUtils.stringifyException(e));
+ if (lastSuccessfulRow == null) {
+ LOG.warn("We are restarting the first next() invocation," +
+ " if your mapper has restarted a few other times like this" +
+ " then you should consider killing this job and investigate" +
+ " why it's taking so long.");
+ }
+ if (lastSuccessfulRow == null) {
+ restart(startRow);
+ } else {
+ restart(lastSuccessfulRow);
+ this.scanner.next(); // skip presumed already mapped row
+ }
+ result = this.scanner.next();
+ }
+
+ if (result != null && result.size() > 0) {
+ key.set(result.getRow());
+ lastSuccessfulRow = key.get();
+ value.copyFrom(result);
+ return true;
+ }
+ return false;
+ } catch (IOException ioe) {
+ if (logScannerActivity) {
+ long now = System.currentTimeMillis();
+ LOG.info("Mapper took " + (now-timestamp)
+ + "ms to process " + rowcount + " rows");
+ LOG.info(ioe);
+ String lastRow = lastSuccessfulRow == null ?
+ "null" : Bytes.toStringBinary(lastSuccessfulRow);
+ LOG.info("lastSuccessfulRow=" + lastRow);
+ }
+ throw ioe;
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableReduce.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableReduce.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableReduce.java
new file mode 100644
index 0000000..91fb4a1
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableReduce.java
@@ -0,0 +1,38 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapred.Reducer;
+
+/**
+ * Write a table, sorting by the input key
+ *
+ * @param <K> key class
+ * @param <V> value class
+ */
+@InterfaceAudience.Public
+@SuppressWarnings("unchecked")
+public interface TableReduce<K extends WritableComparable, V>
+extends Reducer<K, V, ImmutableBytesWritable, Put> {
+
+}
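
For illustration, a sketch of a TableReduce implementation that counts the map values seen per key and writes the count back as a single cell; the class name, the family "f1" and the qualifier "count" are assumptions for the example:

    import java.io.IOException;
    import java.util.Iterator;
    import org.apache.hadoop.hbase.client.Put;
    import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
    import org.apache.hadoop.hbase.mapred.TableReduce;
    import org.apache.hadoop.hbase.util.Bytes;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.MapReduceBase;
    import org.apache.hadoop.mapred.OutputCollector;
    import org.apache.hadoop.mapred.Reporter;

    public class MyTableReduce extends MapReduceBase implements TableReduce<Text, Text> {
      @Override
      public void reduce(Text key, Iterator<Text> values,
          OutputCollector<ImmutableBytesWritable, Put> output, Reporter reporter)
          throws IOException {
        int count = 0;
        while (values.hasNext()) {
          values.next();
          count++;
        }
        byte[] row = Bytes.toBytes(key.toString());
        Put put = new Put(row);
        // One cell per output row: f1:count holds the number of map outputs for this key.
        put.addColumn(Bytes.toBytes("f1"), Bytes.toBytes("count"), Bytes.toBytes(count));
        output.collect(new ImmutableBytesWritable(row), put);
      }
    }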
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableSnapshotInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableSnapshotInputFormat.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableSnapshotInputFormat.java
new file mode 100644
index 0000000..d7b49ff
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableSnapshotInputFormat.java
@@ -0,0 +1,166 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mapred;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormatImpl;
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.List;
+
+/**
+ * TableSnapshotInputFormat allows a MapReduce job to run over a table snapshot. Further
+ * documentation available on {@link org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat}.
+ *
+ * @see org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat
+ */
+@InterfaceAudience.Public
+public class TableSnapshotInputFormat implements InputFormat<ImmutableBytesWritable, Result> {
+
+ public static class TableSnapshotRegionSplit implements InputSplit {
+ private TableSnapshotInputFormatImpl.InputSplit delegate;
+
+ // constructor for mapreduce framework / Writable
+ public TableSnapshotRegionSplit() {
+ this.delegate = new TableSnapshotInputFormatImpl.InputSplit();
+ }
+
+ public TableSnapshotRegionSplit(TableSnapshotInputFormatImpl.InputSplit delegate) {
+ this.delegate = delegate;
+ }
+
+ public TableSnapshotRegionSplit(HTableDescriptor htd, HRegionInfo regionInfo,
+ List<String> locations, Scan scan, Path restoreDir) {
+ this.delegate =
+ new TableSnapshotInputFormatImpl.InputSplit(htd, regionInfo, locations, scan, restoreDir);
+ }
+
+ @Override
+ public long getLength() throws IOException {
+ return delegate.getLength();
+ }
+
+ @Override
+ public String[] getLocations() throws IOException {
+ return delegate.getLocations();
+ }
+
+ @Override
+ public void write(DataOutput out) throws IOException {
+ delegate.write(out);
+ }
+
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ delegate.readFields(in);
+ }
+ }
+
+ static class TableSnapshotRecordReader
+ implements RecordReader<ImmutableBytesWritable, Result> {
+
+ private TableSnapshotInputFormatImpl.RecordReader delegate;
+
+ public TableSnapshotRecordReader(TableSnapshotRegionSplit split, JobConf job)
+ throws IOException {
+ delegate = new TableSnapshotInputFormatImpl.RecordReader();
+ delegate.initialize(split.delegate, job);
+ }
+
+ @Override
+ public boolean next(ImmutableBytesWritable key, Result value) throws IOException {
+ if (!delegate.nextKeyValue()) {
+ return false;
+ }
+ ImmutableBytesWritable currentKey = delegate.getCurrentKey();
+ key.set(currentKey.get(), currentKey.getOffset(), currentKey.getLength());
+ value.copyFrom(delegate.getCurrentValue());
+ return true;
+ }
+
+ @Override
+ public ImmutableBytesWritable createKey() {
+ return new ImmutableBytesWritable();
+ }
+
+ @Override
+ public Result createValue() {
+ return new Result();
+ }
+
+ @Override
+ public long getPos() throws IOException {
+ return delegate.getPos();
+ }
+
+ @Override
+ public void close() throws IOException {
+ delegate.close();
+ }
+
+ @Override
+ public float getProgress() throws IOException {
+ return delegate.getProgress();
+ }
+ }
+
+ @Override
+ public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
+ List<TableSnapshotInputFormatImpl.InputSplit> splits =
+ TableSnapshotInputFormatImpl.getSplits(job);
+ InputSplit[] results = new InputSplit[splits.size()];
+ for (int i = 0; i < splits.size(); i++) {
+ results[i] = new TableSnapshotRegionSplit(splits.get(i));
+ }
+ return results;
+ }
+
+ @Override
+ public RecordReader<ImmutableBytesWritable, Result>
+ getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException {
+ return new TableSnapshotRecordReader((TableSnapshotRegionSplit) split, job);
+ }
+
+ /**
+ * Configures the job to use TableSnapshotInputFormat to read from a snapshot.
+ * @param job the job to configure
+ * @param snapshotName the name of the snapshot to read from
+ * @param restoreDir a temporary directory to restore the snapshot into. Current user should
+ * have write permissions to this directory, and this should not be a subdirectory of rootdir.
+ * After the job is finished, restoreDir can be deleted.
+ * @throws IOException if an error occurs
+ */
+ public static void setInput(JobConf job, String snapshotName, Path restoreDir)
+ throws IOException {
+ TableSnapshotInputFormatImpl.setInput(job, snapshotName, restoreDir);
+ }
+}
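
For illustration, a sketch of configuring a snapshot-backed job through TableMapReduceUtil.initTableSnapshotMapJob, which calls the setInput method above; the snapshot name "mysnapshot", the restore directory, the column "f1:col" and the hypothetical MyTableMap mapper are assumptions:

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.mapred.TableMapReduceUtil;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.JobClient;
    import org.apache.hadoop.mapred.JobConf;

    public class SnapshotScanDriver {
      public static void main(String[] args) throws Exception {
        JobConf job = new JobConf(HBaseConfiguration.create(), SnapshotScanDriver.class);
        // Scratch directory the submitting user can write to; must not live under hbase.rootdir.
        Path restoreDir = new Path("/tmp/snapshot-restore");
        TableMapReduceUtil.initTableSnapshotMapJob("mysnapshot", "f1:col",
            MyTableMap.class, Text.class, Text.class, job,
            true, restoreDir);
        JobClient.runJob(job);
      }
    }

Because the splits read snapshot files straight from the filesystem, the job never touches the region servers; the restore directory can be deleted once the job finishes.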
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableSplit.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableSplit.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableSplit.java
new file mode 100644
index 0000000..0784e5e
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableSplit.java
@@ -0,0 +1,154 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapred.InputSplit;
+
+/**
+ * A table split corresponds to a key range [low, high)
+ */
+@InterfaceAudience.Public
+public class TableSplit implements InputSplit, Comparable<TableSplit> {
+ private TableName m_tableName;
+ private byte [] m_startRow;
+ private byte [] m_endRow;
+ private String m_regionLocation;
+
+ /** default constructor */
+ public TableSplit() {
+ this((TableName)null, HConstants.EMPTY_BYTE_ARRAY,
+ HConstants.EMPTY_BYTE_ARRAY, "");
+ }
+
+ /**
+ * Constructor
+ * @param tableName
+ * @param startRow
+ * @param endRow
+ * @param location
+ */
+ public TableSplit(TableName tableName, byte [] startRow, byte [] endRow,
+ final String location) {
+ this.m_tableName = tableName;
+ this.m_startRow = startRow;
+ this.m_endRow = endRow;
+ this.m_regionLocation = location;
+ }
+
+ public TableSplit(byte [] tableName, byte [] startRow, byte [] endRow,
+ final String location) {
+ this(TableName.valueOf(tableName), startRow, endRow,
+ location);
+ }
+
+ /** @return table name */
+ public TableName getTable() {
+ return this.m_tableName;
+ }
+
+ /** @return table name */
+ public byte [] getTableName() {
+ return this.m_tableName.getName();
+ }
+
+ /** @return starting row key */
+ public byte [] getStartRow() {
+ return this.m_startRow;
+ }
+
+ /** @return end row key */
+ public byte [] getEndRow() {
+ return this.m_endRow;
+ }
+
+ /** @return the region's hostname */
+ public String getRegionLocation() {
+ return this.m_regionLocation;
+ }
+
+ public String[] getLocations() {
+ return new String[] {this.m_regionLocation};
+ }
+
+ public long getLength() {
+ // Not clear how to obtain this... seems to be used only for sorting splits
+ return 0;
+ }
+
+ public void readFields(DataInput in) throws IOException {
+ this.m_tableName = TableName.valueOf(Bytes.readByteArray(in));
+ this.m_startRow = Bytes.readByteArray(in);
+ this.m_endRow = Bytes.readByteArray(in);
+ this.m_regionLocation = Bytes.toString(Bytes.readByteArray(in));
+ }
+
+ public void write(DataOutput out) throws IOException {
+ Bytes.writeByteArray(out, this.m_tableName.getName());
+ Bytes.writeByteArray(out, this.m_startRow);
+ Bytes.writeByteArray(out, this.m_endRow);
+ Bytes.writeByteArray(out, Bytes.toBytes(this.m_regionLocation));
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("HBase table split(");
+ sb.append("table name: ").append(m_tableName);
+ sb.append(", start row: ").append(Bytes.toStringBinary(m_startRow));
+ sb.append(", end row: ").append(Bytes.toStringBinary(m_endRow));
+ sb.append(", region location: ").append(m_regionLocation);
+ sb.append(")");
+ return sb.toString();
+ }
+
+ @Override
+ public int compareTo(TableSplit o) {
+ return Bytes.compareTo(getStartRow(), o.getStartRow());
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (o == null || !(o instanceof TableSplit)) {
+ return false;
+ }
+ TableSplit other = (TableSplit)o;
+ return m_tableName.equals(other.m_tableName) &&
+ Bytes.equals(m_startRow, other.m_startRow) &&
+ Bytes.equals(m_endRow, other.m_endRow) &&
+ m_regionLocation.equals(other.m_regionLocation);
+ }
+
+ @Override
+ public int hashCode() {
+ int result = m_tableName != null ? m_tableName.hashCode() : 0;
+ result = 31 * result + Arrays.hashCode(m_startRow);
+ result = 31 * result + Arrays.hashCode(m_endRow);
+ result = 31 * result + (m_regionLocation != null ? m_regionLocation.hashCode() : 0);
+ return result;
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/package-info.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/package-info.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/package-info.java
new file mode 100644
index 0000000..1da3a52
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/package-info.java
@@ -0,0 +1,26 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+Provides HBase <a href="http://wiki.apache.org/hadoop/HadoopMapReduce">MapReduce</a>
+Input/OutputFormats, a table indexing MapReduce job, and utility methods.
+
+<p>See <a href="http://hbase.apache.org/book.html#mapreduce">HBase and MapReduce</a>
+in the HBase Reference Guide for documentation on running MapReduce over HBase.
+*/
+package org.apache.hadoop.hbase.mapred;
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCounter.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCounter.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCounter.java
new file mode 100644
index 0000000..078033e
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCounter.java
@@ -0,0 +1,333 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.filter.CompareFilter;
+import org.apache.hadoop.hbase.filter.Filter;
+import org.apache.hadoop.hbase.filter.PrefixFilter;
+import org.apache.hadoop.hbase.filter.RegexStringComparator;
+import org.apache.hadoop.hbase.filter.RowFilter;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.base.Preconditions;
+
+
+/**
+ * A job with a map and reduce phase to count cells in a table.
+ * The counter lists the following stats for a given table:
+ * <pre>
+ * 1. Total number of rows in the table
+ * 2. Total number of CFs across all rows
+ * 3. Total qualifiers across all rows
+ * 4. Total occurrence of each CF
+ * 5. Total occurrence of each qualifier
+ * 6. Total number of versions of each qualifier.
+ * </pre>
+ *
+ * CellCounter takes optional parameters: a user-supplied separator string
+ * used between row, family and qualifier in the report; a regex-based or
+ * prefix-based row filter to restrict the count to a subset of rows; and a
+ * start time and/or end time to limit the count to a time range.
+ */
+@InterfaceAudience.Public
+public class CellCounter extends Configured implements Tool {
+ private static final Log LOG =
+ LogFactory.getLog(CellCounter.class.getName());
+
+
+ /**
+ * Name of this 'program'.
+ */
+ static final String NAME = "CellCounter";
+
+ private final static String JOB_NAME_CONF_KEY = "mapreduce.job.name";
+
+ /**
+ * Mapper that runs the count.
+ */
+ static class CellCounterMapper
+ extends TableMapper<Text, IntWritable> {
+ /**
+ * Counter enumeration to count the actual rows.
+ */
+ public static enum Counters {
+ ROWS,
+ CELLS
+ }
+
+ private Configuration conf;
+ private String separator;
+
+ // state of current row, family, column needs to persist across map() invocations
+ // in order to properly handle scanner batching, where a single qualifier may have too
+ // many versions for a single map() call
+ private byte[] lastRow;
+ private String currentRowKey;
+ byte[] currentFamily = null;
+ String currentFamilyName = null;
+ byte[] currentQualifier = null;
+ // family + qualifier
+ String currentQualifierName = null;
+ // rowkey + family + qualifier
+ String currentRowQualifierName = null;
+
+ @Override
+ protected void setup(Context context) throws IOException, InterruptedException {
+ conf = context.getConfiguration();
+ separator = conf.get("ReportSeparator",":");
+ }
+
+ /**
+ * Maps the data.
+ *
+ * @param row The current table row key.
+ * @param values The columns.
+ * @param context The current context.
+ * @throws IOException When something is broken with the data.
+ * @see org.apache.hadoop.mapreduce.Mapper#map(KEYIN, VALUEIN,
+ * org.apache.hadoop.mapreduce.Mapper.Context)
+ */
+
+ @Override
+ @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="NP_NULL_ON_SOME_PATH",
+ justification="Findbugs is blind to the Precondition null check")
+ public void map(ImmutableBytesWritable row, Result values,
+ Context context)
+ throws IOException {
+ Preconditions.checkState(values != null,
+ "values passed to the map is null");
+
+ try {
+ byte[] currentRow = values.getRow();
+ if (lastRow == null || !Bytes.equals(lastRow, currentRow)) {
+ lastRow = currentRow;
+ currentRowKey = Bytes.toStringBinary(currentRow);
+ currentFamily = null;
+ currentQualifier = null;
+ context.getCounter(Counters.ROWS).increment(1);
+ context.write(new Text("Total ROWS"), new IntWritable(1));
+ }
+ if (!values.isEmpty()) {
+ int cellCount = 0;
+ for (Cell value : values.listCells()) {
+ cellCount++;
+ if (currentFamily == null || !CellUtil.matchingFamily(value, currentFamily)) {
+ currentFamily = CellUtil.cloneFamily(value);
+ currentFamilyName = Bytes.toStringBinary(currentFamily);
+ currentQualifier = null;
+ context.getCounter("CF", currentFamilyName).increment(1);
+ if (1 == context.getCounter("CF", currentFamilyName).getValue()) {
+ context.write(new Text("Total Families Across all Rows"), new IntWritable(1));
+ context.write(new Text(currentFamily), new IntWritable(1));
+ }
+ }
+ if (currentQualifier == null || !CellUtil.matchingQualifier(value, currentQualifier)) {
+ currentQualifier = CellUtil.cloneQualifier(value);
+ currentQualifierName = currentFamilyName + separator +
+ Bytes.toStringBinary(currentQualifier);
+ currentRowQualifierName = currentRowKey + separator + currentQualifierName;
+
+ context.write(new Text("Total Qualifiers across all Rows"),
+ new IntWritable(1));
+ context.write(new Text(currentQualifierName), new IntWritable(1));
+ }
+ // Increment versions
+ context.write(new Text(currentRowQualifierName + "_Versions"), new IntWritable(1));
+ }
+ context.getCounter(Counters.CELLS).increment(cellCount);
+ }
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ }
+ }
+ }
+
+ static class IntSumReducer<Key> extends Reducer<Key, IntWritable,
+ Key, IntWritable> {
+
+ private IntWritable result = new IntWritable();
+ public void reduce(Key key, Iterable<IntWritable> values,
+ Context context)
+ throws IOException, InterruptedException {
+ int sum = 0;
+ for (IntWritable val : values) {
+ sum += val.get();
+ }
+ result.set(sum);
+ context.write(key, result);
+ }
+ }
+
+ /**
+ * Sets up the actual job.
+ *
+ * @param conf The current configuration.
+ * @param args The command line parameters.
+ * @return The newly created job.
+ * @throws IOException When setting up the job fails.
+ */
+ public static Job createSubmittableJob(Configuration conf, String[] args)
+ throws IOException {
+ String tableName = args[0];
+ Path outputDir = new Path(args[1]);
+ String reportSeparatorString = (args.length > 2) ? args[2]: ":";
+ conf.set("ReportSeparator", reportSeparatorString);
+ Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
+ job.setJarByClass(CellCounter.class);
+ Scan scan = getConfiguredScanForJob(conf, args);
+ TableMapReduceUtil.initTableMapperJob(tableName, scan,
+ CellCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);
+ job.setNumReduceTasks(1);
+ job.setMapOutputKeyClass(Text.class);
+ job.setMapOutputValueClass(IntWritable.class);
+ job.setOutputFormatClass(TextOutputFormat.class);
+ job.setOutputKeyClass(Text.class);
+ job.setOutputValueClass(IntWritable.class);
+ FileOutputFormat.setOutputPath(job, outputDir);
+ job.setReducerClass(IntSumReducer.class);
+ return job;
+ }
+
+ private static Scan getConfiguredScanForJob(Configuration conf, String[] args)
+ throws IOException {
+ // create scan with any properties set from TableInputFormat
+ Scan s = TableInputFormat.createScanFromConfiguration(conf);
+ // Set Scan Versions
+ if (conf.get(TableInputFormat.SCAN_MAXVERSIONS) == null) {
+ // default to all versions unless explicitly set
+ s.setMaxVersions(Integer.MAX_VALUE);
+ }
+ s.setCacheBlocks(false);
+ // Set RowFilter or Prefix Filter if applicable.
+ Filter rowFilter = getRowFilter(args);
+ if (rowFilter!= null) {
+ LOG.info("Setting Row Filter for counter.");
+ s.setFilter(rowFilter);
+ }
+ // Set TimeRange if defined
+ long timeRange[] = getTimeRange(args);
+ if (timeRange != null) {
+ LOG.info("Setting TimeRange for counter.");
+ s.setTimeRange(timeRange[0], timeRange[1]);
+ }
+ return s;
+ }
+
+
+ private static Filter getRowFilter(String[] args) {
+ Filter rowFilter = null;
+ String filterCriteria = (args.length > 3) ? args[3]: null;
+ if (filterCriteria == null) return null;
+ if (filterCriteria.startsWith("^")) {
+ String regexPattern = filterCriteria.substring(1, filterCriteria.length());
+ rowFilter = new RowFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator(regexPattern));
+ } else {
+ rowFilter = new PrefixFilter(Bytes.toBytesBinary(filterCriteria));
+ }
+ return rowFilter;
+ }
+
+ private static long[] getTimeRange(String[] args) throws IOException {
+ final String startTimeArgKey = "--starttime=";
+ final String endTimeArgKey = "--endtime=";
+ long startTime = 0L;
+ long endTime = 0L;
+
+ for (int i = 1; i < args.length; i++) {
+ System.out.println("i:" + i + "arg[i]" + args[i]);
+ if (args[i].startsWith(startTimeArgKey)) {
+ startTime = Long.parseLong(args[i].substring(startTimeArgKey.length()));
+ }
+ if (args[i].startsWith(endTimeArgKey)) {
+ endTime = Long.parseLong(args[i].substring(endTimeArgKey.length()));
+ }
+ }
+
+ if (startTime == 0 && endTime == 0)
+ return null;
+
+ endTime = endTime == 0 ? HConstants.LATEST_TIMESTAMP : endTime;
+ return new long [] {startTime, endTime};
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+ if (args.length < 2) {
+ System.err.println("ERROR: Wrong number of parameters: " + args.length);
+ System.err.println("Usage: CellCounter ");
+ System.err.println(" <tablename> <outputDir> <reportSeparator> [^[regex pattern] or " +
+ "[Prefix] for row filter]] --starttime=[starttime] --endtime=[endtime]");
+ System.err.println(" Note: -D properties will be applied to the conf used. ");
+ System.err.println(" Additionally, all of the SCAN properties from TableInputFormat");
+ System.err.println(" can be specified to get fine grained control on what is counted..");
+ System.err.println(" -D " + TableInputFormat.SCAN_ROW_START + "=<rowkey>");
+ System.err.println(" -D " + TableInputFormat.SCAN_ROW_STOP + "=<rowkey>");
+ System.err.println(" -D " + TableInputFormat.SCAN_COLUMNS + "=\"<col1> <col2>...\"");
+ System.err.println(" -D " + TableInputFormat.SCAN_COLUMN_FAMILY + "=<family1>,<family2>, ...");
+ System.err.println(" -D " + TableInputFormat.SCAN_TIMESTAMP + "=<timestamp>");
+ System.err.println(" -D " + TableInputFormat.SCAN_TIMERANGE_START + "=<timestamp>");
+ System.err.println(" -D " + TableInputFormat.SCAN_TIMERANGE_END + "=<timestamp>");
+ System.err.println(" -D " + TableInputFormat.SCAN_MAXVERSIONS + "=<count>");
+ System.err.println(" -D " + TableInputFormat.SCAN_CACHEDROWS + "=<count>");
+ System.err.println(" -D " + TableInputFormat.SCAN_BATCHSIZE + "=<count>");
+ System.err.println(" <reportSeparator> parameter can be used to override the default report separator " +
+ "string : used to separate the rowId/column family name and qualifier name.");
+ System.err.println(" [^[regex pattern] or [Prefix] parameter can be used to limit the cell counter count " +
+ "operation to a limited subset of rows from the table based on regex or prefix pattern.");
+ return -1;
+ }
+ Job job = createSubmittableJob(getConf(), args);
+ return (job.waitForCompletion(true) ? 0 : 1);
+ }
+
+ /**
+ * Main entry point.
+ * @param args The command line parameters.
+ * @throws Exception When running the job fails.
+ */
+ public static void main(String[] args) throws Exception {
+ int errCode = ToolRunner.run(HBaseConfiguration.create(), new CellCounter(), args);
+ System.exit(errCode);
+ }
+
+}
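
For reference, a minimal sketch of driving the tool above through ToolRunner,
mirroring main() and the usage text printed by run(). The launcher class name,
table, output directory, and row-filter argument below are hypothetical:

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.hbase.HBaseConfiguration;
  import org.apache.hadoop.hbase.mapreduce.CellCounter;
  import org.apache.hadoop.util.ToolRunner;

  public class CellCounterLauncher {
    public static void main(String[] args) throws Exception {
      Configuration conf = HBaseConfiguration.create();
      // <tablename> <outputDir> <reportSeparator> [row filter]; a leading '^'
      // selects a regex row filter, any other value is treated as a row prefix.
      String[] toolArgs = new String[] {
          "mytable", "/tmp/cellcounter-out", ":", "^row-.*" };
      System.exit(ToolRunner.run(conf, new CellCounter(), toolArgs));
    }
  }
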
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCreator.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCreator.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCreator.java
new file mode 100644
index 0000000..1d4d37b
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCreator.java
@@ -0,0 +1,134 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.Tag;
+import org.apache.hadoop.util.ReflectionUtils;
+
+/**
+ * Facade to create Cells for HFileOutputFormat. The created Cells are of <code>Put</code> type.
+ */
+@InterfaceAudience.Public
+public class CellCreator {
+
+ public static final String VISIBILITY_EXP_RESOLVER_CLASS =
+ "hbase.mapreduce.visibility.expression.resolver.class";
+
+ private VisibilityExpressionResolver visExpResolver;
+
+ public CellCreator(Configuration conf) {
+ Class<? extends VisibilityExpressionResolver> clazz = conf.getClass(
+ VISIBILITY_EXP_RESOLVER_CLASS, DefaultVisibilityExpressionResolver.class,
+ VisibilityExpressionResolver.class);
+ this.visExpResolver = ReflectionUtils.newInstance(clazz, conf);
+ this.visExpResolver.init();
+ }
+
+ /**
+ * @param row row key
+ * @param roffset row offset
+ * @param rlength row length
+ * @param family family name
+ * @param foffset family offset
+ * @param flength family length
+ * @param qualifier column qualifier
+ * @param qoffset qualifier offset
+ * @param qlength qualifier length
+ * @param timestamp version timestamp
+ * @param value column value
+ * @param voffset value offset
+ * @param vlength value length
+ * @return created Cell
+ * @throws IOException
+ */
+ public Cell create(byte[] row, int roffset, int rlength, byte[] family, int foffset, int flength,
+ byte[] qualifier, int qoffset, int qlength, long timestamp, byte[] value, int voffset,
+ int vlength) throws IOException {
+ return create(row, roffset, rlength, family, foffset, flength, qualifier, qoffset, qlength,
+ timestamp, value, voffset, vlength, (List<Tag>)null);
+ }
+
+ /**
+ * @param row row key
+ * @param roffset row offset
+ * @param rlength row length
+ * @param family family name
+ * @param foffset family offset
+ * @param flength family length
+ * @param qualifier column qualifier
+ * @param qoffset qualifier offset
+ * @param qlength qualifier length
+ * @param timestamp version timestamp
+ * @param value column value
+ * @param voffset value offset
+ * @param vlength value length
+ * @param visExpression visibility expression to be associated with cell
+ * @return created Cell
+ * @throws IOException
+ */
+ @Deprecated
+ public Cell create(byte[] row, int roffset, int rlength, byte[] family, int foffset, int flength,
+ byte[] qualifier, int qoffset, int qlength, long timestamp, byte[] value, int voffset,
+ int vlength, String visExpression) throws IOException {
+ List<Tag> visTags = null;
+ if (visExpression != null) {
+ visTags = this.visExpResolver.createVisibilityExpTags(visExpression);
+ }
+ return new KeyValue(row, roffset, rlength, family, foffset, flength, qualifier, qoffset,
+ qlength, timestamp, KeyValue.Type.Put, value, voffset, vlength, visTags);
+ }
+
+ /**
+ * @param row row key
+ * @param roffset row offset
+ * @param rlength row length
+ * @param family family name
+ * @param foffset family offset
+ * @param flength family length
+ * @param qualifier column qualifier
+ * @param qoffset qualifier offset
+ * @param qlength qualifier length
+ * @param timestamp version timestamp
+ * @param value column value
+ * @param voffset value offset
+ * @param vlength value length
+ * @param tags tags to be associated with the created cell
+ * @return created Cell
+ * @throws IOException
+ */
+ public Cell create(byte[] row, int roffset, int rlength, byte[] family, int foffset, int flength,
+ byte[] qualifier, int qoffset, int qlength, long timestamp, byte[] value, int voffset,
+ int vlength, List<Tag> tags) throws IOException {
+ return new KeyValue(row, roffset, rlength, family, foffset, flength, qualifier, qoffset,
+ qlength, timestamp, KeyValue.Type.Put, value, voffset, vlength, tags);
+ }
+
+ /**
+ * @return Visibility expression resolver
+ */
+ public VisibilityExpressionResolver getVisibilityExpressionResolver() {
+ return this.visExpResolver;
+ }
+}
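
A rough usage sketch for the facade above, assuming an HBase configuration that
the configured VisibilityExpressionResolver can initialize against; the example
class plus the row, family, qualifier, and value coordinates are made up:

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.hbase.Cell;
  import org.apache.hadoop.hbase.HBaseConfiguration;
  import org.apache.hadoop.hbase.mapreduce.CellCreator;
  import org.apache.hadoop.hbase.util.Bytes;

  public class CellCreatorExample {
    public static void main(String[] args) throws Exception {
      Configuration conf = HBaseConfiguration.create();
      CellCreator creator = new CellCreator(conf);   // resolver is initialized here
      byte[] row = Bytes.toBytes("row1");            // hypothetical coordinates
      byte[] family = Bytes.toBytes("f");
      byte[] qualifier = Bytes.toBytes("q");
      byte[] value = Bytes.toBytes("v");
      // Builds a Put-type Cell; the overloads taking a List<Tag> or a visibility
      // expression attach tags to the created cell.
      Cell cell = creator.create(row, 0, row.length, family, 0, family.length,
          qualifier, 0, qualifier.length, System.currentTimeMillis(),
          value, 0, value.length);
      System.out.println(cell);
    }
  }
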
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/ScanPerformanceEvaluation.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/ScanPerformanceEvaluation.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/ScanPerformanceEvaluation.java
new file mode 100644
index 0000000..e669f14
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/ScanPerformanceEvaluation.java
@@ -0,0 +1,406 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase;
+
+import java.io.IOException;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.client.TableSnapshotScanner;
+import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
+import org.apache.hadoop.hbase.mapreduce.TableMapper;
+import org.apache.hadoop.hbase.util.AbstractHBaseTool;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.Counters;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.util.ToolRunner;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.base.Stopwatch;
+
+/**
+ * A simple performance evaluation tool for single-client scans, MapReduce scans,
+ * and snapshot scans.
+ */
+@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
+public class ScanPerformanceEvaluation extends AbstractHBaseTool {
+
+ private static final String HBASE_COUNTER_GROUP_NAME = "HBase Counters";
+
+ private String type;
+ private String file;
+ private String tablename;
+ private String snapshotName;
+ private String restoreDir;
+ private String caching;
+
+ @Override
+ public void setConf(Configuration conf) {
+ super.setConf(conf);
+ Path rootDir;
+ try {
+ rootDir = FSUtils.getRootDir(conf);
+ rootDir.getFileSystem(conf);
+ } catch (IOException ex) {
+ throw new RuntimeException(ex);
+ }
+ }
+
+ @Override
+ protected void addOptions() {
+ this.addRequiredOptWithArg("t", "type", "the type of the test. One of the following: streaming|scan|snapshotscan|scanmapreduce|snapshotscanmapreduce");
+ this.addOptWithArg("f", "file", "the filename to read from");
+ this.addOptWithArg("tn", "table", "the tablename to read from");
+ this.addOptWithArg("sn", "snapshot", "the snapshot name to read from");
+ this.addOptWithArg("rs", "restoredir", "the directory to restore the snapshot");
+ this.addOptWithArg("ch", "caching", "scanner caching value");
+ }
+
+ @Override
+ protected void processOptions(CommandLine cmd) {
+ type = cmd.getOptionValue("type");
+ file = cmd.getOptionValue("file");
+ tablename = cmd.getOptionValue("table");
+ snapshotName = cmd.getOptionValue("snapshot");
+ restoreDir = cmd.getOptionValue("restoredir");
+ caching = cmd.getOptionValue("caching");
+ }
+
+ protected void testHdfsStreaming(Path filename) throws IOException {
+ byte[] buf = new byte[1024];
+ FileSystem fs = filename.getFileSystem(getConf());
+
+ // read the file from start to finish
+ Stopwatch fileOpenTimer = Stopwatch.createUnstarted();
+ Stopwatch streamTimer = Stopwatch.createUnstarted();
+
+ fileOpenTimer.start();
+ FSDataInputStream in = fs.open(filename);
+ fileOpenTimer.stop();
+
+ long totalBytes = 0;
+ streamTimer.start();
+ while (true) {
+ int read = in.read(buf);
+ if (read < 0) {
+ break;
+ }
+ totalBytes += read;
+ }
+ streamTimer.stop();
+
+ double throughput = (double)totalBytes / streamTimer.elapsed(TimeUnit.SECONDS);
+
+ System.out.println("HDFS streaming: ");
+ System.out.println("total time to open: " +
+ fileOpenTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
+ System.out.println("total time to read: " + streamTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
+ System.out.println("total bytes: " + totalBytes + " bytes ("
+ + StringUtils.humanReadableInt(totalBytes) + ")");
+ System.out.println("throghput : " + StringUtils.humanReadableInt((long)throughput) + "B/s");
+ }
+
+ private Scan getScan() {
+ Scan scan = new Scan(); // default scan settings
+ scan.setCacheBlocks(false);
+ scan.setMaxVersions(1);
+ scan.setScanMetricsEnabled(true);
+ if (caching != null) {
+ scan.setCaching(Integer.parseInt(caching));
+ }
+
+ return scan;
+ }
+
+ public void testScan() throws IOException {
+ Stopwatch tableOpenTimer = Stopwatch.createUnstarted();
+ Stopwatch scanOpenTimer = Stopwatch.createUnstarted();
+ Stopwatch scanTimer = Stopwatch.createUnstarted();
+
+ tableOpenTimer.start();
+ Connection connection = ConnectionFactory.createConnection(getConf());
+ Table table = connection.getTable(TableName.valueOf(tablename));
+ tableOpenTimer.stop();
+
+ Scan scan = getScan();
+ scanOpenTimer.start();
+ ResultScanner scanner = table.getScanner(scan);
+ scanOpenTimer.stop();
+
+ long numRows = 0;
+ long numCells = 0;
+ scanTimer.start();
+ while (true) {
+ Result result = scanner.next();
+ if (result == null) {
+ break;
+ }
+ numRows++;
+
+ numCells += result.rawCells().length;
+ }
+ scanTimer.stop();
+ scanner.close();
+ table.close();
+ connection.close();
+
+ ScanMetrics metrics = scan.getScanMetrics();
+ long totalBytes = metrics.countOfBytesInResults.get();
+ double throughput = (double)totalBytes / scanTimer.elapsed(TimeUnit.SECONDS);
+ double throughputRows = (double)numRows / scanTimer.elapsed(TimeUnit.SECONDS);
+ double throughputCells = (double)numCells / scanTimer.elapsed(TimeUnit.SECONDS);
+
+ System.out.println("HBase scan: ");
+ System.out.println("total time to open table: " +
+ tableOpenTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
+ System.out.println("total time to open scanner: " +
+ scanOpenTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
+ System.out.println("total time to scan: " +
+ scanTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
+
+ System.out.println("Scan metrics:\n" + metrics.getMetricsMap());
+
+ System.out.println("total bytes: " + totalBytes + " bytes ("
+ + StringUtils.humanReadableInt(totalBytes) + ")");
+ System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughput) + "B/s");
+ System.out.println("total rows : " + numRows);
+ System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughputRows) + " rows/s");
+ System.out.println("total cells : " + numCells);
+ System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughputCells) + " cells/s");
+ }
+
+
+ public void testSnapshotScan() throws IOException {
+ Stopwatch snapshotRestoreTimer = Stopwatch.createUnstarted();
+ Stopwatch scanOpenTimer = Stopwatch.createUnstarted();
+ Stopwatch scanTimer = Stopwatch.createUnstarted();
+
+ Path restoreDir = new Path(this.restoreDir);
+
+ snapshotRestoreTimer.start();
+ restoreDir.getFileSystem(conf).delete(restoreDir, true);
+ snapshotRestoreTimer.stop();
+
+ Scan scan = getScan();
+ scanOpenTimer.start();
+ TableSnapshotScanner scanner = new TableSnapshotScanner(conf, restoreDir, snapshotName, scan);
+ scanOpenTimer.stop();
+
+ long numRows = 0;
+ long numCells = 0;
+ scanTimer.start();
+ while (true) {
+ Result result = scanner.next();
+ if (result == null) {
+ break;
+ }
+ numRows++;
+
+ numCells += result.rawCells().length;
+ }
+ scanTimer.stop();
+ scanner.close();
+
+ ScanMetrics metrics = scanner.getScanMetrics();
+ long totalBytes = metrics.countOfBytesInResults.get();
+ double throughput = (double)totalBytes / scanTimer.elapsed(TimeUnit.SECONDS);
+ double throughputRows = (double)numRows / scanTimer.elapsed(TimeUnit.SECONDS);
+ double throughputCells = (double)numCells / scanTimer.elapsed(TimeUnit.SECONDS);
+
+ System.out.println("HBase scan snapshot: ");
+ System.out.println("total time to restore snapshot: " +
+ snapshotRestoreTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
+ System.out.println("total time to open scanner: " +
+ scanOpenTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
+ System.out.println("total time to scan: " +
+ scanTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
+
+ System.out.println("Scan metrics:\n" + metrics.getMetricsMap());
+
+ System.out.println("total bytes: " + totalBytes + " bytes ("
+ + StringUtils.humanReadableInt(totalBytes) + ")");
+ System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughput) + "B/s");
+ System.out.println("total rows : " + numRows);
+ System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughputRows) + " rows/s");
+ System.out.println("total cells : " + numCells);
+ System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughputCells) + " cells/s");
+
+ }
+
+ public static enum ScanCounter {
+ NUM_ROWS,
+ NUM_CELLS,
+ }
+
+ public static class MyMapper<KEYOUT, VALUEOUT> extends TableMapper<KEYOUT, VALUEOUT> {
+ @Override
+ protected void map(ImmutableBytesWritable key, Result value,
+ Context context) throws IOException,
+ InterruptedException {
+ context.getCounter(ScanCounter.NUM_ROWS).increment(1);
+ context.getCounter(ScanCounter.NUM_CELLS).increment(value.rawCells().length);
+ }
+ }
+
+ public void testScanMapReduce() throws IOException, InterruptedException, ClassNotFoundException {
+ Stopwatch scanOpenTimer = Stopwatch.createUnstarted();
+ Stopwatch scanTimer = Stopwatch.createUnstarted();
+
+ Scan scan = getScan();
+
+ String jobName = "testScanMapReduce";
+
+ Job job = new Job(conf);
+ job.setJobName(jobName);
+
+ job.setJarByClass(getClass());
+
+ TableMapReduceUtil.initTableMapperJob(
+ this.tablename,
+ scan,
+ MyMapper.class,
+ NullWritable.class,
+ NullWritable.class,
+ job
+ );
+
+ job.setNumReduceTasks(0);
+ job.setOutputKeyClass(NullWritable.class);
+ job.setOutputValueClass(NullWritable.class);
+ job.setOutputFormatClass(NullOutputFormat.class);
+
+ scanTimer.start();
+ job.waitForCompletion(true);
+ scanTimer.stop();
+
+ Counters counters = job.getCounters();
+ long numRows = counters.findCounter(ScanCounter.NUM_ROWS).getValue();
+ long numCells = counters.findCounter(ScanCounter.NUM_CELLS).getValue();
+
+ long totalBytes = counters.findCounter(HBASE_COUNTER_GROUP_NAME, "BYTES_IN_RESULTS").getValue();
+ double throughput = (double)totalBytes / scanTimer.elapsed(TimeUnit.SECONDS);
+ double throughputRows = (double)numRows / scanTimer.elapsed(TimeUnit.SECONDS);
+ double throughputCells = (double)numCells / scanTimer.elapsed(TimeUnit.SECONDS);
+
+ System.out.println("HBase scan mapreduce: ");
+ System.out.println("total time to open scanner: " +
+ scanOpenTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
+ System.out.println("total time to scan: " + scanTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
+
+ System.out.println("total bytes: " + totalBytes + " bytes ("
+ + StringUtils.humanReadableInt(totalBytes) + ")");
+ System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughput) + "B/s");
+ System.out.println("total rows : " + numRows);
+ System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughputRows) + " rows/s");
+ System.out.println("total cells : " + numCells);
+ System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughputCells) + " cells/s");
+ }
+
+ public void testSnapshotScanMapReduce() throws IOException, InterruptedException, ClassNotFoundException {
+ Stopwatch scanOpenTimer = Stopwatch.createUnstarted();
+ Stopwatch scanTimer = Stopwatch.createUnstarted();
+
+ Scan scan = getScan();
+
+ String jobName = "testSnapshotScanMapReduce";
+
+ Job job = new Job(conf);
+ job.setJobName(jobName);
+
+ job.setJarByClass(getClass());
+
+ TableMapReduceUtil.initTableSnapshotMapperJob(
+ this.snapshotName,
+ scan,
+ MyMapper.class,
+ NullWritable.class,
+ NullWritable.class,
+ job,
+ true,
+ new Path(restoreDir)
+ );
+
+ job.setNumReduceTasks(0);
+ job.setOutputKeyClass(NullWritable.class);
+ job.setOutputValueClass(NullWritable.class);
+ job.setOutputFormatClass(NullOutputFormat.class);
+
+ scanTimer.start();
+ job.waitForCompletion(true);
+ scanTimer.stop();
+
+ Counters counters = job.getCounters();
+ long numRows = counters.findCounter(ScanCounter.NUM_ROWS).getValue();
+ long numCells = counters.findCounter(ScanCounter.NUM_CELLS).getValue();
+
+ long totalBytes = counters.findCounter(HBASE_COUNTER_GROUP_NAME, "BYTES_IN_RESULTS").getValue();
+ double throughput = (double)totalBytes / scanTimer.elapsed(TimeUnit.SECONDS);
+ double throughputRows = (double)numRows / scanTimer.elapsed(TimeUnit.SECONDS);
+ double throughputCells = (double)numCells / scanTimer.elapsed(TimeUnit.SECONDS);
+
+ System.out.println("HBase scan mapreduce: ");
+ System.out.println("total time to open scanner: " +
+ scanOpenTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
+ System.out.println("total time to scan: " + scanTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
+
+ System.out.println("total bytes: " + totalBytes + " bytes ("
+ + StringUtils.humanReadableInt(totalBytes) + ")");
+ System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughput) + "B/s");
+ System.out.println("total rows : " + numRows);
+ System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughputRows) + " rows/s");
+ System.out.println("total cells : " + numCells);
+ System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughputCells) + " cells/s");
+ }
+
+ @Override
+ protected int doWork() throws Exception {
+ if (type.equals("streaming")) {
+ testHdfsStreaming(new Path(file));
+ } else if (type.equals("scan")){
+ testScan();
+ } else if (type.equals("snapshotscan")) {
+ testSnapshotScan();
+ } else if (type.equals("scanmapreduce")) {
+ testScanMapReduce();
+ } else if (type.equals("snapshotscanmapreduce")) {
+ testSnapshotScanMapReduce();
+ }
+ return 0;
+ }
+
+ public static void main (String[] args) throws Exception {
+ int ret = ToolRunner.run(HBaseConfiguration.create(), new ScanPerformanceEvaluation(), args);
+ System.exit(ret);
+ }
+}
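
A minimal sketch of launching the tool above for the plain "scan" test, using the
option names declared in addOptions(); the launcher class, table name, and caching
value are hypothetical, and the tool itself lives in test sources:

  import org.apache.hadoop.hbase.HBaseConfiguration;
  import org.apache.hadoop.hbase.ScanPerformanceEvaluation;
  import org.apache.hadoop.util.ToolRunner;

  public class ScanPerfLauncher {
    public static void main(String[] args) throws Exception {
      // -t is required: streaming|scan|snapshotscan|scanmapreduce|snapshotscanmapreduce
      String[] toolArgs = new String[] {
          "-t", "scan",
          "-tn", "testtable",   // hypothetical table
          "-ch", "100" };       // scanner caching
      System.exit(ToolRunner.run(HBaseConfiguration.create(),
          new ScanPerformanceEvaluation(), toolArgs));
    }
  }
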
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/TestPerformanceEvaluation.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/TestPerformanceEvaluation.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/TestPerformanceEvaluation.java
new file mode 100644
index 0000000..86a3d3f
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/TestPerformanceEvaluation.java
@@ -0,0 +1,218 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase;
+
+import static org.junit.Assert.*;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
+import java.util.NoSuchElementException;
+import java.util.Queue;
+import java.util.Random;
+import java.util.LinkedList;
+
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.PerformanceEvaluation.RandomReadTest;
+import org.apache.hadoop.hbase.PerformanceEvaluation.TestOptions;
+import org.apache.hadoop.hbase.testclassification.MiscTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.codehaus.jackson.JsonGenerationException;
+import org.codehaus.jackson.map.JsonMappingException;
+import org.codehaus.jackson.map.ObjectMapper;
+import org.junit.Ignore;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import com.codahale.metrics.Histogram;
+import com.codahale.metrics.Snapshot;
+import com.codahale.metrics.UniformReservoir;
+
+@Category({MiscTests.class, SmallTests.class})
+public class TestPerformanceEvaluation {
+ private static final HBaseTestingUtility HTU = new HBaseTestingUtility();
+
+ @Test
+ public void testSerialization()
+ throws JsonGenerationException, JsonMappingException, IOException {
+ PerformanceEvaluation.TestOptions options = new PerformanceEvaluation.TestOptions();
+ assertTrue(!options.isAutoFlush());
+ options.setAutoFlush(true);
+ ObjectMapper mapper = new ObjectMapper();
+ String optionsString = mapper.writeValueAsString(options);
+ PerformanceEvaluation.TestOptions optionsDeserialized =
+ mapper.readValue(optionsString, PerformanceEvaluation.TestOptions.class);
+ assertTrue(optionsDeserialized.isAutoFlush());
+ }
+
+ /**
+ * Exercise the mr spec writing. Simple assertions to make sure it is basically working.
+ * @throws IOException
+ */
+ @Ignore @Test
+ public void testWriteInputFile() throws IOException {
+ TestOptions opts = new PerformanceEvaluation.TestOptions();
+ final int clients = 10;
+ opts.setNumClientThreads(clients);
+ opts.setPerClientRunRows(10);
+ Path dir =
+ PerformanceEvaluation.writeInputFile(HTU.getConfiguration(), opts, HTU.getDataTestDir());
+ FileSystem fs = FileSystem.get(HTU.getConfiguration());
+ Path p = new Path(dir, PerformanceEvaluation.JOB_INPUT_FILENAME);
+ long len = fs.getFileStatus(p).getLen();
+ assertTrue(len > 0);
+ byte [] content = new byte[(int)len];
+ FSDataInputStream dis = fs.open(p);
+ try {
+ dis.readFully(content);
+ BufferedReader br =
+ new BufferedReader(new InputStreamReader(new ByteArrayInputStream(content)));
+ int count = 0;
+ while (br.readLine() != null) {
+ count++;
+ }
+ assertEquals(clients, count);
+ } finally {
+ dis.close();
+ }
+ }
+
+ @Test
+ public void testSizeCalculation() {
+ TestOptions opts = new PerformanceEvaluation.TestOptions();
+ opts = PerformanceEvaluation.calculateRowsAndSize(opts);
+ int rows = opts.getPerClientRunRows();
+ // Default row count
+ final int defaultPerClientRunRows = 1024 * 1024;
+ assertEquals(defaultPerClientRunRows, rows);
+ // If size is 2G, then twice the row count.
+ opts.setSize(2.0f);
+ opts = PerformanceEvaluation.calculateRowsAndSize(opts);
+ assertEquals(defaultPerClientRunRows * 2, opts.getPerClientRunRows());
+ // If two clients, then they get half the rows each.
+ opts.setNumClientThreads(2);
+ opts = PerformanceEvaluation.calculateRowsAndSize(opts);
+ assertEquals(defaultPerClientRunRows, opts.getPerClientRunRows());
+ // If valueSize is 'random', values average half of valueSize, so twice the rows.
+ opts.valueRandom = true;
+ opts = PerformanceEvaluation.calculateRowsAndSize(opts);
+ assertEquals(defaultPerClientRunRows * 2, opts.getPerClientRunRows());
+ }
+
+ @Test
+ public void testRandomReadCalculation() {
+ TestOptions opts = new PerformanceEvaluation.TestOptions();
+ opts = PerformanceEvaluation.calculateRowsAndSize(opts);
+ int rows = opts.getPerClientRunRows();
+ // Default row count
+ final int defaultPerClientRunRows = 1024 * 1024;
+ assertEquals(defaultPerClientRunRows, rows);
+ // If size is 2G, then twice the row count.
+ opts.setSize(2.0f);
+ opts.setPerClientRunRows(1000);
+ opts.setCmdName(PerformanceEvaluation.RANDOM_READ);
+ opts = PerformanceEvaluation.calculateRowsAndSize(opts);
+ assertEquals(1000, opts.getPerClientRunRows());
+ // If two clients, then they get half the rows each.
+ opts.setNumClientThreads(2);
+ opts = PerformanceEvaluation.calculateRowsAndSize(opts);
+ assertEquals(1000, opts.getPerClientRunRows());
+ Random random = new Random();
+ // assuming we will get one before this loop expires
+ boolean foundValue = false;
+ for (int i = 0; i < 10000000; i++) {
+ int randomRow = PerformanceEvaluation.generateRandomRow(random, opts.totalRows);
+ if (randomRow > 1000) {
+ foundValue = true;
+ break;
+ }
+ }
+ assertTrue("We need to get a value more than 1000", foundValue);
+ }
+
+ @Test
+ public void testZipfian()
+ throws NoSuchMethodException, SecurityException, InstantiationException, IllegalAccessException,
+ IllegalArgumentException, InvocationTargetException {
+ TestOptions opts = new PerformanceEvaluation.TestOptions();
+ opts.setValueZipf(true);
+ final int valueSize = 1024;
+ opts.setValueSize(valueSize);
+ RandomReadTest rrt = new RandomReadTest(null, opts, null);
+ Constructor<?> ctor =
+ Histogram.class.getDeclaredConstructor(com.codahale.metrics.Reservoir.class);
+ ctor.setAccessible(true);
+ Histogram histogram = (Histogram)ctor.newInstance(new UniformReservoir(1024 * 500));
+ for (int i = 0; i < 100; i++) {
+ histogram.update(rrt.getValueLength(null));
+ }
+ Snapshot snapshot = histogram.getSnapshot();
+ double stddev = snapshot.getStdDev();
+ assertTrue(stddev != 0 && stddev != 1.0);
+ assertTrue(snapshot.getStdDev() != 0);
+ double median = snapshot.getMedian();
+ assertTrue(median != 0 && median != 1 && median != valueSize);
+ }
+
+ @Test
+ public void testParseOptsWithThreads() {
+ Queue<String> opts = new LinkedList<>();
+ String cmdName = "sequentialWrite";
+ int threads = 1;
+ opts.offer(cmdName);
+ opts.offer(String.valueOf(threads));
+ PerformanceEvaluation.TestOptions options = PerformanceEvaluation.parseOpts(opts);
+ assertNotNull(options);
+ assertNotNull(options.getCmdName());
+ assertEquals(cmdName, options.getCmdName());
+ assertEquals(threads, options.getNumClientThreads());
+ }
+
+ @Test
+ public void testParseOptsWrongThreads() {
+ Queue<String> opts = new LinkedList<>();
+ String cmdName = "sequentialWrite";
+ opts.offer(cmdName);
+ opts.offer("qq");
+ try {
+ PerformanceEvaluation.parseOpts(opts);
+ fail("Expected IllegalArgumentException for a non-numeric thread count");
+ } catch (IllegalArgumentException e) {
+ System.out.println(e.getMessage());
+ assertEquals("Command " + cmdName + " does not have threads number", e.getMessage());
+ assertTrue(e.getCause() instanceof NumberFormatException);
+ }
+ }
+
+ @Test
+ public void testParseOptsNoThreads() {
+ Queue<String> opts = new LinkedList<>();
+ String cmdName = "sequentialWrite";
+ try {
+ PerformanceEvaluation.parseOpts(opts);
+ fail("Expected IllegalArgumentException when no thread count is supplied");
+ } catch (IllegalArgumentException e) {
+ System.out.println(e.getMessage());
+ assertEquals("Command " + cmdName + " does not have threads number", e.getMessage());
+ assertTrue(e.getCause() instanceof NoSuchElementException);
+ }
+ }
+}
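
A condensed sketch of the parseOpts() behaviour the tests above exercise; the
command name and thread count are arbitrary, and the class is declared in the
same package as PerformanceEvaluation so package-visible members are reachable,
as in the test:

  package org.apache.hadoop.hbase;

  import java.util.LinkedList;
  import java.util.Queue;

  public class ParseOptsExample {
    public static void main(String[] args) {
      Queue<String> opts = new LinkedList<>();
      opts.offer("sequentialWrite");   // command name
      opts.offer("4");                 // thread count; must parse as an integer
      PerformanceEvaluation.TestOptions options = PerformanceEvaluation.parseOpts(opts);
      System.out.println(options.getCmdName() + " with "
          + options.getNumClientThreads() + " client thread(s)");
    }
  }
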
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestDriver.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestDriver.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestDriver.java
new file mode 100644
index 0000000..d085c21
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestDriver.java
@@ -0,0 +1,41 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.util.ProgramDriver;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.mockito.Mockito;
+
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.verify;
+
+@Category({MapReduceTests.class, SmallTests.class})
+public class TestDriver {
+
+ @Test
+ public void testDriverMainMethod() throws Throwable {
+ ProgramDriver programDriverMock = mock(ProgramDriver.class);
+ Driver.setProgramDriver(programDriverMock);
+ Driver.main(new String[]{});
+ verify(programDriverMock).driver(Mockito.any(String[].class));
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestGroupingTableMap.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestGroupingTableMap.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestGroupingTableMap.java
new file mode 100644
index 0000000..7131cf9
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestGroupingTableMap.java
@@ -0,0 +1,181 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertNull;
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.verifyZeroInteractions;
+import static org.mockito.Mockito.verifyNoMoreInteractions;
+import static org.mockito.Mockito.when;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.ImmutableList;
+
+@Category({MapReduceTests.class, SmallTests.class})
+public class TestGroupingTableMap {
+
+ @Test
+ @SuppressWarnings({ "deprecation", "unchecked" })
+ public void shouldNotCallCollectSinceFindUniqueKeyValueMoreThanOnce()
+ throws Exception {
+ GroupingTableMap gTableMap = null;
+ try {
+ Result result = mock(Result.class);
+ Reporter reporter = mock(Reporter.class);
+ gTableMap = new GroupingTableMap();
+ Configuration cfg = new Configuration();
+ cfg.set(GroupingTableMap.GROUP_COLUMNS, "familyA:qualifierA familyB:qualifierB");
+ JobConf jobConf = new JobConf(cfg);
+ gTableMap.configure(jobConf);
+
+ byte[] row = {};
+ List<Cell> keyValues = ImmutableList.<Cell>of(
+ new KeyValue(row, "familyA".getBytes(), "qualifierA".getBytes(), Bytes.toBytes("1111")),
+ new KeyValue(row, "familyA".getBytes(), "qualifierA".getBytes(), Bytes.toBytes("2222")),
+ new KeyValue(row, "familyB".getBytes(), "qualifierB".getBytes(), Bytes.toBytes("3333")));
+ when(result.listCells()).thenReturn(keyValues);
+ OutputCollector<ImmutableBytesWritable, Result> outputCollectorMock =
+ mock(OutputCollector.class);
+ gTableMap.map(null, result, outputCollectorMock, reporter);
+ verify(result).listCells();
+ verifyZeroInteractions(outputCollectorMock);
+ } finally {
+ if (gTableMap != null)
+ gTableMap.close();
+ }
+ }
+
+ @Test
+ @SuppressWarnings({ "deprecation", "unchecked" })
+ public void shouldCreateNewKeyAlthoughExtraKey() throws Exception {
+ GroupingTableMap gTableMap = null;
+ try {
+ Result result = mock(Result.class);
+ Reporter reporter = mock(Reporter.class);
+ gTableMap = new GroupingTableMap();
+ Configuration cfg = new Configuration();
+ cfg.set(GroupingTableMap.GROUP_COLUMNS, "familyA:qualifierA familyB:qualifierB");
+ JobConf jobConf = new JobConf(cfg);
+ gTableMap.configure(jobConf);
+
+ byte[] row = {};
+ List<Cell> keyValues = ImmutableList.<Cell>of(
+ new KeyValue(row, "familyA".getBytes(), "qualifierA".getBytes(), Bytes.toBytes("1111")),
+ new KeyValue(row, "familyB".getBytes(), "qualifierB".getBytes(), Bytes.toBytes("2222")),
+ new KeyValue(row, "familyC".getBytes(), "qualifierC".getBytes(), Bytes.toBytes("3333")));
+ when(result.listCells()).thenReturn(keyValues);
+ OutputCollector<ImmutableBytesWritable, Result> outputCollectorMock =
+ mock(OutputCollector.class);
+ gTableMap.map(null, result, outputCollectorMock, reporter);
+ verify(result).listCells();
+ verify(outputCollectorMock, times(1))
+ .collect(any(ImmutableBytesWritable.class), any(Result.class));
+ verifyNoMoreInteractions(outputCollectorMock);
+ } finally {
+ if (gTableMap != null)
+ gTableMap.close();
+ }
+ }
+
+ @Test
+ @SuppressWarnings({ "deprecation" })
+ public void shouldCreateNewKey() throws Exception {
+ GroupingTableMap gTableMap = null;
+ try {
+ Result result = mock(Result.class);
+ Reporter reporter = mock(Reporter.class);
+ final byte[] bSeparator = Bytes.toBytes(" ");
+ gTableMap = new GroupingTableMap();
+ Configuration cfg = new Configuration();
+ cfg.set(GroupingTableMap.GROUP_COLUMNS, "familyA:qualifierA familyB:qualifierB");
+ JobConf jobConf = new JobConf(cfg);
+ gTableMap.configure(jobConf);
+
+ final byte[] firstPartKeyValue = Bytes.toBytes("34879512738945");
+ final byte[] secondPartKeyValue = Bytes.toBytes("35245142671437");
+ byte[] row = {};
+ List<Cell> cells = ImmutableList.<Cell>of(
+ new KeyValue(row, "familyA".getBytes(), "qualifierA".getBytes(), firstPartKeyValue),
+ new KeyValue(row, "familyB".getBytes(), "qualifierB".getBytes(), secondPartKeyValue));
+ when(result.listCells()).thenReturn(cells);
+
+ final AtomicBoolean outputCollected = new AtomicBoolean();
+ OutputCollector<ImmutableBytesWritable, Result> outputCollector =
+ new OutputCollector<ImmutableBytesWritable, Result>() {
+ @Override
+ public void collect(ImmutableBytesWritable arg, Result result) throws IOException {
+ assertArrayEquals(org.apache.hadoop.hbase.shaded.com.google.common.primitives.
+ Bytes.concat(firstPartKeyValue, bSeparator,
+ secondPartKeyValue), arg.copyBytes());
+ outputCollected.set(true);
+ }
+ };
+
+ gTableMap.map(null, result, outputCollector, reporter);
+ verify(result).listCells();
+ Assert.assertTrue("Output not received", outputCollected.get());
+
+ final byte[] firstPartValue = Bytes.toBytes("238947928");
+ final byte[] secondPartValue = Bytes.toBytes("4678456942345");
+ byte[][] data = { firstPartValue, secondPartValue };
+ ImmutableBytesWritable byteWritable = gTableMap.createGroupKey(data);
+ assertArrayEquals(org.apache.hadoop.hbase.shaded.com.google.common.primitives.
+ Bytes.concat(firstPartValue,
+ bSeparator, secondPartValue), byteWritable.get());
+ } finally {
+ if (gTableMap != null)
+ gTableMap.close();
+ }
+ }
+
+ @Test
+ @SuppressWarnings({ "deprecation" })
+ public void shouldReturnNullFromCreateGroupKey() throws Exception {
+ GroupingTableMap gTableMap = null;
+ try {
+ gTableMap = new GroupingTableMap();
+ assertNull(gTableMap.createGroupKey(null));
+ } finally {
+ if(gTableMap != null)
+ gTableMap.close();
+ }
+ }
+}
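
A small sketch of the group-key behaviour verified above; the grouped columns and
values are arbitrary, and the class is placed in the same package as
GroupingTableMap (like the test) so package-visible members are reachable:

  package org.apache.hadoop.hbase.mapred;

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
  import org.apache.hadoop.hbase.util.Bytes;
  import org.apache.hadoop.mapred.JobConf;

  public class GroupKeyExample {
    public static void main(String[] args) throws Exception {
      Configuration cfg = new Configuration();
      cfg.set(GroupingTableMap.GROUP_COLUMNS, "familyA:qualifierA familyB:qualifierB");
      GroupingTableMap map = new GroupingTableMap();
      map.configure(new JobConf(cfg));
      // Values of the grouped columns are concatenated with a single space separator.
      ImmutableBytesWritable key = map.createGroupKey(new byte[][] {
          Bytes.toBytes("valueA"), Bytes.toBytes("valueB") });
      System.out.println(Bytes.toString(key.get()));
      map.close();
    }
  }
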
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestIdentityTableMap.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestIdentityTableMap.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestIdentityTableMap.java
new file mode 100644
index 0000000..e222d0b
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestIdentityTableMap.java
@@ -0,0 +1,64 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+
+import java.io.IOException;
+
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.mockito.Mockito;
+
+@Category({MapReduceTests.class, SmallTests.class})
+public class TestIdentityTableMap {
+
+ @Test
+ @SuppressWarnings({ "deprecation", "unchecked" })
+ public void shouldCollectPredefinedTimes() throws IOException {
+ int recordNumber = 999;
+ Result resultMock = mock(Result.class);
+ IdentityTableMap identityTableMap = null;
+ try {
+ Reporter reporterMock = mock(Reporter.class);
+ identityTableMap = new IdentityTableMap();
+ ImmutableBytesWritable bytesWritableMock = mock(ImmutableBytesWritable.class);
+ OutputCollector<ImmutableBytesWritable, Result> outputCollectorMock =
+ mock(OutputCollector.class);
+
+ for (int i = 0; i < recordNumber; i++)
+ identityTableMap.map(bytesWritableMock, resultMock, outputCollectorMock,
+ reporterMock);
+
+ verify(outputCollectorMock, times(recordNumber)).collect(
+ Mockito.any(ImmutableBytesWritable.class), Mockito.any(Result.class));
+ } finally {
+ if (identityTableMap != null)
+ identityTableMap.close();
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestMultiTableSnapshotInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestMultiTableSnapshotInputFormat.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestMultiTableSnapshotInputFormat.java
new file mode 100644
index 0000000..665c547
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestMultiTableSnapshotInputFormat.java
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mapred;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.RunningJob;
+import org.junit.experimental.categories.Category;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.List;
+
+import static org.junit.Assert.assertTrue;
+
+@Category({ VerySlowMapReduceTests.class, LargeTests.class })
+public class TestMultiTableSnapshotInputFormat
+ extends org.apache.hadoop.hbase.mapreduce.TestMultiTableSnapshotInputFormat {
+
+ private static final Log LOG = LogFactory.getLog(TestMultiTableSnapshotInputFormat.class);
+
+ @Override
+ protected void runJob(String jobName, Configuration c, List<Scan> scans)
+ throws IOException, InterruptedException, ClassNotFoundException {
+ JobConf job = new JobConf(TEST_UTIL.getConfiguration());
+
+ job.setJobName(jobName);
+ job.setMapperClass(Mapper.class);
+ job.setReducerClass(Reducer.class);
+
+ TableMapReduceUtil.initMultiTableSnapshotMapperJob(getSnapshotScanMapping(scans), Mapper.class,
+ ImmutableBytesWritable.class, ImmutableBytesWritable.class, job, true, restoreDir);
+
+ TableMapReduceUtil.addDependencyJars(job);
+
+ job.setReducerClass(Reducer.class);
+ job.setNumReduceTasks(1); // one to get final "first" and "last" key
+ FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
+ LOG.info("Started " + job.getJobName());
+
+ RunningJob runningJob = JobClient.runJob(job);
+ runningJob.waitForCompletion();
+ assertTrue(runningJob.isSuccessful());
+ LOG.info("After map/reduce completion - job " + jobName);
+ }
+
+ public static class Mapper extends TestMultiTableSnapshotInputFormat.ScanMapper
+ implements TableMap<ImmutableBytesWritable, ImmutableBytesWritable> {
+
+ @Override
+ public void map(ImmutableBytesWritable key, Result value,
+ OutputCollector<ImmutableBytesWritable, ImmutableBytesWritable> outputCollector,
+ Reporter reporter) throws IOException {
+ makeAssertions(key, value);
+ outputCollector.collect(key, key);
+ }
+
+ /**
+ * Closes this stream and releases any system resources associated
+ * with it. If the stream is already closed then invoking this
+ * method has no effect.
+ *
+ * @throws IOException if an I/O error occurs
+ */
+ @Override
+ public void close() throws IOException {
+ }
+
+ @Override
+ public void configure(JobConf jobConf) {
+
+ }
+ }
+
+ public static class Reducer extends TestMultiTableSnapshotInputFormat.ScanReducer implements
+ org.apache.hadoop.mapred.Reducer<ImmutableBytesWritable, ImmutableBytesWritable,
+ NullWritable, NullWritable> {
+
+ private JobConf jobConf;
+
+ @Override
+ public void reduce(ImmutableBytesWritable key, Iterator<ImmutableBytesWritable> values,
+ OutputCollector<NullWritable, NullWritable> outputCollector, Reporter reporter)
+ throws IOException {
+ makeAssertions(key, Lists.newArrayList(values));
+ }
+
+ /**
+ * Closes this stream and releases any system resources associated
+ * with it. If the stream is already closed then invoking this
+ * method has no effect.
+ *
+ * @throws IOException if an I/O error occurs
+ */
+ @Override
+ public void close() throws IOException {
+ super.cleanup(this.jobConf);
+ }
+
+ @Override
+ public void configure(JobConf jobConf) {
+ this.jobConf = jobConf;
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestRowCounter.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestRowCounter.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestRowCounter.java
new file mode 100644
index 0000000..4ebd8bf
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestRowCounter.java
@@ -0,0 +1,163 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.anyInt;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapred.RowCounter.RowCounterMapper;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.mockito.Mockito;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.base.Joiner;
+
+@Category({MapReduceTests.class, SmallTests.class})
+public class TestRowCounter {
+
+ @Test
+ @SuppressWarnings("deprecation")
+ public void shouldPrintUsage() throws Exception {
+ String expectedOutput = "rowcounter <outputdir> <tablename> <column1> [<column2>...]";
+ String result = new OutputReader(System.out) {
+ @Override
+ void doRead() {
+ assertEquals(-1, RowCounter.printUsage());
+ }
+ }.read();
+
+ assertTrue(result.startsWith(expectedOutput));
+ }
+
+ @Test
+ @SuppressWarnings("deprecation")
+ public void shouldExitAndPrintUsageSinceParameterNumberLessThanThree()
+ throws Exception {
+ final String[] args = new String[] { "one", "two" };
+ String line = "ERROR: Wrong number of parameters: " + args.length;
+ String result = new OutputReader(System.err) {
+ @Override
+ void doRead() throws Exception {
+ assertEquals(-1, new RowCounter().run(args));
+ }
+ }.read();
+
+ assertTrue(result.startsWith(line));
+ }
+
+ @Test
+ @SuppressWarnings({ "deprecation", "unchecked" })
+ public void shouldRegInReportEveryIncomingRow() throws IOException {
+ int iterationNumber = 999;
+ RowCounter.RowCounterMapper mapper = new RowCounter.RowCounterMapper();
+ Reporter reporter = mock(Reporter.class);
+ for (int i = 0; i < iterationNumber; i++)
+ mapper.map(mock(ImmutableBytesWritable.class), mock(Result.class),
+ mock(OutputCollector.class), reporter);
+
+ Mockito.verify(reporter, times(iterationNumber)).incrCounter(
+ any(Enum.class), anyInt());
+ }
+
+ @Test
+ @SuppressWarnings({ "deprecation" })
+ public void shouldCreateAndRunSubmittableJob() throws Exception {
+ RowCounter rCounter = new RowCounter();
+ rCounter.setConf(HBaseConfiguration.create());
+ String[] args = new String[] { "\temp", "tableA", "column1", "column2",
+ "column3" };
+ JobConf jobConfig = rCounter.createSubmittableJob(args);
+
+ assertNotNull(jobConfig);
+ assertEquals(0, jobConfig.getNumReduceTasks());
+ assertEquals("rowcounter", jobConfig.getJobName());
+ assertEquals(jobConfig.getMapOutputValueClass(), Result.class);
+ assertEquals(jobConfig.getMapperClass(), RowCounterMapper.class);
+ assertEquals(jobConfig.get(TableInputFormat.COLUMN_LIST), Joiner.on(' ')
+ .join("column1", "column2", "column3"));
+ assertEquals(jobConfig.getMapOutputKeyClass(), ImmutableBytesWritable.class);
+ }
+
+ enum Outs {
+ OUT, ERR
+ }
+
+ private static abstract class OutputReader {
+ private final PrintStream ps;
+ private PrintStream oldPrintStream;
+ private Outs outs;
+
+ protected OutputReader(PrintStream ps) {
+ this.ps = ps;
+ }
+
+ protected String read() throws Exception {
+ ByteArrayOutputStream outBytes = new ByteArrayOutputStream();
+ if (ps == System.out) {
+ oldPrintStream = System.out;
+ outs = Outs.OUT;
+ System.setOut(new PrintStream(outBytes));
+ } else if (ps == System.err) {
+ oldPrintStream = System.err;
+ outs = Outs.ERR;
+ System.setErr(new PrintStream(outBytes));
+ } else {
+ throw new IllegalStateException("OutputReader: unsupported PrintStream");
+ }
+
+ try {
+ doRead();
+ return new String(outBytes.toByteArray());
+ } finally {
+ switch (outs) {
+ case OUT: {
+ System.setOut(oldPrintStream);
+ break;
+ }
+ case ERR: {
+ System.setErr(oldPrintStream);
+ break;
+ }
+ default:
+ throw new IllegalStateException(
+ "OutputReader: unsupported PrintStream");
+ }
+ }
+ }
+
+ abstract void doRead() throws Exception;
+ }
+}
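For reference, the OutputReader helper above works by swapping System.out or System.err for a PrintStream backed by an in-memory buffer, running the code under test, and then restoring the original stream. A standalone sketch of that capture idiom (the class name and sample output are illustrative only, not part of the patch):

import java.io.ByteArrayOutputStream;
import java.io.PrintStream;

public class CaptureStdoutSketch {
  public static void main(String[] args) {
    ByteArrayOutputStream captured = new ByteArrayOutputStream();
    PrintStream original = System.out;
    System.setOut(new PrintStream(captured));           // redirect stdout into the buffer
    try {
      System.out.println("rowcounter usage goes here"); // stands in for the code under test
    } finally {
      System.setOut(original);                          // always restore the real stream
    }
    String output = new String(captured.toByteArray());
    System.out.println("captured: " + output.trim());
  }
}

The try/finally is the important part: the original stream is restored even when the assertion inside doRead() throws.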
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestSplitTable.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestSplitTable.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestSplitTable.java
new file mode 100644
index 0000000..2655ac2
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestSplitTable.java
@@ -0,0 +1,116 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.Assert;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+
+@Category({MapReduceTests.class, SmallTests.class})
+public class TestSplitTable {
+ @Rule
+ public TestName name = new TestName();
+
+ @Test
+ @SuppressWarnings("deprecation")
+ public void testSplitTableCompareTo() {
+ TableSplit aTableSplit = new TableSplit(Bytes.toBytes("tableA"),
+ Bytes.toBytes("aaa"), Bytes.toBytes("ddd"), "locationA");
+
+ TableSplit bTableSplit = new TableSplit(Bytes.toBytes("tableA"),
+ Bytes.toBytes("iii"), Bytes.toBytes("kkk"), "locationA");
+
+ TableSplit cTableSplit = new TableSplit(Bytes.toBytes("tableA"),
+ Bytes.toBytes("lll"), Bytes.toBytes("zzz"), "locationA");
+
+ assertTrue(aTableSplit.compareTo(aTableSplit) == 0);
+ assertTrue(bTableSplit.compareTo(bTableSplit) == 0);
+ assertTrue(cTableSplit.compareTo(cTableSplit) == 0);
+
+ assertTrue(aTableSplit.compareTo(bTableSplit) < 0);
+ assertTrue(bTableSplit.compareTo(aTableSplit) > 0);
+
+ assertTrue(aTableSplit.compareTo(cTableSplit) < 0);
+ assertTrue(cTableSplit.compareTo(aTableSplit) > 0);
+
+ assertTrue(bTableSplit.compareTo(cTableSplit) < 0);
+ assertTrue(cTableSplit.compareTo(bTableSplit) > 0);
+
+ assertTrue(cTableSplit.compareTo(aTableSplit) > 0);
+ }
+
+ @Test
+ @SuppressWarnings("deprecation")
+ public void testSplitTableEquals() {
+ byte[] tableA = Bytes.toBytes("tableA");
+ byte[] aaa = Bytes.toBytes("aaa");
+ byte[] ddd = Bytes.toBytes("ddd");
+ String locationA = "locationA";
+
+ TableSplit tablesplit = new TableSplit(tableA, aaa, ddd, locationA);
+
+ TableSplit tableB = new TableSplit(Bytes.toBytes("tableB"), aaa, ddd, locationA);
+ assertNotEquals(tablesplit.hashCode(), tableB.hashCode());
+ assertNotEquals(tablesplit, tableB);
+
+ TableSplit startBbb = new TableSplit(tableA, Bytes.toBytes("bbb"), ddd, locationA);
+ assertNotEquals(tablesplit.hashCode(), startBbb.hashCode());
+ assertNotEquals(tablesplit, startBbb);
+
+ TableSplit endEee = new TableSplit(tableA, aaa, Bytes.toBytes("eee"), locationA);
+ assertNotEquals(tablesplit.hashCode(), endEee.hashCode());
+ assertNotEquals(tablesplit, endEee);
+
+ TableSplit locationB = new TableSplit(tableA, aaa, ddd, "locationB");
+ assertNotEquals(tablesplit.hashCode(), locationB.hashCode());
+ assertNotEquals(tablesplit, locationB);
+
+ TableSplit same = new TableSplit(tableA, aaa, ddd, locationA);
+ assertEquals(tablesplit.hashCode(), same.hashCode());
+ assertEquals(tablesplit, same);
+ }
+
+ @Test
+ @SuppressWarnings("deprecation")
+ public void testToString() {
+ TableSplit split =
+ new TableSplit(TableName.valueOf(name.getMethodName()), "row-start".getBytes(), "row-end".getBytes(),
+ "location");
+ String str =
+ "HBase table split(table name: " + name.getMethodName() + ", start row: row-start, "
+ + "end row: row-end, region location: location)";
+ Assert.assertEquals(str, split.toString());
+
+ split = new TableSplit((TableName) null, null, null, null);
+ str =
+ "HBase table split(table name: null, start row: null, "
+ + "end row: null, region location: null)";
+ Assert.assertEquals(str, split.toString());
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableInputFormat.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableInputFormat.java
new file mode 100644
index 0000000..f39a7f5
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableInputFormat.java
@@ -0,0 +1,460 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Matchers.anyObject;
+import static org.mockito.Mockito.doAnswer;
+import static org.mockito.Mockito.doReturn;
+import static org.mockito.Mockito.doThrow;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.spy;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.NotServingRegionException;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
+import org.apache.hadoop.hbase.filter.Filter;
+import org.apache.hadoop.hbase.filter.RegexStringComparator;
+import org.apache.hadoop.hbase.filter.RowFilter;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.JobConfigurable;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.RunningJob;
+import org.apache.hadoop.mapred.lib.NullOutputFormat;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.stubbing.Answer;
+
+/**
+ * This tests the TableInputFormat and its recovery semantics
+ */
+@Category({MapReduceTests.class, LargeTests.class})
+public class TestTableInputFormat {
+
+ private static final Log LOG = LogFactory.getLog(TestTableInputFormat.class);
+
+ private final static HBaseTestingUtility UTIL = new HBaseTestingUtility();
+
+ static final byte[] FAMILY = Bytes.toBytes("family");
+
+ private static final byte[][] columns = new byte[][] { FAMILY };
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ UTIL.startMiniCluster();
+ }
+
+ @AfterClass
+ public static void afterClass() throws Exception {
+ UTIL.shutdownMiniCluster();
+ }
+
+ @Before
+ public void before() throws IOException {
+ LOG.info("before");
+ UTIL.ensureSomeRegionServersAvailable(1);
+ LOG.info("before done");
+ }
+
+ /**
+ * Set up a table with two rows and values.
+ *
+ * @param tableName the name of the table to create
+ * @return the created table, populated with rows "aaa" and "bbb"
+ * @throws IOException if table creation or the puts fail
+ */
+ public static Table createTable(byte[] tableName) throws IOException {
+ return createTable(tableName, new byte[][] { FAMILY });
+ }
+
+ /**
+ * Set up a table with two rows and values per column family.
+ *
+ * @param tableName the name of the table to create
+ * @param families the column families to create and populate
+ * @return the created table, populated with rows "aaa" and "bbb"
+ * @throws IOException if table creation or the puts fail
+ */
+ public static Table createTable(byte[] tableName, byte[][] families) throws IOException {
+ Table table = UTIL.createTable(TableName.valueOf(tableName), families);
+ Put p = new Put("aaa".getBytes());
+ for (byte[] family : families) {
+ p.addColumn(family, null, "value aaa".getBytes());
+ }
+ table.put(p);
+ p = new Put("bbb".getBytes());
+ for (byte[] family : families) {
+ p.addColumn(family, null, "value bbb".getBytes());
+ }
+ table.put(p);
+ return table;
+ }
+
+ /**
+ * Verify that the result and key have expected values.
+ *
+ * @param r
+ * @param key
+ * @param expectedKey
+ * @param expectedValue
+ * @return
+ */
+ static boolean checkResult(Result r, ImmutableBytesWritable key,
+ byte[] expectedKey, byte[] expectedValue) {
+ assertEquals(0, key.compareTo(expectedKey));
+ Map<byte[], byte[]> vals = r.getFamilyMap(FAMILY);
+ byte[] value = vals.values().iterator().next();
+ assertTrue(Arrays.equals(value, expectedValue));
+ return true; // if succeed
+ }
+
+ /**
+ * Create table data and run tests on specified htable using the
+ * o.a.h.hbase.mapred API.
+ *
+ * @param table
+ * @throws IOException
+ */
+ static void runTestMapred(Table table) throws IOException {
+ org.apache.hadoop.hbase.mapred.TableRecordReader trr =
+ new org.apache.hadoop.hbase.mapred.TableRecordReader();
+ trr.setStartRow("aaa".getBytes());
+ trr.setEndRow("zzz".getBytes());
+ trr.setHTable(table);
+ trr.setInputColumns(columns);
+
+ trr.init();
+ Result r = new Result();
+ ImmutableBytesWritable key = new ImmutableBytesWritable();
+
+ boolean more = trr.next(key, r);
+ assertTrue(more);
+ checkResult(r, key, "aaa".getBytes(), "value aaa".getBytes());
+
+ more = trr.next(key, r);
+ assertTrue(more);
+ checkResult(r, key, "bbb".getBytes(), "value bbb".getBytes());
+
+ // no more data
+ more = trr.next(key, r);
+ assertFalse(more);
+ }
+
+ /**
+ * Create a table that IOE's on first scanner next call
+ *
+ * @throws IOException
+ */
+ static Table createIOEScannerTable(byte[] name, final int failCnt)
+ throws IOException {
+ // build up mock scanner machinery that fails for the first failCnt invocations
+ Answer<ResultScanner> a = new Answer<ResultScanner>() {
+ int cnt = 0;
+
+ @Override
+ public ResultScanner answer(InvocationOnMock invocation) throws Throwable {
+ // for the first failCnt invocations, return the broken mock scanner
+ if (cnt++ < failCnt) {
+ // create mock ResultScanner that always fails.
+ Scan scan = mock(Scan.class);
+ doReturn("bogus".getBytes()).when(scan).getStartRow(); // avoid npe
+ ResultScanner scanner = mock(ResultScanner.class);
+ // simulate TimeoutException / IOException
+ doThrow(new IOException("Injected exception")).when(scanner).next();
+ return scanner;
+ }
+
+ // otherwise return the real scanner.
+ return (ResultScanner) invocation.callRealMethod();
+ }
+ };
+
+ Table htable = spy(createTable(name));
+ doAnswer(a).when(htable).getScanner((Scan) anyObject());
+ return htable;
+ }
+
+ /**
+ * Create a table that throws a DoNotRetryIOException on the first scanner
+ * next() call.
+ *
+ * @throws IOException
+ */
+ static Table createDNRIOEScannerTable(byte[] name, final int failCnt)
+ throws IOException {
+ // build up mock scanner machinery that fails for the first failCnt invocations
+ Answer<ResultScanner> a = new Answer<ResultScanner>() {
+ int cnt = 0;
+
+ @Override
+ public ResultScanner answer(InvocationOnMock invocation) throws Throwable {
+ // for the first failCnt invocations, return the broken mock scanner
+ if (cnt++ < failCnt) {
+ // create mock ResultScanner that always fails.
+ Scan scan = mock(Scan.class);
+ doReturn("bogus".getBytes()).when(scan).getStartRow(); // avoid npe
+ ResultScanner scanner = mock(ResultScanner.class);
+
+ invocation.callRealMethod(); // simulate NotServingRegionException
+ doThrow(
+ new NotServingRegionException("Injected simulated TimeoutException"))
+ .when(scanner).next();
+ return scanner;
+ }
+
+ // otherwise return the real scanner.
+ return (ResultScanner) invocation.callRealMethod();
+ }
+ };
+
+ Table htable = spy(createTable(name));
+ doAnswer(a).when(htable).getScanner((Scan) anyObject());
+ return htable;
+ }
+
+ /**
+ * Run test assuming no errors using mapred api.
+ *
+ * @throws IOException
+ */
+ @Test
+ public void testTableRecordReader() throws IOException {
+ Table table = createTable("table1".getBytes());
+ runTestMapred(table);
+ }
+
+ /**
+ * Run test assuming Scanner IOException failure using mapred api.
+ *
+ * @throws IOException
+ */
+ @Test
+ public void testTableRecordReaderScannerFail() throws IOException {
+ Table htable = createIOEScannerTable("table2".getBytes(), 1);
+ runTestMapred(htable);
+ }
+
+ /**
+ * Run test assuming Scanner IOException failure using mapred api.
+ *
+ * @throws IOException
+ */
+ @Test(expected = IOException.class)
+ public void testTableRecordReaderScannerFailTwice() throws IOException {
+ Table htable = createIOEScannerTable("table3".getBytes(), 2);
+ runTestMapred(htable);
+ }
+
+ /**
+ * Run test assuming NotServingRegionException using mapred api.
+ *
+ * @throws org.apache.hadoop.hbase.DoNotRetryIOException
+ */
+ @Test
+ public void testTableRecordReaderScannerTimeout() throws IOException {
+ Table htable = createDNRIOEScannerTable("table4".getBytes(), 1);
+ runTestMapred(htable);
+ }
+
+ /**
+ * Run test assuming NotServingRegionException using mapred api.
+ *
+ * @throws org.apache.hadoop.hbase.DoNotRetryIOException
+ */
+ @Test(expected = org.apache.hadoop.hbase.NotServingRegionException.class)
+ public void testTableRecordReaderScannerTimeoutTwice() throws IOException {
+ Table htable = createDNRIOEScannerTable("table5".getBytes(), 2);
+ runTestMapred(htable);
+ }
+
+ /**
+ * Verify the example we present in javadocs on TableInputFormatBase
+ */
+ @Test
+ public void testExtensionOfTableInputFormatBase() throws IOException {
+ LOG.info("testing use of an InputFormat taht extends InputFormatBase");
+ final Table table = createTable(Bytes.toBytes("exampleTable"),
+ new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") });
+ testInputFormat(ExampleTIF.class);
+ }
+
+ @Test
+ public void testDeprecatedExtensionOfTableInputFormatBase() throws IOException {
+ LOG.info("testing use of an InputFormat taht extends InputFormatBase, "
+ + "as it was given in 0.98.");
+ final Table table = createTable(Bytes.toBytes("exampleDeprecatedTable"),
+ new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") });
+ testInputFormat(ExampleDeprecatedTIF.class);
+ }
+
+ @Test
+ public void testJobConfigurableExtensionOfTableInputFormatBase() throws IOException {
+ LOG.info("testing use of an InputFormat taht extends InputFormatBase, "
+ + "using JobConfigurable.");
+ final Table table = createTable(Bytes.toBytes("exampleJobConfigurableTable"),
+ new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") });
+ testInputFormat(ExampleJobConfigurableTIF.class);
+ }
+
+ void testInputFormat(Class<? extends InputFormat> clazz) throws IOException {
+ Configuration conf = UTIL.getConfiguration();
+ final JobConf job = new JobConf(conf);
+ job.setInputFormat(clazz);
+ job.setOutputFormat(NullOutputFormat.class);
+ job.setMapperClass(ExampleVerifier.class);
+ job.setNumReduceTasks(0);
+ LOG.debug("submitting job.");
+ final RunningJob run = JobClient.runJob(job);
+ assertTrue("job failed!", run.isSuccessful());
+ assertEquals("Saw the wrong number of instances of the filtered-for row.", 2, run.getCounters()
+ .findCounter(TestTableInputFormat.class.getName() + ":row", "aaa").getCounter());
+ assertEquals("Saw any instances of the filtered out row.", 0, run.getCounters()
+ .findCounter(TestTableInputFormat.class.getName() + ":row", "bbb").getCounter());
+ assertEquals("Saw the wrong number of instances of columnA.", 1, run.getCounters()
+ .findCounter(TestTableInputFormat.class.getName() + ":family", "columnA").getCounter());
+ assertEquals("Saw the wrong number of instances of columnB.", 1, run.getCounters()
+ .findCounter(TestTableInputFormat.class.getName() + ":family", "columnB").getCounter());
+ assertEquals("Saw the wrong count of values for the filtered-for row.", 2, run.getCounters()
+ .findCounter(TestTableInputFormat.class.getName() + ":value", "value aaa").getCounter());
+ assertEquals("Saw the wrong count of values for the filtered-out row.", 0, run.getCounters()
+ .findCounter(TestTableInputFormat.class.getName() + ":value", "value bbb").getCounter());
+ }
+
+ public static class ExampleVerifier implements TableMap<NullWritable, NullWritable> {
+
+ @Override
+ public void configure(JobConf conf) {
+ }
+
+ @Override
+ public void map(ImmutableBytesWritable key, Result value,
+ OutputCollector<NullWritable,NullWritable> output,
+ Reporter reporter) throws IOException {
+ for (Cell cell : value.listCells()) {
+ reporter.getCounter(TestTableInputFormat.class.getName() + ":row",
+ Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()))
+ .increment(1L);
+ reporter.getCounter(TestTableInputFormat.class.getName() + ":family",
+ Bytes.toString(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength()))
+ .increment(1L);
+ reporter.getCounter(TestTableInputFormat.class.getName() + ":value",
+ Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()))
+ .increment(1L);
+ }
+ }
+
+ @Override
+ public void close() {
+ }
+
+ }
+
+ public static class ExampleDeprecatedTIF extends TableInputFormatBase implements JobConfigurable {
+
+ @Override
+ public void configure(JobConf job) {
+ try {
+ Connection connection = ConnectionFactory.createConnection(job);
+ Table exampleTable = connection.getTable(TableName.valueOf("exampleDeprecatedTable"));
+ // mandatory
+ initializeTable(connection, exampleTable.getName());
+ byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
+ Bytes.toBytes("columnB") };
+ // mandatory
+ setInputColumns(inputColumns);
+ Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*"));
+ // optional
+ setRowFilter(exampleFilter);
+ } catch (IOException exception) {
+ throw new RuntimeException("Failed to configure for job.", exception);
+ }
+ }
+
+ }
+
+ public static class ExampleJobConfigurableTIF extends ExampleTIF implements JobConfigurable {
+
+ @Override
+ public void configure(JobConf job) {
+ try {
+ initialize(job);
+ } catch (IOException exception) {
+ throw new RuntimeException("Failed to initialize.", exception);
+ }
+ }
+
+ @Override
+ protected void initialize(JobConf job) throws IOException {
+ initialize(job, "exampleJobConfigurableTable");
+ }
+ }
+
+
+ public static class ExampleTIF extends TableInputFormatBase {
+
+ @Override
+ protected void initialize(JobConf job) throws IOException {
+ initialize(job, "exampleTable");
+ }
+
+ protected void initialize(JobConf job, String table) throws IOException {
+ Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create(job));
+ TableName tableName = TableName.valueOf(table);
+ // mandatory
+ initializeTable(connection, tableName);
+ byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
+ Bytes.toBytes("columnB") };
+ // mandatory
+ setInputColumns(inputColumns);
+ Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*"));
+ // optional
+ setRowFilter(exampleFilter);
+ }
+
+ }
+
+}
+
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableMapReduce.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableMapReduce.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableMapReduce.java
new file mode 100644
index 0000000..3f905cf
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableMapReduce.java
@@ -0,0 +1,103 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.TestTableMapReduceBase;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.RunningJob;
+import org.junit.experimental.categories.Category;
+
+/**
+ * Test Map/Reduce job over HBase tables. The map/reduce process we're testing
+ * on our tables is simple - take every row in the table, reverse the value of
+ * a particular cell, and write it back to the table.
+ */
+@Category({MapReduceTests.class, LargeTests.class})
+@SuppressWarnings("deprecation")
+public class TestTableMapReduce extends TestTableMapReduceBase {
+ private static final Log LOG =
+ LogFactory.getLog(TestTableMapReduce.class.getName());
+
+ protected Log getLog() { return LOG; }
+
+ /**
+ * Pass the given key and processed record to reduce
+ */
+ static class ProcessContentsMapper extends MapReduceBase implements
+ TableMap<ImmutableBytesWritable, Put> {
+
+ /**
+ * Pass the key and reversed value to reduce
+ */
+ public void map(ImmutableBytesWritable key, Result value,
+ OutputCollector<ImmutableBytesWritable, Put> output,
+ Reporter reporter)
+ throws IOException {
+ output.collect(key, TestTableMapReduceBase.map(key, value));
+ }
+ }
+
+ @Override
+ protected void runTestOnTable(Table table) throws IOException {
+ JobConf jobConf = null;
+ try {
+ LOG.info("Before map/reduce startup");
+ jobConf = new JobConf(UTIL.getConfiguration(), TestTableMapReduce.class);
+ jobConf.setJobName("process column contents");
+ jobConf.setNumReduceTasks(1);
+ TableMapReduceUtil.initTableMapJob(table.getName().getNameAsString(),
+ Bytes.toString(INPUT_FAMILY), ProcessContentsMapper.class,
+ ImmutableBytesWritable.class, Put.class, jobConf);
+ TableMapReduceUtil.initTableReduceJob(table.getName().getNameAsString(),
+ IdentityTableReduce.class, jobConf);
+
+ LOG.info("Started " + table.getName());
+ RunningJob job = JobClient.runJob(jobConf);
+ assertTrue(job.isSuccessful());
+ LOG.info("After map/reduce completion");
+
+ // verify map-reduce results
+ verify(table.getName());
+ } finally {
+ if (jobConf != null) {
+ FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
+ }
+ }
+ }
+}
+
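The per-row transformation described in the class javadoc above (read one cell, reverse its value, write it back) is implemented in TestTableMapReduceBase.map(...), which this class delegates to. A rough sketch of that transformation, with assumed family names since the real constants live in the base class:

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;

class ReverseCellSketch {
  // Assumed family names for illustration; the real ones come from TestTableMapReduceBase.
  static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
  static final byte[] OUTPUT_FAMILY = Bytes.toBytes("text");

  static Put reverseCell(ImmutableBytesWritable key, Result value) {
    // Read the input cell, reverse its string value, and emit a Put that writes it back.
    String original = Bytes.toString(value.getValue(INPUT_FAMILY, null));
    String reversed = new StringBuilder(original).reverse().toString();
    Put put = new Put(key.get());
    put.addColumn(OUTPUT_FAMILY, null, Bytes.toBytes(reversed));
    return put;
  }
}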
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableMapReduceUtil.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableMapReduceUtil.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableMapReduceUtil.java
new file mode 100644
index 0000000..ac2f20d
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableMapReduceUtil.java
@@ -0,0 +1,272 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.RunningJob;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.ImmutableMap;
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.ImmutableSet;
+
+@Category({MapReduceTests.class, LargeTests.class})
+public class TestTableMapReduceUtil {
+
+ private static final Log LOG = LogFactory
+ .getLog(TestTableMapReduceUtil.class);
+
+ private static Table presidentsTable;
+ private static final String TABLE_NAME = "People";
+
+ private static final byte[] COLUMN_FAMILY = Bytes.toBytes("info");
+ private static final byte[] COLUMN_QUALIFIER = Bytes.toBytes("name");
+
+ private static ImmutableSet<String> presidentsRowKeys = ImmutableSet.of(
+ "president1", "president2", "president3");
+ private static Iterator<String> presidentNames = ImmutableSet.of(
+ "John F. Kennedy", "George W. Bush", "Barack Obama").iterator();
+
+ private static ImmutableSet<String> actorsRowKeys = ImmutableSet.of("actor1",
+ "actor2");
+ private static Iterator<String> actorNames = ImmutableSet.of(
+ "Jack Nicholson", "Martin Freeman").iterator();
+
+ private static String PRESIDENT_PATTERN = "president";
+ private static String ACTOR_PATTERN = "actor";
+ private static ImmutableMap<String, ImmutableSet<String>> relation = ImmutableMap
+ .of(PRESIDENT_PATTERN, presidentsRowKeys, ACTOR_PATTERN, actorsRowKeys);
+
+ private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ UTIL.startMiniCluster();
+ presidentsTable = createAndFillTable(TableName.valueOf(TABLE_NAME));
+ }
+
+ @AfterClass
+ public static void afterClass() throws Exception {
+ UTIL.shutdownMiniCluster();
+ }
+
+ @Before
+ public void before() throws IOException {
+ LOG.info("before");
+ UTIL.ensureSomeRegionServersAvailable(1);
+ LOG.info("before done");
+ }
+
+ public static Table createAndFillTable(TableName tableName) throws IOException {
+ Table table = UTIL.createTable(tableName, COLUMN_FAMILY);
+ createPutCommand(table);
+ return table;
+ }
+
+ private static void createPutCommand(Table table) throws IOException {
+ for (String president : presidentsRowKeys) {
+ if (presidentNames.hasNext()) {
+ Put p = new Put(Bytes.toBytes(president));
+ p.addColumn(COLUMN_FAMILY, COLUMN_QUALIFIER, Bytes.toBytes(presidentNames.next()));
+ table.put(p);
+ }
+ }
+
+ for (String actor : actorsRowKeys) {
+ if (actorNames.hasNext()) {
+ Put p = new Put(Bytes.toBytes(actor));
+ p.addColumn(COLUMN_FAMILY, COLUMN_QUALIFIER, Bytes.toBytes(actorNames.next()));
+ table.put(p);
+ }
+ }
+ }
+
+ /**
+ * Check that the number of reduce tasks for the given job configuration
+ * does not exceed the number of regions for the given table.
+ */
+ @Test
+ public void shouldNumberOfReduceTaskNotExceedNumberOfRegionsForGivenTable()
+ throws IOException {
+ Assert.assertNotNull(presidentsTable);
+ Configuration cfg = UTIL.getConfiguration();
+ JobConf jobConf = new JobConf(cfg);
+ TableMapReduceUtil.setNumReduceTasks(TABLE_NAME, jobConf);
+ TableMapReduceUtil.limitNumReduceTasks(TABLE_NAME, jobConf);
+ TableMapReduceUtil.setScannerCaching(jobConf, 100);
+ assertEquals(1, jobConf.getNumReduceTasks());
+ assertEquals(100, jobConf.getInt("hbase.client.scanner.caching", 0));
+
+ jobConf.setNumReduceTasks(10);
+ TableMapReduceUtil.setNumMapTasks(TABLE_NAME, jobConf);
+ TableMapReduceUtil.limitNumReduceTasks(TABLE_NAME, jobConf);
+ assertEquals(1, jobConf.getNumReduceTasks());
+ }
+
+ @Test
+ public void shouldNumberOfMapTaskNotExceedNumberOfRegionsForGivenTable()
+ throws IOException {
+ Configuration cfg = UTIL.getConfiguration();
+ JobConf jobConf = new JobConf(cfg);
+ TableMapReduceUtil.setNumReduceTasks(TABLE_NAME, jobConf);
+ TableMapReduceUtil.limitNumMapTasks(TABLE_NAME, jobConf);
+ assertEquals(1, jobConf.getNumMapTasks());
+
+ jobConf.setNumMapTasks(10);
+ TableMapReduceUtil.setNumMapTasks(TABLE_NAME, jobConf);
+ TableMapReduceUtil.limitNumMapTasks(TABLE_NAME, jobConf);
+ assertEquals(1, jobConf.getNumMapTasks());
+ }
+
+ @Test
+ @SuppressWarnings("deprecation")
+ public void shouldBeValidMapReduceEvaluation() throws Exception {
+ Configuration cfg = UTIL.getConfiguration();
+ JobConf jobConf = new JobConf(cfg);
+ try {
+ jobConf.setJobName("process row task");
+ jobConf.setNumReduceTasks(1);
+ TableMapReduceUtil.initTableMapJob(TABLE_NAME, new String(COLUMN_FAMILY),
+ ClassificatorMapper.class, ImmutableBytesWritable.class, Put.class,
+ jobConf);
+ TableMapReduceUtil.initTableReduceJob(TABLE_NAME,
+ ClassificatorRowReduce.class, jobConf);
+ RunningJob job = JobClient.runJob(jobConf);
+ assertTrue(job.isSuccessful());
+ } finally {
+ if (jobConf != null)
+ FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
+ }
+ }
+
+ @Test
+ @SuppressWarnings("deprecation")
+ public void shouldBeValidMapReduceWithPartitionerEvaluation()
+ throws IOException {
+ Configuration cfg = UTIL.getConfiguration();
+ JobConf jobConf = new JobConf(cfg);
+ try {
+ jobConf.setJobName("process row task");
+ jobConf.setNumReduceTasks(2);
+ TableMapReduceUtil.initTableMapJob(TABLE_NAME, new String(COLUMN_FAMILY),
+ ClassificatorMapper.class, ImmutableBytesWritable.class, Put.class,
+ jobConf);
+
+ TableMapReduceUtil.initTableReduceJob(TABLE_NAME,
+ ClassificatorRowReduce.class, jobConf, HRegionPartitioner.class);
+ RunningJob job = JobClient.runJob(jobConf);
+ assertTrue(job.isSuccessful());
+ } finally {
+ if (jobConf != null)
+ FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
+ }
+ }
+
+ @SuppressWarnings("deprecation")
+ static class ClassificatorRowReduce extends MapReduceBase implements
+ TableReduce<ImmutableBytesWritable, Put> {
+
+ @Override
+ public void reduce(ImmutableBytesWritable key, Iterator<Put> values,
+ OutputCollector<ImmutableBytesWritable, Put> output, Reporter reporter)
+ throws IOException {
+ String strKey = Bytes.toString(key.get());
+ List<Put> result = new ArrayList<>();
+ while (values.hasNext())
+ result.add(values.next());
+
+ if (relation.keySet().contains(strKey)) {
+ Set<String> set = relation.get(strKey);
+ if (set != null) {
+ assertEquals(set.size(), result.size());
+ } else {
+ throwAccertionError("Test infrastructure error: set is null");
+ }
+ } else {
+ throwAccertionError("Test infrastructure error: key not found in map");
+ }
+ }
+
+ private void throwAssertionError(String errorMessage) throws AssertionError {
+ throw new AssertionError(errorMessage);
+ }
+ }
+
+ @SuppressWarnings("deprecation")
+ static class ClassificatorMapper extends MapReduceBase implements
+ TableMap<ImmutableBytesWritable, Put> {
+
+ @Override
+ public void map(ImmutableBytesWritable row, Result result,
+ OutputCollector<ImmutableBytesWritable, Put> outCollector,
+ Reporter reporter) throws IOException {
+ String rowKey = Bytes.toString(result.getRow());
+ final ImmutableBytesWritable pKey = new ImmutableBytesWritable(
+ Bytes.toBytes(PRESIDENT_PATTERN));
+ final ImmutableBytesWritable aKey = new ImmutableBytesWritable(
+ Bytes.toBytes(ACTOR_PATTERN));
+ ImmutableBytesWritable outKey = null;
+
+ if (rowKey.startsWith(PRESIDENT_PATTERN)) {
+ outKey = pKey;
+ } else if (rowKey.startsWith(ACTOR_PATTERN)) {
+ outKey = aKey;
+ } else {
+ throw new AssertionError("unexpected rowKey");
+ }
+
+ String name = Bytes.toString(result.getValue(COLUMN_FAMILY,
+ COLUMN_QUALIFIER));
+ outCollector.collect(outKey,
+ new Put(Bytes.toBytes("rowKey2"))
+ .addColumn(COLUMN_FAMILY, COLUMN_QUALIFIER, Bytes.toBytes(name)));
+ }
+ }
+}
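The tests above exercise the classic-API (org.apache.hadoop.hbase.mapred) TableMapReduceUtil helpers: initTableMapJob/initTableReduceJob wire a table scan into a JobConf, and limitNumMapTasks/limitNumReduceTasks cap parallelism at the table's region count. A minimal map-only sketch of the same wiring, assuming a reachable cluster; the table name and column family below are illustrative, and IdentityTableMap simply re-emits each (row key, Result) pair:

package org.apache.hadoop.hbase.mapred;   // same package as the helpers above (sketch only)

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.lib.NullOutputFormat;

public class ClassicTableJobSketch {
  public static void main(String[] args) throws Exception {
    JobConf jobConf = new JobConf(HBaseConfiguration.create(), ClassicTableJobSketch.class);
    jobConf.setJobName("identity scan of table People");    // table name is illustrative
    // Map-only job: IdentityTableMap re-emits each (row key, Result) pair unchanged.
    TableMapReduceUtil.initTableMapJob("People", "info",
        IdentityTableMap.class, ImmutableBytesWritable.class, Result.class, jobConf);
    // Cap map parallelism at the table's region count, as the tests above verify.
    TableMapReduceUtil.limitNumMapTasks("People", jobConf);
    jobConf.setNumReduceTasks(0);
    jobConf.setOutputFormat(NullOutputFormat.class);
    RunningJob job = JobClient.runJob(jobConf);              // blocks until completion
    System.out.println("job successful: " + job.isSuccessful());
  }
}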
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java
new file mode 100644
index 0000000..7b6e684
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java
@@ -0,0 +1,571 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.UUID;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.TableNotFoundException;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.hfile.CacheConfig;
+import org.apache.hadoop.hbase.io.hfile.HFile;
+import org.apache.hadoop.hbase.io.hfile.HFileScanner;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.Utils.OutputFileUtils.OutputFilesFilter;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.ExpectedException;
+
+@Category({VerySlowMapReduceTests.class, LargeTests.class})
+public class TestImportTsv implements Configurable {
+
+ private static final Log LOG = LogFactory.getLog(TestImportTsv.class);
+ protected static final String NAME = TestImportTsv.class.getSimpleName();
+ protected static HBaseTestingUtility util = new HBaseTestingUtility();
+
+ // Delete the tmp directory after running doMROnTableTest. Boolean. Default is true.
+ protected static final String DELETE_AFTER_LOAD_CONF = NAME + ".deleteAfterLoad";
+
+ /**
+ * Force use of combiner in doMROnTableTest. Boolean. Default is true.
+ */
+ protected static final String FORCE_COMBINER_CONF = NAME + ".forceCombiner";
+
+ private final String FAMILY = "FAM";
+ private TableName tn;
+ private Map<String, String> args;
+
+ @Rule
+ public ExpectedException exception = ExpectedException.none();
+
+ public Configuration getConf() {
+ return util.getConfiguration();
+ }
+
+ public void setConf(Configuration conf) {
+ throw new IllegalArgumentException("setConf not supported");
+ }
+
+ @BeforeClass
+ public static void provisionCluster() throws Exception {
+ util.startMiniCluster();
+ }
+
+ @AfterClass
+ public static void releaseCluster() throws Exception {
+ util.shutdownMiniCluster();
+ }
+
+ @Before
+ public void setup() throws Exception {
+ tn = TableName.valueOf("test-" + UUID.randomUUID());
+ args = new HashMap<>();
+ // Prepare the arguments required for the test.
+ args.put(ImportTsv.COLUMNS_CONF_KEY, "HBASE_ROW_KEY,FAM:A,FAM:B");
+ args.put(ImportTsv.SEPARATOR_CONF_KEY, "\u001b");
+ }
+
+ @Test
+ public void testMROnTable() throws Exception {
+ util.createTable(tn, FAMILY);
+ doMROnTableTest(null, 1);
+ util.deleteTable(tn);
+ }
+
+ @Test
+ public void testMROnTableWithTimestamp() throws Exception {
+ util.createTable(tn, FAMILY);
+ args.put(ImportTsv.COLUMNS_CONF_KEY, "HBASE_ROW_KEY,HBASE_TS_KEY,FAM:A,FAM:B");
+ args.put(ImportTsv.SEPARATOR_CONF_KEY, ",");
+ String data = "KEY,1234,VALUE1,VALUE2\n";
+
+ doMROnTableTest(data, 1);
+ util.deleteTable(tn);
+ }
+
+ @Test
+ public void testMROnTableWithCustomMapper()
+ throws Exception {
+ util.createTable(tn, FAMILY);
+ args.put(ImportTsv.MAPPER_CONF_KEY,
+ "org.apache.hadoop.hbase.mapreduce.TsvImporterCustomTestMapper");
+
+ doMROnTableTest(null, 3);
+ util.deleteTable(tn);
+ }
+
+ @Test
+ public void testBulkOutputWithoutAnExistingTable() throws Exception {
+ // Prepare the arguments required for the test.
+ Path hfiles = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");
+ args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, hfiles.toString());
+
+ doMROnTableTest(null, 3);
+ util.deleteTable(tn);
+ }
+
+ @Test
+ public void testBulkOutputWithAnExistingTable() throws Exception {
+ util.createTable(tn, FAMILY);
+
+ // Prepare the arguments required for the test.
+ Path hfiles = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");
+ args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, hfiles.toString());
+
+ doMROnTableTest(null, 3);
+ util.deleteTable(tn);
+ }
+
+ @Test
+ public void testBulkOutputWithAnExistingTableNoStrictTrue() throws Exception {
+ util.createTable(tn, FAMILY);
+
+ // Prepare the arguments required for the test.
+ Path hfiles = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");
+ args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, hfiles.toString());
+ args.put(ImportTsv.NO_STRICT_COL_FAMILY, "true");
+ doMROnTableTest(null, 3);
+ util.deleteTable(tn);
+ }
+
+ @Test
+ public void testJobConfigurationsWithTsvImporterTextMapper() throws Exception {
+ Path bulkOutputPath = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()),"hfiles");
+ String INPUT_FILE = "InputFile1.csv";
+ // Prepare the arguments required for the test.
+ String[] args =
+ new String[] {
+ "-D" + ImportTsv.MAPPER_CONF_KEY
+ + "=org.apache.hadoop.hbase.mapreduce.TsvImporterTextMapper",
+ "-D" + ImportTsv.COLUMNS_CONF_KEY
+ + "=HBASE_ROW_KEY,FAM:A,FAM:B",
+ "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=,",
+ "-D" + ImportTsv.BULK_OUTPUT_CONF_KEY + "=" + bulkOutputPath.toString(),
+ tn.getNameAsString(),
+ INPUT_FILE
+ };
+ assertEquals("running test job configuration failed.", 0, ToolRunner.run(
+ new Configuration(util.getConfiguration()),
+ new ImportTsv() {
+ @Override
+ public int run(String[] args) throws Exception {
+ Job job = createSubmittableJob(getConf(), args);
+ assertTrue(job.getMapperClass().equals(TsvImporterTextMapper.class));
+ assertTrue(job.getReducerClass().equals(TextSortReducer.class));
+ assertTrue(job.getMapOutputValueClass().equals(Text.class));
+ return 0;
+ }
+ }, args));
+ // Delete table created by createSubmittableJob.
+ util.deleteTable(tn);
+ }
+
+ @Test
+ public void testBulkOutputWithTsvImporterTextMapper() throws Exception {
+ Path bulkOutputPath = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()),"hfiles");
+ args.put(ImportTsv.MAPPER_CONF_KEY, "org.apache.hadoop.hbase.mapreduce.TsvImporterTextMapper");
+ args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, bulkOutputPath.toString());
+ String data = "KEY\u001bVALUE4\u001bVALUE8\n";
+ doMROnTableTest(data, 4);
+ util.deleteTable(tn);
+ }
+
+ @Test
+ public void testWithoutAnExistingTableAndCreateTableSetToNo() throws Exception {
+ String[] args = new String[] { tn.getNameAsString(), "/inputFile" };
+
+ Configuration conf = new Configuration(util.getConfiguration());
+ conf.set(ImportTsv.COLUMNS_CONF_KEY, "HBASE_ROW_KEY,FAM:A");
+ conf.set(ImportTsv.BULK_OUTPUT_CONF_KEY, "/output");
+ conf.set(ImportTsv.CREATE_TABLE_CONF_KEY, "no");
+ exception.expect(TableNotFoundException.class);
+ assertEquals("running test job configuration failed.", 0,
+ ToolRunner.run(new Configuration(util.getConfiguration()), new ImportTsv() {
+ @Override public int run(String[] args) throws Exception {
+ createSubmittableJob(getConf(), args);
+ return 0;
+ }
+ }, args));
+ }
+
+ @Test
+ public void testMRWithoutAnExistingTable() throws Exception {
+ String[] args =
+ new String[] { tn.getNameAsString(), "/inputFile" };
+
+ exception.expect(TableNotFoundException.class);
+ assertEquals("running test job configuration failed.", 0, ToolRunner.run(
+ new Configuration(util.getConfiguration()),
+ new ImportTsv() {
+ @Override
+ public int run(String[] args) throws Exception {
+ createSubmittableJob(getConf(), args);
+ return 0;
+ }
+ }, args));
+ }
+
+ @Test
+ public void testJobConfigurationsWithDryMode() throws Exception {
+ Path bulkOutputPath = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()),"hfiles");
+ String INPUT_FILE = "InputFile1.csv";
+ // Prepare the arguments required for the test.
+ String[] argsArray = new String[] {
+ "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B",
+ "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=,",
+ "-D" + ImportTsv.BULK_OUTPUT_CONF_KEY + "=" + bulkOutputPath.toString(),
+ "-D" + ImportTsv.DRY_RUN_CONF_KEY + "=true",
+ tn.getNameAsString(),
+ INPUT_FILE };
+ assertEquals("running test job configuration failed.", 0, ToolRunner.run(
+ new Configuration(util.getConfiguration()),
+ new ImportTsv() {
+ @Override
+ public int run(String[] args) throws Exception {
+ Job job = createSubmittableJob(getConf(), args);
+ assertTrue(job.getOutputFormatClass().equals(NullOutputFormat.class));
+ return 0;
+ }
+ }, argsArray));
+ // Delete table created by createSubmittableJob.
+ util.deleteTable(tn);
+ }
+
+ @Test
+ public void testDryModeWithoutBulkOutputAndTableExists() throws Exception {
+ util.createTable(tn, FAMILY);
+ args.put(ImportTsv.DRY_RUN_CONF_KEY, "true");
+ doMROnTableTest(null, 1);
+ // Dry mode should not delete an existing table. If it's not present,
+ // this will throw TableNotFoundException.
+ util.deleteTable(tn);
+ }
+
+ /**
+ * If table is not present in non-bulk mode, dry run should fail just like
+ * normal mode.
+ */
+ @Test
+ public void testDryModeWithoutBulkOutputAndTableDoesNotExists() throws Exception {
+ args.put(ImportTsv.DRY_RUN_CONF_KEY, "true");
+ exception.expect(TableNotFoundException.class);
+ doMROnTableTest(null, 1);
+ }
+
+ @Test public void testDryModeWithBulkOutputAndTableExists() throws Exception {
+ util.createTable(tn, FAMILY);
+ // Prepare the arguments required for the test.
+ Path hfiles = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");
+ args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, hfiles.toString());
+ args.put(ImportTsv.DRY_RUN_CONF_KEY, "true");
+ doMROnTableTest(null, 1);
+ // Dry mode should not delete an existing table. If it's not present,
+ // this will throw TableNotFoundException.
+ util.deleteTable(tn);
+ }
+
+ /**
+ * If table is not present in bulk mode and create.table is not set to yes,
+ * import should fail with TableNotFoundException.
+ */
+ @Test
+ public void testDryModeWithBulkOutputAndTableDoesNotExistsCreateTableSetToNo() throws
+ Exception {
+ // Prepare the arguments required for the test.
+ Path hfiles = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");
+ args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, hfiles.toString());
+ args.put(ImportTsv.DRY_RUN_CONF_KEY, "true");
+ args.put(ImportTsv.CREATE_TABLE_CONF_KEY, "no");
+ exception.expect(TableNotFoundException.class);
+ doMROnTableTest(null, 1);
+ }
+
+ @Test
+ public void testDryModeWithBulkModeAndTableDoesNotExistsCreateTableSetToYes() throws Exception {
+ // Prepare the arguments required for the test.
+ Path hfiles = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");
+ args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, hfiles.toString());
+ args.put(ImportTsv.DRY_RUN_CONF_KEY, "true");
+ args.put(ImportTsv.CREATE_TABLE_CONF_KEY, "yes");
+ doMROnTableTest(null, 1);
+ // Verify temporary table was deleted.
+ exception.expect(TableNotFoundException.class);
+ util.deleteTable(tn);
+ }
+
+ /**
+ * If there are invalid data rows as inputs, then only those rows should be ignored.
+ */
+ @Test
+ public void testTsvImporterTextMapperWithInvalidData() throws Exception {
+ Path bulkOutputPath = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");
+ args.put(ImportTsv.MAPPER_CONF_KEY, "org.apache.hadoop.hbase.mapreduce.TsvImporterTextMapper");
+ args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, bulkOutputPath.toString());
+ args.put(ImportTsv.COLUMNS_CONF_KEY, "HBASE_ROW_KEY,HBASE_TS_KEY,FAM:A,FAM:B");
+ args.put(ImportTsv.SEPARATOR_CONF_KEY, ",");
+ // 3 Rows of data as input. 2 Rows are valid and 1 row is invalid as it doesn't have TS
+ String data = "KEY,1234,VALUE1,VALUE2\nKEY\nKEY,1235,VALUE1,VALUE2\n";
+ doMROnTableTest(util, tn, FAMILY, data, args, 1, 4);
+ util.deleteTable(tn);
+ }
+
+ @Test
+ public void testSkipEmptyColumns() throws Exception {
+ Path bulkOutputPath = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");
+ args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, bulkOutputPath.toString());
+ args.put(ImportTsv.COLUMNS_CONF_KEY, "HBASE_ROW_KEY,HBASE_TS_KEY,FAM:A,FAM:B");
+ args.put(ImportTsv.SEPARATOR_CONF_KEY, ",");
+ args.put(ImportTsv.SKIP_EMPTY_COLUMNS, "true");
+ // 2 rows of data as input. Both rows are valid and only 3 of the 4 columns are non-empty
+ String data = "KEY,1234,VALUE1,VALUE2\nKEY,1235,,VALUE2\n";
+ doMROnTableTest(util, tn, FAMILY, data, args, 1, 3);
+ util.deleteTable(tn);
+ }
+
+ private Tool doMROnTableTest(String data, int valueMultiplier) throws Exception {
+ return doMROnTableTest(util, tn, FAMILY, data, args, valueMultiplier,-1);
+ }
+
+ protected static Tool doMROnTableTest(HBaseTestingUtility util, TableName table,
+ String family, String data, Map<String, String> args) throws Exception {
+ return doMROnTableTest(util, table, family, data, args, 1,-1);
+ }
+
+ /**
+ * Run an ImportTsv job and perform basic validation on the results.
+ * Returns the ImportTsv <code>Tool</code> instance so that other tests can
+ * inspect it for further validation as necessary. This method is static to
+ * ensure non-reliance on the instance's util/conf facilities.
+ * @param args Any arguments to pass BEFORE inputFile path is appended.
+ * @return The Tool instance used to run the test.
+ */
+ protected static Tool doMROnTableTest(HBaseTestingUtility util, TableName table,
+ String family, String data, Map<String, String> args, int valueMultiplier,int expectedKVCount)
+ throws Exception {
+ Configuration conf = new Configuration(util.getConfiguration());
+
+ // populate input file
+ FileSystem fs = FileSystem.get(conf);
+ Path inputPath = fs.makeQualified(
+ new Path(util.getDataTestDirOnTestFS(table.getNameAsString()), "input.dat"));
+ FSDataOutputStream op = fs.create(inputPath, true);
+ if (data == null) {
+ data = "KEY\u001bVALUE1\u001bVALUE2\n";
+ }
+ op.write(Bytes.toBytes(data));
+ op.close();
+ LOG.debug(String.format("Wrote test data to file: %s", inputPath));
+
+ if (conf.getBoolean(FORCE_COMBINER_CONF, true)) {
+ LOG.debug("Forcing combiner.");
+ conf.setInt("mapreduce.map.combine.minspills", 1);
+ }
+
+ // Build args array.
+ String[] argsArray = new String[args.size() + 2];
+ Iterator it = args.entrySet().iterator();
+ int i = 0;
+ while (it.hasNext()) {
+ Map.Entry pair = (Map.Entry) it.next();
+ argsArray[i] = "-D" + pair.getKey() + "=" + pair.getValue();
+ i++;
+ }
+ argsArray[i] = table.getNameAsString();
+ argsArray[i + 1] = inputPath.toString();
+
+ // run the import
+ Tool tool = new ImportTsv();
+ LOG.debug("Running ImportTsv with arguments: " + argsArray);
+ assertEquals(0, ToolRunner.run(conf, tool, argsArray));
+
+ // Perform basic validation. If the input args did not include
+ // ImportTsv.BULK_OUTPUT_CONF_KEY then validate data in the table.
+ // Otherwise, validate presence of hfiles.
+ boolean isDryRun = args.containsKey(ImportTsv.DRY_RUN_CONF_KEY) &&
+ "true".equalsIgnoreCase(args.get(ImportTsv.DRY_RUN_CONF_KEY));
+ if (args.containsKey(ImportTsv.BULK_OUTPUT_CONF_KEY)) {
+ if (isDryRun) {
+ assertFalse(String.format("Dry run mode, %s should not have been created.",
+ ImportTsv.BULK_OUTPUT_CONF_KEY),
+ fs.exists(new Path(ImportTsv.BULK_OUTPUT_CONF_KEY)));
+ } else {
+ validateHFiles(fs, args.get(ImportTsv.BULK_OUTPUT_CONF_KEY), family,expectedKVCount);
+ }
+ } else {
+ validateTable(conf, table, family, valueMultiplier, isDryRun);
+ }
+
+ if (conf.getBoolean(DELETE_AFTER_LOAD_CONF, true)) {
+ LOG.debug("Deleting test subdirectory");
+ util.cleanupDataTestDirOnTestFS(table.getNameAsString());
+ }
+ return tool;
+ }
+
+ /**
+ * Confirm ImportTsv via data in online table.
+ */
+ private static void validateTable(Configuration conf, TableName tableName,
+ String family, int valueMultiplier, boolean isDryRun) throws IOException {
+
+ LOG.debug("Validating table.");
+ Connection connection = ConnectionFactory.createConnection(conf);
+ Table table = connection.getTable(tableName);
+ boolean verified = false;
+ long pause = conf.getLong("hbase.client.pause", 5 * 1000);
+ int numRetries = conf.getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 5);
+ for (int i = 0; i < numRetries; i++) {
+ try {
+ Scan scan = new Scan();
+ // Scan entire family.
+ scan.addFamily(Bytes.toBytes(family));
+ ResultScanner resScanner = table.getScanner(scan);
+ int numRows = 0;
+ for (Result res : resScanner) {
+ numRows++;
+ assertEquals(2, res.size());
+ List<Cell> kvs = res.listCells();
+ assertTrue(CellUtil.matchingRow(kvs.get(0), Bytes.toBytes("KEY")));
+ assertTrue(CellUtil.matchingRow(kvs.get(1), Bytes.toBytes("KEY")));
+ assertTrue(CellUtil.matchingValue(kvs.get(0), Bytes.toBytes("VALUE" + valueMultiplier)));
+ assertTrue(CellUtil.matchingValue(kvs.get(1), Bytes.toBytes("VALUE" + 2 * valueMultiplier)));
+ // Only one result set is expected, so let it loop.
+ }
+ if (isDryRun) {
+ assertEquals(0, numRows);
+ } else {
+ assertEquals(1, numRows);
+ }
+ verified = true;
+ break;
+ } catch (NullPointerException e) {
+ // If here, a cell was empty. Presume it's because updates came in
+ // after the scanner had been opened. Wait a while and retry.
+ }
+ try {
+ Thread.sleep(pause);
+ } catch (InterruptedException e) {
+ // continue
+ }
+ }
+ table.close();
+ connection.close();
+ assertTrue(verified);
+ }
+
+ /**
+ * Confirm ImportTsv via HFiles on fs.
+ */
+ private static void validateHFiles(FileSystem fs, String outputPath, String family,
+ int expectedKVCount) throws IOException {
+ // validate number and content of output columns
+ LOG.debug("Validating HFiles.");
+ Set<String> configFamilies = new HashSet<>();
+ configFamilies.add(family);
+ Set<String> foundFamilies = new HashSet<>();
+ int actualKVCount = 0;
+ for (FileStatus cfStatus : fs.listStatus(new Path(outputPath), new OutputFilesFilter())) {
+ String[] elements = cfStatus.getPath().toString().split(Path.SEPARATOR);
+ String cf = elements[elements.length - 1];
+ foundFamilies.add(cf);
+ assertTrue(
+ String.format(
+ "HFile output contains a column family (%s) not present in input families (%s)",
+ cf, configFamilies),
+ configFamilies.contains(cf));
+ for (FileStatus hfile : fs.listStatus(cfStatus.getPath())) {
+ assertTrue(
+ String.format("HFile %s appears to contain no data.", hfile.getPath()),
+ hfile.getLen() > 0);
+ // count the number of KVs from all the hfiles
+ if (expectedKVCount > -1) {
+ actualKVCount += getKVCountFromHfile(fs, hfile.getPath());
+ }
+ }
+ }
+ assertTrue(String.format("HFile output does not contain the input family '%s'.", family),
+ foundFamilies.contains(family));
+ if (expectedKVCount > -1) {
+ assertTrue(String.format(
+ "KV count in ouput hfile=<%d> doesn't match with expected KV count=<%d>", actualKVCount,
+ expectedKVCount), actualKVCount == expectedKVCount);
+ }
+ }
+
+ /**
+ * Returns the total number of KVs in the given HFile.
+ * @param fs the FileSystem holding the HFile
+ * @param p the HFile path
+ * @return KV count in the given HFile
+ * @throws IOException if the HFile cannot be read
+ */
+ private static int getKVCountFromHfile(FileSystem fs, Path p) throws IOException {
+ Configuration conf = util.getConfiguration();
+ HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf);
+ reader.loadFileInfo();
+ HFileScanner scanner = reader.getScanner(false, false);
+ scanner.seekTo();
+ int count = 0;
+ do {
+ count++;
+ } while (scanner.next());
+ reader.close();
+ return count;
+ }
+}
+
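For orientation, here is a minimal sketch (not part of this patch) of the kind of ImportTsv invocation whose output the validateTable and validateHFiles helpers above check. The table name, column spec, and paths are hypothetical, and it assumes ImportTsv is driven as a Hadoop Tool:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.ImportTsv;
import org.apache.hadoop.util.ToolRunner;

public class ImportTsvBulkOutputSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // Hypothetical column spec, output directory, table name, and input path.
    String[] toolArgs = new String[] {
        "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B",
        // With bulk output set, ImportTsv writes HFiles under this directory
        // (one subdirectory per column family) instead of issuing Puts.
        "-D" + ImportTsv.BULK_OUTPUT_CONF_KEY + "=/tmp/importtsv-hfiles",
        "myTable",
        "/tmp/input.tsv"
    };
    System.exit(ToolRunner.run(conf, new ImportTsv(), toolArgs));
  }
}

Depending on whether bulk output or dry-run mode is configured, the assertions above then inspect either the online table contents or those per-family HFile directories.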
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsvParser.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsvParser.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsvParser.java
new file mode 100644
index 0000000..3c38102
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsvParser.java
@@ -0,0 +1,314 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.util.ArrayList;
+
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser;
+import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser.BadTsvLineException;
+import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser.ParsedLine;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.Pair;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.base.Joiner;
+import org.apache.hadoop.hbase.shaded.com.google.common.base.Splitter;
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.Iterables;
+
+/**
+ * Tests for {@link TsvParser}.
+ */
+@Category({MapReduceTests.class, SmallTests.class})
+public class TestImportTsvParser {
+
+ private void assertBytesEquals(byte[] a, byte[] b) {
+ assertEquals(Bytes.toStringBinary(a), Bytes.toStringBinary(b));
+ }
+
+ private void checkParsing(ParsedLine parsed, Iterable<String> expected) {
+ ArrayList<String> parsedCols = new ArrayList<>();
+ for (int i = 0; i < parsed.getColumnCount(); i++) {
+ parsedCols.add(Bytes.toString(parsed.getLineBytes(), parsed.getColumnOffset(i),
+ parsed.getColumnLength(i)));
+ }
+ if (!Iterables.elementsEqual(parsedCols, expected)) {
+ fail("Expected: " + Joiner.on(",").join(expected) + "\n" + "Got:"
+ + Joiner.on(",").join(parsedCols));
+ }
+ }
+
+ @Test
+ public void testTsvParserSpecParsing() {
+ TsvParser parser;
+
+ parser = new TsvParser("HBASE_ROW_KEY", "\t");
+ assertNull(parser.getFamily(0));
+ assertNull(parser.getQualifier(0));
+ assertEquals(0, parser.getRowKeyColumnIndex());
+ assertFalse(parser.hasTimestamp());
+
+ parser = new TsvParser("HBASE_ROW_KEY,col1:scol1", "\t");
+ assertNull(parser.getFamily(0));
+ assertNull(parser.getQualifier(0));
+ assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1));
+ assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1));
+ assertEquals(0, parser.getRowKeyColumnIndex());
+ assertFalse(parser.hasTimestamp());
+
+ parser = new TsvParser("HBASE_ROW_KEY,col1:scol1,col1:scol2", "\t");
+ assertNull(parser.getFamily(0));
+ assertNull(parser.getQualifier(0));
+ assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1));
+ assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1));
+ assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(2));
+ assertBytesEquals(Bytes.toBytes("scol2"), parser.getQualifier(2));
+ assertEquals(0, parser.getRowKeyColumnIndex());
+ assertFalse(parser.hasTimestamp());
+
+ parser = new TsvParser("HBASE_ROW_KEY,col1:scol1,HBASE_TS_KEY,col1:scol2", "\t");
+ assertNull(parser.getFamily(0));
+ assertNull(parser.getQualifier(0));
+ assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1));
+ assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1));
+ assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(3));
+ assertBytesEquals(Bytes.toBytes("scol2"), parser.getQualifier(3));
+ assertEquals(0, parser.getRowKeyColumnIndex());
+ assertTrue(parser.hasTimestamp());
+ assertEquals(2, parser.getTimestampKeyColumnIndex());
+
+ parser = new TsvParser("HBASE_ROW_KEY,col1:scol1,HBASE_TS_KEY,col1:scol2,HBASE_ATTRIBUTES_KEY",
+ "\t");
+ assertNull(parser.getFamily(0));
+ assertNull(parser.getQualifier(0));
+ assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1));
+ assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1));
+ assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(3));
+ assertBytesEquals(Bytes.toBytes("scol2"), parser.getQualifier(3));
+ assertEquals(0, parser.getRowKeyColumnIndex());
+ assertTrue(parser.hasTimestamp());
+ assertEquals(2, parser.getTimestampKeyColumnIndex());
+ assertEquals(4, parser.getAttributesKeyColumnIndex());
+
+ parser = new TsvParser("HBASE_ATTRIBUTES_KEY,col1:scol1,HBASE_TS_KEY,col1:scol2,HBASE_ROW_KEY",
+ "\t");
+ assertNull(parser.getFamily(0));
+ assertNull(parser.getQualifier(0));
+ assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1));
+ assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1));
+ assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(3));
+ assertBytesEquals(Bytes.toBytes("scol2"), parser.getQualifier(3));
+ assertEquals(4, parser.getRowKeyColumnIndex());
+ assertTrue(parser.hasTimestamp());
+ assertEquals(2, parser.getTimestampKeyColumnIndex());
+ assertEquals(0, parser.getAttributesKeyColumnIndex());
+ }
+
+ @Test
+ public void testTsvParser() throws BadTsvLineException {
+ TsvParser parser = new TsvParser("col_a,col_b:qual,HBASE_ROW_KEY,col_d", "\t");
+ assertBytesEquals(Bytes.toBytes("col_a"), parser.getFamily(0));
+ assertBytesEquals(HConstants.EMPTY_BYTE_ARRAY, parser.getQualifier(0));
+ assertBytesEquals(Bytes.toBytes("col_b"), parser.getFamily(1));
+ assertBytesEquals(Bytes.toBytes("qual"), parser.getQualifier(1));
+ assertNull(parser.getFamily(2));
+ assertNull(parser.getQualifier(2));
+ assertEquals(2, parser.getRowKeyColumnIndex());
+
+ assertEquals(TsvParser.DEFAULT_TIMESTAMP_COLUMN_INDEX, parser.getTimestampKeyColumnIndex());
+
+ byte[] line = Bytes.toBytes("val_a\tval_b\tval_c\tval_d");
+ ParsedLine parsed = parser.parse(line, line.length);
+ checkParsing(parsed, Splitter.on("\t").split(Bytes.toString(line)));
+ }
+
+ @Test
+ public void testTsvParserWithTimestamp() throws BadTsvLineException {
+ TsvParser parser = new TsvParser("HBASE_ROW_KEY,HBASE_TS_KEY,col_a,", "\t");
+ assertNull(parser.getFamily(0));
+ assertNull(parser.getQualifier(0));
+ assertNull(parser.getFamily(1));
+ assertNull(parser.getQualifier(1));
+ assertBytesEquals(Bytes.toBytes("col_a"), parser.getFamily(2));
+ assertBytesEquals(HConstants.EMPTY_BYTE_ARRAY, parser.getQualifier(2));
+ assertEquals(0, parser.getRowKeyColumnIndex());
+ assertEquals(1, parser.getTimestampKeyColumnIndex());
+
+ byte[] line = Bytes.toBytes("rowkey\t1234\tval_a");
+ ParsedLine parsed = parser.parse(line, line.length);
+ assertEquals(1234L, parsed.getTimestamp(-1));
+ checkParsing(parsed, Splitter.on("\t").split(Bytes.toString(line)));
+ }
+
+ /**
+ * Test cases that throw BadTsvLineException
+ */
+ @Test(expected = BadTsvLineException.class)
+ public void testTsvParserBadTsvLineExcessiveColumns() throws BadTsvLineException {
+ TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a", "\t");
+ byte[] line = Bytes.toBytes("val_a\tval_b\tval_c");
+ parser.parse(line, line.length);
+ }
+
+ @Test(expected = BadTsvLineException.class)
+ public void testTsvParserBadTsvLineZeroColumn() throws BadTsvLineException {
+ TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a", "\t");
+ byte[] line = Bytes.toBytes("");
+ parser.parse(line, line.length);
+ }
+
+ @Test(expected = BadTsvLineException.class)
+ public void testTsvParserBadTsvLineOnlyKey() throws BadTsvLineException {
+ TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a", "\t");
+ byte[] line = Bytes.toBytes("key_only");
+ parser.parse(line, line.length);
+ }
+
+ @Test(expected = BadTsvLineException.class)
+ public void testTsvParserBadTsvLineNoRowKey() throws BadTsvLineException {
+ TsvParser parser = new TsvParser("col_a,HBASE_ROW_KEY", "\t");
+ byte[] line = Bytes.toBytes("only_cola_data_and_no_row_key");
+ parser.parse(line, line.length);
+ }
+
+ @Test(expected = BadTsvLineException.class)
+ public void testTsvParserInvalidTimestamp() throws BadTsvLineException {
+ TsvParser parser = new TsvParser("HBASE_ROW_KEY,HBASE_TS_KEY,col_a,", "\t");
+ assertEquals(1, parser.getTimestampKeyColumnIndex());
+ byte[] line = Bytes.toBytes("rowkey\ttimestamp\tval_a");
+ ParsedLine parsed = parser.parse(line, line.length);
+ assertEquals(-1, parsed.getTimestamp(-1));
+ checkParsing(parsed, Splitter.on("\t").split(Bytes.toString(line)));
+ }
+
+ @Test(expected = BadTsvLineException.class)
+ public void testTsvParserNoTimestampValue() throws BadTsvLineException {
+ TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a,HBASE_TS_KEY", "\t");
+ assertEquals(2, parser.getTimestampKeyColumnIndex());
+ byte[] line = Bytes.toBytes("rowkey\tval_a");
+ parser.parse(line, line.length);
+ }
+
+ @Test
+ public void testTsvParserParseRowKey() throws BadTsvLineException {
+ TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a,HBASE_TS_KEY", "\t");
+ assertEquals(0, parser.getRowKeyColumnIndex());
+ byte[] line = Bytes.toBytes("rowkey\tval_a\t1234");
+ Pair<Integer, Integer> rowKeyOffsets = parser.parseRowKey(line, line.length);
+ assertEquals(0, rowKeyOffsets.getFirst().intValue());
+ assertEquals(6, rowKeyOffsets.getSecond().intValue());
+ try {
+ line = Bytes.toBytes("\t\tval_a\t1234");
+ parser.parseRowKey(line, line.length);
+ fail("Should get BadTsvLineException on empty rowkey.");
+ } catch (BadTsvLineException b) {
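+ // expected: empty row key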
+
+ }
+ parser = new TsvParser("col_a,HBASE_ROW_KEY,HBASE_TS_KEY", "\t");
+ assertEquals(1, parser.getRowKeyColumnIndex());
+ line = Bytes.toBytes("val_a\trowkey\t1234");
+ rowKeyOffsets = parser.parseRowKey(line, line.length);
+ assertEquals(6, rowKeyOffsets.getFirst().intValue());
+ assertEquals(6, rowKeyOffsets.getSecond().intValue());
+ try {
+ line = Bytes.toBytes("val_a");
+ rowKeyOffsets = parser.parseRowKey(line, line.length);
+ fail("Should get BadTsvLineException when number of columns less than rowkey position.");
+ } catch (BadTsvLineException b) {
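+ // expected: too few columns for the row key position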
+
+ }
+ parser = new TsvParser("col_a,HBASE_TS_KEY,HBASE_ROW_KEY", "\t");
+ assertEquals(2, parser.getRowKeyColumnIndex());
+ line = Bytes.toBytes("val_a\t1234\trowkey");
+ rowKeyOffsets = parser.parseRowKey(line, line.length);
+ assertEquals(11, rowKeyOffsets.getFirst().intValue());
+ assertEquals(6, rowKeyOffsets.getSecond().intValue());
+ }
+
+ @Test
+ public void testTsvParseAttributesKey() throws BadTsvLineException {
+ TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a,HBASE_TS_KEY,HBASE_ATTRIBUTES_KEY", "\t");
+ assertEquals(0, parser.getRowKeyColumnIndex());
+ byte[] line = Bytes.toBytes("rowkey\tval_a\t1234\tkey=>value");
+ ParsedLine parse = parser.parse(line, line.length);
+ assertEquals(18, parse.getAttributeKeyOffset());
+ assertEquals(3, parser.getAttributesKeyColumnIndex());
+ String[] attributes = parse.getIndividualAttributes();
+ assertEquals(attributes[0], "key=>value");
+ try {
+ line = Bytes.toBytes("rowkey\tval_a\t1234");
+ parser.parse(line, line.length);
+ fail("Should get BadTsvLineException on empty rowkey.");
+ } catch (BadTsvLineException b) {
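+ // expected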
+
+ }
+ parser = new TsvParser("HBASE_ATTRIBUTES_KEY,col_a,HBASE_ROW_KEY,HBASE_TS_KEY", "\t");
+ assertEquals(2, parser.getRowKeyColumnIndex());
+ line = Bytes.toBytes("key=>value\tval_a\trowkey\t1234");
+ parse = parser.parse(line, line.length);
+ assertEquals(0, parse.getAttributeKeyOffset());
+ assertEquals(0, parser.getAttributesKeyColumnIndex());
+ attributes = parse.getIndividualAttributes();
+ assertEquals(attributes[0], "key=>value");
+ try {
+ line = Bytes.toBytes("val_a");
+ ParsedLine parse2 = parser.parse(line, line.length);
+ fail("Should get BadTsvLineException when number of columns less than rowkey position.");
+ } catch (BadTsvLineException b) {
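+ // expected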
+
+ }
+ parser = new TsvParser("col_a,HBASE_ATTRIBUTES_KEY,HBASE_TS_KEY,HBASE_ROW_KEY", "\t");
+ assertEquals(3, parser.getRowKeyColumnIndex());
+ line = Bytes.toBytes("val_a\tkey0=>value0,key1=>value1,key2=>value2\t1234\trowkey");
+ parse = parser.parse(line, line.length);
+ assertEquals(1, parser.getAttributesKeyColumnIndex());
+ assertEquals(6, parse.getAttributeKeyOffset());
+ String[] attr = parse.getIndividualAttributes();
+ int i = 0;
+ for (String str : attr) {
+ assertEquals("key" + i + "=>" + "value" + i, str);
+ i++;
+ }
+ }
+
+ @Test
+ public void testTsvParserWithCellVisibilityCol() throws BadTsvLineException {
+ TsvParser parser = new TsvParser(
+ "HBASE_ROW_KEY,col_a,HBASE_TS_KEY,HBASE_ATTRIBUTES_KEY,HBASE_CELL_VISIBILITY", "\t");
+ assertEquals(0, parser.getRowKeyColumnIndex());
+ assertEquals(4, parser.getCellVisibilityColumnIndex());
+ byte[] line = Bytes.toBytes("rowkey\tval_a\t1234\tkey=>value\tPRIVATE&SECRET");
+ ParsedLine parse = parser.parse(line, line.length);
+ assertEquals(18, parse.getAttributeKeyOffset());
+ assertEquals(3, parser.getAttributesKeyColumnIndex());
+ String[] attributes = parse.getIndividualAttributes();
+ assertEquals(attributes[0], "key=>value");
+ assertEquals(29, parse.getCellVisibilityColumnOffset());
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestJarFinder.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestJarFinder.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestJarFinder.java
new file mode 100644
index 0000000..8187b73
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestJarFinder.java
@@ -0,0 +1,132 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.Writer;
+import java.text.MessageFormat;
+import java.util.Properties;
+import java.util.jar.JarInputStream;
+import java.util.jar.JarOutputStream;
+import java.util.jar.Manifest;
+
+/**
+ * This file was forked from hadoop/common/branches/branch-2@1350012.
+ */
+@Category(SmallTests.class)
+public class TestJarFinder {
+
+ @Test
+ public void testJar() throws Exception {
+
+ //picking a class that is for sure in a JAR in the classpath
+ String jar = JarFinder.getJar(LogFactory.class);
+ Assert.assertTrue(new File(jar).exists());
+ }
+
+ private static void delete(File file) throws IOException {
+ if (file.getAbsolutePath().length() < 5) {
+ throw new IllegalArgumentException(
+ MessageFormat.format("Path [{0}] is too short, not deleting",
+ file.getAbsolutePath()));
+ }
+ if (file.exists()) {
+ if (file.isDirectory()) {
+ File[] children = file.listFiles();
+ if (children != null) {
+ for (File child : children) {
+ delete(child);
+ }
+ }
+ }
+ if (!file.delete()) {
+ throw new RuntimeException(
+ MessageFormat.format("Could not delete path [{0}]",
+ file.getAbsolutePath()));
+ }
+ }
+ }
+
+ @Test
+ public void testExpandedClasspath() throws Exception {
+ //picking a class that is for sure in a directory in the classpath
+ //in this case the JAR is created on the fly
+ String jar = JarFinder.getJar(TestJarFinder.class);
+ Assert.assertTrue(new File(jar).exists());
+ }
+
+ @Test
+ public void testExistingManifest() throws Exception {
+ File dir = new File(System.getProperty("test.build.dir", "target/test-dir"),
+ TestJarFinder.class.getName() + "-testExistingManifest");
+ delete(dir);
+ dir.mkdirs();
+
+ File metaInfDir = new File(dir, "META-INF");
+ metaInfDir.mkdirs();
+ File manifestFile = new File(metaInfDir, "MANIFEST.MF");
+ Manifest manifest = new Manifest();
+ OutputStream os = new FileOutputStream(manifestFile);
+ manifest.write(os);
+ os.close();
+
+ File propsFile = new File(dir, "props.properties");
+ Writer writer = new FileWriter(propsFile);
+ new Properties().store(writer, "");
+ writer.close();
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ JarOutputStream zos = new JarOutputStream(baos);
+ JarFinder.jarDir(dir, "", zos);
+ JarInputStream jis =
+ new JarInputStream(new ByteArrayInputStream(baos.toByteArray()));
+ Assert.assertNotNull(jis.getManifest());
+ jis.close();
+ }
+
+ @Test
+ public void testNoManifest() throws Exception {
+ File dir = new File(System.getProperty("test.build.dir", "target/test-dir"),
+ TestJarFinder.class.getName() + "-testNoManifest");
+ delete(dir);
+ dir.mkdirs();
+ File propsFile = new File(dir, "props.properties");
+ Writer writer = new FileWriter(propsFile);
+ new Properties().store(writer, "");
+ writer.close();
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ JarOutputStream zos = new JarOutputStream(baos);
+ JarFinder.jarDir(dir, "", zos);
+ JarInputStream jis =
+ new JarInputStream(new ByteArrayInputStream(baos.toByteArray()));
+ Assert.assertNotNull(jis.getManifest());
+ jis.close();
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFilesSplitRecovery.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFilesSplitRecovery.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFilesSplitRecovery.java
new file mode 100644
index 0000000..529a448
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFilesSplitRecovery.java
@@ -0,0 +1,669 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Collection;
+import java.util.Deque;
+import java.util.List;
+import java.util.Map;
+import java.util.NavigableMap;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HRegionLocation;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.MetaTableAccessor;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableExistsException;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.ClientServiceCallable;
+import org.apache.hadoop.hbase.client.ClusterConnection;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
+import org.apache.hadoop.hbase.ipc.RpcControllerFactory;
+import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.BulkLoadHFileRequest;
+import org.apache.hadoop.hbase.regionserver.HRegionServer;
+import org.apache.hadoop.hbase.regionserver.TestHRegionServerBulkLoad;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.util.Pair;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+import org.mockito.Mockito;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.Multimap;
+
+import org.apache.hadoop.hbase.shaded.com.google.protobuf.RpcController;
+import org.apache.hadoop.hbase.shaded.com.google.protobuf.ServiceException;
+
+/**
+ * Test cases for the atomic load error handling of the bulk load functionality.
+ */
+@Category({MapReduceTests.class, LargeTests.class})
+public class TestLoadIncrementalHFilesSplitRecovery {
+ private static final Log LOG = LogFactory.getLog(TestLoadIncrementalHFilesSplitRecovery.class);
+
+ static HBaseTestingUtility util;
+ //used by secure subclass
+ static boolean useSecure = false;
+
+ final static int NUM_CFS = 10;
+ final static byte[] QUAL = Bytes.toBytes("qual");
+ final static int ROWCOUNT = 100;
+
+ private final static byte[][] families = new byte[NUM_CFS][];
+
+ @Rule
+ public TestName name = new TestName();
+
+ static {
+ for (int i = 0; i < NUM_CFS; i++) {
+ families[i] = Bytes.toBytes(family(i));
+ }
+ }
+
+ static byte[] rowkey(int i) {
+ return Bytes.toBytes(String.format("row_%08d", i));
+ }
+
+ static String family(int i) {
+ return String.format("family_%04d", i);
+ }
+
+ static byte[] value(int i) {
+ return Bytes.toBytes(String.format("%010d", i));
+ }
+
+ public static void buildHFiles(FileSystem fs, Path dir, int value)
+ throws IOException {
+ byte[] val = value(value);
+ for (int i = 0; i < NUM_CFS; i++) {
+ Path testIn = new Path(dir, family(i));
+
+ TestHRegionServerBulkLoad.createHFile(fs, new Path(testIn, "hfile_" + i),
+ Bytes.toBytes(family(i)), QUAL, val, ROWCOUNT);
+ }
+ }
+
+ /**
+ * Creates a table with given table name and specified number of column
+ * families if the table does not already exist.
+ */
+ private void setupTable(final Connection connection, TableName table, int cfs)
+ throws IOException {
+ try {
+ LOG.info("Creating table " + table);
+ HTableDescriptor htd = new HTableDescriptor(table);
+ for (int i = 0; i < cfs; i++) {
+ htd.addFamily(new HColumnDescriptor(family(i)));
+ }
+ try (Admin admin = connection.getAdmin()) {
+ admin.createTable(htd);
+ }
+ } catch (TableExistsException tee) {
+ LOG.info("Table " + table + " already exists");
+ }
+ }
+
+ /**
+ * Creates a table with the given table name, the specified number of column families,
+ * and the given split keys if the table does not already exist.
+ * @param table the table to create
+ * @param cfs number of column families
+ * @param SPLIT_KEYS split keys for the table's regions
+ */
+ private void setupTableWithSplitkeys(TableName table, int cfs, byte[][] SPLIT_KEYS)
+ throws IOException {
+ try {
+ LOG.info("Creating table " + table);
+ HTableDescriptor htd = new HTableDescriptor(table);
+ for (int i = 0; i < cfs; i++) {
+ htd.addFamily(new HColumnDescriptor(family(i)));
+ }
+
+ util.createTable(htd, SPLIT_KEYS);
+ } catch (TableExistsException tee) {
+ LOG.info("Table " + table + " already exists");
+ }
+ }
+
+ private Path buildBulkFiles(TableName table, int value) throws Exception {
+ Path dir = util.getDataTestDirOnTestFS(table.getNameAsString());
+ Path bulk1 = new Path(dir, table.getNameAsString() + value);
+ FileSystem fs = util.getTestFileSystem();
+ buildHFiles(fs, bulk1, value);
+ return bulk1;
+ }
+
+ /**
+ * Populate table with known values.
+ */
+ private void populateTable(final Connection connection, TableName table, int value)
+ throws Exception {
+ // create HFiles for different column families
+ LoadIncrementalHFiles lih = new LoadIncrementalHFiles(util.getConfiguration());
+ Path bulk1 = buildBulkFiles(table, value);
+ try (Table t = connection.getTable(table);
+ RegionLocator locator = connection.getRegionLocator(table);
+ Admin admin = connection.getAdmin()) {
+ lih.doBulkLoad(bulk1, admin, t, locator);
+ }
+ }
+
+ /**
+ * Splits the known table in half. (This is hard-coded for this test suite.)
+ */
+ private void forceSplit(TableName table) {
+ try {
+ // A synchronous split via the region server would be preferable, but that API is not
+ // visible here, so split asynchronously and poll for completion below.
+ HRegionServer hrs = util.getRSForFirstRegionInTable(table);
+
+ for (HRegionInfo hri :
+ ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices())) {
+ if (hri.getTable().equals(table)) {
+ util.getAdmin().splitRegionAsync(hri.getRegionName(), rowkey(ROWCOUNT / 2));
+ //ProtobufUtil.split(null, hrs.getRSRpcServices(), hri, rowkey(ROWCOUNT / 2));
+ }
+ }
+
+ // verify that split completed.
+ int regions;
+ do {
+ regions = 0;
+ for (HRegionInfo hri :
+ ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices())) {
+ if (hri.getTable().equals(table)) {
+ regions++;
+ }
+ }
+ if (regions != 2) {
+ LOG.info("Taking some time to complete split...");
+ Thread.sleep(250);
+ }
+ } while (regions != 2);
+ } catch (IOException e) {
+ e.printStackTrace();
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ }
+ }
+
+ @BeforeClass
+ public static void setupCluster() throws Exception {
+ util = new HBaseTestingUtility();
+ util.getConfiguration().set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY, "");
+ util.startMiniCluster(1);
+ }
+
+ @AfterClass
+ public static void teardownCluster() throws Exception {
+ util.shutdownMiniCluster();
+ }
+
+ /**
+ * Checks that all columns have the expected value and that there is the
+ * expected number of rows.
+ * @throws IOException
+ */
+ void assertExpectedTable(TableName table, int count, int value) throws IOException {
+ HTableDescriptor [] htds = util.getAdmin().listTables(table.getNameAsString());
+ assertEquals(htds.length, 1);
+ Table t = null;
+ try {
+ t = util.getConnection().getTable(table);
+ Scan s = new Scan();
+ ResultScanner sr = t.getScanner(s);
+ int i = 0;
+ for (Result r : sr) {
+ i++;
+ for (NavigableMap<byte[], byte[]> nm : r.getNoVersionMap().values()) {
+ for (byte[] val : nm.values()) {
+ assertTrue(Bytes.equals(val, value(value)));
+ }
+ }
+ }
+ assertEquals(count, i);
+ } catch (IOException e) {
+ fail("Failed due to exception");
+ } finally {
+ if (t != null) t.close();
+ }
+ }
+
+ /**
+ * Test that shows that an exception thrown from the RS side will result in an
+ * exception on the LoadIncrementalHFiles client.
+ */
+ @Test(expected=IOException.class, timeout=120000)
+ public void testBulkLoadPhaseFailure() throws Exception {
+ final TableName table = TableName.valueOf(name.getMethodName());
+ final AtomicInteger attemptedCalls = new AtomicInteger();
+ final AtomicInteger failedCalls = new AtomicInteger();
+ util.getConfiguration().setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 2);
+ try (Connection connection = ConnectionFactory.createConnection(util
+ .getConfiguration())) {
+ setupTable(connection, table, 10);
+ LoadIncrementalHFiles lih = new LoadIncrementalHFiles(
+ util.getConfiguration()) {
+ @Override
+ protected List<LoadQueueItem> tryAtomicRegionLoad(
+ ClientServiceCallable<byte[]> serviceCallable, TableName tableName, final byte[] first,
+ Collection<LoadQueueItem> lqis) throws IOException {
+ int i = attemptedCalls.incrementAndGet();
+ if (i == 1) {
+ Connection errConn;
+ try {
+ errConn = getMockedConnection(util.getConfiguration());
+ serviceCallable = this.buildClientServiceCallable(errConn, table, first, lqis, true);
+ } catch (Exception e) {
+ LOG.fatal("mocking cruft, should never happen", e);
+ throw new RuntimeException("mocking cruft, should never happen");
+ }
+ failedCalls.incrementAndGet();
+ return super.tryAtomicRegionLoad(serviceCallable, tableName, first, lqis);
+ }
+
+ return super.tryAtomicRegionLoad(serviceCallable, tableName, first, lqis);
+ }
+ };
+ try {
+ // create HFiles for different column families
+ Path dir = buildBulkFiles(table, 1);
+ try (Table t = connection.getTable(table);
+ RegionLocator locator = connection.getRegionLocator(table);
+ Admin admin = connection.getAdmin()) {
+ lih.doBulkLoad(dir, admin, t, locator);
+ }
+ } finally {
+ util.getConfiguration().setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER,
+ HConstants.DEFAULT_HBASE_CLIENT_RETRIES_NUMBER);
+ }
+ fail("doBulkLoad should have thrown an exception");
+ }
+ }
+
+ /**
+ * Test that shows that an exception thrown from the RS side will result in the
+ * expected number of retries set by {@link HConstants#HBASE_CLIENT_RETRIES_NUMBER}
+ * when {@link LoadIncrementalHFiles#RETRY_ON_IO_EXCEPTION} is set.
+ */
+ @Test
+ public void testRetryOnIOException() throws Exception {
+ final TableName table = TableName.valueOf(name.getMethodName());
+ final AtomicInteger calls = new AtomicInteger(1);
+ final Connection conn = ConnectionFactory.createConnection(util
+ .getConfiguration());
+ util.getConfiguration().setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 2);
+ util.getConfiguration().setBoolean(
+ LoadIncrementalHFiles.RETRY_ON_IO_EXCEPTION, true);
+ final LoadIncrementalHFiles lih = new LoadIncrementalHFiles(
+ util.getConfiguration()) {
+ @Override
+ protected List<LoadQueueItem> tryAtomicRegionLoad(
+ ClientServiceCallable<byte[]> serverCallable, TableName tableName,
+ final byte[] first, Collection<LoadQueueItem> lqis)
+ throws IOException {
+ if (calls.getAndIncrement() < util.getConfiguration().getInt(
+ HConstants.HBASE_CLIENT_RETRIES_NUMBER,
+ HConstants.DEFAULT_HBASE_CLIENT_RETRIES_NUMBER) - 1) {
+ ClientServiceCallable<byte[]> newServerCallable = new ClientServiceCallable<byte[]>(
+ conn, tableName, first, new RpcControllerFactory(
+ util.getConfiguration()).newController(), HConstants.PRIORITY_UNSET) {
+ @Override
+ public byte[] rpcCall() throws Exception {
+ throw new IOException("Error calling something on RegionServer");
+ }
+ };
+ return super.tryAtomicRegionLoad(newServerCallable, tableName, first, lqis);
+ } else {
+ return super.tryAtomicRegionLoad(serverCallable, tableName, first, lqis);
+ }
+ }
+ };
+ setupTable(conn, table, 10);
+ Path dir = buildBulkFiles(table, 1);
+ lih.doBulkLoad(dir, conn.getAdmin(), conn.getTable(table),
+ conn.getRegionLocator(table));
+ util.getConfiguration().setBoolean(
+ LoadIncrementalHFiles.RETRY_ON_IO_EXCEPTION, false);
+
+ }
+
+ @SuppressWarnings("deprecation")
+ private ClusterConnection getMockedConnection(final Configuration conf)
+ throws IOException, org.apache.hadoop.hbase.shaded.com.google.protobuf.ServiceException {
+ ClusterConnection c = Mockito.mock(ClusterConnection.class);
+ Mockito.when(c.getConfiguration()).thenReturn(conf);
+ Mockito.doNothing().when(c).close();
+ // Make it so we return a particular location when asked.
+ final HRegionLocation loc = new HRegionLocation(HRegionInfo.FIRST_META_REGIONINFO,
+ ServerName.valueOf("example.org", 1234, 0));
+ Mockito.when(c.getRegionLocation((TableName) Mockito.any(),
+ (byte[]) Mockito.any(), Mockito.anyBoolean())).
+ thenReturn(loc);
+ Mockito.when(c.locateRegion((TableName) Mockito.any(), (byte[]) Mockito.any())).
+ thenReturn(loc);
+ ClientProtos.ClientService.BlockingInterface hri =
+ Mockito.mock(ClientProtos.ClientService.BlockingInterface.class);
+ Mockito.when(hri.bulkLoadHFile((RpcController)Mockito.any(), (BulkLoadHFileRequest)Mockito.any())).
+ thenThrow(new ServiceException(new IOException("injecting bulk load error")));
+ Mockito.when(c.getClient(Mockito.any(ServerName.class))).
+ thenReturn(hri);
+ return c;
+ }
+
+ /**
+ * This test exercises the path where there is a split after initial
+ * validation but before the atomic bulk load call. We cannot use presplitting
+ * to test this path, so we actually inject a split just before the atomic
+ * region load.
+ */
+ @Test (timeout=120000)
+ public void testSplitWhileBulkLoadPhase() throws Exception {
+ final TableName table = TableName.valueOf(name.getMethodName());
+ try (Connection connection = ConnectionFactory.createConnection(util.getConfiguration())) {
+ setupTable(connection, table, 10);
+ populateTable(connection, table, 1);
+ assertExpectedTable(table, ROWCOUNT, 1);
+
+ // Now let's cause trouble. This will occur after checks and cause bulk
+ // files to fail when attempt to atomically import. This is recoverable.
+ final AtomicInteger attemptedCalls = new AtomicInteger();
+ LoadIncrementalHFiles lih2 = new LoadIncrementalHFiles(util.getConfiguration()) {
+ @Override
+ protected void bulkLoadPhase(final Table htable, final Connection conn,
+ ExecutorService pool, Deque<LoadQueueItem> queue,
+ final Multimap<ByteBuffer, LoadQueueItem> regionGroups, boolean copyFile,
+ Map<LoadQueueItem, ByteBuffer> item2RegionMap)
+ throws IOException {
+ int i = attemptedCalls.incrementAndGet();
+ if (i == 1) {
+ // On first attempt force a split.
+ forceSplit(table);
+ }
+ super.bulkLoadPhase(htable, conn, pool, queue, regionGroups, copyFile, item2RegionMap);
+ }
+ };
+
+ // create HFiles for different column families
+ try (Table t = connection.getTable(table);
+ RegionLocator locator = connection.getRegionLocator(table);
+ Admin admin = connection.getAdmin()) {
+ Path bulk = buildBulkFiles(table, 2);
+ lih2.doBulkLoad(bulk, admin, t, locator);
+ }
+
+ // check that data was loaded
+ // The three expected attempts are 1) a failure because the region needs to split,
+ // 2) the load of the split top, and 3) the load of the split bottom.
+ assertEquals(attemptedCalls.get(), 3);
+ assertExpectedTable(table, ROWCOUNT, 2);
+ }
+ }
+
+ /**
+ * This test splits a table and attempts to bulk load. The bulk import files
+ * should be split before atomically importing.
+ */
+ @Test (timeout=120000)
+ public void testGroupOrSplitPresplit() throws Exception {
+ final TableName table = TableName.valueOf(name.getMethodName());
+ try (Connection connection = ConnectionFactory.createConnection(util.getConfiguration())) {
+ setupTable(connection, table, 10);
+ populateTable(connection, table, 1);
+ assertExpectedTable(connection, table, ROWCOUNT, 1);
+ forceSplit(table);
+
+ final AtomicInteger countedLqis = new AtomicInteger();
+ LoadIncrementalHFiles lih = new LoadIncrementalHFiles(
+ util.getConfiguration()) {
+ @Override
+ protected Pair<List<LoadQueueItem>, String> groupOrSplit(
+ Multimap<ByteBuffer, LoadQueueItem> regionGroups,
+ final LoadQueueItem item, final Table htable,
+ final Pair<byte[][], byte[][]> startEndKeys) throws IOException {
+ Pair<List<LoadQueueItem>, String> lqis = super.groupOrSplit(regionGroups, item, htable,
+ startEndKeys);
+ if (lqis != null && lqis.getFirst() != null) {
+ countedLqis.addAndGet(lqis.getFirst().size());
+ }
+ return lqis;
+ }
+ };
+
+ // create HFiles for different column families
+ Path bulk = buildBulkFiles(table, 2);
+ try (Table t = connection.getTable(table);
+ RegionLocator locator = connection.getRegionLocator(table);
+ Admin admin = connection.getAdmin()) {
+ lih.doBulkLoad(bulk, admin, t, locator);
+ }
+ assertExpectedTable(connection, table, ROWCOUNT, 2);
+ assertEquals(20, countedLqis.get());
+ }
+ }
+
+ /**
+ * This test creates a table with many small regions. The bulk load files
+ * will be split multiple times before all of them can be loaded successfully.
+ */
+ @Test (timeout=120000)
+ public void testSplitTmpFileCleanUp() throws Exception {
+ final TableName table = TableName.valueOf(name.getMethodName());
+ byte[][] SPLIT_KEYS = new byte[][] { Bytes.toBytes("row_00000010"),
+ Bytes.toBytes("row_00000020"), Bytes.toBytes("row_00000030"),
+ Bytes.toBytes("row_00000040"), Bytes.toBytes("row_00000050")};
+ try (Connection connection = ConnectionFactory.createConnection(util.getConfiguration())) {
+ setupTableWithSplitkeys(table, 10, SPLIT_KEYS);
+
+ LoadIncrementalHFiles lih = new LoadIncrementalHFiles(util.getConfiguration());
+
+ // create HFiles
+ Path bulk = buildBulkFiles(table, 2);
+ try (Table t = connection.getTable(table);
+ RegionLocator locator = connection.getRegionLocator(table);
+ Admin admin = connection.getAdmin()) {
+ lih.doBulkLoad(bulk, admin, t, locator);
+ }
+ // family path
+ Path tmpPath = new Path(bulk, family(0));
+ // TMP_DIR under family path
+ tmpPath = new Path(tmpPath, LoadIncrementalHFiles.TMP_DIR);
+ FileSystem fs = bulk.getFileSystem(util.getConfiguration());
+ // HFiles have been split, so TMP_DIR exists
+ assertTrue(fs.exists(tmpPath));
+ // TMP_DIR should have been cleaned-up
+ assertNull(LoadIncrementalHFiles.TMP_DIR + " should be empty.",
+ FSUtils.listStatus(fs, tmpPath));
+ assertExpectedTable(connection, table, ROWCOUNT, 2);
+ }
+ }
+
+ /**
+ * This simulates a remote exception which should cause LIHF to exit with an
+ * exception.
+ */
+ @Test(expected = IOException.class, timeout=120000)
+ public void testGroupOrSplitFailure() throws Exception {
+ final TableName tableName = TableName.valueOf(name.getMethodName());
+ try (Connection connection = ConnectionFactory.createConnection(util.getConfiguration())) {
+ setupTable(connection, tableName, 10);
+
+ LoadIncrementalHFiles lih = new LoadIncrementalHFiles(
+ util.getConfiguration()) {
+ int i = 0;
+
+ @Override
+ protected Pair<List<LoadQueueItem>, String> groupOrSplit(
+ Multimap<ByteBuffer, LoadQueueItem> regionGroups,
+ final LoadQueueItem item, final Table table,
+ final Pair<byte[][], byte[][]> startEndKeys) throws IOException {
+ i++;
+
+ if (i == 5) {
+ throw new IOException("failure");
+ }
+ return super.groupOrSplit(regionGroups, item, table, startEndKeys);
+ }
+ };
+
+ // create HFiles for different column families
+ Path dir = buildBulkFiles(tableName, 1);
+ try (Table t = connection.getTable(tableName);
+ RegionLocator locator = connection.getRegionLocator(tableName);
+ Admin admin = connection.getAdmin()) {
+ lih.doBulkLoad(dir, admin, t, locator);
+ }
+ }
+
+ fail("doBulkLoad should have thrown an exception");
+ }
+
+ @Test (timeout=120000)
+ public void testGroupOrSplitWhenRegionHoleExistsInMeta() throws Exception {
+ final TableName tableName = TableName.valueOf(name.getMethodName());
+ byte[][] SPLIT_KEYS = new byte[][] { Bytes.toBytes("row_00000100") };
+ // Share the connection. We were failing to find the table with our new reverse scan because it
+ // looks for the first region, not any region -- that is how it works now. The code below removes
+ // the first region in the test; previously we relied on the Connection cache holding the first region.
+ Connection connection = ConnectionFactory.createConnection(util.getConfiguration());
+ Table table = connection.getTable(tableName);
+
+ setupTableWithSplitkeys(tableName, 10, SPLIT_KEYS);
+ Path dir = buildBulkFiles(tableName, 2);
+
+ final AtomicInteger countedLqis = new AtomicInteger();
+ LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration()) {
+
+ @Override
+ protected Pair<List<LoadQueueItem>, String> groupOrSplit(
+ Multimap<ByteBuffer, LoadQueueItem> regionGroups,
+ final LoadQueueItem item, final Table htable,
+ final Pair<byte[][], byte[][]> startEndKeys) throws IOException {
+ Pair<List<LoadQueueItem>, String> lqis = super.groupOrSplit(regionGroups, item, htable,
+ startEndKeys);
+ if (lqis != null && lqis.getFirst() != null) {
+ countedLqis.addAndGet(lqis.getFirst().size());
+ }
+ return lqis;
+ }
+ };
+
+ // do bulkload when there is no region hole in hbase:meta.
+ try (Table t = connection.getTable(tableName);
+ RegionLocator locator = connection.getRegionLocator(tableName);
+ Admin admin = connection.getAdmin()) {
+ loader.doBulkLoad(dir, admin, t, locator);
+ } catch (Exception e) {
+ LOG.error("exeception=", e);
+ }
+ // check if all the data are loaded into the table.
+ this.assertExpectedTable(tableName, ROWCOUNT, 2);
+
+ dir = buildBulkFiles(tableName, 3);
+
+ // Mess it up by leaving a hole in the hbase:meta
+ List<HRegionInfo> regionInfos = MetaTableAccessor.getTableRegions(connection, tableName);
+ for (HRegionInfo regionInfo : regionInfos) {
+ if (Bytes.equals(regionInfo.getStartKey(), HConstants.EMPTY_BYTE_ARRAY)) {
+ MetaTableAccessor.deleteRegion(connection, regionInfo);
+ break;
+ }
+ }
+
+ try (Table t = connection.getTable(tableName);
+ RegionLocator locator = connection.getRegionLocator(tableName);
+ Admin admin = connection.getAdmin()) {
+ loader.doBulkLoad(dir, admin, t, locator);
+ } catch (Exception e) {
+ LOG.error("exception=", e);
+ assertTrue("IOException expected", e instanceof IOException);
+ }
+
+ table.close();
+
+ // Make sure at least the one region that still exists can be found.
+ regionInfos = MetaTableAccessor.getTableRegions(connection, tableName);
+ assertTrue(regionInfos.size() >= 1);
+
+ this.assertExpectedTable(connection, tableName, ROWCOUNT, 2);
+ connection.close();
+ }
+
+ /**
+ * Checks that all columns have the expected value and that there is the
+ * expected number of rows.
+ * @throws IOException
+ */
+ void assertExpectedTable(final Connection connection, TableName table, int count, int value)
+ throws IOException {
+ HTableDescriptor [] htds = util.getAdmin().listTables(table.getNameAsString());
+ assertEquals(htds.length, 1);
+ Table t = null;
+ try {
+ t = connection.getTable(table);
+ Scan s = new Scan();
+ ResultScanner sr = t.getScanner(s);
+ int i = 0;
+ for (Result r : sr) {
+ i++;
+ for (NavigableMap<byte[], byte[]> nm : r.getNoVersionMap().values()) {
+ for (byte[] val : nm.values()) {
+ assertTrue(Bytes.equals(val, value(value)));
+ }
+ }
+ }
+ assertEquals(count, i);
+ } catch (IOException e) {
+ fail("Failed due to exception");
+ } finally {
+ if (t != null) t.close();
+ }
+ }
+}
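Stripped of the fault injection, the plain client-side pattern these recovery tests stress looks roughly like the following sketch (not part of the commit; the table name and HFile directory are made up):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;

public class BulkLoadSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    TableName tableName = TableName.valueOf("myTable");   // hypothetical table
    Path hfileDir = new Path("/tmp/importtsv-hfiles");    // per-family HFile subdirectories
    try (Connection conn = ConnectionFactory.createConnection(conf);
        Table table = conn.getTable(tableName);
        RegionLocator locator = conn.getRegionLocator(tableName);
        Admin admin = conn.getAdmin()) {
      // Groups the HFiles by region, splitting them on the fly if region boundaries
      // have moved (e.g. after a split), and loads each group atomically per region.
      new LoadIncrementalHFiles(conf).doBulkLoad(hfileDir, admin, table, locator);
    }
  }
}

The tests above exercise exactly the paths where that grouping or the per-region load fails partway and must be retried or re-split.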
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableInputFormat.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableInputFormat.java
new file mode 100644
index 0000000..0c5207b
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableInputFormat.java
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
+import org.apache.hadoop.mapreduce.Job;
+import org.junit.BeforeClass;
+import org.junit.experimental.categories.Category;
+
+import java.io.IOException;
+import java.util.List;
+
+/**
+ * Tests various scan start and stop row scenarios. These are set on a Scan and
+ * verified in a MapReduce job to confirm that they are handed over and applied
+ * properly.
+ */
+@Category({VerySlowMapReduceTests.class, LargeTests.class})
+public class TestMultiTableInputFormat extends MultiTableInputFormatTestBase {
+
+ @BeforeClass
+ public static void setupLogging() {
+ TEST_UTIL.enableDebug(MultiTableInputFormat.class);
+ }
+
+ @Override
+ protected void initJob(List<Scan> scans, Job job) throws IOException {
+ TableMapReduceUtil.initTableMapperJob(scans, ScanMapper.class,
+ ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableSnapshotInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableSnapshotInputFormat.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableSnapshotInputFormat.java
new file mode 100644
index 0000000..530d9c5
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableSnapshotInputFormat.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.base.Function;
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.ImmutableList;
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.Multimaps;
+import edu.umd.cs.findbugs.annotations.Nullable;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.mapreduce.Job;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.experimental.categories.Category;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+
+@Category({ VerySlowMapReduceTests.class, LargeTests.class })
+public class TestMultiTableSnapshotInputFormat extends MultiTableInputFormatTestBase {
+
+ protected Path restoreDir;
+
+ @BeforeClass
+ public static void setUpSnapshots() throws Exception {
+
+ TEST_UTIL.enableDebug(MultiTableSnapshotInputFormat.class);
+ TEST_UTIL.enableDebug(MultiTableSnapshotInputFormatImpl.class);
+
+ // take a snapshot of every table we have.
+ for (String tableName : TABLES) {
+ SnapshotTestingUtils
+ .createSnapshotAndValidate(TEST_UTIL.getAdmin(), TableName.valueOf(tableName),
+ ImmutableList.of(INPUT_FAMILY), null,
+ snapshotNameForTable(tableName), FSUtils.getRootDir(TEST_UTIL.getConfiguration()),
+ TEST_UTIL.getTestFileSystem(), true);
+ }
+ }
+
+ @Before
+ public void setUp() throws Exception {
+ this.restoreDir = TEST_UTIL.getRandomDir();
+ }
+
+ @Override
+ protected void initJob(List<Scan> scans, Job job) throws IOException {
+ TableMapReduceUtil
+ .initMultiTableSnapshotMapperJob(getSnapshotScanMapping(scans), ScanMapper.class,
+ ImmutableBytesWritable.class, ImmutableBytesWritable.class, job, true, restoreDir);
+ }
+
+ protected Map<String, Collection<Scan>> getSnapshotScanMapping(final List<Scan> scans) {
+ return Multimaps.index(scans, new Function<Scan, String>() {
+ @Nullable
+ @Override
+ public String apply(Scan input) {
+ return snapshotNameForTable(
+ Bytes.toStringBinary(input.getAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME)));
+ }
+ }).asMap();
+ }
+
+ public static String snapshotNameForTable(String tableName) {
+ return tableName + "_snapshot";
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableSnapshotInputFormatImpl.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableSnapshotInputFormatImpl.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableSnapshotInputFormatImpl.java
new file mode 100644
index 0000000..1c33848
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableSnapshotInputFormatImpl.java
@@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.ImmutableList;
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.ImmutableMap;
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.Maps;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.mockito.Mockito;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+
+import static org.junit.Assert.assertEquals;
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.eq;
+import static org.mockito.Mockito.doNothing;
+import static org.mockito.Mockito.verify;
+
+@Category({ SmallTests.class })
+public class TestMultiTableSnapshotInputFormatImpl {
+
+ private MultiTableSnapshotInputFormatImpl subject;
+ private Map<String, Collection<Scan>> snapshotScans;
+ private Path restoreDir;
+ private Configuration conf;
+ private Path rootDir;
+
+ @Before
+ public void setUp() throws Exception {
+ this.subject = Mockito.spy(new MultiTableSnapshotInputFormatImpl());
+
+ // mock out restoreSnapshot
+ // TODO: this is kind of meh; it'd be much nicer to just inject the RestoreSnapshotHelper
+ // dependency into the input format. However, we need a new RestoreSnapshotHelper per snapshot
+ // in the current design, and it *also* feels weird to introduce a RestoreSnapshotHelperFactory
+ // and inject that, which would probably be the more "pure" way of doing things. This is the
+ // lesser of two evils, perhaps?
+ doNothing().when(this.subject).
+ restoreSnapshot(any(Configuration.class), any(String.class), any(Path.class),
+ any(Path.class), any(FileSystem.class));
+
+ this.conf = new Configuration();
+ this.rootDir = new Path("file:///test-root-dir");
+ FSUtils.setRootDir(conf, rootDir);
+ this.snapshotScans = ImmutableMap.<String, Collection<Scan>>of("snapshot1",
+ ImmutableList.of(new Scan(Bytes.toBytes("1"), Bytes.toBytes("2"))), "snapshot2",
+ ImmutableList.of(new Scan(Bytes.toBytes("3"), Bytes.toBytes("4")),
+ new Scan(Bytes.toBytes("5"), Bytes.toBytes("6"))));
+
+ this.restoreDir = new Path(FSUtils.getRootDir(conf), "restore-dir");
+
+ }
+
+ public void callSetInput() throws IOException {
+ subject.setInput(this.conf, snapshotScans, restoreDir);
+ }
+
+ public Map<String, Collection<ScanWithEquals>> toScanWithEquals(
+ Map<String, Collection<Scan>> snapshotScans) throws IOException {
+ Map<String, Collection<ScanWithEquals>> rtn = Maps.newHashMap();
+
+ for (Map.Entry<String, Collection<Scan>> entry : snapshotScans.entrySet()) {
+ List<ScanWithEquals> scans = Lists.newArrayList();
+
+ for (Scan scan : entry.getValue()) {
+ scans.add(new ScanWithEquals(scan));
+ }
+ rtn.put(entry.getKey(), scans);
+ }
+
+ return rtn;
+ }
+
+ public static class ScanWithEquals {
+
+ private final String startRow;
+ private final String stopRow;
+
+ /**
+ * Creates a new instance of this class while copying all values.
+ *
+ * @param scan The scan instance to copy from.
+ * @throws java.io.IOException When copying the values fails.
+ */
+ public ScanWithEquals(Scan scan) throws IOException {
+ this.startRow = Bytes.toStringBinary(scan.getStartRow());
+ this.stopRow = Bytes.toStringBinary(scan.getStopRow());
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (!(obj instanceof ScanWithEquals)) {
+ return false;
+ }
+ ScanWithEquals otherScan = (ScanWithEquals) obj;
+ return Objects.equals(this.startRow, otherScan.startRow) && Objects
+ .equals(this.stopRow, otherScan.stopRow);
+ }
+
+ @Override
+ public String toString() {
+ return org.apache.hadoop.hbase.shaded.com.google.common.base.MoreObjects.
+ toStringHelper(this).add("startRow", startRow)
+ .add("stopRow", stopRow).toString();
+ }
+ }
+
+ @Test
+ public void testSetInputSetsSnapshotToScans() throws Exception {
+
+ callSetInput();
+
+ Map<String, Collection<Scan>> actual = subject.getSnapshotsToScans(conf);
+
+ // convert to scans we can use .equals on
+ Map<String, Collection<ScanWithEquals>> actualWithEquals = toScanWithEquals(actual);
+ Map<String, Collection<ScanWithEquals>> expectedWithEquals = toScanWithEquals(snapshotScans);
+
+ assertEquals(expectedWithEquals, actualWithEquals);
+ }
+
+ @Test
+ public void testSetInputPushesRestoreDirectories() throws Exception {
+ callSetInput();
+
+ Map<String, Path> restoreDirs = subject.getSnapshotDirs(conf);
+
+ assertEquals(this.snapshotScans.keySet(), restoreDirs.keySet());
+ }
+
+ @Test
+ public void testSetInputCreatesRestoreDirectoriesUnderRootRestoreDir() throws Exception {
+ callSetInput();
+
+ Map<String, Path> restoreDirs = subject.getSnapshotDirs(conf);
+
+ for (Path snapshotDir : restoreDirs.values()) {
+ assertEquals("Expected " + snapshotDir + " to be a child of " + restoreDir, restoreDir,
+ snapshotDir.getParent());
+ }
+ }
+
+ @Test
+ public void testSetInputRestoresSnapshots() throws Exception {
+ callSetInput();
+
+ Map<String, Path> snapshotDirs = subject.getSnapshotDirs(conf);
+
+ for (Map.Entry<String, Path> entry : snapshotDirs.entrySet()) {
+ verify(this.subject).restoreSnapshot(eq(this.conf), eq(entry.getKey()), eq(this.rootDir),
+ eq(entry.getValue()), any(FileSystem.class));
+ }
+ }
+}
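For reference, a minimal sketch of how MultiTableSnapshotInputFormat is typically wired into a job on top of the setInput()/getSnapshotsToScans() plumbing tested above, assuming the TableMapReduceUtil.initMultiTableSnapshotMapperJob helper; the mapper class, output types and restore path are illustrative:

  Configuration conf = HBaseConfiguration.create();
  Job job = Job.getInstance(conf, "multi-snapshot-scan");
  // snapshot name -> scans to run over the restored snapshot files
  Map<String, Collection<Scan>> snapshotScans = ImmutableMap.<String, Collection<Scan>>of(
      "snapshot1", ImmutableList.of(new Scan(Bytes.toBytes("1"), Bytes.toBytes("2"))));
  TableMapReduceUtil.initMultiTableSnapshotMapperJob(snapshotScans,
      MyMapper.class,                              // hypothetical TableMapper subclass
      ImmutableBytesWritable.class, Result.class,  // mapper output key/value types
      job, true,
      new Path("/tmp/snapshot-restore"));          // restore dir; illustrative path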
[22/41] hbase git commit: HBASE-18640 Move mapreduce out of
hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TsvImporterCustomTestMapperForOprAttr.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TsvImporterCustomTestMapperForOprAttr.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TsvImporterCustomTestMapperForOprAttr.java
new file mode 100644
index 0000000..a9da98b
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TsvImporterCustomTestMapperForOprAttr.java
@@ -0,0 +1,57 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser.BadTsvLineException;
+import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser.ParsedLine;
+import org.apache.hadoop.hbase.util.Bytes;
+
+/**
+ * Just shows a simple example of how the attributes can be extracted and added
+ * to the puts
+ */
+public class TsvImporterCustomTestMapperForOprAttr extends TsvImporterMapper {
+ @Override
+ protected void populatePut(byte[] lineBytes, ParsedLine parsed, Put put, int i)
+ throws BadTsvLineException, IOException {
+ KeyValue kv;
+ kv = new KeyValue(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength(),
+ parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0,
+ parser.getQualifier(i).length, ts, KeyValue.Type.Put, lineBytes, parsed.getColumnOffset(i),
+ parsed.getColumnLength(i));
+ if (parsed.getIndividualAttributes() != null) {
+ String[] attributes = parsed.getIndividualAttributes();
+ for (String attr : attributes) {
+ String[] split = attr.split(ImportTsv.DEFAULT_ATTRIBUTES_SEPERATOR);
+ if (split == null || split.length <= 1) {
+ throw new BadTsvLineException("Invalid attributes seperator specified" + attributes);
+ } else {
+ if (split[0].length() <= 0 || split[1].length() <= 0) {
+ throw new BadTsvLineException("Invalid attributes seperator specified" + attributes);
+ }
+ put.setAttribute(split[0], Bytes.toBytes(split[1]));
+ }
+ }
+ }
+ put.add(kv);
+ }
+}
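For reference, a minimal sketch of pointing ImportTsv at the custom mapper above; the configuration keys follow ImportTsv's property names, while the column spec and column family are illustrative assumptions:

  Configuration conf = HBaseConfiguration.create();
  // use the custom mapper instead of the default TsvImporterMapper
  conf.set("importtsv.mapper.class",
      "org.apache.hadoop.hbase.mapreduce.TsvImporterCustomTestMapperForOprAttr");
  // row key, one data column, and the operation-attributes column (illustrative spec)
  conf.set("importtsv.columns", "HBASE_ROW_KEY,d:c1,HBASE_ATTRIBUTES_KEY");
  // each entry in the attributes column is expected as
  // key + ImportTsv.DEFAULT_ATTRIBUTES_SEPERATOR + value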
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationSmallTests.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationSmallTests.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationSmallTests.java
new file mode 100644
index 0000000..69c4c7c
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationSmallTests.java
@@ -0,0 +1,1059 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.replication;
+
+import static org.junit.Assert.*;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.NavigableMap;
+import java.util.TreeMap;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.Waiter;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.client.replication.ReplicationAdmin;
+import org.apache.hadoop.hbase.mapreduce.replication.VerifyReplication;
+import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl;
+import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
+import org.apache.hadoop.hbase.replication.regionserver.Replication;
+import org.apache.hadoop.hbase.replication.regionserver.ReplicationSource;
+import org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceInterface;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos;
+import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.ReplicationTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.util.JVMClusterUtil;
+import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
+import org.apache.hadoop.hbase.wal.WAL;
+import org.apache.hadoop.hbase.wal.WALKey;
+import org.apache.hadoop.mapreduce.Job;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
+
+@Category({ReplicationTests.class, LargeTests.class})
+public class TestReplicationSmallTests extends TestReplicationBase {
+
+ private static final Log LOG = LogFactory.getLog(TestReplicationSmallTests.class);
+ private static final String PEER_ID = "2";
+
+ @Rule
+ public TestName name = new TestName();
+
+ /**
+ * @throws java.lang.Exception
+ */
+ @Before
+ public void setUp() throws Exception {
+ // Starting and stopping replication can make us miss new logs,
+ // rolling like this makes sure the most recent one gets added to the queue
+ for ( JVMClusterUtil.RegionServerThread r :
+ utility1.getHBaseCluster().getRegionServerThreads()) {
+ utility1.getAdmin().rollWALWriter(r.getRegionServer().getServerName());
+ }
+ int rowCount = utility1.countRows(tableName);
+ utility1.deleteTableData(tableName);
+ // truncating the table will send one Delete per row to the slave cluster
+ // in an async fashion, which is why we cannot just call deleteTableData on
+ // utility2 since late writes could make it to the slave in some way.
+ // Instead, we truncate the first table and wait for all the Deletes to
+ // make it to the slave.
+ Scan scan = new Scan();
+ int lastCount = 0;
+ for (int i = 0; i < NB_RETRIES; i++) {
+ if (i==NB_RETRIES-1) {
+ fail("Waited too much time for truncate");
+ }
+ ResultScanner scanner = htable2.getScanner(scan);
+ Result[] res = scanner.next(rowCount);
+ scanner.close();
+ if (res.length != 0) {
+ if (res.length < lastCount) {
+ i--; // Don't increment timeout if we make progress
+ }
+ lastCount = res.length;
+ LOG.info("Still got " + res.length + " rows");
+ Thread.sleep(SLEEP_TIME);
+ } else {
+ break;
+ }
+ }
+ }
+
+ /**
+ * Verify that version and column delete marker types are replicated
+ * correctly.
+ * @throws Exception
+ */
+ @Test(timeout=300000)
+ public void testDeleteTypes() throws Exception {
+ LOG.info("testDeleteTypes");
+ final byte[] v1 = Bytes.toBytes("v1");
+ final byte[] v2 = Bytes.toBytes("v2");
+ final byte[] v3 = Bytes.toBytes("v3");
+ htable1 = utility1.getConnection().getTable(tableName);
+
+ long t = EnvironmentEdgeManager.currentTime();
+ // create three versions for "row"
+ Put put = new Put(row);
+ put.addColumn(famName, row, t, v1);
+ htable1.put(put);
+
+ put = new Put(row);
+ put.addColumn(famName, row, t + 1, v2);
+ htable1.put(put);
+
+ put = new Put(row);
+ put.addColumn(famName, row, t + 2, v3);
+ htable1.put(put);
+
+ Get get = new Get(row);
+ get.setMaxVersions();
+ for (int i = 0; i < NB_RETRIES; i++) {
+ if (i==NB_RETRIES-1) {
+ fail("Waited too much time for put replication");
+ }
+ Result res = htable2.get(get);
+ if (res.size() < 3) {
+ LOG.info("Rows not available");
+ Thread.sleep(SLEEP_TIME);
+ } else {
+ assertArrayEquals(CellUtil.cloneValue(res.rawCells()[0]), v3);
+ assertArrayEquals(CellUtil.cloneValue(res.rawCells()[1]), v2);
+ assertArrayEquals(CellUtil.cloneValue(res.rawCells()[2]), v1);
+ break;
+ }
+ }
+ // place a version delete marker (delete last version)
+ Delete d = new Delete(row);
+ d.addColumn(famName, row, t);
+ htable1.delete(d);
+
+ get = new Get(row);
+ get.setMaxVersions();
+ for (int i = 0; i < NB_RETRIES; i++) {
+ if (i==NB_RETRIES-1) {
+ fail("Waited too much time for put replication");
+ }
+ Result res = htable2.get(get);
+ if (res.size() > 2) {
+ LOG.info("Version not deleted");
+ Thread.sleep(SLEEP_TIME);
+ } else {
+ assertArrayEquals(CellUtil.cloneValue(res.rawCells()[0]), v3);
+ assertArrayEquals(CellUtil.cloneValue(res.rawCells()[1]), v2);
+ break;
+ }
+ }
+
+ // place a column delete marker
+ d = new Delete(row);
+ d.addColumns(famName, row, t+2);
+ htable1.delete(d);
+
+ // now *both* of the remaining version should be deleted
+ // at the replica
+ get = new Get(row);
+ for (int i = 0; i < NB_RETRIES; i++) {
+ if (i==NB_RETRIES-1) {
+ fail("Waited too much time for del replication");
+ }
+ Result res = htable2.get(get);
+ if (res.size() >= 1) {
+ LOG.info("Rows not deleted");
+ Thread.sleep(SLEEP_TIME);
+ } else {
+ break;
+ }
+ }
+ }
+
+ /**
+ * Add a row, check it's replicated, delete it, check it's gone.
+ * @throws Exception
+ */
+ @Test(timeout=300000)
+ public void testSimplePutDelete() throws Exception {
+ LOG.info("testSimplePutDelete");
+ Put put = new Put(row);
+ put.addColumn(famName, row, row);
+
+ htable1 = utility1.getConnection().getTable(tableName);
+ htable1.put(put);
+
+ Get get = new Get(row);
+ for (int i = 0; i < NB_RETRIES; i++) {
+ if (i==NB_RETRIES-1) {
+ fail("Waited too much time for put replication");
+ }
+ Result res = htable2.get(get);
+ if (res.isEmpty()) {
+ LOG.info("Row not available");
+ Thread.sleep(SLEEP_TIME);
+ } else {
+ assertArrayEquals(res.value(), row);
+ break;
+ }
+ }
+
+ Delete del = new Delete(row);
+ htable1.delete(del);
+
+ get = new Get(row);
+ for (int i = 0; i < NB_RETRIES; i++) {
+ if (i==NB_RETRIES-1) {
+ fail("Waited too much time for del replication");
+ }
+ Result res = htable2.get(get);
+ if (res.size() >= 1) {
+ LOG.info("Row not deleted");
+ Thread.sleep(SLEEP_TIME);
+ } else {
+ break;
+ }
+ }
+ }
+
+ /**
+ * Try a small batch upload using the write buffer, check it's replicated
+ * @throws Exception
+ */
+ @Test(timeout=300000)
+ public void testSmallBatch() throws Exception {
+ LOG.info("testSmallBatch");
+ // normal Batch tests
+ loadData("", row);
+
+ Scan scan = new Scan();
+
+ ResultScanner scanner1 = htable1.getScanner(scan);
+ Result[] res1 = scanner1.next(NB_ROWS_IN_BATCH);
+ scanner1.close();
+ assertEquals(NB_ROWS_IN_BATCH, res1.length);
+
+ waitForReplication(NB_ROWS_IN_BATCH, NB_RETRIES);
+ }
+
+ private void waitForReplication(int expectedRows, int retries) throws IOException, InterruptedException {
+ Scan scan;
+ for (int i = 0; i < retries; i++) {
+ scan = new Scan();
+ if (i== retries -1) {
+ fail("Waited too much time for normal batch replication");
+ }
+ ResultScanner scanner = htable2.getScanner(scan);
+ Result[] res = scanner.next(expectedRows);
+ scanner.close();
+ if (res.length != expectedRows) {
+ LOG.info("Only got " + res.length + " rows");
+ Thread.sleep(SLEEP_TIME);
+ } else {
+ break;
+ }
+ }
+ }
+
+ private void loadData(String prefix, byte[] row) throws IOException {
+ List<Put> puts = new ArrayList<>(NB_ROWS_IN_BATCH);
+ for (int i = 0; i < NB_ROWS_IN_BATCH; i++) {
+ Put put = new Put(Bytes.toBytes(prefix + Integer.toString(i)));
+ put.addColumn(famName, row, row);
+ puts.add(put);
+ }
+ htable1.put(puts);
+ }
+
+ /**
+ * Test disabling replication: insert a row and make sure it is not replicated,
+ * then re-enable the peer and verify the insert eventually shows up on the slave.
+ *
+ * @throws Exception
+ */
+ @Test(timeout = 300000)
+ public void testDisableEnable() throws Exception {
+
+ // Test disabling replication
+ admin.disablePeer(PEER_ID);
+
+ byte[] rowkey = Bytes.toBytes("disable enable");
+ Put put = new Put(rowkey);
+ put.addColumn(famName, row, row);
+ htable1.put(put);
+
+ Get get = new Get(rowkey);
+ for (int i = 0; i < NB_RETRIES; i++) {
+ Result res = htable2.get(get);
+ if (res.size() >= 1) {
+ fail("Replication wasn't disabled");
+ } else {
+ LOG.info("Row not replicated, let's wait a bit more...");
+ Thread.sleep(SLEEP_TIME);
+ }
+ }
+
+ // Test enable replication
+ admin.enablePeer(PEER_ID);
+
+ for (int i = 0; i < NB_RETRIES; i++) {
+ Result res = htable2.get(get);
+ if (res.isEmpty()) {
+ LOG.info("Row not available");
+ Thread.sleep(SLEEP_TIME);
+ } else {
+ assertArrayEquals(res.value(), row);
+ return;
+ }
+ }
+ fail("Waited too much time for put replication");
+ }
+
+ /**
+ * Integration test for TestReplicationAdmin: removes and re-adds a peer
+ * cluster.
+ *
+ * @throws Exception
+ */
+ @Test(timeout=300000)
+ public void testAddAndRemoveClusters() throws Exception {
+ LOG.info("testAddAndRemoveClusters");
+ admin.removePeer(PEER_ID);
+ Thread.sleep(SLEEP_TIME);
+ byte[] rowKey = Bytes.toBytes("Won't be replicated");
+ Put put = new Put(rowKey);
+ put.addColumn(famName, row, row);
+ htable1.put(put);
+
+ Get get = new Get(rowKey);
+ for (int i = 0; i < NB_RETRIES; i++) {
+ if (i == NB_RETRIES-1) {
+ break;
+ }
+ Result res = htable2.get(get);
+ if (res.size() >= 1) {
+ fail("Not supposed to be replicated");
+ } else {
+ LOG.info("Row not replicated, let's wait a bit more...");
+ Thread.sleep(SLEEP_TIME);
+ }
+ }
+ ReplicationPeerConfig rpc = new ReplicationPeerConfig();
+ rpc.setClusterKey(utility2.getClusterKey());
+ admin.addPeer(PEER_ID, rpc, null);
+ Thread.sleep(SLEEP_TIME);
+ rowKey = Bytes.toBytes("do rep");
+ put = new Put(rowKey);
+ put.addColumn(famName, row, row);
+ LOG.info("Adding new row");
+ htable1.put(put);
+
+ get = new Get(rowKey);
+ for (int i = 0; i < NB_RETRIES; i++) {
+ if (i==NB_RETRIES-1) {
+ fail("Waited too much time for put replication");
+ }
+ Result res = htable2.get(get);
+ if (res.isEmpty()) {
+ LOG.info("Row not available");
+ Thread.sleep(SLEEP_TIME*i);
+ } else {
+ assertArrayEquals(res.value(), row);
+ break;
+ }
+ }
+ }
+
+
+ /**
+ * Do a more intense version of testSmallBatch, one that will trigger
+ * wal rolling and other non-trivial code paths
+ * @throws Exception
+ */
+ @Test(timeout=300000)
+ public void testLoading() throws Exception {
+ LOG.info("Writing out rows to table1 in testLoading");
+ List<Put> puts = new ArrayList<>(NB_ROWS_IN_BIG_BATCH);
+ for (int i = 0; i < NB_ROWS_IN_BIG_BATCH; i++) {
+ Put put = new Put(Bytes.toBytes(i));
+ put.addColumn(famName, row, row);
+ puts.add(put);
+ }
+ // The puts will be iterated through and flushed only when the buffer
+ // size is reached.
+ htable1.put(puts);
+
+ Scan scan = new Scan();
+
+ ResultScanner scanner = htable1.getScanner(scan);
+ Result[] res = scanner.next(NB_ROWS_IN_BIG_BATCH);
+ scanner.close();
+
+ assertEquals(NB_ROWS_IN_BIG_BATCH, res.length);
+
+ LOG.info("Looking in table2 for replicated rows in testLoading");
+ long start = System.currentTimeMillis();
+ // Retry more than NB_RETRIES. As it was, retries were done in 5 seconds and we'd fail
+ // sometimes.
+ final long retries = NB_RETRIES * 10;
+ for (int i = 0; i < retries; i++) {
+ scan = new Scan();
+ scanner = htable2.getScanner(scan);
+ res = scanner.next(NB_ROWS_IN_BIG_BATCH);
+ scanner.close();
+ if (res.length != NB_ROWS_IN_BIG_BATCH) {
+ if (i == retries - 1) {
+ int lastRow = -1;
+ for (Result result : res) {
+ int currentRow = Bytes.toInt(result.getRow());
+ for (int row = lastRow+1; row < currentRow; row++) {
+ LOG.error("Row missing: " + row);
+ }
+ lastRow = currentRow;
+ }
+ LOG.error("Last row: " + lastRow);
+ fail("Waited too much time for normal batch replication, " +
+ res.length + " instead of " + NB_ROWS_IN_BIG_BATCH + "; waited=" +
+ (System.currentTimeMillis() - start) + "ms");
+ } else {
+ LOG.info("Only got " + res.length + " rows... retrying");
+ Thread.sleep(SLEEP_TIME);
+ }
+ } else {
+ break;
+ }
+ }
+ }
+
+ /**
+ * Do a small loading into a table, make sure the data is really the same,
+ * then run the VerifyReplication job to check the results. Do a second
+ * comparison where all the cells are different.
+ * @throws Exception
+ */
+ @Test(timeout=300000)
+ public void testVerifyRepJob() throws Exception {
+ // Populate the tables, at the same time it guarantees that the tables are
+ // identical since it does the check
+ testSmallBatch();
+
+ String[] args = new String[] {PEER_ID, tableName.getNameAsString()};
+ runVerifyReplication(args, NB_ROWS_IN_BATCH, 0);
+
+ Scan scan = new Scan();
+ ResultScanner rs = htable2.getScanner(scan);
+ Put put = null;
+ for (Result result : rs) {
+ put = new Put(result.getRow());
+ Cell firstVal = result.rawCells()[0];
+ put.addColumn(CellUtil.cloneFamily(firstVal), CellUtil.cloneQualifier(firstVal),
+ Bytes.toBytes("diff data"));
+ htable2.put(put);
+ }
+ Delete delete = new Delete(put.getRow());
+ htable2.delete(delete);
+ runVerifyReplication(args, 0, NB_ROWS_IN_BATCH);
+ }
+
+ /**
+ * Load a row into a table, make sure the data is really the same,
+ * delete the row, make sure the delete marker is replicated,
+ * run verify replication with and without raw to check the results.
+ * @throws Exception
+ */
+ @Test(timeout=300000)
+ public void testVerifyRepJobWithRawOptions() throws Exception {
+ LOG.info(name.getMethodName());
+
+ final TableName tableName = TableName.valueOf(name.getMethodName());
+ byte[] familyname = Bytes.toBytes("fam_raw");
+ byte[] row = Bytes.toBytes("row_raw");
+
+ Table lHtable1 = null;
+ Table lHtable2 = null;
+
+ try {
+ HTableDescriptor table = new HTableDescriptor(tableName);
+ HColumnDescriptor fam = new HColumnDescriptor(familyname);
+ fam.setMaxVersions(100);
+ fam.setScope(HConstants.REPLICATION_SCOPE_GLOBAL);
+ table.addFamily(fam);
+ scopes = new TreeMap<>(Bytes.BYTES_COMPARATOR);
+ for (HColumnDescriptor f : table.getColumnFamilies()) {
+ scopes.put(f.getName(), f.getScope());
+ }
+
+ Connection connection1 = ConnectionFactory.createConnection(conf1);
+ Connection connection2 = ConnectionFactory.createConnection(conf2);
+ try (Admin admin1 = connection1.getAdmin()) {
+ admin1.createTable(table, HBaseTestingUtility.KEYS_FOR_HBA_CREATE_TABLE);
+ }
+ try (Admin admin2 = connection2.getAdmin()) {
+ admin2.createTable(table, HBaseTestingUtility.KEYS_FOR_HBA_CREATE_TABLE);
+ }
+ utility1.waitUntilAllRegionsAssigned(tableName);
+ utility2.waitUntilAllRegionsAssigned(tableName);
+
+ lHtable1 = utility1.getConnection().getTable(tableName);
+ lHtable2 = utility2.getConnection().getTable(tableName);
+
+ Put put = new Put(row);
+ put.addColumn(familyname, row, row);
+ lHtable1.put(put);
+
+ Get get = new Get(row);
+ for (int i = 0; i < NB_RETRIES; i++) {
+ if (i==NB_RETRIES-1) {
+ fail("Waited too much time for put replication");
+ }
+ Result res = lHtable2.get(get);
+ if (res.isEmpty()) {
+ LOG.info("Row not available");
+ Thread.sleep(SLEEP_TIME);
+ } else {
+ assertArrayEquals(res.value(), row);
+ break;
+ }
+ }
+
+ Delete del = new Delete(row);
+ lHtable1.delete(del);
+
+ get = new Get(row);
+ for (int i = 0; i < NB_RETRIES; i++) {
+ if (i==NB_RETRIES-1) {
+ fail("Waited too much time for del replication");
+ }
+ Result res = lHtable2.get(get);
+ if (res.size() >= 1) {
+ LOG.info("Row not deleted");
+ Thread.sleep(SLEEP_TIME);
+ } else {
+ break;
+ }
+ }
+
+ // Checking verifyReplication for the default behavior.
+ String[] argsWithoutRaw = new String[] {PEER_ID, tableName.getNameAsString()};
+ runVerifyReplication(argsWithoutRaw, 0, 0);
+
+ // Checking verifyReplication with raw
+ String[] argsWithRawAsTrue = new String[] {"--raw", PEER_ID, tableName.getNameAsString()};
+ runVerifyReplication(argsWithRawAsTrue, 1, 0);
+ } finally {
+ if (lHtable1 != null) {
+ lHtable1.close();
+ }
+ if (lHtable2 != null) {
+ lHtable2.close();
+ }
+ }
+ }
+
+ private void runVerifyReplication(String[] args, int expectedGoodRows, int expectedBadRows)
+ throws IOException, InterruptedException, ClassNotFoundException {
+ Job job = new VerifyReplication().createSubmittableJob(new Configuration(conf1), args);
+ if (job == null) {
+ fail("Job wasn't created, see the log");
+ }
+ if (!job.waitForCompletion(true)) {
+ fail("Job failed, see the log");
+ }
+ assertEquals(expectedGoodRows, job.getCounters().
+ findCounter(VerifyReplication.Verifier.Counters.GOODROWS).getValue());
+ assertEquals(expectedBadRows, job.getCounters().
+ findCounter(VerifyReplication.Verifier.Counters.BADROWS).getValue());
+ }
+
+ @Test(timeout=300000)
+ // VerifyReplication should honor versions option
+ public void testHBase14905() throws Exception {
+ // normal Batch tests
+ byte[] qualifierName = Bytes.toBytes("f1");
+ Put put = new Put(Bytes.toBytes("r1"));
+ put.addColumn(famName, qualifierName, Bytes.toBytes("v1002"));
+ htable1.put(put);
+ put.addColumn(famName, qualifierName, Bytes.toBytes("v1001"));
+ htable1.put(put);
+ put.addColumn(famName, qualifierName, Bytes.toBytes("v1112"));
+ htable1.put(put);
+
+ Scan scan = new Scan();
+ scan.setMaxVersions(100);
+ ResultScanner scanner1 = htable1.getScanner(scan);
+ Result[] res1 = scanner1.next(1);
+ scanner1.close();
+
+ assertEquals(1, res1.length);
+ assertEquals(3, res1[0].getColumnCells(famName, qualifierName).size());
+
+ for (int i = 0; i < NB_RETRIES; i++) {
+ scan = new Scan();
+ scan.setMaxVersions(100);
+ scanner1 = htable2.getScanner(scan);
+ res1 = scanner1.next(1);
+ scanner1.close();
+ if (res1.length != 1) {
+ LOG.info("Only got " + res1.length + " rows");
+ Thread.sleep(SLEEP_TIME);
+ } else {
+ int cellNumber = res1[0].getColumnCells(famName, Bytes.toBytes("f1")).size();
+ if (cellNumber != 3) {
+ LOG.info("Only got " + cellNumber + " cells");
+ Thread.sleep(SLEEP_TIME);
+ } else {
+ break;
+ }
+ }
+ if (i == NB_RETRIES-1) {
+ fail("Waited too much time for normal batch replication");
+ }
+ }
+
+ put.addColumn(famName, qualifierName, Bytes.toBytes("v1111"));
+ htable2.put(put);
+ put.addColumn(famName, qualifierName, Bytes.toBytes("v1112"));
+ htable2.put(put);
+
+ scan = new Scan();
+ scan.setMaxVersions(100);
+ scanner1 = htable2.getScanner(scan);
+ res1 = scanner1.next(NB_ROWS_IN_BATCH);
+ scanner1.close();
+
+ assertEquals(1, res1.length);
+ assertEquals(5, res1[0].getColumnCells(famName, qualifierName).size());
+
+ String[] args = new String[] {"--versions=100", PEER_ID, tableName.getNameAsString()};
+ runVerifyReplication(args, 0, 1);
+ }
+
+ @Test(timeout=300000)
+ // VerifyReplication should honor versions option
+ public void testVersionMismatchHBase14905() throws Exception {
+ // normal Batch tests
+ byte[] qualifierName = Bytes.toBytes("f1");
+ Put put = new Put(Bytes.toBytes("r1"));
+ long ts = System.currentTimeMillis();
+ put.addColumn(famName, qualifierName, ts + 1, Bytes.toBytes("v1"));
+ htable1.put(put);
+ put.addColumn(famName, qualifierName, ts + 2, Bytes.toBytes("v2"));
+ htable1.put(put);
+ put.addColumn(famName, qualifierName, ts + 3, Bytes.toBytes("v3"));
+ htable1.put(put);
+
+ Scan scan = new Scan();
+ scan.setMaxVersions(100);
+ ResultScanner scanner1 = htable1.getScanner(scan);
+ Result[] res1 = scanner1.next(1);
+ scanner1.close();
+
+ assertEquals(1, res1.length);
+ assertEquals(3, res1[0].getColumnCells(famName, qualifierName).size());
+
+ for (int i = 0; i < NB_RETRIES; i++) {
+ scan = new Scan();
+ scan.setMaxVersions(100);
+ scanner1 = htable2.getScanner(scan);
+ res1 = scanner1.next(1);
+ scanner1.close();
+ if (res1.length != 1) {
+ LOG.info("Only got " + res1.length + " rows");
+ Thread.sleep(SLEEP_TIME);
+ } else {
+ int cellNumber = res1[0].getColumnCells(famName, Bytes.toBytes("f1")).size();
+ if (cellNumber != 3) {
+ LOG.info("Only got " + cellNumber + " cells");
+ Thread.sleep(SLEEP_TIME);
+ } else {
+ break;
+ }
+ }
+ if (i == NB_RETRIES-1) {
+ fail("Waited too much time for normal batch replication");
+ }
+ }
+
+ try {
+ // Disabling replication and modifying the particular version of the cell to validate the feature.
+ admin.disablePeer(PEER_ID);
+ Put put2 = new Put(Bytes.toBytes("r1"));
+ put2.addColumn(famName, qualifierName, ts +2, Bytes.toBytes("v99"));
+ htable2.put(put2);
+
+ scan = new Scan();
+ scan.setMaxVersions(100);
+ scanner1 = htable2.getScanner(scan);
+ res1 = scanner1.next(NB_ROWS_IN_BATCH);
+ scanner1.close();
+ assertEquals(1, res1.length);
+ assertEquals(3, res1[0].getColumnCells(famName, qualifierName).size());
+
+ String[] args = new String[] {"--versions=100", PEER_ID, tableName.getNameAsString()};
+ runVerifyReplication(args, 0, 1);
+ }
+ finally {
+ admin.enablePeer(PEER_ID);
+ }
+ }
+
+ /**
+ * Test for HBASE-9038, Replication.scopeWALEdits would NPE if it wasn't filtering out
+ * the compaction WALEdit
+ * @throws Exception
+ */
+ @Test(timeout=300000)
+ public void testCompactionWALEdits() throws Exception {
+ WALProtos.CompactionDescriptor compactionDescriptor =
+ WALProtos.CompactionDescriptor.getDefaultInstance();
+ HRegionInfo hri = new HRegionInfo(htable1.getName(),
+ HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW);
+ WALEdit edit = WALEdit.createCompaction(hri, compactionDescriptor);
+ Replication.scopeWALEdits(new WALKey(), edit,
+ htable1.getConfiguration(), null);
+ }
+
+ /**
+ * Test for HBASE-8663
+ * Create three new tables with column families enabled for replication, then run
+ * ReplicationAdmin.listReplicated(). Finally verify the table:colfamilies. Note:
+ * TestReplicationAdmin is a better place for this testing but it would need mocks.
+ * @throws Exception
+ */
+ @Test(timeout = 300000)
+ public void testVerifyListReplicatedTable() throws Exception {
+ LOG.info("testVerifyListReplicatedTable");
+
+ final String tName = "VerifyListReplicated_";
+ final String colFam = "cf1";
+ final int numOfTables = 3;
+
+ Admin hadmin = utility1.getAdmin();
+
+ // Create Tables
+ for (int i = 0; i < numOfTables; i++) {
+ HTableDescriptor ht = new HTableDescriptor(TableName.valueOf(tName + i));
+ HColumnDescriptor cfd = new HColumnDescriptor(colFam);
+ cfd.setScope(HConstants.REPLICATION_SCOPE_GLOBAL);
+ ht.addFamily(cfd);
+ hadmin.createTable(ht);
+ }
+
+ // verify the result
+ List<HashMap<String, String>> replicationColFams = admin.listReplicated();
+ int[] match = new int[numOfTables]; // array of 3 with init value of zero
+
+ for (int i = 0; i < replicationColFams.size(); i++) {
+ HashMap<String, String> replicationEntry = replicationColFams.get(i);
+ String tn = replicationEntry.get(ReplicationAdmin.TNAME);
+ if ((tn.startsWith(tName)) && replicationEntry.get(ReplicationAdmin.CFNAME).equals(colFam)) {
+ int m = Integer.parseInt(tn.substring(tn.length() - 1)); // get the last digit
+ match[m]++; // should only increase once
+ }
+ }
+
+ // check the matching result
+ for (int i = 0; i < match.length; i++) {
+ assertTrue("listReplicated() does not match table " + i, (match[i] == 1));
+ }
+
+ // drop tables
+ for (int i = 0; i < numOfTables; i++) {
+ TableName tableName = TableName.valueOf(tName + i);
+ hadmin.disableTable(tableName);
+ hadmin.deleteTable(tableName);
+ }
+
+ hadmin.close();
+ }
+
+ /**
+ * Test for HBASE-15259: verify that WALEdits marked as replay are not replicated.
+ */
+ @Test
+ public void testReplicationInReplay() throws Exception {
+ final TableName tableName = htable1.getName();
+
+ HRegion region = utility1.getMiniHBaseCluster().getRegions(tableName).get(0);
+ HRegionInfo hri = region.getRegionInfo();
+ NavigableMap<byte[], Integer> scopes = new TreeMap<>(Bytes.BYTES_COMPARATOR);
+ for (byte[] fam : htable1.getTableDescriptor().getFamiliesKeys()) {
+ scopes.put(fam, 1);
+ }
+ final MultiVersionConcurrencyControl mvcc = new MultiVersionConcurrencyControl();
+ int index = utility1.getMiniHBaseCluster().getServerWith(hri.getRegionName());
+ WAL wal = utility1.getMiniHBaseCluster().getRegionServer(index).getWAL(region.getRegionInfo());
+ final byte[] rowName = Bytes.toBytes("testReplicationInReplay");
+ final byte[] qualifier = Bytes.toBytes("q");
+ final byte[] value = Bytes.toBytes("v");
+ WALEdit edit = new WALEdit(true);
+ long now = EnvironmentEdgeManager.currentTime();
+ edit.add(new KeyValue(rowName, famName, qualifier,
+ now, value));
+ WALKey walKey = new WALKey(hri.getEncodedNameAsBytes(), tableName, now, mvcc, scopes);
+ wal.append(hri, walKey, edit, true);
+ wal.sync();
+
+ Get get = new Get(rowName);
+ for (int i = 0; i < NB_RETRIES; i++) {
+ if (i == NB_RETRIES-1) {
+ break;
+ }
+ Result res = htable2.get(get);
+ if (res.size() >= 1) {
+ fail("Not supposed to be replicated for " + Bytes.toString(res.getRow()));
+ } else {
+ LOG.info("Row not replicated, let's wait a bit more...");
+ Thread.sleep(SLEEP_TIME);
+ }
+ }
+ }
+
+ @Test(timeout=300000)
+ public void testVerifyReplicationPrefixFiltering() throws Exception {
+ final byte[] prefixRow = Bytes.toBytes("prefixrow");
+ final byte[] prefixRow2 = Bytes.toBytes("secondrow");
+ loadData("prefixrow", prefixRow);
+ loadData("secondrow", prefixRow2);
+ loadData("aaa", row);
+ loadData("zzz", row);
+ waitForReplication(NB_ROWS_IN_BATCH * 4, NB_RETRIES * 4);
+ String[] args = new String[] {"--row-prefixes=prefixrow,secondrow", PEER_ID,
+ tableName.getNameAsString()};
+ runVerifyReplication(args, NB_ROWS_IN_BATCH *2, 0);
+ }
+
+ @Test(timeout = 300000)
+ public void testVerifyReplicationSnapshotArguments() {
+ String[] args =
+ new String[] { "--sourceSnapshotName=snapshot1", "2", tableName.getNameAsString() };
+ assertFalse(Lists.newArrayList(args).toString(), new VerifyReplication().doCommandLine(args));
+
+ args = new String[] { "--sourceSnapshotTmpDir=tmp", "2", tableName.getNameAsString() };
+ assertFalse(Lists.newArrayList(args).toString(), new VerifyReplication().doCommandLine(args));
+
+ args = new String[] { "--sourceSnapshotName=snapshot1", "--sourceSnapshotTmpDir=tmp", "2",
+ tableName.getNameAsString() };
+ assertTrue(Lists.newArrayList(args).toString(), new VerifyReplication().doCommandLine(args));
+
+ args = new String[] { "--peerSnapshotName=snapshot1", "2", tableName.getNameAsString() };
+ assertFalse(Lists.newArrayList(args).toString(), new VerifyReplication().doCommandLine(args));
+
+ args = new String[] { "--peerSnapshotTmpDir=/tmp/", "2", tableName.getNameAsString() };
+ assertFalse(Lists.newArrayList(args).toString(), new VerifyReplication().doCommandLine(args));
+
+ args = new String[] { "--peerSnapshotName=snapshot1", "--peerSnapshotTmpDir=/tmp/",
+ "--peerFSAddress=tempfs", "--peerHBaseRootAddress=hdfs://tempfs:50070/hbase/", "2",
+ tableName.getNameAsString() };
+ assertTrue(Lists.newArrayList(args).toString(), new VerifyReplication().doCommandLine(args));
+
+ args = new String[] { "--sourceSnapshotName=snapshot1", "--sourceSnapshotTmpDir=/tmp/",
+ "--peerSnapshotName=snapshot2", "--peerSnapshotTmpDir=/tmp/", "--peerFSAddress=tempfs",
+ "--peerHBaseRootAddress=hdfs://tempfs:50070/hbase/", "2", tableName.getNameAsString() };
+
+ assertTrue(Lists.newArrayList(args).toString(), new VerifyReplication().doCommandLine(args));
+ }
+
+ @Test(timeout = 300000)
+ public void testVerifyReplicationWithSnapshotSupport() throws Exception {
+ // Populate the tables, at the same time it guarantees that the tables are
+ // identical since it does the check
+ testSmallBatch();
+
+ // Take source and target tables snapshot
+ Path rootDir = FSUtils.getRootDir(conf1);
+ FileSystem fs = rootDir.getFileSystem(conf1);
+ String sourceSnapshotName = "sourceSnapshot-" + System.currentTimeMillis();
+ SnapshotTestingUtils.createSnapshotAndValidate(utility1.getHBaseAdmin(), tableName,
+ new String(famName), sourceSnapshotName, rootDir, fs, true);
+
+ // Take target snapshot
+ Path peerRootDir = FSUtils.getRootDir(conf2);
+ FileSystem peerFs = peerRootDir.getFileSystem(conf2);
+ String peerSnapshotName = "peerSnapshot-" + System.currentTimeMillis();
+ SnapshotTestingUtils.createSnapshotAndValidate(utility2.getHBaseAdmin(), tableName,
+ new String(famName), peerSnapshotName, peerRootDir, peerFs, true);
+
+ String peerFSAddress = peerFs.getUri().toString();
+ String temPath1 = utility1.getRandomDir().toString();
+ String temPath2 = "/tmp2";
+
+ String[] args = new String[] { "--sourceSnapshotName=" + sourceSnapshotName,
+ "--sourceSnapshotTmpDir=" + temPath1, "--peerSnapshotName=" + peerSnapshotName,
+ "--peerSnapshotTmpDir=" + temPath2, "--peerFSAddress=" + peerFSAddress,
+ "--peerHBaseRootAddress=" + FSUtils.getRootDir(conf2), "2", tableName.getNameAsString() };
+
+ Job job = new VerifyReplication().createSubmittableJob(conf1, args);
+ if (job == null) {
+ fail("Job wasn't created, see the log");
+ }
+ if (!job.waitForCompletion(true)) {
+ fail("Job failed, see the log");
+ }
+ assertEquals(NB_ROWS_IN_BATCH,
+ job.getCounters().findCounter(VerifyReplication.Verifier.Counters.GOODROWS).getValue());
+ assertEquals(0,
+ job.getCounters().findCounter(VerifyReplication.Verifier.Counters.BADROWS).getValue());
+
+ Scan scan = new Scan();
+ ResultScanner rs = htable2.getScanner(scan);
+ Put put = null;
+ for (Result result : rs) {
+ put = new Put(result.getRow());
+ Cell firstVal = result.rawCells()[0];
+ put.addColumn(CellUtil.cloneFamily(firstVal), CellUtil.cloneQualifier(firstVal),
+ Bytes.toBytes("diff data"));
+ htable2.put(put);
+ }
+ Delete delete = new Delete(put.getRow());
+ htable2.delete(delete);
+
+ sourceSnapshotName = "sourceSnapshot-" + System.currentTimeMillis();
+ SnapshotTestingUtils.createSnapshotAndValidate(utility1.getHBaseAdmin(), tableName,
+ new String(famName), sourceSnapshotName, rootDir, fs, true);
+
+ peerSnapshotName = "peerSnapshot-" + System.currentTimeMillis();
+ SnapshotTestingUtils.createSnapshotAndValidate(utility2.getHBaseAdmin(), tableName,
+ new String(famName), peerSnapshotName, peerRootDir, peerFs, true);
+
+ args = new String[] { "--sourceSnapshotName=" + sourceSnapshotName,
+ "--sourceSnapshotTmpDir=" + temPath1, "--peerSnapshotName=" + peerSnapshotName,
+ "--peerSnapshotTmpDir=" + temPath2, "--peerFSAddress=" + peerFSAddress,
+ "--peerHBaseRootAddress=" + FSUtils.getRootDir(conf2), "2", tableName.getNameAsString() };
+
+ job = new VerifyReplication().createSubmittableJob(conf1, args);
+ if (job == null) {
+ fail("Job wasn't created, see the log");
+ }
+ if (!job.waitForCompletion(true)) {
+ fail("Job failed, see the log");
+ }
+ assertEquals(0,
+ job.getCounters().findCounter(VerifyReplication.Verifier.Counters.GOODROWS).getValue());
+ assertEquals(NB_ROWS_IN_BATCH,
+ job.getCounters().findCounter(VerifyReplication.Verifier.Counters.BADROWS).getValue());
+ }
+
+ @Test
+ public void testEmptyWALRecovery() throws Exception {
+ final int numRs = utility1.getHBaseCluster().getRegionServerThreads().size();
+
+ // for each RS, create an empty wal with same walGroupId
+ final List<Path> emptyWalPaths = new ArrayList<>();
+ long ts = System.currentTimeMillis();
+ for (int i = 0; i < numRs; i++) {
+ HRegionInfo regionInfo =
+ utility1.getHBaseCluster().getRegions(htable1.getName()).get(0).getRegionInfo();
+ WAL wal = utility1.getHBaseCluster().getRegionServer(i).getWAL(regionInfo);
+ Path currentWalPath = AbstractFSWALProvider.getCurrentFileName(wal);
+ String walGroupId = AbstractFSWALProvider.getWALPrefixFromWALName(currentWalPath.getName());
+ Path emptyWalPath = new Path(utility1.getDataTestDir(), walGroupId + "." + ts);
+ utility1.getTestFileSystem().create(emptyWalPath).close();
+ emptyWalPaths.add(emptyWalPath);
+ }
+
+ // inject our empty wal into the replication queue
+ for (int i = 0; i < numRs; i++) {
+ Replication replicationService =
+ (Replication) utility1.getHBaseCluster().getRegionServer(i).getReplicationSourceService();
+ replicationService.preLogRoll(null, emptyWalPaths.get(i));
+ replicationService.postLogRoll(null, emptyWalPaths.get(i));
+ }
+
+ // wait for ReplicationSource to start reading from our empty wal
+ waitForLogAdvance(numRs, emptyWalPaths, false);
+
+ // roll the original wal, which enqueues a new wal behind our empty wal
+ for (int i = 0; i < numRs; i++) {
+ HRegionInfo regionInfo =
+ utility1.getHBaseCluster().getRegions(htable1.getName()).get(0).getRegionInfo();
+ WAL wal = utility1.getHBaseCluster().getRegionServer(i).getWAL(regionInfo);
+ wal.rollWriter(true);
+ }
+
+ // ReplicationSource should advance past the empty wal, or else the test will fail
+ waitForLogAdvance(numRs, emptyWalPaths, true);
+
+ // we're now writing to the new wal
+ // if everything works, the source should've stopped reading from the empty wal, and start
+ // replicating from the new wal
+ testSimplePutDelete();
+ }
+
+ /**
+ * Waits for the ReplicationSource to start reading from the given paths
+ * @param numRs number of regionservers
+ * @param emptyWalPaths path for each regionserver
+ * @param invert if true, waits until ReplicationSource is NOT reading from the given paths
+ */
+ private void waitForLogAdvance(final int numRs, final List<Path> emptyWalPaths,
+ final boolean invert) throws Exception {
+ Waiter.waitFor(conf1, 10000, new Waiter.Predicate<Exception>() {
+ @Override
+ public boolean evaluate() throws Exception {
+ for (int i = 0; i < numRs; i++) {
+ Replication replicationService = (Replication) utility1.getHBaseCluster()
+ .getRegionServer(i).getReplicationSourceService();
+ for (ReplicationSourceInterface rsi : replicationService.getReplicationManager()
+ .getSources()) {
+ ReplicationSource source = (ReplicationSource) rsi;
+ if (!invert && !emptyWalPaths.get(i).equals(source.getCurrentPath())) {
+ return false;
+ }
+ if (invert && emptyWalPaths.get(i).equals(source.getCurrentPath())) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+ });
+ }
+}
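For reference, the runVerifyReplication() helper above mirrors how the tool is normally launched from the command line; a minimal sketch via ToolRunner, where the peer id "2" and the table name are placeholder values and the flags are the ones exercised in the tests (--versions, --row-prefixes):

  int ret = ToolRunner.run(HBaseConfiguration.create(), new VerifyReplication(),
      new String[] { "--versions=100", "--row-prefixes=prefixrow,secondrow",
                     "2", "myTable" });
  // the GOODROWS / BADROWS counters on the completed job carry the comparison result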
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
new file mode 100644
index 0000000..2e3cb5e
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
@@ -0,0 +1,381 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.snapshot;
+
+import static org.apache.hadoop.util.ToolRunner.run;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.net.URI;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.CategoryBasedTimeout;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.Ignore;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+import org.junit.rules.TestRule;
+
+/**
+ * Test Export Snapshot Tool
+ */
+@Ignore
+@Category({VerySlowMapReduceTests.class, LargeTests.class})
+public class TestExportSnapshot {
+ @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
+ withTimeout(this.getClass()).withLookingForStuckThread(true).build();
+ private static final Log LOG = LogFactory.getLog(TestExportSnapshot.class);
+
+ protected final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+
+ protected final static byte[] FAMILY = Bytes.toBytes("cf");
+
+ @Rule
+ public final TestName testName = new TestName();
+
+ protected TableName tableName;
+ private byte[] emptySnapshotName;
+ private byte[] snapshotName;
+ private int tableNumFiles;
+ private Admin admin;
+
+ public static void setUpBaseConf(Configuration conf) {
+ conf.setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true);
+ conf.setInt("hbase.regionserver.msginterval", 100);
+ conf.setInt("hbase.client.pause", 250);
+ conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 6);
+ conf.setBoolean("hbase.master.enabletable.roundrobin", true);
+ conf.setInt("mapreduce.map.maxattempts", 10);
+ // If a single node has enough failures (default 3), resource manager will blacklist it.
+ // With only 2 nodes and tests injecting faults, we don't want that.
+ conf.setInt("mapreduce.job.maxtaskfailures.per.tracker", 100);
+ }
+
+ @BeforeClass
+ public static void setUpBeforeClass() throws Exception {
+ setUpBaseConf(TEST_UTIL.getConfiguration());
+ TEST_UTIL.startMiniCluster(1, 3);
+ TEST_UTIL.startMiniMapReduceCluster();
+ }
+
+ @AfterClass
+ public static void tearDownAfterClass() throws Exception {
+ TEST_UTIL.shutdownMiniMapReduceCluster();
+ TEST_UTIL.shutdownMiniCluster();
+ }
+
+ /**
+ * Create a table and take a snapshot of the table used by the export test.
+ */
+ @Before
+ public void setUp() throws Exception {
+ this.admin = TEST_UTIL.getAdmin();
+
+ tableName = TableName.valueOf("testtb-" + testName.getMethodName());
+ snapshotName = Bytes.toBytes("snaptb0-" + testName.getMethodName());
+ emptySnapshotName = Bytes.toBytes("emptySnaptb0-" + testName.getMethodName());
+
+ // create Table
+ createTable();
+
+ // Take an empty snapshot
+ admin.snapshot(emptySnapshotName, tableName);
+
+ // Add some rows
+ SnapshotTestingUtils.loadData(TEST_UTIL, tableName, 50, FAMILY);
+ tableNumFiles = admin.getTableRegions(tableName).size();
+
+ // take a snapshot
+ admin.snapshot(snapshotName, tableName);
+ }
+
+ protected void createTable() throws Exception {
+ SnapshotTestingUtils.createPreSplitTable(TEST_UTIL, tableName, 2, FAMILY);
+ }
+
+ protected interface RegionPredicate {
+ boolean evaluate(final HRegionInfo regionInfo);
+ }
+
+ protected RegionPredicate getBypassRegionPredicate() {
+ return null;
+ }
+
+ @After
+ public void tearDown() throws Exception {
+ TEST_UTIL.deleteTable(tableName);
+ SnapshotTestingUtils.deleteAllSnapshots(TEST_UTIL.getAdmin());
+ SnapshotTestingUtils.deleteArchiveDirectory(TEST_UTIL);
+ }
+
+ /**
+ * Verify that the exported snapshot and copied files match the originals.
+ */
+ @Test
+ public void testExportFileSystemState() throws Exception {
+ testExportFileSystemState(tableName, snapshotName, snapshotName, tableNumFiles);
+ }
+
+ @Test
+ public void testExportFileSystemStateWithSkipTmp() throws Exception {
+ TEST_UTIL.getConfiguration().setBoolean(ExportSnapshot.CONF_SKIP_TMP, true);
+ try {
+ testExportFileSystemState(tableName, snapshotName, snapshotName, tableNumFiles);
+ } finally {
+ TEST_UTIL.getConfiguration().setBoolean(ExportSnapshot.CONF_SKIP_TMP, false);
+ }
+ }
+
+ @Test
+ public void testEmptyExportFileSystemState() throws Exception {
+ testExportFileSystemState(tableName, emptySnapshotName, emptySnapshotName, 0);
+ }
+
+ @Test
+ public void testConsecutiveExports() throws Exception {
+ Path copyDir = getLocalDestinationDir();
+ testExportFileSystemState(tableName, snapshotName, snapshotName, tableNumFiles, copyDir, false);
+ testExportFileSystemState(tableName, snapshotName, snapshotName, tableNumFiles, copyDir, true);
+ removeExportDir(copyDir);
+ }
+
+ @Test
+ public void testExportWithTargetName() throws Exception {
+ final byte[] targetName = Bytes.toBytes("testExportWithTargetName");
+ testExportFileSystemState(tableName, snapshotName, targetName, tableNumFiles);
+ }
+
+ private void testExportFileSystemState(final TableName tableName, final byte[] snapshotName,
+ final byte[] targetName, int filesExpected) throws Exception {
+ testExportFileSystemState(tableName, snapshotName, targetName,
+ filesExpected, getHdfsDestinationDir(), false);
+ }
+
+ protected void testExportFileSystemState(final TableName tableName,
+ final byte[] snapshotName, final byte[] targetName, int filesExpected,
+ Path copyDir, boolean overwrite) throws Exception {
+ testExportFileSystemState(TEST_UTIL.getConfiguration(), tableName, snapshotName, targetName,
+ filesExpected, TEST_UTIL.getDefaultRootDirPath(), copyDir,
+ overwrite, getBypassRegionPredicate(), true);
+ }
+
+ /**
+ * Creates destination directory, runs ExportSnapshot() tool, and runs some verifications.
+ */
+ protected static void testExportFileSystemState(final Configuration conf, final TableName tableName,
+ final byte[] snapshotName, final byte[] targetName, final int filesExpected,
+ final Path sourceDir, Path copyDir, final boolean overwrite,
+ final RegionPredicate bypassregionPredicate, boolean success) throws Exception {
+ URI hdfsUri = FileSystem.get(conf).getUri();
+ FileSystem fs = FileSystem.get(copyDir.toUri(), new Configuration());
+ copyDir = copyDir.makeQualified(fs);
+
+ List<String> opts = new ArrayList<>();
+ opts.add("--snapshot");
+ opts.add(Bytes.toString(snapshotName));
+ opts.add("--copy-to");
+ opts.add(copyDir.toString());
+ if (targetName != snapshotName) {
+ opts.add("--target");
+ opts.add(Bytes.toString(targetName));
+ }
+ if (overwrite) opts.add("--overwrite");
+
+ // Export Snapshot
+ int res = run(conf, new ExportSnapshot(), opts.toArray(new String[opts.size()]));
+ assertEquals(success ? 0 : 1, res);
+ if (!success) {
+ final Path targetDir = new Path(HConstants.SNAPSHOT_DIR_NAME, Bytes.toString(targetName));
+ assertFalse(fs.exists(new Path(copyDir, targetDir)));
+ return;
+ }
+
+ // Verify File-System state
+ FileStatus[] rootFiles = fs.listStatus(copyDir);
+ assertEquals(filesExpected > 0 ? 2 : 1, rootFiles.length);
+ for (FileStatus fileStatus: rootFiles) {
+ String name = fileStatus.getPath().getName();
+ assertTrue(fileStatus.isDirectory());
+ assertTrue(name.equals(HConstants.SNAPSHOT_DIR_NAME) ||
+ name.equals(HConstants.HFILE_ARCHIVE_DIRECTORY));
+ }
+
+ // compare the snapshot metadata and verify the hfiles
+ final FileSystem hdfs = FileSystem.get(hdfsUri, conf);
+ final Path snapshotDir = new Path(HConstants.SNAPSHOT_DIR_NAME, Bytes.toString(snapshotName));
+ final Path targetDir = new Path(HConstants.SNAPSHOT_DIR_NAME, Bytes.toString(targetName));
+ verifySnapshotDir(hdfs, new Path(sourceDir, snapshotDir),
+ fs, new Path(copyDir, targetDir));
+ Set<String> snapshotFiles = verifySnapshot(conf, fs, copyDir, tableName,
+ Bytes.toString(targetName), bypassregionPredicate);
+ assertEquals(filesExpected, snapshotFiles.size());
+ }
+
+ /**
+ * Check that ExportSnapshot will succeed if something fails but the retry succeeds.
+ */
+ @Test
+ public void testExportRetry() throws Exception {
+ Path copyDir = getLocalDestinationDir();
+ FileSystem fs = FileSystem.get(copyDir.toUri(), new Configuration());
+ copyDir = copyDir.makeQualified(fs);
+ Configuration conf = new Configuration(TEST_UTIL.getConfiguration());
+ conf.setBoolean(ExportSnapshot.Testing.CONF_TEST_FAILURE, true);
+ conf.setInt(ExportSnapshot.Testing.CONF_TEST_FAILURE_COUNT, 2);
+ conf.setInt("mapreduce.map.maxattempts", 3);
+ testExportFileSystemState(conf, tableName, snapshotName, snapshotName, tableNumFiles,
+ TEST_UTIL.getDefaultRootDirPath(), copyDir, true, getBypassRegionPredicate(), true);
+ }
+
+ /**
+ * Check that ExportSnapshot will fail if we inject failure more times than MR will retry.
+ */
+ @Test
+ public void testExportFailure() throws Exception {
+ Path copyDir = getLocalDestinationDir();
+ FileSystem fs = FileSystem.get(copyDir.toUri(), new Configuration());
+ copyDir = copyDir.makeQualified(fs);
+ Configuration conf = new Configuration(TEST_UTIL.getConfiguration());
+ conf.setBoolean(ExportSnapshot.Testing.CONF_TEST_FAILURE, true);
+ conf.setInt(ExportSnapshot.Testing.CONF_TEST_FAILURE_COUNT, 4);
+ conf.setInt("mapreduce.map.maxattempts", 3);
+ testExportFileSystemState(conf, tableName, snapshotName, snapshotName, tableNumFiles,
+ TEST_UTIL.getDefaultRootDirPath(), copyDir, true, getBypassRegionPredicate(), false);
+ }
+
+ /*
+ * Verify that the snapshot folder on file-system 1 matches the one on file-system 2.
+ */
+ protected static void verifySnapshotDir(final FileSystem fs1, final Path root1,
+ final FileSystem fs2, final Path root2) throws IOException {
+ assertEquals(listFiles(fs1, root1, root1), listFiles(fs2, root2, root2));
+ }
+
+ protected Set<String> verifySnapshot(final FileSystem fs, final Path rootDir,
+ final TableName tableName, final String snapshotName) throws IOException {
+ return verifySnapshot(TEST_UTIL.getConfiguration(), fs, rootDir, tableName,
+ snapshotName, getBypassRegionPredicate());
+ }
+
+ /*
+ * Verify that the referenced files exist.
+ */
+ protected static Set<String> verifySnapshot(final Configuration conf, final FileSystem fs,
+ final Path rootDir, final TableName tableName, final String snapshotName,
+ final RegionPredicate bypassregionPredicate) throws IOException {
+ final Path exportedSnapshot = new Path(rootDir,
+ new Path(HConstants.SNAPSHOT_DIR_NAME, snapshotName));
+ final Set<String> snapshotFiles = new HashSet<>();
+ final Path exportedArchive = new Path(rootDir, HConstants.HFILE_ARCHIVE_DIRECTORY);
+ SnapshotReferenceUtil.visitReferencedFiles(conf, fs, exportedSnapshot,
+ new SnapshotReferenceUtil.SnapshotVisitor() {
+ @Override
+ public void storeFile(final HRegionInfo regionInfo, final String family,
+ final SnapshotRegionManifest.StoreFile storeFile) throws IOException {
+ if (bypassregionPredicate != null && bypassregionPredicate.evaluate(regionInfo))
+ return;
+
+ String hfile = storeFile.getName();
+ snapshotFiles.add(hfile);
+ if (storeFile.hasReference()) {
+ // Nothing to do here, we have already the reference embedded
+ } else {
+ verifyNonEmptyFile(new Path(exportedArchive,
+ new Path(FSUtils.getTableDir(new Path("./"), tableName),
+ new Path(regionInfo.getEncodedName(), new Path(family, hfile)))));
+ }
+ }
+
+ private void verifyNonEmptyFile(final Path path) throws IOException {
+ assertTrue(path + " should exists", fs.exists(path));
+ assertTrue(path + " should not be empty", fs.getFileStatus(path).getLen() > 0);
+ }
+ });
+
+ // Verify Snapshot description
+ SnapshotDescription desc = SnapshotDescriptionUtils.readSnapshotInfo(fs, exportedSnapshot);
+ assertTrue(desc.getName().equals(snapshotName));
+ assertTrue(desc.getTable().equals(tableName.getNameAsString()));
+ return snapshotFiles;
+ }
+
+ private static Set<String> listFiles(final FileSystem fs, final Path root, final Path dir)
+ throws IOException {
+ Set<String> files = new HashSet<>();
+ int rootPrefix = root.makeQualified(fs).toString().length();
+ FileStatus[] list = FSUtils.listStatus(fs, dir);
+ if (list != null) {
+ for (FileStatus fstat: list) {
+ LOG.debug(fstat.getPath());
+ if (fstat.isDirectory()) {
+ files.addAll(listFiles(fs, root, fstat.getPath()));
+ } else {
+ files.add(fstat.getPath().makeQualified(fs).toString().substring(rootPrefix));
+ }
+ }
+ }
+ return files;
+ }
+
+ private Path getHdfsDestinationDir() {
+ Path rootDir = TEST_UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir();
+ Path path = new Path(new Path(rootDir, "export-test"), "export-" + System.currentTimeMillis());
+ LOG.info("HDFS export destination path: " + path);
+ return path;
+ }
+
+ private Path getLocalDestinationDir() {
+ Path path = TEST_UTIL.getDataTestDir("local-export-" + System.currentTimeMillis());
+ LOG.info("Local export destination path: " + path);
+ return path;
+ }
+
+ private static void removeExportDir(final Path path) throws IOException {
+ FileSystem fs = FileSystem.get(path.toUri(), new Configuration());
+ fs.delete(path, true);
+ }
+}
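For reference, the testExportFileSystemState() helper above builds the same argument list the tool takes on the command line; a minimal sketch via ToolRunner, with an illustrative snapshot name and destination URI:

  int ret = ToolRunner.run(HBaseConfiguration.create(), new ExportSnapshot(),
      new String[] { "--snapshot", "snaptb0-example",                   // placeholder name
                     "--copy-to", "hdfs://backup-cluster:8020/hbase",   // placeholder URI
                     "--overwrite" });
  // a return value of 0 means the export job completed successfully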
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotHelpers.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotHelpers.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotHelpers.java
new file mode 100644
index 0000000..e31e81e
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotHelpers.java
@@ -0,0 +1,91 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.snapshot;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotFileInfo;
+import org.apache.hadoop.hbase.testclassification.RegionServerTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.hbase.util.Pair;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+/**
+ * Test Export Snapshot Tool helpers
+ */
+@Category({RegionServerTests.class, SmallTests.class})
+public class TestExportSnapshotHelpers {
+ /**
+ * Verify the result of the getBalancedSplits() method.
+ * The result is a set of file groups, used as the input lists for the "export" mappers.
+ * All the groups should hold a similar amount of data.
+ *
+ * The input list is a pair of file path and length.
+ * getBalancedSplits() sorts it by length and assigns a file to each group,
+ * going back and forth through the groups.
+ */
+ @Test
+ public void testBalanceSplit() throws Exception {
+ // Create a list of files
+ List<Pair<SnapshotFileInfo, Long>> files = new ArrayList<>(21);
+ for (long i = 0; i <= 20; i++) {
+ SnapshotFileInfo fileInfo = SnapshotFileInfo.newBuilder()
+ .setType(SnapshotFileInfo.Type.HFILE)
+ .setHfile("file-" + i)
+ .build();
+ files.add(new Pair<>(fileInfo, i));
+ }
+
+ // Create 5 groups (total size 210)
+ // group 0: 20, 11, 10, 1, 0 (total size: 42)
+ // group 1: 19, 12, 9, 2 (total size: 42)
+ // group 2: 18, 13, 8, 3 (total size: 42)
+ // group 3: 17, 14, 7, 4 (total size: 42)
+ // group 4: 16, 15, 6, 5 (total size: 42)
+ List<List<Pair<SnapshotFileInfo, Long>>> splits = ExportSnapshot.getBalancedSplits(files, 5);
+ assertEquals(5, splits.size());
+
+ String[] split0 = new String[] {"file-20", "file-11", "file-10", "file-1", "file-0"};
+ verifyBalanceSplit(splits.get(0), split0, 42);
+ String[] split1 = new String[] {"file-19", "file-12", "file-9", "file-2"};
+ verifyBalanceSplit(splits.get(1), split1, 42);
+ String[] split2 = new String[] {"file-18", "file-13", "file-8", "file-3"};
+ verifyBalanceSplit(splits.get(2), split2, 42);
+ String[] split3 = new String[] {"file-17", "file-14", "file-7", "file-4"};
+ verifyBalanceSplit(splits.get(3), split3, 42);
+ String[] split4 = new String[] {"file-16", "file-15", "file-6", "file-5"};
+ verifyBalanceSplit(splits.get(4), split4, 42);
+ }
+
+ private void verifyBalanceSplit(final List<Pair<SnapshotFileInfo, Long>> split,
+ final String[] expected, final long expectedSize) {
+ assertEquals(expected.length, split.size());
+ long totalSize = 0;
+ for (int i = 0; i < expected.length; ++i) {
+ Pair<SnapshotFileInfo, Long> fileInfo = split.get(i);
+ assertEquals(expected[i], fileInfo.getFirst().getHfile());
+ totalSize += fileInfo.getSecond();
+ }
+ assertEquals(expectedSize, totalSize);
+ }
+}
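The balancing strategy the test above exercises can be sketched in isolation: sort the files by size in descending order, then deal them out to the groups in a back-and-forth ("snake") order so every group ends up with roughly the same total size. The following is a minimal standalone sketch, not the ExportSnapshot code itself; the class and method names are illustrative only.

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/** Illustrative sketch of the back-and-forth balancing verified by testBalanceSplit(). */
public class BalancedSplitSketch {

  /** Splits the given file sizes into ngroups lists with similar totals. */
  static List<List<Long>> balance(List<Long> sizes, int ngroups) {
    List<Long> sorted = new ArrayList<>(sizes);
    sorted.sort(Collections.reverseOrder());      // biggest files first

    List<List<Long>> groups = new ArrayList<>(ngroups);
    for (int i = 0; i < ngroups; i++) {
      groups.add(new ArrayList<Long>());
    }

    int g = 0;
    int dir = 1;                                  // +1 walking forward, -1 walking backward
    for (Long size : sorted) {
      groups.get(g).add(size);
      if (g + dir < 0 || g + dir >= ngroups) {
        dir = -dir;                               // bounce at either end; the same group takes the next file
      } else {
        g += dir;
      }
    }
    return groups;
  }

  public static void main(String[] args) {
    List<Long> sizes = new ArrayList<>();
    for (long i = 0; i <= 20; i++) {
      sizes.add(i);                               // same 0..20 sizes as the test above
    }
    for (List<Long> group : balance(sizes, 5)) {
      long total = group.stream().mapToLong(Long::longValue).sum();
      System.out.println(group + " -> total " + total);
    }
  }
}

Fed the same 0..20 sizes as the test, each of the five groups comes out with a total of 42, matching the splits asserted above.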
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotNoCluster.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotNoCluster.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotNoCluster.java
new file mode 100644
index 0000000..00778502
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotNoCluster.java
@@ -0,0 +1,112 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.snapshot;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.CategoryBasedTimeout;
+import org.apache.hadoop.hbase.HBaseCommonTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
+import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils.SnapshotMock;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestRule;
+
+/**
+ * Test Export Snapshot Tool
+ */
+@Category({MapReduceTests.class, MediumTests.class})
+public class TestExportSnapshotNoCluster {
+ @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
+ withTimeout(this.getClass()).withLookingForStuckThread(true).build();
+ private static final Log LOG = LogFactory.getLog(TestExportSnapshotNoCluster.class);
+
+ protected final static HBaseCommonTestingUtility TEST_UTIL = new HBaseCommonTestingUtility();
+
+ private static FileSystem fs;
+ private static Path testDir;
+
+ public static void setUpBaseConf(Configuration conf) {
+ conf.setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true);
+ conf.setInt("hbase.regionserver.msginterval", 100);
+ conf.setInt("hbase.client.pause", 250);
+ conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 6);
+ conf.setBoolean("hbase.master.enabletable.roundrobin", true);
+ conf.setInt("mapreduce.map.maxattempts", 10);
+ conf.set(HConstants.HBASE_DIR, testDir.toString());
+ }
+
+ @BeforeClass
+ public static void setUpBeforeClass() throws Exception {
+ testDir = TEST_UTIL.getDataTestDir();
+ fs = testDir.getFileSystem(TEST_UTIL.getConfiguration());
+
+ setUpBaseConf(TEST_UTIL.getConfiguration());
+ }
+
+ /**
+ * Mock a snapshot with files in the archive dir,
+ * two regions, and one reference file.
+ */
+ @Test
+ public void testSnapshotWithRefsExportFileSystemState() throws Exception {
+ SnapshotMock snapshotMock = new SnapshotMock(TEST_UTIL.getConfiguration(), fs, testDir);
+ SnapshotMock.SnapshotBuilder builder = snapshotMock.createSnapshotV2("tableWithRefsV1",
+ "tableWithRefsV1");
+ testSnapshotWithRefsExportFileSystemState(builder);
+
+ snapshotMock = new SnapshotMock(TEST_UTIL.getConfiguration(), fs, testDir);
+ builder = snapshotMock.createSnapshotV2("tableWithRefsV2", "tableWithRefsV2");
+ testSnapshotWithRefsExportFileSystemState(builder);
+ }
+
+ /**
+ * Generates a couple of regions for the specified SnapshotMock,
+ * then runs the export and verification.
+ */
+ private void testSnapshotWithRefsExportFileSystemState(SnapshotMock.SnapshotBuilder builder)
+ throws Exception {
+ Path[] r1Files = builder.addRegion();
+ Path[] r2Files = builder.addRegion();
+ builder.commit();
+ int snapshotFilesCount = r1Files.length + r2Files.length;
+
+ byte[] snapshotName = Bytes.toBytes(builder.getSnapshotDescription().getName());
+ TableName tableName = builder.getTableDescriptor().getTableName();
+ TestExportSnapshot.testExportFileSystemState(TEST_UTIL.getConfiguration(),
+ tableName, snapshotName, snapshotName, snapshotFilesCount,
+ testDir, getDestinationDir(), false, null, true);
+ }
+
+ private Path getDestinationDir() {
+ Path path = new Path(new Path(testDir, "export-test"), "export-" + System.currentTimeMillis());
+ LOG.info("HDFS export destination path: " + path);
+ return path;
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobExportSnapshot.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobExportSnapshot.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobExportSnapshot.java
new file mode 100644
index 0000000..7407a7d
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobExportSnapshot.java
@@ -0,0 +1,65 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.snapshot;
+
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.mob.MobConstants;
+import org.apache.hadoop.hbase.mob.MobUtils;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.VerySlowRegionServerTests;
+import org.junit.BeforeClass;
+import org.junit.Ignore;
+import org.junit.experimental.categories.Category;
+
+/**
+ * Test Export Snapshot Tool
+ */
+@Ignore
+@Category({VerySlowRegionServerTests.class, LargeTests.class})
+public class TestMobExportSnapshot extends TestExportSnapshot {
+
+ public static void setUpBaseConf(Configuration conf) {
+ TestExportSnapshot.setUpBaseConf(conf);
+ conf.setInt(MobConstants.MOB_FILE_CACHE_SIZE_KEY, 0);
+ }
+
+ @BeforeClass
+ public static void setUpBeforeClass() throws Exception {
+ setUpBaseConf(TEST_UTIL.getConfiguration());
+ TEST_UTIL.startMiniCluster(1, 3);
+ TEST_UTIL.startMiniMapReduceCluster();
+ }
+
+ @Override
+ protected void createTable() throws Exception {
+ MobSnapshotTestingUtils.createPreSplitMobTable(TEST_UTIL, tableName, 2, FAMILY);
+ }
+
+ @Override
+ protected RegionPredicate getBypassRegionPredicate() {
+ return new RegionPredicate() {
+ @Override
+ public boolean evaluate(final HRegionInfo regionInfo) {
+ return MobUtils.isMobRegionInfo(regionInfo);
+ }
+ };
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobSecureExportSnapshot.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobSecureExportSnapshot.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobSecureExportSnapshot.java
new file mode 100644
index 0000000..98d03c0
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobSecureExportSnapshot.java
@@ -0,0 +1,59 @@
+/**
+ * Copyright The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.snapshot;
+
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.VerySlowRegionServerTests;
+import org.apache.hadoop.hbase.mapreduce.HadoopSecurityEnabledUserProviderForTesting;
+import org.apache.hadoop.hbase.security.UserProvider;
+import org.apache.hadoop.hbase.security.access.AccessControlLists;
+import org.apache.hadoop.hbase.security.access.SecureTestUtil;
+
+import org.junit.BeforeClass;
+import org.junit.Ignore;
+import org.junit.experimental.categories.Category;
+
+/**
+ * Reruns TestMobExportSnapshot using ExportSnapshot in secure mode.
+ */
+@Ignore
+@Category({VerySlowRegionServerTests.class, LargeTests.class})
+public class TestMobSecureExportSnapshot extends TestMobExportSnapshot {
+ @BeforeClass
+ public static void setUpBeforeClass() throws Exception {
+ setUpBaseConf(TEST_UTIL.getConfiguration());
+ // Setup separate test-data directory for MR cluster and set corresponding configurations.
+ // Otherwise, different test classes running MR cluster can step on each other.
+ TEST_UTIL.getDataTestDir();
+
+ // set the always on security provider
+ UserProvider.setUserProviderForTesting(TEST_UTIL.getConfiguration(),
+ HadoopSecurityEnabledUserProviderForTesting.class);
+
+ // setup configuration
+ SecureTestUtil.enableSecurity(TEST_UTIL.getConfiguration());
+
+ TEST_UTIL.startMiniCluster(1, 3);
+ TEST_UTIL.startMiniMapReduceCluster();
+
+ // Wait for the ACL table to become available
+ TEST_UTIL.waitTableEnabled(AccessControlLists.ACL_TABLE_NAME);
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestSecureExportSnapshot.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestSecureExportSnapshot.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestSecureExportSnapshot.java
new file mode 100644
index 0000000..7d4832c
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestSecureExportSnapshot.java
@@ -0,0 +1,64 @@
+/**
+ * Copyright The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.snapshot;
+
+import org.apache.hadoop.hbase.CategoryBasedTimeout;
+import org.apache.hadoop.hbase.mapreduce.HadoopSecurityEnabledUserProviderForTesting;
+import org.apache.hadoop.hbase.security.UserProvider;
+import org.apache.hadoop.hbase.security.access.AccessControlLists;
+import org.apache.hadoop.hbase.security.access.SecureTestUtil;
+
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.VerySlowRegionServerTests;
+import org.junit.BeforeClass;
+import org.junit.Ignore;
+import org.junit.Rule;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestRule;
+
+/**
+ * Reruns TestExportSnapshot using ExportSnapshot in secure mode.
+ */
+@Ignore
+@Category({VerySlowRegionServerTests.class, LargeTests.class})
+public class TestSecureExportSnapshot extends TestExportSnapshot {
+ @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
+ withTimeout(this.getClass()).withLookingForStuckThread(true).build();
+ @BeforeClass
+ public static void setUpBeforeClass() throws Exception {
+ setUpBaseConf(TEST_UTIL.getConfiguration());
+ // Setup separate test-data directory for MR cluster and set corresponding configurations.
+ // Otherwise, different test classes running MR cluster can step on each other.
+ TEST_UTIL.getDataTestDir();
+
+ // set the always on security provider
+ UserProvider.setUserProviderForTesting(TEST_UTIL.getConfiguration(),
+ HadoopSecurityEnabledUserProviderForTesting.class);
+
+ // setup configuration
+ SecureTestUtil.enableSecurity(TEST_UTIL.getConfiguration());
+
+ TEST_UTIL.startMiniCluster(1, 3);
+ TEST_UTIL.startMiniMapReduceCluster();
+
+ // Wait for the ACL table to become available
+ TEST_UTIL.waitTableEnabled(AccessControlLists.ACL_TABLE_NAME);
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/RowCounter.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/RowCounter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/RowCounter.java
deleted file mode 100644
index 43560fd..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/RowCounter.java
+++ /dev/null
@@ -1,121 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import java.io.IOException;
-
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.mapred.FileOutputFormat;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-/**
- * A job with a map to count rows.
- * Map outputs table rows IF the input row has columns that have content.
- * Uses a org.apache.hadoop.mapred.lib.IdentityReducer
- */
-@InterfaceAudience.Public
-public class RowCounter extends Configured implements Tool {
- // Name of this 'program'
- static final String NAME = "rowcounter";
-
- /**
- * Mapper that runs the count.
- */
- static class RowCounterMapper
- implements TableMap<ImmutableBytesWritable, Result> {
- private static enum Counters {ROWS}
-
- public void map(ImmutableBytesWritable row, Result values,
- OutputCollector<ImmutableBytesWritable, Result> output,
- Reporter reporter)
- throws IOException {
- // Count every row containing data, whether it's in qualifiers or values
- reporter.incrCounter(Counters.ROWS, 1);
- }
-
- public void configure(JobConf jc) {
- // Nothing to do.
- }
-
- public void close() throws IOException {
- // Nothing to do.
- }
- }
-
- /**
- * @param args
- * @return the JobConf
- * @throws IOException
- */
- public JobConf createSubmittableJob(String[] args) throws IOException {
- JobConf c = new JobConf(getConf(), getClass());
- c.setJobName(NAME);
- // Columns are space delimited
- StringBuilder sb = new StringBuilder();
- final int columnoffset = 2;
- for (int i = columnoffset; i < args.length; i++) {
- if (i > columnoffset) {
- sb.append(" ");
- }
- sb.append(args[i]);
- }
- // Second argument is the table name.
- TableMapReduceUtil.initTableMapJob(args[1], sb.toString(),
- RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, c);
- c.setNumReduceTasks(0);
- // First arg is the output directory.
- FileOutputFormat.setOutputPath(c, new Path(args[0]));
- return c;
- }
-
- static int printUsage() {
- System.out.println(NAME +
- " <outputdir> <tablename> <column1> [<column2>...]");
- return -1;
- }
-
- public int run(final String[] args) throws Exception {
- // Make sure there are at least 3 parameters
- if (args.length < 3) {
- System.err.println("ERROR: Wrong number of parameters: " + args.length);
- return printUsage();
- }
- JobClient.runJob(createSubmittableJob(args));
- return 0;
- }
-
- /**
- * @param args
- * @throws Exception
- */
- public static void main(String[] args) throws Exception {
- int errCode = ToolRunner.run(HBaseConfiguration.create(), new RowCounter(), args);
- System.exit(errCode);
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableInputFormat.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableInputFormat.java
deleted file mode 100644
index 208849a..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableInputFormat.java
+++ /dev/null
@@ -1,90 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import java.io.IOException;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.JobConfigurable;
-import org.apache.hadoop.util.StringUtils;
-
-/**
- * Convert HBase tabular data into a format that is consumable by Map/Reduce.
- */
-@InterfaceAudience.Public
-public class TableInputFormat extends TableInputFormatBase implements
- JobConfigurable {
- private static final Log LOG = LogFactory.getLog(TableInputFormat.class);
-
- /**
- * space delimited list of columns
- */
- public static final String COLUMN_LIST = "hbase.mapred.tablecolumns";
-
- public void configure(JobConf job) {
- try {
- initialize(job);
- } catch (Exception e) {
- LOG.error(StringUtils.stringifyException(e));
- }
- }
-
- @Override
- protected void initialize(JobConf job) throws IOException {
- Path[] tableNames = FileInputFormat.getInputPaths(job);
- String colArg = job.get(COLUMN_LIST);
- String[] colNames = colArg.split(" ");
- byte [][] m_cols = new byte[colNames.length][];
- for (int i = 0; i < m_cols.length; i++) {
- m_cols[i] = Bytes.toBytes(colNames[i]);
- }
- setInputColumns(m_cols);
- Connection connection = ConnectionFactory.createConnection(job);
- initializeTable(connection, TableName.valueOf(tableNames[0].getName()));
- }
-
- public void validateInput(JobConf job) throws IOException {
- // expecting exactly one path
- Path [] tableNames = FileInputFormat.getInputPaths(job);
- if (tableNames == null || tableNames.length > 1) {
- throw new IOException("expecting one table name");
- }
-
- // connected to table?
- if (getTable() == null) {
- throw new IOException("could not connect to table '" +
- tableNames[0].getName() + "'");
- }
-
- // expecting at least one column
- String colArg = job.get(COLUMN_LIST);
- if (colArg == null || colArg.length() == 0) {
- throw new IOException("expecting at least one column");
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableInputFormatBase.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableInputFormatBase.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableInputFormatBase.java
deleted file mode 100644
index c65810f..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableInputFormatBase.java
+++ /dev/null
@@ -1,313 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import java.io.Closeable;
-import java.io.IOException;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.RegionLocator;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.filter.Filter;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.mapred.InputFormat;
-import org.apache.hadoop.mapred.InputSplit;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.RecordReader;
-import org.apache.hadoop.mapred.Reporter;
-
-/**
- * A Base for {@link TableInputFormat}s. Receives a {@link Table}, a
- * byte[] of input columns and optionally a {@link Filter}.
- * Subclasses may use other TableRecordReader implementations.
- *
- * Subclasses MUST ensure initializeTable(Connection, TableName) is called for an instance to
- * function properly. Each of the entry points to this class used by the MapReduce framework,
- * {@link #getRecordReader(InputSplit, JobConf, Reporter)} and {@link #getSplits(JobConf, int)},
- * will call {@link #initialize(JobConf)} as a convenient centralized location to handle
- * retrieving the necessary configuration information. If your subclass overrides either of these
- * methods, either call the parent version or call initialize yourself.
- *
- * <p>
- * An example of a subclass:
- * <pre>
- * class ExampleTIF extends TableInputFormatBase {
- *
- * {@literal @}Override
- * protected void initialize(JobConf context) throws IOException {
- * // We are responsible for the lifecycle of this connection until we hand it over in
- * // initializeTable.
- * Connection connection =
- * ConnectionFactory.createConnection(HBaseConfiguration.create(job));
- * TableName tableName = TableName.valueOf("exampleTable");
- * // mandatory. once passed here, TableInputFormatBase will handle closing the connection.
- * initializeTable(connection, tableName);
- * byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
- * Bytes.toBytes("columnB") };
- * // mandatory
- * setInputColumns(inputColumns);
- * // optional, by default we'll get everything for the given columns.
- * Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*"));
- * setRowFilter(exampleFilter);
- * }
- * }
- * </pre>
- */
-
-@InterfaceAudience.Public
-public abstract class TableInputFormatBase
-implements InputFormat<ImmutableBytesWritable, Result> {
- private static final Log LOG = LogFactory.getLog(TableInputFormatBase.class);
- private byte [][] inputColumns;
- private Table table;
- private RegionLocator regionLocator;
- private Connection connection;
- private TableRecordReader tableRecordReader;
- private Filter rowFilter;
-
- private static final String NOT_INITIALIZED = "The input format instance has not been properly " +
- "initialized. Ensure you call initializeTable either in your constructor or initialize " +
- "method";
- private static final String INITIALIZATION_ERROR = "Cannot create a record reader because of a" +
- " previous error. Please look at the previous log lines from" +
- " the task's full log for more details.";
-
- /**
- * Builds a TableRecordReader. If no TableRecordReader was provided, uses
- * the default.
- *
- * @see org.apache.hadoop.mapred.InputFormat#getRecordReader(InputSplit,
- * JobConf, Reporter)
- */
- public RecordReader<ImmutableBytesWritable, Result> getRecordReader(
- InputSplit split, JobConf job, Reporter reporter)
- throws IOException {
- // In case a subclass uses the deprecated approach or calls initializeTable directly
- if (table == null) {
- initialize(job);
- }
- // null check in case our child overrides getTable to not throw.
- try {
- if (getTable() == null) {
- // initialize() must not have been implemented in the subclass.
- throw new IOException(INITIALIZATION_ERROR);
- }
- } catch (IllegalStateException exception) {
- throw new IOException(INITIALIZATION_ERROR, exception);
- }
-
- TableSplit tSplit = (TableSplit) split;
- // if no table record reader was provided use default
- final TableRecordReader trr = this.tableRecordReader == null ? new TableRecordReader() :
- this.tableRecordReader;
- trr.setStartRow(tSplit.getStartRow());
- trr.setEndRow(tSplit.getEndRow());
- trr.setHTable(this.table);
- trr.setInputColumns(this.inputColumns);
- trr.setRowFilter(this.rowFilter);
- trr.init();
- return new RecordReader<ImmutableBytesWritable, Result>() {
-
- @Override
- public void close() throws IOException {
- trr.close();
- closeTable();
- }
-
- @Override
- public ImmutableBytesWritable createKey() {
- return trr.createKey();
- }
-
- @Override
- public Result createValue() {
- return trr.createValue();
- }
-
- @Override
- public long getPos() throws IOException {
- return trr.getPos();
- }
-
- @Override
- public float getProgress() throws IOException {
- return trr.getProgress();
- }
-
- @Override
- public boolean next(ImmutableBytesWritable key, Result value) throws IOException {
- return trr.next(key, value);
- }
- };
- }
-
- /**
- * Calculates the splits that will serve as input for the map tasks.
- *
- * Splits are created in number equal to the smallest between numSplits and
- * the number of {@link org.apache.hadoop.hbase.regionserver.HRegion}s in the table.
- * If the number of splits is smaller than the number of
- * {@link org.apache.hadoop.hbase.regionserver.HRegion}s then splits are spanned across
- * multiple {@link org.apache.hadoop.hbase.regionserver.HRegion}s
- * and are grouped as evenly as possible. When the
- * splits are uneven, the bigger splits are placed first in the
- * {@link InputSplit} array.
- *
- * @param job the map task {@link JobConf}
- * @param numSplits a hint to calculate the number of splits (mapred.map.tasks).
- *
- * @return the input splits
- *
- * @see org.apache.hadoop.mapred.InputFormat#getSplits(org.apache.hadoop.mapred.JobConf, int)
- */
- public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
- if (this.table == null) {
- initialize(job);
- }
- // null check in case our child overrides getTable to not throw.
- try {
- if (getTable() == null) {
- // initialize() must not have been implemented in the subclass.
- throw new IOException(INITIALIZATION_ERROR);
- }
- } catch (IllegalStateException exception) {
- throw new IOException(INITIALIZATION_ERROR, exception);
- }
-
- byte [][] startKeys = this.regionLocator.getStartKeys();
- if (startKeys == null || startKeys.length == 0) {
- throw new IOException("Expecting at least one region");
- }
- if (this.inputColumns == null || this.inputColumns.length == 0) {
- throw new IOException("Expecting at least one column");
- }
- int realNumSplits = numSplits > startKeys.length? startKeys.length:
- numSplits;
- InputSplit[] splits = new InputSplit[realNumSplits];
- int middle = startKeys.length / realNumSplits;
- int startPos = 0;
- for (int i = 0; i < realNumSplits; i++) {
- int lastPos = startPos + middle;
- lastPos = startKeys.length % realNumSplits > i ? lastPos + 1 : lastPos;
- String regionLocation = regionLocator.getRegionLocation(startKeys[startPos]).
- getHostname();
- splits[i] = new TableSplit(this.table.getName(),
- startKeys[startPos], ((i + 1) < realNumSplits) ? startKeys[lastPos]:
- HConstants.EMPTY_START_ROW, regionLocation);
- LOG.info("split: " + i + "->" + splits[i]);
- startPos = lastPos;
- }
- return splits;
- }
-
- /**
- * Allows subclasses to initialize the table information.
- *
- * @param connection The Connection to the HBase cluster. MUST be unmanaged. We will close.
- * @param tableName The {@link TableName} of the table to process.
- * @throws IOException
- */
- protected void initializeTable(Connection connection, TableName tableName) throws IOException {
- if (this.table != null || this.connection != null) {
- LOG.warn("initializeTable called multiple times. Overwriting connection and table " +
- "reference; TableInputFormatBase will not close these old references when done.");
- }
- this.table = connection.getTable(tableName);
- this.regionLocator = connection.getRegionLocator(tableName);
- this.connection = connection;
- }
-
- /**
- * @param inputColumns to be passed in {@link Result} to the map task.
- */
- protected void setInputColumns(byte [][] inputColumns) {
- this.inputColumns = inputColumns;
- }
-
- /**
- * Allows subclasses to get the {@link Table}.
- */
- protected Table getTable() {
- if (table == null) {
- throw new IllegalStateException(NOT_INITIALIZED);
- }
- return this.table;
- }
-
- /**
- * Allows subclasses to set the {@link TableRecordReader}.
- *
- * @param tableRecordReader
- * to provide other {@link TableRecordReader} implementations.
- */
- protected void setTableRecordReader(TableRecordReader tableRecordReader) {
- this.tableRecordReader = tableRecordReader;
- }
-
- /**
- * Allows subclasses to set the {@link Filter} to be used.
- *
- * @param rowFilter
- */
- protected void setRowFilter(Filter rowFilter) {
- this.rowFilter = rowFilter;
- }
-
- /**
- * Handle subclass specific set up.
- * Each of the entry points used by the MapReduce framework,
- * {@link #getRecordReader(InputSplit, JobConf, Reporter)} and {@link #getSplits(JobConf, int)},
- * will call {@link #initialize(JobConf)} as a convenient centralized location to handle
- * retrieving the necessary configuration information and calling
- * {@link #initializeTable(Connection, TableName)}.
- *
- * Subclasses should implement their initialize call such that it is safe to call multiple times.
- * The current TableInputFormatBase implementation relies on a non-null table reference to decide
- * if an initialize call is needed, but this behavior may change in the future. In particular,
- * it is critical that initializeTable not be called multiple times since this will leak
- * Connection instances.
- *
- */
- protected void initialize(JobConf job) throws IOException {
- }
-
- /**
- * Close the Table and related objects that were initialized via
- * {@link #initializeTable(Connection, TableName)}.
- *
- * @throws IOException
- */
- protected void closeTable() throws IOException {
- close(table, connection);
- table = null;
- connection = null;
- }
-
- private void close(Closeable... closables) throws IOException {
- for (Closeable c : closables) {
- if(c != null) { c.close(); }
- }
- }
-}
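The split calculation documented on getSplits() above distributes region start keys over the requested number of splits: each split receives startKeys.length / realNumSplits regions and the remainder is handed to the first splits, so the bigger splits come first. A rough standalone sketch of that index arithmetic (illustrative names, not the HBase code itself):

import java.util.ArrayList;
import java.util.List;

/** Illustrative sketch of how regions are divided among splits. */
public class SplitPlanSketch {

  /** Returns, for each split, the [firstRegion, lastRegionExclusive) index range. */
  static List<int[]> plan(int numRegions, int numSplits) {
    int realNumSplits = Math.min(numSplits, numRegions);
    int middle = numRegions / realNumSplits;
    List<int[]> ranges = new ArrayList<>(realNumSplits);
    int startPos = 0;
    for (int i = 0; i < realNumSplits; i++) {
      int lastPos = startPos + middle;
      if (numRegions % realNumSplits > i) {
        lastPos++;                                // spread the remainder over the first splits
      }
      ranges.add(new int[] { startPos, lastPos });
      startPos = lastPos;
    }
    return ranges;
  }

  public static void main(String[] args) {
    // 7 regions into 3 splits -> [0,3) [3,5) [5,7): sizes 3, 2, 2.
    for (int[] r : plan(7, 3)) {
      System.out.println("regions " + r[0] + " to " + (r[1] - 1));
    }
  }
}

In the real method the start row of split i is the start key of its first region and its end row is the start key of the next split's first region (or the empty row for the last split).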
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableMap.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableMap.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableMap.java
deleted file mode 100644
index a9f1e61..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableMap.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.mapred.Mapper;
-
-/**
- * Scan an HBase table to sort by a specified sort column.
- * If the column does not exist, the record is not passed to Reduce.
- *
- * @param <K> WritableComparable key class
- * @param <V> Writable value class
- */
-@InterfaceAudience.Public
-public interface TableMap<K extends WritableComparable<? super K>, V>
-extends Mapper<ImmutableBytesWritable, Result, K, V> {
-
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableMapReduceUtil.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableMapReduceUtil.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableMapReduceUtil.java
deleted file mode 100644
index 63ec418..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableMapReduceUtil.java
+++ /dev/null
@@ -1,376 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.MetaTableAccessor;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.mapreduce.MutationSerialization;
-import org.apache.hadoop.hbase.mapreduce.ResultSerialization;
-import org.apache.hadoop.hbase.security.User;
-import org.apache.hadoop.hbase.security.UserProvider;
-import org.apache.hadoop.hbase.security.token.TokenUtil;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.InputFormat;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.OutputFormat;
-import org.apache.hadoop.mapred.TextInputFormat;
-import org.apache.hadoop.mapred.TextOutputFormat;
-
-import java.io.IOException;
-import java.util.Collection;
-import java.util.Map;
-
-/**
- * Utility for {@link TableMap} and {@link TableReduce}
- */
-@InterfaceAudience.Public
-@SuppressWarnings({ "rawtypes", "unchecked" })
-public class TableMapReduceUtil {
-
- /**
- * Use this before submitting a TableMap job. It will
- * appropriately set up the JobConf.
- *
- * @param table The table name to read from.
- * @param columns The columns to scan.
- * @param mapper The mapper class to use.
- * @param outputKeyClass The class of the output key.
- * @param outputValueClass The class of the output value.
- * @param job The current job configuration to adjust.
- */
- public static void initTableMapJob(String table, String columns,
- Class<? extends TableMap> mapper,
- Class<?> outputKeyClass,
- Class<?> outputValueClass, JobConf job) {
- initTableMapJob(table, columns, mapper, outputKeyClass, outputValueClass, job,
- true, TableInputFormat.class);
- }
-
- public static void initTableMapJob(String table, String columns,
- Class<? extends TableMap> mapper,
- Class<?> outputKeyClass,
- Class<?> outputValueClass, JobConf job, boolean addDependencyJars) {
- initTableMapJob(table, columns, mapper, outputKeyClass, outputValueClass, job,
- addDependencyJars, TableInputFormat.class);
- }
-
- /**
- * Use this before submitting a TableMap job. It will
- * appropriately set up the JobConf.
- *
- * @param table The table name to read from.
- * @param columns The columns to scan.
- * @param mapper The mapper class to use.
- * @param outputKeyClass The class of the output key.
- * @param outputValueClass The class of the output value.
- * @param job The current job configuration to adjust.
- * @param addDependencyJars upload HBase jars and jars for any of the configured
- * job classes via the distributed cache (tmpjars).
- */
- public static void initTableMapJob(String table, String columns,
- Class<? extends TableMap> mapper,
- Class<?> outputKeyClass,
- Class<?> outputValueClass, JobConf job, boolean addDependencyJars,
- Class<? extends InputFormat> inputFormat) {
-
- job.setInputFormat(inputFormat);
- job.setMapOutputValueClass(outputValueClass);
- job.setMapOutputKeyClass(outputKeyClass);
- job.setMapperClass(mapper);
- job.setStrings("io.serializations", job.get("io.serializations"),
- MutationSerialization.class.getName(), ResultSerialization.class.getName());
- FileInputFormat.addInputPaths(job, table);
- job.set(TableInputFormat.COLUMN_LIST, columns);
- if (addDependencyJars) {
- try {
- addDependencyJars(job);
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- try {
- initCredentials(job);
- } catch (IOException ioe) {
- // just spit out the stack trace? really?
- ioe.printStackTrace();
- }
- }
-
- /**
- * Sets up the job for reading from one or more table snapshots, with one or more scans
- * per snapshot.
- * It bypasses HBase servers and reads directly from the snapshot files.
- *
- * @param snapshotScans map of snapshot name to scans on that snapshot.
- * @param mapper The mapper class to use.
- * @param outputKeyClass The class of the output key.
- * @param outputValueClass The class of the output value.
- * @param job The current job to adjust. Make sure the passed job is
- * carrying all necessary HBase configuration.
- * @param addDependencyJars upload HBase jars and jars for any of the configured
- * job classes via the distributed cache (tmpjars).
- */
- public static void initMultiTableSnapshotMapperJob(Map<String, Collection<Scan>> snapshotScans,
- Class<? extends TableMap> mapper, Class<?> outputKeyClass, Class<?> outputValueClass,
- JobConf job, boolean addDependencyJars, Path tmpRestoreDir) throws IOException {
- MultiTableSnapshotInputFormat.setInput(job, snapshotScans, tmpRestoreDir);
-
- job.setInputFormat(MultiTableSnapshotInputFormat.class);
- if (outputValueClass != null) {
- job.setMapOutputValueClass(outputValueClass);
- }
- if (outputKeyClass != null) {
- job.setMapOutputKeyClass(outputKeyClass);
- }
- job.setMapperClass(mapper);
- if (addDependencyJars) {
- addDependencyJars(job);
- }
-
- org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.resetCacheConfig(job);
- }
-
- /**
- * Sets up the job for reading from a table snapshot. It bypasses HBase servers
- * and reads directly from the snapshot files.
- *
- * @param snapshotName The name of the snapshot (of a table) to read from.
- * @param columns The columns to scan.
- * @param mapper The mapper class to use.
- * @param outputKeyClass The class of the output key.
- * @param outputValueClass The class of the output value.
- * @param job The current job to adjust. Make sure the passed job is
- * carrying all necessary HBase configuration.
- * @param addDependencyJars upload HBase jars and jars for any of the configured
- * job classes via the distributed cache (tmpjars).
- * @param tmpRestoreDir a temporary directory to copy the snapshot files into. Current user should
- * have write permissions to this directory, and this should not be a subdirectory of rootdir.
- * After the job is finished, restore directory can be deleted.
- * @throws IOException When setting up the details fails.
- * @see TableSnapshotInputFormat
- */
- public static void initTableSnapshotMapJob(String snapshotName, String columns,
- Class<? extends TableMap> mapper,
- Class<?> outputKeyClass,
- Class<?> outputValueClass, JobConf job,
- boolean addDependencyJars, Path tmpRestoreDir)
- throws IOException {
- TableSnapshotInputFormat.setInput(job, snapshotName, tmpRestoreDir);
- initTableMapJob(snapshotName, columns, mapper, outputKeyClass, outputValueClass, job,
- addDependencyJars, TableSnapshotInputFormat.class);
- org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.resetCacheConfig(job);
- }
-
- /**
- * Use this before submitting a TableReduce job. It will
- * appropriately set up the JobConf.
- *
- * @param table The output table.
- * @param reducer The reducer class to use.
- * @param job The current job configuration to adjust.
- * @throws IOException When determining the region count fails.
- */
- public static void initTableReduceJob(String table,
- Class<? extends TableReduce> reducer, JobConf job)
- throws IOException {
- initTableReduceJob(table, reducer, job, null);
- }
-
- /**
- * Use this before submitting a TableReduce job. It will
- * appropriately set up the JobConf.
- *
- * @param table The output table.
- * @param reducer The reducer class to use.
- * @param job The current job configuration to adjust.
- * @param partitioner Partitioner to use. Pass <code>null</code> to use
- * default partitioner.
- * @throws IOException When determining the region count fails.
- */
- public static void initTableReduceJob(String table,
- Class<? extends TableReduce> reducer, JobConf job, Class partitioner)
- throws IOException {
- initTableReduceJob(table, reducer, job, partitioner, true);
- }
-
- /**
- * Use this before submitting a TableReduce job. It will
- * appropriately set up the JobConf.
- *
- * @param table The output table.
- * @param reducer The reducer class to use.
- * @param job The current job configuration to adjust.
- * @param partitioner Partitioner to use. Pass <code>null</code> to use
- * default partitioner.
- * @param addDependencyJars upload HBase jars and jars for any of the configured
- * job classes via the distributed cache (tmpjars).
- * @throws IOException When determining the region count fails.
- */
- public static void initTableReduceJob(String table,
- Class<? extends TableReduce> reducer, JobConf job, Class partitioner,
- boolean addDependencyJars) throws IOException {
- job.setOutputFormat(TableOutputFormat.class);
- job.setReducerClass(reducer);
- job.set(TableOutputFormat.OUTPUT_TABLE, table);
- job.setOutputKeyClass(ImmutableBytesWritable.class);
- job.setOutputValueClass(Put.class);
- job.setStrings("io.serializations", job.get("io.serializations"),
- MutationSerialization.class.getName(), ResultSerialization.class.getName());
- if (partitioner == HRegionPartitioner.class) {
- job.setPartitionerClass(HRegionPartitioner.class);
- int regions =
- MetaTableAccessor.getRegionCount(HBaseConfiguration.create(job), TableName.valueOf(table));
- if (job.getNumReduceTasks() > regions) {
- job.setNumReduceTasks(regions);
- }
- } else if (partitioner != null) {
- job.setPartitionerClass(partitioner);
- }
- if (addDependencyJars) {
- addDependencyJars(job);
- }
- initCredentials(job);
- }
-
- public static void initCredentials(JobConf job) throws IOException {
- UserProvider userProvider = UserProvider.instantiate(job);
- if (userProvider.isHadoopSecurityEnabled()) {
- // propagate delegation related props from launcher job to MR job
- if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
- job.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
- }
- }
-
- if (userProvider.isHBaseSecurityEnabled()) {
- Connection conn = ConnectionFactory.createConnection(job);
- try {
- // login the server principal (if using secure Hadoop)
- User user = userProvider.getCurrent();
- TokenUtil.addTokenForJob(conn, job, user);
- } catch (InterruptedException ie) {
- ie.printStackTrace();
- Thread.currentThread().interrupt();
- } finally {
- conn.close();
- }
- }
- }
-
- /**
- * Ensures that the given number of reduce tasks for the given job
- * configuration does not exceed the number of regions for the given table.
- *
- * @param table The table to get the region count for.
- * @param job The current job configuration to adjust.
- * @throws IOException When retrieving the table details fails.
- */
- // Used by tests.
- public static void limitNumReduceTasks(String table, JobConf job)
- throws IOException {
- int regions =
- MetaTableAccessor.getRegionCount(HBaseConfiguration.create(job), TableName.valueOf(table));
- if (job.getNumReduceTasks() > regions)
- job.setNumReduceTasks(regions);
- }
-
- /**
- * Ensures that the given number of map tasks for the given job
- * configuration does not exceed the number of regions for the given table.
- *
- * @param table The table to get the region count for.
- * @param job The current job configuration to adjust.
- * @throws IOException When retrieving the table details fails.
- */
- // Used by tests.
- public static void limitNumMapTasks(String table, JobConf job)
- throws IOException {
- int regions =
- MetaTableAccessor.getRegionCount(HBaseConfiguration.create(job), TableName.valueOf(table));
- if (job.getNumMapTasks() > regions)
- job.setNumMapTasks(regions);
- }
-
- /**
- * Sets the number of reduce tasks for the given job configuration to the
- * number of regions the given table has.
- *
- * @param table The table to get the region count for.
- * @param job The current job configuration to adjust.
- * @throws IOException When retrieving the table details fails.
- */
- public static void setNumReduceTasks(String table, JobConf job)
- throws IOException {
- job.setNumReduceTasks(MetaTableAccessor.getRegionCount(HBaseConfiguration.create(job),
- TableName.valueOf(table)));
- }
-
- /**
- * Sets the number of map tasks for the given job configuration to the
- * number of regions the given table has.
- *
- * @param table The table to get the region count for.
- * @param job The current job configuration to adjust.
- * @throws IOException When retrieving the table details fails.
- */
- public static void setNumMapTasks(String table, JobConf job)
- throws IOException {
- job.setNumMapTasks(MetaTableAccessor.getRegionCount(HBaseConfiguration.create(job),
- TableName.valueOf(table)));
- }
-
- /**
- * Sets the number of rows to return and cache with each scanner iteration.
- * Higher caching values will enable faster mapreduce jobs at the expense of
- * requiring more heap to contain the cached rows.
- *
- * @param job The current job configuration to adjust.
- * @param batchSize The number of rows to return in batch with each scanner
- * iteration.
- */
- public static void setScannerCaching(JobConf job, int batchSize) {
- job.setInt("hbase.client.scanner.caching", batchSize);
- }
-
- /**
- * @see org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil#addDependencyJars(org.apache.hadoop.mapreduce.Job)
- */
- public static void addDependencyJars(JobConf job) throws IOException {
- org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addHBaseDependencyJars(job);
- org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addDependencyJarsForClasses(
- job,
- // when making changes here, consider also mapreduce.TableMapReduceUtil
- // pull job classes
- job.getMapOutputKeyClass(),
- job.getMapOutputValueClass(),
- job.getOutputKeyClass(),
- job.getOutputValueClass(),
- job.getPartitionerClass(),
- job.getClass("mapred.input.format.class", TextInputFormat.class, InputFormat.class),
- job.getClass("mapred.output.format.class", TextOutputFormat.class, OutputFormat.class),
- job.getCombinerClass());
- }
-}
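For orientation, a driver using the snapshot entry point documented above (initTableSnapshotMapJob, which reads restored snapshot files instead of going through region servers) might look roughly like the sketch below. The snapshot name, column list, restore path, and the mapper are illustrative placeholders, not part of this patch.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapred.TableMap;
import org.apache.hadoop.hbase.mapred.TableMapReduceUtil;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class SnapshotScanDriver {

  /** Trivial mapper that only counts the rows it sees (illustrative). */
  public static class MySnapshotMapper
      implements TableMap<ImmutableBytesWritable, Result> {
    private enum Counters { ROWS }

    public void map(ImmutableBytesWritable row, Result values,
        OutputCollector<ImmutableBytesWritable, Result> output, Reporter reporter)
        throws IOException {
      reporter.incrCounter(Counters.ROWS, 1);
    }

    public void configure(JobConf job) {
      // Nothing to do.
    }

    public void close() throws IOException {
      // Nothing to do.
    }
  }

  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf(HBaseConfiguration.create(), SnapshotScanDriver.class);
    job.setJobName("snapshot-scan");
    // Restore the snapshot under a temporary directory and read its files directly.
    TableMapReduceUtil.initTableSnapshotMapJob(
        "mySnapshot",                         // snapshot to read (illustrative name)
        "f:col1 f:col2",                      // space delimited columns (illustrative)
        MySnapshotMapper.class,
        ImmutableBytesWritable.class, Result.class,
        job, true, new Path("/tmp/snapshot-restore"));
    job.setNumReduceTasks(0);
    FileOutputFormat.setOutputPath(job, new Path(args[0]));
    JobClient.runJob(job);
  }
}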
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableOutputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableOutputFormat.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableOutputFormat.java
deleted file mode 100644
index 8878eee..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableOutputFormat.java
+++ /dev/null
@@ -1,134 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import java.io.IOException;
-
-import org.apache.hadoop.fs.FileAlreadyExistsException;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.BufferedMutator;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.mapred.FileOutputFormat;
-import org.apache.hadoop.mapred.InvalidJobConfException;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.RecordWriter;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.util.Progressable;
-
-/**
- * Convert Map/Reduce output and write it to an HBase table
- */
-@InterfaceAudience.Public
-public class TableOutputFormat extends FileOutputFormat<ImmutableBytesWritable, Put> {
-
- /** JobConf parameter that specifies the output table */
- public static final String OUTPUT_TABLE = "hbase.mapred.outputtable";
-
- /**
- * Convert Reduce output (key, value) to (ImmutableBytesWritable, Put)
- * and write to an HBase table.
- */
- protected static class TableRecordWriter implements RecordWriter<ImmutableBytesWritable, Put> {
- private BufferedMutator m_mutator;
- private Connection conn;
-
-
- /**
- * Instantiate a TableRecordWriter with the given BufferedMutator for writing.
- *
- * @deprecated Please use {@code #TableRecordWriter(JobConf)}. This version does not clean up
- * connections and will leak connections (removed in 2.0).
- */
- @Deprecated
- public TableRecordWriter(final BufferedMutator mutator) throws IOException {
- this.m_mutator = mutator;
- this.conn = null;
- }
-
- /**
- * Instantiate a TableRecordWriter with a BufferedMutator for batch writing.
- */
- public TableRecordWriter(JobConf job) throws IOException {
- // expecting exactly one path
- TableName tableName = TableName.valueOf(job.get(OUTPUT_TABLE));
- try {
- this.conn = ConnectionFactory.createConnection(job);
- this.m_mutator = conn.getBufferedMutator(tableName);
- } finally {
- if (this.m_mutator == null) {
- conn.close();
- conn = null;
- }
- }
- }
-
- public void close(Reporter reporter) throws IOException {
- try {
- if (this.m_mutator != null) {
- this.m_mutator.close();
- }
- } finally {
- if (conn != null) {
- this.conn.close();
- }
- }
- }
-
- public void write(ImmutableBytesWritable key, Put value) throws IOException {
- m_mutator.mutate(new Put(value));
- }
- }
-
- /**
- * Creates a new record writer.
- *
- * Be aware that the baseline javadoc gives the impression that there is a single
- * {@link RecordWriter} per job but in HBase, it is more natural if we give you a new
- * RecordWriter per call of this method. You must close the returned RecordWriter when done.
- * Failure to do so will drop writes.
- *
- * @param ignored Ignored filesystem
- * @param job Current JobConf
- * @param name Name of the job
- * @param progress Progressable used to report progress.
- * @return The newly created writer instance.
- * @throws IOException When creating the writer fails.
- */
- @Override
- public RecordWriter getRecordWriter(FileSystem ignored, JobConf job, String name,
- Progressable progress)
- throws IOException {
- // Clear write buffer on fail is true by default so no need to reset it.
- return new TableRecordWriter(job);
- }
-
- @Override
- public void checkOutputSpecs(FileSystem ignored, JobConf job)
- throws FileAlreadyExistsException, InvalidJobConfException, IOException {
- String tableName = job.get(OUTPUT_TABLE);
- if (tableName == null) {
- throw new IOException("Must specify table name");
- }
- }
-}
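A hedged sketch of wiring this output format into a JobConf; the table name "my_table" and class name are placeholders for illustration only.

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapred.TableOutputFormat;
import org.apache.hadoop.mapred.JobConf;

public class OutputWiringSketch {
  public static JobConf configure() {
    JobConf job = new JobConf(HBaseConfiguration.create(), OutputWiringSketch.class);
    // Name of the destination table ("my_table" is hypothetical).
    job.set(TableOutputFormat.OUTPUT_TABLE, "my_table");
    job.setOutputFormat(TableOutputFormat.class);
    // The record writer expects (ImmutableBytesWritable, Put) pairs.
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Put.class);
    return job;
  }
}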
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableRecordReader.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableRecordReader.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableRecordReader.java
deleted file mode 100644
index cecef7d..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableRecordReader.java
+++ /dev/null
@@ -1,139 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import java.io.IOException;
-
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.filter.Filter;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.mapred.RecordReader;
-
-
-/**
- * Iterate over HBase table data, returning (ImmutableBytesWritable, Result) pairs.
- */
-@InterfaceAudience.Public
-public class TableRecordReader
-implements RecordReader<ImmutableBytesWritable, Result> {
-
- private TableRecordReaderImpl recordReaderImpl = new TableRecordReaderImpl();
-
- /**
- * Restart from survivable exceptions by creating a new scanner.
- *
- * @param firstRow
- * @throws IOException
- */
- public void restart(byte[] firstRow) throws IOException {
- this.recordReaderImpl.restart(firstRow);
- }
-
- /**
- * Build the scanner. Not done in constructor to allow for extension.
- *
- * @throws IOException
- */
- public void init() throws IOException {
- this.recordReaderImpl.restart(this.recordReaderImpl.getStartRow());
- }
-
- /**
- * @param htable the {@link org.apache.hadoop.hbase.client.Table} to scan.
- */
- public void setHTable(Table htable) {
- this.recordReaderImpl.setHTable(htable);
- }
-
- /**
- * @param inputColumns the columns to be placed in {@link Result}.
- */
- public void setInputColumns(final byte [][] inputColumns) {
- this.recordReaderImpl.setInputColumns(inputColumns);
- }
-
- /**
- * @param startRow the first row in the split
- */
- public void setStartRow(final byte [] startRow) {
- this.recordReaderImpl.setStartRow(startRow);
- }
-
- /**
- *
- * @param endRow the last row in the split
- */
- public void setEndRow(final byte [] endRow) {
- this.recordReaderImpl.setEndRow(endRow);
- }
-
- /**
- * @param rowFilter the {@link Filter} to be used.
- */
- public void setRowFilter(Filter rowFilter) {
- this.recordReaderImpl.setRowFilter(rowFilter);
- }
-
- public void close() {
- this.recordReaderImpl.close();
- }
-
- /**
- * @return ImmutableBytesWritable
- *
- * @see org.apache.hadoop.mapred.RecordReader#createKey()
- */
- public ImmutableBytesWritable createKey() {
- return this.recordReaderImpl.createKey();
- }
-
- /**
- * @return Result
- *
- * @see org.apache.hadoop.mapred.RecordReader#createValue()
- */
- public Result createValue() {
- return this.recordReaderImpl.createValue();
- }
-
- public long getPos() {
-
- // This should be the ordinal tuple in the range;
- // not clear how to calculate...
- return this.recordReaderImpl.getPos();
- }
-
- public float getProgress() {
- // Depends on the total number of tuples and getPos
- return this.recordReaderImpl.getPos();
- }
-
- /**
- * @param key ImmutableBytesWritable as input key.
- * @param value Result as input value.
- * @return true if there was more data
- * @throws IOException
- */
- public boolean next(ImmutableBytesWritable key, Result value)
- throws IOException {
- return this.recordReaderImpl.next(key, value);
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableRecordReaderImpl.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableRecordReaderImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableRecordReaderImpl.java
deleted file mode 100644
index f6b79c3..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableRecordReaderImpl.java
+++ /dev/null
@@ -1,259 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import java.io.IOException;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.ScannerCallable;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.DoNotRetryIOException;
-import org.apache.hadoop.hbase.filter.Filter;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.util.StringUtils;
-
-import static org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl.LOG_PER_ROW_COUNT;
-
-/**
- * Iterate over HBase table data, returning (ImmutableBytesWritable, Result) pairs.
- */
-@InterfaceAudience.Public
-public class TableRecordReaderImpl {
- private static final Log LOG = LogFactory.getLog(TableRecordReaderImpl.class);
-
- private byte [] startRow;
- private byte [] endRow;
- private byte [] lastSuccessfulRow;
- private Filter trrRowFilter;
- private ResultScanner scanner;
- private Table htable;
- private byte [][] trrInputColumns;
- private long timestamp;
- private int rowcount;
- private boolean logScannerActivity = false;
- private int logPerRowCount = 100;
-
- /**
- * Restart from survivable exceptions by creating a new scanner.
- *
- * @param firstRow
- * @throws IOException
- */
- public void restart(byte[] firstRow) throws IOException {
- Scan currentScan;
- if ((endRow != null) && (endRow.length > 0)) {
- if (trrRowFilter != null) {
- Scan scan = new Scan(firstRow, endRow);
- TableInputFormat.addColumns(scan, trrInputColumns);
- scan.setFilter(trrRowFilter);
- scan.setCacheBlocks(false);
- this.scanner = this.htable.getScanner(scan);
- currentScan = scan;
- } else {
- LOG.debug("TIFB.restart, firstRow: " +
- Bytes.toStringBinary(firstRow) + ", endRow: " +
- Bytes.toStringBinary(endRow));
- Scan scan = new Scan(firstRow, endRow);
- TableInputFormat.addColumns(scan, trrInputColumns);
- this.scanner = this.htable.getScanner(scan);
- currentScan = scan;
- }
- } else {
- LOG.debug("TIFB.restart, firstRow: " +
- Bytes.toStringBinary(firstRow) + ", no endRow");
-
- Scan scan = new Scan(firstRow);
- TableInputFormat.addColumns(scan, trrInputColumns);
- scan.setFilter(trrRowFilter);
- this.scanner = this.htable.getScanner(scan);
- currentScan = scan;
- }
- if (logScannerActivity) {
- LOG.info("Current scan=" + currentScan.toString());
- timestamp = System.currentTimeMillis();
- rowcount = 0;
- }
- }
-
- /**
- * Build the scanner. Not done in constructor to allow for extension.
- *
- * @throws IOException
- */
- public void init() throws IOException {
- restart(startRow);
- }
-
- byte[] getStartRow() {
- return this.startRow;
- }
- /**
- * @param htable the {@link org.apache.hadoop.hbase.client.Table} to scan.
- */
- public void setHTable(Table htable) {
- Configuration conf = htable.getConfiguration();
- logScannerActivity = conf.getBoolean(
- ScannerCallable.LOG_SCANNER_ACTIVITY, false);
- logPerRowCount = conf.getInt(LOG_PER_ROW_COUNT, 100);
- this.htable = htable;
- }
-
- /**
- * @param inputColumns the columns to be placed in {@link Result}.
- */
- public void setInputColumns(final byte [][] inputColumns) {
- this.trrInputColumns = inputColumns;
- }
-
- /**
- * @param startRow the first row in the split
- */
- public void setStartRow(final byte [] startRow) {
- this.startRow = startRow;
- }
-
- /**
- *
- * @param endRow the last row in the split
- */
- public void setEndRow(final byte [] endRow) {
- this.endRow = endRow;
- }
-
- /**
- * @param rowFilter the {@link Filter} to be used.
- */
- public void setRowFilter(Filter rowFilter) {
- this.trrRowFilter = rowFilter;
- }
-
- public void close() {
- if (this.scanner != null) {
- this.scanner.close();
- }
- try {
- this.htable.close();
- } catch (IOException ioe) {
- LOG.warn("Error closing table", ioe);
- }
- }
-
- /**
- * @return ImmutableBytesWritable
- *
- * @see org.apache.hadoop.mapred.RecordReader#createKey()
- */
- public ImmutableBytesWritable createKey() {
- return new ImmutableBytesWritable();
- }
-
- /**
- * @return Result
- *
- * @see org.apache.hadoop.mapred.RecordReader#createValue()
- */
- public Result createValue() {
- return new Result();
- }
-
- public long getPos() {
- // This should be the ordinal tuple in the range;
- // not clear how to calculate...
- return 0;
- }
-
- public float getProgress() {
- // Depends on the total number of tuples and getPos
- return 0;
- }
-
- /**
- * @param key ImmutableBytesWritable as input key.
- * @param value Result as input value.
- * @return true if there was more data
- * @throws IOException
- */
- public boolean next(ImmutableBytesWritable key, Result value)
- throws IOException {
- Result result;
- try {
- try {
- result = this.scanner.next();
- if (logScannerActivity) {
- rowcount ++;
- if (rowcount >= logPerRowCount) {
- long now = System.currentTimeMillis();
- LOG.info("Mapper took " + (now-timestamp)
- + "ms to process " + rowcount + " rows");
- timestamp = now;
- rowcount = 0;
- }
- }
- } catch (IOException e) {
- // do not retry if the exception tells us not to do so
- if (e instanceof DoNotRetryIOException) {
- throw e;
- }
- // try to handle all other IOExceptions by restarting
- // the scanner, if the second call fails, it will be rethrown
- LOG.debug("recovered from " + StringUtils.stringifyException(e));
- if (lastSuccessfulRow == null) {
- LOG.warn("We are restarting the first next() invocation," +
- " if your mapper has restarted a few other times like this" +
- " then you should consider killing this job and investigate" +
- " why it's taking so long.");
- }
- if (lastSuccessfulRow == null) {
- restart(startRow);
- } else {
- restart(lastSuccessfulRow);
- this.scanner.next(); // skip presumed already mapped row
- }
- result = this.scanner.next();
- }
-
- if (result != null && result.size() > 0) {
- key.set(result.getRow());
- lastSuccessfulRow = key.get();
- value.copyFrom(result);
- return true;
- }
- return false;
- } catch (IOException ioe) {
- if (logScannerActivity) {
- long now = System.currentTimeMillis();
- LOG.info("Mapper took " + (now-timestamp)
- + "ms to process " + rowcount + " rows");
- LOG.info(ioe);
- String lastRow = lastSuccessfulRow == null ?
- "null" : Bytes.toStringBinary(lastSuccessfulRow);
- LOG.info("lastSuccessfulRow=" + lastRow);
- }
- throw ioe;
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableReduce.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableReduce.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableReduce.java
deleted file mode 100644
index 91fb4a1..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableReduce.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.mapred.Reducer;
-
-/**
- * Write a table, sorting by the input key
- *
- * @param <K> key class
- * @param <V> value class
- */
-@InterfaceAudience.Public
-@SuppressWarnings("unchecked")
-public interface TableReduce<K extends WritableComparable, V>
-extends Reducer<K, V, ImmutableBytesWritable, Put> {
-
-}
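As an illustration of the interface above, a minimal reducer sketch that sums LongWritable values per Text key and emits one Put per key. The class name, column family "f" and qualifier "sum" are assumptions for the example, not part of this change.

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapred.TableReduce;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class SumToTableReducer implements TableReduce<Text, LongWritable> {
  @Override
  public void reduce(Text key, Iterator<LongWritable> values,
      OutputCollector<ImmutableBytesWritable, Put> output, Reporter reporter)
      throws IOException {
    long sum = 0;
    while (values.hasNext()) {
      sum += values.next().get();
    }
    byte[] row = Bytes.toBytes(key.toString());
    Put put = new Put(row);
    // Hypothetical destination column f:sum.
    put.addColumn(Bytes.toBytes("f"), Bytes.toBytes("sum"), Bytes.toBytes(sum));
    output.collect(new ImmutableBytesWritable(row), put);
  }

  @Override
  public void configure(JobConf job) { }

  @Override
  public void close() { }
}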
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableSnapshotInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableSnapshotInputFormat.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableSnapshotInputFormat.java
deleted file mode 100644
index d7b49ff..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableSnapshotInputFormat.java
+++ /dev/null
@@ -1,166 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.mapred;
-
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormatImpl;
-import org.apache.hadoop.mapred.InputFormat;
-import org.apache.hadoop.mapred.InputSplit;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.RecordReader;
-import org.apache.hadoop.mapred.Reporter;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.List;
-
-/**
- * TableSnapshotInputFormat allows a MapReduce job to run over a table snapshot. Further
- * documentation available on {@link org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat}.
- *
- * @see org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat
- */
-@InterfaceAudience.Public
-public class TableSnapshotInputFormat implements InputFormat<ImmutableBytesWritable, Result> {
-
- public static class TableSnapshotRegionSplit implements InputSplit {
- private TableSnapshotInputFormatImpl.InputSplit delegate;
-
- // constructor for mapreduce framework / Writable
- public TableSnapshotRegionSplit() {
- this.delegate = new TableSnapshotInputFormatImpl.InputSplit();
- }
-
- public TableSnapshotRegionSplit(TableSnapshotInputFormatImpl.InputSplit delegate) {
- this.delegate = delegate;
- }
-
- public TableSnapshotRegionSplit(HTableDescriptor htd, HRegionInfo regionInfo,
- List<String> locations, Scan scan, Path restoreDir) {
- this.delegate =
- new TableSnapshotInputFormatImpl.InputSplit(htd, regionInfo, locations, scan, restoreDir);
- }
-
- @Override
- public long getLength() throws IOException {
- return delegate.getLength();
- }
-
- @Override
- public String[] getLocations() throws IOException {
- return delegate.getLocations();
- }
-
- @Override
- public void write(DataOutput out) throws IOException {
- delegate.write(out);
- }
-
- @Override
- public void readFields(DataInput in) throws IOException {
- delegate.readFields(in);
- }
- }
-
- static class TableSnapshotRecordReader
- implements RecordReader<ImmutableBytesWritable, Result> {
-
- private TableSnapshotInputFormatImpl.RecordReader delegate;
-
- public TableSnapshotRecordReader(TableSnapshotRegionSplit split, JobConf job)
- throws IOException {
- delegate = new TableSnapshotInputFormatImpl.RecordReader();
- delegate.initialize(split.delegate, job);
- }
-
- @Override
- public boolean next(ImmutableBytesWritable key, Result value) throws IOException {
- if (!delegate.nextKeyValue()) {
- return false;
- }
- ImmutableBytesWritable currentKey = delegate.getCurrentKey();
- key.set(currentKey.get(), currentKey.getOffset(), currentKey.getLength());
- value.copyFrom(delegate.getCurrentValue());
- return true;
- }
-
- @Override
- public ImmutableBytesWritable createKey() {
- return new ImmutableBytesWritable();
- }
-
- @Override
- public Result createValue() {
- return new Result();
- }
-
- @Override
- public long getPos() throws IOException {
- return delegate.getPos();
- }
-
- @Override
- public void close() throws IOException {
- delegate.close();
- }
-
- @Override
- public float getProgress() throws IOException {
- return delegate.getProgress();
- }
- }
-
- @Override
- public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
- List<TableSnapshotInputFormatImpl.InputSplit> splits =
- TableSnapshotInputFormatImpl.getSplits(job);
- InputSplit[] results = new InputSplit[splits.size()];
- for (int i = 0; i < splits.size(); i++) {
- results[i] = new TableSnapshotRegionSplit(splits.get(i));
- }
- return results;
- }
-
- @Override
- public RecordReader<ImmutableBytesWritable, Result>
- getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException {
- return new TableSnapshotRecordReader((TableSnapshotRegionSplit) split, job);
- }
-
- /**
- * Configures the job to use TableSnapshotInputFormat to read from a snapshot.
- * @param job the job to configure
- * @param snapshotName the name of the snapshot to read from
- * @param restoreDir a temporary directory to restore the snapshot into. Current user should
- * have write permissions to this directory, and this should not be a subdirectory of rootdir.
- * After the job is finished, restoreDir can be deleted.
- * @throws IOException if an error occurs
- */
- public static void setInput(JobConf job, String snapshotName, Path restoreDir)
- throws IOException {
- TableSnapshotInputFormatImpl.setInput(job, snapshotName, restoreDir);
- }
-}
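A minimal sketch of pointing a mapred job at a snapshot via setInput; the snapshot name and restore directory below are placeholders.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapred.TableSnapshotInputFormat;
import org.apache.hadoop.mapred.JobConf;

public class SnapshotInputSketch {
  public static JobConf configure() throws Exception {
    JobConf job = new JobConf(HBaseConfiguration.create(), SnapshotInputSketch.class);
    // "my_snapshot" and the restore directory are hypothetical; the restore dir must be
    // writable by the submitting user and must not live under hbase.rootdir.
    TableSnapshotInputFormat.setInput(job, "my_snapshot", new Path("/tmp/snapshot-restore"));
    job.setInputFormat(TableSnapshotInputFormat.class);
    return job;
  }
}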
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableSplit.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableSplit.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableSplit.java
deleted file mode 100644
index 0784e5e..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableSplit.java
+++ /dev/null
@@ -1,154 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.Arrays;
-
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapred.InputSplit;
-
-/**
- * A table split corresponds to a key range [low, high)
- */
-@InterfaceAudience.Public
-public class TableSplit implements InputSplit, Comparable<TableSplit> {
- private TableName m_tableName;
- private byte [] m_startRow;
- private byte [] m_endRow;
- private String m_regionLocation;
-
- /** default constructor */
- public TableSplit() {
- this((TableName)null, HConstants.EMPTY_BYTE_ARRAY,
- HConstants.EMPTY_BYTE_ARRAY, "");
- }
-
- /**
- * Constructor
- * @param tableName
- * @param startRow
- * @param endRow
- * @param location
- */
- public TableSplit(TableName tableName, byte [] startRow, byte [] endRow,
- final String location) {
- this.m_tableName = tableName;
- this.m_startRow = startRow;
- this.m_endRow = endRow;
- this.m_regionLocation = location;
- }
-
- public TableSplit(byte [] tableName, byte [] startRow, byte [] endRow,
- final String location) {
- this(TableName.valueOf(tableName), startRow, endRow,
- location);
- }
-
- /** @return table name */
- public TableName getTable() {
- return this.m_tableName;
- }
-
- /** @return table name */
- public byte [] getTableName() {
- return this.m_tableName.getName();
- }
-
- /** @return starting row key */
- public byte [] getStartRow() {
- return this.m_startRow;
- }
-
- /** @return end row key */
- public byte [] getEndRow() {
- return this.m_endRow;
- }
-
- /** @return the region's hostname */
- public String getRegionLocation() {
- return this.m_regionLocation;
- }
-
- public String[] getLocations() {
- return new String[] {this.m_regionLocation};
- }
-
- public long getLength() {
- // Not clear how to obtain this... seems to be used only for sorting splits
- return 0;
- }
-
- public void readFields(DataInput in) throws IOException {
- this.m_tableName = TableName.valueOf(Bytes.readByteArray(in));
- this.m_startRow = Bytes.readByteArray(in);
- this.m_endRow = Bytes.readByteArray(in);
- this.m_regionLocation = Bytes.toString(Bytes.readByteArray(in));
- }
-
- public void write(DataOutput out) throws IOException {
- Bytes.writeByteArray(out, this.m_tableName.getName());
- Bytes.writeByteArray(out, this.m_startRow);
- Bytes.writeByteArray(out, this.m_endRow);
- Bytes.writeByteArray(out, Bytes.toBytes(this.m_regionLocation));
- }
-
- @Override
- public String toString() {
- StringBuilder sb = new StringBuilder();
- sb.append("HBase table split(");
- sb.append("table name: ").append(m_tableName);
- sb.append(", start row: ").append(Bytes.toStringBinary(m_startRow));
- sb.append(", end row: ").append(Bytes.toStringBinary(m_endRow));
- sb.append(", region location: ").append(m_regionLocation);
- sb.append(")");
- return sb.toString();
- }
-
- @Override
- public int compareTo(TableSplit o) {
- return Bytes.compareTo(getStartRow(), o.getStartRow());
- }
-
- @Override
- public boolean equals(Object o) {
- if (o == null || !(o instanceof TableSplit)) {
- return false;
- }
- TableSplit other = (TableSplit)o;
- return m_tableName.equals(other.m_tableName) &&
- Bytes.equals(m_startRow, other.m_startRow) &&
- Bytes.equals(m_endRow, other.m_endRow) &&
- m_regionLocation.equals(other.m_regionLocation);
- }
-
- @Override
- public int hashCode() {
- int result = m_tableName != null ? m_tableName.hashCode() : 0;
- result = 31 * result + Arrays.hashCode(m_startRow);
- result = 31 * result + Arrays.hashCode(m_endRow);
- result = 31 * result + (m_regionLocation != null ? m_regionLocation.hashCode() : 0);
- return result;
- }
-}
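A small sketch showing the Writable round trip that the write/readFields pair above supports; the table name, row keys and hostname are placeholders.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.mapred.TableSplit;
import org.apache.hadoop.hbase.util.Bytes;

public class SplitRoundTrip {
  public static void main(String[] args) throws Exception {
    TableSplit split = new TableSplit(TableName.valueOf("my_table"),
        Bytes.toBytes("row-aaa"), Bytes.toBytes("row-mmm"), "rs1.example.com");
    // Serialize and deserialize the split, then compare.
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    split.write(new DataOutputStream(bytes));
    TableSplit copy = new TableSplit();
    copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
    System.out.println("round trip equal: " + split.equals(copy));
  }
}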
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/package-info.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/package-info.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/package-info.java
deleted file mode 100644
index 8a2a363..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/package-info.java
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
-Provides HBase <a href="http://wiki.apache.org/hadoop/HadoopMapReduce">MapReduce</a>
-Input/OutputFormats, a table indexing MapReduce job, and utility methods.
-
-<p>See <a href="http://hbase.apache.org/book.html#mapreduce">HBase and MapReduce</a>
-in the HBase Reference Guide for mapreduce over hbase documentation.
-*/
-package org.apache.hadoop.hbase.mapred;
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCounter.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCounter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCounter.java
deleted file mode 100644
index 078033e..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCounter.java
+++ /dev/null
@@ -1,333 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.filter.CompareFilter;
-import org.apache.hadoop.hbase.filter.Filter;
-import org.apache.hadoop.hbase.filter.PrefixFilter;
-import org.apache.hadoop.hbase.filter.RegexStringComparator;
-import org.apache.hadoop.hbase.filter.RowFilter;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.base.Preconditions;
-
-
-/**
- * A job with a map and reduce phase to count cells in a table.
- * The counter lists the following stats for a given table:
- * <pre>
- * 1. Total number of rows in the table
- * 2. Total number of CFs across all rows
- * 3. Total qualifiers across all rows
- * 4. Total occurrence of each CF
- * 5. Total occurrence of each qualifier
- * 6. Total number of versions of each qualifier.
- * </pre>
- *
- * CellCounter takes optional parameters: a user-supplied separator
- * string used for the row/family/qualifier names in the report, a
- * regex-based or prefix-based row filter to restrict the count
- * operation to a subset of rows from the table, and a start time
- * and/or end time to limit the count to a time range.
- */
-@InterfaceAudience.Public
-public class CellCounter extends Configured implements Tool {
- private static final Log LOG =
- LogFactory.getLog(CellCounter.class.getName());
-
-
- /**
- * Name of this 'program'.
- */
- static final String NAME = "CellCounter";
-
- private final static String JOB_NAME_CONF_KEY = "mapreduce.job.name";
-
- /**
- * Mapper that runs the count.
- */
- static class CellCounterMapper
- extends TableMapper<Text, IntWritable> {
- /**
- * Counter enumeration to count the actual rows.
- */
- public static enum Counters {
- ROWS,
- CELLS
- }
-
- private Configuration conf;
- private String separator;
-
- // state of current row, family, column needs to persist across map() invocations
- // in order to properly handle scanner batching, where a single qualifier may have too
- // many versions for a single map() call
- private byte[] lastRow;
- private String currentRowKey;
- byte[] currentFamily = null;
- String currentFamilyName = null;
- byte[] currentQualifier = null;
- // family + qualifier
- String currentQualifierName = null;
- // rowkey + family + qualifier
- String currentRowQualifierName = null;
-
- @Override
- protected void setup(Context context) throws IOException, InterruptedException {
- conf = context.getConfiguration();
- separator = conf.get("ReportSeparator",":");
- }
-
- /**
- * Maps the data.
- *
- * @param row The current table row key.
- * @param values The columns.
- * @param context The current context.
- * @throws IOException When something is broken with the data.
- * @see org.apache.hadoop.mapreduce.Mapper#map(KEYIN, VALUEIN,
- * org.apache.hadoop.mapreduce.Mapper.Context)
- */
-
- @Override
- @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="NP_NULL_ON_SOME_PATH",
- justification="Findbugs is blind to the Precondition null check")
- public void map(ImmutableBytesWritable row, Result values,
- Context context)
- throws IOException {
- Preconditions.checkState(values != null,
- "values passed to the map is null");
-
- try {
- byte[] currentRow = values.getRow();
- if (lastRow == null || !Bytes.equals(lastRow, currentRow)) {
- lastRow = currentRow;
- currentRowKey = Bytes.toStringBinary(currentRow);
- currentFamily = null;
- currentQualifier = null;
- context.getCounter(Counters.ROWS).increment(1);
- context.write(new Text("Total ROWS"), new IntWritable(1));
- }
- if (!values.isEmpty()) {
- int cellCount = 0;
- for (Cell value : values.listCells()) {
- cellCount++;
- if (currentFamily == null || !CellUtil.matchingFamily(value, currentFamily)) {
- currentFamily = CellUtil.cloneFamily(value);
- currentFamilyName = Bytes.toStringBinary(currentFamily);
- currentQualifier = null;
- context.getCounter("CF", currentFamilyName).increment(1);
- if (1 == context.getCounter("CF", currentFamilyName).getValue()) {
- context.write(new Text("Total Families Across all Rows"), new IntWritable(1));
- context.write(new Text(currentFamily), new IntWritable(1));
- }
- }
- if (currentQualifier == null || !CellUtil.matchingQualifier(value, currentQualifier)) {
- currentQualifier = CellUtil.cloneQualifier(value);
- currentQualifierName = currentFamilyName + separator +
- Bytes.toStringBinary(currentQualifier);
- currentRowQualifierName = currentRowKey + separator + currentQualifierName;
-
- context.write(new Text("Total Qualifiers across all Rows"),
- new IntWritable(1));
- context.write(new Text(currentQualifierName), new IntWritable(1));
- }
- // Increment versions
- context.write(new Text(currentRowQualifierName + "_Versions"), new IntWritable(1));
- }
- context.getCounter(Counters.CELLS).increment(cellCount);
- }
- } catch (InterruptedException e) {
- e.printStackTrace();
- }
- }
- }
-
- static class IntSumReducer<Key> extends Reducer<Key, IntWritable,
- Key, IntWritable> {
-
- private IntWritable result = new IntWritable();
- public void reduce(Key key, Iterable<IntWritable> values,
- Context context)
- throws IOException, InterruptedException {
- int sum = 0;
- for (IntWritable val : values) {
- sum += val.get();
- }
- result.set(sum);
- context.write(key, result);
- }
- }
-
- /**
- * Sets up the actual job.
- *
- * @param conf The current configuration.
- * @param args The command line parameters.
- * @return The newly created job.
- * @throws IOException When setting up the job fails.
- */
- public static Job createSubmittableJob(Configuration conf, String[] args)
- throws IOException {
- String tableName = args[0];
- Path outputDir = new Path(args[1]);
- String reportSeparatorString = (args.length > 2) ? args[2]: ":";
- conf.set("ReportSeparator", reportSeparatorString);
- Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
- job.setJarByClass(CellCounter.class);
- Scan scan = getConfiguredScanForJob(conf, args);
- TableMapReduceUtil.initTableMapperJob(tableName, scan,
- CellCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);
- job.setNumReduceTasks(1);
- job.setMapOutputKeyClass(Text.class);
- job.setMapOutputValueClass(IntWritable.class);
- job.setOutputFormatClass(TextOutputFormat.class);
- job.setOutputKeyClass(Text.class);
- job.setOutputValueClass(IntWritable.class);
- FileOutputFormat.setOutputPath(job, outputDir);
- job.setReducerClass(IntSumReducer.class);
- return job;
- }
-
- private static Scan getConfiguredScanForJob(Configuration conf, String[] args)
- throws IOException {
- // create scan with any properties set from TableInputFormat
- Scan s = TableInputFormat.createScanFromConfiguration(conf);
- // Set Scan Versions
- if (conf.get(TableInputFormat.SCAN_MAXVERSIONS) == null) {
- // default to all versions unless explicitly set
- s.setMaxVersions(Integer.MAX_VALUE);
- }
- s.setCacheBlocks(false);
- // Set RowFilter or Prefix Filter if applicable.
- Filter rowFilter = getRowFilter(args);
- if (rowFilter!= null) {
- LOG.info("Setting Row Filter for counter.");
- s.setFilter(rowFilter);
- }
- // Set TimeRange if defined
- long timeRange[] = getTimeRange(args);
- if (timeRange != null) {
- LOG.info("Setting TimeRange for counter.");
- s.setTimeRange(timeRange[0], timeRange[1]);
- }
- return s;
- }
-
-
- private static Filter getRowFilter(String[] args) {
- Filter rowFilter = null;
- String filterCriteria = (args.length > 3) ? args[3]: null;
- if (filterCriteria == null) return null;
- if (filterCriteria.startsWith("^")) {
- String regexPattern = filterCriteria.substring(1, filterCriteria.length());
- rowFilter = new RowFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator(regexPattern));
- } else {
- rowFilter = new PrefixFilter(Bytes.toBytesBinary(filterCriteria));
- }
- return rowFilter;
- }
-
- private static long[] getTimeRange(String[] args) throws IOException {
- final String startTimeArgKey = "--starttime=";
- final String endTimeArgKey = "--endtime=";
- long startTime = 0L;
- long endTime = 0L;
-
- for (int i = 1; i < args.length; i++) {
- System.out.println("i:" + i + "arg[i]" + args[i]);
- if (args[i].startsWith(startTimeArgKey)) {
- startTime = Long.parseLong(args[i].substring(startTimeArgKey.length()));
- }
- if (args[i].startsWith(endTimeArgKey)) {
- endTime = Long.parseLong(args[i].substring(endTimeArgKey.length()));
- }
- }
-
- if (startTime == 0 && endTime == 0)
- return null;
-
- endTime = endTime == 0 ? HConstants.LATEST_TIMESTAMP : endTime;
- return new long [] {startTime, endTime};
- }
-
- @Override
- public int run(String[] args) throws Exception {
- if (args.length < 2) {
- System.err.println("ERROR: Wrong number of parameters: " + args.length);
- System.err.println("Usage: CellCounter ");
- System.err.println(" <tablename> <outputDir> <reportSeparator> [^[regex pattern] or " +
- "[Prefix] for row filter]] --starttime=[starttime] --endtime=[endtime]");
- System.err.println(" Note: -D properties will be applied to the conf used. ");
- System.err.println(" Additionally, all of the SCAN properties from TableInputFormat");
- System.err.println(" can be specified to get fine grained control on what is counted..");
- System.err.println(" -D " + TableInputFormat.SCAN_ROW_START + "=<rowkey>");
- System.err.println(" -D " + TableInputFormat.SCAN_ROW_STOP + "=<rowkey>");
- System.err.println(" -D " + TableInputFormat.SCAN_COLUMNS + "=\"<col1> <col2>...\"");
- System.err.println(" -D " + TableInputFormat.SCAN_COLUMN_FAMILY + "=<family1>,<family2>, ...");
- System.err.println(" -D " + TableInputFormat.SCAN_TIMESTAMP + "=<timestamp>");
- System.err.println(" -D " + TableInputFormat.SCAN_TIMERANGE_START + "=<timestamp>");
- System.err.println(" -D " + TableInputFormat.SCAN_TIMERANGE_END + "=<timestamp>");
- System.err.println(" -D " + TableInputFormat.SCAN_MAXVERSIONS + "=<count>");
- System.err.println(" -D " + TableInputFormat.SCAN_CACHEDROWS + "=<count>");
- System.err.println(" -D " + TableInputFormat.SCAN_BATCHSIZE + "=<count>");
- System.err.println(" <reportSeparator> parameter can be used to override the default report separator " +
- "string : used to separate the rowId/column family name and qualifier name.");
- System.err.println(" [^[regex pattern] or [Prefix] parameter can be used to limit the cell counter count " +
- "operation to a limited subset of rows from the table based on regex or prefix pattern.");
- return -1;
- }
- Job job = createSubmittableJob(getConf(), args);
- return (job.waitForCompletion(true) ? 0 : 1);
- }
-
- /**
- * Main entry point.
- * @param args The command line parameters.
- * @throws Exception When running the job fails.
- */
- public static void main(String[] args) throws Exception {
- int errCode = ToolRunner.run(HBaseConfiguration.create(), new CellCounter(), args);
- System.exit(errCode);
- }
-
-}
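A sketch of invoking CellCounter programmatically through ToolRunner, equivalent to the command-line usage above; the table name, output directory and separator are placeholders.

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.CellCounter;
import org.apache.hadoop.util.ToolRunner;

public class RunCellCounter {
  public static void main(String[] args) throws Exception {
    // Count cells in "my_table", writing the report to /tmp/cellcount, using ';' as separator.
    int rc = ToolRunner.run(HBaseConfiguration.create(), new CellCounter(),
        new String[] { "my_table", "/tmp/cellcount", ";" });
    System.exit(rc);
  }
}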
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java
new file mode 100644
index 0000000..e80410f
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java
@@ -0,0 +1,1111 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.snapshot;
+
+import java.io.BufferedInputStream;
+import java.io.FileNotFoundException;
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.Option;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileChecksum;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.io.FileLink;
+import org.apache.hadoop.hbase.io.HFileLink;
+import org.apache.hadoop.hbase.io.WALLink;
+import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
+import org.apache.hadoop.hbase.mob.MobUtils;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotFileInfo;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
+import org.apache.hadoop.hbase.util.AbstractHBaseTool;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.util.HFileArchiveUtil;
+import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
+import org.apache.hadoop.mapreduce.security.TokenCache;
+import org.apache.hadoop.hbase.io.hadoopbackport.ThrottledInputStream;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.util.Tool;
+
+/**
+ * Export the specified snapshot to a given FileSystem.
+ *
+ * The .snapshot/name folder is copied to the destination cluster, and then all of the
+ * hfiles/wals are copied into the .archive/ location on the destination using a Map-Reduce job.
+ * When everything is done, the second cluster can restore the snapshot.
+ */
+@InterfaceAudience.Public
+public class ExportSnapshot extends AbstractHBaseTool implements Tool {
+ public static final String NAME = "exportsnapshot";
+ /** Configuration prefix for overrides for the source filesystem */
+ public static final String CONF_SOURCE_PREFIX = NAME + ".from.";
+ /** Configuration prefix for overrides for the destination filesystem */
+ public static final String CONF_DEST_PREFIX = NAME + ".to.";
+
+ private static final Log LOG = LogFactory.getLog(ExportSnapshot.class);
+
+ private static final String MR_NUM_MAPS = "mapreduce.job.maps";
+ private static final String CONF_NUM_SPLITS = "snapshot.export.format.splits";
+ private static final String CONF_SNAPSHOT_NAME = "snapshot.export.format.snapshot.name";
+ private static final String CONF_SNAPSHOT_DIR = "snapshot.export.format.snapshot.dir";
+ private static final String CONF_FILES_USER = "snapshot.export.files.attributes.user";
+ private static final String CONF_FILES_GROUP = "snapshot.export.files.attributes.group";
+ private static final String CONF_FILES_MODE = "snapshot.export.files.attributes.mode";
+ private static final String CONF_CHECKSUM_VERIFY = "snapshot.export.checksum.verify";
+ private static final String CONF_OUTPUT_ROOT = "snapshot.export.output.root";
+ private static final String CONF_INPUT_ROOT = "snapshot.export.input.root";
+ private static final String CONF_BUFFER_SIZE = "snapshot.export.buffer.size";
+ private static final String CONF_MAP_GROUP = "snapshot.export.default.map.group";
+ private static final String CONF_BANDWIDTH_MB = "snapshot.export.map.bandwidth.mb";
+ protected static final String CONF_SKIP_TMP = "snapshot.export.skip.tmp";
+
+ static class Testing {
+ static final String CONF_TEST_FAILURE = "test.snapshot.export.failure";
+ static final String CONF_TEST_FAILURE_COUNT = "test.snapshot.export.failure.count";
+ int failuresCountToInject = 0;
+ int injectedFailureCount = 0;
+ }
+
+ // Command line options and defaults.
+ static final class Options {
+ static final Option SNAPSHOT = new Option(null, "snapshot", true, "Snapshot to restore.");
+ static final Option TARGET_NAME = new Option(null, "target", true,
+ "Target name for the snapshot.");
+ static final Option COPY_TO = new Option(null, "copy-to", true, "Remote "
+ + "destination hdfs://");
+ static final Option COPY_FROM = new Option(null, "copy-from", true,
+ "Input folder hdfs:// (default hbase.rootdir)");
+ static final Option NO_CHECKSUM_VERIFY = new Option(null, "no-checksum-verify", false,
+ "Do not verify checksum, use name+length only.");
+ static final Option NO_TARGET_VERIFY = new Option(null, "no-target-verify", false,
+ "Do not verify the integrity of the exported snapshot.");
+ static final Option OVERWRITE = new Option(null, "overwrite", false,
+ "Rewrite the snapshot manifest if already exists.");
+ static final Option CHUSER = new Option(null, "chuser", true,
+ "Change the owner of the files to the specified one.");
+ static final Option CHGROUP = new Option(null, "chgroup", true,
+ "Change the group of the files to the specified one.");
+ static final Option CHMOD = new Option(null, "chmod", true,
+ "Change the permission of the files to the specified one.");
+ static final Option MAPPERS = new Option(null, "mappers", true,
+ "Number of mappers to use during the copy (mapreduce.job.maps).");
+ static final Option BANDWIDTH = new Option(null, "bandwidth", true,
+ "Limit bandwidth to this value in MB/second.");
+ }
+
+ // Export Map-Reduce Counters, to keep track of the progress
+ public enum Counter {
+ MISSING_FILES, FILES_COPIED, FILES_SKIPPED, COPY_FAILED,
+ BYTES_EXPECTED, BYTES_SKIPPED, BYTES_COPIED
+ }
+
+ private static class ExportMapper extends Mapper<BytesWritable, NullWritable,
+ NullWritable, NullWritable> {
+ private static final Log LOG = LogFactory.getLog(ExportMapper.class);
+ final static int REPORT_SIZE = 1 * 1024 * 1024;
+ final static int BUFFER_SIZE = 64 * 1024;
+
+ private boolean verifyChecksum;
+ private String filesGroup;
+ private String filesUser;
+ private short filesMode;
+ private int bufferSize;
+
+ private FileSystem outputFs;
+ private Path outputArchive;
+ private Path outputRoot;
+
+ private FileSystem inputFs;
+ private Path inputArchive;
+ private Path inputRoot;
+
+ private static Testing testing = new Testing();
+
+ @Override
+ public void setup(Context context) throws IOException {
+ Configuration conf = context.getConfiguration();
+
+ Configuration srcConf = HBaseConfiguration.createClusterConf(conf, null, CONF_SOURCE_PREFIX);
+ Configuration destConf = HBaseConfiguration.createClusterConf(conf, null, CONF_DEST_PREFIX);
+
+ verifyChecksum = conf.getBoolean(CONF_CHECKSUM_VERIFY, true);
+
+ filesGroup = conf.get(CONF_FILES_GROUP);
+ filesUser = conf.get(CONF_FILES_USER);
+ filesMode = (short)conf.getInt(CONF_FILES_MODE, 0);
+ outputRoot = new Path(conf.get(CONF_OUTPUT_ROOT));
+ inputRoot = new Path(conf.get(CONF_INPUT_ROOT));
+
+ inputArchive = new Path(inputRoot, HConstants.HFILE_ARCHIVE_DIRECTORY);
+ outputArchive = new Path(outputRoot, HConstants.HFILE_ARCHIVE_DIRECTORY);
+
+ try {
+ srcConf.setBoolean("fs." + inputRoot.toUri().getScheme() + ".impl.disable.cache", true);
+ inputFs = FileSystem.get(inputRoot.toUri(), srcConf);
+ } catch (IOException e) {
+ throw new IOException("Could not get the input FileSystem with root=" + inputRoot, e);
+ }
+
+ try {
+ destConf.setBoolean("fs." + outputRoot.toUri().getScheme() + ".impl.disable.cache", true);
+ outputFs = FileSystem.get(outputRoot.toUri(), destConf);
+ } catch (IOException e) {
+ throw new IOException("Could not get the output FileSystem with root="+ outputRoot, e);
+ }
+
+ // Use the default block size of the outputFs if bigger
+ int defaultBlockSize = Math.max((int) outputFs.getDefaultBlockSize(outputRoot), BUFFER_SIZE);
+ bufferSize = conf.getInt(CONF_BUFFER_SIZE, defaultBlockSize);
+ LOG.info("Using bufferSize=" + StringUtils.humanReadableInt(bufferSize));
+
+ for (Counter c : Counter.values()) {
+ context.getCounter(c).increment(0);
+ }
+ if (context.getConfiguration().getBoolean(Testing.CONF_TEST_FAILURE, false)) {
+ testing.failuresCountToInject = conf.getInt(Testing.CONF_TEST_FAILURE_COUNT, 0);
+ // Get number of times we have already injected failure based on attempt number of this
+ // task.
+ testing.injectedFailureCount = context.getTaskAttemptID().getId();
+ }
+ }
+
+ @Override
+ protected void cleanup(Context context) {
+ IOUtils.closeStream(inputFs);
+ IOUtils.closeStream(outputFs);
+ }
+
+ @Override
+ public void map(BytesWritable key, NullWritable value, Context context)
+ throws InterruptedException, IOException {
+ SnapshotFileInfo inputInfo = SnapshotFileInfo.parseFrom(key.copyBytes());
+ Path outputPath = getOutputPath(inputInfo);
+
+ copyFile(context, inputInfo, outputPath);
+ }
+
+ /**
+ * Returns the location where the inputPath will be copied.
+ */
+ private Path getOutputPath(final SnapshotFileInfo inputInfo) throws IOException {
+ Path path = null;
+ switch (inputInfo.getType()) {
+ case HFILE:
+ Path inputPath = new Path(inputInfo.getHfile());
+ String family = inputPath.getParent().getName();
+ TableName table = HFileLink.getReferencedTableName(inputPath.getName());
+ String region = HFileLink.getReferencedRegionName(inputPath.getName());
+ String hfile = HFileLink.getReferencedHFileName(inputPath.getName());
+ path = new Path(FSUtils.getTableDir(new Path("./"), table),
+ new Path(region, new Path(family, hfile)));
+ break;
+ case WAL:
+ LOG.warn("snapshot does not keeps WALs: " + inputInfo);
+ break;
+ default:
+ throw new IOException("Invalid File Type: " + inputInfo.getType().toString());
+ }
+ return new Path(outputArchive, path);
+ }
+
+ /**
+ * Used by TestExportSnapshot to test for retries when failures happen.
+ * Failure is injected in {@link #copyFile(Context, SnapshotFileInfo, Path)}.
+ */
+ private void injectTestFailure(final Context context, final SnapshotFileInfo inputInfo)
+ throws IOException {
+ if (!context.getConfiguration().getBoolean(Testing.CONF_TEST_FAILURE, false)) return;
+ if (testing.injectedFailureCount >= testing.failuresCountToInject) return;
+ testing.injectedFailureCount++;
+ context.getCounter(Counter.COPY_FAILED).increment(1);
+ LOG.debug("Injecting failure. Count: " + testing.injectedFailureCount);
+ throw new IOException(String.format("TEST FAILURE (%d of max %d): Unable to copy input=%s",
+ testing.injectedFailureCount, testing.failuresCountToInject, inputInfo));
+ }
+
+ private void copyFile(final Context context, final SnapshotFileInfo inputInfo,
+ final Path outputPath) throws IOException {
+ // Get the file information
+ FileStatus inputStat = getSourceFileStatus(context, inputInfo);
+
+ // Verify if the output file exists and is the same that we want to copy
+ if (outputFs.exists(outputPath)) {
+ FileStatus outputStat = outputFs.getFileStatus(outputPath);
+ if (outputStat != null && sameFile(inputStat, outputStat)) {
+ LOG.info("Skip copy " + inputStat.getPath() + " to " + outputPath + ", same file.");
+ context.getCounter(Counter.FILES_SKIPPED).increment(1);
+ context.getCounter(Counter.BYTES_SKIPPED).increment(inputStat.getLen());
+ return;
+ }
+ }
+
+ InputStream in = openSourceFile(context, inputInfo);
+ int bandwidthMB = context.getConfiguration().getInt(CONF_BANDWIDTH_MB, 100);
+ if (Integer.MAX_VALUE != bandwidthMB) {
+ in = new ThrottledInputStream(new BufferedInputStream(in), bandwidthMB * 1024 * 1024L);
+ }
+
+ try {
+ context.getCounter(Counter.BYTES_EXPECTED).increment(inputStat.getLen());
+
+ // Ensure that the output folder is there and copy the file
+ createOutputPath(outputPath.getParent());
+ FSDataOutputStream out = outputFs.create(outputPath, true);
+ try {
+ copyData(context, inputStat.getPath(), in, outputPath, out, inputStat.getLen());
+ } finally {
+ out.close();
+ }
+
+ // Try to preserve attributes
+ if (!preserveAttributes(outputPath, inputStat)) {
+ LOG.warn("You may have to run manually chown on: " + outputPath);
+ }
+ } finally {
+ in.close();
+ injectTestFailure(context, inputInfo);
+ }
+ }
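The bandwidth cap above is enforced by wrapping the source stream in a ThrottledInputStream. As a rough, self-contained illustration of the same idea (a simplified sketch, not the ThrottledInputStream implementation the job actually uses), a copy loop can pace itself by sleeping whenever it gets ahead of the allowed rate:

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;

/** Simplified sketch of a bandwidth-capped copy loop; not the ThrottledInputStream the job uses. */
public class ThrottledCopySketch {

  /** Read everything from {@code in}, sleeping so the average rate stays under maxBytesPerSec. */
  static long copyWithCap(InputStream in, long maxBytesPerSec, byte[] buffer) throws IOException {
    long start = System.currentTimeMillis();
    long total = 0;
    int n;
    while ((n = in.read(buffer)) > 0) {
      total += n;
      long elapsedMs = Math.max(1, System.currentTimeMillis() - start);
      long allowedMs = (total * 1000L) / maxBytesPerSec;   // time the cap allows for `total` bytes
      if (allowedMs > elapsedMs) {
        try {
          Thread.sleep(allowedMs - elapsedMs);             // fall back to the permitted rate
        } catch (InterruptedException e) {
          Thread.currentThread().interrupt();
          throw new IOException("interrupted while throttling", e);
        }
      }
      // A real copy would write buffer[0..n) to the destination stream here.
    }
    return total;
  }

  public static void main(String[] args) throws IOException {
    byte[] data = new byte[256 * 1024];
    long copied = copyWithCap(new ByteArrayInputStream(data), 128 * 1024, new byte[64 * 1024]);
    System.out.println("copied " + copied + " bytes");     // takes roughly two seconds at 128 KB/s
  }
}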
+
+ /**
+ * Create the output folder and optionally set ownership.
+ */
+ private void createOutputPath(final Path path) throws IOException {
+ if (filesUser == null && filesGroup == null) {
+ outputFs.mkdirs(path);
+ } else {
+ Path parent = path.getParent();
+ if (!outputFs.exists(parent) && !parent.isRoot()) {
+ createOutputPath(parent);
+ }
+ outputFs.mkdirs(path);
+ if (filesUser != null || filesGroup != null) {
+ // override the owner when non-null user/group is specified
+ outputFs.setOwner(path, filesUser, filesGroup);
+ }
+ if (filesMode > 0) {
+ outputFs.setPermission(path, new FsPermission(filesMode));
+ }
+ }
+ }
+
+ /**
+ * Try to preserve the file attributes selected by the user, copying them from the source file.
+ * This is only required when exporting as a user other than "hbase", or on a system that
+ * doesn't have the "hbase" user.
+ *
+ * This is not considered a blocking failure, since the user can later force a chown/chmod
+ * using an owner and mode that are known to exist on the destination system.
+ */
+ private boolean preserveAttributes(final Path path, final FileStatus refStat) {
+ FileStatus stat;
+ try {
+ stat = outputFs.getFileStatus(path);
+ } catch (IOException e) {
+ LOG.warn("Unable to get the status for file=" + path);
+ return false;
+ }
+
+ try {
+ if (filesMode > 0 && stat.getPermission().toShort() != filesMode) {
+ outputFs.setPermission(path, new FsPermission(filesMode));
+ } else if (refStat != null && !stat.getPermission().equals(refStat.getPermission())) {
+ outputFs.setPermission(path, refStat.getPermission());
+ }
+ } catch (IOException e) {
+ LOG.warn("Unable to set the permission for file="+ stat.getPath() +": "+ e.getMessage());
+ return false;
+ }
+
+ boolean hasRefStat = (refStat != null);
+ String user = stringIsNotEmpty(filesUser) || !hasRefStat ? filesUser : refStat.getOwner();
+ String group = stringIsNotEmpty(filesGroup) || !hasRefStat ? filesGroup : refStat.getGroup();
+ if (stringIsNotEmpty(user) || stringIsNotEmpty(group)) {
+ try {
+ if (!(user.equals(stat.getOwner()) && group.equals(stat.getGroup()))) {
+ outputFs.setOwner(path, user, group);
+ }
+ } catch (IOException e) {
+ LOG.warn("Unable to set the owner/group for file="+ stat.getPath() +": "+ e.getMessage());
+ LOG.warn("The user/group may not exist on the destination cluster: user=" +
+ user + " group=" + group);
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ private boolean stringIsNotEmpty(final String str) {
+ return str != null && str.length() > 0;
+ }
+
+ private void copyData(final Context context,
+ final Path inputPath, final InputStream in,
+ final Path outputPath, final FSDataOutputStream out,
+ final long inputFileSize)
+ throws IOException {
+ final String statusMessage = "copied %s/" + StringUtils.humanReadableInt(inputFileSize) +
+ " (%.1f%%)";
+
+ try {
+ byte[] buffer = new byte[bufferSize];
+ long totalBytesWritten = 0;
+ int reportBytes = 0;
+ int bytesRead;
+
+ long stime = System.currentTimeMillis();
+ while ((bytesRead = in.read(buffer)) > 0) {
+ out.write(buffer, 0, bytesRead);
+ totalBytesWritten += bytesRead;
+ reportBytes += bytesRead;
+
+ if (reportBytes >= REPORT_SIZE) {
+ context.getCounter(Counter.BYTES_COPIED).increment(reportBytes);
+ context.setStatus(String.format(statusMessage,
+ StringUtils.humanReadableInt(totalBytesWritten),
+ (totalBytesWritten/(float)inputFileSize) * 100.0f) +
+ " from " + inputPath + " to " + outputPath);
+ reportBytes = 0;
+ }
+ }
+ long etime = System.currentTimeMillis();
+
+ context.getCounter(Counter.BYTES_COPIED).increment(reportBytes);
+ context.setStatus(String.format(statusMessage,
+ StringUtils.humanReadableInt(totalBytesWritten),
+ (totalBytesWritten/(float)inputFileSize) * 100.0f) +
+ " from " + inputPath + " to " + outputPath);
+
+ // Verify that the written size matches
+ if (totalBytesWritten != inputFileSize) {
+ String msg = "number of bytes copied not matching copied=" + totalBytesWritten +
+ " expected=" + inputFileSize + " for file=" + inputPath;
+ throw new IOException(msg);
+ }
+
+ LOG.info("copy completed for input=" + inputPath + " output=" + outputPath);
+ LOG.info("size=" + totalBytesWritten +
+ " (" + StringUtils.humanReadableInt(totalBytesWritten) + ")" +
+ " time=" + StringUtils.formatTimeDiff(etime, stime) +
+ String.format(" %.3fM/sec", (totalBytesWritten / ((etime - stime)/1000.0))/1048576.0));
+ context.getCounter(Counter.FILES_COPIED).increment(1);
+ } catch (IOException e) {
+ LOG.error("Error copying " + inputPath + " to " + outputPath, e);
+ context.getCounter(Counter.COPY_FAILED).increment(1);
+ throw e;
+ }
+ }
+
+ /**
+ * Try to open the "source" file.
+ * Throws an IOException if the communication with the inputFs fails or
+ * if the file is not found.
+ */
+ private FSDataInputStream openSourceFile(Context context, final SnapshotFileInfo fileInfo)
+ throws IOException {
+ try {
+ Configuration conf = context.getConfiguration();
+ FileLink link = null;
+ switch (fileInfo.getType()) {
+ case HFILE:
+ Path inputPath = new Path(fileInfo.getHfile());
+ link = getFileLink(inputPath, conf);
+ break;
+ case WAL:
+ String serverName = fileInfo.getWalServer();
+ String logName = fileInfo.getWalName();
+ link = new WALLink(inputRoot, serverName, logName);
+ break;
+ default:
+ throw new IOException("Invalid File Type: " + fileInfo.getType().toString());
+ }
+ return link.open(inputFs);
+ } catch (IOException e) {
+ context.getCounter(Counter.MISSING_FILES).increment(1);
+ LOG.error("Unable to open source file=" + fileInfo.toString(), e);
+ throw e;
+ }
+ }
+
+ private FileStatus getSourceFileStatus(Context context, final SnapshotFileInfo fileInfo)
+ throws IOException {
+ try {
+ Configuration conf = context.getConfiguration();
+ FileLink link = null;
+ switch (fileInfo.getType()) {
+ case HFILE:
+ Path inputPath = new Path(fileInfo.getHfile());
+ link = getFileLink(inputPath, conf);
+ break;
+ case WAL:
+ link = new WALLink(inputRoot, fileInfo.getWalServer(), fileInfo.getWalName());
+ break;
+ default:
+ throw new IOException("Invalid File Type: " + fileInfo.getType().toString());
+ }
+ return link.getFileStatus(inputFs);
+ } catch (FileNotFoundException e) {
+ context.getCounter(Counter.MISSING_FILES).increment(1);
+ LOG.error("Unable to get the status for source file=" + fileInfo.toString(), e);
+ throw e;
+ } catch (IOException e) {
+ LOG.error("Unable to get the status for source file=" + fileInfo.toString(), e);
+ throw e;
+ }
+ }
+
+ private FileLink getFileLink(Path path, Configuration conf) throws IOException {
+ String regionName = HFileLink.getReferencedRegionName(path.getName());
+ TableName tableName = HFileLink.getReferencedTableName(path.getName());
+ if (MobUtils.getMobRegionInfo(tableName).getEncodedName().equals(regionName)) {
+ return HFileLink.buildFromHFileLinkPattern(MobUtils.getQualifiedMobRootDir(conf),
+ HFileArchiveUtil.getArchivePath(conf), path);
+ }
+ return HFileLink.buildFromHFileLinkPattern(inputRoot, inputArchive, path);
+ }
+
+ private FileChecksum getFileChecksum(final FileSystem fs, final Path path) {
+ try {
+ return fs.getFileChecksum(path);
+ } catch (IOException e) {
+ LOG.warn("Unable to get checksum for file=" + path, e);
+ return null;
+ }
+ }
+
+ /**
+ * Check if the two files are equal by looking at the file length,
+ * and at the checksum (if the user has specified the verifyChecksum flag).
+ */
+ private boolean sameFile(final FileStatus inputStat, final FileStatus outputStat) {
+ // Not matching length
+ if (inputStat.getLen() != outputStat.getLen()) return false;
+
+ // Mark files as equal, since the user asked for no checksum verification
+ if (!verifyChecksum) return true;
+
+ // If checksums are not available, files are not the same.
+ FileChecksum inChecksum = getFileChecksum(inputFs, inputStat.getPath());
+ if (inChecksum == null) return false;
+
+ FileChecksum outChecksum = getFileChecksum(outputFs, outputStat.getPath());
+ if (outChecksum == null) return false;
+
+ return inChecksum.equals(outChecksum);
+ }
+ }
+
+ // ==========================================================================
+ // Input Format
+ // ==========================================================================
+
+ /**
+ * Extract the list of files (HFiles/WALs) to copy using Map-Reduce.
+ * @return list of files referenced by the snapshot (pair of path and size)
+ */
+ private static List<Pair<SnapshotFileInfo, Long>> getSnapshotFiles(final Configuration conf,
+ final FileSystem fs, final Path snapshotDir) throws IOException {
+ SnapshotDescription snapshotDesc = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
+
+ final List<Pair<SnapshotFileInfo, Long>> files = new ArrayList<>();
+ final TableName table = TableName.valueOf(snapshotDesc.getTable());
+
+ // Get snapshot files
+ LOG.info("Loading Snapshot '" + snapshotDesc.getName() + "' hfile list");
+ SnapshotReferenceUtil.visitReferencedFiles(conf, fs, snapshotDir, snapshotDesc,
+ new SnapshotReferenceUtil.SnapshotVisitor() {
+ @Override
+ public void storeFile(final HRegionInfo regionInfo, final String family,
+ final SnapshotRegionManifest.StoreFile storeFile) throws IOException {
+ // for storeFile.hasReference() case, copied as part of the manifest
+ if (!storeFile.hasReference()) {
+ String region = regionInfo.getEncodedName();
+ String hfile = storeFile.getName();
+ Path path = HFileLink.createPath(table, region, family, hfile);
+
+ SnapshotFileInfo fileInfo = SnapshotFileInfo.newBuilder()
+ .setType(SnapshotFileInfo.Type.HFILE)
+ .setHfile(path.toString())
+ .build();
+
+ long size;
+ if (storeFile.hasFileSize()) {
+ size = storeFile.getFileSize();
+ } else {
+ size = HFileLink.buildFromHFileLinkPattern(conf, path).getFileStatus(fs).getLen();
+ }
+ files.add(new Pair<>(fileInfo, size));
+ }
+ }
+ });
+
+ return files;
+ }
+
+ /**
+ * Given a list of file paths and sizes, create around ngroups splits that are as balanced as
+ * possible. The groups created will have similar amounts of bytes.
+ * <p>
+ * The algorithm used is pretty straightforward: the file list is sorted by size, and then each
+ * group in turn takes the biggest file still available, iterating through the groups and
+ * alternating the direction.
+ */
+ static List<List<Pair<SnapshotFileInfo, Long>>> getBalancedSplits(
+ final List<Pair<SnapshotFileInfo, Long>> files, final int ngroups) {
+ // Sort files by size, from small to big
+ Collections.sort(files, new Comparator<Pair<SnapshotFileInfo, Long>>() {
+ public int compare(Pair<SnapshotFileInfo, Long> a, Pair<SnapshotFileInfo, Long> b) {
+ long r = a.getSecond() - b.getSecond();
+ return (r < 0) ? -1 : ((r > 0) ? 1 : 0);
+ }
+ });
+
+ // create balanced groups
+ List<List<Pair<SnapshotFileInfo, Long>>> fileGroups = new LinkedList<>();
+ long[] sizeGroups = new long[ngroups];
+ int hi = files.size() - 1;
+ int lo = 0;
+
+ List<Pair<SnapshotFileInfo, Long>> group;
+ int dir = 1;
+ int g = 0;
+
+ while (hi >= lo) {
+ if (g == fileGroups.size()) {
+ group = new LinkedList<>();
+ fileGroups.add(group);
+ } else {
+ group = fileGroups.get(g);
+ }
+
+ Pair<SnapshotFileInfo, Long> fileInfo = files.get(hi--);
+
+ // add the hi one
+ sizeGroups[g] += fileInfo.getSecond();
+ group.add(fileInfo);
+
+ // change direction when at the end or the beginning
+ g += dir;
+ if (g == ngroups) {
+ dir = -1;
+ g = ngroups - 1;
+ } else if (g < 0) {
+ dir = 1;
+ g = 0;
+ }
+ }
+
+ if (LOG.isDebugEnabled()) {
+ for (int i = 0; i < sizeGroups.length; ++i) {
+ LOG.debug("export split=" + i + " size=" + StringUtils.humanReadableInt(sizeGroups[i]));
+ }
+ }
+
+ return fileGroups;
+ }
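To make the snake ordering above concrete, here is a simplified standalone sketch of the same balancing idea (it works on plain long sizes instead of SnapshotFileInfo pairs and creates all groups up front; illustration only, not part of the patch):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

/** Simplified sketch of the snake-order balancing in getBalancedSplits(); illustration only. */
public class BalancedSplitSketch {

  static List<List<Long>> balance(List<Long> sizes, int ngroups) {
    List<Long> sorted = new ArrayList<>(sizes);
    Collections.sort(sorted);                              // smallest first, as in the real code
    List<List<Long>> groups = new ArrayList<>(ngroups);
    for (int i = 0; i < ngroups; ++i) {
      groups.add(new ArrayList<Long>());                   // the real code creates groups lazily
    }
    int g = 0;
    int dir = 1;
    for (int hi = sorted.size() - 1; hi >= 0; --hi) {      // hand out the biggest remaining file
      groups.get(g).add(sorted.get(hi));
      g += dir;
      if (g == ngroups) { dir = -1; g = ngroups - 1; }     // bounce back at the last group
      else if (g < 0)   { dir = 1;  g = 0; }               // bounce back at the first group
    }
    return groups;
  }

  public static void main(String[] args) {
    // Sizes 9,7,5,3,2,1 over two groups follow the snake order 0,1,1,0,0,1 and end up as
    // {9,3,2} (sum 14) and {7,5,1} (sum 13).
    System.out.println(balance(Arrays.asList(9L, 7L, 5L, 3L, 2L, 1L), 2));
  }
}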
+
+ private static class ExportSnapshotInputFormat extends InputFormat<BytesWritable, NullWritable> {
+ @Override
+ public RecordReader<BytesWritable, NullWritable> createRecordReader(InputSplit split,
+ TaskAttemptContext tac) throws IOException, InterruptedException {
+ return new ExportSnapshotRecordReader(((ExportSnapshotInputSplit)split).getSplitKeys());
+ }
+
+ @Override
+ public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
+ Configuration conf = context.getConfiguration();
+ Path snapshotDir = new Path(conf.get(CONF_SNAPSHOT_DIR));
+ FileSystem fs = FileSystem.get(snapshotDir.toUri(), conf);
+
+ List<Pair<SnapshotFileInfo, Long>> snapshotFiles = getSnapshotFiles(conf, fs, snapshotDir);
+ int mappers = conf.getInt(CONF_NUM_SPLITS, 0);
+ if (mappers == 0 && snapshotFiles.size() > 0) {
+ mappers = 1 + (snapshotFiles.size() / conf.getInt(CONF_MAP_GROUP, 10));
+ mappers = Math.min(mappers, snapshotFiles.size());
+ conf.setInt(CONF_NUM_SPLITS, mappers);
+ conf.setInt(MR_NUM_MAPS, mappers);
+ }
+
+ List<List<Pair<SnapshotFileInfo, Long>>> groups = getBalancedSplits(snapshotFiles, mappers);
+ List<InputSplit> splits = new ArrayList<>(groups.size());
+ for (List<Pair<SnapshotFileInfo, Long>> files: groups) {
+ splits.add(new ExportSnapshotInputSplit(files));
+ }
+ return splits;
+ }
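As a worked example of the mapper count above: with the CONF_MAP_GROUP default of 10, a snapshot with 45 files and no explicit split count gets 1 + 45/10 = 5 mappers, and the count is never allowed to exceed the number of files.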
+
+ private static class ExportSnapshotInputSplit extends InputSplit implements Writable {
+ private List<Pair<BytesWritable, Long>> files;
+ private long length;
+
+ public ExportSnapshotInputSplit() {
+ this.files = null;
+ }
+
+ public ExportSnapshotInputSplit(final List<Pair<SnapshotFileInfo, Long>> snapshotFiles) {
+ this.files = new ArrayList<>(snapshotFiles.size());
+ for (Pair<SnapshotFileInfo, Long> fileInfo: snapshotFiles) {
+ this.files.add(new Pair<>(
+ new BytesWritable(fileInfo.getFirst().toByteArray()), fileInfo.getSecond()));
+ this.length += fileInfo.getSecond();
+ }
+ }
+
+ private List<Pair<BytesWritable, Long>> getSplitKeys() {
+ return files;
+ }
+
+ @Override
+ public long getLength() throws IOException, InterruptedException {
+ return length;
+ }
+
+ @Override
+ public String[] getLocations() throws IOException, InterruptedException {
+ return new String[] {};
+ }
+
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ int count = in.readInt();
+ files = new ArrayList<>(count);
+ length = 0;
+ for (int i = 0; i < count; ++i) {
+ BytesWritable fileInfo = new BytesWritable();
+ fileInfo.readFields(in);
+ long size = in.readLong();
+ files.add(new Pair<>(fileInfo, size));
+ length += size;
+ }
+ }
+
+ @Override
+ public void write(DataOutput out) throws IOException {
+ out.writeInt(files.size());
+ for (final Pair<BytesWritable, Long> fileInfo: files) {
+ fileInfo.getFirst().write(out);
+ out.writeLong(fileInfo.getSecond());
+ }
+ }
+ }
+
+ private static class ExportSnapshotRecordReader
+ extends RecordReader<BytesWritable, NullWritable> {
+ private final List<Pair<BytesWritable, Long>> files;
+ private long totalSize = 0;
+ private long procSize = 0;
+ private int index = -1;
+
+ ExportSnapshotRecordReader(final List<Pair<BytesWritable, Long>> files) {
+ this.files = files;
+ for (Pair<BytesWritable, Long> fileInfo: files) {
+ totalSize += fileInfo.getSecond();
+ }
+ }
+
+ @Override
+ public void close() { }
+
+ @Override
+ public BytesWritable getCurrentKey() { return files.get(index).getFirst(); }
+
+ @Override
+ public NullWritable getCurrentValue() { return NullWritable.get(); }
+
+ @Override
+ public float getProgress() { return (float)procSize / totalSize; }
+
+ @Override
+ public void initialize(InputSplit split, TaskAttemptContext tac) { }
+
+ @Override
+ public boolean nextKeyValue() {
+ if (index >= 0) {
+ procSize += files.get(index).getSecond();
+ }
+ return(++index < files.size());
+ }
+ }
+ }
+
+ // ==========================================================================
+ // Tool
+ // ==========================================================================
+
+ /**
+ * Run Map-Reduce Job to perform the files copy.
+ */
+ private void runCopyJob(final Path inputRoot, final Path outputRoot,
+ final String snapshotName, final Path snapshotDir, final boolean verifyChecksum,
+ final String filesUser, final String filesGroup, final int filesMode,
+ final int mappers, final int bandwidthMB)
+ throws IOException, InterruptedException, ClassNotFoundException {
+ Configuration conf = getConf();
+ if (filesGroup != null) conf.set(CONF_FILES_GROUP, filesGroup);
+ if (filesUser != null) conf.set(CONF_FILES_USER, filesUser);
+ if (mappers > 0) {
+ conf.setInt(CONF_NUM_SPLITS, mappers);
+ conf.setInt(MR_NUM_MAPS, mappers);
+ }
+ conf.setInt(CONF_FILES_MODE, filesMode);
+ conf.setBoolean(CONF_CHECKSUM_VERIFY, verifyChecksum);
+ conf.set(CONF_OUTPUT_ROOT, outputRoot.toString());
+ conf.set(CONF_INPUT_ROOT, inputRoot.toString());
+ conf.setInt(CONF_BANDWIDTH_MB, bandwidthMB);
+ conf.set(CONF_SNAPSHOT_NAME, snapshotName);
+ conf.set(CONF_SNAPSHOT_DIR, snapshotDir.toString());
+
+ Job job = new Job(conf);
+ job.setJobName("ExportSnapshot-" + snapshotName);
+ job.setJarByClass(ExportSnapshot.class);
+ TableMapReduceUtil.addDependencyJars(job);
+ job.setMapperClass(ExportMapper.class);
+ job.setInputFormatClass(ExportSnapshotInputFormat.class);
+ job.setOutputFormatClass(NullOutputFormat.class);
+ job.setMapSpeculativeExecution(false);
+ job.setNumReduceTasks(0);
+
+ // Acquire the delegation Tokens
+ Configuration srcConf = HBaseConfiguration.createClusterConf(conf, null, CONF_SOURCE_PREFIX);
+ TokenCache.obtainTokensForNamenodes(job.getCredentials(),
+ new Path[] { inputRoot }, srcConf);
+ Configuration destConf = HBaseConfiguration.createClusterConf(conf, null, CONF_DEST_PREFIX);
+ TokenCache.obtainTokensForNamenodes(job.getCredentials(),
+ new Path[] { outputRoot }, destConf);
+
+ // Run the MR Job
+ if (!job.waitForCompletion(true)) {
+ // TODO: Replace the fixed string with job.getStatus().getFailureInfo()
+ // once it is available on all the supported versions.
+ throw new ExportSnapshotException("Copy Files Map-Reduce Job failed");
+ }
+ }
+
+ private void verifySnapshot(final Configuration baseConf,
+ final FileSystem fs, final Path rootDir, final Path snapshotDir) throws IOException {
+ // Update the conf with the current root dir, since it may be a different cluster
+ Configuration conf = new Configuration(baseConf);
+ FSUtils.setRootDir(conf, rootDir);
+ FSUtils.setFsDefault(conf, FSUtils.getRootDir(conf));
+ SnapshotDescription snapshotDesc = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
+ SnapshotReferenceUtil.verifySnapshot(conf, fs, snapshotDir, snapshotDesc);
+ }
+
+ /**
+ * Set path ownership.
+ */
+ private void setOwner(final FileSystem fs, final Path path, final String user,
+ final String group, final boolean recursive) throws IOException {
+ if (user != null || group != null) {
+ if (recursive && fs.isDirectory(path)) {
+ for (FileStatus child : fs.listStatus(path)) {
+ setOwner(fs, child.getPath(), user, group, recursive);
+ }
+ }
+ fs.setOwner(path, user, group);
+ }
+ }
+
+ /**
+ * Set path permission.
+ */
+ private void setPermission(final FileSystem fs, final Path path, final short filesMode,
+ final boolean recursive) throws IOException {
+ if (filesMode > 0) {
+ FsPermission perm = new FsPermission(filesMode);
+ if (recursive && fs.isDirectory(path)) {
+ for (FileStatus child : fs.listStatus(path)) {
+ setPermission(fs, child.getPath(), filesMode, recursive);
+ }
+ }
+ fs.setPermission(path, perm);
+ }
+ }
+
+ private boolean verifyTarget = true;
+ private boolean verifyChecksum = true;
+ private String snapshotName = null;
+ private String targetName = null;
+ private boolean overwrite = false;
+ private String filesGroup = null;
+ private String filesUser = null;
+ private Path outputRoot = null;
+ private Path inputRoot = null;
+ private int bandwidthMB = Integer.MAX_VALUE;
+ private int filesMode = 0;
+ private int mappers = 0;
+
+ @Override
+ protected void processOptions(CommandLine cmd) {
+ snapshotName = cmd.getOptionValue(Options.SNAPSHOT.getLongOpt(), snapshotName);
+ targetName = cmd.getOptionValue(Options.TARGET_NAME.getLongOpt(), targetName);
+ if (cmd.hasOption(Options.COPY_TO.getLongOpt())) {
+ outputRoot = new Path(cmd.getOptionValue(Options.COPY_TO.getLongOpt()));
+ }
+ if (cmd.hasOption(Options.COPY_FROM.getLongOpt())) {
+ inputRoot = new Path(cmd.getOptionValue(Options.COPY_FROM.getLongOpt()));
+ }
+ mappers = getOptionAsInt(cmd, Options.MAPPERS.getLongOpt(), mappers);
+ filesUser = cmd.getOptionValue(Options.CHUSER.getLongOpt(), filesUser);
+ filesGroup = cmd.getOptionValue(Options.CHGROUP.getLongOpt(), filesGroup);
+ filesMode = getOptionAsInt(cmd, Options.CHMOD.getLongOpt(), filesMode);
+ bandwidthMB = getOptionAsInt(cmd, Options.BANDWIDTH.getLongOpt(), bandwidthMB);
+ overwrite = cmd.hasOption(Options.OVERWRITE.getLongOpt());
+ // verifyChecksum and verifyTarget also take values read from old args in processOldArgs(...).
+ verifyChecksum = !cmd.hasOption(Options.NO_CHECKSUM_VERIFY.getLongOpt());
+ verifyTarget = !cmd.hasOption(Options.NO_TARGET_VERIFY.getLongOpt());
+ }
+
+ /**
+ * Execute the export snapshot by copying the snapshot metadata, hfiles and wals.
+ * @return 0 on success, and != 0 upon failure.
+ */
+ @Override
+ public int doWork() throws IOException {
+ Configuration conf = getConf();
+
+ // Check user options
+ if (snapshotName == null) {
+ System.err.println("Snapshot name not provided.");
+ LOG.error("Use -h or --help for usage instructions.");
+ return 0;
+ }
+
+ if (outputRoot == null) {
+ System.err.println("Destination file-system (--" + Options.COPY_TO.getLongOpt()
+ + ") not provided.");
+ LOG.error("Use -h or --help for usage instructions.");
+ return 0;
+ }
+
+ if (targetName == null) {
+ targetName = snapshotName;
+ }
+ if (inputRoot == null) {
+ inputRoot = FSUtils.getRootDir(conf);
+ } else {
+ FSUtils.setRootDir(conf, inputRoot);
+ }
+
+ Configuration srcConf = HBaseConfiguration.createClusterConf(conf, null, CONF_SOURCE_PREFIX);
+ srcConf.setBoolean("fs." + inputRoot.toUri().getScheme() + ".impl.disable.cache", true);
+ FileSystem inputFs = FileSystem.get(inputRoot.toUri(), srcConf);
+ LOG.debug("inputFs=" + inputFs.getUri().toString() + " inputRoot=" + inputRoot);
+ Configuration destConf = HBaseConfiguration.createClusterConf(conf, null, CONF_DEST_PREFIX);
+ destConf.setBoolean("fs." + outputRoot.toUri().getScheme() + ".impl.disable.cache", true);
+ FileSystem outputFs = FileSystem.get(outputRoot.toUri(), destConf);
+ LOG.debug("outputFs=" + outputFs.getUri().toString() + " outputRoot=" + outputRoot.toString());
+
+ boolean skipTmp = conf.getBoolean(CONF_SKIP_TMP, false);
+
+ Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, inputRoot);
+ Path snapshotTmpDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(targetName, outputRoot);
+ Path outputSnapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(targetName, outputRoot);
+ Path initialOutputSnapshotDir = skipTmp ? outputSnapshotDir : snapshotTmpDir;
+
+ // Find the necessary directory which need to change owner and group
+ Path needSetOwnerDir = SnapshotDescriptionUtils.getSnapshotRootDir(outputRoot);
+ if (outputFs.exists(needSetOwnerDir)) {
+ if (skipTmp) {
+ needSetOwnerDir = outputSnapshotDir;
+ } else {
+ needSetOwnerDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(outputRoot);
+ if (outputFs.exists(needSetOwnerDir)) {
+ needSetOwnerDir = snapshotTmpDir;
+ }
+ }
+ }
+
+ // Check if the snapshot already exists
+ if (outputFs.exists(outputSnapshotDir)) {
+ if (overwrite) {
+ if (!outputFs.delete(outputSnapshotDir, true)) {
+ System.err.println("Unable to remove existing snapshot directory: " + outputSnapshotDir);
+ return 1;
+ }
+ } else {
+ System.err.println("The snapshot '" + targetName +
+ "' already exists in the destination: " + outputSnapshotDir);
+ return 1;
+ }
+ }
+
+ if (!skipTmp) {
+ // Check if the snapshot already in-progress
+ if (outputFs.exists(snapshotTmpDir)) {
+ if (overwrite) {
+ if (!outputFs.delete(snapshotTmpDir, true)) {
+ System.err.println("Unable to remove existing snapshot tmp directory: "+snapshotTmpDir);
+ return 1;
+ }
+ } else {
+ System.err.println("A snapshot with the same name '"+ targetName +"' may be in-progress");
+ System.err.println("Please check "+snapshotTmpDir+". If the snapshot has completed, ");
+ System.err.println("consider removing "+snapshotTmpDir+" by using the -overwrite option");
+ return 1;
+ }
+ }
+ }
+
+ // Step 1 - Copy fs1:/.snapshot/<snapshot> to fs2:/.snapshot/.tmp/<snapshot>
+ // The snapshot references must be copied before the hfiles, otherwise the cleaner
+ // will remove them because they are unreferenced.
+ try {
+ LOG.info("Copy Snapshot Manifest");
+ FileUtil.copy(inputFs, snapshotDir, outputFs, initialOutputSnapshotDir, false, false, conf);
+ } catch (IOException e) {
+ throw new ExportSnapshotException("Failed to copy the snapshot directory: from=" +
+ snapshotDir + " to=" + initialOutputSnapshotDir, e);
+ } finally {
+ if (filesUser != null || filesGroup != null) {
+ LOG.warn((filesUser == null ? "" : "Change the owner of " + needSetOwnerDir + " to "
+ + filesUser)
+ + (filesGroup == null ? "" : ", Change the group of " + needSetOwnerDir + " to "
+ + filesGroup));
+ setOwner(outputFs, needSetOwnerDir, filesUser, filesGroup, true);
+ }
+ if (filesMode > 0) {
+ LOG.warn("Change the permission of " + needSetOwnerDir + " to " + filesMode);
+ setPermission(outputFs, needSetOwnerDir, (short)filesMode, true);
+ }
+ }
+
+ // Write a new .snapshotinfo if the target name is different from the source name
+ if (!targetName.equals(snapshotName)) {
+ SnapshotDescription snapshotDesc =
+ SnapshotDescriptionUtils.readSnapshotInfo(inputFs, snapshotDir)
+ .toBuilder()
+ .setName(targetName)
+ .build();
+ SnapshotDescriptionUtils.writeSnapshotInfo(snapshotDesc, initialOutputSnapshotDir, outputFs);
+ if (filesUser != null || filesGroup != null) {
+ outputFs.setOwner(new Path(initialOutputSnapshotDir,
+ SnapshotDescriptionUtils.SNAPSHOTINFO_FILE), filesUser, filesGroup);
+ }
+ if (filesMode > 0) {
+ outputFs.setPermission(new Path(initialOutputSnapshotDir,
+ SnapshotDescriptionUtils.SNAPSHOTINFO_FILE), new FsPermission((short)filesMode));
+ }
+ }
+
+ // Step 2 - Start MR Job to copy files
+ // The snapshot references must be copied before the files, otherwise the files get removed
+ // by the HFileArchiver, since they have no references.
+ try {
+ runCopyJob(inputRoot, outputRoot, snapshotName, snapshotDir, verifyChecksum,
+ filesUser, filesGroup, filesMode, mappers, bandwidthMB);
+
+ LOG.info("Finalize the Snapshot Export");
+ if (!skipTmp) {
+ // Step 3 - Rename fs2:/.snapshot/.tmp/<snapshot> fs2:/.snapshot/<snapshot>
+ if (!outputFs.rename(snapshotTmpDir, outputSnapshotDir)) {
+ throw new ExportSnapshotException("Unable to rename snapshot directory from=" +
+ snapshotTmpDir + " to=" + outputSnapshotDir);
+ }
+ }
+
+ // Step 4 - Verify snapshot integrity
+ if (verifyTarget) {
+ LOG.info("Verify snapshot integrity");
+ verifySnapshot(destConf, outputFs, outputRoot, outputSnapshotDir);
+ }
+
+ LOG.info("Export Completed: " + targetName);
+ return 0;
+ } catch (Exception e) {
+ LOG.error("Snapshot export failed", e);
+ if (!skipTmp) {
+ outputFs.delete(snapshotTmpDir, true);
+ }
+ outputFs.delete(outputSnapshotDir, true);
+ return 1;
+ } finally {
+ IOUtils.closeStream(inputFs);
+ IOUtils.closeStream(outputFs);
+ }
+ }
+
+ @Override
+ protected void printUsage() {
+ super.printUsage();
+ System.out.println("\n"
+ + "Examples:\n"
+ + " hbase snapshot export \\\n"
+ + " --snapshot MySnapshot --copy-to hdfs://srv2:8082/hbase \\\n"
+ + " --chuser MyUser --chgroup MyGroup --chmod 700 --mappers 16\n"
+ + "\n"
+ + " hbase snapshot export \\\n"
+ + " --snapshot MySnapshot --copy-from hdfs://srv2:8082/hbase \\\n"
+ + " --copy-to hdfs://srv1:50070/hbase");
+ }
+
+ @Override protected void addOptions() {
+ addRequiredOption(Options.SNAPSHOT);
+ addOption(Options.COPY_TO);
+ addOption(Options.COPY_FROM);
+ addOption(Options.TARGET_NAME);
+ addOption(Options.NO_CHECKSUM_VERIFY);
+ addOption(Options.NO_TARGET_VERIFY);
+ addOption(Options.OVERWRITE);
+ addOption(Options.CHUSER);
+ addOption(Options.CHGROUP);
+ addOption(Options.CHMOD);
+ addOption(Options.MAPPERS);
+ addOption(Options.BANDWIDTH);
+ }
+
+ public static void main(String[] args) {
+ new ExportSnapshot().doStaticMain(args);
+ }
+}
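Since ExportSnapshot is driven through doStaticMain above and follows Hadoop's Tool conventions, an export can also be launched from code. A minimal sketch, assuming the class can be run through ToolRunner; the snapshot name, destination root and mapper count are placeholders that mirror the usage examples printed by the tool:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.snapshot.ExportSnapshot;
import org.apache.hadoop.util.ToolRunner;

public class ExportSnapshotLauncher {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // Snapshot name, destination root and mapper count are placeholders; they mirror the
    // "hbase snapshot export" usage examples printed by the tool itself.
    String[] toolArgs = new String[] {
        "--snapshot", "MySnapshot",
        "--copy-to", "hdfs://srv2:8082/hbase",
        "--mappers", "16"
    };
    System.exit(ToolRunner.run(conf, new ExportSnapshot(), toolArgs));
  }
}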
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/util/MapreduceDependencyClasspathTool.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/util/MapreduceDependencyClasspathTool.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/util/MapreduceDependencyClasspathTool.java
new file mode 100644
index 0000000..e8f073d
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/util/MapreduceDependencyClasspathTool.java
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.util;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HBaseInterfaceAudience;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.log4j.Level;
+import org.apache.log4j.Logger;
+
+/**
+ * Generate a classpath string containing any jars required by mapreduce jobs. Specify
+ * additional values by providing a comma-separated list of paths via -Dtmpjars.
+ */
+@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
+public class MapreduceDependencyClasspathTool implements Tool {
+
+ private Configuration conf;
+
+ @Override
+ public void setConf(Configuration conf) {
+ this.conf = conf;
+ }
+
+ @Override
+ public Configuration getConf() {
+ return conf;
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+ if (args.length > 0) {
+ System.err.println("Usage: hbase mapredcp [-Dtmpjars=...]");
+ System.err.println(" Construct a CLASSPATH containing dependency jars required to run a mapreduce");
+ System.err.println(" job. By default, includes any jars detected by TableMapReduceUtils. Provide");
+ System.err.println(" additional entries by specifying a comma-separated list in tmpjars.");
+ return 0;
+ }
+
+ TableMapReduceUtil.addHBaseDependencyJars(getConf());
+ System.out.println(TableMapReduceUtil.buildDependencyClasspath(getConf()));
+ return 0;
+ }
+
+ public static void main(String[] argv) throws Exception {
+ // Silence the usual noise. This is probably fragile...
+ Logger logger = Logger.getLogger("org.apache.hadoop.hbase");
+ if (logger != null) {
+ logger.setLevel(Level.WARN);
+ }
+ System.exit(ToolRunner.run(
+ HBaseConfiguration.create(), new MapreduceDependencyClasspathTool(), argv));
+ }
+}
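A brief usage sketch for the tool above, assuming it is driven through ToolRunner as in its own main(); the extra jar paths are placeholders, and tmpjars is the property named in the class comment:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.util.MapreduceDependencyClasspathTool;
import org.apache.hadoop.util.ToolRunner;

public class MapredcpExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // Optional extra jars to append to the classpath; these paths are placeholders.
    conf.set("tmpjars", "file:///opt/libs/extra-one.jar,file:///opt/libs/extra-two.jar");
    // Prints the dependency classpath on stdout, like running `hbase mapredcp`.
    System.exit(ToolRunner.run(conf, new MapreduceDependencyClasspathTool(), new String[0]));
  }
}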
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCreator.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCreator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCreator.java
deleted file mode 100644
index 1d4d37b..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCreator.java
+++ /dev/null
@@ -1,134 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-import java.util.List;
-
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.Tag;
-import org.apache.hadoop.util.ReflectionUtils;
-
-/**
- * Facade to create Cells for HFileOutputFormat. The created Cells are of <code>Put</code> type.
- */
-@InterfaceAudience.Public
-public class CellCreator {
-
- public static final String VISIBILITY_EXP_RESOLVER_CLASS =
- "hbase.mapreduce.visibility.expression.resolver.class";
-
- private VisibilityExpressionResolver visExpResolver;
-
- public CellCreator(Configuration conf) {
- Class<? extends VisibilityExpressionResolver> clazz = conf.getClass(
- VISIBILITY_EXP_RESOLVER_CLASS, DefaultVisibilityExpressionResolver.class,
- VisibilityExpressionResolver.class);
- this.visExpResolver = ReflectionUtils.newInstance(clazz, conf);
- this.visExpResolver.init();
- }
-
- /**
- * @param row row key
- * @param roffset row offset
- * @param rlength row length
- * @param family family name
- * @param foffset family offset
- * @param flength family length
- * @param qualifier column qualifier
- * @param qoffset qualifier offset
- * @param qlength qualifier length
- * @param timestamp version timestamp
- * @param value column value
- * @param voffset value offset
- * @param vlength value length
- * @return created Cell
- * @throws IOException
- */
- public Cell create(byte[] row, int roffset, int rlength, byte[] family, int foffset, int flength,
- byte[] qualifier, int qoffset, int qlength, long timestamp, byte[] value, int voffset,
- int vlength) throws IOException {
- return create(row, roffset, rlength, family, foffset, flength, qualifier, qoffset, qlength,
- timestamp, value, voffset, vlength, (List<Tag>)null);
- }
-
- /**
- * @param row row key
- * @param roffset row offset
- * @param rlength row length
- * @param family family name
- * @param foffset family offset
- * @param flength family length
- * @param qualifier column qualifier
- * @param qoffset qualifier offset
- * @param qlength qualifier length
- * @param timestamp version timestamp
- * @param value column value
- * @param voffset value offset
- * @param vlength value length
- * @param visExpression visibility expression to be associated with cell
- * @return created Cell
- * @throws IOException
- */
- @Deprecated
- public Cell create(byte[] row, int roffset, int rlength, byte[] family, int foffset, int flength,
- byte[] qualifier, int qoffset, int qlength, long timestamp, byte[] value, int voffset,
- int vlength, String visExpression) throws IOException {
- List<Tag> visTags = null;
- if (visExpression != null) {
- visTags = this.visExpResolver.createVisibilityExpTags(visExpression);
- }
- return new KeyValue(row, roffset, rlength, family, foffset, flength, qualifier, qoffset,
- qlength, timestamp, KeyValue.Type.Put, value, voffset, vlength, visTags);
- }
-
- /**
- * @param row row key
- * @param roffset row offset
- * @param rlength row length
- * @param family family name
- * @param foffset family offset
- * @param flength family length
- * @param qualifier column qualifier
- * @param qoffset qualifier offset
- * @param qlength qualifier length
- * @param timestamp version timestamp
- * @param value column value
- * @param voffset value offset
- * @param vlength value length
- * @param tags
- * @return created Cell
- * @throws IOException
- */
- public Cell create(byte[] row, int roffset, int rlength, byte[] family, int foffset, int flength,
- byte[] qualifier, int qoffset, int qlength, long timestamp, byte[] value, int voffset,
- int vlength, List<Tag> tags) throws IOException {
- return new KeyValue(row, roffset, rlength, family, foffset, flength, qualifier, qoffset,
- qlength, timestamp, KeyValue.Type.Put, value, voffset, vlength, tags);
- }
-
- /**
- * @return Visibility expression resolver
- */
- public VisibilityExpressionResolver getVisibilityExpressionResolver() {
- return this.visExpResolver;
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java
deleted file mode 100644
index 21b8556..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java
+++ /dev/null
@@ -1,386 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.Random;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Admin;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-/**
- * Tool used to copy a table to another one which can be on a different setup.
- * It is also configurable with a start and time as well as a specification
- * of the region server implementation if different from the local cluster.
- */
-@InterfaceAudience.Public
-public class CopyTable extends Configured implements Tool {
- private static final Log LOG = LogFactory.getLog(CopyTable.class);
-
- final static String NAME = "copytable";
- long startTime = 0;
- long endTime = HConstants.LATEST_TIMESTAMP;
- int batch = Integer.MAX_VALUE;
- int cacheRow = -1;
- int versions = -1;
- String tableName = null;
- String startRow = null;
- String stopRow = null;
- String dstTableName = null;
- String peerAddress = null;
- String families = null;
- boolean allCells = false;
- static boolean shuffle = false;
-
- boolean bulkload = false;
- Path bulkloadDir = null;
-
- private final static String JOB_NAME_CONF_KEY = "mapreduce.job.name";
-
- /**
- * Sets up the actual job.
- *
- * @param args The command line parameters.
- * @return The newly created job.
- * @throws IOException When setting up the job fails.
- */
- public Job createSubmittableJob(String[] args)
- throws IOException {
- if (!doCommandLine(args)) {
- return null;
- }
-
- Job job = Job.getInstance(getConf(), getConf().get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
- job.setJarByClass(CopyTable.class);
- Scan scan = new Scan();
-
- scan.setBatch(batch);
- scan.setCacheBlocks(false);
-
- if (cacheRow > 0) {
- scan.setCaching(cacheRow);
- } else {
- scan.setCaching(getConf().getInt(HConstants.HBASE_CLIENT_SCANNER_CACHING, 100));
- }
-
- scan.setTimeRange(startTime, endTime);
-
- if (allCells) {
- scan.setRaw(true);
- }
- if (shuffle) {
- job.getConfiguration().set(TableInputFormat.SHUFFLE_MAPS, "true");
- }
- if (versions >= 0) {
- scan.setMaxVersions(versions);
- }
-
- if (startRow != null) {
- scan.setStartRow(Bytes.toBytesBinary(startRow));
- }
-
- if (stopRow != null) {
- scan.setStopRow(Bytes.toBytesBinary(stopRow));
- }
-
- if(families != null) {
- String[] fams = families.split(",");
- Map<String,String> cfRenameMap = new HashMap<>();
- for(String fam : fams) {
- String sourceCf;
- if(fam.contains(":")) {
- // fam looks like "sourceCfName:destCfName"
- String[] srcAndDest = fam.split(":", 2);
- sourceCf = srcAndDest[0];
- String destCf = srcAndDest[1];
- cfRenameMap.put(sourceCf, destCf);
- } else {
- // fam is just "sourceCf"
- sourceCf = fam;
- }
- scan.addFamily(Bytes.toBytes(sourceCf));
- }
- Import.configureCfRenaming(job.getConfiguration(), cfRenameMap);
- }
- job.setNumReduceTasks(0);
-
- if (bulkload) {
- TableMapReduceUtil.initTableMapperJob(tableName, scan, Import.KeyValueImporter.class, null,
- null, job);
-
- // We need to split the inputs by destination tables so that output of Map can be bulk-loaded.
- TableInputFormat.configureSplitTable(job, TableName.valueOf(dstTableName));
-
- FileSystem fs = FileSystem.get(getConf());
- Random rand = new Random();
- Path root = new Path(fs.getWorkingDirectory(), "copytable");
- fs.mkdirs(root);
- while (true) {
- bulkloadDir = new Path(root, "" + rand.nextLong());
- if (!fs.exists(bulkloadDir)) {
- break;
- }
- }
-
- System.out.println("HFiles will be stored at " + this.bulkloadDir);
- HFileOutputFormat2.setOutputPath(job, bulkloadDir);
- try (Connection conn = ConnectionFactory.createConnection(getConf());
- Admin admin = conn.getAdmin()) {
- HFileOutputFormat2.configureIncrementalLoadMap(job,
- admin.listTableDescriptor((TableName.valueOf(dstTableName))));
- }
- } else {
- TableMapReduceUtil.initTableMapperJob(tableName, scan,
- Import.Importer.class, null, null, job);
-
- TableMapReduceUtil.initTableReducerJob(dstTableName, null, job, null, peerAddress, null,
- null);
- }
-
- return job;
- }
-
- /*
- * @param errorMsg Error message. Can be null.
- */
- private static void printUsage(final String errorMsg) {
- if (errorMsg != null && errorMsg.length() > 0) {
- System.err.println("ERROR: " + errorMsg);
- }
- System.err.println("Usage: CopyTable [general options] [--starttime=X] [--endtime=Y] " +
- "[--new.name=NEW] [--peer.adr=ADR] <tablename>");
- System.err.println();
- System.err.println("Options:");
- System.err.println(" rs.class hbase.regionserver.class of the peer cluster");
- System.err.println(" specify if different from current cluster");
- System.err.println(" rs.impl hbase.regionserver.impl of the peer cluster");
- System.err.println(" startrow the start row");
- System.err.println(" stoprow the stop row");
- System.err.println(" starttime beginning of the time range (unixtime in millis)");
- System.err.println(" without endtime means from starttime to forever");
- System.err.println(" endtime end of the time range. Ignored if no starttime specified.");
- System.err.println(" versions number of cell versions to copy");
- System.err.println(" new.name new table's name");
- System.err.println(" peer.adr Address of the peer cluster given in the format");
- System.err.println(" hbase.zookeeper.quorum:hbase.zookeeper.client"
- + ".port:zookeeper.znode.parent");
- System.err.println(" families comma-separated list of families to copy");
- System.err.println(" To copy from cf1 to cf2, give sourceCfName:destCfName. ");
- System.err.println(" To keep the same name, just give \"cfName\"");
- System.err.println(" all.cells also copy delete markers and deleted cells");
- System.err.println(" bulkload Write input into HFiles and bulk load to the destination "
- + "table");
- System.err.println();
- System.err.println("Args:");
- System.err.println(" tablename Name of the table to copy");
- System.err.println();
- System.err.println("Examples:");
- System.err.println(" To copy 'TestTable' to a cluster that uses replication for a 1 hour window:");
- System.err.println(" $ hbase " +
- "org.apache.hadoop.hbase.mapreduce.CopyTable --starttime=1265875194289 --endtime=1265878794289 " +
- "--peer.adr=server1,server2,server3:2181:/hbase --families=myOldCf:myNewCf,cf2,cf3 TestTable ");
- System.err.println("For performance consider the following general option:\n"
- + " It is recommended that you set the following to >=100. A higher value uses more memory but\n"
- + " decreases the round trip time to the server and may increase performance.\n"
- + " -Dhbase.client.scanner.caching=100\n"
- + " The following should always be set to false, to prevent writing data twice, which may produce \n"
- + " inaccurate results.\n"
- + " -Dmapreduce.map.speculative=false");
- }
-
- private boolean doCommandLine(final String[] args) {
- // Process command-line args. TODO: Better cmd-line processing
- // (but hopefully something not as painful as cli options).
- if (args.length < 1) {
- printUsage(null);
- return false;
- }
- try {
- for (int i = 0; i < args.length; i++) {
- String cmd = args[i];
- if (cmd.equals("-h") || cmd.startsWith("--h")) {
- printUsage(null);
- return false;
- }
-
- final String startRowArgKey = "--startrow=";
- if (cmd.startsWith(startRowArgKey)) {
- startRow = cmd.substring(startRowArgKey.length());
- continue;
- }
-
- final String stopRowArgKey = "--stoprow=";
- if (cmd.startsWith(stopRowArgKey)) {
- stopRow = cmd.substring(stopRowArgKey.length());
- continue;
- }
-
- final String startTimeArgKey = "--starttime=";
- if (cmd.startsWith(startTimeArgKey)) {
- startTime = Long.parseLong(cmd.substring(startTimeArgKey.length()));
- continue;
- }
-
- final String endTimeArgKey = "--endtime=";
- if (cmd.startsWith(endTimeArgKey)) {
- endTime = Long.parseLong(cmd.substring(endTimeArgKey.length()));
- continue;
- }
-
- final String batchArgKey = "--batch=";
- if (cmd.startsWith(batchArgKey)) {
- batch = Integer.parseInt(cmd.substring(batchArgKey.length()));
- continue;
- }
-
- final String cacheRowArgKey = "--cacheRow=";
- if (cmd.startsWith(cacheRowArgKey)) {
- cacheRow = Integer.parseInt(cmd.substring(cacheRowArgKey.length()));
- continue;
- }
-
- final String versionsArgKey = "--versions=";
- if (cmd.startsWith(versionsArgKey)) {
- versions = Integer.parseInt(cmd.substring(versionsArgKey.length()));
- continue;
- }
-
- final String newNameArgKey = "--new.name=";
- if (cmd.startsWith(newNameArgKey)) {
- dstTableName = cmd.substring(newNameArgKey.length());
- continue;
- }
-
- final String peerAdrArgKey = "--peer.adr=";
- if (cmd.startsWith(peerAdrArgKey)) {
- peerAddress = cmd.substring(peerAdrArgKey.length());
- continue;
- }
-
- final String familiesArgKey = "--families=";
- if (cmd.startsWith(familiesArgKey)) {
- families = cmd.substring(familiesArgKey.length());
- continue;
- }
-
- if (cmd.startsWith("--all.cells")) {
- allCells = true;
- continue;
- }
-
- if (cmd.startsWith("--bulkload")) {
- bulkload = true;
- continue;
- }
-
- if (cmd.startsWith("--shuffle")) {
- shuffle = true;
- continue;
- }
-
- if (i == args.length-1) {
- tableName = cmd;
- } else {
- printUsage("Invalid argument '" + cmd + "'");
- return false;
- }
- }
- if (dstTableName == null && peerAddress == null) {
- printUsage("At least a new table name or a " +
- "peer address must be specified");
- return false;
- }
- if ((endTime != 0) && (startTime > endTime)) {
- printUsage("Invalid time range filter: starttime=" + startTime + " > endtime=" + endTime);
- return false;
- }
-
- if (bulkload && peerAddress != null) {
- printUsage("Remote bulkload is not supported!");
- return false;
- }
-
- // set dstTableName if necessary
- if (dstTableName == null) {
- dstTableName = tableName;
- }
- } catch (Exception e) {
- e.printStackTrace();
- printUsage("Can't start because " + e.getMessage());
- return false;
- }
- return true;
- }
-
- /**
- * Main entry point.
- *
- * @param args The command line parameters.
- * @throws Exception When running the job fails.
- */
- public static void main(String[] args) throws Exception {
- int ret = ToolRunner.run(HBaseConfiguration.create(), new CopyTable(), args);
- System.exit(ret);
- }
-
- @Override
- public int run(String[] args) throws Exception {
- Job job = createSubmittableJob(args);
- if (job == null) return 1;
- if (!job.waitForCompletion(true)) {
- LOG.info("Map-reduce job failed!");
- if (bulkload) {
- LOG.info("Files are not bulkloaded!");
- }
- return 1;
- }
- int code = 0;
- if (bulkload) {
- code = new LoadIncrementalHFiles(this.getConf()).run(new String[]{this.bulkloadDir.toString(),
- this.dstTableName});
- if (code == 0) {
- // bulkloadDir is deleted only LoadIncrementalHFiles was successful so that one can rerun
- // LoadIncrementalHFiles.
- FileSystem fs = FileSystem.get(this.getConf());
- if (!fs.delete(this.bulkloadDir, true)) {
- LOG.error("Deleting folder " + bulkloadDir + " failed!");
- code = 1;
- }
- }
- }
- return code;
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/DefaultVisibilityExpressionResolver.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/DefaultVisibilityExpressionResolver.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/DefaultVisibilityExpressionResolver.java
deleted file mode 100644
index 004ee5c..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/DefaultVisibilityExpressionResolver.java
+++ /dev/null
@@ -1,144 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.apache.hadoop.hbase.security.visibility.VisibilityConstants.LABELS_TABLE_FAMILY;
-import static org.apache.hadoop.hbase.security.visibility.VisibilityConstants.LABELS_TABLE_NAME;
-import static org.apache.hadoop.hbase.security.visibility.VisibilityConstants.LABEL_QUALIFIER;
-
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.TableNotFoundException;
-import org.apache.hadoop.hbase.Tag;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.security.visibility.Authorizations;
-import org.apache.hadoop.hbase.security.visibility.VisibilityConstants;
-import org.apache.hadoop.hbase.security.visibility.VisibilityLabelOrdinalProvider;
-import org.apache.hadoop.hbase.security.visibility.VisibilityUtils;
-import org.apache.hadoop.hbase.util.Bytes;
-
-/**
- * This implementation creates tags by expanding expression using label ordinal. Labels will be
- * serialized in sorted order of it's ordinal.
- */
-@InterfaceAudience.Private
-public class DefaultVisibilityExpressionResolver implements VisibilityExpressionResolver {
- private static final Log LOG = LogFactory.getLog(DefaultVisibilityExpressionResolver.class);
-
- private Configuration conf;
- private final Map<String, Integer> labels = new HashMap<>();
-
- @Override
- public Configuration getConf() {
- return this.conf;
- }
-
- @Override
- public void setConf(Configuration conf) {
- this.conf = conf;
- }
-
- @Override
- public void init() {
- // Reading all the labels and ordinal.
- // This scan should be done by user with global_admin privileges.. Ensure that it works
- Table labelsTable = null;
- Connection connection = null;
- try {
- connection = ConnectionFactory.createConnection(conf);
- try {
- labelsTable = connection.getTable(LABELS_TABLE_NAME);
- } catch (IOException e) {
- LOG.error("Error opening 'labels' table", e);
- return;
- }
- Scan scan = new Scan();
- scan.setAuthorizations(new Authorizations(VisibilityUtils.SYSTEM_LABEL));
- scan.addColumn(LABELS_TABLE_FAMILY, LABEL_QUALIFIER);
- ResultScanner scanner = null;
- try {
- scanner = labelsTable.getScanner(scan);
- Result next = null;
- while ((next = scanner.next()) != null) {
- byte[] row = next.getRow();
- byte[] value = next.getValue(LABELS_TABLE_FAMILY, LABEL_QUALIFIER);
- labels.put(Bytes.toString(value), Bytes.toInt(row));
- }
- } catch (TableNotFoundException e) {
- // Table not found. So just return
- return;
- } catch (IOException e) {
- LOG.error("Error scanning 'labels' table", e);
- } finally {
- if (scanner != null) scanner.close();
- }
- } catch (IOException ioe) {
- LOG.error("Failed reading 'labels' tags", ioe);
- return;
- } finally {
- if (labelsTable != null) {
- try {
- labelsTable.close();
- } catch (IOException ioe) {
- LOG.warn("Error closing 'labels' table", ioe);
- }
- }
- if (connection != null)
- try {
- connection.close();
- } catch (IOException ioe) {
- LOG.warn("Failed close of temporary connection", ioe);
- }
- }
- }
-
- @Override
- public List<Tag> createVisibilityExpTags(String visExpression) throws IOException {
- VisibilityLabelOrdinalProvider provider = new VisibilityLabelOrdinalProvider() {
- @Override
- public int getLabelOrdinal(String label) {
- Integer ordinal = null;
- ordinal = labels.get(label);
- if (ordinal != null) {
- return ordinal.intValue();
- }
- return VisibilityConstants.NON_EXIST_LABEL_ORDINAL;
- }
-
- @Override
- public String getLabel(int ordinal) {
- // Unused
- throw new UnsupportedOperationException(
- "getLabel should not be used in VisibilityExpressionResolver");
- }
- };
- return VisibilityUtils.createVisibilityExpTags(visExpression, true, false, null, provider);
- }
-}
\ No newline at end of file
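For reference, a minimal sketch of how this resolver is driven by its callers, assuming a
cluster with visibility labels enabled; the labels in the expression are hypothetical:

    import java.util.List;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.Tag;
    import org.apache.hadoop.hbase.mapreduce.DefaultVisibilityExpressionResolver;
    import org.apache.hadoop.hbase.mapreduce.VisibilityExpressionResolver;

    public class VisibilityResolverSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        VisibilityExpressionResolver resolver = new DefaultVisibilityExpressionResolver();
        resolver.setConf(conf);
        resolver.init(); // scans the labels table once and caches label -> ordinal
        // "secret" and "public" are hypothetical labels that must already exist on the cluster
        List<Tag> tags = resolver.createVisibilityExpTags("secret&!public");
        System.out.println("Serialized " + tags.size() + " visibility tag(s)");
      }
    }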
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Driver.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Driver.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Driver.java
deleted file mode 100644
index 9737b55..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Driver.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.hadoop.hbase.HBaseInterfaceAudience;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.classification.InterfaceStability;
-import org.apache.hadoop.hbase.mapreduce.replication.VerifyReplication;
-import org.apache.hadoop.hbase.snapshot.ExportSnapshot;
-import org.apache.hadoop.util.ProgramDriver;
-
-/**
- * Driver for hbase mapreduce jobs. Select which job to run by passing
- * its name as the first argument to this main class.
- */
-@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
-@InterfaceStability.Stable
-public class Driver {
- /**
- * @param args
- * @throws Throwable
- */
- public static void main(String[] args) throws Throwable {
- ProgramDriver pgd = new ProgramDriver();
-
- pgd.addClass(RowCounter.NAME, RowCounter.class,
- "Count rows in HBase table.");
- pgd.addClass(CellCounter.NAME, CellCounter.class,
- "Count cells in HBase table.");
- pgd.addClass(Export.NAME, Export.class, "Write table data to HDFS.");
- pgd.addClass(Import.NAME, Import.class, "Import data written by Export.");
- pgd.addClass(ImportTsv.NAME, ImportTsv.class, "Import data in TSV format.");
- pgd.addClass(LoadIncrementalHFiles.NAME, LoadIncrementalHFiles.class,
- "Complete a bulk data load.");
- pgd.addClass(CopyTable.NAME, CopyTable.class,
- "Export a table from local cluster to peer cluster.");
- pgd.addClass(VerifyReplication.NAME, VerifyReplication.class, "Compare" +
- " the data from tables in two different clusters. WARNING: It" +
- " doesn't work for incrementColumnValues'd cells since the" +
- " timestamp is changed after being appended to the log.");
- pgd.addClass(WALPlayer.NAME, WALPlayer.class, "Replay WAL files.");
- pgd.addClass(ExportSnapshot.NAME, ExportSnapshot.class, "Export" +
- " the specific snapshot to a given FileSystem.");
-
- ProgramDriver.class.getMethod("driver", new Class [] {String[].class}).
- invoke(pgd, new Object[]{args});
- }
-}
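For reference, a minimal sketch of selecting a program through this driver; the table name and
output path are hypothetical:

    public class DriverSketch {
      public static void main(String[] unused) throws Throwable {
        // The first argument selects the registered program ("export" here); the rest are
        // passed through to that tool, i.e. <tablename> <outputdir> for Export.
        org.apache.hadoop.hbase.mapreduce.Driver.main(
            new String[] { "export", "myTable", "/tmp/export-out" });
      }
    }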
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java
deleted file mode 100644
index 4c01528..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java
+++ /dev/null
@@ -1,197 +0,0 @@
-/**
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements. See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership. The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
-import org.apache.hadoop.hbase.filter.Filter;
-import org.apache.hadoop.hbase.filter.IncompatibleFilterException;
-import org.apache.hadoop.hbase.filter.PrefixFilter;
-import org.apache.hadoop.hbase.filter.RegexStringComparator;
-import org.apache.hadoop.hbase.filter.RowFilter;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-/**
-* Export an HBase table.
-* Writes content to sequence files up in HDFS. Use {@link Import} to read it
-* back in again.
-*/
-@InterfaceAudience.Public
-public class Export extends Configured implements Tool {
- private static final Log LOG = LogFactory.getLog(Export.class);
- final static String NAME = "export";
- final static String RAW_SCAN = "hbase.mapreduce.include.deleted.rows";
- final static String EXPORT_BATCHING = "hbase.export.scanner.batch";
-
- private final static String JOB_NAME_CONF_KEY = "mapreduce.job.name";
-
- /**
- * Sets up the actual job.
- *
- * @param conf The current configuration.
- * @param args The command line parameters.
- * @return The newly created job.
- * @throws IOException When setting up the job fails.
- */
- public static Job createSubmittableJob(Configuration conf, String[] args)
- throws IOException {
- String tableName = args[0];
- Path outputDir = new Path(args[1]);
- Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
- job.setJobName(NAME + "_" + tableName);
- job.setJarByClass(Export.class);
- // Set optional scan parameters
- Scan s = getConfiguredScanForJob(conf, args);
- IdentityTableMapper.initJob(tableName, s, IdentityTableMapper.class, job);
- // No reducers. Just write straight to output files.
- job.setNumReduceTasks(0);
- job.setOutputFormatClass(SequenceFileOutputFormat.class);
- job.setOutputKeyClass(ImmutableBytesWritable.class);
- job.setOutputValueClass(Result.class);
- FileOutputFormat.setOutputPath(job, outputDir); // job conf doesn't contain the conf so doesn't have a default fs.
- return job;
- }
-
- private static Scan getConfiguredScanForJob(Configuration conf, String[] args) throws IOException {
- Scan s = new Scan();
- // Optional arguments.
- // Set Scan Versions
- int versions = args.length > 2? Integer.parseInt(args[2]): 1;
- s.setMaxVersions(versions);
- // Set Scan Range
- long startTime = args.length > 3? Long.parseLong(args[3]): 0L;
- long endTime = args.length > 4? Long.parseLong(args[4]): Long.MAX_VALUE;
- s.setTimeRange(startTime, endTime);
- // Set cache blocks
- s.setCacheBlocks(false);
- // set Start and Stop row
- if (conf.get(TableInputFormat.SCAN_ROW_START) != null) {
- s.setStartRow(Bytes.toBytesBinary(conf.get(TableInputFormat.SCAN_ROW_START)));
- }
- if (conf.get(TableInputFormat.SCAN_ROW_STOP) != null) {
- s.setStopRow(Bytes.toBytesBinary(conf.get(TableInputFormat.SCAN_ROW_STOP)));
- }
- // Set Scan Column Family
- boolean raw = Boolean.parseBoolean(conf.get(RAW_SCAN));
- if (raw) {
- s.setRaw(raw);
- }
- for (String columnFamily : conf.getTrimmedStrings(TableInputFormat.SCAN_COLUMN_FAMILY)) {
- s.addFamily(Bytes.toBytes(columnFamily));
- }
- // Set RowFilter or Prefix Filter if applicable.
- Filter exportFilter = getExportFilter(args);
- if (exportFilter!= null) {
- LOG.info("Setting Scan Filter for Export.");
- s.setFilter(exportFilter);
- }
-
- int batching = conf.getInt(EXPORT_BATCHING, -1);
- if (batching != -1){
- try {
- s.setBatch(batching);
- } catch (IncompatibleFilterException e) {
- LOG.error("Batching could not be set", e);
- }
- }
- LOG.info("versions=" + versions + ", starttime=" + startTime +
- ", endtime=" + endTime + ", keepDeletedCells=" + raw);
- return s;
- }
-
- private static Filter getExportFilter(String[] args) {
- Filter exportFilter = null;
- String filterCriteria = (args.length > 5) ? args[5]: null;
- if (filterCriteria == null) return null;
- if (filterCriteria.startsWith("^")) {
- String regexPattern = filterCriteria.substring(1, filterCriteria.length());
- exportFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator(regexPattern));
- } else {
- exportFilter = new PrefixFilter(Bytes.toBytesBinary(filterCriteria));
- }
- return exportFilter;
- }
-
- /*
- * @param errorMsg Error message. Can be null.
- */
- private static void usage(final String errorMsg) {
- if (errorMsg != null && errorMsg.length() > 0) {
- System.err.println("ERROR: " + errorMsg);
- }
- System.err.println("Usage: Export [-D <property=value>]* <tablename> <outputdir> [<versions> " +
- "[<starttime> [<endtime>]] [^[regex pattern] or [Prefix] to filter]]\n");
- System.err.println(" Note: -D properties will be applied to the conf used. ");
- System.err.println(" For example: ");
- System.err.println(" -D mapreduce.output.fileoutputformat.compress=true");
- System.err.println(" -D mapreduce.output.fileoutputformat.compress.codec=org.apache.hadoop.io.compress.GzipCodec");
- System.err.println(" -D mapreduce.output.fileoutputformat.compress.type=BLOCK");
- System.err.println(" Additionally, the following SCAN properties can be specified");
- System.err.println(" to control/limit what is exported..");
- System.err.println(" -D " + TableInputFormat.SCAN_COLUMN_FAMILY + "=<family1>,<family2>, ...");
- System.err.println(" -D " + RAW_SCAN + "=true");
- System.err.println(" -D " + TableInputFormat.SCAN_ROW_START + "=<ROWSTART>");
- System.err.println(" -D " + TableInputFormat.SCAN_ROW_STOP + "=<ROWSTOP>");
- System.err.println(" -D " + JOB_NAME_CONF_KEY
- + "=jobName - use the specified mapreduce job name for the export");
- System.err.println("For performance consider the following properties:\n"
- + " -Dhbase.client.scanner.caching=100\n"
- + " -Dmapreduce.map.speculative=false\n"
- + " -Dmapreduce.reduce.speculative=false");
- System.err.println("For tables with very wide rows consider setting the batch size as below:\n"
- + " -D" + EXPORT_BATCHING + "=10");
- }
-
-
- @Override
- public int run(String[] args) throws Exception {
- if (args.length < 2) {
- usage("Wrong number of arguments: " + args.length);
- return -1;
- }
- Job job = createSubmittableJob(getConf(), args);
- return (job.waitForCompletion(true) ? 0 : 1);
- }
-
- /**
- * Main entry point.
- * @param args The command line parameters.
- * @throws Exception When running the job fails.
- */
- public static void main(String[] args) throws Exception {
- int errCode = ToolRunner.run(HBaseConfiguration.create(), new Export(), args);
- System.exit(errCode);
- }
-}
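For reference, a minimal sketch of invoking the export tool programmatically; the table name,
output path and tuning values are hypothetical:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.mapreduce.Export;
    import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
    import org.apache.hadoop.util.ToolRunner;

    public class ExportSketch {
      public static void main(String[] unused) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // Restrict the export to a single column family (same key printed in usage() above).
        conf.set(TableInputFormat.SCAN_COLUMN_FAMILY, "cf");
        // Cap the scanner batch for very wide rows (the EXPORT_BATCHING key defined above).
        conf.setInt("hbase.export.scanner.batch", 10);
        // Positional args: <tablename> <outputdir> [<versions>]
        int exit = ToolRunner.run(conf, new Export(),
            new String[] { "myTable", "/tmp/export-out", "3" });
        System.exit(exit);
      }
    }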
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/GroupingTableMapper.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/GroupingTableMapper.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/GroupingTableMapper.java
deleted file mode 100644
index dc30c6e..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/GroupingTableMapper.java
+++ /dev/null
@@ -1,177 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-import java.util.ArrayList;
-
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapreduce.Job;
-
-/**
- * Extract grouping columns from input record.
- */
-@InterfaceAudience.Public
-public class GroupingTableMapper
-extends TableMapper<ImmutableBytesWritable,Result> implements Configurable {
-
- /**
- * JobConf parameter to specify the columns used to produce the key passed to
- * collect from the map phase.
- */
- public static final String GROUP_COLUMNS =
- "hbase.mapred.groupingtablemap.columns";
-
- /** The grouping columns. */
- protected byte [][] columns;
- /** The current configuration. */
- private Configuration conf = null;
-
- /**
- * Use this before submitting a TableMap job. It will appropriately set up
- * the job.
- *
- * @param table The table to be processed.
- * @param scan The scan with the columns etc.
- * @param groupColumns A space separated list of columns used to form the
- * key used in collect.
- * @param mapper The mapper class.
- * @param job The current job.
- * @throws IOException When setting up the job fails.
- */
- @SuppressWarnings("unchecked")
- public static void initJob(String table, Scan scan, String groupColumns,
- Class<? extends TableMapper> mapper, Job job) throws IOException {
- TableMapReduceUtil.initTableMapperJob(table, scan, mapper,
- ImmutableBytesWritable.class, Result.class, job);
- job.getConfiguration().set(GROUP_COLUMNS, groupColumns);
- }
-
- /**
- * Extract the grouping columns from value to construct a new key. Pass the
- * new key and value to reduce. If any of the grouping columns are not found
- * in the value, the record is skipped.
- *
- * @param key The current key.
- * @param value The current value.
- * @param context The current context.
- * @throws IOException When writing the record fails.
- * @throws InterruptedException When the job is aborted.
- */
- @Override
- public void map(ImmutableBytesWritable key, Result value, Context context)
- throws IOException, InterruptedException {
- byte[][] keyVals = extractKeyValues(value);
- if(keyVals != null) {
- ImmutableBytesWritable tKey = createGroupKey(keyVals);
- context.write(tKey, value);
- }
- }
-
- /**
- * Extract column values from the current record. This method returns
- * null if any of the columns are not found.
- * <p>
- * Override this method if you want to deal with nulls differently.
- *
- * @param r The current values.
- * @return Array of byte values.
- */
- protected byte[][] extractKeyValues(Result r) {
- byte[][] keyVals = null;
- ArrayList<byte[]> foundList = new ArrayList<>();
- int numCols = columns.length;
- if (numCols > 0) {
- for (Cell value: r.listCells()) {
- byte [] column = KeyValue.makeColumn(CellUtil.cloneFamily(value),
- CellUtil.cloneQualifier(value));
- for (int i = 0; i < numCols; i++) {
- if (Bytes.equals(column, columns[i])) {
- foundList.add(CellUtil.cloneValue(value));
- break;
- }
- }
- }
- if(foundList.size() == numCols) {
- keyVals = foundList.toArray(new byte[numCols][]);
- }
- }
- return keyVals;
- }
-
- /**
- * Create a key by concatenating multiple column values.
- * <p>
- * Override this function in order to produce different types of keys.
- *
- * @param vals The current key/values.
- * @return A key generated by concatenating multiple column values.
- */
- protected ImmutableBytesWritable createGroupKey(byte[][] vals) {
- if(vals == null) {
- return null;
- }
- StringBuilder sb = new StringBuilder();
- for(int i = 0; i < vals.length; i++) {
- if(i > 0) {
- sb.append(" ");
- }
- sb.append(Bytes.toString(vals[i]));
- }
- return new ImmutableBytesWritable(Bytes.toBytesBinary(sb.toString()));
- }
-
- /**
- * Returns the current configuration.
- *
- * @return The current configuration.
- * @see org.apache.hadoop.conf.Configurable#getConf()
- */
- @Override
- public Configuration getConf() {
- return conf;
- }
-
- /**
- * Sets the configuration. This is used to set up the grouping details.
- *
- * @param configuration The configuration to set.
- * @see org.apache.hadoop.conf.Configurable#setConf(
- * org.apache.hadoop.conf.Configuration)
- */
- @Override
- public void setConf(Configuration configuration) {
- this.conf = configuration;
- String[] cols = conf.get(GROUP_COLUMNS, "").split(" ");
- columns = new byte[cols.length][];
- for(int i = 0; i < cols.length; i++) {
- columns[i] = Bytes.toBytes(cols[i]);
- }
- }
-
-}
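For reference, a minimal sketch of wiring this mapper into a job; the table, family and
qualifiers are hypothetical, and the grouping columns are the space-separated family:qualifier
list described in the javadoc above:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.Scan;
    import org.apache.hadoop.hbase.mapreduce.GroupingTableMapper;
    import org.apache.hadoop.hbase.util.Bytes;
    import org.apache.hadoop.mapreduce.Job;

    public class GroupingSketch {
      public static void main(String[] unused) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        Job job = Job.getInstance(conf, "group-by-columns");
        Scan scan = new Scan();
        scan.addFamily(Bytes.toBytes("cf"));
        // Map output is keyed by the concatenated values of cf:host and cf:day;
        // rows missing either column are skipped by extractKeyValues().
        GroupingTableMapper.initJob("myTable", scan, "cf:host cf:day",
            GroupingTableMapper.class, job);
        // A reducer consuming (ImmutableBytesWritable, Result) pairs would be configured next.
      }
    }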
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileInputFormat.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileInputFormat.java
deleted file mode 100644
index e90d5c1..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileInputFormat.java
+++ /dev/null
@@ -1,174 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.PathFilter;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.io.hfile.HFile;
-import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
-import org.apache.hadoop.hbase.io.hfile.HFileScanner;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.input.FileSplit;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * Simple MR input format for HFiles.
- * This code was borrowed from the Apache Crunch project.
- * It has been updated for recent versions of HBase.
- */
-public class HFileInputFormat extends FileInputFormat<NullWritable, Cell> {
-
- private static final Logger LOG = LoggerFactory.getLogger(HFileInputFormat.class);
-
- /**
- * File filter that removes all "hidden" files. This might be something worth removing from
- * a more general purpose utility; it accounts for the presence of metadata files created
- * in the way we're doing exports.
- */
- static final PathFilter HIDDEN_FILE_FILTER = new PathFilter() {
- @Override
- public boolean accept(Path p) {
- String name = p.getName();
- return !name.startsWith("_") && !name.startsWith(".");
- }
- };
-
- /**
- * Record reader for HFiles.
- */
- private static class HFileRecordReader extends RecordReader<NullWritable, Cell> {
-
- private Reader in;
- protected Configuration conf;
- private HFileScanner scanner;
-
- /**
- * A private cache of the key value so it doesn't need to be loaded twice from the scanner.
- */
- private Cell value = null;
- private long count;
- private boolean seeked = false;
-
- @Override
- public void initialize(InputSplit split, TaskAttemptContext context)
- throws IOException, InterruptedException {
- FileSplit fileSplit = (FileSplit) split;
- conf = context.getConfiguration();
- Path path = fileSplit.getPath();
- FileSystem fs = path.getFileSystem(conf);
- LOG.info("Initialize HFileRecordReader for {}", path);
- this.in = HFile.createReader(fs, path, conf);
-
- // The file info must be loaded before the scanner can be used.
- // This seems like a bug in HBase, but it's easily worked around.
- this.in.loadFileInfo();
- this.scanner = in.getScanner(false, false);
-
- }
-
-
- @Override
- public boolean nextKeyValue() throws IOException, InterruptedException {
- boolean hasNext;
- if (!seeked) {
- LOG.info("Seeking to start");
- hasNext = scanner.seekTo();
- seeked = true;
- } else {
- hasNext = scanner.next();
- }
- if (!hasNext) {
- return false;
- }
- value = scanner.getCell();
- count++;
- return true;
- }
-
- @Override
- public NullWritable getCurrentKey() throws IOException, InterruptedException {
- return NullWritable.get();
- }
-
- @Override
- public Cell getCurrentValue() throws IOException, InterruptedException {
- return value;
- }
-
- @Override
- public float getProgress() throws IOException, InterruptedException {
- // This would be inaccurate if KVs are not uniformly-sized or we have performed a seek to
- // the start row, but better than nothing anyway.
- return 1.0f * count / in.getEntries();
- }
-
- @Override
- public void close() throws IOException {
- if (in != null) {
- in.close();
- in = null;
- }
- }
- }
-
- @Override
- protected List<FileStatus> listStatus(JobContext job) throws IOException {
- List<FileStatus> result = new ArrayList<FileStatus>();
-
- // Explode out directories that match the original FileInputFormat filters
- // since HFiles are written to directories where the
- // directory name is the column name
- for (FileStatus status : super.listStatus(job)) {
- if (status.isDirectory()) {
- FileSystem fs = status.getPath().getFileSystem(job.getConfiguration());
- for (FileStatus match : fs.listStatus(status.getPath(), HIDDEN_FILE_FILTER)) {
- result.add(match);
- }
- } else {
- result.add(status);
- }
- }
- return result;
- }
-
- @Override
- public RecordReader<NullWritable, Cell> createRecordReader(InputSplit split, TaskAttemptContext context)
- throws IOException, InterruptedException {
- return new HFileRecordReader();
- }
-
- @Override
- protected boolean isSplitable(JobContext context, Path filename) {
- // This file isn't splittable.
- return false;
- }
-}
\ No newline at end of file
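For reference, a minimal sketch of a job that reads HFiles with this input format; the paths
and the mapper are hypothetical (keys are NullWritable and values are Cells, as defined above):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hbase.Cell;
    import org.apache.hadoop.hbase.CellUtil;
    import org.apache.hadoop.hbase.mapreduce.HFileInputFormat;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

    public class HFileReadSketch {
      // Hypothetical mapper: emits each cell's row key as text.
      public static class RowKeyMapper extends Mapper<NullWritable, Cell, Text, NullWritable> {
        @Override
        protected void map(NullWritable key, Cell cell, Context context)
            throws java.io.IOException, InterruptedException {
          context.write(new Text(CellUtil.cloneRow(cell)), NullWritable.get());
        }
      }

      public static void main(String[] unused) throws Exception {
        Job job = Job.getInstance(new Configuration(), "read-hfiles");
        job.setJarByClass(HFileReadSketch.class);
        job.setInputFormatClass(HFileInputFormat.class);
        FileInputFormat.addInputPath(job, new Path("/hbase-export/hfiles"));
        job.setMapperClass(RowKeyMapper.class);
        job.setNumReduceTasks(0);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job, new Path("/tmp/rowkeys"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
      }
    }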
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Import.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Import.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Import.java
deleted file mode 100644
index b5bb2ec..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Import.java
+++ /dev/null
@@ -1,780 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.ByteArrayInputStream;
-import java.io.DataInput;
-import java.io.DataInputStream;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.lang.reflect.InvocationTargetException;
-import java.lang.reflect.Method;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.TreeMap;
-import java.util.UUID;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellComparator;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.KeyValueUtil;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.ZooKeeperConnectionException;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Admin;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Delete;
-import org.apache.hadoop.hbase.client.Durability;
-import org.apache.hadoop.hbase.client.Mutation;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.RegionLocator;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.filter.Filter;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.zookeeper.ZKClusterId;
-import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
-import org.apache.hadoop.io.RawComparator;
-import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.io.WritableComparator;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Partitioner;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.TaskCounter;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-import org.apache.zookeeper.KeeperException;
-
-
-/**
- * Import data written by {@link Export}.
- */
-@InterfaceAudience.Public
-public class Import extends Configured implements Tool {
- private static final Log LOG = LogFactory.getLog(Import.class);
- final static String NAME = "import";
- public final static String CF_RENAME_PROP = "HBASE_IMPORTER_RENAME_CFS";
- public final static String BULK_OUTPUT_CONF_KEY = "import.bulk.output";
- public final static String FILTER_CLASS_CONF_KEY = "import.filter.class";
- public final static String FILTER_ARGS_CONF_KEY = "import.filter.args";
- public final static String TABLE_NAME = "import.table.name";
- public final static String WAL_DURABILITY = "import.wal.durability";
- public final static String HAS_LARGE_RESULT= "import.bulk.hasLargeResult";
-
- private final static String JOB_NAME_CONF_KEY = "mapreduce.job.name";
-
- public static class KeyValueWritableComparablePartitioner
- extends Partitioner<KeyValueWritableComparable, KeyValue> {
- private static KeyValueWritableComparable[] START_KEYS = null;
- @Override
- public int getPartition(KeyValueWritableComparable key, KeyValue value,
- int numPartitions) {
- for (int i = 0; i < START_KEYS.length; ++i) {
- if (key.compareTo(START_KEYS[i]) <= 0) {
- return i;
- }
- }
- return START_KEYS.length;
- }
-
- }
-
- public static class KeyValueWritableComparable
- implements WritableComparable<KeyValueWritableComparable> {
-
- private KeyValue kv = null;
-
- static {
- // register this comparator
- WritableComparator.define(KeyValueWritableComparable.class,
- new KeyValueWritableComparator());
- }
-
- public KeyValueWritableComparable() {
- }
-
- public KeyValueWritableComparable(KeyValue kv) {
- this.kv = kv;
- }
-
- @Override
- public void write(DataOutput out) throws IOException {
- KeyValue.write(kv, out);
- }
-
- @Override
- public void readFields(DataInput in) throws IOException {
- kv = KeyValue.create(in);
- }
-
- @Override
- @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="EQ_COMPARETO_USE_OBJECT_EQUALS",
- justification="This is wrong, yes, but we should be purging Writables, not fixing them")
- public int compareTo(KeyValueWritableComparable o) {
- return CellComparator.COMPARATOR.compare(this.kv, ((KeyValueWritableComparable)o).kv);
- }
-
- public static class KeyValueWritableComparator extends WritableComparator {
-
- @Override
- public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
- try {
- KeyValueWritableComparable kv1 = new KeyValueWritableComparable();
- kv1.readFields(new DataInputStream(new ByteArrayInputStream(b1, s1, l1)));
- KeyValueWritableComparable kv2 = new KeyValueWritableComparable();
- kv2.readFields(new DataInputStream(new ByteArrayInputStream(b2, s2, l2)));
- return compare(kv1, kv2);
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
- }
-
- }
-
- }
-
- public static class KeyValueReducer
- extends
- Reducer<KeyValueWritableComparable, KeyValue, ImmutableBytesWritable, KeyValue> {
- protected void reduce(
- KeyValueWritableComparable row,
- Iterable<KeyValue> kvs,
- Reducer<KeyValueWritableComparable,
- KeyValue, ImmutableBytesWritable, KeyValue>.Context context)
- throws java.io.IOException, InterruptedException {
- int index = 0;
- for (KeyValue kv : kvs) {
- context.write(new ImmutableBytesWritable(kv.getRowArray()), kv);
- if (++index % 100 == 0)
- context.setStatus("Wrote " + index + " KeyValues, "
- + "and the rowkey whose is being wrote is " + Bytes.toString(kv.getRowArray()));
- }
- }
- }
-
- public static class KeyValueSortImporter
- extends TableMapper<KeyValueWritableComparable, KeyValue> {
- private Map<byte[], byte[]> cfRenameMap;
- private Filter filter;
- private static final Log LOG = LogFactory.getLog(KeyValueImporter.class);
-
- /**
- * @param row The current table row key.
- * @param value The columns.
- * @param context The current context.
- * @throws IOException When something is broken with the data.
- */
- @Override
- public void map(ImmutableBytesWritable row, Result value,
- Context context)
- throws IOException {
- try {
- if (LOG.isTraceEnabled()) {
- LOG.trace("Considering the row."
- + Bytes.toString(row.get(), row.getOffset(), row.getLength()));
- }
- if (filter == null
- || !filter.filterRowKey(CellUtil.createFirstOnRow(row.get(), row.getOffset(),
- (short) row.getLength()))) {
- for (Cell kv : value.rawCells()) {
- kv = filterKv(filter, kv);
- // skip if we filtered it out
- if (kv == null) continue;
- // TODO get rid of ensureKeyValue
- KeyValue ret = KeyValueUtil.ensureKeyValue(convertKv(kv, cfRenameMap));
- context.write(new KeyValueWritableComparable(ret.createKeyOnly(false)), ret);
- }
- }
- } catch (InterruptedException e) {
- e.printStackTrace();
- }
- }
-
- @Override
- public void setup(Context context) throws IOException {
- cfRenameMap = createCfRenameMap(context.getConfiguration());
- filter = instantiateFilter(context.getConfiguration());
- int reduceNum = context.getNumReduceTasks();
- Configuration conf = context.getConfiguration();
- TableName tableName = TableName.valueOf(context.getConfiguration().get(TABLE_NAME));
- try (Connection conn = ConnectionFactory.createConnection(conf);
- RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
- byte[][] startKeys = regionLocator.getStartKeys();
- if (startKeys.length != reduceNum) {
- throw new IOException("Region split after job initialization");
- }
- KeyValueWritableComparable[] startKeyWraps =
- new KeyValueWritableComparable[startKeys.length - 1];
- for (int i = 1; i < startKeys.length; ++i) {
- startKeyWraps[i - 1] =
- new KeyValueWritableComparable(KeyValueUtil.createFirstOnRow(startKeys[i]));
- }
- KeyValueWritableComparablePartitioner.START_KEYS = startKeyWraps;
- }
- }
- }
-
- /**
- * A mapper that just writes out KeyValues.
- */
- @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="EQ_COMPARETO_USE_OBJECT_EQUALS",
- justification="Writables are going away and this has been this way forever")
- public static class KeyValueImporter extends TableMapper<ImmutableBytesWritable, KeyValue> {
- private Map<byte[], byte[]> cfRenameMap;
- private Filter filter;
- private static final Log LOG = LogFactory.getLog(KeyValueImporter.class);
-
- /**
- * @param row The current table row key.
- * @param value The columns.
- * @param context The current context.
- * @throws IOException When something is broken with the data.
- */
- @Override
- public void map(ImmutableBytesWritable row, Result value,
- Context context)
- throws IOException {
- try {
- if (LOG.isTraceEnabled()) {
- LOG.trace("Considering the row."
- + Bytes.toString(row.get(), row.getOffset(), row.getLength()));
- }
- if (filter == null
- || !filter.filterRowKey(CellUtil.createFirstOnRow(row.get(), row.getOffset(),
- (short) row.getLength()))) {
- for (Cell kv : value.rawCells()) {
- kv = filterKv(filter, kv);
- // skip if we filtered it out
- if (kv == null) continue;
- // TODO get rid of ensureKeyValue
- context.write(row, KeyValueUtil.ensureKeyValue(convertKv(kv, cfRenameMap)));
- }
- }
- } catch (InterruptedException e) {
- e.printStackTrace();
- }
- }
-
- @Override
- public void setup(Context context) {
- cfRenameMap = createCfRenameMap(context.getConfiguration());
- filter = instantiateFilter(context.getConfiguration());
- }
- }
-
- /**
- * Write the imported data back into an HBase table as Put and Delete mutations.
- */
- public static class Importer extends TableMapper<ImmutableBytesWritable, Mutation> {
- private Map<byte[], byte[]> cfRenameMap;
- private List<UUID> clusterIds;
- private Filter filter;
- private Durability durability;
-
- /**
- * @param row The current table row key.
- * @param value The columns.
- * @param context The current context.
- * @throws IOException When something is broken with the data.
- */
- @Override
- public void map(ImmutableBytesWritable row, Result value,
- Context context)
- throws IOException {
- try {
- writeResult(row, value, context);
- } catch (InterruptedException e) {
- e.printStackTrace();
- }
- }
-
- private void writeResult(ImmutableBytesWritable key, Result result, Context context)
- throws IOException, InterruptedException {
- Put put = null;
- Delete delete = null;
- if (LOG.isTraceEnabled()) {
- LOG.trace("Considering the row."
- + Bytes.toString(key.get(), key.getOffset(), key.getLength()));
- }
- if (filter == null
- || !filter.filterRowKey(CellUtil.createFirstOnRow(key.get(), key.getOffset(),
- (short) key.getLength()))) {
- processKV(key, result, context, put, delete);
- }
- }
-
- protected void processKV(ImmutableBytesWritable key, Result result, Context context, Put put,
- Delete delete) throws IOException, InterruptedException {
- for (Cell kv : result.rawCells()) {
- kv = filterKv(filter, kv);
- // skip if we filter it out
- if (kv == null) continue;
-
- kv = convertKv(kv, cfRenameMap);
- // Deletes and Puts are gathered and written when finished
- /*
- * If an Export contains a sequence of mutations and tombstones, the Import should restore that
- * sequence exactly as it was. If we combined all Delete tombstones into a single request, some
- * DeleteFamily tombstones could be dropped: when multiple DeleteFamily tombstones are submitted
- * in one Delete request, HBase keeps only the newest one and ignores the others.
- * Check HBASE-12065.
- */
- if (CellUtil.isDeleteFamily(kv)) {
- Delete deleteFamily = new Delete(key.get());
- deleteFamily.add(kv);
- if (durability != null) {
- deleteFamily.setDurability(durability);
- }
- deleteFamily.setClusterIds(clusterIds);
- context.write(key, deleteFamily);
- } else if (CellUtil.isDelete(kv)) {
- if (delete == null) {
- delete = new Delete(key.get());
- }
- delete.add(kv);
- } else {
- if (put == null) {
- put = new Put(key.get());
- }
- addPutToKv(put, kv);
- }
- }
- if (put != null) {
- if (durability != null) {
- put.setDurability(durability);
- }
- put.setClusterIds(clusterIds);
- context.write(key, put);
- }
- if (delete != null) {
- if (durability != null) {
- delete.setDurability(durability);
- }
- delete.setClusterIds(clusterIds);
- context.write(key, delete);
- }
- }
-
- protected void addPutToKv(Put put, Cell kv) throws IOException {
- put.add(kv);
- }
-
- @Override
- public void setup(Context context) {
- LOG.info("Setting up " + getClass() + " mapper.");
- Configuration conf = context.getConfiguration();
- cfRenameMap = createCfRenameMap(conf);
- filter = instantiateFilter(conf);
- String durabilityStr = conf.get(WAL_DURABILITY);
- if(durabilityStr != null){
- durability = Durability.valueOf(durabilityStr.toUpperCase(Locale.ROOT));
- LOG.info("setting WAL durability to " + durability);
- } else {
- LOG.info("setting WAL durability to default.");
- }
- // TODO: This is kind of ugly doing setup of ZKW just to read the clusterid.
- ZooKeeperWatcher zkw = null;
- Exception ex = null;
- try {
- zkw = new ZooKeeperWatcher(conf, context.getTaskAttemptID().toString(), null);
- clusterIds = Collections.singletonList(ZKClusterId.getUUIDForCluster(zkw));
- } catch (ZooKeeperConnectionException e) {
- ex = e;
- LOG.error("Problem connecting to ZooKeper during task setup", e);
- } catch (KeeperException e) {
- ex = e;
- LOG.error("Problem reading ZooKeeper data during task setup", e);
- } catch (IOException e) {
- ex = e;
- LOG.error("Problem setting up task", e);
- } finally {
- if (zkw != null) zkw.close();
- }
- if (clusterIds == null) {
- // exit early if setup fails
- throw new RuntimeException(ex);
- }
- }
- }
-
- /**
- * Create a {@link Filter} to apply to all incoming keys ({@link KeyValue KeyValues}) so that
- * some of them can optionally be excluded from the job output
- * @param conf {@link Configuration} from which to load the filter
- * @return the filter to use for the task, or <tt>null</tt> if no filter should be used
- * @throws IllegalArgumentException if the filter is misconfigured
- */
- public static Filter instantiateFilter(Configuration conf) {
- // get the filter, if it was configured
- Class<? extends Filter> filterClass = conf.getClass(FILTER_CLASS_CONF_KEY, null, Filter.class);
- if (filterClass == null) {
- LOG.debug("No configured filter class, accepting all keyvalues.");
- return null;
- }
- LOG.debug("Attempting to create filter:" + filterClass);
- String[] filterArgs = conf.getStrings(FILTER_ARGS_CONF_KEY);
- ArrayList<byte[]> quotedArgs = toQuotedByteArrays(filterArgs);
- try {
- Method m = filterClass.getMethod("createFilterFromArguments", ArrayList.class);
- return (Filter) m.invoke(null, quotedArgs);
- } catch (IllegalAccessException e) {
- LOG.error("Couldn't instantiate filter!", e);
- throw new RuntimeException(e);
- } catch (SecurityException e) {
- LOG.error("Couldn't instantiate filter!", e);
- throw new RuntimeException(e);
- } catch (NoSuchMethodException e) {
- LOG.error("Couldn't instantiate filter!", e);
- throw new RuntimeException(e);
- } catch (IllegalArgumentException e) {
- LOG.error("Couldn't instantiate filter!", e);
- throw new RuntimeException(e);
- } catch (InvocationTargetException e) {
- LOG.error("Couldn't instantiate filter!", e);
- throw new RuntimeException(e);
- }
- }
-
- private static ArrayList<byte[]> toQuotedByteArrays(String... stringArgs) {
- ArrayList<byte[]> quotedArgs = new ArrayList<>();
- for (String stringArg : stringArgs) {
- // all the filters' instantiation methods expect quoted args since they normally come from
- // the shell, so add the quotes here, though it shouldn't really be needed :-/
- quotedArgs.add(Bytes.toBytes("'" + stringArg + "'"));
- }
- return quotedArgs;
- }
-
- /**
- * Attempt to filter out the keyvalue
- * @param kv {@link KeyValue} on which to apply the filter
- * @return <tt>null</tt> if the key should not be written, otherwise returns the original
- * {@link KeyValue}
- */
- public static Cell filterKv(Filter filter, Cell kv) throws IOException {
- // apply the filter and skip this kv if the filter doesn't apply
- if (filter != null) {
- Filter.ReturnCode code = filter.filterKeyValue(kv);
- if (LOG.isTraceEnabled()) {
- LOG.trace("Filter returned:" + code + " for the key value:" + kv);
- }
- // if its not an accept type, then skip this kv
- if (!(code.equals(Filter.ReturnCode.INCLUDE) || code
- .equals(Filter.ReturnCode.INCLUDE_AND_NEXT_COL))) {
- return null;
- }
- }
- return kv;
- }
-
- // helper: create a new KeyValue based on CF rename map
- private static Cell convertKv(Cell kv, Map<byte[], byte[]> cfRenameMap) {
- if(cfRenameMap != null) {
- // If there's a rename mapping for this CF, create a new KeyValue
- byte[] newCfName = cfRenameMap.get(CellUtil.cloneFamily(kv));
- if(newCfName != null) {
- kv = new KeyValue(kv.getRowArray(), // row buffer
- kv.getRowOffset(), // row offset
- kv.getRowLength(), // row length
- newCfName, // CF buffer
- 0, // CF offset
- newCfName.length, // CF length
- kv.getQualifierArray(), // qualifier buffer
- kv.getQualifierOffset(), // qualifier offset
- kv.getQualifierLength(), // qualifier length
- kv.getTimestamp(), // timestamp
- KeyValue.Type.codeToType(kv.getTypeByte()), // KV Type
- kv.getValueArray(), // value buffer
- kv.getValueOffset(), // value offset
- kv.getValueLength()); // value length
- }
- }
- return kv;
- }
-
- // helper: make a map from sourceCfName to destCfName by parsing a config key
- private static Map<byte[], byte[]> createCfRenameMap(Configuration conf) {
- Map<byte[], byte[]> cfRenameMap = null;
- String allMappingsPropVal = conf.get(CF_RENAME_PROP);
- if(allMappingsPropVal != null) {
- // The conf value format should be sourceCf1:destCf1,sourceCf2:destCf2,...
- String[] allMappings = allMappingsPropVal.split(",");
- for (String mapping: allMappings) {
- if(cfRenameMap == null) {
- cfRenameMap = new TreeMap<>(Bytes.BYTES_COMPARATOR);
- }
- String [] srcAndDest = mapping.split(":");
- if(srcAndDest.length != 2) {
- continue;
- }
- cfRenameMap.put(srcAndDest[0].getBytes(), srcAndDest[1].getBytes());
- }
- }
- return cfRenameMap;
- }
-
- /**
- * <p>Sets a configuration property with key {@link #CF_RENAME_PROP} in conf that tells
- * the mapper how to rename column families.
- *
- * <p>Alternately, instead of calling this function, you could set the configuration key
- * {@link #CF_RENAME_PROP} yourself. The value should look like
- * <pre>srcCf1:destCf1,srcCf2:destCf2,....</pre>. This would have the same effect on
- * the mapper behavior.
- *
- * @param conf the Configuration in which the {@link #CF_RENAME_PROP} key will be
- * set
- * @param renameMap a mapping from source CF names to destination CF names
- */
- static public void configureCfRenaming(Configuration conf,
- Map<String, String> renameMap) {
- StringBuilder sb = new StringBuilder();
- for(Map.Entry<String,String> entry: renameMap.entrySet()) {
- String sourceCf = entry.getKey();
- String destCf = entry.getValue();
-
- if(sourceCf.contains(":") || sourceCf.contains(",") ||
- destCf.contains(":") || destCf.contains(",")) {
- throw new IllegalArgumentException("Illegal character in CF names: "
- + sourceCf + ", " + destCf);
- }
-
- if(sb.length() != 0) {
- sb.append(",");
- }
- sb.append(sourceCf + ":" + destCf);
- }
- conf.set(CF_RENAME_PROP, sb.toString());
- }
-
- /**
- * Add a Filter to be instantiated on import
- * @param conf Configuration to update (will be passed to the job)
- * @param clazz {@link Filter} subclass to instantiate on the server.
- * @param filterArgs List of arguments to pass to the filter on instantiation
- */
- public static void addFilterAndArguments(Configuration conf, Class<? extends Filter> clazz,
- List<String> filterArgs) throws IOException {
- conf.set(Import.FILTER_CLASS_CONF_KEY, clazz.getName());
- conf.setStrings(Import.FILTER_ARGS_CONF_KEY, filterArgs.toArray(new String[filterArgs.size()]));
- }
-
- /**
- * Sets up the actual job.
- * @param conf The current configuration.
- * @param args The command line parameters.
- * @return The newly created job.
- * @throws IOException When setting up the job fails.
- */
- public static Job createSubmittableJob(Configuration conf, String[] args)
- throws IOException {
- TableName tableName = TableName.valueOf(args[0]);
- conf.set(TABLE_NAME, tableName.getNameAsString());
- Path inputDir = new Path(args[1]);
- Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
- job.setJarByClass(Importer.class);
- FileInputFormat.setInputPaths(job, inputDir);
- job.setInputFormatClass(SequenceFileInputFormat.class);
- String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
-
- // make sure we get the filter in the jars
- try {
- Class<? extends Filter> filter = conf.getClass(FILTER_CLASS_CONF_KEY, null, Filter.class);
- if (filter != null) {
- TableMapReduceUtil.addDependencyJarsForClasses(conf, filter);
- }
- } catch (Exception e) {
- throw new IOException(e);
- }
-
- if (hfileOutPath != null && conf.getBoolean(HAS_LARGE_RESULT, false)) {
- LOG.info("Use Large Result!!");
- try (Connection conn = ConnectionFactory.createConnection(conf);
- Table table = conn.getTable(tableName);
- RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
- HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(), regionLocator);
- job.setMapperClass(KeyValueSortImporter.class);
- job.setReducerClass(KeyValueReducer.class);
- Path outputDir = new Path(hfileOutPath);
- FileOutputFormat.setOutputPath(job, outputDir);
- job.setMapOutputKeyClass(KeyValueWritableComparable.class);
- job.setMapOutputValueClass(KeyValue.class);
- job.getConfiguration().setClass("mapreduce.job.output.key.comparator.class",
- KeyValueWritableComparable.KeyValueWritableComparator.class,
- RawComparator.class);
- Path partitionsPath =
- new Path(TotalOrderPartitioner.getPartitionFile(job.getConfiguration()));
- FileSystem fs = FileSystem.get(job.getConfiguration());
- fs.deleteOnExit(partitionsPath);
- job.setPartitionerClass(KeyValueWritableComparablePartitioner.class);
- job.setNumReduceTasks(regionLocator.getStartKeys().length);
- TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
- org.apache.hadoop.hbase.shaded.com.google.common.base.Preconditions.class);
- }
- } else if (hfileOutPath != null) {
- LOG.info("writing to hfiles for bulk load.");
- job.setMapperClass(KeyValueImporter.class);
- try (Connection conn = ConnectionFactory.createConnection(conf);
- Table table = conn.getTable(tableName);
- RegionLocator regionLocator = conn.getRegionLocator(tableName)){
- job.setReducerClass(KeyValueSortReducer.class);
- Path outputDir = new Path(hfileOutPath);
- FileOutputFormat.setOutputPath(job, outputDir);
- job.setMapOutputKeyClass(ImmutableBytesWritable.class);
- job.setMapOutputValueClass(KeyValue.class);
- HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(), regionLocator);
- TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
- org.apache.hadoop.hbase.shaded.com.google.common.base.Preconditions.class);
- }
- } else {
- LOG.info("writing directly to table from Mapper.");
- // No reducers. Just write straight to table. Call initTableReducerJob
- // because it sets up the TableOutputFormat.
- job.setMapperClass(Importer.class);
- TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job);
- job.setNumReduceTasks(0);
- }
- return job;
- }
-
- /*
- * @param errorMsg Error message. Can be null.
- */
- private static void usage(final String errorMsg) {
- if (errorMsg != null && errorMsg.length() > 0) {
- System.err.println("ERROR: " + errorMsg);
- }
- System.err.println("Usage: Import [options] <tablename> <inputdir>");
- System.err.println("By default Import will load data directly into HBase. To instead generate");
- System.err.println("HFiles of data to prepare for a bulk data load, pass the option:");
- System.err.println(" -D" + BULK_OUTPUT_CONF_KEY + "=/path/for/output");
- System.err.println("If there is a large result that includes too much KeyValue "
- + "whitch can occur OOME caused by the memery sort in reducer, pass the option:");
- System.err.println(" -D" + HAS_LARGE_RESULT + "=true");
- System.err
- .println(" To apply a generic org.apache.hadoop.hbase.filter.Filter to the input, use");
- System.err.println(" -D" + FILTER_CLASS_CONF_KEY + "=<name of filter class>");
- System.err.println(" -D" + FILTER_ARGS_CONF_KEY + "=<comma separated list of args for filter");
- System.err.println(" NOTE: The filter will be applied BEFORE doing key renames via the "
- + CF_RENAME_PROP + " property. Further, filters will only use the"
- + " Filter#filterRowKey(byte[] buffer, int offset, int length) method to identify "
- + " whether the current row needs to be ignored completely for processing and "
- + " Filter#filterKeyValue(KeyValue) method to determine if the KeyValue should be added;"
- + " Filter.ReturnCode#INCLUDE and #INCLUDE_AND_NEXT_COL will be considered as including"
- + " the KeyValue.");
- System.err.println("To import data exported from HBase 0.94, use");
- System.err.println(" -Dhbase.import.version=0.94");
- System.err.println(" -D " + JOB_NAME_CONF_KEY
- + "=jobName - use the specified mapreduce job name for the import");
- System.err.println("For performance consider the following options:\n"
- + " -Dmapreduce.map.speculative=false\n"
- + " -Dmapreduce.reduce.speculative=false\n"
- + " -D" + WAL_DURABILITY + "=<Used while writing data to hbase."
- +" Allowed values are the supported durability values"
- +" like SKIP_WAL/ASYNC_WAL/SYNC_WAL/...>");
- }
-
- /**
- * If the durability is set to {@link Durability#SKIP_WAL} and the data is imported into hbase,
- * we need to flush all the regions of the table, because the data is held only in memory and is
- * not present in the Write Ahead Log to replay after a crash. This method flushes all regions of
- * the table when data is imported with {@link Durability#SKIP_WAL}
- */
- public static void flushRegionsIfNecessary(Configuration conf) throws IOException,
- InterruptedException {
- String tableName = conf.get(TABLE_NAME);
- Admin hAdmin = null;
- Connection connection = null;
- String durability = conf.get(WAL_DURABILITY);
- // Need to flush if the data is written to hbase and skip wal is enabled.
- if (conf.get(BULK_OUTPUT_CONF_KEY) == null && durability != null
- && Durability.SKIP_WAL.name().equalsIgnoreCase(durability)) {
- LOG.info("Flushing all data that skipped the WAL.");
- try {
- connection = ConnectionFactory.createConnection(conf);
- hAdmin = connection.getAdmin();
- hAdmin.flush(TableName.valueOf(tableName));
- } finally {
- if (hAdmin != null) {
- hAdmin.close();
- }
- if (connection != null) {
- connection.close();
- }
- }
- }
- }
-
- @Override
- public int run(String[] args) throws Exception {
- if (args.length < 2) {
- usage("Wrong number of arguments: " + args.length);
- return -1;
- }
- String inputVersionString = System.getProperty(ResultSerialization.IMPORT_FORMAT_VER);
- if (inputVersionString != null) {
- getConf().set(ResultSerialization.IMPORT_FORMAT_VER, inputVersionString);
- }
- Job job = createSubmittableJob(getConf(), args);
- boolean isJobSuccessful = job.waitForCompletion(true);
- if(isJobSuccessful){
- // Flush all the regions of the table
- flushRegionsIfNecessary(getConf());
- }
- long inputRecords = job.getCounters().findCounter(TaskCounter.MAP_INPUT_RECORDS).getValue();
- long outputRecords = job.getCounters().findCounter(TaskCounter.MAP_OUTPUT_RECORDS).getValue();
- if (outputRecords < inputRecords) {
- System.err.println("Warning, not all records were imported (maybe filtered out).");
- if (outputRecords == 0) {
- System.err.println("If the data was exported from HBase 0.94 "+
- "consider using -Dhbase.import.version=0.94.");
- }
- }
-
- return (isJobSuccessful ? 0 : 1);
- }
-
- /**
- * Main entry point.
- * @param args The command line parameters.
- * @throws Exception When running the job fails.
- */
- public static void main(String[] args) throws Exception {
- int errCode = ToolRunner.run(HBaseConfiguration.create(), new Import(), args);
- System.exit(errCode);
- }
-
-}
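For reference, a minimal sketch of invoking the import tool with a filter and bulk-load output;
the table, paths and prefix value are hypothetical:

    import java.util.Arrays;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.filter.PrefixFilter;
    import org.apache.hadoop.hbase.mapreduce.Import;
    import org.apache.hadoop.util.ToolRunner;

    public class ImportSketch {
      public static void main(String[] unused) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // Filter the exported cells before they are written; this sets FILTER_CLASS_CONF_KEY
        // and FILTER_ARGS_CONF_KEY for us.
        Import.addFilterAndArguments(conf, PrefixFilter.class, Arrays.asList("row-42"));
        // Generate HFiles for bulk load instead of writing through the table.
        conf.set(Import.BULK_OUTPUT_CONF_KEY, "/tmp/import-hfiles");
        // Positional args: <tablename> <inputdir>, where <inputdir> holds the SequenceFiles
        // written by Export.
        int exit = ToolRunner.run(conf, new Import(),
            new String[] { "myTable", "/tmp/export-out" });
        System.exit(exit);
      }
    }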
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java
deleted file mode 100644
index b64271e..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java
+++ /dev/null
@@ -1,793 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import static java.lang.String.format;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.Set;
-
-import org.apache.commons.lang.StringUtils;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.HColumnDescriptor;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.TableNotEnabledException;
-import org.apache.hadoop.hbase.TableNotFoundException;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Admin;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.RegionLocator;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Base64;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.Pair;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
-import org.apache.hadoop.security.Credentials;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
-import org.apache.hadoop.hbase.client.TableDescriptor;
-import org.apache.hadoop.hbase.shaded.com.google.common.base.Preconditions;
-import org.apache.hadoop.hbase.shaded.com.google.common.base.Splitter;
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
-
-/**
- * Tool to import data from a TSV file.
- *
- * This tool is rather simplistic - it doesn't do any quoting or
- * escaping, but is useful for many data loads.
- *
- * @see ImportTsv#usage(String)
- */
-@InterfaceAudience.Public
-public class ImportTsv extends Configured implements Tool {
-
- protected static final Log LOG = LogFactory.getLog(ImportTsv.class);
-
- final static String NAME = "importtsv";
-
- public final static String MAPPER_CONF_KEY = "importtsv.mapper.class";
- public final static String BULK_OUTPUT_CONF_KEY = "importtsv.bulk.output";
- public final static String TIMESTAMP_CONF_KEY = "importtsv.timestamp";
- public final static String JOB_NAME_CONF_KEY = "mapreduce.job.name";
- // TODO: the rest of these configs are used exclusively by TsvImporterMapper.
- // Move them out of the tool and let the mapper handle its own validation.
- public final static String DRY_RUN_CONF_KEY = "importtsv.dry.run";
- // If true, bad lines are logged to stderr. Default: false.
- public final static String LOG_BAD_LINES_CONF_KEY = "importtsv.log.bad.lines";
- public final static String SKIP_LINES_CONF_KEY = "importtsv.skip.bad.lines";
- public final static String SKIP_EMPTY_COLUMNS = "importtsv.skip.empty.columns";
- public final static String COLUMNS_CONF_KEY = "importtsv.columns";
- public final static String SEPARATOR_CONF_KEY = "importtsv.separator";
- public final static String ATTRIBUTE_SEPERATOR_CONF_KEY = "attributes.seperator";
-  // This config is used to propagate credentials from parent MR jobs which launch
-  // ImportTSV jobs. See IntegrationTestImportTsv.
- public final static String CREDENTIALS_LOCATION = "credentials_location";
- final static String DEFAULT_SEPARATOR = "\t";
- final static String DEFAULT_ATTRIBUTES_SEPERATOR = "=>";
- final static String DEFAULT_MULTIPLE_ATTRIBUTES_SEPERATOR = ",";
- final static Class DEFAULT_MAPPER = TsvImporterMapper.class;
- public final static String CREATE_TABLE_CONF_KEY = "create.table";
- public final static String NO_STRICT_COL_FAMILY = "no.strict";
- /**
- * If table didn't exist and was created in dry-run mode, this flag is
- * flipped to delete it when MR ends.
- */
- private static boolean DRY_RUN_TABLE_CREATED;
-
- public static class TsvParser {
- /**
- * Column families and qualifiers mapped to the TSV columns
- */
- private final byte[][] families;
- private final byte[][] qualifiers;
-
- private final byte separatorByte;
-
- private int rowKeyColumnIndex;
-
- private int maxColumnCount;
-
- // Default value must be negative
- public static final int DEFAULT_TIMESTAMP_COLUMN_INDEX = -1;
-
- private int timestampKeyColumnIndex = DEFAULT_TIMESTAMP_COLUMN_INDEX;
-
- public static final String ROWKEY_COLUMN_SPEC = "HBASE_ROW_KEY";
-
- public static final String TIMESTAMPKEY_COLUMN_SPEC = "HBASE_TS_KEY";
-
- public static final String ATTRIBUTES_COLUMN_SPEC = "HBASE_ATTRIBUTES_KEY";
-
- public static final String CELL_VISIBILITY_COLUMN_SPEC = "HBASE_CELL_VISIBILITY";
-
- public static final String CELL_TTL_COLUMN_SPEC = "HBASE_CELL_TTL";
-
- private int attrKeyColumnIndex = DEFAULT_ATTRIBUTES_COLUMN_INDEX;
-
- public static final int DEFAULT_ATTRIBUTES_COLUMN_INDEX = -1;
-
- public static final int DEFAULT_CELL_VISIBILITY_COLUMN_INDEX = -1;
-
- public static final int DEFAULT_CELL_TTL_COLUMN_INDEX = -1;
-
- private int cellVisibilityColumnIndex = DEFAULT_CELL_VISIBILITY_COLUMN_INDEX;
-
- private int cellTTLColumnIndex = DEFAULT_CELL_TTL_COLUMN_INDEX;
-
- /**
-     * @param columnsSpecification the list of columns to parse out, comma separated.
-     * The row key should be the special token TsvParser.ROWKEY_COLUMN_SPEC
-     * @param separatorStr the single-character column separator
- */
- public TsvParser(String columnsSpecification, String separatorStr) {
- // Configure separator
- byte[] separator = Bytes.toBytes(separatorStr);
- Preconditions.checkArgument(separator.length == 1,
- "TsvParser only supports single-byte separators");
- separatorByte = separator[0];
-
- // Configure columns
- ArrayList<String> columnStrings = Lists.newArrayList(
- Splitter.on(',').trimResults().split(columnsSpecification));
-
- maxColumnCount = columnStrings.size();
- families = new byte[maxColumnCount][];
- qualifiers = new byte[maxColumnCount][];
-
- for (int i = 0; i < columnStrings.size(); i++) {
- String str = columnStrings.get(i);
- if (ROWKEY_COLUMN_SPEC.equals(str)) {
- rowKeyColumnIndex = i;
- continue;
- }
- if (TIMESTAMPKEY_COLUMN_SPEC.equals(str)) {
- timestampKeyColumnIndex = i;
- continue;
- }
- if (ATTRIBUTES_COLUMN_SPEC.equals(str)) {
- attrKeyColumnIndex = i;
- continue;
- }
- if (CELL_VISIBILITY_COLUMN_SPEC.equals(str)) {
- cellVisibilityColumnIndex = i;
- continue;
- }
- if (CELL_TTL_COLUMN_SPEC.equals(str)) {
- cellTTLColumnIndex = i;
- continue;
- }
- String[] parts = str.split(":", 2);
- if (parts.length == 1) {
- families[i] = str.getBytes();
- qualifiers[i] = HConstants.EMPTY_BYTE_ARRAY;
- } else {
- families[i] = parts[0].getBytes();
- qualifiers[i] = parts[1].getBytes();
- }
- }
- }
-
- public boolean hasTimestamp() {
- return timestampKeyColumnIndex != DEFAULT_TIMESTAMP_COLUMN_INDEX;
- }
-
- public int getTimestampKeyColumnIndex() {
- return timestampKeyColumnIndex;
- }
-
- public boolean hasAttributes() {
- return attrKeyColumnIndex != DEFAULT_ATTRIBUTES_COLUMN_INDEX;
- }
-
- public boolean hasCellVisibility() {
- return cellVisibilityColumnIndex != DEFAULT_CELL_VISIBILITY_COLUMN_INDEX;
- }
-
- public boolean hasCellTTL() {
-      return cellTTLColumnIndex != DEFAULT_CELL_TTL_COLUMN_INDEX;
- }
-
- public int getAttributesKeyColumnIndex() {
- return attrKeyColumnIndex;
- }
-
- public int getCellVisibilityColumnIndex() {
- return cellVisibilityColumnIndex;
- }
-
- public int getCellTTLColumnIndex() {
- return cellTTLColumnIndex;
- }
-
- public int getRowKeyColumnIndex() {
- return rowKeyColumnIndex;
- }
-
- public byte[] getFamily(int idx) {
- return families[idx];
- }
- public byte[] getQualifier(int idx) {
- return qualifiers[idx];
- }
-
- public ParsedLine parse(byte[] lineBytes, int length)
- throws BadTsvLineException {
- // Enumerate separator offsets
- ArrayList<Integer> tabOffsets = new ArrayList<>(maxColumnCount);
- for (int i = 0; i < length; i++) {
- if (lineBytes[i] == separatorByte) {
- tabOffsets.add(i);
- }
- }
- if (tabOffsets.isEmpty()) {
- throw new BadTsvLineException("No delimiter");
- }
-
- tabOffsets.add(length);
-
- if (tabOffsets.size() > maxColumnCount) {
- throw new BadTsvLineException("Excessive columns");
- } else if (tabOffsets.size() <= getRowKeyColumnIndex()) {
- throw new BadTsvLineException("No row key");
- } else if (hasTimestamp()
- && tabOffsets.size() <= getTimestampKeyColumnIndex()) {
- throw new BadTsvLineException("No timestamp");
- } else if (hasAttributes() && tabOffsets.size() <= getAttributesKeyColumnIndex()) {
- throw new BadTsvLineException("No attributes specified");
- } else if (hasCellVisibility() && tabOffsets.size() <= getCellVisibilityColumnIndex()) {
- throw new BadTsvLineException("No cell visibility specified");
- } else if (hasCellTTL() && tabOffsets.size() <= getCellTTLColumnIndex()) {
- throw new BadTsvLineException("No cell TTL specified");
- }
- return new ParsedLine(tabOffsets, lineBytes);
- }
-
- class ParsedLine {
- private final ArrayList<Integer> tabOffsets;
- private byte[] lineBytes;
-
- ParsedLine(ArrayList<Integer> tabOffsets, byte[] lineBytes) {
- this.tabOffsets = tabOffsets;
- this.lineBytes = lineBytes;
- }
-
- public int getRowKeyOffset() {
- return getColumnOffset(rowKeyColumnIndex);
- }
- public int getRowKeyLength() {
- return getColumnLength(rowKeyColumnIndex);
- }
-
- public long getTimestamp(long ts) throws BadTsvLineException {
- // Return ts if HBASE_TS_KEY is not configured in column spec
- if (!hasTimestamp()) {
- return ts;
- }
-
- String timeStampStr = Bytes.toString(lineBytes,
- getColumnOffset(timestampKeyColumnIndex),
- getColumnLength(timestampKeyColumnIndex));
- try {
- return Long.parseLong(timeStampStr);
- } catch (NumberFormatException nfe) {
- // treat this record as bad record
- throw new BadTsvLineException("Invalid timestamp " + timeStampStr);
- }
- }
-
- private String getAttributes() {
- if (!hasAttributes()) {
- return null;
- } else {
- return Bytes.toString(lineBytes, getColumnOffset(attrKeyColumnIndex),
- getColumnLength(attrKeyColumnIndex));
- }
- }
-
- public String[] getIndividualAttributes() {
- String attributes = getAttributes();
- if (attributes != null) {
- return attributes.split(DEFAULT_MULTIPLE_ATTRIBUTES_SEPERATOR);
- } else {
- return null;
- }
- }
-
- public int getAttributeKeyOffset() {
- if (hasAttributes()) {
- return getColumnOffset(attrKeyColumnIndex);
- } else {
- return DEFAULT_ATTRIBUTES_COLUMN_INDEX;
- }
- }
-
- public int getAttributeKeyLength() {
- if (hasAttributes()) {
- return getColumnLength(attrKeyColumnIndex);
- } else {
- return DEFAULT_ATTRIBUTES_COLUMN_INDEX;
- }
- }
-
- public int getCellVisibilityColumnOffset() {
- if (hasCellVisibility()) {
- return getColumnOffset(cellVisibilityColumnIndex);
- } else {
- return DEFAULT_CELL_VISIBILITY_COLUMN_INDEX;
- }
- }
-
- public int getCellVisibilityColumnLength() {
- if (hasCellVisibility()) {
- return getColumnLength(cellVisibilityColumnIndex);
- } else {
- return DEFAULT_CELL_VISIBILITY_COLUMN_INDEX;
- }
- }
-
- public String getCellVisibility() {
- if (!hasCellVisibility()) {
- return null;
- } else {
- return Bytes.toString(lineBytes, getColumnOffset(cellVisibilityColumnIndex),
- getColumnLength(cellVisibilityColumnIndex));
- }
- }
-
- public int getCellTTLColumnOffset() {
- if (hasCellTTL()) {
- return getColumnOffset(cellTTLColumnIndex);
- } else {
- return DEFAULT_CELL_TTL_COLUMN_INDEX;
- }
- }
-
- public int getCellTTLColumnLength() {
- if (hasCellTTL()) {
- return getColumnLength(cellTTLColumnIndex);
- } else {
- return DEFAULT_CELL_TTL_COLUMN_INDEX;
- }
- }
-
- public long getCellTTL() {
- if (!hasCellTTL()) {
- return 0;
- } else {
- return Bytes.toLong(lineBytes, getColumnOffset(cellTTLColumnIndex),
- getColumnLength(cellTTLColumnIndex));
- }
- }
-
- public int getColumnOffset(int idx) {
- if (idx > 0)
- return tabOffsets.get(idx - 1) + 1;
- else
- return 0;
- }
- public int getColumnLength(int idx) {
- return tabOffsets.get(idx) - getColumnOffset(idx);
- }
- public int getColumnCount() {
- return tabOffsets.size();
- }
- public byte[] getLineBytes() {
- return lineBytes;
- }
- }
-
- public static class BadTsvLineException extends Exception {
- public BadTsvLineException(String err) {
- super(err);
- }
- private static final long serialVersionUID = 1L;
- }
-
- /**
- * Return starting position and length of row key from the specified line bytes.
-     * @param lineBytes the raw bytes of the line to examine
-     * @param length the number of valid bytes in {@code lineBytes}
- * @return Pair of row key offset and length.
- * @throws BadTsvLineException
- */
- public Pair<Integer, Integer> parseRowKey(byte[] lineBytes, int length)
- throws BadTsvLineException {
- int rkColumnIndex = 0;
- int startPos = 0, endPos = 0;
- for (int i = 0; i <= length; i++) {
- if (i == length || lineBytes[i] == separatorByte) {
- endPos = i - 1;
- if (rkColumnIndex++ == getRowKeyColumnIndex()) {
- if ((endPos + 1) == startPos) {
- throw new BadTsvLineException("Empty value for ROW KEY.");
- }
- break;
- } else {
- startPos = endPos + 2;
- }
- }
- if (i == length) {
- throw new BadTsvLineException(
- "Row key does not exist as number of columns in the line"
- + " are less than row key position.");
- }
- }
- return new Pair<>(startPos, endPos - startPos + 1);
- }
- }
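As a rough illustration of how the TsvParser above is driven (a sketch only, not part of this patch; it is placed in the org.apache.hadoop.hbase.mapreduce package so that the package-private ParsedLine type is visible, and the column spec and data are made up):

package org.apache.hadoop.hbase.mapreduce;

import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser;
import org.apache.hadoop.hbase.util.Bytes;

public class TsvParserSketch {
  public static void main(String[] args) throws TsvParser.BadTsvLineException {
    // Column spec and separator mirror -Dimporttsv.columns / -Dimporttsv.separator.
    TsvParser parser = new TsvParser("HBASE_ROW_KEY,d:c1,d:c2", "\t");
    byte[] line = Bytes.toBytes("row1\tvalue1\tvalue2");
    TsvParser.ParsedLine parsed = parser.parse(line, line.length);
    String rowKey = Bytes.toString(line, parsed.getRowKeyOffset(), parsed.getRowKeyLength());
    System.out.println("row key = " + rowKey);                   // prints: row1
    System.out.println("columns = " + parsed.getColumnCount());  // prints: 3
  }
}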
-
- /**
- * Sets up the actual job.
- *
- * @param conf The current configuration.
- * @param args The command line parameters.
- * @return The newly created job.
- * @throws IOException When setting up the job fails.
- */
- protected static Job createSubmittableJob(Configuration conf, String[] args)
- throws IOException, ClassNotFoundException {
- Job job = null;
- boolean isDryRun = conf.getBoolean(DRY_RUN_CONF_KEY, false);
- try (Connection connection = ConnectionFactory.createConnection(conf)) {
- try (Admin admin = connection.getAdmin()) {
-        // Support separator characters that are not valid in XML configuration
- // by re-encoding the passed separator as a Base64 string.
- String actualSeparator = conf.get(SEPARATOR_CONF_KEY);
- if (actualSeparator != null) {
- conf.set(SEPARATOR_CONF_KEY,
- Base64.encodeBytes(actualSeparator.getBytes()));
- }
-
- // See if a non-default Mapper was set
- String mapperClassName = conf.get(MAPPER_CONF_KEY);
- Class mapperClass = mapperClassName != null? Class.forName(mapperClassName): DEFAULT_MAPPER;
-
- TableName tableName = TableName.valueOf(args[0]);
- Path inputDir = new Path(args[1]);
- String jobName = conf.get(JOB_NAME_CONF_KEY,NAME + "_" + tableName.getNameAsString());
- job = Job.getInstance(conf, jobName);
- job.setJarByClass(mapperClass);
- FileInputFormat.setInputPaths(job, inputDir);
- job.setInputFormatClass(TextInputFormat.class);
- job.setMapperClass(mapperClass);
- job.setMapOutputKeyClass(ImmutableBytesWritable.class);
- String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
- String[] columns = conf.getStrings(COLUMNS_CONF_KEY);
- if(StringUtils.isNotEmpty(conf.get(CREDENTIALS_LOCATION))) {
- String fileLoc = conf.get(CREDENTIALS_LOCATION);
- Credentials cred = Credentials.readTokenStorageFile(new File(fileLoc), conf);
- job.getCredentials().addAll(cred);
- }
-
- if (hfileOutPath != null) {
- if (!admin.tableExists(tableName)) {
- LOG.warn(format("Table '%s' does not exist.", tableName));
- if ("yes".equalsIgnoreCase(conf.get(CREATE_TABLE_CONF_KEY, "yes"))) {
- // TODO: this is backwards. Instead of depending on the existence of a table,
- // create a sane splits file for HFileOutputFormat based on data sampling.
- createTable(admin, tableName, columns);
- if (isDryRun) {
- LOG.warn("Dry run: Table will be deleted at end of dry run.");
- synchronized (ImportTsv.class) {
- DRY_RUN_TABLE_CREATED = true;
- }
- }
- } else {
- String errorMsg =
- format("Table '%s' does not exist and '%s' is set to no.", tableName,
- CREATE_TABLE_CONF_KEY);
- LOG.error(errorMsg);
- throw new TableNotFoundException(errorMsg);
- }
- }
- try (Table table = connection.getTable(tableName);
- RegionLocator regionLocator = connection.getRegionLocator(tableName)) {
- boolean noStrict = conf.getBoolean(NO_STRICT_COL_FAMILY, false);
- // if no.strict is false then check column family
- if(!noStrict) {
- ArrayList<String> unmatchedFamilies = new ArrayList<>();
- Set<String> cfSet = getColumnFamilies(columns);
- TableDescriptor tDesc = table.getDescriptor();
- for (String cf : cfSet) {
- if(!tDesc.hasColumnFamily(Bytes.toBytes(cf))) {
- unmatchedFamilies.add(cf);
- }
- }
- if(unmatchedFamilies.size() > 0) {
- ArrayList<String> familyNames = new ArrayList<>();
- for (ColumnFamilyDescriptor family : table.getDescriptor().getColumnFamilies()) {
- familyNames.add(family.getNameAsString());
- }
- String msg =
- "Column Families " + unmatchedFamilies + " specified in " + COLUMNS_CONF_KEY
- + " does not match with any of the table " + tableName
- + " column families " + familyNames + ".\n"
- + "To disable column family check, use -D" + NO_STRICT_COL_FAMILY
- + "=true.\n";
- usage(msg);
- System.exit(-1);
- }
- }
- if (mapperClass.equals(TsvImporterTextMapper.class)) {
- job.setMapOutputValueClass(Text.class);
- job.setReducerClass(TextSortReducer.class);
- } else {
- job.setMapOutputValueClass(Put.class);
- job.setCombinerClass(PutCombiner.class);
- job.setReducerClass(PutSortReducer.class);
- }
- if (!isDryRun) {
- Path outputDir = new Path(hfileOutPath);
- FileOutputFormat.setOutputPath(job, outputDir);
- HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(),
- regionLocator);
- }
- }
- } else {
- if (!admin.tableExists(tableName)) {
- String errorMsg = format("Table '%s' does not exist.", tableName);
- LOG.error(errorMsg);
- throw new TableNotFoundException(errorMsg);
- }
- if (mapperClass.equals(TsvImporterTextMapper.class)) {
- usage(TsvImporterTextMapper.class.toString()
- + " should not be used for non bulkloading case. use "
- + TsvImporterMapper.class.toString()
- + " or custom mapper whose value type is Put.");
- System.exit(-1);
- }
- if (!isDryRun) {
- // No reducers. Just write straight to table. Call initTableReducerJob
- // to set up the TableOutputFormat.
- TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job);
- }
- job.setNumReduceTasks(0);
- }
- if (isDryRun) {
- job.setOutputFormatClass(NullOutputFormat.class);
- job.getConfiguration().setStrings("io.serializations",
- job.getConfiguration().get("io.serializations"),
- MutationSerialization.class.getName(), ResultSerialization.class.getName(),
- KeyValueSerialization.class.getName());
- }
- TableMapReduceUtil.addDependencyJars(job);
- TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
- org.apache.hadoop.hbase.shaded.com.google.common.base.Function.class /* Guava used by TsvParser */);
- }
- }
- return job;
- }
-
- private static void createTable(Admin admin, TableName tableName, String[] columns)
- throws IOException {
- HTableDescriptor htd = new HTableDescriptor(tableName);
- Set<String> cfSet = getColumnFamilies(columns);
- for (String cf : cfSet) {
- HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toBytes(cf));
- htd.addFamily(hcd);
- }
- LOG.warn(format("Creating table '%s' with '%s' columns and default descriptors.",
- tableName, cfSet));
- admin.createTable(htd);
- }
-
- private static void deleteTable(Configuration conf, String[] args) {
- TableName tableName = TableName.valueOf(args[0]);
- try (Connection connection = ConnectionFactory.createConnection(conf);
- Admin admin = connection.getAdmin()) {
- try {
- admin.disableTable(tableName);
- } catch (TableNotEnabledException e) {
- LOG.debug("Dry mode: Table: " + tableName + " already disabled, so just deleting it.");
- }
- admin.deleteTable(tableName);
- } catch (IOException e) {
- LOG.error(format("***Dry run: Failed to delete table '%s'.***%n%s", tableName,
- e.toString()));
- return;
- }
- LOG.info(format("Dry run: Deleted table '%s'.", tableName));
- }
-
- private static Set<String> getColumnFamilies(String[] columns) {
- Set<String> cfSet = new HashSet<>();
- for (String aColumn : columns) {
- if (TsvParser.ROWKEY_COLUMN_SPEC.equals(aColumn)
- || TsvParser.TIMESTAMPKEY_COLUMN_SPEC.equals(aColumn)
- || TsvParser.CELL_VISIBILITY_COLUMN_SPEC.equals(aColumn)
- || TsvParser.CELL_TTL_COLUMN_SPEC.equals(aColumn)
- || TsvParser.ATTRIBUTES_COLUMN_SPEC.equals(aColumn))
- continue;
- // we are only concerned with the first one (in case this is a cf:cq)
- cfSet.add(aColumn.split(":", 2)[0]);
- }
- return cfSet;
- }
-
- /*
- * @param errorMsg Error message. Can be null.
- */
- private static void usage(final String errorMsg) {
- if (errorMsg != null && errorMsg.length() > 0) {
- System.err.println("ERROR: " + errorMsg);
- }
- String usage =
- "Usage: " + NAME + " -D"+ COLUMNS_CONF_KEY + "=a,b,c <tablename> <inputdir>\n" +
- "\n" +
- "Imports the given input directory of TSV data into the specified table.\n" +
- "\n" +
- "The column names of the TSV data must be specified using the -D" + COLUMNS_CONF_KEY + "\n" +
- "option. This option takes the form of comma-separated column names, where each\n" +
- "column name is either a simple column family, or a columnfamily:qualifier. The special\n" +
- "column name " + TsvParser.ROWKEY_COLUMN_SPEC + " is used to designate that this column should be used\n" +
- "as the row key for each imported record. You must specify exactly one column\n" +
- "to be the row key, and you must specify a column name for every column that exists in the\n" +
- "input data. Another special column" + TsvParser.TIMESTAMPKEY_COLUMN_SPEC +
- " designates that this column should be\n" +
- "used as timestamp for each record. Unlike " + TsvParser.ROWKEY_COLUMN_SPEC + ", " +
- TsvParser.TIMESTAMPKEY_COLUMN_SPEC + " is optional." + "\n" +
- "You must specify at most one column as timestamp key for each imported record.\n" +
- "Record with invalid timestamps (blank, non-numeric) will be treated as bad record.\n" +
- "Note: if you use this option, then '" + TIMESTAMP_CONF_KEY + "' option will be ignored.\n" +
- "\n" +
- "Other special columns that can be specified are " + TsvParser.CELL_TTL_COLUMN_SPEC +
- " and " + TsvParser.CELL_VISIBILITY_COLUMN_SPEC + ".\n" +
- TsvParser.CELL_TTL_COLUMN_SPEC + " designates that this column will be used " +
- "as a Cell's Time To Live (TTL) attribute.\n" +
- TsvParser.CELL_VISIBILITY_COLUMN_SPEC + " designates that this column contains the " +
- "visibility label expression.\n" +
- "\n" +
-      TsvParser.ATTRIBUTES_COLUMN_SPEC + " can be used to specify Operation Attributes per record.\n" +
-      " Should be specified as key=>value where " + DEFAULT_ATTRIBUTES_SEPERATOR + " is used\n" +
-      " as the separator. Note that more than one operation attribute can be specified.\n" +
- "By default importtsv will load data directly into HBase. To instead generate\n" +
- "HFiles of data to prepare for a bulk data load, pass the option:\n" +
- " -D" + BULK_OUTPUT_CONF_KEY + "=/path/for/output\n" +
- " Note: if you do not use this option, then the target table must already exist in HBase\n" +
- "\n" +
- "Other options that may be specified with -D include:\n" +
- " -D" + DRY_RUN_CONF_KEY + "=true - Dry run mode. Data is not actually populated into" +
- " table. If table does not exist, it is created but deleted in the end.\n" +
- " -D" + SKIP_LINES_CONF_KEY + "=false - fail if encountering an invalid line\n" +
- " -D" + LOG_BAD_LINES_CONF_KEY + "=true - logs invalid lines to stderr\n" +
- " -D" + SKIP_EMPTY_COLUMNS + "=false - If true then skip empty columns in bulk import\n" +
- " '-D" + SEPARATOR_CONF_KEY + "=|' - eg separate on pipes instead of tabs\n" +
- " -D" + TIMESTAMP_CONF_KEY + "=currentTimeAsLong - use the specified timestamp for the import\n" +
- " -D" + MAPPER_CONF_KEY + "=my.Mapper - A user-defined Mapper to use instead of " +
- DEFAULT_MAPPER.getName() + "\n" +
- " -D" + JOB_NAME_CONF_KEY + "=jobName - use the specified mapreduce job name for the import\n" +
- " -D" + CREATE_TABLE_CONF_KEY + "=no - can be used to avoid creation of table by this tool\n" +
- " Note: if you set this to 'no', then the target table must already exist in HBase\n" +
- " -D" + NO_STRICT_COL_FAMILY + "=true - ignore column family check in hbase table. " +
- "Default is false\n\n" +
- "For performance consider the following options:\n" +
- " -Dmapreduce.map.speculative=false\n" +
- " -Dmapreduce.reduce.speculative=false";
-
- System.err.println(usage);
- }
-
- @Override
- public int run(String[] args) throws Exception {
- if (args.length < 2) {
- usage("Wrong number of arguments: " + args.length);
- return -1;
- }
-
- // When MAPPER_CONF_KEY is null, the user wants to use the provided TsvImporterMapper, so
- // perform validation on these additional args. When it's not null, user has provided their
-    // own mapper, so these validations are not relevant.
- // TODO: validation for TsvImporterMapper, not this tool. Move elsewhere.
- if (null == getConf().get(MAPPER_CONF_KEY)) {
- // Make sure columns are specified
- String[] columns = getConf().getStrings(COLUMNS_CONF_KEY);
- if (columns == null) {
- usage("No columns specified. Please specify with -D" +
- COLUMNS_CONF_KEY+"=...");
- return -1;
- }
-
- // Make sure they specify exactly one column as the row key
- int rowkeysFound = 0;
- for (String col : columns) {
- if (col.equals(TsvParser.ROWKEY_COLUMN_SPEC)) rowkeysFound++;
- }
- if (rowkeysFound != 1) {
- usage("Must specify exactly one column as " + TsvParser.ROWKEY_COLUMN_SPEC);
- return -1;
- }
-
- // Make sure we have at most one column as the timestamp key
- int tskeysFound = 0;
- for (String col : columns) {
- if (col.equals(TsvParser.TIMESTAMPKEY_COLUMN_SPEC))
- tskeysFound++;
- }
- if (tskeysFound > 1) {
- usage("Must specify at most one column as "
- + TsvParser.TIMESTAMPKEY_COLUMN_SPEC);
- return -1;
- }
-
- int attrKeysFound = 0;
- for (String col : columns) {
- if (col.equals(TsvParser.ATTRIBUTES_COLUMN_SPEC))
- attrKeysFound++;
- }
- if (attrKeysFound > 1) {
- usage("Must specify at most one column as "
- + TsvParser.ATTRIBUTES_COLUMN_SPEC);
- return -1;
- }
-
- // Make sure one or more columns are specified excluding rowkey and
- // timestamp key
- if (columns.length - (rowkeysFound + tskeysFound + attrKeysFound) < 1) {
- usage("One or more columns in addition to the row key and timestamp(optional) are required");
- return -1;
- }
- }
-
- // If timestamp option is not specified, use current system time.
-    long timestamp = getConf().getLong(TIMESTAMP_CONF_KEY, System.currentTimeMillis());
-
-    // Set it back to replace invalid timestamp (non-numeric) with current
-    // system time
-    getConf().setLong(TIMESTAMP_CONF_KEY, timestamp);
-
- synchronized (ImportTsv.class) {
- DRY_RUN_TABLE_CREATED = false;
- }
- Job job = createSubmittableJob(getConf(), args);
- boolean success = job.waitForCompletion(true);
- boolean delete = false;
- synchronized (ImportTsv.class) {
- delete = DRY_RUN_TABLE_CREATED;
- }
- if (delete) {
- deleteTable(getConf(), args);
- }
- return success ? 0 : 1;
- }
-
- public static void main(String[] args) throws Exception {
- int status = ToolRunner.run(HBaseConfiguration.create(), new ImportTsv(), args);
- System.exit(status);
- }
-}
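A minimal sketch (not part of this patch) of launching the ImportTsv tool above programmatically, the equivalent of the command-line usage it prints. The table name and paths are placeholders, and the bulk-output switch can be dropped to write Puts directly to the table:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.ImportTsv;
import org.apache.hadoop.util.ToolRunner;

public class ImportTsvLauncherSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // Same switches as the usage text: the column spec is mandatory,
    // importtsv.bulk.output is optional (omit it to write Puts directly).
    String[] toolArgs = {
        "-Dimporttsv.columns=HBASE_ROW_KEY,d:c1,d:c2",
        "-Dimporttsv.bulk.output=/tmp/hfiles",
        "mytable",
        "/data/input.tsv"
    };
    System.exit(ToolRunner.run(conf, new ImportTsv(), toolArgs));
  }
}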
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/JarFinder.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/JarFinder.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/JarFinder.java
deleted file mode 100644
index 953df62..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/JarFinder.java
+++ /dev/null
@@ -1,186 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.base.Preconditions;
-
-import java.io.BufferedOutputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.net.URL;
-import java.net.URLDecoder;
-import java.text.MessageFormat;
-import java.util.Enumeration;
-import java.util.jar.JarFile;
-import java.util.jar.JarOutputStream;
-import java.util.jar.Manifest;
-import java.util.zip.ZipEntry;
-import java.util.zip.ZipOutputStream;
-
-/**
- * Finds the Jar for a class. If the class is in a directory in the
- * classpath, it creates a Jar on the fly with the contents of the directory
- * and returns the path to that Jar. If a Jar is created, it is created under the
- * directory named by the "test.build.dir" system property (default "target/test-dir").
- *
- * This file was forked from hadoop/common/branches/branch-2@1377176.
- */
-public class JarFinder {
-
- private static void copyToZipStream(File file, ZipEntry entry,
- ZipOutputStream zos) throws IOException {
- InputStream is = new FileInputStream(file);
- try {
- zos.putNextEntry(entry);
- byte[] arr = new byte[4096];
- int read = is.read(arr);
- while (read > -1) {
- zos.write(arr, 0, read);
- read = is.read(arr);
- }
- } finally {
- try {
- is.close();
- } finally {
- zos.closeEntry();
- }
- }
- }
-
- public static void jarDir(File dir, String relativePath, ZipOutputStream zos)
- throws IOException {
- Preconditions.checkNotNull(relativePath, "relativePath");
- Preconditions.checkNotNull(zos, "zos");
-
- // by JAR spec, if there is a manifest, it must be the first entry in the
- // ZIP.
- File manifestFile = new File(dir, JarFile.MANIFEST_NAME);
- ZipEntry manifestEntry = new ZipEntry(JarFile.MANIFEST_NAME);
- if (!manifestFile.exists()) {
- zos.putNextEntry(manifestEntry);
- new Manifest().write(new BufferedOutputStream(zos));
- zos.closeEntry();
- } else {
- copyToZipStream(manifestFile, manifestEntry, zos);
- }
- zos.closeEntry();
- zipDir(dir, relativePath, zos, true);
- zos.close();
- }
-
- private static void zipDir(File dir, String relativePath, ZipOutputStream zos,
- boolean start) throws IOException {
- String[] dirList = dir.list();
- if (dirList == null) {
- return;
- }
- for (String aDirList : dirList) {
- File f = new File(dir, aDirList);
- if (!f.isHidden()) {
- if (f.isDirectory()) {
- if (!start) {
- ZipEntry dirEntry = new ZipEntry(relativePath + f.getName() + "/");
- zos.putNextEntry(dirEntry);
- zos.closeEntry();
- }
- String filePath = f.getPath();
- File file = new File(filePath);
- zipDir(file, relativePath + f.getName() + "/", zos, false);
- }
- else {
- String path = relativePath + f.getName();
- if (!path.equals(JarFile.MANIFEST_NAME)) {
- ZipEntry anEntry = new ZipEntry(path);
- copyToZipStream(f, anEntry, zos);
- }
- }
- }
- }
- }
-
- private static void createJar(File dir, File jarFile) throws IOException {
- Preconditions.checkNotNull(dir, "dir");
- Preconditions.checkNotNull(jarFile, "jarFile");
- File jarDir = jarFile.getParentFile();
- if (!jarDir.exists()) {
- if (!jarDir.mkdirs()) {
- throw new IOException(MessageFormat.format("could not create dir [{0}]",
- jarDir));
- }
- }
- try (FileOutputStream fos = new FileOutputStream(jarFile);
- JarOutputStream jos = new JarOutputStream(fos)) {
- jarDir(dir, "", jos);
- }
- }
-
- /**
-   * Returns the full path to the Jar containing the class. It always returns a
- * JAR.
- *
- * @param klass class.
- *
- * @return path to the Jar containing the class.
- */
- public static String getJar(Class klass) {
- Preconditions.checkNotNull(klass, "klass");
- ClassLoader loader = klass.getClassLoader();
- if (loader != null) {
- String class_file = klass.getName().replaceAll("\\.", "/") + ".class";
- try {
- for (Enumeration itr = loader.getResources(class_file);
- itr.hasMoreElements(); ) {
- URL url = (URL) itr.nextElement();
- String path = url.getPath();
- if (path.startsWith("file:")) {
- path = path.substring("file:".length());
- }
- path = URLDecoder.decode(path, "UTF-8");
- if ("jar".equals(url.getProtocol())) {
- path = URLDecoder.decode(path, "UTF-8");
- return path.replaceAll("!.*$", "");
- }
- else if ("file".equals(url.getProtocol())) {
- String klassName = klass.getName();
- klassName = klassName.replace(".", "/") + ".class";
- path = path.substring(0, path.length() - klassName.length());
- File baseDir = new File(path);
- File testDir = new File(System.getProperty("test.build.dir", "target/test-dir"));
- testDir = testDir.getAbsoluteFile();
- if (!testDir.exists()) {
- testDir.mkdirs();
- }
- File tempJar = File.createTempFile("hadoop-", "", testDir);
- tempJar = new File(tempJar.getAbsolutePath() + ".jar");
- tempJar.deleteOnExit();
- createJar(baseDir, tempJar);
- return tempJar.getAbsolutePath();
- }
- }
- }
- catch (IOException e) {
- throw new RuntimeException(e);
- }
- }
- return null;
- }
-}
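A minimal sketch (not part of this patch) of the JarFinder.getJar() contract described above; the class used is illustrative only:

import org.apache.hadoop.hbase.mapreduce.JarFinder;

public class JarFinderSketch {
  public static void main(String[] args) {
    // If this class was loaded from a jar, the path of that jar is returned.
    // If it was loaded from a plain directory, the directory is packed into a
    // temporary jar under test.build.dir (default target/test-dir) first.
    String jarPath = JarFinder.getJar(JarFinderSketch.class);
    System.out.println("jar for class: " + jarPath);
  }
}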
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/KeyValueSerialization.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/KeyValueSerialization.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/KeyValueSerialization.java
deleted file mode 100644
index 241608b..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/KeyValueSerialization.java
+++ /dev/null
@@ -1,88 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.KeyValueUtil;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.io.serializer.Deserializer;
-import org.apache.hadoop.io.serializer.Serialization;
-import org.apache.hadoop.io.serializer.Serializer;
-
-@InterfaceAudience.Public
-public class KeyValueSerialization implements Serialization<KeyValue> {
- @Override
- public boolean accept(Class<?> c) {
- return KeyValue.class.isAssignableFrom(c);
- }
-
- @Override
- public KeyValueDeserializer getDeserializer(Class<KeyValue> t) {
- return new KeyValueDeserializer();
- }
-
- @Override
- public KeyValueSerializer getSerializer(Class<KeyValue> c) {
- return new KeyValueSerializer();
- }
-
- public static class KeyValueDeserializer implements Deserializer<KeyValue> {
- private DataInputStream dis;
-
- @Override
- public void close() throws IOException {
- this.dis.close();
- }
-
- @Override
- public KeyValue deserialize(KeyValue ignore) throws IOException {
- // I can't overwrite the passed in KV, not from a proto kv, not just yet. TODO
- return KeyValueUtil.create(this.dis);
- }
-
- @Override
- public void open(InputStream is) throws IOException {
- this.dis = new DataInputStream(is);
- }
- }
-
- public static class KeyValueSerializer implements Serializer<KeyValue> {
- private DataOutputStream dos;
-
- @Override
- public void close() throws IOException {
- this.dos.close();
- }
-
- @Override
- public void open(OutputStream os) throws IOException {
- this.dos = new DataOutputStream(os);
- }
-
- @Override
- public void serialize(KeyValue kv) throws IOException {
- KeyValueUtil.write(kv, this.dos);
- }
- }
-}
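A minimal sketch (not part of this patch) of registering KeyValueSerialization on a job, in the same way the dry-run branch of ImportTsv above appends it to io.serializations; the job name is a placeholder:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.KeyValueSerialization;
import org.apache.hadoop.mapreduce.Job;

public class SerializationSetupSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    Job job = Job.getInstance(conf, "kv-serialization-demo");
    // Append the HBase serialization so the framework can (de)serialize KeyValue
    // map output values alongside the default Writable serialization.
    job.getConfiguration().setStrings("io.serializations",
        job.getConfiguration().get("io.serializations"),
        KeyValueSerialization.class.getName());
  }
}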
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/KeyValueSortReducer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/KeyValueSortReducer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/KeyValueSortReducer.java
deleted file mode 100644
index 5c7ace2..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/KeyValueSortReducer.java
+++ /dev/null
@@ -1,56 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.util.TreeSet;
-
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.CellComparator;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.mapreduce.Reducer;
-
-/**
- * Emits sorted KeyValues.
- * Reads in all KeyValues from passed Iterator, sorts them, then emits
- * KeyValues in sorted order. If lots of columns per row, it will use lots of
- * memory sorting.
- * @see HFileOutputFormat2
- */
-@InterfaceAudience.Public
-public class KeyValueSortReducer extends Reducer<ImmutableBytesWritable, KeyValue, ImmutableBytesWritable, KeyValue> {
- protected void reduce(ImmutableBytesWritable row, java.lang.Iterable<KeyValue> kvs,
- org.apache.hadoop.mapreduce.Reducer<ImmutableBytesWritable, KeyValue, ImmutableBytesWritable, KeyValue>.Context context)
- throws java.io.IOException, InterruptedException {
- TreeSet<KeyValue> map = new TreeSet<>(CellComparator.COMPARATOR);
- for (KeyValue kv: kvs) {
- try {
- map.add(kv.clone());
- } catch (CloneNotSupportedException e) {
- throw new java.io.IOException(e);
- }
- }
- context.setStatus("Read " + map.getClass());
- int index = 0;
- for (KeyValue kv: map) {
- context.write(row, kv);
- if (++index % 100 == 0) context.setStatus("Wrote " + index);
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableHFileOutputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableHFileOutputFormat.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableHFileOutputFormat.java
deleted file mode 100644
index d7c7cc0..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableHFileOutputFormat.java
+++ /dev/null
@@ -1,122 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- * <p>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.TableDescriptor;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapreduce.Job;
-
-import java.io.IOException;
-import java.nio.charset.Charset;
-import java.util.List;
-import org.apache.hadoop.hbase.shaded.com.google.common.annotations.VisibleForTesting;
-
-/**
- * Creates a three-level directory tree: the first level uses the table name as the parent
- * directory, the second level uses the column family name as the child directory, and all
- * HFiles for a given family are placed under that child directory:
- * -tableName1
- *   -columnFamilyName1
- *   -columnFamilyName2
- *     -HFiles
- * -tableName2
- *   -columnFamilyName1
- *     -HFiles
- *   -columnFamilyName2
- */
-@InterfaceAudience.Public
-@VisibleForTesting
-public class MultiTableHFileOutputFormat extends HFileOutputFormat2 {
- private static final Log LOG = LogFactory.getLog(MultiTableHFileOutputFormat.class);
-
- /**
-   * Creates a composite key to use as a mapper output key when using
-   * MultiTableHFileOutputFormat.configureIncrementalLoad to set up a bulk ingest job.
-   *
-   * @param tableName Name of the table, e.g. TableName.getNameAsString()
-   * @param suffix    Usually a row key (when creating a mapper output key) or a column family
-   * @return          byte[] representation of the composite key
- */
- public static byte[] createCompositeKey(byte[] tableName,
- byte[] suffix) {
- return combineTableNameSuffix(tableName, suffix);
- }
-
- /**
- * Alternate api which accepts an ImmutableBytesWritable for the suffix
- * @see MultiTableHFileOutputFormat#createCompositeKey(byte[], byte[])
- */
- public static byte[] createCompositeKey(byte[] tableName,
- ImmutableBytesWritable suffix) {
- return combineTableNameSuffix(tableName, suffix.get());
- }
-
- /**
- * Alternate api which accepts a String for the tableName and ImmutableBytesWritable for the
- * suffix
- * @see MultiTableHFileOutputFormat#createCompositeKey(byte[], byte[])
- */
- public static byte[] createCompositeKey(String tableName,
- ImmutableBytesWritable suffix) {
- return combineTableNameSuffix(tableName.getBytes(Charset.forName("UTF-8")), suffix.get());
- }
-
- /**
- * Analogous to
- * {@link HFileOutputFormat2#configureIncrementalLoad(Job, TableDescriptor, RegionLocator)},
-   * this function configures the requisite number of reducers to write HFiles for multiple
-   * tables simultaneously.
- *
- * @param job See {@link org.apache.hadoop.mapreduce.Job}
- * @param multiTableDescriptors Table descriptor and region locator pairs
- * @throws IOException
- */
- public static void configureIncrementalLoad(Job job, List<TableInfo>
- multiTableDescriptors)
- throws IOException {
- MultiTableHFileOutputFormat.configureIncrementalLoad(job, multiTableDescriptors,
- MultiTableHFileOutputFormat.class);
- }
-
- final private static int validateCompositeKey(byte[] keyBytes) {
-
- int separatorIdx = Bytes.indexOf(keyBytes, HFileOutputFormat2.tableSeparator);
-
- // Either the separator was not found or a tablename wasn't present or a key wasn't present
- if (separatorIdx == -1) {
- throw new IllegalArgumentException("Invalid format for composite key [" + Bytes
- .toStringBinary(keyBytes) + "]. Cannot extract tablename and suffix from key");
- }
- return separatorIdx;
- }
-
- protected static byte[] getTableName(byte[] keyBytes) {
- int separatorIdx = validateCompositeKey(keyBytes);
- return Bytes.copy(keyBytes, 0, separatorIdx);
- }
-
- protected static byte[] getSuffix(byte[] keyBytes) {
- int separatorIdx = validateCompositeKey(keyBytes);
- return Bytes.copy(keyBytes, separatorIdx+1, keyBytes.length - separatorIdx - 1);
- }
-}
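A minimal sketch (not part of this patch) of producing the composite mapper output key that MultiTableHFileOutputFormat expects, so it can route cells to the right per-table directory; the table and row below are placeholders:

import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.MultiTableHFileOutputFormat;
import org.apache.hadoop.hbase.util.Bytes;

public class CompositeKeySketch {
  // Combine the table name and row key into the composite key used as the mapper output key.
  static ImmutableBytesWritable compositeKeyFor(TableName table, byte[] rowKey) {
    return new ImmutableBytesWritable(
        MultiTableHFileOutputFormat.createCompositeKey(table.getName(), rowKey));
  }

  public static void main(String[] args) {
    ImmutableBytesWritable key =
        compositeKeyFor(TableName.valueOf("table1"), Bytes.toBytes("row-0001"));
    System.out.println(Bytes.toStringBinary(key.get()));
  }
}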
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormat.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormat.java
deleted file mode 100644
index a8e6837..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormat.java
+++ /dev/null
@@ -1,104 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.client.Scan;
-
-/**
- * Convert HBase tabular data from multiple scanners into a format that
- * is consumable by Map/Reduce.
- *
- * <p>
- * Usage example
- * </p>
- *
- * <pre>
- * List<Scan> scans = new ArrayList<Scan>();
- *
- * Scan scan1 = new Scan();
- * scan1.setStartRow(firstRow1);
- * scan1.setStopRow(lastRow1);
- * scan1.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, table1);
- * scans.add(scan1);
- *
- * Scan scan2 = new Scan();
- * scan2.setStartRow(firstRow2);
- * scan2.setStopRow(lastRow2);
- * scan2.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, table2);
- * scans.add(scan2);
- *
- * TableMapReduceUtil.initTableMapperJob(scans, TableMapper.class, Text.class,
- * IntWritable.class, job);
- * </pre>
- */
-@InterfaceAudience.Public
-public class MultiTableInputFormat extends MultiTableInputFormatBase implements
- Configurable {
-
- /** Job parameter that specifies the scan list. */
- public static final String SCANS = "hbase.mapreduce.scans";
-
- /** The configuration. */
- private Configuration conf = null;
-
- /**
- * Returns the current configuration.
- *
- * @return The current configuration.
- * @see org.apache.hadoop.conf.Configurable#getConf()
- */
- @Override
- public Configuration getConf() {
- return conf;
- }
-
- /**
- * Sets the configuration. This is used to set the details for the tables to
- * be scanned.
- *
- * @param configuration The configuration to set.
- * @see org.apache.hadoop.conf.Configurable#setConf(
- * org.apache.hadoop.conf.Configuration)
- */
- @Override
- public void setConf(Configuration configuration) {
- this.conf = configuration;
- String[] rawScans = conf.getStrings(SCANS);
- if (rawScans.length <= 0) {
- throw new IllegalArgumentException("There must be at least 1 scan configuration set to : "
- + SCANS);
- }
- List<Scan> scans = new ArrayList<>();
-
- for (int i = 0; i < rawScans.length; i++) {
- try {
- scans.add(TableMapReduceUtil.convertStringToScan(rawScans[i]));
- } catch (IOException e) {
- throw new RuntimeException("Failed to convert Scan : " + rawScans[i] + " to string", e);
- }
- }
- this.setScans(scans);
- }
-}
[24/41] hbase git commit: HBASE-18640 Move mapreduce out of hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultithreadedTableMapper.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultithreadedTableMapper.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultithreadedTableMapper.java
new file mode 100644
index 0000000..694a359
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultithreadedTableMapper.java
@@ -0,0 +1,264 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.NavigableMap;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+/**
+ * Test Map/Reduce job over HBase tables. The map/reduce process we're testing
+ * on our tables is simple - take every row in the table, reverse the value of
+ * a particular cell, and write it back to the table.
+ */
+@Category({MapReduceTests.class, LargeTests.class})
+public class TestMultithreadedTableMapper {
+ private static final Log LOG = LogFactory.getLog(TestMultithreadedTableMapper.class);
+ private static final HBaseTestingUtility UTIL =
+ new HBaseTestingUtility();
+ static final TableName MULTI_REGION_TABLE_NAME = TableName.valueOf("mrtest");
+ static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
+ static final byte[] OUTPUT_FAMILY = Bytes.toBytes("text");
+ static final int NUMBER_OF_THREADS = 10;
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ // Up the handlers; this test needs more than usual.
+ UTIL.getConfiguration().setInt(HConstants.REGION_SERVER_HIGH_PRIORITY_HANDLER_COUNT, 10);
+ UTIL.startMiniCluster();
+ Table table =
+ UTIL.createMultiRegionTable(MULTI_REGION_TABLE_NAME, new byte[][] { INPUT_FAMILY,
+ OUTPUT_FAMILY });
+ UTIL.loadTable(table, INPUT_FAMILY, false);
+ UTIL.waitUntilAllRegionsAssigned(MULTI_REGION_TABLE_NAME);
+ }
+
+ @AfterClass
+ public static void afterClass() throws Exception {
+ UTIL.shutdownMiniCluster();
+ }
+
+ /**
+   * Pass the given key and the processed record to the reducer.
+ */
+ public static class ProcessContentsMapper
+ extends TableMapper<ImmutableBytesWritable, Put> {
+
+ /**
+     * Pass the key and the reversed value to the reducer.
+     *
+     * @param key the row key
+     * @param value the row's cells; expected to contain exactly one input column
+     * @param context the map context used to emit the reversed value
+ * @throws IOException
+ */
+ @Override
+ public void map(ImmutableBytesWritable key, Result value,
+ Context context)
+ throws IOException, InterruptedException {
+ if (value.size() != 1) {
+ throw new IOException("There should only be one input column");
+ }
+ Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>>
+ cf = value.getMap();
+ if(!cf.containsKey(INPUT_FAMILY)) {
+ throw new IOException("Wrong input columns. Missing: '" +
+ Bytes.toString(INPUT_FAMILY) + "'.");
+ }
+ // Get the original value and reverse it
+ String originalValue = Bytes.toString(value.getValue(INPUT_FAMILY, INPUT_FAMILY));
+ StringBuilder newValue = new StringBuilder(originalValue);
+ newValue.reverse();
+ // Now set the value to be collected
+ Put outval = new Put(key.get());
+ outval.addColumn(OUTPUT_FAMILY, null, Bytes.toBytes(newValue.toString()));
+ context.write(key, outval);
+ }
+ }
+
+ /**
+   * Test MultithreadedTableMapper map/reduce against a multi-region table
+ * @throws IOException
+ * @throws ClassNotFoundException
+ * @throws InterruptedException
+ */
+ @Test
+ public void testMultithreadedTableMapper()
+ throws IOException, InterruptedException, ClassNotFoundException {
+ runTestOnTable(UTIL.getConnection().getTable(MULTI_REGION_TABLE_NAME));
+ }
+
+ private void runTestOnTable(Table table)
+ throws IOException, InterruptedException, ClassNotFoundException {
+ Job job = null;
+ try {
+ LOG.info("Before map/reduce startup");
+ job = new Job(table.getConfiguration(), "process column contents");
+ job.setNumReduceTasks(1);
+ Scan scan = new Scan();
+ scan.addFamily(INPUT_FAMILY);
+ TableMapReduceUtil.initTableMapperJob(
+ table.getName(), scan,
+ MultithreadedTableMapper.class, ImmutableBytesWritable.class,
+ Put.class, job);
+ MultithreadedTableMapper.setMapperClass(job, ProcessContentsMapper.class);
+ MultithreadedTableMapper.setNumberOfThreads(job, NUMBER_OF_THREADS);
+ TableMapReduceUtil.initTableReducerJob(
+ table.getName().getNameAsString(),
+ IdentityTableReducer.class, job);
+ FileOutputFormat.setOutputPath(job, new Path("test"));
+ LOG.info("Started " + table.getName());
+ assertTrue(job.waitForCompletion(true));
+ LOG.info("After map/reduce completion");
+ // verify map-reduce results
+ verify(table.getName());
+ } finally {
+ table.close();
+ if (job != null) {
+ FileUtil.fullyDelete(
+ new File(job.getConfiguration().get("hadoop.tmp.dir")));
+ }
+ }
+ }
+
+ private void verify(TableName tableName) throws IOException {
+ Table table = UTIL.getConnection().getTable(tableName);
+ boolean verified = false;
+ long pause = UTIL.getConfiguration().getLong("hbase.client.pause", 5 * 1000);
+ int numRetries = UTIL.getConfiguration().getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 5);
+ for (int i = 0; i < numRetries; i++) {
+ try {
+ LOG.info("Verification attempt #" + i);
+ verifyAttempt(table);
+ verified = true;
+ break;
+ } catch (NullPointerException e) {
+        // If here, a cell was empty. Presume it's because updates came in
+ // after the scanner had been opened. Wait a while and retry.
+ LOG.debug("Verification attempt failed: " + e.getMessage());
+ }
+ try {
+ Thread.sleep(pause);
+ } catch (InterruptedException e) {
+ // continue
+ }
+ }
+ assertTrue(verified);
+ table.close();
+ }
+
+ /**
+ * Looks at every value of the mapreduce output and verifies that indeed
+ * the values have been reversed.
+ *
+ * @param table Table to scan.
+ * @throws IOException
+ * @throws NullPointerException if we failed to find a cell value
+ */
+ private void verifyAttempt(final Table table)
+ throws IOException, NullPointerException {
+ Scan scan = new Scan();
+ scan.addFamily(INPUT_FAMILY);
+ scan.addFamily(OUTPUT_FAMILY);
+ ResultScanner scanner = table.getScanner(scan);
+ try {
+ Iterator<Result> itr = scanner.iterator();
+ assertTrue(itr.hasNext());
+ while(itr.hasNext()) {
+ Result r = itr.next();
+ if (LOG.isDebugEnabled()) {
+ if (r.size() > 2 ) {
+ throw new IOException("Too many results, expected 2 got " +
+ r.size());
+ }
+ }
+ byte[] firstValue = null;
+ byte[] secondValue = null;
+ int count = 0;
+ for(Cell kv : r.listCells()) {
+ if (count == 0) {
+ firstValue = CellUtil.cloneValue(kv);
+ }else if (count == 1) {
+ secondValue = CellUtil.cloneValue(kv);
+ }else if (count == 2) {
+ break;
+ }
+ count++;
+ }
+ String first = "";
+ if (firstValue == null) {
+ throw new NullPointerException(Bytes.toString(r.getRow()) +
+ ": first value is null");
+ }
+ first = Bytes.toString(firstValue);
+ String second = "";
+ if (secondValue == null) {
+ throw new NullPointerException(Bytes.toString(r.getRow()) +
+ ": second value is null");
+ }
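+ // Reverse the bytes of the second value; after reversal it should match the first value.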
+ byte[] secondReversed = new byte[secondValue.length];
+ for (int i = 0, j = secondValue.length - 1; j >= 0; j--, i++) {
+ secondReversed[i] = secondValue[j];
+ }
+ second = Bytes.toString(secondReversed);
+ if (first.compareTo(second) != 0) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("second key is not the reverse of first. row=" +
+ Bytes.toStringBinary(r.getRow()) + ", first value=" + first +
+ ", second value=" + second);
+ }
+ fail();
+ }
+ }
+ } finally {
+ scanner.close();
+ }
+ }
+
+}
+
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRegionSizeCalculator.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRegionSizeCalculator.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRegionSizeCalculator.java
new file mode 100644
index 0000000..301cfef
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRegionSizeCalculator.java
@@ -0,0 +1,160 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HRegionLocation;
+import org.apache.hadoop.hbase.RegionLoad;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.testclassification.MiscTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.mockito.Mockito;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+
+import static org.apache.hadoop.hbase.HConstants.DEFAULT_REGIONSERVER_PORT;
+import static org.junit.Assert.assertEquals;
+import static org.mockito.Mockito.when;
+
+@Category({MiscTests.class, SmallTests.class})
+public class TestRegionSizeCalculator {
+
+ private Configuration configuration = new Configuration();
+ private final long megabyte = 1024L * 1024L;
+ private final ServerName sn = ServerName.valueOf("local-rs", DEFAULT_REGIONSERVER_PORT,
+ ServerName.NON_STARTCODE);
+
+ @Test
+ public void testSimpleTestCase() throws Exception {
+
+ RegionLocator regionLocator = mockRegionLocator("region1", "region2", "region3");
+
+ Admin admin = mockAdmin(
+ mockRegion("region1", 123),
+ mockRegion("region3", 1232),
+ mockRegion("region2", 54321)
+ );
+
+ RegionSizeCalculator calculator = new RegionSizeCalculator(regionLocator, admin);
+
+ assertEquals(123 * megabyte, calculator.getRegionSize("region1".getBytes()));
+ assertEquals(54321 * megabyte, calculator.getRegionSize("region2".getBytes()));
+ assertEquals(1232 * megabyte, calculator.getRegionSize("region3".getBytes()));
+ // if regionCalculator does not know about a region, it should return 0
+ assertEquals(0 * megabyte, calculator.getRegionSize("otherTableRegion".getBytes()));
+
+ assertEquals(3, calculator.getRegionSizeMap().size());
+ }
+
+
+ /**
+ * When the size of a region in megabytes is larger than the largest possible integer,
+ * a loss of precision could cause an error.
+ * */
+ @Test
+ public void testLargeRegion() throws Exception {
+
+ RegionLocator regionLocator = mockRegionLocator("largeRegion");
+
+ Admin admin = mockAdmin(
+ mockRegion("largeRegion", Integer.MAX_VALUE)
+ );
+
+ RegionSizeCalculator calculator = new RegionSizeCalculator(regionLocator, admin);
+
+ assertEquals(((long) Integer.MAX_VALUE) * megabyte, calculator.getRegionSize("largeRegion".getBytes()));
+ }
+
+ /** When calculator is disabled, it should return 0 for each request.*/
+ @Test
+ public void testDisabled() throws Exception {
+ String regionName = "cz.goout:/index.html";
+ RegionLocator table = mockRegionLocator(regionName);
+
+ Admin admin = mockAdmin(
+ mockRegion(regionName, 999)
+ );
+
+ //first request on enabled calculator
+ RegionSizeCalculator calculator = new RegionSizeCalculator(table, admin);
+ assertEquals(999 * megabyte, calculator.getRegionSize(regionName.getBytes()));
+
+ //then disabled calculator.
+ configuration.setBoolean(RegionSizeCalculator.ENABLE_REGIONSIZECALCULATOR, false);
+ RegionSizeCalculator disabledCalculator = new RegionSizeCalculator(table, admin);
+ assertEquals(0 * megabyte, disabledCalculator.getRegionSize(regionName.getBytes()));
+
+ assertEquals(0, disabledCalculator.getRegionSizeMap().size());
+ }
+
+ /**
+ * Creates a mock RegionLocator for a table with the given region names.
+ * */
+ private RegionLocator mockRegionLocator(String... regionNames) throws IOException {
+ RegionLocator mockedTable = Mockito.mock(RegionLocator.class);
+ when(mockedTable.getName()).thenReturn(TableName.valueOf("sizeTestTable"));
+ List<HRegionLocation> regionLocations = new ArrayList<>(regionNames.length);
+ when(mockedTable.getAllRegionLocations()).thenReturn(regionLocations);
+
+ for (String regionName : regionNames) {
+ HRegionInfo info = Mockito.mock(HRegionInfo.class);
+ when(info.getRegionName()).thenReturn(regionName.getBytes());
+ regionLocations.add(new HRegionLocation(info, sn));
+ }
+
+ return mockedTable;
+ }
+
+ /**
+ * Creates a mock Admin that returns RegionLoad info for the given regions.
+ */
+ private Admin mockAdmin(RegionLoad... regionLoadArray) throws Exception {
+ Admin mockAdmin = Mockito.mock(Admin.class);
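+ // Region loads are keyed by region name bytes, so use a TreeMap with BYTES_COMPARATOR.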
+ Map<byte[], RegionLoad> regionLoads = new TreeMap<>(Bytes.BYTES_COMPARATOR);
+ for (RegionLoad regionLoad : regionLoadArray) {
+ regionLoads.put(regionLoad.getName(), regionLoad);
+ }
+ when(mockAdmin.getConfiguration()).thenReturn(configuration);
+ when(mockAdmin.getRegionLoad(sn, TableName.valueOf("sizeTestTable"))).thenReturn(regionLoads);
+ return mockAdmin;
+ }
+
+ /**
+ * Creates mock of region with given name and size.
+ *
+ * @param fileSizeMb number of megabytes occupied by the region in the file store
+ * */
+ private RegionLoad mockRegion(String regionName, int fileSizeMb) {
+ RegionLoad region = Mockito.mock(RegionLoad.class);
+ when(region.getName()).thenReturn(regionName.getBytes());
+ when(region.getNameAsString()).thenReturn(regionName);
+ when(region.getStorefileSizeMB()).thenReturn(fileSizeMb);
+ return region;
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java
new file mode 100644
index 0000000..3b84e2d
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java
@@ -0,0 +1,400 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.ArrayList;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.CategoryBasedTimeout;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.LauncherSecurityManager;
+import org.apache.hadoop.mapreduce.Counter;
+import org.apache.hadoop.mapreduce.Job;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestRule;
+
+/**
+ * Test the rowcounter map reduce job.
+ */
+@Category({MapReduceTests.class, LargeTests.class})
+public class TestRowCounter {
+ @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
+ withTimeout(this.getClass()).withLookingForStuckThread(true).build();
+ private static final Log LOG = LogFactory.getLog(TestRowCounter.class);
+ private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+ private final static String TABLE_NAME = "testRowCounter";
+ private final static String TABLE_NAME_TS_RANGE = "testRowCounter_ts_range";
+ private final static String COL_FAM = "col_fam";
+ private final static String COL1 = "c1";
+ private final static String COL2 = "c2";
+ private final static String COMPOSITE_COLUMN = "C:A:A";
+ private final static int TOTAL_ROWS = 10;
+ private final static int ROWS_WITH_ONE_COL = 2;
+
+ /**
+ * @throws java.lang.Exception
+ */
+ @BeforeClass
+ public static void setUpBeforeClass() throws Exception {
+ TEST_UTIL.startMiniCluster();
+ Table table = TEST_UTIL.createTable(TableName.valueOf(TABLE_NAME), Bytes.toBytes(COL_FAM));
+ writeRows(table, TOTAL_ROWS, ROWS_WITH_ONE_COL);
+ table.close();
+ }
+
+ /**
+ * @throws java.lang.Exception
+ */
+ @AfterClass
+ public static void tearDownAfterClass() throws Exception {
+ TEST_UTIL.shutdownMiniCluster();
+ }
+
+ /**
+ * Test a case when no column was specified in command line arguments.
+ *
+ * @throws Exception
+ */
+ @Test
+ public void testRowCounterNoColumn() throws Exception {
+ String[] args = new String[] {
+ TABLE_NAME
+ };
+ runRowCount(args, 10);
+ }
+
+ /**
+ * Test a case when the column specified in the command line arguments is
+ * present in only some of the rows.
+ *
+ * @throws Exception
+ */
+ @Test
+ public void testRowCounterExclusiveColumn() throws Exception {
+ String[] args = new String[] {
+ TABLE_NAME, COL_FAM + ":" + COL1
+ };
+ runRowCount(args, 8);
+ }
+
+ /**
+ * Test a case when the column specified in command line arguments is
+ * one for which the qualifier contains colons.
+ *
+ * @throws Exception
+ */
+ @Test
+ public void testRowCounterColumnWithColonInQualifier() throws Exception {
+ String[] args = new String[] {
+ TABLE_NAME, COL_FAM + ":" + COMPOSITE_COLUMN
+ };
+ runRowCount(args, 8);
+ }
+
+ /**
+ * Test a case when the column specified in the command line arguments is not part
+ * of the first KV for a row.
+ *
+ * @throws Exception
+ */
+ @Test
+ public void testRowCounterHiddenColumn() throws Exception {
+ String[] args = new String[] {
+ TABLE_NAME, COL_FAM + ":" + COL2
+ };
+ runRowCount(args, 10);
+ }
+
+
+ /**
+ * Test a case when the column specified in the command line arguments is
+ * present in only some of the rows and a row range filter is also specified
+ *
+ * @throws Exception
+ */
+ @Test
+ public void testRowCounterColumnAndRowRange() throws Exception {
+ String[] args = new String[] {
+ TABLE_NAME, "--range=\\x00rov,\\x00rox", COL_FAM + ":" + COL1
+ };
+ runRowCount(args, 8);
+ }
+
+ /**
+ * Test a case when a single range of start and end keys is specified
+ * @throws Exception
+ */
+ @Test
+ public void testRowCounterRowSingleRange() throws Exception {
+ String[] args = new String[] {
+ TABLE_NAME, "--range=\\x00row1,\\x00row3"
+ };
+ runRowCount(args, 2);
+ }
+
+ /**
+ * Test a case when a single range with only an end key is specified
+ * @throws Exception
+ */
+ @Test
+ public void testRowCounterRowSingleRangeUpperBound() throws Exception {
+ String[] args = new String[] {
+ TABLE_NAME, "--range=,\\x00row3"
+ };
+ runRowCount(args, 3);
+ }
+
+ /**
+ * Test a case when two ranges are specified, one of which has only an end key
+ * @throws Exception
+ */
+ @Test
+ public void testRowCounterRowMultiRangeUpperBound() throws Exception {
+ String[] args = new String[] {
+ TABLE_NAME, "--range=,\\x00row3;\\x00row5,\\x00row7"
+ };
+ runRowCount(args, 5);
+ }
+
+ /**
+ * Test a case when a range is specified with multiple ranges of start-end keys
+ * @throws Exception
+ */
+ @Test
+ public void testRowCounterRowMultiRange() throws Exception {
+ String[] args = new String[] {
+ TABLE_NAME, "--range=\\x00row1,\\x00row3;\\x00row5,\\x00row8"
+ };
+ runRowCount(args, 5);
+ }
+
+ /**
+ * Test a case when multiple ranges of start-end keys are specified;
+ * one range is populated and the other two are empty
+ * @throws Exception
+ */
+ @Test
+ public void testRowCounterRowMultiEmptyRange() throws Exception {
+ String[] args = new String[] {
+ TABLE_NAME, "--range=\\x00row1,\\x00row3;;"
+ };
+ runRowCount(args, 2);
+ }
+
+ @Test
+ public void testRowCounter10kRowRange() throws Exception {
+ String tableName = TABLE_NAME + "10k";
+
+ try (Table table = TEST_UTIL.createTable(
+ TableName.valueOf(tableName), Bytes.toBytes(COL_FAM))) {
+ writeRows(table, 10000, 0);
+ }
+ String[] args = new String[] {
+ tableName, "--range=\\x00row9872,\\x00row9875"
+ };
+ runRowCount(args, 3);
+ }
+
+ /**
+ * Test a case when the timerange is specified with --starttime and --endtime options
+ *
+ * @throws Exception
+ */
+ @Test
+ public void testRowCounterTimeRange() throws Exception {
+ final byte[] family = Bytes.toBytes(COL_FAM);
+ final byte[] col1 = Bytes.toBytes(COL1);
+ Put put1 = new Put(Bytes.toBytes("row_timerange_" + 1));
+ Put put2 = new Put(Bytes.toBytes("row_timerange_" + 2));
+ Put put3 = new Put(Bytes.toBytes("row_timerange_" + 3));
+
+ long ts;
+
+ // use a separate table for the timerange test
+ Table table = TEST_UTIL.createTable(TableName.valueOf(TABLE_NAME_TS_RANGE), Bytes.toBytes(COL_FAM));
+
+ ts = System.currentTimeMillis();
+ put1.addColumn(family, col1, ts, Bytes.toBytes("val1"));
+ table.put(put1);
+ Thread.sleep(100);
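+ // The sleep ensures put1 gets a strictly earlier timestamp than put2 and put3.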
+
+ ts = System.currentTimeMillis();
+ put2.addColumn(family, col1, ts, Bytes.toBytes("val2"));
+ put3.addColumn(family, col1, ts, Bytes.toBytes("val3"));
+ table.put(put2);
+ table.put(put3);
+ table.close();
+
+ String[] args = new String[] {
+ TABLE_NAME_TS_RANGE, COL_FAM + ":" + COL1,
+ "--starttime=" + 0,
+ "--endtime=" + ts
+ };
+ runRowCount(args, 1);
+
+ args = new String[] {
+ TABLE_NAME_TS_RANGE, COL_FAM + ":" + COL1,
+ "--starttime=" + 0,
+ "--endtime=" + (ts - 10)
+ };
+ runRowCount(args, 1);
+
+ args = new String[] {
+ TABLE_NAME_TS_RANGE, COL_FAM + ":" + COL1,
+ "--starttime=" + ts,
+ "--endtime=" + (ts + 1000)
+ };
+ runRowCount(args, 2);
+
+ args = new String[] {
+ TABLE_NAME_TS_RANGE, COL_FAM + ":" + COL1,
+ "--starttime=" + (ts - 30 * 1000),
+ "--endtime=" + (ts + 30 * 1000),
+ };
+ runRowCount(args, 3);
+ }
+
+ /**
+ * Run the RowCounter map reduce job and verify the row count.
+ *
+ * @param args the command line arguments to be used for rowcounter job.
+ * @param expectedCount the expected row count (result of map reduce job).
+ * @throws Exception
+ */
+ private void runRowCount(String[] args, int expectedCount) throws Exception {
+ Job job = RowCounter.createSubmittableJob(TEST_UTIL.getConfiguration(), args);
+ long start = System.currentTimeMillis();
+ job.waitForCompletion(true);
+ long duration = System.currentTimeMillis() - start;
+ LOG.debug("row count duration (ms): " + duration);
+ assertTrue(job.isSuccessful());
+ Counter counter = job.getCounters().findCounter(RowCounter.RowCounterMapper.Counters.ROWS);
+ assertEquals(expectedCount, counter.getValue());
+ }
+
+ /**
+ * Writes the given number of distinct rows into the table. Most rows have
+ * several columns; the last rowsWithOneCol rows have only one.
+ *
+ * @param table
+ * @throws IOException
+ */
+ private static void writeRows(Table table, int totalRows, int rowsWithOneCol) throws IOException {
+ final byte[] family = Bytes.toBytes(COL_FAM);
+ final byte[] value = Bytes.toBytes("abcd");
+ final byte[] col1 = Bytes.toBytes(COL1);
+ final byte[] col2 = Bytes.toBytes(COL2);
+ final byte[] col3 = Bytes.toBytes(COMPOSITE_COLUMN);
+ ArrayList<Put> rowsUpdate = new ArrayList<>();
+ // write few rows with two columns
+ int i = 0;
+ for (; i < totalRows - rowsWithOneCol; i++) {
+ // Use binary rows values to test for HBASE-15287.
+ byte[] row = Bytes.toBytesBinary("\\x00row" + i);
+ Put put = new Put(row);
+ put.addColumn(family, col1, value);
+ put.addColumn(family, col2, value);
+ put.addColumn(family, col3, value);
+ rowsUpdate.add(put);
+ }
+
+ // write few rows with only one column
+ for (; i < totalRows; i++) {
+ byte[] row = Bytes.toBytes("row" + i);
+ Put put = new Put(row);
+ put.addColumn(family, col2, value);
+ rowsUpdate.add(put);
+ }
+ table.put(rowsUpdate);
+ }
+
+ /**
+ * Test the main method. RowCounter should print usage help and call System.exit on bad arguments.
+ */
+ @Test
+ public void testImportMain() throws Exception {
+ PrintStream oldPrintStream = System.err;
+ SecurityManager SECURITY_MANAGER = System.getSecurityManager();
+ LauncherSecurityManager newSecurityManager= new LauncherSecurityManager();
+ System.setSecurityManager(newSecurityManager);
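+ // LauncherSecurityManager intercepts System.exit(), recording the exit code and throwing SecurityException.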
+ ByteArrayOutputStream data = new ByteArrayOutputStream();
+ String[] args = {};
+ System.setErr(new PrintStream(data));
+ try {
+ System.setErr(new PrintStream(data));
+
+ try {
+ RowCounter.main(args);
+ fail("should be SecurityException");
+ } catch (SecurityException e) {
+ assertEquals(-1, newSecurityManager.getExitCode());
+ assertTrue(data.toString().contains("Wrong number of parameters:"));
+ assertTrue(data.toString().contains(
+ "Usage: RowCounter [options] <tablename> " +
+ "[--starttime=[start] --endtime=[end] " +
+ "[--range=[startKey],[endKey][;[startKey],[endKey]...]] " +
+ "[<column1> <column2>...]"));
+ assertTrue(data.toString().contains("-Dhbase.client.scanner.caching=100"));
+ assertTrue(data.toString().contains("-Dmapreduce.map.speculative=false"));
+ }
+ data.reset();
+ try {
+ args = new String[2];
+ args[0] = "table";
+ args[1] = "--range=1";
+ RowCounter.main(args);
+ fail("should be SecurityException");
+ } catch (SecurityException e) {
+ assertEquals(-1, newSecurityManager.getExitCode());
+ assertTrue(data.toString().contains(
+ "Please specify range in such format as \"--range=a,b\" or, with only one boundary," +
+ " \"--range=,b\" or \"--range=a,\""));
+ assertTrue(data.toString().contains(
+ "Usage: RowCounter [options] <tablename> " +
+ "[--starttime=[start] --endtime=[end] " +
+ "[--range=[startKey],[endKey][;[startKey],[endKey]...]] " +
+ "[<column1> <column2>...]"));
+ }
+
+ } finally {
+ System.setErr(oldPrintStream);
+ System.setSecurityManager(SECURITY_MANAGER);
+ }
+
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSecureLoadIncrementalHFiles.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSecureLoadIncrementalHFiles.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSecureLoadIncrementalHFiles.java
new file mode 100644
index 0000000..78fddbc
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSecureLoadIncrementalHFiles.java
@@ -0,0 +1,70 @@
+/**
+ * Copyright The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.codec.KeyValueCodecWithTags;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.security.UserProvider;
+import org.apache.hadoop.hbase.security.access.AccessControlLists;
+import org.apache.hadoop.hbase.security.access.SecureTestUtil;
+
+import org.junit.BeforeClass;
+import org.junit.experimental.categories.Category;
+
+/**
+ * Reruns TestLoadIncrementalHFiles using LoadIncrementalHFiles in secure mode.
+ * This suite is unable to verify the security handoff/turnover
+ * as miniCluster is running as system user thus has root privileges
+ * and delegation tokens don't seem to work on miniDFS.
+ *
+ * Thus SecureBulkload can only be completely verified by running
+ * integration tests against a secure cluster. This suite is still
+ * invaluable as it verifies the other mechanisms that need to be
+ * supported as part of a LoadIncrementalHFiles call.
+ */
+@Category({MapReduceTests.class, LargeTests.class})
+public class TestSecureLoadIncrementalHFiles extends TestLoadIncrementalHFiles{
+
+ @BeforeClass
+ public static void setUpBeforeClass() throws Exception {
+ // set the always on security provider
+ UserProvider.setUserProviderForTesting(util.getConfiguration(),
+ HadoopSecurityEnabledUserProviderForTesting.class);
+ // setup configuration
+ SecureTestUtil.enableSecurity(util.getConfiguration());
+ util.getConfiguration().setInt(
+ LoadIncrementalHFiles.MAX_FILES_PER_REGION_PER_FAMILY,
+ MAX_FILES_PER_REGION_PER_FAMILY);
+ // change default behavior so that tag values are returned with normal rpcs
+ util.getConfiguration().set(HConstants.RPC_CODEC_CONF_KEY,
+ KeyValueCodecWithTags.class.getCanonicalName());
+
+ util.startMiniCluster();
+
+ // Wait for the ACL table to become available
+ util.waitTableEnabled(AccessControlLists.ACL_TABLE_NAME);
+
+ setupNamespace();
+ }
+
+}
+
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSecureLoadIncrementalHFilesSplitRecovery.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSecureLoadIncrementalHFilesSplitRecovery.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSecureLoadIncrementalHFilesSplitRecovery.java
new file mode 100644
index 0000000..0e877ad
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSecureLoadIncrementalHFilesSplitRecovery.java
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.security.UserProvider;
+import org.apache.hadoop.hbase.security.access.AccessControlLists;
+import org.apache.hadoop.hbase.security.access.SecureTestUtil;
+
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+
+/**
+ * Reruns TestLoadIncrementalHFilesSplitRecovery
+ * using LoadIncrementalHFiles in secure mode.
+ * This suite is unable to verify the security handoff/turnover
+ * as miniCluster is running as system user thus has root privileges
+ * and delegation tokens don't seem to work on miniDFS.
+ *
+ * Thus SecureBulkload can only be completely verified by running
+ * integration tests against a secure cluster. This suite is still
+ * invaluable as it verifies the other mechanisms that need to be
+ * supported as part of a LoadIncrementalHFiles call.
+ */
+@Category({MapReduceTests.class, LargeTests.class})
+public class TestSecureLoadIncrementalHFilesSplitRecovery extends TestLoadIncrementalHFilesSplitRecovery {
+
+ //This "overrides" the parent static method
+ //make sure they are in sync
+ @BeforeClass
+ public static void setupCluster() throws Exception {
+ util = new HBaseTestingUtility();
+ // set the always on security provider
+ UserProvider.setUserProviderForTesting(util.getConfiguration(),
+ HadoopSecurityEnabledUserProviderForTesting.class);
+ // setup configuration
+ SecureTestUtil.enableSecurity(util.getConfiguration());
+
+ util.startMiniCluster();
+
+ // Wait for the ACL table to become available
+ util.waitTableEnabled(AccessControlLists.ACL_TABLE_NAME);
+ }
+
+ // Override with an empty body to disable this test; it does not work in secure mode.
+ @Test (timeout=180000)
+ @Override
+ public void testBulkLoadPhaseFailure() {
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSimpleTotalOrderPartitioner.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSimpleTotalOrderPartitioner.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSimpleTotalOrderPartitioner.java
new file mode 100644
index 0000000..5629cb4
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSimpleTotalOrderPartitioner.java
@@ -0,0 +1,81 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.*;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.experimental.categories.Category;
+
+import org.junit.Test;
+
+/**
+ * Test of simple partitioner.
+ */
+@Category({MapReduceTests.class, SmallTests.class})
+public class TestSimpleTotalOrderPartitioner {
+ protected final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+ Configuration conf = TEST_UTIL.getConfiguration();
+
+ @Test
+ public void testSplit() throws Exception {
+ String start = "a";
+ String end = "{";
+ SimpleTotalOrderPartitioner<byte []> p = new SimpleTotalOrderPartitioner<>();
+
+ this.conf.set(SimpleTotalOrderPartitioner.START, start);
+ this.conf.set(SimpleTotalOrderPartitioner.END, end);
+ p.setConf(this.conf);
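+ // Keys should be bucketed by where they fall between START ('a') and END ('{').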
+ ImmutableBytesWritable c = new ImmutableBytesWritable(Bytes.toBytes("c"));
+ // If one reduce, partition should be 0.
+ int partition = p.getPartition(c, HConstants.EMPTY_BYTE_ARRAY, 1);
+ assertEquals(0, partition);
+ // If two reduces, partition should be 0.
+ partition = p.getPartition(c, HConstants.EMPTY_BYTE_ARRAY, 2);
+ assertEquals(0, partition);
+ // Divide in 3.
+ partition = p.getPartition(c, HConstants.EMPTY_BYTE_ARRAY, 3);
+ assertEquals(0, partition);
+ ImmutableBytesWritable q = new ImmutableBytesWritable(Bytes.toBytes("q"));
+ partition = p.getPartition(q, HConstants.EMPTY_BYTE_ARRAY, 2);
+ assertEquals(1, partition);
+ partition = p.getPartition(q, HConstants.EMPTY_BYTE_ARRAY, 3);
+ assertEquals(2, partition);
+ // What about end and start keys.
+ ImmutableBytesWritable startBytes =
+ new ImmutableBytesWritable(Bytes.toBytes(start));
+ partition = p.getPartition(startBytes, HConstants.EMPTY_BYTE_ARRAY, 2);
+ assertEquals(0, partition);
+ partition = p.getPartition(startBytes, HConstants.EMPTY_BYTE_ARRAY, 3);
+ assertEquals(0, partition);
+ ImmutableBytesWritable endBytes =
+ new ImmutableBytesWritable(Bytes.toBytes("z"));
+ partition = p.getPartition(endBytes, HConstants.EMPTY_BYTE_ARRAY, 2);
+ assertEquals(1, partition);
+ partition = p.getPartition(endBytes, HConstants.EMPTY_BYTE_ARRAY, 3);
+ assertEquals(2, partition);
+ }
+
+}
+
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSyncTable.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSyncTable.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSyncTable.java
new file mode 100644
index 0000000..9a0c160
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSyncTable.java
@@ -0,0 +1,339 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.CategoryBasedTimeout;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.mapreduce.SyncTable.SyncMapper.Counter;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapreduce.Counters;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+import org.junit.rules.TestRule;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.base.Throwables;
+
+/**
+ * Basic test for the SyncTable M/R tool
+ */
+@Category(LargeTests.class)
+public class TestSyncTable {
+ @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
+ withTimeout(this.getClass()).withLookingForStuckThread(true).build();
+ private static final Log LOG = LogFactory.getLog(TestSyncTable.class);
+
+ private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+
+ @Rule
+ public TestName name = new TestName();
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ TEST_UTIL.startMiniCluster(3);
+ }
+
+ @AfterClass
+ public static void afterClass() throws Exception {
+ TEST_UTIL.shutdownMiniCluster();
+ }
+
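+ // Split keys are the byte encodings of evenly spaced int row keys,
+ // matching the Bytes.toBytes(int) row keys written by writeTestData.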
+ private static byte[][] generateSplits(int numRows, int numRegions) {
+ byte[][] splitRows = new byte[numRegions-1][];
+ for (int i = 1; i < numRegions; i++) {
+ splitRows[i-1] = Bytes.toBytes(numRows * i / numRegions);
+ }
+ return splitRows;
+ }
+
+ @Test
+ public void testSyncTable() throws Exception {
+ final TableName sourceTableName = TableName.valueOf(name.getMethodName() + "_source");
+ final TableName targetTableName = TableName.valueOf(name.getMethodName() + "_target");
+ Path testDir = TEST_UTIL.getDataTestDirOnTestFS("testSyncTable");
+
+ writeTestData(sourceTableName, targetTableName);
+ hashSourceTable(sourceTableName, testDir);
+ Counters syncCounters = syncTables(sourceTableName, targetTableName, testDir);
+ assertEqualTables(90, sourceTableName, targetTableName);
+
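+ // The expected counter values follow from the row ranges written in writeTestData below.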
+ assertEquals(60, syncCounters.findCounter(Counter.ROWSWITHDIFFS).getValue());
+ assertEquals(10, syncCounters.findCounter(Counter.SOURCEMISSINGROWS).getValue());
+ assertEquals(10, syncCounters.findCounter(Counter.TARGETMISSINGROWS).getValue());
+ assertEquals(50, syncCounters.findCounter(Counter.SOURCEMISSINGCELLS).getValue());
+ assertEquals(50, syncCounters.findCounter(Counter.TARGETMISSINGCELLS).getValue());
+ assertEquals(20, syncCounters.findCounter(Counter.DIFFERENTCELLVALUES).getValue());
+
+ TEST_UTIL.deleteTable(sourceTableName);
+ TEST_UTIL.deleteTable(targetTableName);
+ TEST_UTIL.cleanupDataTestDirOnTestFS();
+ }
+
+ private void assertEqualTables(int expectedRows, TableName sourceTableName,
+ TableName targetTableName) throws Exception {
+ Table sourceTable = TEST_UTIL.getConnection().getTable(sourceTableName);
+ Table targetTable = TEST_UTIL.getConnection().getTable(targetTableName);
+
+ ResultScanner sourceScanner = sourceTable.getScanner(new Scan());
+ ResultScanner targetScanner = targetTable.getScanner(new Scan());
+
+ for (int i = 0; i < expectedRows; i++) {
+ Result sourceRow = sourceScanner.next();
+ Result targetRow = targetScanner.next();
+
+ LOG.debug("SOURCE row: " + (sourceRow == null ? "null" : Bytes.toInt(sourceRow.getRow()))
+ + " cells:" + sourceRow);
+ LOG.debug("TARGET row: " + (targetRow == null ? "null" : Bytes.toInt(targetRow.getRow()))
+ + " cells:" + targetRow);
+
+ if (sourceRow == null) {
+ Assert.fail("Expected " + expectedRows
+ + " source rows but only found " + i);
+ }
+ if (targetRow == null) {
+ Assert.fail("Expected " + expectedRows
+ + " target rows but only found " + i);
+ }
+ Cell[] sourceCells = sourceRow.rawCells();
+ Cell[] targetCells = targetRow.rawCells();
+ if (sourceCells.length != targetCells.length) {
+ LOG.debug("Source cells: " + Arrays.toString(sourceCells));
+ LOG.debug("Target cells: " + Arrays.toString(targetCells));
+ Assert.fail("Row " + Bytes.toInt(sourceRow.getRow())
+ + " has " + sourceCells.length
+ + " cells in source table but " + targetCells.length
+ + " cells in target table");
+ }
+ for (int j = 0; j < sourceCells.length; j++) {
+ Cell sourceCell = sourceCells[j];
+ Cell targetCell = targetCells[j];
+ try {
+ if (!CellUtil.matchingRow(sourceCell, targetCell)) {
+ Assert.fail("Rows don't match");
+ }
+ if (!CellUtil.matchingFamily(sourceCell, targetCell)) {
+ Assert.fail("Families don't match");
+ }
+ if (!CellUtil.matchingQualifier(sourceCell, targetCell)) {
+ Assert.fail("Qualifiers don't match");
+ }
+ if (!CellUtil.matchingTimestamp(sourceCell, targetCell)) {
+ Assert.fail("Timestamps don't match");
+ }
+ if (!CellUtil.matchingValue(sourceCell, targetCell)) {
+ Assert.fail("Values don't match");
+ }
+ } catch (Throwable t) {
+ LOG.debug("Source cell: " + sourceCell + " target cell: " + targetCell);
+ Throwables.propagate(t);
+ }
+ }
+ }
+ Result sourceRow = sourceScanner.next();
+ if (sourceRow != null) {
+ Assert.fail("Source table has more than " + expectedRows
+ + " rows. Next row: " + Bytes.toInt(sourceRow.getRow()));
+ }
+ Result targetRow = targetScanner.next();
+ if (targetRow != null) {
+ Assert.fail("Target table has more than " + expectedRows
+ + " rows. Next row: " + Bytes.toInt(targetRow.getRow()));
+ }
+ sourceScanner.close();
+ targetScanner.close();
+ sourceTable.close();
+ targetTable.close();
+ }
+
+ private Counters syncTables(TableName sourceTableName, TableName targetTableName,
+ Path testDir) throws Exception {
+ SyncTable syncTable = new SyncTable(TEST_UTIL.getConfiguration());
+ int code = syncTable.run(new String[] {
+ testDir.toString(),
+ sourceTableName.getNameAsString(),
+ targetTableName.getNameAsString()
+ });
+ assertEquals("sync table job failed", 0, code);
+
+ LOG.info("Sync tables completed");
+ return syncTable.counters;
+ }
+
+ private void hashSourceTable(TableName sourceTableName, Path testDir)
+ throws Exception, IOException {
+ int numHashFiles = 3;
+ long batchSize = 100; // should be 2 batches per region
+ int scanBatch = 1;
+ HashTable hashTable = new HashTable(TEST_UTIL.getConfiguration());
+ int code = hashTable.run(new String[] {
+ "--batchsize=" + batchSize,
+ "--numhashfiles=" + numHashFiles,
+ "--scanbatch=" + scanBatch,
+ sourceTableName.getNameAsString(),
+ testDir.toString()});
+ assertEquals("hash table job failed", 0, code);
+
+ FileSystem fs = TEST_UTIL.getTestFileSystem();
+
+ HashTable.TableHash tableHash = HashTable.TableHash.read(fs.getConf(), testDir);
+ assertEquals(sourceTableName.getNameAsString(), tableHash.tableName);
+ assertEquals(batchSize, tableHash.batchSize);
+ assertEquals(numHashFiles, tableHash.numHashFiles);
+ assertEquals(numHashFiles - 1, tableHash.partitions.size());
+
+ LOG.info("Hash table completed");
+ }
+
+ private void writeTestData(TableName sourceTableName, TableName targetTableName)
+ throws Exception {
+ final byte[] family = Bytes.toBytes("family");
+ final byte[] column1 = Bytes.toBytes("c1");
+ final byte[] column2 = Bytes.toBytes("c2");
+ final byte[] value1 = Bytes.toBytes("val1");
+ final byte[] value2 = Bytes.toBytes("val2");
+ final byte[] value3 = Bytes.toBytes("val3");
+
+ int numRows = 100;
+ int sourceRegions = 10;
+ int targetRegions = 6;
+
+ Table sourceTable = TEST_UTIL.createTable(sourceTableName,
+ family, generateSplits(numRows, sourceRegions));
+
+ Table targetTable = TEST_UTIL.createTable(targetTableName,
+ family, generateSplits(numRows, targetRegions));
+
+ long timestamp = 1430764183454L;
+
+ int rowIndex = 0;
+ // a bunch of identical rows
+ for (; rowIndex < 40; rowIndex++) {
+ Put sourcePut = new Put(Bytes.toBytes(rowIndex));
+ sourcePut.addColumn(family, column1, timestamp, value1);
+ sourcePut.addColumn(family, column2, timestamp, value2);
+ sourceTable.put(sourcePut);
+
+ Put targetPut = new Put(Bytes.toBytes(rowIndex));
+ targetPut.addColumn(family, column1, timestamp, value1);
+ targetPut.addColumn(family, column2, timestamp, value2);
+ targetTable.put(targetPut);
+ }
+ // some rows only in the source table
+ // ROWSWITHDIFFS: 10
+ // TARGETMISSINGROWS: 10
+ // TARGETMISSINGCELLS: 20
+ for (; rowIndex < 50; rowIndex++) {
+ Put put = new Put(Bytes.toBytes(rowIndex));
+ put.addColumn(family, column1, timestamp, value1);
+ put.addColumn(family, column2, timestamp, value2);
+ sourceTable.put(put);
+ }
+ // some rows only in the target table
+ // ROWSWITHDIFFS: 10
+ // SOURCEMISSINGROWS: 10
+ // SOURCEMISSINGCELLS: 20
+ for (; rowIndex < 60; rowIndex++) {
+ Put put = new Put(Bytes.toBytes(rowIndex));
+ put.addColumn(family, column1, timestamp, value1);
+ put.addColumn(family, column2, timestamp, value2);
+ targetTable.put(put);
+ }
+ // some rows with 1 missing cell in target table
+ // ROWSWITHDIFFS: 10
+ // TARGETMISSINGCELLS: 10
+ for (; rowIndex < 70; rowIndex++) {
+ Put sourcePut = new Put(Bytes.toBytes(rowIndex));
+ sourcePut.addColumn(family, column1, timestamp, value1);
+ sourcePut.addColumn(family, column2, timestamp, value2);
+ sourceTable.put(sourcePut);
+
+ Put targetPut = new Put(Bytes.toBytes(rowIndex));
+ targetPut.addColumn(family, column1, timestamp, value1);
+ targetTable.put(targetPut);
+ }
+ // some rows with 1 missing cell in source table
+ // ROWSWITHDIFFS: 10
+ // SOURCEMISSINGCELLS: 10
+ for (; rowIndex < 80; rowIndex++) {
+ Put sourcePut = new Put(Bytes.toBytes(rowIndex));
+ sourcePut.addColumn(family, column1, timestamp, value1);
+ sourceTable.put(sourcePut);
+
+ Put targetPut = new Put(Bytes.toBytes(rowIndex));
+ targetPut.addColumn(family, column1, timestamp, value1);
+ targetPut.addColumn(family, column2, timestamp, value2);
+ targetTable.put(targetPut);
+ }
+ // some rows differing only in timestamp
+ // ROWSWITHDIFFS: 10
+ // SOURCEMISSINGCELLS: 20
+ // TARGETMISSINGCELLS: 20
+ for (; rowIndex < 90; rowIndex++) {
+ Put sourcePut = new Put(Bytes.toBytes(rowIndex));
+ sourcePut.addColumn(family, column1, timestamp, column1);
+ sourcePut.addColumn(family, column2, timestamp, value2);
+ sourceTable.put(sourcePut);
+
+ Put targetPut = new Put(Bytes.toBytes(rowIndex));
+ targetPut.addColumn(family, column1, timestamp+1, column1);
+ targetPut.addColumn(family, column2, timestamp-1, value2);
+ targetTable.put(targetPut);
+ }
+ // some rows with different values
+ // ROWSWITHDIFFS: 10
+ // DIFFERENTCELLVALUES: 20
+ for (; rowIndex < numRows; rowIndex++) {
+ Put sourcePut = new Put(Bytes.toBytes(rowIndex));
+ sourcePut.addColumn(family, column1, timestamp, value1);
+ sourcePut.addColumn(family, column2, timestamp, value2);
+ sourceTable.put(sourcePut);
+
+ Put targetPut = new Put(Bytes.toBytes(rowIndex));
+ targetPut.addColumn(family, column1, timestamp, value3);
+ targetPut.addColumn(family, column2, timestamp, value3);
+ targetTable.put(targetPut);
+ }
+
+ sourceTable.close();
+ targetTable.close();
+ }
+
+
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormat.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormat.java
new file mode 100644
index 0000000..b4c6ab9
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormat.java
@@ -0,0 +1,481 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Matchers.anyObject;
+import static org.mockito.Mockito.doAnswer;
+import static org.mockito.Mockito.doReturn;
+import static org.mockito.Mockito.doThrow;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.spy;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.*;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
+import org.apache.hadoop.hbase.filter.Filter;
+import org.apache.hadoop.hbase.filter.RegexStringComparator;
+import org.apache.hadoop.hbase.filter.RowFilter;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.JobConfigurable;
+import org.apache.hadoop.mapred.MiniMRCluster;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.stubbing.Answer;
+
+/**
+ * This tests the TableInputFormat and its recovery semantics
+ *
+ */
+@Category(LargeTests.class)
+public class TestTableInputFormat {
+
+ private static final Log LOG = LogFactory.getLog(TestTableInputFormat.class);
+
+ private final static HBaseTestingUtility UTIL = new HBaseTestingUtility();
+ private static MiniMRCluster mrCluster;
+ static final byte[] FAMILY = Bytes.toBytes("family");
+
+ private static final byte[][] columns = new byte[][] { FAMILY };
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ UTIL.startMiniCluster();
+ }
+
+ @AfterClass
+ public static void afterClass() throws Exception {
+ UTIL.shutdownMiniCluster();
+ }
+
+ @Before
+ public void before() throws IOException {
+ LOG.info("before");
+ UTIL.ensureSomeRegionServersAvailable(1);
+ LOG.info("before done");
+ }
+
+ /**
+ * Set up a table with two rows and values.
+ *
+ * @param tableName
+ * @return
+ * @throws IOException
+ */
+ public static Table createTable(byte[] tableName) throws IOException {
+ return createTable(tableName, new byte[][] { FAMILY });
+ }
+
+ /**
+ * Set up a table with two rows and values per column family.
+ *
+ * @param tableName
+ * @return
+ * @throws IOException
+ */
+ public static Table createTable(byte[] tableName, byte[][] families) throws IOException {
+ Table table = UTIL.createTable(TableName.valueOf(tableName), families);
+ Put p = new Put("aaa".getBytes());
+ for (byte[] family : families) {
+ p.addColumn(family, null, "value aaa".getBytes());
+ }
+ table.put(p);
+ p = new Put("bbb".getBytes());
+ for (byte[] family : families) {
+ p.addColumn(family, null, "value bbb".getBytes());
+ }
+ table.put(p);
+ return table;
+ }
+
+ /**
+ * Verify that the result and key have expected values.
+ *
+ * @param r
+ * @param key
+ * @param expectedKey
+ * @param expectedValue
+ * @return
+ */
+ static boolean checkResult(Result r, ImmutableBytesWritable key,
+ byte[] expectedKey, byte[] expectedValue) {
+ assertEquals(0, key.compareTo(expectedKey));
+ Map<byte[], byte[]> vals = r.getFamilyMap(FAMILY);
+ byte[] value = vals.values().iterator().next();
+ assertTrue(Arrays.equals(value, expectedValue));
+ return true; // if succeed
+ }
+
+ /**
+ * Create table data and run tests on specified htable using the
+ * o.a.h.hbase.mapreduce API.
+ *
+ * @param table
+ * @throws IOException
+ * @throws InterruptedException
+ */
+ static void runTestMapreduce(Table table) throws IOException,
+ InterruptedException {
+ org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl trr =
+ new org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl();
+ Scan s = new Scan();
+ s.setStartRow("aaa".getBytes());
+ s.setStopRow("zzz".getBytes());
+ s.addFamily(FAMILY);
+ trr.setScan(s);
+ trr.setHTable(table);
+
+ trr.initialize(null, null);
+ Result r = new Result();
+ ImmutableBytesWritable key = new ImmutableBytesWritable();
+
+ boolean more = trr.nextKeyValue();
+ assertTrue(more);
+ key = trr.getCurrentKey();
+ r = trr.getCurrentValue();
+ checkResult(r, key, "aaa".getBytes(), "value aaa".getBytes());
+
+ more = trr.nextKeyValue();
+ assertTrue(more);
+ key = trr.getCurrentKey();
+ r = trr.getCurrentValue();
+ checkResult(r, key, "bbb".getBytes(), "value bbb".getBytes());
+
+ // no more data
+ more = trr.nextKeyValue();
+ assertFalse(more);
+ }
+
+ /**
+ * Create a table that throws an IOException on the first scanner next() call
+ *
+ * @throws IOException
+ */
+ static Table createIOEScannerTable(byte[] name, final int failCnt)
+ throws IOException {
+ // build up a mock scanner stuff to fail the first time
+ Answer<ResultScanner> a = new Answer<ResultScanner>() {
+ int cnt = 0;
+
+ @Override
+ public ResultScanner answer(InvocationOnMock invocation) throws Throwable {
+ // first invocation return the busted mock scanner
+ if (cnt++ < failCnt) {
+ // create mock ResultScanner that always fails.
+ Scan scan = mock(Scan.class);
+ doReturn("bogus".getBytes()).when(scan).getStartRow(); // avoid npe
+ ResultScanner scanner = mock(ResultScanner.class);
+ // simulate TimeoutException / IOException
+ doThrow(new IOException("Injected exception")).when(scanner).next();
+ return scanner;
+ }
+
+ // otherwise return the real scanner.
+ return (ResultScanner) invocation.callRealMethod();
+ }
+ };
+
+ Table htable = spy(createTable(name));
+ doAnswer(a).when(htable).getScanner((Scan) anyObject());
+ return htable;
+ }
+
+ /**
+ * Create a table that throws a NotServingRegionException on first scanner
+ * next call
+ *
+ * @throws IOException
+ */
+ static Table createDNRIOEScannerTable(byte[] name, final int failCnt)
+ throws IOException {
+ // build up a mock scanner stuff to fail the first time
+ Answer<ResultScanner> a = new Answer<ResultScanner>() {
+ int cnt = 0;
+
+ @Override
+ public ResultScanner answer(InvocationOnMock invocation) throws Throwable {
+ // first invocation return the busted mock scanner
+ if (cnt++ < failCnt) {
+ // create mock ResultScanner that always fails.
+ Scan scan = mock(Scan.class);
+ doReturn("bogus".getBytes()).when(scan).getStartRow(); // avoid npe
+ ResultScanner scanner = mock(ResultScanner.class);
+
+ invocation.callRealMethod(); // simulate NotServingRegionException
+ doThrow(
+ new NotServingRegionException("Injected simulated TimeoutException"))
+ .when(scanner).next();
+ return scanner;
+ }
+
+ // otherwise return the real scanner.
+ return (ResultScanner) invocation.callRealMethod();
+ }
+ };
+
+ Table htable = spy(createTable(name));
+ doAnswer(a).when(htable).getScanner((Scan) anyObject());
+ return htable;
+ }
+
+ /**
+ * Run test assuming no errors using newer mapreduce api
+ *
+ * @throws IOException
+ * @throws InterruptedException
+ */
+ @Test
+ public void testTableRecordReaderMapreduce() throws IOException,
+ InterruptedException {
+ Table table = createTable("table1-mr".getBytes());
+ runTestMapreduce(table);
+ }
+
+ /**
+ * Run test assuming Scanner IOException failure using newer mapreduce api
+ *
+ * @throws IOException
+ * @throws InterruptedException
+ */
+ @Test
+ public void testTableRecordReaderScannerFailMapreduce() throws IOException,
+ InterruptedException {
+ Table htable = createIOEScannerTable("table2-mr".getBytes(), 1);
+ runTestMapreduce(htable);
+ }
+
+ /**
+ * Run test assuming Scanner IOException failure using newer mapreduce api
+ *
+ * @throws IOException
+ * @throws InterruptedException
+ */
+ @Test(expected = IOException.class)
+ public void testTableRecordReaderScannerFailMapreduceTwice() throws IOException,
+ InterruptedException {
+ Table htable = createIOEScannerTable("table3-mr".getBytes(), 2);
+ runTestMapreduce(htable);
+ }
+
+ /**
+ * Run test assuming NotServingRegionException using newer mapreduce api
+ *
+ * @throws InterruptedException
+ * @throws org.apache.hadoop.hbase.DoNotRetryIOException
+ */
+ @Test
+ public void testTableRecordReaderScannerTimeoutMapreduce()
+ throws IOException, InterruptedException {
+ Table htable = createDNRIOEScannerTable("table4-mr".getBytes(), 1);
+ runTestMapreduce(htable);
+ }
+
+ /**
+ * Run test assuming NotServingRegionException using newer mapreduce api
+ *
+ * @throws InterruptedException
+ * @throws org.apache.hadoop.hbase.NotServingRegionException
+ */
+ @Test(expected = org.apache.hadoop.hbase.NotServingRegionException.class)
+ public void testTableRecordReaderScannerTimeoutMapreduceTwice()
+ throws IOException, InterruptedException {
+ Table htable = createDNRIOEScannerTable("table5-mr".getBytes(), 2);
+ runTestMapreduce(htable);
+ }
+
+ /**
+ * Verify the example we present in javadocs on TableInputFormatBase
+ */
+ @Test
+ public void testExtensionOfTableInputFormatBase()
+ throws IOException, InterruptedException, ClassNotFoundException {
+ LOG.info("testing use of an InputFormat taht extends InputFormatBase");
+ final Table htable = createTable(Bytes.toBytes("exampleTable"),
+ new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") });
+ testInputFormat(ExampleTIF.class);
+ }
+
+ @Test
+ public void testJobConfigurableExtensionOfTableInputFormatBase()
+ throws IOException, InterruptedException, ClassNotFoundException {
+ LOG.info("testing use of an InputFormat taht extends InputFormatBase, " +
+ "using JobConfigurable.");
+ final Table htable = createTable(Bytes.toBytes("exampleJobConfigurableTable"),
+ new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") });
+ testInputFormat(ExampleJobConfigurableTIF.class);
+ }
+
+ @Test
+ public void testDeprecatedExtensionOfTableInputFormatBase()
+ throws IOException, InterruptedException, ClassNotFoundException {
+ LOG.info("testing use of an InputFormat taht extends InputFormatBase, " +
+ "using the approach documented in 0.98.");
+ final Table htable = createTable(Bytes.toBytes("exampleDeprecatedTable"),
+ new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") });
+ testInputFormat(ExampleDeprecatedTIF.class);
+ }
+
+ void testInputFormat(Class<? extends InputFormat> clazz)
+ throws IOException, InterruptedException, ClassNotFoundException {
+ final Job job = MapreduceTestingShim.createJob(UTIL.getConfiguration());
+ job.setInputFormatClass(clazz);
+ job.setOutputFormatClass(NullOutputFormat.class);
+ job.setMapperClass(ExampleVerifier.class);
+ job.setNumReduceTasks(0);
+
+ LOG.debug("submitting job.");
+ assertTrue("job failed!", job.waitForCompletion(true));
+ assertEquals("Saw the wrong number of instances of the filtered-for row.", 2, job.getCounters()
+ .findCounter(TestTableInputFormat.class.getName() + ":row", "aaa").getValue());
+ assertEquals("Saw any instances of the filtered out row.", 0, job.getCounters()
+ .findCounter(TestTableInputFormat.class.getName() + ":row", "bbb").getValue());
+ assertEquals("Saw the wrong number of instances of columnA.", 1, job.getCounters()
+ .findCounter(TestTableInputFormat.class.getName() + ":family", "columnA").getValue());
+ assertEquals("Saw the wrong number of instances of columnB.", 1, job.getCounters()
+ .findCounter(TestTableInputFormat.class.getName() + ":family", "columnB").getValue());
+ assertEquals("Saw the wrong count of values for the filtered-for row.", 2, job.getCounters()
+ .findCounter(TestTableInputFormat.class.getName() + ":value", "value aaa").getValue());
+ assertEquals("Saw the wrong count of values for the filtered-out row.", 0, job.getCounters()
+ .findCounter(TestTableInputFormat.class.getName() + ":value", "value bbb").getValue());
+ }
+
+ public static class ExampleVerifier extends TableMapper<NullWritable, NullWritable> {
+
+ @Override
+ public void map(ImmutableBytesWritable key, Result value, Context context)
+ throws IOException {
+ for (Cell cell : value.listCells()) {
+ context.getCounter(TestTableInputFormat.class.getName() + ":row",
+ Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()))
+ .increment(1L);
+ context.getCounter(TestTableInputFormat.class.getName() + ":family",
+ Bytes.toString(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength()))
+ .increment(1L);
+ context.getCounter(TestTableInputFormat.class.getName() + ":value",
+ Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()))
+ .increment(1L);
+ }
+ }
+
+ }
+
+ public static class ExampleDeprecatedTIF extends TableInputFormatBase implements JobConfigurable {
+
+ @Override
+ public void configure(JobConf job) {
+ try {
+ Connection connection = ConnectionFactory.createConnection(job);
+ Table exampleTable = connection.getTable(TableName.valueOf(("exampleDeprecatedTable")));
+ // mandatory
+ initializeTable(connection, exampleTable.getName());
+ byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
+ Bytes.toBytes("columnB") };
+ // optional
+ Scan scan = new Scan();
+ for (byte[] family : inputColumns) {
+ scan.addFamily(family);
+ }
+ Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*"));
+ scan.setFilter(exampleFilter);
+ setScan(scan);
+ } catch (IOException exception) {
+ throw new RuntimeException("Failed to configure for job.", exception);
+ }
+ }
+
+ }
+
+
+ public static class ExampleJobConfigurableTIF extends TableInputFormatBase
+ implements JobConfigurable {
+
+ @Override
+ public void configure(JobConf job) {
+ try {
+ Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create(job));
+ TableName tableName = TableName.valueOf("exampleJobConfigurableTable");
+ // mandatory
+ initializeTable(connection, tableName);
+ byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
+ Bytes.toBytes("columnB") };
+ //optional
+ Scan scan = new Scan();
+ for (byte[] family : inputColumns) {
+ scan.addFamily(family);
+ }
+ Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*"));
+ scan.setFilter(exampleFilter);
+ setScan(scan);
+ } catch (IOException exception) {
+ throw new RuntimeException("Failed to initialize.", exception);
+ }
+ }
+ }
+
+
+ public static class ExampleTIF extends TableInputFormatBase {
+
+ @Override
+ protected void initialize(JobContext job) throws IOException {
+ Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create(
+ job.getConfiguration()));
+ TableName tableName = TableName.valueOf("exampleTable");
+ // mandatory
+ initializeTable(connection, tableName);
+ byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
+ Bytes.toBytes("columnB") };
+ //optional
+ Scan scan = new Scan();
+ for (byte[] family : inputColumns) {
+ scan.addFamily(family);
+ }
+ Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*"));
+ scan.setFilter(exampleFilter);
+ setScan(scan);
+ }
+
+ }
+}
+
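
For reference, the ExampleTIF / ExampleVerifier classes above are exercised through testInputFormat(). Outside the test harness the same wiring looks roughly like the sketch below. This is a minimal, illustrative driver only: it assumes ExampleTIF and ExampleVerifier are available as top-level classes in the driver's package, and that the exampleTable table (with the columnA/columnB families the test creates) already exists; the job name is illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;

public class ExampleTIFDriver {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    Job job = Job.getInstance(conf, "example-table-input-format"); // illustrative job name
    job.setJarByClass(ExampleTIFDriver.class);

    // ExampleTIF supplies both the table ("exampleTable") and the scan from its
    // initialize(JobContext) method, so no further input configuration is needed here.
    job.setInputFormatClass(ExampleTIF.class);
    job.setMapperClass(ExampleVerifier.class);

    // Map-only job; ExampleVerifier only increments counters, so the output is discarded.
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(NullOutputFormat.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}
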
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatBase.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatBase.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatBase.java
new file mode 100644
index 0000000..699e773
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatBase.java
@@ -0,0 +1,53 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.*;
+
+import java.net.Inet6Address;
+import java.net.InetAddress;
+import java.net.UnknownHostException;
+
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@Category({SmallTests.class})
+public class TestTableInputFormatBase {
+ @Test
+ public void testTableInputFormatBaseReverseDNSForIPv6()
+ throws UnknownHostException {
+ String address = "ipv6.google.com";
+ String localhost = null;
+ InetAddress addr = null;
+ TableInputFormat inputFormat = new TableInputFormat();
+ try {
+ localhost = InetAddress.getByName(address).getCanonicalHostName();
+ addr = Inet6Address.getByName(address);
+ } catch (UnknownHostException e) {
+ // google.com is down, we can probably forgive this test.
+ return;
+ }
+ System.out.println("Should retrun the hostname for this host " +
+ localhost + " addr : " + addr);
+ String actualHostName = inputFormat.reverseDNS(addr);
+ assertEquals("Should retrun the hostname for this host. Expected : " +
+ localhost + " Actual : " + actualHostName, localhost, actualHostName);
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScan1.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScan1.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScan1.java
new file mode 100644
index 0000000..99b40b9
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScan1.java
@@ -0,0 +1,200 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+/**
+ * TestTableInputFormatScan part 1.
+ * @see TestTableInputFormatScanBase
+ */
+@Category({VerySlowMapReduceTests.class, LargeTests.class})
+public class TestTableInputFormatScan1 extends TestTableInputFormatScanBase {
+
+ /**
+ * Tests a MR scan using specific start and stop rows.
+ *
+ * @throws IOException
+ * @throws ClassNotFoundException
+ * @throws InterruptedException
+ */
+ @Test
+ public void testScanEmptyToEmpty()
+ throws IOException, InterruptedException, ClassNotFoundException {
+ testScan(null, null, null);
+ }
+
+ /**
+ * Tests a MR scan using specific start and stop rows.
+ *
+ * @throws IOException
+ * @throws ClassNotFoundException
+ * @throws InterruptedException
+ */
+ @Test
+ public void testScanEmptyToAPP()
+ throws IOException, InterruptedException, ClassNotFoundException {
+ testScan(null, "app", "apo");
+ }
+
+ /**
+ * Tests a MR scan using specific start and stop rows.
+ *
+ * @throws IOException
+ * @throws ClassNotFoundException
+ * @throws InterruptedException
+ */
+ @Test
+ public void testScanEmptyToBBA()
+ throws IOException, InterruptedException, ClassNotFoundException {
+ testScan(null, "bba", "baz");
+ }
+
+ /**
+ * Tests a MR scan using specific start and stop rows.
+ *
+ * @throws IOException
+ * @throws ClassNotFoundException
+ * @throws InterruptedException
+ */
+ @Test
+ public void testScanEmptyToBBB()
+ throws IOException, InterruptedException, ClassNotFoundException {
+ testScan(null, "bbb", "bba");
+ }
+
+ /**
+ * Tests a MR scan using specific start and stop rows.
+ *
+ * @throws IOException
+ * @throws ClassNotFoundException
+ * @throws InterruptedException
+ */
+ @Test
+ public void testScanEmptyToOPP()
+ throws IOException, InterruptedException, ClassNotFoundException {
+ testScan(null, "opp", "opo");
+ }
+
+ /**
+ * Tests a MR scan using a specific number of mappers. The test table has 25 regions,
+ * and all region sizes default to 0; the average region size is 1 (the smallest
+ * positive value). When hbase.mapreduce.input.ratio is set to -1, every region is cut into
+ * two MapReduce input splits, so the number of MR input splits should be 50; when
+ * hbase.mapreduce.input.ratio is set to 100, the sum of all region sizes is less than the
+ * average region size, so all regions are combined into one MapReduce input split.
+ *
+ * @throws IOException
+ * @throws ClassNotFoundException
+ * @throws InterruptedException
+ */
+ @Test
+ public void testGetSplits() throws IOException, InterruptedException, ClassNotFoundException {
+ testNumOfSplits("-1", 52);
+ testNumOfSplits("100", 1);
+ }
+
+ /**
+ * Tests the getSplitKey() method in TableInputFormatBase.java
+ *
+ * @throws IOException
+ * @throws ClassNotFoundException
+ * @throws InterruptedException
+ */
+ @Test
+ public void testGetSplitsPoint() throws IOException, InterruptedException,
+ ClassNotFoundException {
+ byte[] start1 = { 'a', 'a', 'a', 'b', 'c', 'd', 'e', 'f' };
+ byte[] end1 = { 'a', 'a', 'a', 'f', 'f' };
+ byte[] splitPoint1 = { 'a', 'a', 'a', 'd', 'd', -78, 50, -77 };
+ testGetSplitKey(start1, end1, splitPoint1, true);
+
+ byte[] start2 = { '1', '1', '1', '0', '0', '0' };
+ byte[] end2 = { '1', '1', '2', '5', '7', '9', '0' };
+ byte[] splitPoint2 = { '1', '1', '1', -78, -77, -76, -104 };
+ testGetSplitKey(start2, end2, splitPoint2, true);
+
+ byte[] start3 = { 'a', 'a', 'a', 'a', 'a', 'a' };
+ byte[] end3 = { 'a', 'a', 'b' };
+ byte[] splitPoint3 = { 'a', 'a', 'a', -80, -80, -80 };
+ testGetSplitKey(start3, end3, splitPoint3, true);
+
+ byte[] start4 = { 'a', 'a', 'a' };
+ byte[] end4 = { 'a', 'a', 'a', 'z' };
+ byte[] splitPoint4 = { 'a', 'a', 'a', '=' };
+ testGetSplitKey(start4, end4, splitPoint4, true);
+
+ byte[] start5 = { 'a', 'a', 'a' };
+ byte[] end5 = { 'a', 'a', 'b', 'a' };
+ byte[] splitPoint5 = { 'a', 'a', 'a', -80 };
+ testGetSplitKey(start5, end5, splitPoint5, true);
+
+ // Test Case 6: empty start key and end key "hhhqqqww", split point is "h"
+ byte[] start6 = {};
+ byte[] end6 = { 'h', 'h', 'h', 'q', 'q', 'q', 'w', 'w' };
+ byte[] splitPointText6 = { 'h' };
+ byte[] splitPointBinary6 = { 104 };
+ testGetSplitKey(start6, end6, splitPointText6, true);
+ testGetSplitKey(start6, end6, splitPointBinary6, false);
+
+ // Test Case 7: "ffffaaa" and empty key, split point depends on the mode we choose(text key or
+ // binary key).
+ byte[] start7 = { 'f', 'f', 'f', 'f', 'a', 'a', 'a' };
+ byte[] end7 = {};
+ byte[] splitPointText7 = { 'f', '~', '~', '~', '~', '~', '~' };
+ byte[] splitPointBinary7 = { 'f', -1, -1, -1, -1, -1, -1 };
+ testGetSplitKey(start7, end7, splitPointText7, true);
+ testGetSplitKey(start7, end7, splitPointBinary7, false);
+
+ // Test Case 8: both start key and end key are empty. Split point depends on the mode we
+ // choose (text key or binary key).
+ byte[] start8 = {};
+ byte[] end8 = {};
+ byte[] splitPointText8 = { 'O' };
+ byte[] splitPointBinary8 = { 0 };
+ testGetSplitKey(start8, end8, splitPointText8, true);
+ testGetSplitKey(start8, end8, splitPointBinary8, false);
+
+ // Test Case 9: Binary Key example
+ byte[] start9 = { 13, -19, 126, 127 };
+ byte[] end9 = { 13, -19, 127, 0 };
+ byte[] splitPoint9 = { 13, -19, 126, -65 };
+ testGetSplitKey(start9, end9, splitPoint9, false);
+
+ // Test Case 10: Binary key split when the start key is an unsigned byte and the end key is a
+ // signed byte
+ byte[] start10 = { 'x' };
+ byte[] end10 = { -128 };
+ byte[] splitPoint10 = { '|' };
+ testGetSplitKey(start10, end10, splitPoint10, false);
+
+ // Test Case 11: Binary key split when the start key is a signed byte and the end key is a
+ // signed byte
+ byte[] start11 = { -100 };
+ byte[] end11 = { -90 };
+ byte[] splitPoint11 = { -95 };
+ testGetSplitKey(start11, end11, splitPoint11, false);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScan2.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScan2.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScan2.java
new file mode 100644
index 0000000..02f893f
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScan2.java
@@ -0,0 +1,118 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+/**
+ * TestTableInputFormatScan part 2.
+ * @see TestTableInputFormatScanBase
+ */
+@Category({VerySlowMapReduceTests.class, LargeTests.class})
+public class TestTableInputFormatScan2 extends TestTableInputFormatScanBase {
+
+ /**
+ * Tests a MR scan using specific start and stop rows.
+ *
+ * @throws IOException
+ * @throws ClassNotFoundException
+ * @throws InterruptedException
+ */
+ @Test
+ public void testScanOBBToOPP()
+ throws IOException, InterruptedException, ClassNotFoundException {
+ testScan("obb", "opp", "opo");
+ }
+
+ /**
+ * Tests a MR scan using specific start and stop rows.
+ *
+ * @throws IOException
+ * @throws ClassNotFoundException
+ * @throws InterruptedException
+ */
+ @Test
+ public void testScanOBBToQPP()
+ throws IOException, InterruptedException, ClassNotFoundException {
+ testScan("obb", "qpp", "qpo");
+ }
+
+ /**
+ * Tests a MR scan using specific start and stop rows.
+ *
+ * @throws IOException
+ * @throws ClassNotFoundException
+ * @throws InterruptedException
+ */
+ @Test
+ public void testScanOPPToEmpty()
+ throws IOException, InterruptedException, ClassNotFoundException {
+ testScan("opp", null, "zzz");
+ }
+
+ /**
+ * Tests a MR scan using specific start and stop rows.
+ *
+ * @throws IOException
+ * @throws ClassNotFoundException
+ * @throws InterruptedException
+ */
+ @Test
+ public void testScanYYXToEmpty()
+ throws IOException, InterruptedException, ClassNotFoundException {
+ testScan("yyx", null, "zzz");
+ }
+
+ /**
+ * Tests a MR scan using specific start and stop rows.
+ *
+ * @throws IOException
+ * @throws ClassNotFoundException
+ * @throws InterruptedException
+ */
+ @Test
+ public void testScanYYYToEmpty()
+ throws IOException, InterruptedException, ClassNotFoundException {
+ testScan("yyy", null, "zzz");
+ }
+
+ /**
+ * Tests a MR scan using specific start and stop rows.
+ *
+ * @throws IOException
+ * @throws ClassNotFoundException
+ * @throws InterruptedException
+ */
+ @Test
+ public void testScanYZYToEmpty()
+ throws IOException, InterruptedException, ClassNotFoundException {
+ testScan("yzy", null, "zzz");
+ }
+
+ @Test
+ public void testScanFromConfiguration()
+ throws IOException, InterruptedException, ClassNotFoundException {
+ testScanFromConfiguration("bba", "bbd", "bbc");
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatBase.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatBase.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatBase.java
deleted file mode 100644
index e18b3aa..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatBase.java
+++ /dev/null
@@ -1,297 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-import java.text.MessageFormat;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.HRegionLocation;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.RegionLocator;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.Pair;
-import org.apache.hadoop.hbase.util.RegionSizeCalculator;
-import org.apache.hadoop.mapreduce.InputFormat;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-
-import java.util.Map;
-import java.util.HashMap;
-import java.util.Iterator;
-/**
- * A base for {@link MultiTableInputFormat}s. Receives a list of
- * {@link Scan} instances that define the input tables and
- * filters etc. Subclasses may use other TableRecordReader implementations.
- */
-@InterfaceAudience.Public
-public abstract class MultiTableInputFormatBase extends
- InputFormat<ImmutableBytesWritable, Result> {
-
- private static final Log LOG = LogFactory.getLog(MultiTableInputFormatBase.class);
-
- /** Holds the set of scans used to define the input. */
- private List<Scan> scans;
-
- /** The reader scanning the table, can be a custom one. */
- private TableRecordReader tableRecordReader = null;
-
- /**
- * Builds a TableRecordReader. If no TableRecordReader was provided, uses the
- * default.
- *
- * @param split The split to work with.
- * @param context The current context.
- * @return The newly created record reader.
- * @throws IOException When creating the reader fails.
- * @throws InterruptedException when record reader initialization fails
- * @see org.apache.hadoop.mapreduce.InputFormat#createRecordReader(
- * org.apache.hadoop.mapreduce.InputSplit,
- * org.apache.hadoop.mapreduce.TaskAttemptContext)
- */
- @Override
- public RecordReader<ImmutableBytesWritable, Result> createRecordReader(
- InputSplit split, TaskAttemptContext context)
- throws IOException, InterruptedException {
- TableSplit tSplit = (TableSplit) split;
- LOG.info(MessageFormat.format("Input split length: {0} bytes.", tSplit.getLength()));
-
- if (tSplit.getTable() == null) {
- throw new IOException("Cannot create a record reader because of a"
- + " previous error. Please look at the previous logs lines from"
- + " the task's full log for more details.");
- }
- final Connection connection = ConnectionFactory.createConnection(context.getConfiguration());
- Table table = connection.getTable(tSplit.getTable());
-
- if (this.tableRecordReader == null) {
- this.tableRecordReader = new TableRecordReader();
- }
- final TableRecordReader trr = this.tableRecordReader;
-
- try {
- Scan sc = tSplit.getScan();
- sc.setStartRow(tSplit.getStartRow());
- sc.setStopRow(tSplit.getEndRow());
- trr.setScan(sc);
- trr.setTable(table);
- return new RecordReader<ImmutableBytesWritable, Result>() {
-
- @Override
- public void close() throws IOException {
- trr.close();
- connection.close();
- }
-
- @Override
- public ImmutableBytesWritable getCurrentKey() throws IOException, InterruptedException {
- return trr.getCurrentKey();
- }
-
- @Override
- public Result getCurrentValue() throws IOException, InterruptedException {
- return trr.getCurrentValue();
- }
-
- @Override
- public float getProgress() throws IOException, InterruptedException {
- return trr.getProgress();
- }
-
- @Override
- public void initialize(InputSplit inputsplit, TaskAttemptContext context)
- throws IOException, InterruptedException {
- trr.initialize(inputsplit, context);
- }
-
- @Override
- public boolean nextKeyValue() throws IOException, InterruptedException {
- return trr.nextKeyValue();
- }
- };
- } catch (IOException ioe) {
- // If there is an exception make sure that all
- // resources are closed and released.
- trr.close();
- connection.close();
- throw ioe;
- }
- }
-
- /**
- * Calculates the splits that will serve as input for the map tasks. The
- * number of splits matches the number of regions in a table.
- *
- * @param context The current job context.
- * @return The list of input splits.
- * @throws IOException When creating the list of splits fails.
- * @see org.apache.hadoop.mapreduce.InputFormat#getSplits(org.apache.hadoop.mapreduce.JobContext)
- */
- @Override
- public List<InputSplit> getSplits(JobContext context) throws IOException {
- if (scans.isEmpty()) {
- throw new IOException("No scans were provided.");
- }
-
- Map<TableName, List<Scan>> tableMaps = new HashMap<>();
- for (Scan scan : scans) {
- byte[] tableNameBytes = scan.getAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME);
- if (tableNameBytes == null)
- throw new IOException("A scan object did not have a table name");
-
- TableName tableName = TableName.valueOf(tableNameBytes);
-
- List<Scan> scanList = tableMaps.get(tableName);
- if (scanList == null) {
- scanList = new ArrayList<>();
- tableMaps.put(tableName, scanList);
- }
- scanList.add(scan);
- }
-
- List<InputSplit> splits = new ArrayList<>();
- Iterator iter = tableMaps.entrySet().iterator();
- while (iter.hasNext()) {
- Map.Entry<TableName, List<Scan>> entry = (Map.Entry<TableName, List<Scan>>) iter.next();
- TableName tableName = entry.getKey();
- List<Scan> scanList = entry.getValue();
-
- try (Connection conn = ConnectionFactory.createConnection(context.getConfiguration());
- Table table = conn.getTable(tableName);
- RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
- RegionSizeCalculator sizeCalculator = new RegionSizeCalculator(
- regionLocator, conn.getAdmin());
- Pair<byte[][], byte[][]> keys = regionLocator.getStartEndKeys();
- for (Scan scan : scanList) {
- if (keys == null || keys.getFirst() == null || keys.getFirst().length == 0) {
- throw new IOException("Expecting at least one region for table : "
- + tableName.getNameAsString());
- }
- int count = 0;
-
- byte[] startRow = scan.getStartRow();
- byte[] stopRow = scan.getStopRow();
-
- for (int i = 0; i < keys.getFirst().length; i++) {
- if (!includeRegionInSplit(keys.getFirst()[i], keys.getSecond()[i])) {
- continue;
- }
-
- if ((startRow.length == 0 || keys.getSecond()[i].length == 0 ||
- Bytes.compareTo(startRow, keys.getSecond()[i]) < 0) &&
- (stopRow.length == 0 || Bytes.compareTo(stopRow,
- keys.getFirst()[i]) > 0)) {
- byte[] splitStart = startRow.length == 0 ||
- Bytes.compareTo(keys.getFirst()[i], startRow) >= 0 ?
- keys.getFirst()[i] : startRow;
- byte[] splitStop = (stopRow.length == 0 ||
- Bytes.compareTo(keys.getSecond()[i], stopRow) <= 0) &&
- keys.getSecond()[i].length > 0 ?
- keys.getSecond()[i] : stopRow;
-
- HRegionLocation hregionLocation = regionLocator.getRegionLocation(
- keys.getFirst()[i], false);
- String regionHostname = hregionLocation.getHostname();
- HRegionInfo regionInfo = hregionLocation.getRegionInfo();
- String encodedRegionName = regionInfo.getEncodedName();
- long regionSize = sizeCalculator.getRegionSize(
- regionInfo.getRegionName());
-
- TableSplit split = new TableSplit(table.getName(),
- scan, splitStart, splitStop, regionHostname,
- encodedRegionName, regionSize);
-
- splits.add(split);
-
- if (LOG.isDebugEnabled())
- LOG.debug("getSplits: split -> " + (count++) + " -> " + split);
- }
- }
- }
- }
- }
-
- return splits;
- }
-
- /**
- * Test if the given region is to be included in the InputSplit while
- * splitting the regions of a table.
- * <p>
- * This optimization is effective when there is a specific reasoning to
- * exclude an entire region from the M-R job, (and hence, not contributing to
- * the InputSplit), given the start and end keys of the same. <br>
- * Useful when we need to remember the last-processed top record and revisit
- * the [last, current) interval for M-R processing, continuously. In addition
- * to reducing InputSplits, reduces the load on the region server as well, due
- * to the ordering of the keys. <br>
- * <br>
- * Note: It is possible that <code>endKey.length() == 0 </code> , for the last
- * (recent) region. <br>
- * Override this method, if you want to bulk exclude regions altogether from
- * M-R. By default, no region is excluded (i.e. all regions are included).
- *
- * @param startKey Start key of the region
- * @param endKey End key of the region
- * @return true, if this region needs to be included as part of the input
- * (default).
- */
- protected boolean includeRegionInSplit(final byte[] startKey,
- final byte[] endKey) {
- return true;
- }
-
- /**
- * Allows subclasses to get the list of {@link Scan} objects.
- */
- protected List<Scan> getScans() {
- return this.scans;
- }
-
- /**
- * Allows subclasses to set the list of {@link Scan} objects.
- *
- * @param scans The list of {@link Scan} used to define the input
- */
- protected void setScans(List<Scan> scans) {
- this.scans = scans;
- }
-
- /**
- * Allows subclasses to set the {@link TableRecordReader}.
- *
- * @param tableRecordReader A different {@link TableRecordReader}
- * implementation.
- */
- protected void setTableRecordReader(TableRecordReader tableRecordReader) {
- this.tableRecordReader = tableRecordReader;
- }
-}
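
The includeRegionInSplit() hook documented above is the intended extension point for dropping whole regions from a multi-table job, and getSplits() resolves each scan's target table from the Scan.SCAN_ATTRIBUTES_TABLE_NAME attribute. A minimal sketch of a subclass that does both follows; the table names and cutoff key are illustrative only, and real code would normally derive its scans from the job Configuration rather than hard-coding them.

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.MultiTableInputFormatBase;
import org.apache.hadoop.hbase.util.Bytes;

// Hypothetical subclass, not part of this patch.
public class CutoffMultiTableInputFormat extends MultiTableInputFormatBase
    implements Configurable {

  private static final byte[] CUTOFF = Bytes.toBytes("m"); // illustrative cutoff key
  private Configuration conf;

  @Override
  public void setConf(Configuration configuration) {
    this.conf = configuration;
    List<Scan> scans = new ArrayList<>();
    for (String tableName : new String[] { "tableA", "tableB" }) { // illustrative names
      Scan scan = new Scan();
      // getSplits() resolves the target table from this attribute, so it is mandatory.
      scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(tableName));
      scans.add(scan);
    }
    setScans(scans);
  }

  @Override
  public Configuration getConf() {
    return conf;
  }

  @Override
  protected boolean includeRegionInSplit(byte[] startKey, byte[] endKey) {
    // Keep only regions that extend past the cutoff; the last region of a table has an
    // empty end key and is always kept.
    return endKey.length == 0 || Bytes.compareTo(endKey, CUTOFF) > 0;
  }
}
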
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableOutputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableOutputFormat.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableOutputFormat.java
deleted file mode 100644
index 4cc784f..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableOutputFormat.java
+++ /dev/null
@@ -1,176 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.BufferedMutator;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Delete;
-import org.apache.hadoop.hbase.client.Mutation;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Durability;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.OutputCommitter;
-import org.apache.hadoop.mapreduce.OutputFormat;
-import org.apache.hadoop.mapreduce.RecordWriter;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-
-/**
- * <p>
- * Hadoop output format that writes to one or more HBase tables. The key is
- * taken to be the table name while the output value <em>must</em> be either a
- * {@link Put} or a {@link Delete} instance. All tables must already exist, and
- * all Puts and Deletes must reference only valid column families.
- * </p>
- *
- * <p>
- * Write-ahead logging (WAL) for Puts can be disabled by setting
- * {@link #WAL_PROPERTY} to {@link #WAL_OFF}. Default value is {@link #WAL_ON}.
- * Note that disabling write-ahead logging is only appropriate for jobs where
- * loss of data due to region server failure can be tolerated (for example,
- * because it is easy to rerun a bulk import).
- * </p>
- */
-@InterfaceAudience.Public
-public class MultiTableOutputFormat extends OutputFormat<ImmutableBytesWritable, Mutation> {
- /** Set this to {@link #WAL_OFF} to turn off write-ahead logging (WAL) */
- public static final String WAL_PROPERTY = "hbase.mapreduce.multitableoutputformat.wal";
- /** Property value to use write-ahead logging */
- public static final boolean WAL_ON = true;
- /** Property value to disable write-ahead logging */
- public static final boolean WAL_OFF = false;
- /**
- * Record writer for outputting to multiple HTables.
- */
- protected static class MultiTableRecordWriter extends
- RecordWriter<ImmutableBytesWritable, Mutation> {
- private static final Log LOG = LogFactory.getLog(MultiTableRecordWriter.class);
- Connection connection;
- Map<ImmutableBytesWritable, BufferedMutator> mutatorMap = new HashMap<>();
- Configuration conf;
- boolean useWriteAheadLogging;
-
- /**
- * @param conf
- * HBaseConfiguration to use
- * @param useWriteAheadLogging
- * whether to use write ahead logging. This can be turned off (
- * <tt>false</tt>) to improve performance when bulk loading data.
- */
- public MultiTableRecordWriter(Configuration conf,
- boolean useWriteAheadLogging) throws IOException {
- LOG.debug("Created new MultiTableRecordReader with WAL "
- + (useWriteAheadLogging ? "on" : "off"));
- this.conf = conf;
- this.useWriteAheadLogging = useWriteAheadLogging;
- }
-
- /**
- * @param tableName
- * the name of the table, as a string
- * @return the named mutator
- * @throws IOException
- * if there is a problem opening a table
- */
- BufferedMutator getBufferedMutator(ImmutableBytesWritable tableName) throws IOException {
- if(this.connection == null){
- this.connection = ConnectionFactory.createConnection(conf);
- }
- if (!mutatorMap.containsKey(tableName)) {
- LOG.debug("Opening HTable \"" + Bytes.toString(tableName.get())+ "\" for writing");
-
- BufferedMutator mutator =
- connection.getBufferedMutator(TableName.valueOf(tableName.get()));
- mutatorMap.put(tableName, mutator);
- }
- return mutatorMap.get(tableName);
- }
-
- @Override
- public void close(TaskAttemptContext context) throws IOException {
- for (BufferedMutator mutator : mutatorMap.values()) {
- mutator.close();
- }
- if (connection != null) {
- connection.close();
- }
- }
-
- /**
- * Writes an action (Put or Delete) to the specified table.
- *
- * @param tableName
- * the table being updated.
- * @param action
- * the update, either a put or a delete.
- * @throws IllegalArgumentException
- * if the action is not a put or a delete.
- */
- @Override
- public void write(ImmutableBytesWritable tableName, Mutation action) throws IOException {
- BufferedMutator mutator = getBufferedMutator(tableName);
- // The actions are not immutable, so we defensively copy them
- if (action instanceof Put) {
- Put put = new Put((Put) action);
- put.setDurability(useWriteAheadLogging ? Durability.SYNC_WAL
- : Durability.SKIP_WAL);
- mutator.mutate(put);
- } else if (action instanceof Delete) {
- Delete delete = new Delete((Delete) action);
- mutator.mutate(delete);
- } else
- throw new IllegalArgumentException(
- "action must be either Delete or Put");
- }
- }
-
- @Override
- public void checkOutputSpecs(JobContext context) throws IOException,
- InterruptedException {
- // we can't know ahead of time if it's going to blow up when the user
- // passes a table name that doesn't exist, so nothing useful here.
- }
-
- @Override
- public OutputCommitter getOutputCommitter(TaskAttemptContext context)
- throws IOException, InterruptedException {
- return new TableOutputCommitter();
- }
-
- @Override
- public RecordWriter<ImmutableBytesWritable, Mutation> getRecordWriter(TaskAttemptContext context)
- throws IOException, InterruptedException {
- Configuration conf = context.getConfiguration();
- return new MultiTableRecordWriter(HBaseConfiguration.create(conf),
- conf.getBoolean(WAL_PROPERTY, WAL_ON));
- }
-
-}
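
As the class javadoc above says, the output key names the destination table and the value must be a Put or Delete, and WAL_OFF is only appropriate when a failed load can simply be rerun. A hedged sketch of the job-side wiring with an illustrative routing mapper follows; the table names, column family/qualifier, and text-file input handling are examples only, not part of this patch.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.MultiTableOutputFormat;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class MultiTableWriteExample {

  /** Illustrative mapper: routes each text line to one of two tables by its first byte. */
  public static class RoutingMapper
      extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
    private static final ImmutableBytesWritable TABLE_A =
        new ImmutableBytesWritable(Bytes.toBytes("tableA")); // illustrative table names
    private static final ImmutableBytesWritable TABLE_B =
        new ImmutableBytesWritable(Bytes.toBytes("tableB"));

    @Override
    protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      byte[] row = Bytes.toBytes(value.toString());
      if (row.length == 0) {
        return; // skip blank lines
      }
      Put put = new Put(row);
      put.addColumn(Bytes.toBytes("f"), Bytes.toBytes("q"), row); // illustrative family/qualifier
      // The output key is the destination table name, as MultiTableOutputFormat expects.
      context.write(row[0] < 'm' ? TABLE_A : TABLE_B, put);
    }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // Optional, per the class javadoc: turn the WAL off only if a failed job can be rerun.
    conf.setBoolean(MultiTableOutputFormat.WAL_PROPERTY, MultiTableOutputFormat.WAL_OFF);

    Job job = Job.getInstance(conf, "multi-table-write"); // illustrative job name
    job.setJarByClass(MultiTableWriteExample.class);
    job.setMapperClass(RoutingMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(MultiTableOutputFormat.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Put.class);
    FileInputFormat.addInputPath(job, new Path(args[0])); // plain text input to route
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}
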
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableSnapshotInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableSnapshotInputFormat.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableSnapshotInputFormat.java
deleted file mode 100644
index 0f07a58..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableSnapshotInputFormat.java
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.JobContext;
-
-import java.io.IOException;
-import java.util.Collection;
-import java.util.List;
-import java.util.Map;
-
-/**
- * MultiTableSnapshotInputFormat generalizes
- * {@link org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat}
- * allowing a MapReduce job to run over one or more table snapshots, with one or more scans
- * configured for each.
- * Internally, the input format delegates to
- * {@link org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat}
- * and thus has the same performance advantages;
- * see {@link org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat} for
- * more details.
- * Usage is similar to TableSnapshotInputFormat, with the following exception:
- * initMultiTableSnapshotMapperJob takes in a map
- * from snapshot name to a collection of scans. For each snapshot in the map, each corresponding
- * scan will be applied;
- * the overall dataset for the job is defined by the concatenation of the regions and tables
- * included in each snapshot/scan
- * pair.
- * {@link org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil#initMultiTableSnapshotMapperJob
- * (java.util.Map, Class, Class, Class, org.apache.hadoop.mapreduce.Job, boolean, org.apache
- * .hadoop.fs.Path)}
- * can be used to configure the job.
- * <pre>{@code
- * Job job = new Job(conf);
- * Map<String, Collection<Scan>> snapshotScans = ImmutableMap.of(
- * "snapshot1", ImmutableList.of(new Scan(Bytes.toBytes("a"), Bytes.toBytes("b"))),
- * "snapshot2", ImmutableList.of(new Scan(Bytes.toBytes("1"), Bytes.toBytes("2")))
- * );
- * Path restoreDir = new Path("/tmp/snapshot_restore_dir");
- * TableMapReduceUtil.initMultiTableSnapshotMapperJob(
- * snapshotScans, MyTableMapper.class, MyMapKeyOutput.class,
- * MyMapOutputValueWritable.class, job, true, restoreDir);
- * }
- * </pre>
- * Internally, this input format restores each snapshot into a subdirectory of the given tmp
- * directory. Input splits and
- * record readers are created as described in {@link org.apache.hadoop.hbase.mapreduce
- * .TableSnapshotInputFormat}
- * (one per region).
- * See {@link org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat} for more notes on
- * permissioning; the
- * same caveats apply here.
- *
- * @see org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat
- * @see org.apache.hadoop.hbase.client.TableSnapshotScanner
- */
-@InterfaceAudience.Public
-public class MultiTableSnapshotInputFormat extends TableSnapshotInputFormat {
-
- private final MultiTableSnapshotInputFormatImpl delegate;
-
- public MultiTableSnapshotInputFormat() {
- this.delegate = new MultiTableSnapshotInputFormatImpl();
- }
-
- @Override
- public List<InputSplit> getSplits(JobContext jobContext)
- throws IOException, InterruptedException {
- List<TableSnapshotInputFormatImpl.InputSplit> splits =
- delegate.getSplits(jobContext.getConfiguration());
- List<InputSplit> rtn = Lists.newArrayListWithCapacity(splits.size());
-
- for (TableSnapshotInputFormatImpl.InputSplit split : splits) {
- rtn.add(new TableSnapshotInputFormat.TableSnapshotRegionSplit(split));
- }
-
- return rtn;
- }
-
- public static void setInput(Configuration configuration,
- Map<String, Collection<Scan>> snapshotScans, Path tmpRestoreDir) throws IOException {
- new MultiTableSnapshotInputFormatImpl().setInput(configuration, snapshotScans, tmpRestoreDir);
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableSnapshotInputFormatImpl.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableSnapshotInputFormatImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableSnapshotInputFormatImpl.java
deleted file mode 100644
index 4331c0f..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableSnapshotInputFormatImpl.java
+++ /dev/null
@@ -1,252 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.Maps;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.classification.InterfaceStability;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper;
-import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
-import org.apache.hadoop.hbase.util.ConfigurationUtil;
-import org.apache.hadoop.hbase.util.FSUtils;
-
-import java.io.IOException;
-import java.util.AbstractMap;
-import java.util.Collection;
-import java.util.List;
-import java.util.Map;
-import java.util.UUID;
-
-/**
- * Shared implementation of mapreduce code over multiple table snapshots.
- * Utilized by both mapreduce ({@link org.apache.hadoop.hbase.mapreduce
- * .MultiTableSnapshotInputFormat}) and mapred
- * ({@link org.apache.hadoop.hbase.mapred.MultiTableSnapshotInputFormat}) implementations.
- */
-@InterfaceAudience.LimitedPrivate({ "HBase" })
-@InterfaceStability.Evolving
-public class MultiTableSnapshotInputFormatImpl {
-
- private static final Log LOG = LogFactory.getLog(MultiTableSnapshotInputFormatImpl.class);
-
- public static final String RESTORE_DIRS_KEY =
- "hbase.MultiTableSnapshotInputFormat.restore.snapshotDirMapping";
- public static final String SNAPSHOT_TO_SCANS_KEY =
- "hbase.MultiTableSnapshotInputFormat.snapshotsToScans";
-
- /**
- * Configure conf to read from snapshotScans, with snapshots restored to a subdirectory of
- * restoreDir.
- * Sets: {@link #RESTORE_DIRS_KEY}, {@link #SNAPSHOT_TO_SCANS_KEY}
- *
- * @param conf
- * @param snapshotScans
- * @param restoreDir
- * @throws IOException
- */
- public void setInput(Configuration conf, Map<String, Collection<Scan>> snapshotScans,
- Path restoreDir) throws IOException {
- Path rootDir = FSUtils.getRootDir(conf);
- FileSystem fs = rootDir.getFileSystem(conf);
-
- setSnapshotToScans(conf, snapshotScans);
- Map<String, Path> restoreDirs =
- generateSnapshotToRestoreDirMapping(snapshotScans.keySet(), restoreDir);
- setSnapshotDirs(conf, restoreDirs);
- restoreSnapshots(conf, restoreDirs, fs);
- }
-
- /**
- * Return the list of splits extracted from the scans/snapshots pushed to conf by
- * {@link
- * #setInput(org.apache.hadoop.conf.Configuration, java.util.Map, org.apache.hadoop.fs.Path)}
- *
- * @param conf Configuration to determine splits from
- * @return Return the list of splits extracted from the scans/snapshots pushed to conf
- * @throws IOException
- */
- public List<TableSnapshotInputFormatImpl.InputSplit> getSplits(Configuration conf)
- throws IOException {
- Path rootDir = FSUtils.getRootDir(conf);
- FileSystem fs = rootDir.getFileSystem(conf);
-
- List<TableSnapshotInputFormatImpl.InputSplit> rtn = Lists.newArrayList();
-
- Map<String, Collection<Scan>> snapshotsToScans = getSnapshotsToScans(conf);
- Map<String, Path> snapshotsToRestoreDirs = getSnapshotDirs(conf);
- for (Map.Entry<String, Collection<Scan>> entry : snapshotsToScans.entrySet()) {
- String snapshotName = entry.getKey();
-
- Path restoreDir = snapshotsToRestoreDirs.get(snapshotName);
-
- SnapshotManifest manifest =
- TableSnapshotInputFormatImpl.getSnapshotManifest(conf, snapshotName, rootDir, fs);
- List<HRegionInfo> regionInfos =
- TableSnapshotInputFormatImpl.getRegionInfosFromManifest(manifest);
-
- for (Scan scan : entry.getValue()) {
- List<TableSnapshotInputFormatImpl.InputSplit> splits =
- TableSnapshotInputFormatImpl.getSplits(scan, manifest, regionInfos, restoreDir, conf);
- rtn.addAll(splits);
- }
- }
- return rtn;
- }
-
- /**
- * Retrieve the snapshot name -> list<scan> mapping pushed to configuration by
- * {@link #setSnapshotToScans(org.apache.hadoop.conf.Configuration, java.util.Map)}
- *
- * @param conf Configuration to extract name -> list<scan> mappings from.
- * @return the snapshot name -> list<scan> mapping pushed to configuration
- * @throws IOException
- */
- public Map<String, Collection<Scan>> getSnapshotsToScans(Configuration conf) throws IOException {
-
- Map<String, Collection<Scan>> rtn = Maps.newHashMap();
-
- for (Map.Entry<String, String> entry : ConfigurationUtil
- .getKeyValues(conf, SNAPSHOT_TO_SCANS_KEY)) {
- String snapshotName = entry.getKey();
- String scan = entry.getValue();
-
- Collection<Scan> snapshotScans = rtn.get(snapshotName);
- if (snapshotScans == null) {
- snapshotScans = Lists.newArrayList();
- rtn.put(snapshotName, snapshotScans);
- }
-
- snapshotScans.add(TableMapReduceUtil.convertStringToScan(scan));
- }
-
- return rtn;
- }
-
- /**
- * Push snapshotScans to conf (under the key {@link #SNAPSHOT_TO_SCANS_KEY})
- *
- * @param conf
- * @param snapshotScans
- * @throws IOException
- */
- public void setSnapshotToScans(Configuration conf, Map<String, Collection<Scan>> snapshotScans)
- throws IOException {
- // flatten out snapshotScans for serialization to the job conf
- List<Map.Entry<String, String>> snapshotToSerializedScans = Lists.newArrayList();
-
- for (Map.Entry<String, Collection<Scan>> entry : snapshotScans.entrySet()) {
- String snapshotName = entry.getKey();
- Collection<Scan> scans = entry.getValue();
-
- // serialize all scans and map them to the appropriate snapshot
- for (Scan scan : scans) {
- snapshotToSerializedScans.add(new AbstractMap.SimpleImmutableEntry<>(snapshotName,
- TableMapReduceUtil.convertScanToString(scan)));
- }
- }
-
- ConfigurationUtil.setKeyValues(conf, SNAPSHOT_TO_SCANS_KEY, snapshotToSerializedScans);
- }
-
- /**
- * Retrieve the directories into which snapshots have been restored from
- * ({@link #RESTORE_DIRS_KEY})
- *
- * @param conf Configuration to extract restore directories from
- * @return the directories into which snapshots have been restored from
- * @throws IOException
- */
- public Map<String, Path> getSnapshotDirs(Configuration conf) throws IOException {
- List<Map.Entry<String, String>> kvps = ConfigurationUtil.getKeyValues(conf, RESTORE_DIRS_KEY);
- Map<String, Path> rtn = Maps.newHashMapWithExpectedSize(kvps.size());
-
- for (Map.Entry<String, String> kvp : kvps) {
- rtn.put(kvp.getKey(), new Path(kvp.getValue()));
- }
-
- return rtn;
- }
-
- public void setSnapshotDirs(Configuration conf, Map<String, Path> snapshotDirs) {
- Map<String, String> toSet = Maps.newHashMap();
-
- for (Map.Entry<String, Path> entry : snapshotDirs.entrySet()) {
- toSet.put(entry.getKey(), entry.getValue().toString());
- }
-
- ConfigurationUtil.setKeyValues(conf, RESTORE_DIRS_KEY, toSet.entrySet());
- }
-
- /**
- * Generate a random path underneath baseRestoreDir for each snapshot in snapshots and
- * return a map from the snapshot to the restore directory.
- *
- * @param snapshots collection of snapshot names to restore
- * @param baseRestoreDir base directory under which all snapshots in snapshots will be restored
- * @return a mapping from snapshot name to the directory in which that snapshot has been restored
- */
- private Map<String, Path> generateSnapshotToRestoreDirMapping(Collection<String> snapshots,
- Path baseRestoreDir) {
- Map<String, Path> rtn = Maps.newHashMap();
-
- for (String snapshotName : snapshots) {
- Path restoreSnapshotDir =
- new Path(baseRestoreDir, snapshotName + "__" + UUID.randomUUID().toString());
- rtn.put(snapshotName, restoreSnapshotDir);
- }
-
- return rtn;
- }
-
- /**
- * Restore each (snapshot name, restore directory) pair in snapshotToDir
- *
- * @param conf configuration to restore with
- * @param snapshotToDir mapping from snapshot names to restore directories
- * @param fs filesystem to do snapshot restoration on
- * @throws IOException
- */
- public void restoreSnapshots(Configuration conf, Map<String, Path> snapshotToDir, FileSystem fs)
- throws IOException {
- // TODO: restore from record readers to parallelize.
- Path rootDir = FSUtils.getRootDir(conf);
-
- for (Map.Entry<String, Path> entry : snapshotToDir.entrySet()) {
- String snapshotName = entry.getKey();
- Path restoreDir = entry.getValue();
- LOG.info("Restoring snapshot " + snapshotName + " into " + restoreDir
- + " for MultiTableSnapshotInputFormat");
- restoreSnapshot(conf, snapshotName, rootDir, restoreDir, fs);
- }
- }
-
- void restoreSnapshot(Configuration conf, String snapshotName, Path rootDir, Path restoreDir,
- FileSystem fs) throws IOException {
- RestoreSnapshotHelper.copySnapshotForScanner(conf, fs, rootDir, restoreDir, snapshotName);
- }
-
-}
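
The setSnapshotToScans()/getSnapshotsToScans() pair above is just a flattening of a snapshot-name to scans map into Configuration key/value pairs, with each scan serialized via TableMapReduceUtil.convertScanToString. A small round-trip sketch follows; the snapshot names and row ranges are illustrative (mirroring the MultiTableSnapshotInputFormat javadoc example), and a real job would normally go through setInput() or TableMapReduceUtil.initMultiTableSnapshotMapperJob rather than calling these methods directly.

import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.MultiTableSnapshotInputFormatImpl;
import org.apache.hadoop.hbase.util.Bytes;

public class SnapshotScanConfigRoundTrip {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    MultiTableSnapshotInputFormatImpl impl = new MultiTableSnapshotInputFormatImpl();

    // Illustrative snapshot names and row ranges.
    Map<String, Collection<Scan>> snapshotScans = new HashMap<>();
    snapshotScans.put("snapshot1",
        Arrays.asList(new Scan(Bytes.toBytes("a"), Bytes.toBytes("b"))));
    snapshotScans.put("snapshot2",
        Arrays.asList(new Scan(Bytes.toBytes("1"), Bytes.toBytes("2"))));

    // Flatten the map into the conf under SNAPSHOT_TO_SCANS_KEY ...
    impl.setSnapshotToScans(conf, snapshotScans);
    // ... and rebuild it from the stored (snapshot name, serialized scan) pairs.
    Map<String, Collection<Scan>> restored = impl.getSnapshotsToScans(conf);
    System.out.println("Snapshots found in the configuration: " + restored.keySet());
  }
}
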
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultithreadedTableMapper.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultithreadedTableMapper.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultithreadedTableMapper.java
deleted file mode 100644
index d1dba1d..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultithreadedTableMapper.java
+++ /dev/null
@@ -1,301 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-import java.lang.reflect.Constructor;
-import java.lang.reflect.Method;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.mapreduce.Counter;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.MapContext;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.OutputCommitter;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.RecordWriter;
-import org.apache.hadoop.mapreduce.StatusReporter;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.TaskAttemptID;
-import org.apache.hadoop.util.ReflectionUtils;
-
-
-/**
- * Multithreaded implementation for {@link org.apache.hadoop.hbase.mapreduce.TableMapper}.
- * <p>
- * It can be used in place of a regular single-threaded mapper when the Map operation is not
- * CPU bound, in order to improve throughput.
- * <p>
- * Mapper implementations using this MapRunnable must be thread-safe.
- * <p>
- * The Map-Reduce job has to be configured with the mapper to use via
- * {@link #setMapperClass} and the number of threads the thread pool can use with the
- * {@link #setNumberOfThreads} method. The default value is 10 threads.
- * <p>
- */
-
-public class MultithreadedTableMapper<K2, V2> extends TableMapper<K2, V2> {
- private static final Log LOG = LogFactory.getLog(MultithreadedTableMapper.class);
- private Class<? extends Mapper<ImmutableBytesWritable, Result,K2,V2>> mapClass;
- private Context outer;
- private ExecutorService executor;
- public static final String NUMBER_OF_THREADS = "hbase.mapreduce.multithreadedmapper.threads";
- public static final String MAPPER_CLASS = "hbase.mapreduce.multithreadedmapper.mapclass";
-
- /**
- * The number of threads in the thread pool that will run the map function.
- * @param job the job
- * @return the number of threads
- */
- public static int getNumberOfThreads(JobContext job) {
- return job.getConfiguration().
- getInt(NUMBER_OF_THREADS, 10);
- }
-
- /**
- * Set the number of threads in the pool for running maps.
- * @param job the job to modify
- * @param threads the new number of threads
- */
- public static void setNumberOfThreads(Job job, int threads) {
- job.getConfiguration().setInt(NUMBER_OF_THREADS,
- threads);
- }
-
- /**
- * Get the application's mapper class.
- * @param <K2> the map's output key type
- * @param <V2> the map's output value type
- * @param job the job
- * @return the mapper class to run
- */
- @SuppressWarnings("unchecked")
- public static <K2,V2>
- Class<Mapper<ImmutableBytesWritable, Result,K2,V2>> getMapperClass(JobContext job) {
- return (Class<Mapper<ImmutableBytesWritable, Result,K2,V2>>)
- job.getConfiguration().getClass( MAPPER_CLASS,
- Mapper.class);
- }
-
- /**
- * Set the application's mapper class.
- * @param <K2> the map output key type
- * @param <V2> the map output value type
- * @param job the job to modify
- * @param cls the class to use as the mapper
- */
- public static <K2,V2>
- void setMapperClass(Job job,
- Class<? extends Mapper<ImmutableBytesWritable, Result,K2,V2>> cls) {
- if (MultithreadedTableMapper.class.isAssignableFrom(cls)) {
- throw new IllegalArgumentException("Can't have recursive " +
- "MultithreadedTableMapper instances.");
- }
- job.getConfiguration().setClass(MAPPER_CLASS,
- cls, Mapper.class);
- }
-
- /**
- * Run the application's maps using a thread pool.
- */
- @Override
- public void run(Context context) throws IOException, InterruptedException {
- outer = context;
- int numberOfThreads = getNumberOfThreads(context);
- mapClass = getMapperClass(context);
- if (LOG.isDebugEnabled()) {
- LOG.debug("Configuring multithread runner to use " + numberOfThreads +
- " threads");
- }
- executor = Executors.newFixedThreadPool(numberOfThreads);
- for(int i=0; i < numberOfThreads; ++i) {
- MapRunner thread = new MapRunner(context);
- executor.execute(thread);
- }
- executor.shutdown();
- while (!executor.isTerminated()) {
- // wait till all the threads are done
- Thread.sleep(1000);
- }
- }
-
- private class SubMapRecordReader
- extends RecordReader<ImmutableBytesWritable, Result> {
- private ImmutableBytesWritable key;
- private Result value;
- private Configuration conf;
-
- @Override
- public void close() throws IOException {
- }
-
- @Override
- public float getProgress() throws IOException, InterruptedException {
- return 0;
- }
-
- @Override
- public void initialize(InputSplit split,
- TaskAttemptContext context
- ) throws IOException, InterruptedException {
- conf = context.getConfiguration();
- }
-
- @Override
- public boolean nextKeyValue() throws IOException, InterruptedException {
- synchronized (outer) {
- if (!outer.nextKeyValue()) {
- return false;
- }
- key = ReflectionUtils.copy(outer.getConfiguration(),
- outer.getCurrentKey(), key);
- value = ReflectionUtils.copy(conf, outer.getCurrentValue(), value);
- return true;
- }
- }
-
- public ImmutableBytesWritable getCurrentKey() {
- return key;
- }
-
- @Override
- public Result getCurrentValue() {
- return value;
- }
- }
-
- private class SubMapRecordWriter extends RecordWriter<K2,V2> {
-
- @Override
- public void close(TaskAttemptContext context) throws IOException,
- InterruptedException {
- }
-
- @Override
- public void write(K2 key, V2 value) throws IOException,
- InterruptedException {
- synchronized (outer) {
- outer.write(key, value);
- }
- }
- }
-
- private class SubMapStatusReporter extends StatusReporter {
-
- @Override
- public Counter getCounter(Enum<?> name) {
- return outer.getCounter(name);
- }
-
- @Override
- public Counter getCounter(String group, String name) {
- return outer.getCounter(group, name);
- }
-
- @Override
- public void progress() {
- outer.progress();
- }
-
- @Override
- public void setStatus(String status) {
- outer.setStatus(status);
- }
-
- public float getProgress() {
- return 0;
- }
- }
-
- @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="REC_CATCH_EXCEPTION",
- justification="Don't understand why FB is complaining about this one. We do throw exception")
- private class MapRunner implements Runnable {
- private Mapper<ImmutableBytesWritable, Result, K2,V2> mapper;
- private Context subcontext;
-
- @SuppressWarnings({ "rawtypes", "unchecked" })
- MapRunner(Context context) throws IOException, InterruptedException {
- mapper = ReflectionUtils.newInstance(mapClass,
- context.getConfiguration());
- try {
- Constructor c = context.getClass().getConstructor(
- Mapper.class,
- Configuration.class,
- TaskAttemptID.class,
- RecordReader.class,
- RecordWriter.class,
- OutputCommitter.class,
- StatusReporter.class,
- InputSplit.class);
- c.setAccessible(true);
- subcontext = (Context) c.newInstance(
- mapper,
- outer.getConfiguration(),
- outer.getTaskAttemptID(),
- new SubMapRecordReader(),
- new SubMapRecordWriter(),
- context.getOutputCommitter(),
- new SubMapStatusReporter(),
- outer.getInputSplit());
- } catch (Exception e) {
- try {
- Constructor c = Class.forName("org.apache.hadoop.mapreduce.task.MapContextImpl").getConstructor(
- Configuration.class,
- TaskAttemptID.class,
- RecordReader.class,
- RecordWriter.class,
- OutputCommitter.class,
- StatusReporter.class,
- InputSplit.class);
- c.setAccessible(true);
- MapContext mc = (MapContext) c.newInstance(
- outer.getConfiguration(),
- outer.getTaskAttemptID(),
- new SubMapRecordReader(),
- new SubMapRecordWriter(),
- context.getOutputCommitter(),
- new SubMapStatusReporter(),
- outer.getInputSplit());
- Class<?> wrappedMapperClass = Class.forName("org.apache.hadoop.mapreduce.lib.map.WrappedMapper");
- Method getMapContext = wrappedMapperClass.getMethod("getMapContext", MapContext.class);
- subcontext = (Context) getMapContext.invoke(wrappedMapperClass.newInstance(), mc);
- } catch (Exception ee) { // FindBugs: REC_CATCH_EXCEPTION
- // rethrow as IOE
- throw new IOException(e);
- }
- }
- }
-
- @Override
- public void run() {
- try {
- mapper.run(subcontext);
- } catch (Throwable ie) {
- LOG.error("Problem in running map.", ie);
- }
- }
- }
-}
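
For reference, a minimal job-setup sketch for the mapper deleted above. It mirrors the wiring used by TestMultithreadedTableMapper later in this patch; "mytable", the column family, and MyMapper (a thread-safe TableMapper emitting ImmutableBytesWritable/Put) are placeholders, not part of this change, and imports and exception handling are elided:

    Job job = Job.getInstance(conf, "multithreaded scan");
    Scan scan = new Scan();
    scan.addFamily(Bytes.toBytes("contents"));
    // Run the placeholder MyMapper inside the multithreaded wrapper.
    TableMapReduceUtil.initTableMapperJob("mytable", scan,
        MultithreadedTableMapper.class, ImmutableBytesWritable.class, Put.class, job);
    MultithreadedTableMapper.setMapperClass(job, MyMapper.class);
    MultithreadedTableMapper.setNumberOfThreads(job, 16);  // default is 10
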
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MutationSerialization.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MutationSerialization.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MutationSerialization.java
deleted file mode 100644
index 8997da9..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MutationSerialization.java
+++ /dev/null
@@ -1,98 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Delete;
-import org.apache.hadoop.hbase.client.Mutation;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.MutationProto;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.MutationProto.MutationType;
-import org.apache.hadoop.io.serializer.Deserializer;
-import org.apache.hadoop.io.serializer.Serialization;
-import org.apache.hadoop.io.serializer.Serializer;
-
-@InterfaceAudience.Public
-public class MutationSerialization implements Serialization<Mutation> {
- @Override
- public boolean accept(Class<?> c) {
- return Mutation.class.isAssignableFrom(c);
- }
-
- @Override
- public Deserializer<Mutation> getDeserializer(Class<Mutation> c) {
- return new MutationDeserializer();
- }
-
- @Override
- public Serializer<Mutation> getSerializer(Class<Mutation> c) {
- return new MutationSerializer();
- }
-
- private static class MutationDeserializer implements Deserializer<Mutation> {
- private InputStream in;
-
- @Override
- public void close() throws IOException {
- in.close();
- }
-
- @Override
- public Mutation deserialize(Mutation mutation) throws IOException {
- MutationProto proto = MutationProto.parseDelimitedFrom(in);
- return ProtobufUtil.toMutation(proto);
- }
-
- @Override
- public void open(InputStream in) throws IOException {
- this.in = in;
- }
-
- }
- private static class MutationSerializer implements Serializer<Mutation> {
- private OutputStream out;
-
- @Override
- public void close() throws IOException {
- out.close();
- }
-
- @Override
- public void open(OutputStream out) throws IOException {
- this.out = out;
- }
-
- @Override
- public void serialize(Mutation mutation) throws IOException {
- MutationType type;
- if (mutation instanceof Put) {
- type = MutationType.PUT;
- } else if (mutation instanceof Delete) {
- type = MutationType.DELETE;
- } else {
- throw new IllegalArgumentException("Only Put and Delete are supported");
- }
- ProtobufUtil.toMutation(type, mutation).writeDelimitedTo(out);
- }
- }
-}
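
A small round-trip sketch using only the API of the class deleted above (in a real job the class is registered through Hadoop's serialization factory rather than called directly; java.io and org.apache.hadoop.io.serializer imports are elided):

    MutationSerialization serialization = new MutationSerialization();
    Put put = new Put(Bytes.toBytes("row1"));
    put.addColumn(Bytes.toBytes("f"), Bytes.toBytes("q"), Bytes.toBytes("v"));

    // Write the Put as a delimited MutationProto...
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    Serializer<Mutation> serializer = serialization.getSerializer(Mutation.class);
    serializer.open(bytes);
    serializer.serialize(put);
    serializer.close();

    // ...and read it back as a Mutation.
    Deserializer<Mutation> deserializer = serialization.getDeserializer(Mutation.class);
    deserializer.open(new ByteArrayInputStream(bytes.toByteArray()));
    Mutation copy = deserializer.deserialize(null);
    deserializer.close();
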
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/PutCombiner.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/PutCombiner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/PutCombiner.java
deleted file mode 100644
index f01e84f..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/PutCombiner.java
+++ /dev/null
@@ -1,98 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.Map.Entry;
-import java.util.Map;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.KeyValueUtil;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.mapreduce.Reducer;
-
-/**
- * Combine Puts. Merges Put instances grouped by <code>K</code> into a single
- * instance.
- * @see TableMapReduceUtil
- */
-@InterfaceAudience.Public
-public class PutCombiner<K> extends Reducer<K, Put, K, Put> {
- private static final Log LOG = LogFactory.getLog(PutCombiner.class);
-
- @Override
- protected void reduce(K row, Iterable<Put> vals, Context context)
- throws IOException, InterruptedException {
- // Using HeapSize to create an upper bound on the memory size of
- // the puts and flush some portion of the content while looping. This
- // flush could result in multiple Puts for a single rowkey. That is
- // acceptable because Combiner is run as an optimization and it's not
- // critical that all Puts are grouped perfectly.
- long threshold = context.getConfiguration().getLong(
- "putcombiner.row.threshold", 1L * (1<<30));
- int cnt = 0;
- long curSize = 0;
- Put put = null;
- Map<byte[], List<Cell>> familyMap = null;
- for (Put p : vals) {
- cnt++;
- if (put == null) {
- put = p;
- familyMap = put.getFamilyCellMap();
- } else {
- for (Entry<byte[], List<Cell>> entry : p.getFamilyCellMap()
- .entrySet()) {
- List<Cell> cells = familyMap.get(entry.getKey());
- List<Cell> kvs = (cells != null) ? (List<Cell>) cells : null;
- for (Cell cell : entry.getValue()) {
- KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
- curSize += kv.heapSize();
- if (kvs != null) {
- kvs.add(kv);
- }
- }
- if (cells == null) {
- familyMap.put(entry.getKey(), entry.getValue());
- }
- }
- if (cnt % 10 == 0) context.setStatus("Combine " + cnt);
- if (curSize > threshold) {
- if (LOG.isDebugEnabled()) {
- LOG.debug(String.format("Combined %d Put(s) into %d.", cnt, 1));
- }
- context.write(row, put);
- put = null;
- curSize = 0;
- cnt = 0;
- }
- }
- }
- if (put != null) {
- if (LOG.isDebugEnabled()) {
- LOG.debug(String.format("Combined %d Put(s) into %d.", cnt, 1));
- }
- context.write(row, put);
- }
- }
-}
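
As a minimal sketch, assuming a Job whose map output is K/Put pairs, the combiner is attached with the standard Hadoop API; the flush threshold is the same property read in reduce() above:

    // Flush combined Puts once their estimated heap size passes ~256 MB (default is 1 GB).
    job.getConfiguration().setLong("putcombiner.row.threshold", 256L * 1024 * 1024);
    job.setCombinerClass(PutCombiner.class);
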
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/PutSortReducer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/PutSortReducer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/PutSortReducer.java
deleted file mode 100644
index 17ab9cb..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/PutSortReducer.java
+++ /dev/null
@@ -1,147 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-import java.util.TreeSet;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.ArrayBackedTag;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellComparator;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.KeyValueUtil;
-import org.apache.hadoop.hbase.Tag;
-import org.apache.hadoop.hbase.TagType;
-import org.apache.hadoop.hbase.TagUtil;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.exceptions.DeserializationException;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.security.visibility.CellVisibility;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.util.StringUtils;
-
-/**
- * Emits sorted Puts.
- * Reads in all Puts from the passed Iterator, sorts them, then emits
- * them in sorted order. If a row has many columns, sorting can use a
- * lot of memory.
- * @see HFileOutputFormat2
- * @see KeyValueSortReducer
- */
-@InterfaceAudience.Public
-public class PutSortReducer extends
- Reducer<ImmutableBytesWritable, Put, ImmutableBytesWritable, KeyValue> {
- // the cell creator
- private CellCreator kvCreator;
-
- @Override
- protected void
- setup(Reducer<ImmutableBytesWritable, Put, ImmutableBytesWritable, KeyValue>.Context context)
- throws IOException, InterruptedException {
- Configuration conf = context.getConfiguration();
- this.kvCreator = new CellCreator(conf);
- }
-
- @Override
- protected void reduce(
- ImmutableBytesWritable row,
- java.lang.Iterable<Put> puts,
- Reducer<ImmutableBytesWritable, Put,
- ImmutableBytesWritable, KeyValue>.Context context)
- throws java.io.IOException, InterruptedException
- {
- // although reduce() is called per-row, handle pathological case
- long threshold = context.getConfiguration().getLong(
- "putsortreducer.row.threshold", 1L * (1<<30));
- Iterator<Put> iter = puts.iterator();
- while (iter.hasNext()) {
- TreeSet<KeyValue> map = new TreeSet<>(CellComparator.COMPARATOR);
- long curSize = 0;
- // stop at the end or the RAM threshold
- List<Tag> tags = new ArrayList<>();
- while (iter.hasNext() && curSize < threshold) {
- // clear the tags
- tags.clear();
- Put p = iter.next();
- long t = p.getTTL();
- if (t != Long.MAX_VALUE) {
- // add TTL tag if found
- tags.add(new ArrayBackedTag(TagType.TTL_TAG_TYPE, Bytes.toBytes(t)));
- }
- byte[] acl = p.getACL();
- if (acl != null) {
- // add ACL tag if found
- tags.add(new ArrayBackedTag(TagType.ACL_TAG_TYPE, acl));
- }
- try {
- CellVisibility cellVisibility = p.getCellVisibility();
- if (cellVisibility != null) {
- // add the visibility labels if any
- tags.addAll(kvCreator.getVisibilityExpressionResolver()
- .createVisibilityExpTags(cellVisibility.getExpression()));
- }
- } catch (DeserializationException e) {
- // We just throw exception here. Should we allow other mutations to proceed by
- // just ignoring the bad one?
- throw new IOException("Invalid visibility expression found in mutation " + p, e);
- }
- for (List<Cell> cells: p.getFamilyCellMap().values()) {
- for (Cell cell: cells) {
- // Creating the KV which needs to be directly written to HFiles. Using the Facade
- // KVCreator for creation of kvs.
- KeyValue kv = null;
- TagUtil.carryForwardTags(tags, cell);
- if (!tags.isEmpty()) {
- kv = (KeyValue) kvCreator.create(cell.getRowArray(), cell.getRowOffset(),
- cell.getRowLength(), cell.getFamilyArray(), cell.getFamilyOffset(),
- cell.getFamilyLength(), cell.getQualifierArray(), cell.getQualifierOffset(),
- cell.getQualifierLength(), cell.getTimestamp(), cell.getValueArray(),
- cell.getValueOffset(), cell.getValueLength(), tags);
- } else {
- kv = KeyValueUtil.ensureKeyValue(cell);
- }
- if (map.add(kv)) {// don't count duplicated kv into size
- curSize += kv.heapSize();
- }
- }
- }
- }
- context.setStatus("Read " + map.size() + " entries of " + map.getClass()
- + "(" + StringUtils.humanReadableInt(curSize) + ")");
- int index = 0;
- for (KeyValue kv : map) {
- context.write(row, kv);
- if (++index % 100 == 0)
- context.setStatus("Wrote " + index);
- }
-
- // if we have more entries to process
- if (iter.hasNext()) {
- // force flush because we cannot guarantee intra-row sorted order
- context.write(null, null);
- }
- }
- }
-}
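
A sketch of how this reducer fits into an HFile-writing job; HFileOutputFormat2.configureIncrementalLoad normally does this wiring for you, the explicit calls below only make the moving parts visible, and the threshold property is the one read in reduce() above:

    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(Put.class);
    job.setReducerClass(PutSortReducer.class);
    // Shrink the per-call sort buffer from the 1 GB default if rows are very wide.
    job.getConfiguration().setLong("putsortreducer.row.threshold", 512L * 1024 * 1024);
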
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/ResultSerialization.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/ResultSerialization.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/ResultSerialization.java
deleted file mode 100644
index dff04b6..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/ResultSerialization.java
+++ /dev/null
@@ -1,158 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.DataInput;
-import java.io.DataInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.serializer.Deserializer;
-import org.apache.hadoop.io.serializer.Serialization;
-import org.apache.hadoop.io.serializer.Serializer;
-
-@InterfaceAudience.Public
-public class ResultSerialization extends Configured implements Serialization<Result> {
- private static final Log LOG = LogFactory.getLog(ResultSerialization.class);
- // The following configuration property indicates import file format version.
- public static final String IMPORT_FORMAT_VER = "hbase.import.version";
-
- @Override
- public boolean accept(Class<?> c) {
- return Result.class.isAssignableFrom(c);
- }
-
- @Override
- public Deserializer<Result> getDeserializer(Class<Result> c) {
- // check input format version
- Configuration conf = getConf();
- if (conf != null) {
- String inputVersion = conf.get(IMPORT_FORMAT_VER);
- if (inputVersion != null && inputVersion.equals("0.94")) {
- LOG.info("Load exported file using deserializer for HBase 0.94 format");
- return new Result94Deserializer();
- }
- }
-
- return new ResultDeserializer();
- }
-
- @Override
- public Serializer<Result> getSerializer(Class<Result> c) {
- return new ResultSerializer();
- }
-
- /**
- * The following deserializer class is used to load exported file of 0.94
- */
- private static class Result94Deserializer implements Deserializer<Result> {
- private DataInputStream in;
-
- @Override
- public void close() throws IOException {
- in.close();
- }
-
- @Override
- public Result deserialize(Result mutation) throws IOException {
- int totalBuffer = in.readInt();
- if (totalBuffer == 0) {
- return Result.EMPTY_RESULT;
- }
- byte[] buf = new byte[totalBuffer];
- readChunked(in, buf, 0, totalBuffer);
- List<Cell> kvs = new ArrayList<>();
- int offset = 0;
- while (offset < totalBuffer) {
- int keyLength = Bytes.toInt(buf, offset);
- offset += Bytes.SIZEOF_INT;
- kvs.add(new KeyValue(buf, offset, keyLength));
- offset += keyLength;
- }
- return Result.create(kvs);
- }
-
- @Override
- public void open(InputStream in) throws IOException {
- if (!(in instanceof DataInputStream)) {
- throw new IOException("Wrong input stream instance passed in");
- }
- this.in = (DataInputStream) in;
- }
-
- private void readChunked(final DataInput in, byte[] dest, int ofs, int len) throws IOException {
- int maxRead = 8192;
-
- for (; ofs < len; ofs += maxRead)
- in.readFully(dest, ofs, Math.min(len - ofs, maxRead));
- }
- }
-
- private static class ResultDeserializer implements Deserializer<Result> {
- private InputStream in;
-
- @Override
- public void close() throws IOException {
- in.close();
- }
-
- @Override
- public Result deserialize(Result mutation) throws IOException {
- ClientProtos.Result proto = ClientProtos.Result.parseDelimitedFrom(in);
- return ProtobufUtil.toResult(proto);
- }
-
- @Override
- public void open(InputStream in) throws IOException {
- this.in = in;
- }
- }
-
- private static class ResultSerializer implements Serializer<Result> {
- private OutputStream out;
-
- @Override
- public void close() throws IOException {
- out.close();
- }
-
- @Override
- public void open(OutputStream out) throws IOException {
- this.out = out;
- }
-
- @Override
- public void serialize(Result result) throws IOException {
- ProtobufUtil.toResult(result).writeDelimitedTo(out);
- }
- }
-}
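
The only non-default knob in the class deleted above is the import format version; a sketch, assuming an existing job, of selecting the 0.94 deserializer when importing files exported by HBase 0.94:

    Configuration conf = job.getConfiguration();
    // Parse the pre-protobuf 0.94 on-disk format instead of the current one.
    conf.set(ResultSerialization.IMPORT_FORMAT_VER, "0.94");   // "hbase.import.version"
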
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java
deleted file mode 100644
index 2e0591e..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java
+++ /dev/null
@@ -1,265 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.ArrayList;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.commons.lang.StringUtils;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.filter.FilterBase;
-import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
-import org.apache.hadoop.hbase.filter.MultiRowRangeFilter;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapreduce.Counter;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-/**
- * A job with just a map phase to count rows. Map outputs table rows IF the
- * input row has columns that have content.
- */
-@InterfaceAudience.Public
-public class RowCounter extends Configured implements Tool {
-
- private static final Log LOG = LogFactory.getLog(RowCounter.class);
-
- /** Name of this 'program'. */
- static final String NAME = "rowcounter";
-
- private final static String JOB_NAME_CONF_KEY = "mapreduce.job.name";
- private final static String EXPECTED_COUNT_KEY = RowCounter.class.getName() + ".expected_count";
-
- /**
- * Mapper that runs the count.
- */
- static class RowCounterMapper
- extends TableMapper<ImmutableBytesWritable, Result> {
-
- /** Counter enumeration to count the actual rows. */
- public static enum Counters {ROWS}
-
- /**
- * Maps the data.
- *
- * @param row The current table row key.
- * @param values The columns.
- * @param context The current context.
- * @throws IOException When something is broken with the data.
- * @see org.apache.hadoop.mapreduce.Mapper#map(Object, Object, Context)
- */
- @Override
- public void map(ImmutableBytesWritable row, Result values,
- Context context)
- throws IOException {
- // Count every row containing data, whether it's in qualifiers or values
- context.getCounter(Counters.ROWS).increment(1);
- }
- }
-
- /**
- * Sets up the actual job.
- *
- * @param conf The current configuration.
- * @param args The command line parameters.
- * @return The newly created job.
- * @throws IOException When setting up the job fails.
- */
- public static Job createSubmittableJob(Configuration conf, String[] args)
- throws IOException {
- String tableName = args[0];
- List<MultiRowRangeFilter.RowRange> rowRangeList = null;
- long startTime = 0;
- long endTime = 0;
-
- StringBuilder sb = new StringBuilder();
-
- final String rangeSwitch = "--range=";
- final String startTimeArgKey = "--starttime=";
- final String endTimeArgKey = "--endtime=";
- final String expectedCountArg = "--expected-count=";
-
- // First argument is the table name; options start from the second
- for (int i = 1; i < args.length; i++) {
- if (args[i].startsWith(rangeSwitch)) {
- try {
- rowRangeList = parseRowRangeParameter(args[i], rangeSwitch);
- } catch (IllegalArgumentException e) {
- return null;
- }
- continue;
- }
- if (args[i].startsWith(startTimeArgKey)) {
- startTime = Long.parseLong(args[i].substring(startTimeArgKey.length()));
- continue;
- }
- if (args[i].startsWith(endTimeArgKey)) {
- endTime = Long.parseLong(args[i].substring(endTimeArgKey.length()));
- continue;
- }
- if (args[i].startsWith(expectedCountArg)) {
- conf.setLong(EXPECTED_COUNT_KEY,
- Long.parseLong(args[i].substring(expectedCountArg.length())));
- continue;
- }
- // if no switch, assume column names
- sb.append(args[i]);
- sb.append(" ");
- }
- if (endTime < startTime) {
- printUsage("--endtime=" + endTime + " needs to be greater than --starttime=" + startTime);
- return null;
- }
-
- Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
- job.setJarByClass(RowCounter.class);
- Scan scan = new Scan();
- scan.setCacheBlocks(false);
- setScanFilter(scan, rowRangeList);
- if (sb.length() > 0) {
- for (String columnName : sb.toString().trim().split(" ")) {
- String family = StringUtils.substringBefore(columnName, ":");
- String qualifier = StringUtils.substringAfter(columnName, ":");
-
- if (StringUtils.isBlank(qualifier)) {
- scan.addFamily(Bytes.toBytes(family));
- }
- else {
- scan.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier));
- }
- }
- }
- scan.setTimeRange(startTime, endTime == 0 ? HConstants.LATEST_TIMESTAMP : endTime);
- job.setOutputFormatClass(NullOutputFormat.class);
- TableMapReduceUtil.initTableMapperJob(tableName, scan,
- RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);
- job.setNumReduceTasks(0);
- return job;
- }
-
- private static List<MultiRowRangeFilter.RowRange> parseRowRangeParameter(
- String arg, String rangeSwitch) {
- final String[] ranges = arg.substring(rangeSwitch.length()).split(";");
- final List<MultiRowRangeFilter.RowRange> rangeList = new ArrayList<>();
- for (String range : ranges) {
- String[] startEnd = range.split(",", 2);
- if (startEnd.length != 2 || startEnd[1].contains(",")) {
- printUsage("Please specify range in such format as \"--range=a,b\" " +
- "or, with only one boundary, \"--range=,b\" or \"--range=a,\"");
- throw new IllegalArgumentException("Wrong range specification: " + range);
- }
- String startKey = startEnd[0];
- String endKey = startEnd[1];
- rangeList.add(new MultiRowRangeFilter.RowRange(
- Bytes.toBytesBinary(startKey), true,
- Bytes.toBytesBinary(endKey), false));
- }
- return rangeList;
- }
-
- /**
- * Sets filter {@link FilterBase} to the {@link Scan} instance.
- * If provided rowRangeList contains more than one element,
- * method sets filter which is instance of {@link MultiRowRangeFilter}.
- * Otherwise, method sets filter which is instance of {@link FirstKeyOnlyFilter}.
- * If rowRangeList contains exactly one element, startRow and stopRow are set to the scan.
- * @param scan
- * @param rowRangeList
- */
- private static void setScanFilter(Scan scan, List<MultiRowRangeFilter.RowRange> rowRangeList) {
- final int size = rowRangeList == null ? 0 : rowRangeList.size();
- if (size <= 1) {
- scan.setFilter(new FirstKeyOnlyFilter());
- }
- if (size == 1) {
- MultiRowRangeFilter.RowRange range = rowRangeList.get(0);
- scan.setStartRow(range.getStartRow()); //inclusive
- scan.setStopRow(range.getStopRow()); //exclusive
- } else if (size > 1) {
- scan.setFilter(new MultiRowRangeFilter(rowRangeList));
- }
- }
-
- /*
- * @param errorMessage Message to print before the usage when an error occurs.
- */
- private static void printUsage(String errorMessage) {
- System.err.println("ERROR: " + errorMessage);
- printUsage();
- }
-
- /**
- * Prints usage without error message.
- * Note that we don't document --expected-count, because it's intended for tests.
- */
- private static void printUsage() {
- System.err.println("Usage: RowCounter [options] <tablename> " +
- "[--starttime=[start] --endtime=[end] " +
- "[--range=[startKey],[endKey][;[startKey],[endKey]...]] [<column1> <column2>...]");
- System.err.println("For performance consider the following options:\n"
- + "-Dhbase.client.scanner.caching=100\n"
- + "-Dmapreduce.map.speculative=false");
- }
-
- @Override
- public int run(String[] args) throws Exception {
- if (args.length < 1) {
- printUsage("Wrong number of parameters: " + args.length);
- return -1;
- }
- Job job = createSubmittableJob(getConf(), args);
- if (job == null) {
- return -1;
- }
- boolean success = job.waitForCompletion(true);
- final long expectedCount = getConf().getLong(EXPECTED_COUNT_KEY, -1);
- if (success && expectedCount != -1) {
- final Counter counter = job.getCounters().findCounter(RowCounterMapper.Counters.ROWS);
- success = expectedCount == counter.getValue();
- if (!success) {
- LOG.error("Failing job because count of '" + counter.getValue() +
- "' does not match expected count of '" + expectedCount + "'");
- }
- }
- return (success ? 0 : 1);
- }
-
- /**
- * Main entry point.
- * @param args The command line parameters.
- * @throws Exception When running the job fails.
- */
- public static void main(String[] args) throws Exception {
- int errCode = ToolRunner.run(HBaseConfiguration.create(), new RowCounter(), args);
- System.exit(errCode);
- }
-
-}
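
For reference, a programmatic invocation sketch equivalent to the usage printed above (table name, range and column are placeholders; this is what main() does with command-line arguments):

    String[] args = new String[] {
        "mytable",
        "--range=rowA,rowM",                    // optional row range, end key exclusive
        "--starttime=0",
        "--endtime=" + System.currentTimeMillis(),
        "cf:qual"                               // optional column to restrict the scan to
    };
    int exitCode = ToolRunner.run(HBaseConfiguration.create(), new RowCounter(), args);
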
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/SimpleTotalOrderPartitioner.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/SimpleTotalOrderPartitioner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/SimpleTotalOrderPartitioner.java
deleted file mode 100644
index 4ba1088..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/SimpleTotalOrderPartitioner.java
+++ /dev/null
@@ -1,143 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Base64;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapreduce.Partitioner;
-
-/**
- * A partitioner that takes start and end keys and uses bigdecimal to figure
- * out which reducer a key belongs to. Pass the start and end
- * keys in the Configuration using <code>hbase.simpletotalorder.start</code>
- * and <code>hbase.simpletotalorder.end</code>. The end key needs to be
- * exclusive; i.e. one larger than the biggest key in your key space.
- * You may be surprised at how this class partitions the space; it may not
- * align with preconceptions; e.g. a start key of zero and an end key of 100
- * divided into ten will not make regions whose range is 0-10, 10-20, and so on.
- * Make your own partitioner if you need the region spacing to come out a
- * particular way.
- * @param <VALUE>
- * @see #START
- * @see #END
- */
-@InterfaceAudience.Public
-public class SimpleTotalOrderPartitioner<VALUE> extends Partitioner<ImmutableBytesWritable, VALUE>
-implements Configurable {
- private final static Log LOG = LogFactory.getLog(SimpleTotalOrderPartitioner.class);
-
- @Deprecated
- public static final String START = "hbase.simpletotalorder.start";
- @Deprecated
- public static final String END = "hbase.simpletotalorder.end";
-
- static final String START_BASE64 = "hbase.simpletotalorder.start.base64";
- static final String END_BASE64 = "hbase.simpletotalorder.end.base64";
-
- private Configuration c;
- private byte [] startkey;
- private byte [] endkey;
- private byte [][] splits;
- private int lastReduces = -1;
-
- public static void setStartKey(Configuration conf, byte[] startKey) {
- conf.set(START_BASE64, Base64.encodeBytes(startKey));
- }
-
- public static void setEndKey(Configuration conf, byte[] endKey) {
- conf.set(END_BASE64, Base64.encodeBytes(endKey));
- }
-
- @SuppressWarnings("deprecation")
- static byte[] getStartKey(Configuration conf) {
- return getKeyFromConf(conf, START_BASE64, START);
- }
-
- @SuppressWarnings("deprecation")
- static byte[] getEndKey(Configuration conf) {
- return getKeyFromConf(conf, END_BASE64, END);
- }
-
- private static byte[] getKeyFromConf(Configuration conf,
- String base64Key, String deprecatedKey) {
- String encoded = conf.get(base64Key);
- if (encoded != null) {
- return Base64.decode(encoded);
- }
- String oldStyleVal = conf.get(deprecatedKey);
- if (oldStyleVal == null) {
- return null;
- }
- LOG.warn("Using deprecated configuration " + deprecatedKey +
- " - please use static accessor methods instead.");
- return Bytes.toBytesBinary(oldStyleVal);
- }
-
- @Override
- public int getPartition(final ImmutableBytesWritable key, final VALUE value,
- final int reduces) {
- if (reduces == 1) return 0;
- if (this.lastReduces != reduces) {
- this.splits = Bytes.split(this.startkey, this.endkey, reduces - 1);
- for (int i = 0; i < splits.length; i++) {
- LOG.info(Bytes.toStringBinary(splits[i]));
- }
- this.lastReduces = reduces;
- }
- int pos = Bytes.binarySearch(this.splits, key.get(), key.getOffset(),
- key.getLength());
- // Below code is from hfile index search.
- if (pos < 0) {
- pos++;
- pos *= -1;
- if (pos == 0) {
- // falls before the beginning of the file.
- throw new RuntimeException("Key outside start/stop range: " +
- key.toString());
- }
- pos--;
- }
- return pos;
- }
-
- @Override
- public Configuration getConf() {
- return this.c;
- }
-
- @Override
- public void setConf(Configuration conf) {
- this.c = conf;
- this.startkey = getStartKey(conf);
- this.endkey = getEndKey(conf);
- if (startkey == null || endkey == null) {
- throw new RuntimeException(this.getClass() + " not configured");
- }
- LOG.info("startkey=" + Bytes.toStringBinary(startkey) +
- ", endkey=" + Bytes.toStringBinary(endkey));
- // Reset last reduces count on change of Start / End key
- this.lastReduces = -1;
- }
-}
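
A minimal configuration sketch for the partitioner deleted above, assuming a job whose row keys fall between two known boundaries (the keys here are placeholders; the end key is exclusive):

    Configuration conf = job.getConfiguration();
    SimpleTotalOrderPartitioner.setStartKey(conf, Bytes.toBytes("a"));
    SimpleTotalOrderPartitioner.setEndKey(conf, Bytes.toBytes("z"));   // exclusive upper bound
    job.setPartitionerClass(SimpleTotalOrderPartitioner.class);
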
[04/41] hbase git commit: HBASE-18640 Move mapreduce out of
hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultithreadedTableMapper.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultithreadedTableMapper.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultithreadedTableMapper.java
deleted file mode 100644
index 694a359..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultithreadedTableMapper.java
+++ /dev/null
@@ -1,264 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.NavigableMap;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.fs.FileUtil;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-/**
- * Test Map/Reduce job over HBase tables. The map/reduce process we're testing
- * on our tables is simple - take every row in the table, reverse the value of
- * a particular cell, and write it back to the table.
- */
-@Category({MapReduceTests.class, LargeTests.class})
-public class TestMultithreadedTableMapper {
- private static final Log LOG = LogFactory.getLog(TestMultithreadedTableMapper.class);
- private static final HBaseTestingUtility UTIL =
- new HBaseTestingUtility();
- static final TableName MULTI_REGION_TABLE_NAME = TableName.valueOf("mrtest");
- static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
- static final byte[] OUTPUT_FAMILY = Bytes.toBytes("text");
- static final int NUMBER_OF_THREADS = 10;
-
- @BeforeClass
- public static void beforeClass() throws Exception {
- // Up the handlers; this test needs more than usual.
- UTIL.getConfiguration().setInt(HConstants.REGION_SERVER_HIGH_PRIORITY_HANDLER_COUNT, 10);
- UTIL.startMiniCluster();
- Table table =
- UTIL.createMultiRegionTable(MULTI_REGION_TABLE_NAME, new byte[][] { INPUT_FAMILY,
- OUTPUT_FAMILY });
- UTIL.loadTable(table, INPUT_FAMILY, false);
- UTIL.waitUntilAllRegionsAssigned(MULTI_REGION_TABLE_NAME);
- }
-
- @AfterClass
- public static void afterClass() throws Exception {
- UTIL.shutdownMiniCluster();
- }
-
- /**
- * Pass the given key and processed record to reduce
- */
- public static class ProcessContentsMapper
- extends TableMapper<ImmutableBytesWritable, Put> {
-
- /**
- * Pass the key and the reversed value to reduce
- *
- * @param key
- * @param value
- * @param context
- * @throws IOException
- */
- @Override
- public void map(ImmutableBytesWritable key, Result value,
- Context context)
- throws IOException, InterruptedException {
- if (value.size() != 1) {
- throw new IOException("There should only be one input column");
- }
- Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>>
- cf = value.getMap();
- if(!cf.containsKey(INPUT_FAMILY)) {
- throw new IOException("Wrong input columns. Missing: '" +
- Bytes.toString(INPUT_FAMILY) + "'.");
- }
- // Get the original value and reverse it
- String originalValue = Bytes.toString(value.getValue(INPUT_FAMILY, INPUT_FAMILY));
- StringBuilder newValue = new StringBuilder(originalValue);
- newValue.reverse();
- // Now set the value to be collected
- Put outval = new Put(key.get());
- outval.addColumn(OUTPUT_FAMILY, null, Bytes.toBytes(newValue.toString()));
- context.write(key, outval);
- }
- }
-
- /**
- * Test MultithreadedTableMapper map/reduce against a multi-region table
- * @throws IOException
- * @throws ClassNotFoundException
- * @throws InterruptedException
- */
- @Test
- public void testMultithreadedTableMapper()
- throws IOException, InterruptedException, ClassNotFoundException {
- runTestOnTable(UTIL.getConnection().getTable(MULTI_REGION_TABLE_NAME));
- }
-
- private void runTestOnTable(Table table)
- throws IOException, InterruptedException, ClassNotFoundException {
- Job job = null;
- try {
- LOG.info("Before map/reduce startup");
- job = new Job(table.getConfiguration(), "process column contents");
- job.setNumReduceTasks(1);
- Scan scan = new Scan();
- scan.addFamily(INPUT_FAMILY);
- TableMapReduceUtil.initTableMapperJob(
- table.getName(), scan,
- MultithreadedTableMapper.class, ImmutableBytesWritable.class,
- Put.class, job);
- MultithreadedTableMapper.setMapperClass(job, ProcessContentsMapper.class);
- MultithreadedTableMapper.setNumberOfThreads(job, NUMBER_OF_THREADS);
- TableMapReduceUtil.initTableReducerJob(
- table.getName().getNameAsString(),
- IdentityTableReducer.class, job);
- FileOutputFormat.setOutputPath(job, new Path("test"));
- LOG.info("Started " + table.getName());
- assertTrue(job.waitForCompletion(true));
- LOG.info("After map/reduce completion");
- // verify map-reduce results
- verify(table.getName());
- } finally {
- table.close();
- if (job != null) {
- FileUtil.fullyDelete(
- new File(job.getConfiguration().get("hadoop.tmp.dir")));
- }
- }
- }
-
- private void verify(TableName tableName) throws IOException {
- Table table = UTIL.getConnection().getTable(tableName);
- boolean verified = false;
- long pause = UTIL.getConfiguration().getLong("hbase.client.pause", 5 * 1000);
- int numRetries = UTIL.getConfiguration().getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 5);
- for (int i = 0; i < numRetries; i++) {
- try {
- LOG.info("Verification attempt #" + i);
- verifyAttempt(table);
- verified = true;
- break;
- } catch (NullPointerException e) {
- // If here, a cell was empty. Presume it's because updates came in
- // after the scanner had been opened. Wait a while and retry.
- LOG.debug("Verification attempt failed: " + e.getMessage());
- }
- try {
- Thread.sleep(pause);
- } catch (InterruptedException e) {
- // continue
- }
- }
- assertTrue(verified);
- table.close();
- }
-
- /**
- * Looks at every value of the mapreduce output and verifies that indeed
- * the values have been reversed.
- *
- * @param table Table to scan.
- * @throws IOException
- * @throws NullPointerException if we failed to find a cell value
- */
- private void verifyAttempt(final Table table)
- throws IOException, NullPointerException {
- Scan scan = new Scan();
- scan.addFamily(INPUT_FAMILY);
- scan.addFamily(OUTPUT_FAMILY);
- ResultScanner scanner = table.getScanner(scan);
- try {
- Iterator<Result> itr = scanner.iterator();
- assertTrue(itr.hasNext());
- while(itr.hasNext()) {
- Result r = itr.next();
- if (LOG.isDebugEnabled()) {
- if (r.size() > 2 ) {
- throw new IOException("Too many results, expected 2 got " +
- r.size());
- }
- }
- byte[] firstValue = null;
- byte[] secondValue = null;
- int count = 0;
- for(Cell kv : r.listCells()) {
- if (count == 0) {
- firstValue = CellUtil.cloneValue(kv);
- }else if (count == 1) {
- secondValue = CellUtil.cloneValue(kv);
- }else if (count == 2) {
- break;
- }
- count++;
- }
- String first = "";
- if (firstValue == null) {
- throw new NullPointerException(Bytes.toString(r.getRow()) +
- ": first value is null");
- }
- first = Bytes.toString(firstValue);
- String second = "";
- if (secondValue == null) {
- throw new NullPointerException(Bytes.toString(r.getRow()) +
- ": second value is null");
- }
- byte[] secondReversed = new byte[secondValue.length];
- for (int i = 0, j = secondValue.length - 1; j >= 0; j--, i++) {
- secondReversed[i] = secondValue[j];
- }
- second = Bytes.toString(secondReversed);
- if (first.compareTo(second) != 0) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("second key is not the reverse of first. row=" +
- Bytes.toStringBinary(r.getRow()) + ", first value=" + first +
- ", second value=" + second);
- }
- fail();
- }
- }
- } finally {
- scanner.close();
- }
- }
-
-}
-
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java
deleted file mode 100644
index 3b84e2d..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java
+++ /dev/null
@@ -1,400 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.ArrayList;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.CategoryBasedTimeout;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.LauncherSecurityManager;
-import org.apache.hadoop.mapreduce.Counter;
-import org.apache.hadoop.mapreduce.Job;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestRule;
-
-/**
- * Test the rowcounter map reduce job.
- */
-@Category({MapReduceTests.class, LargeTests.class})
-public class TestRowCounter {
- @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
- withTimeout(this.getClass()).withLookingForStuckThread(true).build();
- private static final Log LOG = LogFactory.getLog(TestRowCounter.class);
- private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
- private final static String TABLE_NAME = "testRowCounter";
- private final static String TABLE_NAME_TS_RANGE = "testRowCounter_ts_range";
- private final static String COL_FAM = "col_fam";
- private final static String COL1 = "c1";
- private final static String COL2 = "c2";
- private final static String COMPOSITE_COLUMN = "C:A:A";
- private final static int TOTAL_ROWS = 10;
- private final static int ROWS_WITH_ONE_COL = 2;
-
- /**
- * @throws java.lang.Exception
- */
- @BeforeClass
- public static void setUpBeforeClass() throws Exception {
- TEST_UTIL.startMiniCluster();
- Table table = TEST_UTIL.createTable(TableName.valueOf(TABLE_NAME), Bytes.toBytes(COL_FAM));
- writeRows(table, TOTAL_ROWS, ROWS_WITH_ONE_COL);
- table.close();
- }
-
- /**
- * @throws java.lang.Exception
- */
- @AfterClass
- public static void tearDownAfterClass() throws Exception {
- TEST_UTIL.shutdownMiniCluster();
- }
-
- /**
- * Test a case when no column was specified in command line arguments.
- *
- * @throws Exception
- */
- @Test
- public void testRowCounterNoColumn() throws Exception {
- String[] args = new String[] {
- TABLE_NAME
- };
- runRowCount(args, 10);
- }
-
- /**
- * Test a case when the column specified in command line arguments is
- * exclusive for few rows.
- *
- * @throws Exception
- */
- @Test
- public void testRowCounterExclusiveColumn() throws Exception {
- String[] args = new String[] {
- TABLE_NAME, COL_FAM + ":" + COL1
- };
- runRowCount(args, 8);
- }
-
- /**
- * Test a case when the column specified in command line arguments is
- * one for which the qualifier contains colons.
- *
- * @throws Exception
- */
- @Test
- public void testRowCounterColumnWithColonInQualifier() throws Exception {
- String[] args = new String[] {
- TABLE_NAME, COL_FAM + ":" + COMPOSITE_COLUMN
- };
- runRowCount(args, 8);
- }
-
- /**
- * Test a case when the column specified in command line arguments is not part
- * of first KV for a row.
- *
- * @throws Exception
- */
- @Test
- public void testRowCounterHiddenColumn() throws Exception {
- String[] args = new String[] {
- TABLE_NAME, COL_FAM + ":" + COL2
- };
- runRowCount(args, 10);
- }
-
-
- /**
- * Test a case when the column specified in command line arguments is
- * exclusive for few rows and also a row range filter is specified
- *
- * @throws Exception
- */
- @Test
- public void testRowCounterColumnAndRowRange() throws Exception {
- String[] args = new String[] {
- TABLE_NAME, "--range=\\x00rov,\\x00rox", COL_FAM + ":" + COL1
- };
- runRowCount(args, 8);
- }
-
- /**
- * Test a case when a range is specified with single range of start-end keys
- * @throws Exception
- */
- @Test
- public void testRowCounterRowSingleRange() throws Exception {
- String[] args = new String[] {
- TABLE_NAME, "--range=\\x00row1,\\x00row3"
- };
- runRowCount(args, 2);
- }
-
- /**
- * Test a case when a range is specified with single range with end key only
- * @throws Exception
- */
- @Test
- public void testRowCounterRowSingleRangeUpperBound() throws Exception {
- String[] args = new String[] {
- TABLE_NAME, "--range=,\\x00row3"
- };
- runRowCount(args, 3);
- }
-
- /**
- * Test a case when a range is specified with two ranges where one range is with end key only
- * @throws Exception
- */
- @Test
- public void testRowCounterRowMultiRangeUpperBound() throws Exception {
- String[] args = new String[] {
- TABLE_NAME, "--range=,\\x00row3;\\x00row5,\\x00row7"
- };
- runRowCount(args, 5);
- }
-
- /**
- * Test a case when a range is specified with multiple ranges of start-end keys
- * @throws Exception
- */
- @Test
- public void testRowCounterRowMultiRange() throws Exception {
- String[] args = new String[] {
- TABLE_NAME, "--range=\\x00row1,\\x00row3;\\x00row5,\\x00row8"
- };
- runRowCount(args, 5);
- }
-
- /**
- * Test a case when a range is specified with multiple ranges of start-end keys;
- * one range is filled, another two are not
- * @throws Exception
- */
- @Test
- public void testRowCounterRowMultiEmptyRange() throws Exception {
- String[] args = new String[] {
- TABLE_NAME, "--range=\\x00row1,\\x00row3;;"
- };
- runRowCount(args, 2);
- }
-
- @Test
- public void testRowCounter10kRowRange() throws Exception {
- String tableName = TABLE_NAME + "10k";
-
- try (Table table = TEST_UTIL.createTable(
- TableName.valueOf(tableName), Bytes.toBytes(COL_FAM))) {
- writeRows(table, 10000, 0);
- }
- String[] args = new String[] {
- tableName, "--range=\\x00row9872,\\x00row9875"
- };
- runRowCount(args, 3);
- }
-
- /**
- * Test a case when the timerange is specified with --starttime and --endtime options
- *
- * @throws Exception
- */
- @Test
- public void testRowCounterTimeRange() throws Exception {
- final byte[] family = Bytes.toBytes(COL_FAM);
- final byte[] col1 = Bytes.toBytes(COL1);
- Put put1 = new Put(Bytes.toBytes("row_timerange_" + 1));
- Put put2 = new Put(Bytes.toBytes("row_timerange_" + 2));
- Put put3 = new Put(Bytes.toBytes("row_timerange_" + 3));
-
- long ts;
-
- // create a separate table for the timerange test
- Table table = TEST_UTIL.createTable(TableName.valueOf(TABLE_NAME_TS_RANGE), Bytes.toBytes(COL_FAM));
-
- ts = System.currentTimeMillis();
- put1.addColumn(family, col1, ts, Bytes.toBytes("val1"));
- table.put(put1);
- Thread.sleep(100);
-
- ts = System.currentTimeMillis();
- put2.addColumn(family, col1, ts, Bytes.toBytes("val2"));
- put3.addColumn(family, col1, ts, Bytes.toBytes("val3"));
- table.put(put2);
- table.put(put3);
- table.close();
-
- String[] args = new String[] {
- TABLE_NAME_TS_RANGE, COL_FAM + ":" + COL1,
- "--starttime=" + 0,
- "--endtime=" + ts
- };
- runRowCount(args, 1);
-
- args = new String[] {
- TABLE_NAME_TS_RANGE, COL_FAM + ":" + COL1,
- "--starttime=" + 0,
- "--endtime=" + (ts - 10)
- };
- runRowCount(args, 1);
-
- args = new String[] {
- TABLE_NAME_TS_RANGE, COL_FAM + ":" + COL1,
- "--starttime=" + ts,
- "--endtime=" + (ts + 1000)
- };
- runRowCount(args, 2);
-
- args = new String[] {
- TABLE_NAME_TS_RANGE, COL_FAM + ":" + COL1,
- "--starttime=" + (ts - 30 * 1000),
- "--endtime=" + (ts + 30 * 1000),
- };
- runRowCount(args, 3);
- }
-
- /**
- * Run the RowCounter map reduce job and verify the row count.
- *
- * @param args the command line arguments to be used for rowcounter job.
- * @param expectedCount the expected row count (result of map reduce job).
- * @throws Exception
- */
- private void runRowCount(String[] args, int expectedCount) throws Exception {
- Job job = RowCounter.createSubmittableJob(TEST_UTIL.getConfiguration(), args);
- long start = System.currentTimeMillis();
- job.waitForCompletion(true);
- long duration = System.currentTimeMillis() - start;
- LOG.debug("row count duration (ms): " + duration);
- assertTrue(job.isSuccessful());
- Counter counter = job.getCounters().findCounter(RowCounter.RowCounterMapper.Counters.ROWS);
- assertEquals(expectedCount, counter.getValue());
- }
-
- /**
- * Writes TOTAL_ROWS distinct rows into the table. Some rows have
- * two columns, some have only one.
- *
- * @param table
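- * @param totalRows total number of rows to write
- * @param rowsWithOneCol number of those rows written with only a single column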
- * @throws IOException
- */
- private static void writeRows(Table table, int totalRows, int rowsWithOneCol) throws IOException {
- final byte[] family = Bytes.toBytes(COL_FAM);
- final byte[] value = Bytes.toBytes("abcd");
- final byte[] col1 = Bytes.toBytes(COL1);
- final byte[] col2 = Bytes.toBytes(COL2);
- final byte[] col3 = Bytes.toBytes(COMPOSITE_COLUMN);
- ArrayList<Put> rowsUpdate = new ArrayList<>();
- // write some rows with two columns
- int i = 0;
- for (; i < totalRows - rowsWithOneCol; i++) {
- // Use binary rows values to test for HBASE-15287.
- byte[] row = Bytes.toBytesBinary("\\x00row" + i);
- Put put = new Put(row);
- put.addColumn(family, col1, value);
- put.addColumn(family, col2, value);
- put.addColumn(family, col3, value);
- rowsUpdate.add(put);
- }
-
- // write the remaining rows with only one column
- for (; i < totalRows; i++) {
- byte[] row = Bytes.toBytes("row" + i);
- Put put = new Put(row);
- put.addColumn(family, col2, value);
- rowsUpdate.add(put);
- }
- table.put(rowsUpdate);
- }
-
- /**
- * Test the main method. RowCounter should print usage help and call System.exit when given invalid arguments.
- */
- @Test
- public void testImportMain() throws Exception {
- PrintStream oldPrintStream = System.err;
- SecurityManager SECURITY_MANAGER = System.getSecurityManager();
- LauncherSecurityManager newSecurityManager= new LauncherSecurityManager();
- System.setSecurityManager(newSecurityManager);
- ByteArrayOutputStream data = new ByteArrayOutputStream();
- String[] args = {};
- System.setErr(new PrintStream(data));
- try {
- System.setErr(new PrintStream(data));
-
- try {
- RowCounter.main(args);
- fail("should be SecurityException");
- } catch (SecurityException e) {
- assertEquals(-1, newSecurityManager.getExitCode());
- assertTrue(data.toString().contains("Wrong number of parameters:"));
- assertTrue(data.toString().contains(
- "Usage: RowCounter [options] <tablename> " +
- "[--starttime=[start] --endtime=[end] " +
- "[--range=[startKey],[endKey][;[startKey],[endKey]...]] " +
- "[<column1> <column2>...]"));
- assertTrue(data.toString().contains("-Dhbase.client.scanner.caching=100"));
- assertTrue(data.toString().contains("-Dmapreduce.map.speculative=false"));
- }
- data.reset();
- try {
- args = new String[2];
- args[0] = "table";
- args[1] = "--range=1";
- RowCounter.main(args);
- fail("should be SecurityException");
- } catch (SecurityException e) {
- assertEquals(-1, newSecurityManager.getExitCode());
- assertTrue(data.toString().contains(
- "Please specify range in such format as \"--range=a,b\" or, with only one boundary," +
- " \"--range=,b\" or \"--range=a,\""));
- assertTrue(data.toString().contains(
- "Usage: RowCounter [options] <tablename> " +
- "[--starttime=[start] --endtime=[end] " +
- "[--range=[startKey],[endKey][;[startKey],[endKey]...]] " +
- "[<column1> <column2>...]"));
- }
-
- } finally {
- System.setErr(oldPrintStream);
- System.setSecurityManager(SECURITY_MANAGER);
- }
-
- }
-
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSecureLoadIncrementalHFiles.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSecureLoadIncrementalHFiles.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSecureLoadIncrementalHFiles.java
deleted file mode 100644
index 78fddbc..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSecureLoadIncrementalHFiles.java
+++ /dev/null
@@ -1,70 +0,0 @@
-/**
- * Copyright The Apache Software Foundation
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.codec.KeyValueCodecWithTags;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.security.UserProvider;
-import org.apache.hadoop.hbase.security.access.AccessControlLists;
-import org.apache.hadoop.hbase.security.access.SecureTestUtil;
-
-import org.junit.BeforeClass;
-import org.junit.experimental.categories.Category;
-
-/**
- * Reruns TestLoadIncrementalHFiles using LoadIncrementalHFiles in secure mode.
- * This suite is unable to verify the security handoff/turnover
- * as miniCluster is running as system user thus has root privileges
- * and delegation tokens don't seem to work on miniDFS.
- *
- * Thus SecureBulkload can only be completely verified by running
- * integration tests against a secure cluster. This suite is still
- * invaluable as it verifies the other mechanisms that need to be
- * supported as part of a LoadIncrementalFiles call.
- */
-@Category({MapReduceTests.class, LargeTests.class})
-public class TestSecureLoadIncrementalHFiles extends TestLoadIncrementalHFiles{
-
- @BeforeClass
- public static void setUpBeforeClass() throws Exception {
- // set the always on security provider
- UserProvider.setUserProviderForTesting(util.getConfiguration(),
- HadoopSecurityEnabledUserProviderForTesting.class);
- // setup configuration
- SecureTestUtil.enableSecurity(util.getConfiguration());
- util.getConfiguration().setInt(
- LoadIncrementalHFiles.MAX_FILES_PER_REGION_PER_FAMILY,
- MAX_FILES_PER_REGION_PER_FAMILY);
- // change default behavior so that tag values are returned with normal rpcs
- util.getConfiguration().set(HConstants.RPC_CODEC_CONF_KEY,
- KeyValueCodecWithTags.class.getCanonicalName());
-
- util.startMiniCluster();
-
- // Wait for the ACL table to become available
- util.waitTableEnabled(AccessControlLists.ACL_TABLE_NAME);
-
- setupNamespace();
- }
-
-}
-
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSecureLoadIncrementalHFilesSplitRecovery.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSecureLoadIncrementalHFilesSplitRecovery.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSecureLoadIncrementalHFilesSplitRecovery.java
deleted file mode 100644
index 0e877ad..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSecureLoadIncrementalHFilesSplitRecovery.java
+++ /dev/null
@@ -1,69 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.security.UserProvider;
-import org.apache.hadoop.hbase.security.access.AccessControlLists;
-import org.apache.hadoop.hbase.security.access.SecureTestUtil;
-
-import org.junit.BeforeClass;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-
-/**
- * Reruns TestLoadIncrementalHFilesSplitRecovery
- * using LoadIncrementalHFiles in secure mode.
- * This suite is unable to verify the security handoff/turnover
- * as miniCluster is running as system user thus has root privileges
- * and delegation tokens don't seem to work on miniDFS.
- *
- * Thus SecureBulkload can only be completely verified by running
- * integration tests against a secure cluster. This suite is still
- * invaluable as it verifies the other mechanisms that need to be
- * supported as part of a LoadIncrementalFiles call.
- */
-@Category({MapReduceTests.class, LargeTests.class})
-public class TestSecureLoadIncrementalHFilesSplitRecovery extends TestLoadIncrementalHFilesSplitRecovery {
-
- //This "overrides" the parent static method
- //make sure they are in sync
- @BeforeClass
- public static void setupCluster() throws Exception {
- util = new HBaseTestingUtility();
- // set the always on security provider
- UserProvider.setUserProviderForTesting(util.getConfiguration(),
- HadoopSecurityEnabledUserProviderForTesting.class);
- // setup configuration
- SecureTestUtil.enableSecurity(util.getConfiguration());
-
- util.startMiniCluster();
-
- // Wait for the ACL table to become available
- util.waitTableEnabled(AccessControlLists.ACL_TABLE_NAME);
- }
-
- //Disabling this test as it does not work in secure mode
- @Test (timeout=180000)
- @Override
- public void testBulkLoadPhaseFailure() {
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSimpleTotalOrderPartitioner.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSimpleTotalOrderPartitioner.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSimpleTotalOrderPartitioner.java
deleted file mode 100644
index 0f41f33..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSimpleTotalOrderPartitioner.java
+++ /dev/null
@@ -1,81 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.*;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.testclassification.SmallTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.junit.experimental.categories.Category;
-
-import org.junit.Test;
-
-/**
- * Test of simple partitioner.
- */
-@Category({MapReduceTests.class, SmallTests.class})
-public class TestSimpleTotalOrderPartitioner {
- protected final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
- Configuration conf = TEST_UTIL.getConfiguration();
-
- @Test
- public void testSplit() throws Exception {
- String start = "a";
- String end = "{";
- SimpleTotalOrderPartitioner<byte []> p = new SimpleTotalOrderPartitioner<>();
-
- this.conf.set(SimpleTotalOrderPartitioner.START, start);
- this.conf.set(SimpleTotalOrderPartitioner.END, end);
- p.setConf(this.conf);
- ImmutableBytesWritable c = new ImmutableBytesWritable(Bytes.toBytes("c"));
- // If one reduce, partition should be 0.
- int partition = p.getPartition(c, HConstants.EMPTY_BYTE_ARRAY, 1);
- assertEquals(0, partition);
- // If two reduces, partition should be 0.
- partition = p.getPartition(c, HConstants.EMPTY_BYTE_ARRAY, 2);
- assertEquals(0, partition);
- // Divide in 3.
- partition = p.getPartition(c, HConstants.EMPTY_BYTE_ARRAY, 3);
- assertEquals(0, partition);
- ImmutableBytesWritable q = new ImmutableBytesWritable(Bytes.toBytes("q"));
- partition = p.getPartition(q, HConstants.EMPTY_BYTE_ARRAY, 2);
- assertEquals(1, partition);
- partition = p.getPartition(q, HConstants.EMPTY_BYTE_ARRAY, 3);
- assertEquals(2, partition);
- // What about end and start keys.
- ImmutableBytesWritable startBytes =
- new ImmutableBytesWritable(Bytes.toBytes(start));
- partition = p.getPartition(startBytes, HConstants.EMPTY_BYTE_ARRAY, 2);
- assertEquals(0, partition);
- partition = p.getPartition(startBytes, HConstants.EMPTY_BYTE_ARRAY, 3);
- assertEquals(0, partition);
- ImmutableBytesWritable endBytes =
- new ImmutableBytesWritable(Bytes.toBytes("z"));
- partition = p.getPartition(endBytes, HConstants.EMPTY_BYTE_ARRAY, 2);
- assertEquals(1, partition);
- partition = p.getPartition(endBytes, HConstants.EMPTY_BYTE_ARRAY, 3);
- assertEquals(2, partition);
- }
-
-}
-
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSyncTable.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSyncTable.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSyncTable.java
deleted file mode 100644
index 79b2cf0..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSyncTable.java
+++ /dev/null
@@ -1,339 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.IOException;
-import java.util.Arrays;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.CategoryBasedTimeout;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.mapreduce.SyncTable.SyncMapper.Counter;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapreduce.Counters;
-import org.junit.AfterClass;
-import org.junit.Assert;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
-import org.junit.rules.TestRule;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.base.Throwables;
-
-/**
- * Basic test for the SyncTable M/R tool
- */
-@Category(LargeTests.class)
-public class TestSyncTable {
- @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
- withTimeout(this.getClass()).withLookingForStuckThread(true).build();
- private static final Log LOG = LogFactory.getLog(TestSyncTable.class);
-
- private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
-
- @Rule
- public TestName name = new TestName();
-
- @BeforeClass
- public static void beforeClass() throws Exception {
- TEST_UTIL.startMiniCluster(3);
- }
-
- @AfterClass
- public static void afterClass() throws Exception {
- TEST_UTIL.shutdownMiniCluster();
- }
-
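- /** Generates numRegions - 1 split points spread evenly across the integer row key space. */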
- private static byte[][] generateSplits(int numRows, int numRegions) {
- byte[][] splitRows = new byte[numRegions-1][];
- for (int i = 1; i < numRegions; i++) {
- splitRows[i-1] = Bytes.toBytes(numRows * i / numRegions);
- }
- return splitRows;
- }
-
- @Test
- public void testSyncTable() throws Exception {
- final TableName sourceTableName = TableName.valueOf(name.getMethodName() + "_source");
- final TableName targetTableName = TableName.valueOf(name.getMethodName() + "_target");
- Path testDir = TEST_UTIL.getDataTestDirOnTestFS("testSyncTable");
-
- writeTestData(sourceTableName, targetTableName);
- hashSourceTable(sourceTableName, testDir);
- Counters syncCounters = syncTables(sourceTableName, targetTableName, testDir);
- assertEqualTables(90, sourceTableName, targetTableName);
-
- assertEquals(60, syncCounters.findCounter(Counter.ROWSWITHDIFFS).getValue());
- assertEquals(10, syncCounters.findCounter(Counter.SOURCEMISSINGROWS).getValue());
- assertEquals(10, syncCounters.findCounter(Counter.TARGETMISSINGROWS).getValue());
- assertEquals(50, syncCounters.findCounter(Counter.SOURCEMISSINGCELLS).getValue());
- assertEquals(50, syncCounters.findCounter(Counter.TARGETMISSINGCELLS).getValue());
- assertEquals(20, syncCounters.findCounter(Counter.DIFFERENTCELLVALUES).getValue());
-
- TEST_UTIL.deleteTable(sourceTableName);
- TEST_UTIL.deleteTable(targetTableName);
- TEST_UTIL.cleanupDataTestDirOnTestFS();
- }
-
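- /** Scans source and target in parallel and asserts both contain exactly expectedRows identical rows. */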
- private void assertEqualTables(int expectedRows, TableName sourceTableName,
- TableName targetTableName) throws Exception {
- Table sourceTable = TEST_UTIL.getConnection().getTable(sourceTableName);
- Table targetTable = TEST_UTIL.getConnection().getTable(targetTableName);
-
- ResultScanner sourceScanner = sourceTable.getScanner(new Scan());
- ResultScanner targetScanner = targetTable.getScanner(new Scan());
-
- for (int i = 0; i < expectedRows; i++) {
- Result sourceRow = sourceScanner.next();
- Result targetRow = targetScanner.next();
-
- LOG.debug("SOURCE row: " + (sourceRow == null ? "null" : Bytes.toInt(sourceRow.getRow()))
- + " cells:" + sourceRow);
- LOG.debug("TARGET row: " + (targetRow == null ? "null" : Bytes.toInt(targetRow.getRow()))
- + " cells:" + targetRow);
-
- if (sourceRow == null) {
- Assert.fail("Expected " + expectedRows
- + " source rows but only found " + i);
- }
- if (targetRow == null) {
- Assert.fail("Expected " + expectedRows
- + " target rows but only found " + i);
- }
- Cell[] sourceCells = sourceRow.rawCells();
- Cell[] targetCells = targetRow.rawCells();
- if (sourceCells.length != targetCells.length) {
- LOG.debug("Source cells: " + Arrays.toString(sourceCells));
- LOG.debug("Target cells: " + Arrays.toString(targetCells));
- Assert.fail("Row " + Bytes.toInt(sourceRow.getRow())
- + " has " + sourceCells.length
- + " cells in source table but " + targetCells.length
- + " cells in target table");
- }
- for (int j = 0; j < sourceCells.length; j++) {
- Cell sourceCell = sourceCells[j];
- Cell targetCell = targetCells[j];
- try {
- if (!CellUtil.matchingRow(sourceCell, targetCell)) {
- Assert.fail("Rows don't match");
- }
- if (!CellUtil.matchingFamily(sourceCell, targetCell)) {
- Assert.fail("Families don't match");
- }
- if (!CellUtil.matchingQualifier(sourceCell, targetCell)) {
- Assert.fail("Qualifiers don't match");
- }
- if (!CellUtil.matchingTimestamp(sourceCell, targetCell)) {
- Assert.fail("Timestamps don't match");
- }
- if (!CellUtil.matchingValue(sourceCell, targetCell)) {
- Assert.fail("Values don't match");
- }
- } catch (Throwable t) {
- LOG.debug("Source cell: " + sourceCell + " target cell: " + targetCell);
- Throwables.propagate(t);
- }
- }
- }
- Result sourceRow = sourceScanner.next();
- if (sourceRow != null) {
- Assert.fail("Source table has more than " + expectedRows
- + " rows. Next row: " + Bytes.toInt(sourceRow.getRow()));
- }
- Result targetRow = targetScanner.next();
- if (targetRow != null) {
- Assert.fail("Target table has more than " + expectedRows
- + " rows. Next row: " + Bytes.toInt(targetRow.getRow()));
- }
- sourceScanner.close();
- targetScanner.close();
- sourceTable.close();
- targetTable.close();
- }
-
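- /** Runs the SyncTable tool from source to target using the hash output in testDir and returns its job counters. */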
- private Counters syncTables(TableName sourceTableName, TableName targetTableName,
- Path testDir) throws Exception {
- SyncTable syncTable = new SyncTable(TEST_UTIL.getConfiguration());
- int code = syncTable.run(new String[] {
- testDir.toString(),
- sourceTableName.getNameAsString(),
- targetTableName.getNameAsString()
- });
- assertEquals("sync table job failed", 0, code);
-
- LOG.info("Sync tables completed");
- return syncTable.counters;
- }
-
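- /** Runs the HashTable tool over the source table and verifies the hash metadata written to testDir. */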
- private void hashSourceTable(TableName sourceTableName, Path testDir)
- throws Exception, IOException {
- int numHashFiles = 3;
- long batchSize = 100; // should be 2 batches per region
- int scanBatch = 1;
- HashTable hashTable = new HashTable(TEST_UTIL.getConfiguration());
- int code = hashTable.run(new String[] {
- "--batchsize=" + batchSize,
- "--numhashfiles=" + numHashFiles,
- "--scanbatch=" + scanBatch,
- sourceTableName.getNameAsString(),
- testDir.toString()});
- assertEquals("hash table job failed", 0, code);
-
- FileSystem fs = TEST_UTIL.getTestFileSystem();
-
- HashTable.TableHash tableHash = HashTable.TableHash.read(fs.getConf(), testDir);
- assertEquals(sourceTableName.getNameAsString(), tableHash.tableName);
- assertEquals(batchSize, tableHash.batchSize);
- assertEquals(numHashFiles, tableHash.numHashFiles);
- assertEquals(numHashFiles - 1, tableHash.partitions.size());
-
- LOG.info("Hash table completed");
- }
-
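- /** Populates source and target with 100 rows crafted to produce the per-counter differences noted inline. */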
- private void writeTestData(TableName sourceTableName, TableName targetTableName)
- throws Exception {
- final byte[] family = Bytes.toBytes("family");
- final byte[] column1 = Bytes.toBytes("c1");
- final byte[] column2 = Bytes.toBytes("c2");
- final byte[] value1 = Bytes.toBytes("val1");
- final byte[] value2 = Bytes.toBytes("val2");
- final byte[] value3 = Bytes.toBytes("val3");
-
- int numRows = 100;
- int sourceRegions = 10;
- int targetRegions = 6;
-
- Table sourceTable = TEST_UTIL.createTable(sourceTableName,
- family, generateSplits(numRows, sourceRegions));
-
- Table targetTable = TEST_UTIL.createTable(targetTableName,
- family, generateSplits(numRows, targetRegions));
-
- long timestamp = 1430764183454L;
-
- int rowIndex = 0;
- // a bunch of identical rows
- for (; rowIndex < 40; rowIndex++) {
- Put sourcePut = new Put(Bytes.toBytes(rowIndex));
- sourcePut.addColumn(family, column1, timestamp, value1);
- sourcePut.addColumn(family, column2, timestamp, value2);
- sourceTable.put(sourcePut);
-
- Put targetPut = new Put(Bytes.toBytes(rowIndex));
- targetPut.addColumn(family, column1, timestamp, value1);
- targetPut.addColumn(family, column2, timestamp, value2);
- targetTable.put(targetPut);
- }
- // some rows only in the source table
- // ROWSWITHDIFFS: 10
- // TARGETMISSINGROWS: 10
- // TARGETMISSINGCELLS: 20
- for (; rowIndex < 50; rowIndex++) {
- Put put = new Put(Bytes.toBytes(rowIndex));
- put.addColumn(family, column1, timestamp, value1);
- put.addColumn(family, column2, timestamp, value2);
- sourceTable.put(put);
- }
- // some rows only in the target table
- // ROWSWITHDIFFS: 10
- // SOURCEMISSINGROWS: 10
- // SOURCEMISSINGCELLS: 20
- for (; rowIndex < 60; rowIndex++) {
- Put put = new Put(Bytes.toBytes(rowIndex));
- put.addColumn(family, column1, timestamp, value1);
- put.addColumn(family, column2, timestamp, value2);
- targetTable.put(put);
- }
- // some rows with 1 missing cell in target table
- // ROWSWITHDIFFS: 10
- // TARGETMISSINGCELLS: 10
- for (; rowIndex < 70; rowIndex++) {
- Put sourcePut = new Put(Bytes.toBytes(rowIndex));
- sourcePut.addColumn(family, column1, timestamp, value1);
- sourcePut.addColumn(family, column2, timestamp, value2);
- sourceTable.put(sourcePut);
-
- Put targetPut = new Put(Bytes.toBytes(rowIndex));
- targetPut.addColumn(family, column1, timestamp, value1);
- targetTable.put(targetPut);
- }
- // some rows with 1 missing cell in source table
- // ROWSWITHDIFFS: 10
- // SOURCEMISSINGCELLS: 10
- for (; rowIndex < 80; rowIndex++) {
- Put sourcePut = new Put(Bytes.toBytes(rowIndex));
- sourcePut.addColumn(family, column1, timestamp, value1);
- sourceTable.put(sourcePut);
-
- Put targetPut = new Put(Bytes.toBytes(rowIndex));
- targetPut.addColumn(family, column1, timestamp, value1);
- targetPut.addColumn(family, column2, timestamp, value2);
- targetTable.put(targetPut);
- }
- // some rows differing only in timestamp
- // ROWSWITHDIFFS: 10
- // SOURCEMISSINGCELLS: 20
- // TARGETMISSINGCELLS: 20
- for (; rowIndex < 90; rowIndex++) {
- Put sourcePut = new Put(Bytes.toBytes(rowIndex));
- sourcePut.addColumn(family, column1, timestamp, column1);
- sourcePut.addColumn(family, column2, timestamp, value2);
- sourceTable.put(sourcePut);
-
- Put targetPut = new Put(Bytes.toBytes(rowIndex));
- targetPut.addColumn(family, column1, timestamp+1, column1);
- targetPut.addColumn(family, column2, timestamp-1, value2);
- targetTable.put(targetPut);
- }
- // some rows with different values
- // ROWSWITHDIFFS: 10
- // DIFFERENTCELLVALUES: 20
- for (; rowIndex < numRows; rowIndex++) {
- Put sourcePut = new Put(Bytes.toBytes(rowIndex));
- sourcePut.addColumn(family, column1, timestamp, value1);
- sourcePut.addColumn(family, column2, timestamp, value2);
- sourceTable.put(sourcePut);
-
- Put targetPut = new Put(Bytes.toBytes(rowIndex));
- targetPut.addColumn(family, column1, timestamp, value3);
- targetPut.addColumn(family, column2, timestamp, value3);
- targetTable.put(targetPut);
- }
-
- sourceTable.close();
- targetTable.close();
- }
-
-
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormat.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormat.java
deleted file mode 100644
index 4693519..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormat.java
+++ /dev/null
@@ -1,481 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-import static org.mockito.Matchers.anyObject;
-import static org.mockito.Mockito.doAnswer;
-import static org.mockito.Mockito.doReturn;
-import static org.mockito.Mockito.doThrow;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.spy;
-
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.Map;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.*;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
-import org.apache.hadoop.hbase.filter.Filter;
-import org.apache.hadoop.hbase.filter.RegexStringComparator;
-import org.apache.hadoop.hbase.filter.RowFilter;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.JobConfigurable;
-import org.apache.hadoop.mapred.MiniMRCluster;
-import org.apache.hadoop.mapreduce.InputFormat;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
-import org.junit.AfterClass;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.mockito.invocation.InvocationOnMock;
-import org.mockito.stubbing.Answer;
-
-/**
- * This tests the TableInputFormat and its recovery semantics
- *
- */
-@Category(LargeTests.class)
-public class TestTableInputFormat {
-
- private static final Log LOG = LogFactory.getLog(TestTableInputFormat.class);
-
- private final static HBaseTestingUtility UTIL = new HBaseTestingUtility();
- private static MiniMRCluster mrCluster;
- static final byte[] FAMILY = Bytes.toBytes("family");
-
- private static final byte[][] columns = new byte[][] { FAMILY };
-
- @BeforeClass
- public static void beforeClass() throws Exception {
- UTIL.startMiniCluster();
- }
-
- @AfterClass
- public static void afterClass() throws Exception {
- UTIL.shutdownMiniCluster();
- }
-
- @Before
- public void before() throws IOException {
- LOG.info("before");
- UTIL.ensureSomeRegionServersAvailable(1);
- LOG.info("before done");
- }
-
- /**
- * Setup a table with two rows and values.
- *
- * @param tableName
- * @return
- * @throws IOException
- */
- public static Table createTable(byte[] tableName) throws IOException {
- return createTable(tableName, new byte[][] { FAMILY });
- }
-
- /**
- * Setup a table with two rows and values per column family.
- *
- * @param tableName
- * @return
- * @throws IOException
- */
- public static Table createTable(byte[] tableName, byte[][] families) throws IOException {
- Table table = UTIL.createTable(TableName.valueOf(tableName), families);
- Put p = new Put("aaa".getBytes());
- for (byte[] family : families) {
- p.addColumn(family, null, "value aaa".getBytes());
- }
- table.put(p);
- p = new Put("bbb".getBytes());
- for (byte[] family : families) {
- p.addColumn(family, null, "value bbb".getBytes());
- }
- table.put(p);
- return table;
- }
-
- /**
- * Verify that the result and key have expected values.
- *
- * @param r
- * @param key
- * @param expectedKey
- * @param expectedValue
- * @return
- */
- static boolean checkResult(Result r, ImmutableBytesWritable key,
- byte[] expectedKey, byte[] expectedValue) {
- assertEquals(0, key.compareTo(expectedKey));
- Map<byte[], byte[]> vals = r.getFamilyMap(FAMILY);
- byte[] value = vals.values().iterator().next();
- assertTrue(Arrays.equals(value, expectedValue));
- return true; // if succeed
- }
-
- /**
- * Create table data and run tests on specified htable using the
- * o.a.h.hbase.mapreduce API.
- *
- * @param table
- * @throws IOException
- * @throws InterruptedException
- */
- static void runTestMapreduce(Table table) throws IOException,
- InterruptedException {
- org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl trr =
- new org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl();
- Scan s = new Scan();
- s.setStartRow("aaa".getBytes());
- s.setStopRow("zzz".getBytes());
- s.addFamily(FAMILY);
- trr.setScan(s);
- trr.setHTable(table);
-
- trr.initialize(null, null);
- Result r = new Result();
- ImmutableBytesWritable key = new ImmutableBytesWritable();
-
- boolean more = trr.nextKeyValue();
- assertTrue(more);
- key = trr.getCurrentKey();
- r = trr.getCurrentValue();
- checkResult(r, key, "aaa".getBytes(), "value aaa".getBytes());
-
- more = trr.nextKeyValue();
- assertTrue(more);
- key = trr.getCurrentKey();
- r = trr.getCurrentValue();
- checkResult(r, key, "bbb".getBytes(), "value bbb".getBytes());
-
- // no more data
- more = trr.nextKeyValue();
- assertFalse(more);
- }
-
- /**
- * Create a table that IOE's on first scanner next call
- *
- * @throws IOException
- */
- static Table createIOEScannerTable(byte[] name, final int failCnt)
- throws IOException {
- // build up a mock scanner stuff to fail the first time
- Answer<ResultScanner> a = new Answer<ResultScanner>() {
- int cnt = 0;
-
- @Override
- public ResultScanner answer(InvocationOnMock invocation) throws Throwable {
- // first invocation return the busted mock scanner
- if (cnt++ < failCnt) {
- // create mock ResultScanner that always fails.
- Scan scan = mock(Scan.class);
- doReturn("bogus".getBytes()).when(scan).getStartRow(); // avoid npe
- ResultScanner scanner = mock(ResultScanner.class);
- // simulate TimeoutException / IOException
- doThrow(new IOException("Injected exception")).when(scanner).next();
- return scanner;
- }
-
- // otherwise return the real scanner.
- return (ResultScanner) invocation.callRealMethod();
- }
- };
-
- Table htable = spy(createTable(name));
- doAnswer(a).when(htable).getScanner((Scan) anyObject());
- return htable;
- }
-
- /**
- * Create a table that throws a NotServingRegionException on first scanner
- * next call
- *
- * @throws IOException
- */
- static Table createDNRIOEScannerTable(byte[] name, final int failCnt)
- throws IOException {
- // build up a mock scanner stuff to fail the first time
- Answer<ResultScanner> a = new Answer<ResultScanner>() {
- int cnt = 0;
-
- @Override
- public ResultScanner answer(InvocationOnMock invocation) throws Throwable {
- // first invocation return the busted mock scanner
- if (cnt++ < failCnt) {
- // create mock ResultScanner that always fails.
- Scan scan = mock(Scan.class);
- doReturn("bogus".getBytes()).when(scan).getStartRow(); // avoid npe
- ResultScanner scanner = mock(ResultScanner.class);
-
- invocation.callRealMethod(); // simulate NotServingRegionException
- doThrow(
- new NotServingRegionException("Injected simulated TimeoutException"))
- .when(scanner).next();
- return scanner;
- }
-
- // otherwise return the real scanner.
- return (ResultScanner) invocation.callRealMethod();
- }
- };
-
- Table htable = spy(createTable(name));
- doAnswer(a).when(htable).getScanner((Scan) anyObject());
- return htable;
- }
-
- /**
- * Run test assuming no errors using newer mapreduce api
- *
- * @throws IOException
- * @throws InterruptedException
- */
- @Test
- public void testTableRecordReaderMapreduce() throws IOException,
- InterruptedException {
- Table table = createTable("table1-mr".getBytes());
- runTestMapreduce(table);
- }
-
- /**
- * Run test assuming Scanner IOException failure using newer mapreduce api
- *
- * @throws IOException
- * @throws InterruptedException
- */
- @Test
- public void testTableRecordReaderScannerFailMapreduce() throws IOException,
- InterruptedException {
- Table htable = createIOEScannerTable("table2-mr".getBytes(), 1);
- runTestMapreduce(htable);
- }
-
- /**
- * Run test assuming Scanner IOException failure using newer mapreduce api
- *
- * @throws IOException
- * @throws InterruptedException
- */
- @Test(expected = IOException.class)
- public void testTableRecordReaderScannerFailMapreduceTwice() throws IOException,
- InterruptedException {
- Table htable = createIOEScannerTable("table3-mr".getBytes(), 2);
- runTestMapreduce(htable);
- }
-
- /**
- * Run test assuming NotServingRegionException using newer mapreduce api
- *
- * @throws InterruptedException
- * @throws org.apache.hadoop.hbase.DoNotRetryIOException
- */
- @Test
- public void testTableRecordReaderScannerTimeoutMapreduce()
- throws IOException, InterruptedException {
- Table htable = createDNRIOEScannerTable("table4-mr".getBytes(), 1);
- runTestMapreduce(htable);
- }
-
- /**
- * Run test assuming NotServingRegionException using newer mapreduce api
- *
- * @throws InterruptedException
- * @throws org.apache.hadoop.hbase.NotServingRegionException
- */
- @Test(expected = org.apache.hadoop.hbase.NotServingRegionException.class)
- public void testTableRecordReaderScannerTimeoutMapreduceTwice()
- throws IOException, InterruptedException {
- Table htable = createDNRIOEScannerTable("table5-mr".getBytes(), 2);
- runTestMapreduce(htable);
- }
-
- /**
- * Verify the example we present in javadocs on TableInputFormatBase
- */
- @Test
- public void testExtensionOfTableInputFormatBase()
- throws IOException, InterruptedException, ClassNotFoundException {
- LOG.info("testing use of an InputFormat taht extends InputFormatBase");
- final Table htable = createTable(Bytes.toBytes("exampleTable"),
- new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") });
- testInputFormat(ExampleTIF.class);
- }
-
- @Test
- public void testJobConfigurableExtensionOfTableInputFormatBase()
- throws IOException, InterruptedException, ClassNotFoundException {
- LOG.info("testing use of an InputFormat taht extends InputFormatBase, " +
- "using JobConfigurable.");
- final Table htable = createTable(Bytes.toBytes("exampleJobConfigurableTable"),
- new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") });
- testInputFormat(ExampleJobConfigurableTIF.class);
- }
-
- @Test
- public void testDeprecatedExtensionOfTableInputFormatBase()
- throws IOException, InterruptedException, ClassNotFoundException {
- LOG.info("testing use of an InputFormat taht extends InputFormatBase, " +
- "using the approach documented in 0.98.");
- final Table htable = createTable(Bytes.toBytes("exampleDeprecatedTable"),
- new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") });
- testInputFormat(ExampleDeprecatedTIF.class);
- }
-
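- /** Submits a job with the given InputFormat and verifies the counters recorded by ExampleVerifier. */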
- void testInputFormat(Class<? extends InputFormat> clazz)
- throws IOException, InterruptedException, ClassNotFoundException {
- final Job job = MapreduceTestingShim.createJob(UTIL.getConfiguration());
- job.setInputFormatClass(clazz);
- job.setOutputFormatClass(NullOutputFormat.class);
- job.setMapperClass(ExampleVerifier.class);
- job.setNumReduceTasks(0);
-
- LOG.debug("submitting job.");
- assertTrue("job failed!", job.waitForCompletion(true));
- assertEquals("Saw the wrong number of instances of the filtered-for row.", 2, job.getCounters()
- .findCounter(TestTableInputFormat.class.getName() + ":row", "aaa").getValue());
- assertEquals("Saw any instances of the filtered out row.", 0, job.getCounters()
- .findCounter(TestTableInputFormat.class.getName() + ":row", "bbb").getValue());
- assertEquals("Saw the wrong number of instances of columnA.", 1, job.getCounters()
- .findCounter(TestTableInputFormat.class.getName() + ":family", "columnA").getValue());
- assertEquals("Saw the wrong number of instances of columnB.", 1, job.getCounters()
- .findCounter(TestTableInputFormat.class.getName() + ":family", "columnB").getValue());
- assertEquals("Saw the wrong count of values for the filtered-for row.", 2, job.getCounters()
- .findCounter(TestTableInputFormat.class.getName() + ":value", "value aaa").getValue());
- assertEquals("Saw the wrong count of values for the filtered-out row.", 0, job.getCounters()
- .findCounter(TestTableInputFormat.class.getName() + ":value", "value bbb").getValue());
- }
-
- public static class ExampleVerifier extends TableMapper<NullWritable, NullWritable> {
-
- @Override
- public void map(ImmutableBytesWritable key, Result value, Context context)
- throws IOException {
- for (Cell cell : value.listCells()) {
- context.getCounter(TestTableInputFormat.class.getName() + ":row",
- Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()))
- .increment(1l);
- context.getCounter(TestTableInputFormat.class.getName() + ":family",
- Bytes.toString(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength()))
- .increment(1l);
- context.getCounter(TestTableInputFormat.class.getName() + ":value",
- Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()))
- .increment(1l);
- }
- }
-
- }
-
- public static class ExampleDeprecatedTIF extends TableInputFormatBase implements JobConfigurable {
-
- @Override
- public void configure(JobConf job) {
- try {
- Connection connection = ConnectionFactory.createConnection(job);
- Table exampleTable = connection.getTable(TableName.valueOf(("exampleDeprecatedTable")));
- // mandatory
- initializeTable(connection, exampleTable.getName());
- byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
- Bytes.toBytes("columnB") };
- // optional
- Scan scan = new Scan();
- for (byte[] family : inputColumns) {
- scan.addFamily(family);
- }
- Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*"));
- scan.setFilter(exampleFilter);
- setScan(scan);
- } catch (IOException exception) {
- throw new RuntimeException("Failed to configure for job.", exception);
- }
- }
-
- }
-
-
- public static class ExampleJobConfigurableTIF extends TableInputFormatBase
- implements JobConfigurable {
-
- @Override
- public void configure(JobConf job) {
- try {
- Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create(job));
- TableName tableName = TableName.valueOf("exampleJobConfigurableTable");
- // mandatory
- initializeTable(connection, tableName);
- byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
- Bytes.toBytes("columnB") };
- //optional
- Scan scan = new Scan();
- for (byte[] family : inputColumns) {
- scan.addFamily(family);
- }
- Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*"));
- scan.setFilter(exampleFilter);
- setScan(scan);
- } catch (IOException exception) {
- throw new RuntimeException("Failed to initialize.", exception);
- }
- }
- }
-
-
- public static class ExampleTIF extends TableInputFormatBase {
-
- @Override
- protected void initialize(JobContext job) throws IOException {
- Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create(
- job.getConfiguration()));
- TableName tableName = TableName.valueOf("exampleTable");
- // mandatory
- initializeTable(connection, tableName);
- byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
- Bytes.toBytes("columnB") };
- //optional
- Scan scan = new Scan();
- for (byte[] family : inputColumns) {
- scan.addFamily(family);
- }
- Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*"));
- scan.setFilter(exampleFilter);
- setScan(scan);
- }
-
- }
-}
-
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatBase.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatBase.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatBase.java
deleted file mode 100644
index 699e773..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatBase.java
+++ /dev/null
@@ -1,53 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.*;
-
-import java.net.Inet6Address;
-import java.net.InetAddress;
-import java.net.UnknownHostException;
-
-import org.apache.hadoop.hbase.testclassification.SmallTests;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-@Category({SmallTests.class})
-public class TestTableInputFormatBase {
- @Test
- public void testTableInputFormatBaseReverseDNSForIPv6()
- throws UnknownHostException {
- String address = "ipv6.google.com";
- String localhost = null;
- InetAddress addr = null;
- TableInputFormat inputFormat = new TableInputFormat();
- try {
- localhost = InetAddress.getByName(address).getCanonicalHostName();
- addr = Inet6Address.getByName(address);
- } catch (UnknownHostException e) {
- // google.com is down, we can probably forgive this test.
- return;
- }
- System.out.println("Should retrun the hostname for this host " +
- localhost + " addr : " + addr);
- String actualHostName = inputFormat.reverseDNS(addr);
- assertEquals("Should retrun the hostname for this host. Expected : " +
- localhost + " Actual : " + actualHostName, localhost, actualHostName);
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScan1.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScan1.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScan1.java
deleted file mode 100644
index 99b40b9..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScan1.java
+++ /dev/null
@@ -1,200 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-/**
- * TestTableInputFormatScan part 1.
- * @see TestTableInputFormatScanBase
- */
-@Category({VerySlowMapReduceTests.class, LargeTests.class})
-public class TestTableInputFormatScan1 extends TestTableInputFormatScanBase {
-
- /**
- * Tests a MR scan using specific start and stop rows.
- *
- * @throws IOException
- * @throws ClassNotFoundException
- * @throws InterruptedException
- */
- @Test
- public void testScanEmptyToEmpty()
- throws IOException, InterruptedException, ClassNotFoundException {
- testScan(null, null, null);
- }
-
- /**
- * Tests a MR scan using specific start and stop rows.
- *
- * @throws IOException
- * @throws ClassNotFoundException
- * @throws InterruptedException
- */
- @Test
- public void testScanEmptyToAPP()
- throws IOException, InterruptedException, ClassNotFoundException {
- testScan(null, "app", "apo");
- }
-
- /**
- * Tests a MR scan using specific start and stop rows.
- *
- * @throws IOException
- * @throws ClassNotFoundException
- * @throws InterruptedException
- */
- @Test
- public void testScanEmptyToBBA()
- throws IOException, InterruptedException, ClassNotFoundException {
- testScan(null, "bba", "baz");
- }
-
- /**
- * Tests a MR scan using specific start and stop rows.
- *
- * @throws IOException
- * @throws ClassNotFoundException
- * @throws InterruptedException
- */
- @Test
- public void testScanEmptyToBBB()
- throws IOException, InterruptedException, ClassNotFoundException {
- testScan(null, "bbb", "bba");
- }
-
- /**
- * Tests a MR scan using specific start and stop rows.
- *
- * @throws IOException
- * @throws ClassNotFoundException
- * @throws InterruptedException
- */
- @Test
- public void testScanEmptyToOPP()
- throws IOException, InterruptedException, ClassNotFoundException {
- testScan(null, "opp", "opo");
- }
-
- /**
- * Tests a MR scan using a specific number of mappers. The test table has 25 regions,
- * and all region sizes default to 0. The average region size is 1 (the smallest
- * positive value). When we set hbase.mapreduce.input.ratio to -1, every region is cut into two
- * MapReduce input splits, so the number of MR input splits should be 50; when we set hbase
- * .mapreduce.input.ratio to 100, the sum of all region sizes is less than the average region
- * size, so all regions are combined into a single MapReduce input split.
- *
- * @throws IOException
- * @throws ClassNotFoundException
- * @throws InterruptedException
- */
- @Test
- public void testGetSplits() throws IOException, InterruptedException, ClassNotFoundException {
- testNumOfSplits("-1", 52);
- testNumOfSplits("100", 1);
- }
-
- /**
- * Tests the getSplitKey() method in TableInputFormatBase.java
- *
- * @throws IOException
- * @throws ClassNotFoundException
- * @throws InterruptedException
- */
- @Test
- public void testGetSplitsPoint() throws IOException, InterruptedException,
- ClassNotFoundException {
- byte[] start1 = { 'a', 'a', 'a', 'b', 'c', 'd', 'e', 'f' };
- byte[] end1 = { 'a', 'a', 'a', 'f', 'f' };
- byte[] splitPoint1 = { 'a', 'a', 'a', 'd', 'd', -78, 50, -77 };
- testGetSplitKey(start1, end1, splitPoint1, true);
-
- byte[] start2 = { '1', '1', '1', '0', '0', '0' };
- byte[] end2 = { '1', '1', '2', '5', '7', '9', '0' };
- byte[] splitPoint2 = { '1', '1', '1', -78, -77, -76, -104 };
- testGetSplitKey(start2, end2, splitPoint2, true);
-
- byte[] start3 = { 'a', 'a', 'a', 'a', 'a', 'a' };
- byte[] end3 = { 'a', 'a', 'b' };
- byte[] splitPoint3 = { 'a', 'a', 'a', -80, -80, -80 };
- testGetSplitKey(start3, end3, splitPoint3, true);
-
- byte[] start4 = { 'a', 'a', 'a' };
- byte[] end4 = { 'a', 'a', 'a', 'z' };
- byte[] splitPoint4 = { 'a', 'a', 'a', '=' };
- testGetSplitKey(start4, end4, splitPoint4, true);
-
- byte[] start5 = { 'a', 'a', 'a' };
- byte[] end5 = { 'a', 'a', 'b', 'a' };
- byte[] splitPoint5 = { 'a', 'a', 'a', -80 };
- testGetSplitKey(start5, end5, splitPoint5, true);
-
- // Test Case 6: empty key and "hhhqqqwww", split point is "h"
- byte[] start6 = {};
- byte[] end6 = { 'h', 'h', 'h', 'q', 'q', 'q', 'w', 'w' };
- byte[] splitPointText6 = { 'h' };
- byte[] splitPointBinary6 = { 104 };
- testGetSplitKey(start6, end6, splitPointText6, true);
- testGetSplitKey(start6, end6, splitPointBinary6, false);
-
- // Test Case 7: "ffffaaa" and empty key, split point depends on the mode we choose(text key or
- // binary key).
- byte[] start7 = { 'f', 'f', 'f', 'f', 'a', 'a', 'a' };
- byte[] end7 = {};
- byte[] splitPointText7 = { 'f', '~', '~', '~', '~', '~', '~' };
- byte[] splitPointBinary7 = { 'f', -1, -1, -1, -1, -1, -1 };
- testGetSplitKey(start7, end7, splitPointText7, true);
- testGetSplitKey(start7, end7, splitPointBinary7, false);
-
- // Test Case 8: both start key and end key are empty. Split point depends on the mode we
- // choose (text key or binary key).
- byte[] start8 = {};
- byte[] end8 = {};
- byte[] splitPointText8 = { 'O' };
- byte[] splitPointBinary8 = { 0 };
- testGetSplitKey(start8, end8, splitPointText8, true);
- testGetSplitKey(start8, end8, splitPointBinary8, false);
-
- // Test Case 9: Binary Key example
- byte[] start9 = { 13, -19, 126, 127 };
- byte[] end9 = { 13, -19, 127, 0 };
- byte[] splitPoint9 = { 13, -19, 126, -65 };
- testGetSplitKey(start9, end9, splitPoint9, false);
-
- // Test Case 10: Binary key split when the start key is an unsigned byte and the end byte is a
- // signed byte
- byte[] start10 = { 'x' };
- byte[] end10 = { -128 };
- byte[] splitPoint10 = { '|' };
- testGetSplitKey(start10, end10, splitPoint10, false);
-
- // Test Case 11: Binary key split when the start key is a signed byte and the end byte is a
- // signed byte
- byte[] start11 = { -100 };
- byte[] end11 = { -90 };
- byte[] splitPoint11 = { -95 };
- testGetSplitKey(start11, end11, splitPoint11, false);
- }
-
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScan2.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScan2.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScan2.java
deleted file mode 100644
index 02f893f..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScan2.java
+++ /dev/null
@@ -1,118 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-/**
- * TestTableInputFormatScan part 2.
- * @see TestTableInputFormatScanBase
- */
-@Category({VerySlowMapReduceTests.class, LargeTests.class})
-public class TestTableInputFormatScan2 extends TestTableInputFormatScanBase {
-
- /**
- * Tests a MR scan using specific start and stop rows.
- *
- * @throws IOException
- * @throws ClassNotFoundException
- * @throws InterruptedException
- */
- @Test
- public void testScanOBBToOPP()
- throws IOException, InterruptedException, ClassNotFoundException {
- testScan("obb", "opp", "opo");
- }
-
- /**
- * Tests a MR scan using specific start and stop rows.
- *
- * @throws IOException
- * @throws ClassNotFoundException
- * @throws InterruptedException
- */
- @Test
- public void testScanOBBToQPP()
- throws IOException, InterruptedException, ClassNotFoundException {
- testScan("obb", "qpp", "qpo");
- }
-
- /**
- * Tests a MR scan using specific start and stop rows.
- *
- * @throws IOException
- * @throws ClassNotFoundException
- * @throws InterruptedException
- */
- @Test
- public void testScanOPPToEmpty()
- throws IOException, InterruptedException, ClassNotFoundException {
- testScan("opp", null, "zzz");
- }
-
- /**
- * Tests a MR scan using specific start and stop rows.
- *
- * @throws IOException
- * @throws ClassNotFoundException
- * @throws InterruptedException
- */
- @Test
- public void testScanYYXToEmpty()
- throws IOException, InterruptedException, ClassNotFoundException {
- testScan("yyx", null, "zzz");
- }
-
- /**
- * Tests a MR scan using specific start and stop rows.
- *
- * @throws IOException
- * @throws ClassNotFoundException
- * @throws InterruptedException
- */
- @Test
- public void testScanYYYToEmpty()
- throws IOException, InterruptedException, ClassNotFoundException {
- testScan("yyy", null, "zzz");
- }
-
- /**
- * Tests a MR scan using specific start and stop rows.
- *
- * @throws IOException
- * @throws ClassNotFoundException
- * @throws InterruptedException
- */
- @Test
- public void testScanYZYToEmpty()
- throws IOException, InterruptedException, ClassNotFoundException {
- testScan("yzy", null, "zzz");
- }
-
- @Test
- public void testScanFromConfiguration()
- throws IOException, InterruptedException, ClassNotFoundException {
- testScanFromConfiguration("bba", "bbd", "bbc");
- }
-}
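The scan-range tests in this file drive TableInputFormat through a Scan bounded by start and stop rows. A minimal sketch of wiring such a bounded scan into a MapReduce job follows; the table name "scantest" and the choice of IdentityTableMapper are illustrative assumptions, not details taken from TestTableInputFormatScanBase.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.IdentityTableMapper;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Job;

public class BoundedScanJobSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    Job job = Job.getInstance(conf, "scan-obb-to-opp");
    // Scan only rows in ["obb", "opp"), mirroring testScanOBBToOPP above.
    Scan scan = new Scan();
    scan.setStartRow(Bytes.toBytes("obb"));
    scan.setStopRow(Bytes.toBytes("opp"));
    // IdentityTableMapper forwards each (row key, Result) pair unchanged.
    TableMapReduceUtil.initTableMapperJob("scantest", scan, IdentityTableMapper.class,
        ImmutableBytesWritable.class, Result.class, job);
    job.setNumReduceTasks(0);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}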
[21/41] hbase git commit: HBASE-18640 Move mapreduce out of hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/LoadTestTool.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/LoadTestTool.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/LoadTestTool.java
new file mode 100644
index 0000000..6b5cbe2
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/LoadTestTool.java
@@ -0,0 +1,915 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hadoop.hbase.util;
+
+import java.io.IOException;
+import java.io.InterruptedIOException;
+import java.lang.reflect.Constructor;
+import java.security.SecureRandom;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Properties;
+import java.util.Random;
+import java.util.concurrent.atomic.AtomicReference;
+
+import javax.crypto.spec.SecretKeySpec;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HBaseInterfaceAudience;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Durability;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.io.compress.Compression;
+import org.apache.hadoop.hbase.io.crypto.Cipher;
+import org.apache.hadoop.hbase.io.crypto.Encryption;
+import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
+import org.apache.hadoop.hbase.regionserver.BloomType;
+import org.apache.hadoop.hbase.security.EncryptionUtil;
+import org.apache.hadoop.hbase.security.HBaseKerberosUtils;
+import org.apache.hadoop.hbase.security.User;
+import org.apache.hadoop.hbase.security.access.AccessControlClient;
+import org.apache.hadoop.hbase.security.access.Permission;
+import org.apache.hadoop.hbase.util.test.LoadTestDataGenerator;
+import org.apache.hadoop.hbase.util.test.LoadTestDataGeneratorWithACL;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * A command-line utility that reads, writes, and verifies data. Unlike
+ * {@link org.apache.hadoop.hbase.PerformanceEvaluation}, this tool validates the data written,
+ * and supports simultaneously writing and reading the same set of keys.
+ */
+@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
+public class LoadTestTool extends AbstractHBaseTool {
+
+ private static final Log LOG = LogFactory.getLog(LoadTestTool.class);
+ private static final String COLON = ":";
+
+ /** Table name for the test */
+ private TableName tableName;
+
+ /** Column families for the test */
+ private byte[][] families;
+
+ /** Table name to use if not overridden on the command line */
+ protected static final String DEFAULT_TABLE_NAME = "cluster_test";
+
+ /** The default data size if not specified */
+ protected static final int DEFAULT_DATA_SIZE = 64;
+
+ /** The number of reader/writer threads if not specified */
+ protected static final int DEFAULT_NUM_THREADS = 20;
+
+ /** Usage string for the load option */
+ protected static final String OPT_USAGE_LOAD =
+ "<avg_cols_per_key>:<avg_data_size>" +
+ "[:<#threads=" + DEFAULT_NUM_THREADS + ">]";
+
+ /** Usage string for the read option */
+ protected static final String OPT_USAGE_READ =
+ "<verify_percent>[:<#threads=" + DEFAULT_NUM_THREADS + ">]";
+
+ /** Usage string for the update option */
+ protected static final String OPT_USAGE_UPDATE =
+ "<update_percent>[:<#threads=" + DEFAULT_NUM_THREADS
+ + ">][:<#whether to ignore nonce collisions=0>]";
+
+ protected static final String OPT_USAGE_BLOOM = "Bloom filter type, one of " +
+ Arrays.toString(BloomType.values());
+
+ protected static final String OPT_USAGE_COMPRESSION = "Compression type, " +
+ "one of " + Arrays.toString(Compression.Algorithm.values());
+
+ public static final String OPT_BLOOM = "bloom";
+ public static final String OPT_COMPRESSION = "compression";
+ public static final String OPT_DEFERRED_LOG_FLUSH = "deferredlogflush";
+ public static final String OPT_DEFERRED_LOG_FLUSH_USAGE = "Enable deferred log flush.";
+
+ public static final String OPT_INMEMORY = "in_memory";
+ public static final String OPT_USAGE_IN_MEMORY = "Tries to keep the HFiles of the CF " +
+ "inmemory as far as possible. Not guaranteed that reads are always served from inmemory";
+
+ public static final String OPT_GENERATOR = "generator";
+ public static final String OPT_GENERATOR_USAGE = "The class which generates load for the tool."
+ + " Any args for this class can be passed as colon separated after class name";
+
+ public static final String OPT_WRITER = "writer";
+ public static final String OPT_WRITER_USAGE = "The class for executing the write requests";
+
+ public static final String OPT_UPDATER = "updater";
+ public static final String OPT_UPDATER_USAGE = "The class for executing the update requests";
+
+ public static final String OPT_READER = "reader";
+ public static final String OPT_READER_USAGE = "The class for executing the read requests";
+
+ protected static final String OPT_KEY_WINDOW = "key_window";
+ protected static final String OPT_WRITE = "write";
+ protected static final String OPT_MAX_READ_ERRORS = "max_read_errors";
+ public static final String OPT_MULTIPUT = "multiput";
+ public static final String OPT_MULTIGET = "multiget_batchsize";
+ protected static final String OPT_NUM_KEYS = "num_keys";
+ protected static final String OPT_READ = "read";
+ protected static final String OPT_START_KEY = "start_key";
+ public static final String OPT_TABLE_NAME = "tn";
+ public static final String OPT_COLUMN_FAMILIES = "families";
+ protected static final String OPT_ZK_QUORUM = "zk";
+ protected static final String OPT_ZK_PARENT_NODE = "zk_root";
+ protected static final String OPT_SKIP_INIT = "skip_init";
+ protected static final String OPT_INIT_ONLY = "init_only";
+ protected static final String NUM_TABLES = "num_tables";
+ protected static final String OPT_REGIONS_PER_SERVER = "regions_per_server";
+ protected static final String OPT_BATCHUPDATE = "batchupdate";
+ protected static final String OPT_UPDATE = "update";
+
+ public static final String OPT_ENCRYPTION = "encryption";
+ protected static final String OPT_ENCRYPTION_USAGE =
+ "Enables transparent encryption on the test table, one of " +
+ Arrays.toString(Encryption.getSupportedCiphers());
+
+ public static final String OPT_NUM_REGIONS_PER_SERVER = "num_regions_per_server";
+ protected static final String OPT_NUM_REGIONS_PER_SERVER_USAGE
+ = "Desired number of regions per region server. Defaults to 5.";
+ public static int DEFAULT_NUM_REGIONS_PER_SERVER = 5;
+
+ public static final String OPT_REGION_REPLICATION = "region_replication";
+ protected static final String OPT_REGION_REPLICATION_USAGE =
+ "Desired number of replicas per region";
+
+ public static final String OPT_REGION_REPLICA_ID = "region_replica_id";
+ protected static final String OPT_REGION_REPLICA_ID_USAGE =
+ "Region replica id to do the reads from";
+
+ public static final String OPT_MOB_THRESHOLD = "mob_threshold";
+ protected static final String OPT_MOB_THRESHOLD_USAGE =
+ "Desired cell size to exceed in bytes that will use the MOB write path";
+
+ protected static final long DEFAULT_START_KEY = 0;
+
+ /** This will be removed as we factor out the dependency on command line */
+ protected CommandLine cmd;
+
+ protected MultiThreadedWriter writerThreads = null;
+ protected MultiThreadedReader readerThreads = null;
+ protected MultiThreadedUpdater updaterThreads = null;
+
+ protected long startKey, endKey;
+
+ protected boolean isWrite, isRead, isUpdate;
+ protected boolean deferredLogFlush;
+
+ // Column family options
+ protected DataBlockEncoding dataBlockEncodingAlgo;
+ protected Compression.Algorithm compressAlgo;
+ protected BloomType bloomType;
+ private boolean inMemoryCF;
+
+ private User userOwner;
+ // Writer options
+ protected int numWriterThreads = DEFAULT_NUM_THREADS;
+ protected int minColsPerKey, maxColsPerKey;
+ protected int minColDataSize = DEFAULT_DATA_SIZE, maxColDataSize = DEFAULT_DATA_SIZE;
+ protected boolean isMultiPut;
+
+ // Updater options
+ protected int numUpdaterThreads = DEFAULT_NUM_THREADS;
+ protected int updatePercent;
+ protected boolean ignoreConflicts = false;
+ protected boolean isBatchUpdate;
+
+ // Reader options
+ private int numReaderThreads = DEFAULT_NUM_THREADS;
+ private int keyWindow = MultiThreadedReader.DEFAULT_KEY_WINDOW;
+ private int multiGetBatchSize = MultiThreadedReader.DEFAULT_BATCH_SIZE;
+ private int maxReadErrors = MultiThreadedReader.DEFAULT_MAX_ERRORS;
+ private int verifyPercent;
+
+ private int numTables = 1;
+
+ private String superUser;
+
+ private String userNames;
+ //This file is used to read authentication information in secure clusters.
+ private String authnFileName;
+
+ private int numRegionsPerServer = DEFAULT_NUM_REGIONS_PER_SERVER;
+ private int regionReplication = -1; // not set
+ private int regionReplicaId = -1; // not set
+
+ private int mobThreshold = -1; // not set
+
+ // TODO: refactor LoadTestToolImpl somewhere to make the usage from tests less bad,
+ // console tool itself should only be used from console.
+ protected boolean isSkipInit = false;
+ protected boolean isInitOnly = false;
+
+ protected Cipher cipher = null;
+
+ protected String[] splitColonSeparated(String option,
+ int minNumCols, int maxNumCols) {
+ String optVal = cmd.getOptionValue(option);
+ String[] cols = optVal.split(COLON);
+ if (cols.length < minNumCols || cols.length > maxNumCols) {
+ throw new IllegalArgumentException("Expected at least "
+ + minNumCols + " columns but no more than " + maxNumCols +
+ " in the colon-separated value '" + optVal + "' of the " +
+ "-" + option + " option");
+ }
+ return cols;
+ }
+
+ protected int getNumThreads(String numThreadsStr) {
+ return parseInt(numThreadsStr, 1, Short.MAX_VALUE);
+ }
+
+ public byte[][] getColumnFamilies() {
+ return families;
+ }
+
+ /**
+ * Apply column family options such as Bloom filters, compression, and data
+ * block encoding.
+ */
+ protected void applyColumnFamilyOptions(TableName tableName,
+ byte[][] columnFamilies) throws IOException {
+ try (Connection conn = ConnectionFactory.createConnection(conf);
+ Admin admin = conn.getAdmin()) {
+ TableDescriptor tableDesc = admin.getTableDescriptor(tableName);
+ LOG.info("Disabling table " + tableName);
+ admin.disableTable(tableName);
+ for (byte[] cf : columnFamilies) {
+ HColumnDescriptor columnDesc = (HColumnDescriptor) tableDesc.getColumnFamily(cf);
+ boolean isNewCf = columnDesc == null;
+ if (isNewCf) {
+ columnDesc = new HColumnDescriptor(cf);
+ }
+ if (bloomType != null) {
+ columnDesc.setBloomFilterType(bloomType);
+ }
+ if (compressAlgo != null) {
+ columnDesc.setCompressionType(compressAlgo);
+ }
+ if (dataBlockEncodingAlgo != null) {
+ columnDesc.setDataBlockEncoding(dataBlockEncodingAlgo);
+ }
+ if (inMemoryCF) {
+ columnDesc.setInMemory(inMemoryCF);
+ }
+ if (cipher != null) {
+ byte[] keyBytes = new byte[cipher.getKeyLength()];
+ new SecureRandom().nextBytes(keyBytes);
+ columnDesc.setEncryptionType(cipher.getName());
+ columnDesc.setEncryptionKey(
+ EncryptionUtil.wrapKey(conf,
+ User.getCurrent().getShortName(),
+ new SecretKeySpec(keyBytes,
+ cipher.getName())));
+ }
+ if (mobThreshold >= 0) {
+ columnDesc.setMobEnabled(true);
+ columnDesc.setMobThreshold(mobThreshold);
+ }
+
+ if (isNewCf) {
+ admin.addColumnFamily(tableName, columnDesc);
+ } else {
+ admin.modifyColumnFamily(tableName, columnDesc);
+ }
+ }
+ LOG.info("Enabling table " + tableName);
+ admin.enableTable(tableName);
+ }
+ }
+
+ @Override
+ protected void addOptions() {
+ addOptWithArg(OPT_ZK_QUORUM, "ZK quorum as comma-separated host names " +
+ "without port numbers");
+ addOptWithArg(OPT_ZK_PARENT_NODE, "name of parent znode in zookeeper");
+ addOptWithArg(OPT_TABLE_NAME, "The name of the table to read or write");
+ addOptWithArg(OPT_COLUMN_FAMILIES, "The names of the column families to use, separated by commas");
+ addOptWithArg(OPT_WRITE, OPT_USAGE_LOAD);
+ addOptWithArg(OPT_READ, OPT_USAGE_READ);
+ addOptWithArg(OPT_UPDATE, OPT_USAGE_UPDATE);
+ addOptNoArg(OPT_INIT_ONLY, "Initialize the test table only, don't do any loading");
+ addOptWithArg(OPT_BLOOM, OPT_USAGE_BLOOM);
+ addOptWithArg(OPT_COMPRESSION, OPT_USAGE_COMPRESSION);
+ addOptWithArg(HFileTestUtil.OPT_DATA_BLOCK_ENCODING, HFileTestUtil.OPT_DATA_BLOCK_ENCODING_USAGE);
+ addOptWithArg(OPT_MAX_READ_ERRORS, "The maximum number of read errors " +
+ "to tolerate before terminating all reader threads. The default is " +
+ MultiThreadedReader.DEFAULT_MAX_ERRORS + ".");
+ addOptWithArg(OPT_MULTIGET, "Whether to use multi-gets as opposed to " +
+ "separate gets for every column in a row");
+ addOptWithArg(OPT_KEY_WINDOW, "The 'key window' to maintain between " +
+ "reads and writes for concurrent write/read workload. The default " +
+ "is " + MultiThreadedReader.DEFAULT_KEY_WINDOW + ".");
+
+ addOptNoArg(OPT_MULTIPUT, "Whether to use multi-puts as opposed to " +
+ "separate puts for every column in a row");
+ addOptNoArg(OPT_BATCHUPDATE, "Whether to use batch as opposed to " +
+ "separate updates for every column in a row");
+ addOptNoArg(OPT_INMEMORY, OPT_USAGE_IN_MEMORY);
+ addOptWithArg(OPT_GENERATOR, OPT_GENERATOR_USAGE);
+ addOptWithArg(OPT_WRITER, OPT_WRITER_USAGE);
+ addOptWithArg(OPT_UPDATER, OPT_UPDATER_USAGE);
+ addOptWithArg(OPT_READER, OPT_READER_USAGE);
+
+ addOptWithArg(OPT_NUM_KEYS, "The number of keys to read/write");
+ addOptWithArg(OPT_START_KEY, "The first key to read/write " +
+ "(a 0-based index). The default value is " +
+ DEFAULT_START_KEY + ".");
+ addOptNoArg(OPT_SKIP_INIT, "Skip the initialization; assume test table "
+ + "already exists");
+
+ addOptWithArg(NUM_TABLES,
+ "A positive integer number. When a number n is speicfied, load test "
+ + "tool will load n table parallely. -tn parameter value becomes "
+ + "table name prefix. Each table name is in format <tn>_1...<tn>_n");
+
+ addOptWithArg(OPT_REGIONS_PER_SERVER,
+ "A positive integer number. When a number n is specified, load test "
+ + "tool will create the test table with n regions per server");
+
+ addOptWithArg(OPT_ENCRYPTION, OPT_ENCRYPTION_USAGE);
+ addOptNoArg(OPT_DEFERRED_LOG_FLUSH, OPT_DEFERRED_LOG_FLUSH_USAGE);
+ addOptWithArg(OPT_NUM_REGIONS_PER_SERVER, OPT_NUM_REGIONS_PER_SERVER_USAGE);
+ addOptWithArg(OPT_REGION_REPLICATION, OPT_REGION_REPLICATION_USAGE);
+ addOptWithArg(OPT_REGION_REPLICA_ID, OPT_REGION_REPLICA_ID_USAGE);
+ addOptWithArg(OPT_MOB_THRESHOLD, OPT_MOB_THRESHOLD_USAGE);
+ }
+
+ @Override
+ protected void processOptions(CommandLine cmd) {
+ this.cmd = cmd;
+
+ tableName = TableName.valueOf(cmd.getOptionValue(OPT_TABLE_NAME,
+ DEFAULT_TABLE_NAME));
+
+ if (cmd.hasOption(OPT_COLUMN_FAMILIES)) {
+ String[] list = cmd.getOptionValue(OPT_COLUMN_FAMILIES).split(",");
+ families = new byte[list.length][];
+ for (int i = 0; i < list.length; i++) {
+ families[i] = Bytes.toBytes(list[i]);
+ }
+ } else {
+ families = HFileTestUtil.DEFAULT_COLUMN_FAMILIES;
+ }
+
+ isWrite = cmd.hasOption(OPT_WRITE);
+ isRead = cmd.hasOption(OPT_READ);
+ isUpdate = cmd.hasOption(OPT_UPDATE);
+ isInitOnly = cmd.hasOption(OPT_INIT_ONLY);
+ deferredLogFlush = cmd.hasOption(OPT_DEFERRED_LOG_FLUSH);
+
+ if (!isWrite && !isRead && !isUpdate && !isInitOnly) {
+ throw new IllegalArgumentException("Either -" + OPT_WRITE + " or " +
+ "-" + OPT_UPDATE + " or -" + OPT_READ + " has to be specified");
+ }
+
+ if (isInitOnly && (isRead || isWrite || isUpdate)) {
+ throw new IllegalArgumentException(OPT_INIT_ONLY + " cannot be specified with"
+ + " either -" + OPT_WRITE + " or -" + OPT_UPDATE + " or -" + OPT_READ);
+ }
+
+ if (!isInitOnly) {
+ if (!cmd.hasOption(OPT_NUM_KEYS)) {
+ throw new IllegalArgumentException(OPT_NUM_KEYS + " must be specified in "
+ + "read or write mode");
+ }
+ startKey = parseLong(cmd.getOptionValue(OPT_START_KEY,
+ String.valueOf(DEFAULT_START_KEY)), 0, Long.MAX_VALUE);
+ long numKeys = parseLong(cmd.getOptionValue(OPT_NUM_KEYS), 1,
+ Long.MAX_VALUE - startKey);
+ endKey = startKey + numKeys;
+ isSkipInit = cmd.hasOption(OPT_SKIP_INIT);
+ System.out.println("Key range: [" + startKey + ".." + (endKey - 1) + "]");
+ }
+
+ parseColumnFamilyOptions(cmd);
+
+ if (isWrite) {
+ String[] writeOpts = splitColonSeparated(OPT_WRITE, 2, 3);
+
+ int colIndex = 0;
+ minColsPerKey = 1;
+ maxColsPerKey = 2 * Integer.parseInt(writeOpts[colIndex++]);
+ int avgColDataSize =
+ parseInt(writeOpts[colIndex++], 1, Integer.MAX_VALUE);
+ minColDataSize = avgColDataSize / 2;
+ maxColDataSize = avgColDataSize * 3 / 2;
+
+ if (colIndex < writeOpts.length) {
+ numWriterThreads = getNumThreads(writeOpts[colIndex++]);
+ }
+
+ isMultiPut = cmd.hasOption(OPT_MULTIPUT);
+
+ mobThreshold = -1;
+ if (cmd.hasOption(OPT_MOB_THRESHOLD)) {
+ mobThreshold = Integer.parseInt(cmd.getOptionValue(OPT_MOB_THRESHOLD));
+ }
+
+ System.out.println("Multi-puts: " + isMultiPut);
+ System.out.println("Columns per key: " + minColsPerKey + ".."
+ + maxColsPerKey);
+ System.out.println("Data size per column: " + minColDataSize + ".."
+ + maxColDataSize);
+ }
+
+ if (isUpdate) {
+ String[] mutateOpts = splitColonSeparated(OPT_UPDATE, 1, 3);
+ int colIndex = 0;
+ updatePercent = parseInt(mutateOpts[colIndex++], 0, 100);
+ if (colIndex < mutateOpts.length) {
+ numUpdaterThreads = getNumThreads(mutateOpts[colIndex++]);
+ }
+ if (colIndex < mutateOpts.length) {
+ ignoreConflicts = parseInt(mutateOpts[colIndex++], 0, 1) == 1;
+ }
+
+ isBatchUpdate = cmd.hasOption(OPT_BATCHUPDATE);
+
+ System.out.println("Batch updates: " + isBatchUpdate);
+ System.out.println("Percent of keys to update: " + updatePercent);
+ System.out.println("Updater threads: " + numUpdaterThreads);
+ System.out.println("Ignore nonce conflicts: " + ignoreConflicts);
+ }
+
+ if (isRead) {
+ String[] readOpts = splitColonSeparated(OPT_READ, 1, 2);
+ int colIndex = 0;
+ verifyPercent = parseInt(readOpts[colIndex++], 0, 100);
+ if (colIndex < readOpts.length) {
+ numReaderThreads = getNumThreads(readOpts[colIndex++]);
+ }
+
+ if (cmd.hasOption(OPT_MAX_READ_ERRORS)) {
+ maxReadErrors = parseInt(cmd.getOptionValue(OPT_MAX_READ_ERRORS),
+ 0, Integer.MAX_VALUE);
+ }
+
+ if (cmd.hasOption(OPT_KEY_WINDOW)) {
+ keyWindow = parseInt(cmd.getOptionValue(OPT_KEY_WINDOW),
+ 0, Integer.MAX_VALUE);
+ }
+
+ if (cmd.hasOption(OPT_MULTIGET)) {
+ multiGetBatchSize = parseInt(cmd.getOptionValue(OPT_MULTIGET),
+ 0, Integer.MAX_VALUE);
+ }
+
+ System.out.println("Multi-gets (value of 1 means no multigets): " + multiGetBatchSize);
+ System.out.println("Percent of keys to verify: " + verifyPercent);
+ System.out.println("Reader threads: " + numReaderThreads);
+ }
+
+ numTables = 1;
+ if (cmd.hasOption(NUM_TABLES)) {
+ numTables = parseInt(cmd.getOptionValue(NUM_TABLES), 1, Short.MAX_VALUE);
+ }
+
+ numRegionsPerServer = DEFAULT_NUM_REGIONS_PER_SERVER;
+ if (cmd.hasOption(OPT_NUM_REGIONS_PER_SERVER)) {
+ numRegionsPerServer = Integer.parseInt(cmd.getOptionValue(OPT_NUM_REGIONS_PER_SERVER));
+ }
+
+ regionReplication = 1;
+ if (cmd.hasOption(OPT_REGION_REPLICATION)) {
+ regionReplication = Integer.parseInt(cmd.getOptionValue(OPT_REGION_REPLICATION));
+ }
+
+ regionReplicaId = -1;
+ if (cmd.hasOption(OPT_REGION_REPLICA_ID)) {
+ regionReplicaId = Integer.parseInt(cmd.getOptionValue(OPT_REGION_REPLICA_ID));
+ }
+ }
+
+ private void parseColumnFamilyOptions(CommandLine cmd) {
+ String dataBlockEncodingStr = cmd.getOptionValue(HFileTestUtil.OPT_DATA_BLOCK_ENCODING);
+ dataBlockEncodingAlgo = dataBlockEncodingStr == null ? null :
+ DataBlockEncoding.valueOf(dataBlockEncodingStr);
+
+ String compressStr = cmd.getOptionValue(OPT_COMPRESSION);
+ compressAlgo = compressStr == null ? Compression.Algorithm.NONE :
+ Compression.Algorithm.valueOf(compressStr);
+
+ String bloomStr = cmd.getOptionValue(OPT_BLOOM);
+ bloomType = bloomStr == null ? BloomType.ROW :
+ BloomType.valueOf(bloomStr);
+
+ inMemoryCF = cmd.hasOption(OPT_INMEMORY);
+ if (cmd.hasOption(OPT_ENCRYPTION)) {
+ cipher = Encryption.getCipher(conf, cmd.getOptionValue(OPT_ENCRYPTION));
+ }
+
+ }
+
+ public void initTestTable() throws IOException {
+ Durability durability = Durability.USE_DEFAULT;
+ if (deferredLogFlush) {
+ durability = Durability.ASYNC_WAL;
+ }
+
+ HBaseTestingUtility.createPreSplitLoadTestTable(conf, tableName,
+ getColumnFamilies(), compressAlgo, dataBlockEncodingAlgo, numRegionsPerServer,
+ regionReplication, durability);
+ applyColumnFamilyOptions(tableName, getColumnFamilies());
+ }
+
+ @Override
+ protected int doWork() throws IOException {
+ if (numTables > 1) {
+ return parallelLoadTables();
+ } else {
+ return loadTable();
+ }
+ }
+
+ protected int loadTable() throws IOException {
+ if (cmd.hasOption(OPT_ZK_QUORUM)) {
+ conf.set(HConstants.ZOOKEEPER_QUORUM, cmd.getOptionValue(OPT_ZK_QUORUM));
+ }
+ if (cmd.hasOption(OPT_ZK_PARENT_NODE)) {
+ conf.set(HConstants.ZOOKEEPER_ZNODE_PARENT, cmd.getOptionValue(OPT_ZK_PARENT_NODE));
+ }
+
+ if (isInitOnly) {
+ LOG.info("Initializing only; no reads or writes");
+ initTestTable();
+ return 0;
+ }
+
+ if (!isSkipInit) {
+ initTestTable();
+ }
+ LoadTestDataGenerator dataGen = null;
+ if (cmd.hasOption(OPT_GENERATOR)) {
+ String[] clazzAndArgs = cmd.getOptionValue(OPT_GENERATOR).split(COLON);
+ dataGen = getLoadGeneratorInstance(clazzAndArgs[0]);
+ String[] args;
+ if (dataGen instanceof LoadTestDataGeneratorWithACL) {
+ LOG.info("Using LoadTestDataGeneratorWithACL");
+ if (User.isHBaseSecurityEnabled(conf)) {
+ LOG.info("Security is enabled");
+ authnFileName = clazzAndArgs[1];
+ superUser = clazzAndArgs[2];
+ userNames = clazzAndArgs[3];
+ args = Arrays.copyOfRange(clazzAndArgs, 2, clazzAndArgs.length);
+ Properties authConfig = new Properties();
+ authConfig.load(this.getClass().getClassLoader().getResourceAsStream(authnFileName));
+ try {
+ addAuthInfoToConf(authConfig, conf, superUser, userNames);
+ } catch (IOException exp) {
+ LOG.error(exp);
+ return EXIT_FAILURE;
+ }
+ userOwner = User.create(HBaseKerberosUtils.loginAndReturnUGI(conf, superUser));
+ } else {
+ superUser = clazzAndArgs[1];
+ userNames = clazzAndArgs[2];
+ args = Arrays.copyOfRange(clazzAndArgs, 1, clazzAndArgs.length);
+ userOwner = User.createUserForTesting(conf, superUser, new String[0]);
+ }
+ } else {
+ args = clazzAndArgs.length == 1 ? new String[0] : Arrays.copyOfRange(clazzAndArgs, 1,
+ clazzAndArgs.length);
+ }
+ dataGen.initialize(args);
+ } else {
+ // Default DataGenerator is MultiThreadedAction.DefaultDataGenerator
+ dataGen = new MultiThreadedAction.DefaultDataGenerator(minColDataSize, maxColDataSize,
+ minColsPerKey, maxColsPerKey, families);
+ }
+
+ if (userOwner != null) {
+ LOG.info("Granting permissions for user " + userOwner.getShortName());
+ Permission.Action[] actions = {
+ Permission.Action.ADMIN, Permission.Action.CREATE,
+ Permission.Action.READ, Permission.Action.WRITE };
+ try {
+ AccessControlClient.grant(ConnectionFactory.createConnection(conf),
+ tableName, userOwner.getShortName(), null, null, actions);
+ } catch (Throwable e) {
+ LOG.fatal("Error in granting permission for the user " + userOwner.getShortName(), e);
+ return EXIT_FAILURE;
+ }
+ }
+
+ if (userNames != null) {
+ // This will be comma separated list of expressions.
+ String users[] = userNames.split(",");
+ User user = null;
+ for (String userStr : users) {
+ if (User.isHBaseSecurityEnabled(conf)) {
+ user = User.create(HBaseKerberosUtils.loginAndReturnUGI(conf, userStr));
+ } else {
+ user = User.createUserForTesting(conf, userStr, new String[0]);
+ }
+ }
+ }
+
+ if (isWrite) {
+ if (userOwner != null) {
+ writerThreads = new MultiThreadedWriterWithACL(dataGen, conf, tableName, userOwner);
+ } else {
+ String writerClass = null;
+ if (cmd.hasOption(OPT_WRITER)) {
+ writerClass = cmd.getOptionValue(OPT_WRITER);
+ } else {
+ writerClass = MultiThreadedWriter.class.getCanonicalName();
+ }
+
+ writerThreads = getMultiThreadedWriterInstance(writerClass, dataGen);
+ }
+ writerThreads.setMultiPut(isMultiPut);
+ }
+
+ if (isUpdate) {
+ if (userOwner != null) {
+ updaterThreads = new MultiThreadedUpdaterWithACL(dataGen, conf, tableName, updatePercent,
+ userOwner, userNames);
+ } else {
+ String updaterClass = null;
+ if (cmd.hasOption(OPT_UPDATER)) {
+ updaterClass = cmd.getOptionValue(OPT_UPDATER);
+ } else {
+ updaterClass = MultiThreadedUpdater.class.getCanonicalName();
+ }
+ updaterThreads = getMultiThreadedUpdaterInstance(updaterClass, dataGen);
+ }
+ updaterThreads.setBatchUpdate(isBatchUpdate);
+ updaterThreads.setIgnoreNonceConflicts(ignoreConflicts);
+ }
+
+ if (isRead) {
+ if (userOwner != null) {
+ readerThreads = new MultiThreadedReaderWithACL(dataGen, conf, tableName, verifyPercent,
+ userNames);
+ } else {
+ String readerClass = null;
+ if (cmd.hasOption(OPT_READER)) {
+ readerClass = cmd.getOptionValue(OPT_READER);
+ } else {
+ readerClass = MultiThreadedReader.class.getCanonicalName();
+ }
+ readerThreads = getMultiThreadedReaderInstance(readerClass, dataGen);
+ }
+ readerThreads.setMaxErrors(maxReadErrors);
+ readerThreads.setKeyWindow(keyWindow);
+ readerThreads.setMultiGetBatchSize(multiGetBatchSize);
+ readerThreads.setRegionReplicaId(regionReplicaId);
+ }
+
+ if (isUpdate && isWrite) {
+ LOG.info("Concurrent write/update workload: making updaters aware of the " +
+ "write point");
+ updaterThreads.linkToWriter(writerThreads);
+ }
+
+ if (isRead && (isUpdate || isWrite)) {
+ LOG.info("Concurrent write/read workload: making readers aware of the " +
+ "write point");
+ readerThreads.linkToWriter(isUpdate ? updaterThreads : writerThreads);
+ }
+
+ if (isWrite) {
+ System.out.println("Starting to write data...");
+ writerThreads.start(startKey, endKey, numWriterThreads);
+ }
+
+ if (isUpdate) {
+ LOG.info("Starting to mutate data...");
+ System.out.println("Starting to mutate data...");
+ // TODO: currently append and increment operations are not tested with tags.
+ // Will update this after it is done.
+ updaterThreads.start(startKey, endKey, numUpdaterThreads);
+ }
+
+ if (isRead) {
+ System.out.println("Starting to read data...");
+ readerThreads.start(startKey, endKey, numReaderThreads);
+ }
+
+ if (isWrite) {
+ writerThreads.waitForFinish();
+ }
+
+ if (isUpdate) {
+ updaterThreads.waitForFinish();
+ }
+
+ if (isRead) {
+ readerThreads.waitForFinish();
+ }
+
+ boolean success = true;
+ if (isWrite) {
+ success = success && writerThreads.getNumWriteFailures() == 0;
+ }
+ if (isUpdate) {
+ success = success && updaterThreads.getNumWriteFailures() == 0;
+ }
+ if (isRead) {
+ success = success && readerThreads.getNumReadErrors() == 0
+ && readerThreads.getNumReadFailures() == 0;
+ }
+ return success ? EXIT_SUCCESS : EXIT_FAILURE;
+ }
+
+ private LoadTestDataGenerator getLoadGeneratorInstance(String clazzName) throws IOException {
+ try {
+ Class<?> clazz = Class.forName(clazzName);
+ Constructor<?> constructor = clazz.getConstructor(int.class, int.class, int.class, int.class,
+ byte[][].class);
+ return (LoadTestDataGenerator) constructor.newInstance(minColDataSize, maxColDataSize,
+ minColsPerKey, maxColsPerKey, families);
+ } catch (Exception e) {
+ throw new IOException(e);
+ }
+ }
+
+ private MultiThreadedWriter getMultiThreadedWriterInstance(String clazzName
+ , LoadTestDataGenerator dataGen) throws IOException {
+ try {
+ Class<?> clazz = Class.forName(clazzName);
+ Constructor<?> constructor = clazz.getConstructor(
+ LoadTestDataGenerator.class, Configuration.class, TableName.class);
+ return (MultiThreadedWriter) constructor.newInstance(dataGen, conf, tableName);
+ } catch (Exception e) {
+ throw new IOException(e);
+ }
+ }
+
+ private MultiThreadedUpdater getMultiThreadedUpdaterInstance(String clazzName
+ , LoadTestDataGenerator dataGen) throws IOException {
+ try {
+ Class<?> clazz = Class.forName(clazzName);
+ Constructor<?> constructor = clazz.getConstructor(
+ LoadTestDataGenerator.class, Configuration.class, TableName.class, double.class);
+ return (MultiThreadedUpdater) constructor.newInstance(
+ dataGen, conf, tableName, updatePercent);
+ } catch (Exception e) {
+ throw new IOException(e);
+ }
+ }
+
+ private MultiThreadedReader getMultiThreadedReaderInstance(String clazzName
+ , LoadTestDataGenerator dataGen) throws IOException {
+ try {
+ Class<?> clazz = Class.forName(clazzName);
+ Constructor<?> constructor = clazz.getConstructor(
+ LoadTestDataGenerator.class, Configuration.class, TableName.class, double.class);
+ return (MultiThreadedReader) constructor.newInstance(dataGen, conf, tableName, verifyPercent);
+ } catch (Exception e) {
+ throw new IOException(e);
+ }
+ }
+
+ public static void main(String[] args) {
+ new LoadTestTool().doStaticMain(args);
+ }
+
+ /**
+ * When NUM_TABLES is specified, this method starts multiple worker threads,
+ * each of which runs a LoadTestTool instance to load one table. Each
+ * table name is in the format <tn>_<index>. For example, with "-tn test -num_tables 2",
+ * the table names will be "test_1" and "test_2".
+ *
+ * @throws IOException
+ */
+ private int parallelLoadTables()
+ throws IOException {
+ // create new command args
+ String tableName = cmd.getOptionValue(OPT_TABLE_NAME, DEFAULT_TABLE_NAME);
+ String[] newArgs = null;
+ if (!cmd.hasOption(LoadTestTool.OPT_TABLE_NAME)) {
+ newArgs = new String[cmdLineArgs.length + 2];
+ newArgs[0] = "-" + LoadTestTool.OPT_TABLE_NAME;
+ newArgs[1] = LoadTestTool.DEFAULT_TABLE_NAME;
+ System.arraycopy(cmdLineArgs, 0, newArgs, 2, cmdLineArgs.length);
+ } else {
+ newArgs = cmdLineArgs;
+ }
+
+ int tableNameValueIndex = -1;
+ for (int j = 0; j < newArgs.length; j++) {
+ if (newArgs[j].endsWith(OPT_TABLE_NAME)) {
+ tableNameValueIndex = j + 1;
+ } else if (newArgs[j].endsWith(NUM_TABLES)) {
+ // change NUM_TABLES to 1 so that each worker loads one table
+ newArgs[j + 1] = "1";
+ }
+ }
+
+ // starting to load multiple tables
+ List<WorkerThread> workers = new ArrayList<>();
+ for (int i = 0; i < numTables; i++) {
+ String[] workerArgs = newArgs.clone();
+ workerArgs[tableNameValueIndex] = tableName + "_" + (i+1);
+ WorkerThread worker = new WorkerThread(i, workerArgs);
+ workers.add(worker);
+ LOG.info(worker + " starting");
+ worker.start();
+ }
+
+ // wait for all workers finish
+ LOG.info("Waiting for worker threads to finish");
+ for (WorkerThread t : workers) {
+ try {
+ t.join();
+ } catch (InterruptedException ie) {
+ IOException iie = new InterruptedIOException();
+ iie.initCause(ie);
+ throw iie;
+ }
+ checkForErrors();
+ }
+
+ return EXIT_SUCCESS;
+ }
+
+ // If an exception is thrown by one of worker threads, it will be
+ // stored here.
+ protected AtomicReference<Throwable> thrown = new AtomicReference<>();
+
+ private void workerThreadError(Throwable t) {
+ thrown.compareAndSet(null, t);
+ }
+
+ /**
+ * Check for errors in the writer threads. If any is found, rethrow it.
+ */
+ private void checkForErrors() throws IOException {
+ Throwable thrown = this.thrown.get();
+ if (thrown == null) return;
+ if (thrown instanceof IOException) {
+ throw (IOException) thrown;
+ } else {
+ throw new RuntimeException(thrown);
+ }
+ }
+
+ class WorkerThread extends Thread {
+ private String[] workerArgs;
+
+ WorkerThread(int i, String[] args) {
+ super("WorkerThread-" + i);
+ workerArgs = args;
+ }
+
+ @Override
+ public void run() {
+ try {
+ int ret = ToolRunner.run(HBaseConfiguration.create(), new LoadTestTool(), workerArgs);
+ if (ret != 0) {
+ throw new RuntimeException("LoadTestTool exit with non-zero return code.");
+ }
+ } catch (Exception ex) {
+ LOG.error("Error in worker thread", ex);
+ workerThreadError(ex);
+ }
+ }
+ }
+
+ private void addAuthInfoToConf(Properties authConfig, Configuration conf, String owner,
+ String userList) throws IOException {
+ List<String> users = new ArrayList<>(Arrays.asList(userList.split(",")));
+ users.add(owner);
+ for (String user : users) {
+ String keyTabFileConfKey = "hbase." + user + ".keytab.file";
+ String principalConfKey = "hbase." + user + ".kerberos.principal";
+ if (!authConfig.containsKey(keyTabFileConfKey) || !authConfig.containsKey(principalConfKey)) {
+ throw new IOException("Authentication configs missing for user : " + user);
+ }
+ }
+ for (String key : authConfig.stringPropertyNames()) {
+ conf.set(key, authConfig.getProperty(key));
+ }
+ LOG.debug("Added authentication properties to config successfully.");
+ }
+
+}
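Putting the option usage strings above together, a minimal illustrative invocation of the tool could look like the sketch below; all argument values are made-up examples rather than defaults taken from the tool. Adding "-num_tables", "2" would instead load cluster_test_1 and cluster_test_2 in parallel, as described in parallelLoadTables().

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.util.LoadTestTool;
import org.apache.hadoop.util.ToolRunner;

public class LoadTestToolExample {
  public static void main(String[] args) throws Exception {
    // Roughly equivalent command line (illustrative values):
    //   -tn cluster_test -write 5:1024:10 -read 100:20 -num_keys 100000 -start_key 0
    String[] toolArgs = {
        "-tn", "cluster_test",
        "-write", "5:1024:10",   // avg 5 columns per key, avg 1024 bytes per column, 10 writer threads
        "-read", "100:20",       // verify 100% of the keys read, 20 reader threads
        "-num_keys", "100000",
        "-start_key", "0"
    };
    int exitCode = ToolRunner.run(HBaseConfiguration.create(), new LoadTestTool(), toolArgs);
    System.exit(exitCode);
  }
}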
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/resources/hbase-site.xml
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/resources/hbase-site.xml b/hbase-mapreduce/src/test/resources/hbase-site.xml
new file mode 100644
index 0000000..64a1964
--- /dev/null
+++ b/hbase-mapreduce/src/test/resources/hbase-site.xml
@@ -0,0 +1,161 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+-->
+<configuration>
+ <property>
+ <name>hbase.regionserver.msginterval</name>
+ <value>1000</value>
+ <description>Interval between messages from the RegionServer to HMaster
+ in milliseconds. Default is 15. Set this value low if you want unit
+ tests to be responsive.
+ </description>
+ </property>
+ <property>
+ <name>hbase.defaults.for.version.skip</name>
+ <value>true</value>
+ </property>
+ <property>
+ <name>hbase.server.thread.wakefrequency</name>
+ <value>1000</value>
+ <description>Time to sleep in between searches for work (in milliseconds).
+ Used as sleep interval by service threads such as hbase:meta scanner and log roller.
+ </description>
+ </property>
+ <property>
+ <name>hbase.master.event.waiting.time</name>
+ <value>50</value>
+ <description>Time to sleep between checks to see if a table event took place.
+ </description>
+ </property>
+ <property>
+ <name>hbase.regionserver.handler.count</name>
+ <value>5</value>
+ </property>
+ <property>
+ <name>hbase.regionserver.metahandler.count</name>
+ <value>6</value>
+ </property>
+ <property>
+ <name>hbase.ipc.server.read.threadpool.size</name>
+ <value>3</value>
+ </property>
+ <property>
+ <name>hbase.master.info.port</name>
+ <value>-1</value>
+ <description>The port for the hbase master web UI
+ Set to -1 if you do not want the info server to run.
+ </description>
+ </property>
+ <property>
+ <name>hbase.master.port</name>
+ <value>0</value>
+ <description>Always have masters and regionservers come up on port '0' so we don't clash over
+ default ports.
+ </description>
+ </property>
+ <property>
+ <name>hbase.regionserver.port</name>
+ <value>0</value>
+ <description>Always have masters and regionservers come up on port '0' so we don't clash over
+ default ports.
+ </description>
+ </property>
+ <property>
+ <name>hbase.ipc.client.fallback-to-simple-auth-allowed</name>
+ <value>true</value>
+ </property>
+
+ <property>
+ <name>hbase.regionserver.info.port</name>
+ <value>-1</value>
+ <description>The port for the hbase regionserver web UI
+ Set to -1 if you do not want the info server to run.
+ </description>
+ </property>
+ <property>
+ <name>hbase.regionserver.info.port.auto</name>
+ <value>true</value>
+ <description>Info server auto port bind. Enables automatic port
+ search if hbase.regionserver.info.port is already in use.
+ Enabled for testing to run multiple tests on one machine.
+ </description>
+ </property>
+ <property>
+ <name>hbase.regionserver.safemode</name>
+ <value>false</value>
+ <description>
+ Turn on/off safe mode in region server. Always on for production, always off
+ for tests.
+ </description>
+ </property>
+ <property>
+ <name>hbase.hregion.max.filesize</name>
+ <value>67108864</value>
+ <description>
+ Maximum desired file size for an HRegion. If filesize exceeds
+ value + (value / 2), the HRegion is split in two. Default: 256M.
+
+ Keep the maximum filesize small so we split more often in tests.
+ </description>
+ </property>
+ <property>
+ <name>hadoop.log.dir</name>
+ <value>${user.dir}/../logs</value>
+ </property>
+ <property>
+ <name>hbase.zookeeper.property.clientPort</name>
+ <value>21818</value>
+ <description>Property from ZooKeeper's config zoo.cfg.
+ The port at which the clients will connect.
+ </description>
+ </property>
+ <property>
+ <name>hbase.defaults.for.version.skip</name>
+ <value>true</value>
+ <description>
+ Set to true to skip the 'hbase.defaults.for.version'.
+ Setting this to true can be useful in contexts other than
+ the other side of a maven generation; i.e. running in an
+ ide. You'll want to set this boolean to true to avoid
+ seeing the RuntimeException complaint: "hbase-default.xml file
+ seems to be for and old version of HBase (@@@VERSION@@@), this
+ version is X.X.X-SNAPSHOT"
+ </description>
+ </property>
+ <property>
+ <name>hbase.table.sanity.checks</name>
+ <value>false</value>
+ <description>Skip sanity checks in tests
+ </description>
+ </property>
+ <property>
+ <name>hbase.procedure.fail.on.corruption</name>
+ <value>true</value>
+ <description>
+ Enable replay sanity checks on procedure tests.
+ </description>
+ </property>
+ <property>
+ <name>hbase.hconnection.threads.keepalivetime</name>
+ <value>3</value>
+ </property>
+</configuration>
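Tests in this module pick up the overrides above simply by creating an HBaseConfiguration, since this hbase-site.xml sits on the test classpath; a small sketch (the property read below is just an example) is:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;

public class TestSiteConfigSketch {
  public static void main(String[] args) {
    // hbase-site.xml on the test classpath overrides hbase-default.xml values.
    Configuration conf = HBaseConfiguration.create();
    // Should print 5 (the handler count set above) when this test hbase-site.xml is on the classpath.
    System.out.println(conf.getInt("hbase.regionserver.handler.count", 30));
  }
}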
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/resources/hbase-site2.xml
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/resources/hbase-site2.xml b/hbase-mapreduce/src/test/resources/hbase-site2.xml
new file mode 100644
index 0000000..8bef31a
--- /dev/null
+++ b/hbase-mapreduce/src/test/resources/hbase-site2.xml
@@ -0,0 +1,146 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+-->
+<configuration>
+ <property>
+ <name>hbase.custom.config</name>
+ <value>1000</value>
+ </property>
+ <property>
+ <name>hbase.regionserver.msginterval</name>
+ <value>1000</value>
+ <description>Interval between messages from the RegionServer to HMaster
+ in milliseconds. Default is 15. Set this value low if you want unit
+ tests to be responsive.
+ </description>
+ </property>
+ <property>
+ <name>hbase.defaults.for.version.skip</name>
+ <value>true</value>
+ </property>
+ <property>
+ <name>hbase.server.thread.wakefrequency</name>
+ <value>1000</value>
+ <description>Time to sleep in between searches for work (in milliseconds).
+ Used as sleep interval by service threads such as hbase:meta scanner and log roller.
+ </description>
+ </property>
+ <property>
+ <name>hbase.master.event.waiting.time</name>
+ <value>50</value>
+ <description>Time to sleep between checks to see if a table event took place.
+ </description>
+ </property>
+ <property>
+ <name>hbase.regionserver.handler.count</name>
+ <value>5</value>
+ </property>
+ <property>
+ <name>hbase.master.info.port</name>
+ <value>-1</value>
+ <description>The port for the hbase master web UI
+ Set to -1 if you do not want the info server to run.
+ </description>
+ </property>
+ <property>
+ <name>hbase.master.port</name>
+ <value>0</value>
+ <description>Always have masters and regionservers come up on port '0' so we don't clash over
+ default ports.
+ </description>
+ </property>
+ <property>
+ <name>hbase.regionserver.port</name>
+ <value>0</value>
+ <description>Always have masters and regionservers come up on port '0' so we don't clash over
+ default ports.
+ </description>
+ </property>
+ <property>
+ <name>hbase.ipc.client.fallback-to-simple-auth-allowed</name>
+ <value>true</value>
+ </property>
+
+ <property>
+ <name>hbase.regionserver.info.port</name>
+ <value>-1</value>
+ <description>The port for the hbase regionserver web UI
+ Set to -1 if you do not want the info server to run.
+ </description>
+ </property>
+ <property>
+ <name>hbase.regionserver.info.port.auto</name>
+ <value>true</value>
+ <description>Info server auto port bind. Enables automatic port
+ search if hbase.regionserver.info.port is already in use.
+ Enabled for testing to run multiple tests on one machine.
+ </description>
+ </property>
+ <property>
+ <name>hbase.regionserver.safemode</name>
+ <value>false</value>
+ <description>
+ Turn on/off safe mode in region server. Always on for production, always off
+ for tests.
+ </description>
+ </property>
+ <property>
+ <name>hbase.hregion.max.filesize</name>
+ <value>67108864</value>
+ <description>
+ Maximum desired file size for an HRegion. If filesize exceeds
+ value + (value / 2), the HRegion is split in two. Default: 256M.
+
+ Keep the maximum filesize small so we split more often in tests.
+ </description>
+ </property>
+ <property>
+ <name>hadoop.log.dir</name>
+ <value>${user.dir}/../logs</value>
+ </property>
+ <property>
+ <name>hbase.zookeeper.property.clientPort</name>
+ <value>21818</value>
+ <description>Property from ZooKeeper's config zoo.cfg.
+ The port at which the clients will connect.
+ </description>
+ </property>
+ <property>
+ <name>hbase.defaults.for.version.skip</name>
+ <value>true</value>
+ <description>
+ Set to true to skip the 'hbase.defaults.for.version'.
+ Setting this to true can be useful in contexts other than
+ the other side of a maven generation; i.e. running in an
+ ide. You'll want to set this boolean to true to avoid
+ seeing the RuntimeException complaint: "hbase-default.xml file
+ seems to be for and old version of HBase (@@@VERSION@@@), this
+ version is X.X.X-SNAPSHOT"
+ </description>
+ </property>
+ <property>
+ <name>hbase.table.sanity.checks</name>
+ <value>false</value>
+ <description>Skip sanity checks in tests
+ </description>
+ </property>
+</configuration>
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/resources/hdfs-site.xml
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/resources/hdfs-site.xml b/hbase-mapreduce/src/test/resources/hdfs-site.xml
new file mode 100644
index 0000000..03be0c7
--- /dev/null
+++ b/hbase-mapreduce/src/test/resources/hdfs-site.xml
@@ -0,0 +1,32 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+-->
+<configuration>
+
+ <!-- hadoop-2.0.5+'s HDFS-4305 by default enforces a min block size
+ of 1024*1024. Many unit tests that use the hlog use smaller
+ blocks. Set this config to 0 so the tests pass. -->
+ <property>
+ <name>dfs.namenode.fs-limits.min-block-size</name>
+ <value>0</value>
+ </property>
+</configuration>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/resources/log4j.properties b/hbase-mapreduce/src/test/resources/log4j.properties
new file mode 100644
index 0000000..c322699
--- /dev/null
+++ b/hbase-mapreduce/src/test/resources/log4j.properties
@@ -0,0 +1,68 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Define some default values that can be overridden by system properties
+hbase.root.logger=INFO,console
+hbase.log.dir=.
+hbase.log.file=hbase.log
+
+# Define the root logger to the system property "hbase.root.logger".
+log4j.rootLogger=${hbase.root.logger}
+
+# Logging Threshold
+log4j.threshold=ALL
+
+#
+# Daily Rolling File Appender
+#
+log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.DRFA.File=${hbase.log.dir}/${hbase.log.file}
+
+# Rollover at midnight
+log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
+
+# 30-day backup
+#log4j.appender.DRFA.MaxBackupIndex=30
+log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
+# Debugging Pattern format
+log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %C{2}(%L): %m%n
+
+
+#
+# console
+# Add "console" to rootlogger above if you want to use this
+#
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %C{2}(%L): %m%n
+
+# Custom Logging levels
+
+#log4j.logger.org.apache.hadoop.fs.FSNamesystem=DEBUG
+
+log4j.logger.org.apache.hadoop=WARN
+log4j.logger.org.apache.zookeeper=ERROR
+log4j.logger.org.apache.hadoop.hbase=DEBUG
+
+#These settings are workarounds against spurious logs from the minicluster.
+#See HBASE-4709
+log4j.logger.org.apache.hadoop.metrics2.impl.MetricsConfig=WARN
+log4j.logger.org.apache.hadoop.metrics2.impl.MetricsSinkAdapter=WARN
+log4j.logger.org.apache.hadoop.metrics2.impl.MetricsSystemImpl=WARN
+log4j.logger.org.apache.hadoop.metrics2.util.MBeans=WARN
+# Enable this to get detailed connection error/retry logging.
+# log4j.logger.org.apache.hadoop.hbase.client.ConnectionImplementation=TRACE
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/resources/mapred-queues.xml
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/resources/mapred-queues.xml b/hbase-mapreduce/src/test/resources/mapred-queues.xml
new file mode 100644
index 0000000..43f3e2a
--- /dev/null
+++ b/hbase-mapreduce/src/test/resources/mapred-queues.xml
@@ -0,0 +1,75 @@
+<?xml version="1.0"?>
+<!--
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+-->
+<!-- This is the template for queue configuration. The format supports nesting of
+ queues within queues - a feature called hierarchical queues. All queues are
+ defined within the 'queues' tag which is the top level element for this
+ XML document.
+ The 'aclsEnabled' attribute should be set to true, if ACLs should be checked
+ on queue operations such as submitting jobs, killing jobs etc. -->
+<queues aclsEnabled="false">
+
+ <!-- Configuration for a queue is specified by defining a 'queue' element. -->
+ <queue>
+
+ <!-- Name of a queue. Queue name cannot contain a ':' -->
+ <name>default</name>
+
+ <!-- properties for a queue, typically used by schedulers,
+ can be defined here -->
+ <properties>
+ </properties>
+
+ <!-- State of the queue. If running, the queue will accept new jobs.
+ If stopped, the queue will not accept new jobs. -->
+ <state>running</state>
+
+ <!-- Specifies the ACLs to check for submitting jobs to this queue.
+ If set to '*', it allows all users to submit jobs to the queue.
+ For specifying a list of users and groups the format to use is
+ user1,user2 group1,group2 -->
+ <acl-submit-job>*</acl-submit-job>
+
+ <!-- Specifies the ACLs to check for modifying jobs in this queue.
+ Modifications include killing jobs, tasks of jobs or changing
+ priorities.
+ If set to '*', it allows all users to submit jobs to the queue.
+ For specifying a list of users and groups the format to use is
+ user1,user2 group1,group2 -->
+ <acl-administer-jobs>*</acl-administer-jobs>
+ </queue>
+
+ <!-- Here is a sample of a hierarchical queue configuration
+ where q2 is a child of q1. In this example, q2 is a leaf level
+ queue as it has no queues configured within it. Currently, ACLs
+ and state are only supported for the leaf level queues.
+ Note also the usage of properties for the queue q2.
+ <queue>
+ <name>q1</name>
+ <queue>
+ <name>q2</name>
+ <properties>
+ <property key="capacity" value="20"/>
+ <property key="user-limit" value="30"/>
+ </properties>
+ </queue>
+ </queue>
+ -->
+</queues>
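
The queue definitions above are picked up by the MapReduce framework at job submission time. As a rough usage sketch (not part of this patch), a job can be pointed at the 'default' queue defined above through the standard Hadoop configuration key; the job name below is arbitrary:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class QueueSelectionSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Route the job to the "default" queue declared in mapred-queues.xml.
    conf.set("mapreduce.job.queuename", "default");
    Job job = Job.getInstance(conf, "queue-selection-sketch");
    // ... mapper, reducer, input and output setup as usual, then job.waitForCompletion(true) ...
  }
}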
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/resources/mapred-site.xml
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/resources/mapred-site.xml b/hbase-mapreduce/src/test/resources/mapred-site.xml
new file mode 100644
index 0000000..787ffb7
--- /dev/null
+++ b/hbase-mapreduce/src/test/resources/mapred-site.xml
@@ -0,0 +1,34 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+-->
+<configuration>
+<property>
+ <name>mapred.map.child.java.opts</name>
+ <value>-Djava.awt.headless=true</value>
+</property>
+
+<property>
+ <name>mapred.reduce.child.java.opts</name>
+ <value>-Djava.awt.headless=true</value>
+</property>
+</configuration>
+
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/resources/org/apache/hadoop/hbase/PerformanceEvaluation_Counter.properties
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/resources/org/apache/hadoop/hbase/PerformanceEvaluation_Counter.properties b/hbase-mapreduce/src/test/resources/org/apache/hadoop/hbase/PerformanceEvaluation_Counter.properties
new file mode 100644
index 0000000..6fca96a
--- /dev/null
+++ b/hbase-mapreduce/src/test/resources/org/apache/hadoop/hbase/PerformanceEvaluation_Counter.properties
@@ -0,0 +1,28 @@
+# ResourceBundle properties file for Map-Reduce counters
+
+#/**
+# * Licensed to the Apache Software Foundation (ASF) under one
+# * or more contributor license agreements. See the NOTICE file
+# * distributed with this work for additional information
+# * regarding copyright ownership. The ASF licenses this file
+# * to you under the Apache License, Version 2.0 (the
+# * "License"); you may not use this file except in compliance
+# * with the License. You may obtain a copy of the License at
+# *
+# * http://www.apache.org/licenses/LICENSE-2.0
+# *
+# * Unless required by applicable law or agreed to in writing, software
+# * distributed under the License is distributed on an "AS IS" BASIS,
+# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# * See the License for the specific language governing permissions and
+# * limitations under the License.
+# */
+
+CounterGroupName= HBase Performance Evaluation
+ELAPSED_TIME.name= Elapsed time in milliseconds
+ROWS.name= Row count
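
The MapReduce counter framework resolves display names from a ResourceBundle named after the counter enum's enclosing class (PerformanceEvaluation$Counter becomes PerformanceEvaluation_Counter). A small sketch, assuming this properties file is on the classpath, of reading those display names directly:

import java.util.ResourceBundle;

public class CounterBundleSketch {
  public static void main(String[] args) {
    ResourceBundle bundle =
        ResourceBundle.getBundle("org.apache.hadoop.hbase.PerformanceEvaluation_Counter");
    // Group name plus the display names used for the ELAPSED_TIME and ROWS counters.
    System.out.println(bundle.getString("CounterGroupName"));
    System.out.println(bundle.getString("ELAPSED_TIME.name"));
    System.out.println(bundle.getString("ROWS.name"));
  }
}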
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/resources/org/apache/hadoop/hbase/mapreduce/exportedTableIn94Format
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/resources/org/apache/hadoop/hbase/mapreduce/exportedTableIn94Format b/hbase-mapreduce/src/test/resources/org/apache/hadoop/hbase/mapreduce/exportedTableIn94Format
new file mode 100755
index 0000000..762ddd7
Binary files /dev/null and b/hbase-mapreduce/src/test/resources/org/apache/hadoop/hbase/mapreduce/exportedTableIn94Format differ
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-rest/pom.xml
----------------------------------------------------------------------
diff --git a/hbase-rest/pom.xml b/hbase-rest/pom.xml
index 3af9829..639c0c2 100644
--- a/hbase-rest/pom.xml
+++ b/hbase-rest/pom.xml
@@ -212,6 +212,16 @@
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-mapreduce</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-mapreduce</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
<artifactId>hbase-hadoop-compat</artifactId>
</dependency>
<dependency>
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-rest/src/test/java/org/apache/hadoop/hbase/rest/PerformanceEvaluation.java
----------------------------------------------------------------------
diff --git a/hbase-rest/src/test/java/org/apache/hadoop/hbase/rest/PerformanceEvaluation.java b/hbase-rest/src/test/java/org/apache/hadoop/hbase/rest/PerformanceEvaluation.java
index 3559ee0..6ed170e 100644
--- a/hbase-rest/src/test/java/org/apache/hadoop/hbase/rest/PerformanceEvaluation.java
+++ b/hbase-rest/src/test/java/org/apache/hadoop/hbase/rest/PerformanceEvaluation.java
@@ -220,8 +220,8 @@ public class PerformanceEvaluation extends Configured implements Tool {
/**
* This class works as the InputSplit of Performance Evaluation
- * MapReduce InputFormat, and the Record Value of RecordReader.
- * Each map task will only read one record from a PeInputSplit,
+ * MapReduce InputFormat, and the Record Value of RecordReader.
+ * Each map task will only read one record from a PeInputSplit,
* the record value is the PeInputSplit itself.
*/
public static class PeInputSplit extends InputSplit implements Writable {
@@ -950,7 +950,7 @@ public class PerformanceEvaluation extends Configured implements Tool {
static abstract class TableTest extends Test {
protected Table table;
-
+
public TableTest(Configuration conf, TestOptions options, Status status) {
super(conf, options, status);
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/client/TableSnapshotScanner.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/client/TableSnapshotScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/client/TableSnapshotScanner.java
index bcd433c..d520113 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/client/TableSnapshotScanner.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/client/TableSnapshotScanner.java
@@ -43,7 +43,7 @@ import org.apache.hadoop.hbase.util.FSUtils;
* <p>
* This also allows one to run the scan from an
* online or offline hbase cluster. The snapshot files can be exported by using the
- * {@link org.apache.hadoop.hbase.snapshot.ExportSnapshot} tool,
+ * org.apache.hadoop.hbase.snapshot.ExportSnapshot tool,
* to a pure-hdfs cluster, and this scanner can be used to
* run the scan directly over the snapshot files. The snapshot should not be deleted while there
* are open scanners reading from snapshot files.
@@ -60,7 +60,7 @@ import org.apache.hadoop.hbase.util.FSUtils;
* snapshot files, the job has to be run as the HBase user or the user must have group or other
 * privileges in the filesystem (See HBASE-8369). Note that giving other users access to read from
* snapshot/data files will completely circumvent the access control enforced by HBase.
- * @see org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat
+ * See org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat.
*/
@InterfaceAudience.Public
public class TableSnapshotScanner extends AbstractClientScanner {
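
For context on the javadoc above: a minimal sketch (not from the patch itself) of driving TableSnapshotScanner over an exported snapshot. The snapshot name and restore directory below are hypothetical; the restore directory should live on the same filesystem as hbase.rootdir.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.TableSnapshotScanner;

public class SnapshotScanSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // Hypothetical snapshot name and scratch directory for the restored snapshot files.
    Path restoreDir = new Path("/tmp/snapshot_restore_dir");
    try (TableSnapshotScanner scanner =
        new TableSnapshotScanner(conf, restoreDir, "mySnapshot", new Scan())) {
      for (Result result : scanner) {
        // process each row read straight from the snapshot's HFiles
      }
    }
  }
}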
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/Driver.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/Driver.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/Driver.java
deleted file mode 100644
index 618c14a..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/Driver.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import org.apache.hadoop.hbase.HBaseInterfaceAudience;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.classification.InterfaceStability;
-import org.apache.hadoop.util.ProgramDriver;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.annotations.VisibleForTesting;
-
-/**
- * Driver for hbase mapreduce jobs. Select which to run by passing name of job
- * to this main.
- */
-@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
-@InterfaceStability.Stable
-public class Driver {
-
- private static ProgramDriver pgd = new ProgramDriver();
-
- @VisibleForTesting
- static void setProgramDriver(ProgramDriver pgd0) {
- pgd = pgd0;
- }
-
- /**
- * @param args
- * @throws Throwable
- */
- public static void main(String[] args) throws Throwable {
- pgd.addClass(RowCounter.NAME, RowCounter.class, "Count rows in HBase table");
- ProgramDriver.class.getMethod("driver", new Class[] { String[].class })
- .invoke(pgd, new Object[] { args });
- }
-}
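
Driver dispatches on the program name given as the first argument; the only program registered here is the mapred RowCounter. A hedged sketch of an invocation, assuming the registered name is "rowcounter" and using a hypothetical table:

public class DriverSketch {
  public static void main(String[] args) throws Throwable {
    // Equivalent to running the driver from the command line with: rowcounter myTable
    // "myTable" is a hypothetical table name.
    org.apache.hadoop.hbase.mapred.Driver.main(new String[] { "rowcounter", "myTable" });
  }
}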
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/GroupingTableMap.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/GroupingTableMap.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/GroupingTableMap.java
deleted file mode 100644
index a534224..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/GroupingTableMap.java
+++ /dev/null
@@ -1,157 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import java.io.IOException;
-import java.util.ArrayList;
-
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-
-
-/**
- * Extract grouping columns from input record
- */
-@InterfaceAudience.Public
-public class GroupingTableMap
-extends MapReduceBase
-implements TableMap<ImmutableBytesWritable,Result> {
-
- /**
- * JobConf parameter to specify the columns used to produce the key passed to
- * collect from the map phase
- */
- public static final String GROUP_COLUMNS =
- "hbase.mapred.groupingtablemap.columns";
-
- protected byte [][] columns;
-
- /**
- * Use this before submitting a TableMap job. It will appropriately set up the
- * JobConf.
- *
- * @param table table to be processed
- * @param columns space separated list of columns to fetch
- * @param groupColumns space separated list of columns used to form the key
- * used in collect
- * @param mapper map class
- * @param job job configuration object
- */
- @SuppressWarnings("unchecked")
- public static void initJob(String table, String columns, String groupColumns,
- Class<? extends TableMap> mapper, JobConf job) {
-
- TableMapReduceUtil.initTableMapJob(table, columns, mapper,
- ImmutableBytesWritable.class, Result.class, job);
- job.set(GROUP_COLUMNS, groupColumns);
- }
-
- @Override
- public void configure(JobConf job) {
- super.configure(job);
- String[] cols = job.get(GROUP_COLUMNS, "").split(" ");
- columns = new byte[cols.length][];
- for(int i = 0; i < cols.length; i++) {
- columns[i] = Bytes.toBytes(cols[i]);
- }
- }
-
- /**
- * Extract the grouping columns from value to construct a new key.
- *
- * Pass the new key and value to reduce.
- * If any of the grouping columns are not found in the value, the record is skipped.
- * @param key
- * @param value
- * @param output
- * @param reporter
- * @throws IOException
- */
- public void map(ImmutableBytesWritable key, Result value,
- OutputCollector<ImmutableBytesWritable,Result> output,
- Reporter reporter) throws IOException {
-
- byte[][] keyVals = extractKeyValues(value);
- if(keyVals != null) {
- ImmutableBytesWritable tKey = createGroupKey(keyVals);
- output.collect(tKey, value);
- }
- }
-
- /**
- * Extract column values from the current record. This method returns
- * null if any of the columns are not found.
- *
- * Override this method if you want to deal with nulls differently.
- *
- * @param r
- * @return array of byte values
- */
- protected byte[][] extractKeyValues(Result r) {
- byte[][] keyVals = null;
- ArrayList<byte[]> foundList = new ArrayList<>();
- int numCols = columns.length;
- if (numCols > 0) {
- for (Cell value: r.listCells()) {
- byte [] column = KeyValue.makeColumn(CellUtil.cloneFamily(value),
- CellUtil.cloneQualifier(value));
- for (int i = 0; i < numCols; i++) {
- if (Bytes.equals(column, columns[i])) {
- foundList.add(CellUtil.cloneValue(value));
- break;
- }
- }
- }
- if(foundList.size() == numCols) {
- keyVals = foundList.toArray(new byte[numCols][]);
- }
- }
- return keyVals;
- }
-
- /**
- * Create a key by concatenating multiple column values.
- * Override this function in order to produce different types of keys.
- *
- * @param vals
- * @return key generated by concatenating multiple column values
- */
- protected ImmutableBytesWritable createGroupKey(byte[][] vals) {
- if(vals == null) {
- return null;
- }
- StringBuilder sb = new StringBuilder();
- for(int i = 0; i < vals.length; i++) {
- if(i > 0) {
- sb.append(" ");
- }
- sb.append(Bytes.toString(vals[i]));
- }
- return new ImmutableBytesWritable(Bytes.toBytesBinary(sb.toString()));
- }
-}
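
A sketch of wiring the class above into an old-API job. The table and column names ("myTable", "info:a", "info:b") are hypothetical; grouping columns are given space-separated, as described in the initJob javadoc:

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapred.GroupingTableMap;
import org.apache.hadoop.mapred.JobConf;

public class GroupingTableMapSketch {
  public static void main(String[] args) {
    JobConf job = new JobConf(HBaseConfiguration.create(), GroupingTableMapSketch.class);
    // Scan two columns and build the map output key from those same two columns.
    GroupingTableMap.initJob("myTable", "info:a info:b", "info:a info:b",
        GroupingTableMap.class, job);
    // ... add a reducer (or zero reduces plus an output format), then JobClient.runJob(job) ...
  }
}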
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/HRegionPartitioner.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/HRegionPartitioner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/HRegionPartitioner.java
deleted file mode 100644
index 0011a60..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/HRegionPartitioner.java
+++ /dev/null
@@ -1,96 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import java.io.IOException;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.RegionLocator;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.Partitioner;
-
-
-/**
- * This is used to partition the output keys into groups of keys.
- * Keys are grouped according to the regions that currently exist
- * so that each reducer fills a single region and load is distributed.
- *
- * @param <K2>
- * @param <V2>
- */
-@InterfaceAudience.Public
-public class HRegionPartitioner<K2,V2>
-implements Partitioner<ImmutableBytesWritable, V2> {
- private static final Log LOG = LogFactory.getLog(HRegionPartitioner.class);
- // Connection and locator are not cleaned up; they just die when partitioner is done.
- private Connection connection;
- private RegionLocator locator;
- private byte[][] startKeys;
-
- public void configure(JobConf job) {
- try {
- this.connection = ConnectionFactory.createConnection(HBaseConfiguration.create(job));
- TableName tableName = TableName.valueOf(job.get(TableOutputFormat.OUTPUT_TABLE));
- this.locator = this.connection.getRegionLocator(tableName);
- } catch (IOException e) {
- LOG.error(e);
- }
-
- try {
- this.startKeys = this.locator.getStartKeys();
- } catch (IOException e) {
- LOG.error(e);
- }
- }
-
- public int getPartition(ImmutableBytesWritable key, V2 value, int numPartitions) {
- byte[] region = null;
- // Only one region return 0
- if (this.startKeys.length == 1){
- return 0;
- }
- try {
- // Not sure if this is cached after a split so we could have problems
- // here if a region splits while mapping
- region = locator.getRegionLocation(key.get()).getRegionInfo().getStartKey();
- } catch (IOException e) {
- LOG.error(e);
- }
- for (int i = 0; i < this.startKeys.length; i++){
- if (Bytes.compareTo(region, this.startKeys[i]) == 0 ){
- if (i >= numPartitions-1){
- // cover the case where we have fewer reducers than regions.
- return (Integer.toString(i).hashCode()
- & Integer.MAX_VALUE) % numPartitions;
- }
- return i;
- }
- }
- // if above fails to find start key that match we need to return something
- return 0;
- }
-}
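
A sketch of how this partitioner is typically hooked into an old-API job so that each reducer writes into one region; the partitioner reads the target table from the TableOutputFormat key, and the table name below is hypothetical:

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapred.HRegionPartitioner;
import org.apache.hadoop.hbase.mapred.TableOutputFormat;
import org.apache.hadoop.mapred.JobConf;

public class RegionPartitionerSketch {
  public static void main(String[] args) {
    JobConf job = new JobConf(HBaseConfiguration.create(), RegionPartitionerSketch.class);
    job.set(TableOutputFormat.OUTPUT_TABLE, "myTable"); // hypothetical destination table
    job.setOutputFormat(TableOutputFormat.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Put.class);
    // Route each output key to the reducer owning the region that covers it.
    job.setPartitionerClass(HRegionPartitioner.class);
    // ... mapper/reducer configuration and submission omitted ...
  }
}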
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/IdentityTableMap.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/IdentityTableMap.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/IdentityTableMap.java
deleted file mode 100644
index dfacff9..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/IdentityTableMap.java
+++ /dev/null
@@ -1,76 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import java.io.IOException;
-
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-
-/**
- * Pass the given key and record as-is to reduce
- */
-@InterfaceAudience.Public
-public class IdentityTableMap
-extends MapReduceBase
-implements TableMap<ImmutableBytesWritable, Result> {
-
- /** constructor */
- public IdentityTableMap() {
- super();
- }
-
- /**
- * Use this before submitting a TableMap job. It will
- * appropriately set up the JobConf.
- *
- * @param table table name
- * @param columns columns to scan
- * @param mapper mapper class
- * @param job job configuration
- */
- @SuppressWarnings("unchecked")
- public static void initJob(String table, String columns,
- Class<? extends TableMap> mapper, JobConf job) {
- TableMapReduceUtil.initTableMapJob(table, columns, mapper,
- ImmutableBytesWritable.class,
- Result.class, job);
- }
-
- /**
- * Pass the key, value to reduce
- * @param key
- * @param value
- * @param output
- * @param reporter
- * @throws IOException
- */
- public void map(ImmutableBytesWritable key, Result value,
- OutputCollector<ImmutableBytesWritable,Result> output,
- Reporter reporter) throws IOException {
-
- // convert
- output.collect(key, value);
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/IdentityTableReduce.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/IdentityTableReduce.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/IdentityTableReduce.java
deleted file mode 100644
index 9c2e604..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/IdentityTableReduce.java
+++ /dev/null
@@ -1,61 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import java.io.IOException;
-import java.util.Iterator;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-
-/**
- * Write to table each key, record pair
- */
-@InterfaceAudience.Public
-public class IdentityTableReduce
-extends MapReduceBase
-implements TableReduce<ImmutableBytesWritable, Put> {
- @SuppressWarnings("unused")
- private static final Log LOG =
- LogFactory.getLog(IdentityTableReduce.class.getName());
-
- /**
- * No aggregation, output pairs of (key, record)
- * @param key
- * @param values
- * @param output
- * @param reporter
- * @throws IOException
- */
- public void reduce(ImmutableBytesWritable key, Iterator<Put> values,
- OutputCollector<ImmutableBytesWritable, Put> output,
- Reporter reporter)
- throws IOException {
-
- while(values.hasNext()) {
- output.collect(key, values.next());
- }
- }
-}
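
Note that IdentityTableReduce consumes (ImmutableBytesWritable, Put) pairs, so the map phase must emit Puts; IdentityTableMap emits Results and cannot be paired with it directly. A sketch of the reduce-side wiring, with hypothetical table names and a hypothetical Put-emitting mapper:

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapred.IdentityTableReduce;
import org.apache.hadoop.hbase.mapred.TableMapReduceUtil;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

public class IdentityReduceSketch {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf(HBaseConfiguration.create(), IdentityReduceSketch.class);
    job.setJobName("identity-reduce-sketch");
    // MyPutEmittingMap is hypothetical: a TableMap that turns each scanned Result into a Put.
    TableMapReduceUtil.initTableMapJob("sourceTable", "info:a", MyPutEmittingMap.class,
        ImmutableBytesWritable.class, Put.class, job);
    // Writes each incoming (key, Put) pair unchanged into the destination table.
    TableMapReduceUtil.initTableReduceJob("destTable", IdentityTableReduce.class, job);
    JobClient.runJob(job);
  }
}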
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/MultiTableSnapshotInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/MultiTableSnapshotInputFormat.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/MultiTableSnapshotInputFormat.java
deleted file mode 100644
index 3e121fe..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/MultiTableSnapshotInputFormat.java
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.mapred;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.mapreduce.MultiTableSnapshotInputFormatImpl;
-import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormatImpl;
-import org.apache.hadoop.mapred.InputFormat;
-import org.apache.hadoop.mapred.InputSplit;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.RecordReader;
-import org.apache.hadoop.mapred.Reporter;
-
-import java.io.IOException;
-import java.util.Collection;
-import java.util.List;
-import java.util.Map;
-
-/**
- * MultiTableSnapshotInputFormat generalizes {@link org.apache.hadoop.hbase.mapred
- * .TableSnapshotInputFormat}
- * allowing a MapReduce job to run over one or more table snapshots, with one or more scans
- * configured for each.
- * Internally, the input format delegates to {@link org.apache.hadoop.hbase.mapreduce
- * .TableSnapshotInputFormat}
- * and thus has the same performance advantages; see {@link org.apache.hadoop.hbase.mapreduce
- * .TableSnapshotInputFormat} for
- * more details.
- * Usage is similar to TableSnapshotInputFormat, with the following exception:
- * initMultiTableSnapshotMapperJob takes in a map
- * from snapshot name to a collection of scans. For each snapshot in the map, each corresponding
- * scan will be applied;
- * the overall dataset for the job is defined by the concatenation of the regions and tables
- * included in each snapshot/scan
- * pair.
- * {@link org.apache.hadoop.hbase.mapred.TableMapReduceUtil#initMultiTableSnapshotMapperJob(Map,
- * Class, Class, Class, JobConf, boolean, Path)}
- * can be used to configure the job.
- * <pre>{@code
- * Job job = new Job(conf);
- * Map<String, Collection<Scan>> snapshotScans = ImmutableMap.of(
- * "snapshot1", ImmutableList.of(new Scan(Bytes.toBytes("a"), Bytes.toBytes("b"))),
- * "snapshot2", ImmutableList.of(new Scan(Bytes.toBytes("1"), Bytes.toBytes("2")))
- * );
- * Path restoreDir = new Path("/tmp/snapshot_restore_dir");
- * TableMapReduceUtil.initMultiTableSnapshotMapperJob(
- * snapshotScans, MyTableMapper.class, MyMapKeyOutput.class,
- * MyMapOutputValueWritable.class, job, true, restoreDir);
- * }
- * </pre>
- * Internally, this input format restores each snapshot into a subdirectory of the given tmp
- * directory. Input splits and
- * record readers are created as described in {@link org.apache.hadoop.hbase.mapreduce
- * .TableSnapshotInputFormat}
- * (one per region).
- * See {@link org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat} for more notes on
- * permissioning; the
- * same caveats apply here.
- *
- * @see org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat
- * @see org.apache.hadoop.hbase.client.TableSnapshotScanner
- */
-@InterfaceAudience.Public
-public class MultiTableSnapshotInputFormat extends TableSnapshotInputFormat
- implements InputFormat<ImmutableBytesWritable, Result> {
-
- private final MultiTableSnapshotInputFormatImpl delegate;
-
- public MultiTableSnapshotInputFormat() {
- this.delegate = new MultiTableSnapshotInputFormatImpl();
- }
-
- @Override
- public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
- List<TableSnapshotInputFormatImpl.InputSplit> splits = delegate.getSplits(job);
- InputSplit[] results = new InputSplit[splits.size()];
- for (int i = 0; i < splits.size(); i++) {
- results[i] = new TableSnapshotRegionSplit(splits.get(i));
- }
- return results;
- }
-
- @Override
- public RecordReader<ImmutableBytesWritable, Result> getRecordReader(InputSplit split, JobConf job,
- Reporter reporter) throws IOException {
- return new TableSnapshotRecordReader((TableSnapshotRegionSplit) split, job);
- }
-
- /**
- * Configure conf to read from snapshotScans, with snapshots restored to a subdirectory of
- * restoreDir.
- * Sets: {@link org.apache.hadoop.hbase.mapreduce
- * .MultiTableSnapshotInputFormatImpl#RESTORE_DIRS_KEY},
- * {@link org.apache.hadoop.hbase.mapreduce
- * .MultiTableSnapshotInputFormatImpl#SNAPSHOT_TO_SCANS_KEY}
- *
- * @param conf
- * @param snapshotScans
- * @param restoreDir
- * @throws IOException
- */
- public static void setInput(Configuration conf, Map<String, Collection<Scan>> snapshotScans,
- Path restoreDir) throws IOException {
- new MultiTableSnapshotInputFormatImpl().setInput(conf, snapshotScans, restoreDir);
- }
-
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java
new file mode 100644
index 0000000..23a70a9
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java
@@ -0,0 +1,2627 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase;
+
+import static org.codehaus.jackson.map.SerializationConfig.Feature.SORT_PROPERTIES_ALPHABETICALLY;
+
+import java.io.IOException;
+import java.io.PrintStream;
+import java.lang.reflect.Constructor;
+import java.math.BigDecimal;
+import java.math.MathContext;
+import java.text.DecimalFormat;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Date;
+import java.util.LinkedList;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Queue;
+import java.util.Random;
+import java.util.TreeMap;
+import java.util.NoSuchElementException;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Append;
+import org.apache.hadoop.hbase.client.AsyncConnection;
+import org.apache.hadoop.hbase.client.AsyncTable;
+import org.apache.hadoop.hbase.client.BufferedMutator;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Consistency;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Durability;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.Increment;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.RawAsyncTable;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.RowMutations;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.filter.BinaryComparator;
+import org.apache.hadoop.hbase.filter.CompareFilter;
+import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
+import org.apache.hadoop.hbase.filter.Filter;
+import org.apache.hadoop.hbase.filter.FilterAllFilter;
+import org.apache.hadoop.hbase.filter.FilterList;
+import org.apache.hadoop.hbase.filter.PageFilter;
+import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
+import org.apache.hadoop.hbase.filter.WhileMatchFilter;
+import org.apache.hadoop.hbase.io.compress.Compression;
+import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
+import org.apache.hadoop.hbase.io.hfile.RandomDistribution;
+import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
+import org.apache.hadoop.hbase.regionserver.BloomType;
+import org.apache.hadoop.hbase.regionserver.CompactingMemStore;
+import org.apache.hadoop.hbase.regionserver.TestHRegionFileSystem;
+import org.apache.hadoop.hbase.trace.HBaseHTraceConfiguration;
+import org.apache.hadoop.hbase.trace.SpanReceiverHost;
+import org.apache.hadoop.hbase.util.*;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.codehaus.jackson.map.ObjectMapper;
+import org.apache.htrace.Sampler;
+import org.apache.htrace.Trace;
+import org.apache.htrace.TraceScope;
+import org.apache.htrace.impl.ProbabilitySampler;
+import org.apache.hadoop.hbase.shaded.com.google.common.base.MoreObjects;
+import org.apache.hadoop.hbase.shaded.com.google.common.util.concurrent.ThreadFactoryBuilder;
+
+import com.codahale.metrics.Histogram;
+import com.codahale.metrics.UniformReservoir;
+
+/**
+ * Script used to evaluate HBase performance and scalability. Runs an HBase
+ * client that steps through one of a set of hardcoded tests or 'experiments'
+ * (e.g. a random reads test, a random writes test, etc.). Pass on the
+ * command-line which test to run and how many clients are participating in
+ * this experiment. Run {@code PerformanceEvaluation --help} to obtain usage.
+ *
+ * <p>This class sets up and runs the evaluation programs described in
+ * Section 7, <i>Performance Evaluation</i>, of the <a
+ * href="http://labs.google.com/papers/bigtable.html">Bigtable</a>
+ * paper, pages 8-10.
+ *
+ * <p>By default, runs as a mapreduce job where each mapper runs a single test
+ * client. Can also run as a non-mapreduce, multithreaded application by
+ * specifying {@code --nomapred}. Each client does about 1GB of data, unless
+ * specified otherwise.
+ */
+@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
+public class PerformanceEvaluation extends Configured implements Tool {
+ static final String RANDOM_SEEK_SCAN = "randomSeekScan";
+ static final String RANDOM_READ = "randomRead";
+ private static final Log LOG = LogFactory.getLog(PerformanceEvaluation.class.getName());
+ private static final ObjectMapper MAPPER = new ObjectMapper();
+ static {
+ MAPPER.configure(SORT_PROPERTIES_ALPHABETICALLY, true);
+ }
+
+ public static final String TABLE_NAME = "TestTable";
+ public static final byte[] FAMILY_NAME = Bytes.toBytes("info");
+ public static final byte [] COLUMN_ZERO = Bytes.toBytes("" + 0);
+ public static final byte [] QUALIFIER_NAME = COLUMN_ZERO;
+ public static final int DEFAULT_VALUE_LENGTH = 1000;
+ public static final int ROW_LENGTH = 26;
+
+ private static final int ONE_GB = 1024 * 1024 * 1000;
+ private static final int DEFAULT_ROWS_PER_GB = ONE_GB / DEFAULT_VALUE_LENGTH;
+ // TODO : should we make this configurable
+ private static final int TAG_LENGTH = 256;
+ private static final DecimalFormat FMT = new DecimalFormat("0.##");
+ private static final MathContext CXT = MathContext.DECIMAL64;
+ private static final BigDecimal MS_PER_SEC = BigDecimal.valueOf(1000);
+ private static final BigDecimal BYTES_PER_MB = BigDecimal.valueOf(1024 * 1024);
+ private static final TestOptions DEFAULT_OPTS = new TestOptions();
+
+ private static Map<String, CmdDescriptor> COMMANDS = new TreeMap<>();
+ private static final Path PERF_EVAL_DIR = new Path("performance_evaluation");
+
+ static {
+ addCommandDescriptor(AsyncRandomReadTest.class, "asyncRandomRead",
+ "Run async random read test");
+ addCommandDescriptor(AsyncRandomWriteTest.class, "asyncRandomWrite",
+ "Run async random write test");
+ addCommandDescriptor(AsyncSequentialReadTest.class, "asyncSequentialRead",
+ "Run async sequential read test");
+ addCommandDescriptor(AsyncSequentialWriteTest.class, "asyncSequentialWrite",
+ "Run async sequential write test");
+ addCommandDescriptor(AsyncScanTest.class, "asyncScan",
+ "Run async scan test (read every row)");
+ addCommandDescriptor(RandomReadTest.class, RANDOM_READ,
+ "Run random read test");
+ addCommandDescriptor(RandomSeekScanTest.class, RANDOM_SEEK_SCAN,
+ "Run random seek and scan 100 test");
+ addCommandDescriptor(RandomScanWithRange10Test.class, "scanRange10",
+ "Run random seek scan with both start and stop row (max 10 rows)");
+ addCommandDescriptor(RandomScanWithRange100Test.class, "scanRange100",
+ "Run random seek scan with both start and stop row (max 100 rows)");
+ addCommandDescriptor(RandomScanWithRange1000Test.class, "scanRange1000",
+ "Run random seek scan with both start and stop row (max 1000 rows)");
+ addCommandDescriptor(RandomScanWithRange10000Test.class, "scanRange10000",
+ "Run random seek scan with both start and stop row (max 10000 rows)");
+ addCommandDescriptor(RandomWriteTest.class, "randomWrite",
+ "Run random write test");
+ addCommandDescriptor(SequentialReadTest.class, "sequentialRead",
+ "Run sequential read test");
+ addCommandDescriptor(SequentialWriteTest.class, "sequentialWrite",
+ "Run sequential write test");
+ addCommandDescriptor(ScanTest.class, "scan",
+ "Run scan test (read every row)");
+ addCommandDescriptor(FilteredScanTest.class, "filterScan",
+ "Run scan test using a filter to find a specific row based on it's value " +
+ "(make sure to use --rows=20)");
+ addCommandDescriptor(IncrementTest.class, "increment",
+ "Increment on each row; clients overlap on keyspace so some concurrent operations");
+ addCommandDescriptor(AppendTest.class, "append",
+ "Append on each row; clients overlap on keyspace so some concurrent operations");
+ addCommandDescriptor(CheckAndMutateTest.class, "checkAndMutate",
+ "CheckAndMutate on each row; clients overlap on keyspace so some concurrent operations");
+ addCommandDescriptor(CheckAndPutTest.class, "checkAndPut",
+ "CheckAndPut on each row; clients overlap on keyspace so some concurrent operations");
+ addCommandDescriptor(CheckAndDeleteTest.class, "checkAndDelete",
+ "CheckAndDelete on each row; clients overlap on keyspace so some concurrent operations");
+ }
+
+ /**
+ * Enum for map metrics. Keep it out here rather than inside the Map
+ * inner-class so we can find associated properties.
+ */
+ protected static enum Counter {
+ /** elapsed time */
+ ELAPSED_TIME,
+ /** number of rows */
+ ROWS
+ }
+
+ protected static class RunResult implements Comparable<RunResult> {
+ public RunResult(long duration, Histogram hist) {
+ this.duration = duration;
+ this.hist = hist;
+ }
+
+ public final long duration;
+ public final Histogram hist;
+
+ @Override
+ public String toString() {
+ return Long.toString(duration);
+ }
+
+ @Override public int compareTo(RunResult o) {
+ return Long.compare(this.duration, o.duration);
+ }
+ }
+
+ /**
+ * Constructor
+ * @param conf Configuration object
+ */
+ public PerformanceEvaluation(final Configuration conf) {
+ super(conf);
+ }
+
+ protected static void addCommandDescriptor(Class<? extends TestBase> cmdClass,
+ String name, String description) {
+ CmdDescriptor cmdDescriptor = new CmdDescriptor(cmdClass, name, description);
+ COMMANDS.put(name, cmdDescriptor);
+ }
+
+ /**
+ * Implementations can have their status set.
+ */
+ interface Status {
+ /**
+ * Sets status
+ * @param msg status message
+ * @throws IOException
+ */
+ void setStatus(final String msg) throws IOException;
+ }
+
+ /**
+ * MapReduce job that runs a performance evaluation client in each map task.
+ */
+ public static class EvaluationMapTask
+ extends Mapper<LongWritable, Text, LongWritable, LongWritable> {
+
+ /** configuration parameter name that contains the command */
+ public final static String CMD_KEY = "EvaluationMapTask.command";
+ /** configuration parameter name that contains the PE impl */
+ public static final String PE_KEY = "EvaluationMapTask.performanceEvalImpl";
+
+ private Class<? extends Test> cmd;
+
+ @Override
+ protected void setup(Context context) throws IOException, InterruptedException {
+ this.cmd = forName(context.getConfiguration().get(CMD_KEY), Test.class);
+
+ // this is required so that extensions of PE are instantiated within the
+ // map reduce task...
+ Class<? extends PerformanceEvaluation> peClass =
+ forName(context.getConfiguration().get(PE_KEY), PerformanceEvaluation.class);
+ try {
+ peClass.getConstructor(Configuration.class).newInstance(context.getConfiguration());
+ } catch (Exception e) {
+ throw new IllegalStateException("Could not instantiate PE instance", e);
+ }
+ }
+
+ private <Type> Class<? extends Type> forName(String className, Class<Type> type) {
+ try {
+ return Class.forName(className).asSubclass(type);
+ } catch (ClassNotFoundException e) {
+ throw new IllegalStateException("Could not find class for name: " + className, e);
+ }
+ }
+
+ @Override
+ protected void map(LongWritable key, Text value, final Context context)
+ throws IOException, InterruptedException {
+
+ Status status = new Status() {
+ @Override
+ public void setStatus(String msg) {
+ context.setStatus(msg);
+ }
+ };
+
+ ObjectMapper mapper = new ObjectMapper();
+ TestOptions opts = mapper.readValue(value.toString(), TestOptions.class);
+ Configuration conf = HBaseConfiguration.create(context.getConfiguration());
+ final Connection con = ConnectionFactory.createConnection(conf);
+ AsyncConnection asyncCon = null;
+ try {
+ asyncCon = ConnectionFactory.createAsyncConnection(conf).get();
+ } catch (ExecutionException e) {
+ throw new IOException(e);
+ }
+
+ // Evaluation task
+ RunResult result = PerformanceEvaluation.runOneClient(this.cmd, conf, con, asyncCon, opts, status);
+ // Collect how much time the thing took. Report as map output and
+ // to the ELAPSED_TIME counter.
+ context.getCounter(Counter.ELAPSED_TIME).increment(result.duration);
+ context.getCounter(Counter.ROWS).increment(opts.perClientRunRows);
+ context.write(new LongWritable(opts.startRow), new LongWritable(result.duration));
+ context.progress();
+ }
+ }
+
+ /*
+ * If table does not already exist, create. Also create a table when
+ * {@code opts.presplitRegions} is specified or when the existing table's
+ * region replica count doesn't match {@code opts.replicas}.
+ */
+ static boolean checkTable(Admin admin, TestOptions opts) throws IOException {
+ TableName tableName = TableName.valueOf(opts.tableName);
+ boolean needsDelete = false, exists = admin.tableExists(tableName);
+ boolean isReadCmd = opts.cmdName.toLowerCase(Locale.ROOT).contains("read")
+ || opts.cmdName.toLowerCase(Locale.ROOT).contains("scan");
+ if (!exists && isReadCmd) {
+ throw new IllegalStateException(
+ "Must specify an existing table for read commands. Run a write command first.");
+ }
+ HTableDescriptor desc =
+ exists ? admin.getTableDescriptor(TableName.valueOf(opts.tableName)) : null;
+ byte[][] splits = getSplits(opts);
+
+ // recreate the table when user has requested presplit or when existing
+ // {RegionSplitPolicy,replica count} does not match requested.
+ if ((exists && opts.presplitRegions != DEFAULT_OPTS.presplitRegions)
+ || (!isReadCmd && desc != null &&
+ !StringUtils.equals(desc.getRegionSplitPolicyClassName(), opts.splitPolicy))
+ || (!isReadCmd && desc != null && desc.getRegionReplication() != opts.replicas)) {
+ needsDelete = true;
+ // wait, why did it delete my table?!?
+ LOG.debug(MoreObjects.toStringHelper("needsDelete")
+ .add("needsDelete", needsDelete)
+ .add("isReadCmd", isReadCmd)
+ .add("exists", exists)
+ .add("desc", desc)
+ .add("presplit", opts.presplitRegions)
+ .add("splitPolicy", opts.splitPolicy)
+ .add("replicas", opts.replicas));
+ }
+
+ // remove an existing table
+ if (needsDelete) {
+ if (admin.isTableEnabled(tableName)) {
+ admin.disableTable(tableName);
+ }
+ admin.deleteTable(tableName);
+ }
+
+ // table creation is necessary
+ if (!exists || needsDelete) {
+ desc = getTableDescriptor(opts);
+ if (splits != null) {
+ if (LOG.isDebugEnabled()) {
+ for (int i = 0; i < splits.length; i++) {
+ LOG.debug(" split " + i + ": " + Bytes.toStringBinary(splits[i]));
+ }
+ }
+ }
+ admin.createTable(desc, splits);
+ LOG.info("Table " + desc + " created");
+ }
+ return admin.tableExists(tableName);
+ }
+
+ /**
+ * Create an HTableDescriptor from provided TestOptions.
+ */
+ protected static HTableDescriptor getTableDescriptor(TestOptions opts) {
+ HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(opts.tableName));
+ HColumnDescriptor family = new HColumnDescriptor(FAMILY_NAME);
+ family.setDataBlockEncoding(opts.blockEncoding);
+ family.setCompressionType(opts.compression);
+ family.setBloomFilterType(opts.bloomType);
+ family.setBlocksize(opts.blockSize);
+ if (opts.inMemoryCF) {
+ family.setInMemory(true);
+ }
+ family.setInMemoryCompaction(opts.inMemoryCompaction);
+ desc.addFamily(family);
+ if (opts.replicas != DEFAULT_OPTS.replicas) {
+ desc.setRegionReplication(opts.replicas);
+ }
+ if (opts.splitPolicy != DEFAULT_OPTS.splitPolicy) {
+ desc.setRegionSplitPolicyClassName(opts.splitPolicy);
+ }
+ return desc;
+ }
+
+ /**
+ * generates splits based on total number of rows and specified split regions
+ */
+ protected static byte[][] getSplits(TestOptions opts) {
+ if (opts.presplitRegions == DEFAULT_OPTS.presplitRegions)
+ return null;
+
+ int numSplitPoints = opts.presplitRegions - 1;
+ byte[][] splits = new byte[numSplitPoints][];
+ int jump = opts.totalRows / opts.presplitRegions;
+ for (int i = 0; i < numSplitPoints; i++) {
+ int rowkey = jump * (1 + i);
+ splits[i] = format(rowkey);
+ }
+ return splits;
+ }
+
+ /*
+ * Run all clients in this vm each to its own thread.
+ */
+ static RunResult[] doLocalClients(final TestOptions opts, final Configuration conf)
+ throws IOException, InterruptedException, ExecutionException {
+ final Class<? extends TestBase> cmd = determineCommandClass(opts.cmdName);
+ assert cmd != null;
+ @SuppressWarnings("unchecked")
+ Future<RunResult>[] threads = new Future[opts.numClientThreads];
+ RunResult[] results = new RunResult[opts.numClientThreads];
+ ExecutorService pool = Executors.newFixedThreadPool(opts.numClientThreads,
+ new ThreadFactoryBuilder().setNameFormat("TestClient-%s").build());
+ final Connection con = ConnectionFactory.createConnection(conf);
+ final AsyncConnection asyncCon = ConnectionFactory.createAsyncConnection(conf).get();
+ for (int i = 0; i < threads.length; i++) {
+ final int index = i;
+ threads[i] = pool.submit(new Callable<RunResult>() {
+ @Override
+ public RunResult call() throws Exception {
+ TestOptions threadOpts = new TestOptions(opts);
+ if (threadOpts.startRow == 0) threadOpts.startRow = index * threadOpts.perClientRunRows;
+ RunResult run = runOneClient(cmd, conf, con, asyncCon, threadOpts, new Status() {
+ @Override
+ public void setStatus(final String msg) throws IOException {
+ LOG.info(msg);
+ }
+ });
+ LOG.info("Finished " + Thread.currentThread().getName() + " in " + run.duration +
+ "ms over " + threadOpts.perClientRunRows + " rows");
+ return run;
+ }
+ });
+ }
+ pool.shutdown();
+
+ for (int i = 0; i < threads.length; i++) {
+ try {
+ results[i] = threads[i].get();
+ } catch (ExecutionException e) {
+ throw new IOException(e.getCause());
+ }
+ }
+ final String test = cmd.getSimpleName();
+ LOG.info("[" + test + "] Summary of timings (ms): "
+ + Arrays.toString(results));
+ Arrays.sort(results);
+ long total = 0;
+ for (RunResult result : results) {
+ total += result.duration;
+ }
+ LOG.info("[" + test + "]"
+ + "\tMin: " + results[0] + "ms"
+ + "\tMax: " + results[results.length - 1] + "ms"
+ + "\tAvg: " + (total / results.length) + "ms");
+
+ con.close();
+ asyncCon.close();
+
+ return results;
+ }
+
+ /*
+ * Run a mapreduce job. Run as many maps as asked-for clients.
+ * Before we start up the job, write out an input file with an instruction
+ * per client regarding which row they are to start on.
+ * @param cmd Command to run.
+ * @throws IOException
+ */
+ static Job doMapReduce(TestOptions opts, final Configuration conf)
+ throws IOException, InterruptedException, ClassNotFoundException {
+ final Class<? extends TestBase> cmd = determineCommandClass(opts.cmdName);
+ assert cmd != null;
+ Path inputDir = writeInputFile(conf, opts);
+ conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
+ conf.set(EvaluationMapTask.PE_KEY, PerformanceEvaluation.class.getName());
+ Job job = Job.getInstance(conf);
+ job.setJarByClass(PerformanceEvaluation.class);
+ job.setJobName("HBase Performance Evaluation - " + opts.cmdName);
+
+ job.setInputFormatClass(NLineInputFormat.class);
+ NLineInputFormat.setInputPaths(job, inputDir);
+ // this is default, but be explicit about it just in case.
+ NLineInputFormat.setNumLinesPerSplit(job, 1);
+
+ job.setOutputKeyClass(LongWritable.class);
+ job.setOutputValueClass(LongWritable.class);
+
+ job.setMapperClass(EvaluationMapTask.class);
+ job.setReducerClass(LongSumReducer.class);
+
+ job.setNumReduceTasks(1);
+
+ job.setOutputFormatClass(TextOutputFormat.class);
+ TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));
+
+ TableMapReduceUtil.addDependencyJars(job);
+ TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
+ Histogram.class, // yammer metrics
+ ObjectMapper.class); // jackson-mapper-asl
+
+ TableMapReduceUtil.initCredentials(job);
+
+ job.waitForCompletion(true);
+ return job;
+ }
+
+ /**
+ * Each client has one mapper to do the work, and each client does the resulting count in its map task.
+ */
+
+ static String JOB_INPUT_FILENAME = "input.txt";
+
+ /*
+ * Write input file of offsets-per-client for the mapreduce job.
+ * @param c Configuration
+ * @return Directory that contains file written whose name is JOB_INPUT_FILENAME
+ * @throws IOException
+ */
+ static Path writeInputFile(final Configuration c, final TestOptions opts) throws IOException {
+ return writeInputFile(c, opts, new Path("."));
+ }
+
+ static Path writeInputFile(final Configuration c, final TestOptions opts, final Path basedir)
+ throws IOException {
+ SimpleDateFormat formatter = new SimpleDateFormat("yyyyMMddHHmmss");
+ Path jobdir = new Path(new Path(basedir, PERF_EVAL_DIR), formatter.format(new Date()));
+ Path inputDir = new Path(jobdir, "inputs");
+
+ FileSystem fs = FileSystem.get(c);
+ fs.mkdirs(inputDir);
+
+ Path inputFile = new Path(inputDir, JOB_INPUT_FILENAME);
+ PrintStream out = new PrintStream(fs.create(inputFile));
+ // Make input random.
+ Map<Integer, String> m = new TreeMap<>();
+ Hash h = MurmurHash.getInstance();
+ int perClientRows = (opts.totalRows / opts.numClientThreads);
+ try {
+ for (int j = 0; j < opts.numClientThreads; j++) {
+ TestOptions next = new TestOptions(opts);
+ next.startRow = j * perClientRows;
+ next.perClientRunRows = perClientRows;
+ String s = MAPPER.writeValueAsString(next);
+ LOG.info("Client=" + j + ", input=" + s);
+ byte[] b = Bytes.toBytes(s);
+ int hash = h.hash(new ByteArrayHashKey(b, 0, b.length), -1);
+ m.put(hash, s);
+ }
+ for (Map.Entry<Integer, String> e: m.entrySet()) {
+ out.println(e.getValue());
+ }
+ } finally {
+ out.close();
+ }
+ return inputDir;
+ }
+
+ /**
+ * Describes a command.
+ */
+ static class CmdDescriptor {
+ private Class<? extends TestBase> cmdClass;
+ private String name;
+ private String description;
+
+ CmdDescriptor(Class<? extends TestBase> cmdClass, String name, String description) {
+ this.cmdClass = cmdClass;
+ this.name = name;
+ this.description = description;
+ }
+
+ public Class<? extends TestBase> getCmdClass() {
+ return cmdClass;
+ }
+
+ public String getName() {
+ return name;
+ }
+
+ public String getDescription() {
+ return description;
+ }
+ }
+
+ /**
+ * Wraps up options passed to {@link org.apache.hadoop.hbase.PerformanceEvaluation}.
+ * This makes tracking all these arguments a little easier.
+ * NOTE: ADDING AN OPTION, you need to add a data member, a getter/setter (to make JSON
+ * serialization of this TestOptions class behave), and you need to add to the clone constructor
+ * below copying your new option from the 'that' to the 'this'. Look for 'clone' below.
+ */
+ static class TestOptions {
+ String cmdName = null;
+ boolean nomapred = false;
+ boolean filterAll = false;
+ int startRow = 0;
+ float size = 1.0f;
+ int perClientRunRows = DEFAULT_ROWS_PER_GB;
+ int numClientThreads = 1;
+ int totalRows = DEFAULT_ROWS_PER_GB;
+ int measureAfter = 0;
+ float sampleRate = 1.0f;
+ double traceRate = 0.0;
+ String tableName = TABLE_NAME;
+ boolean flushCommits = true;
+ boolean writeToWAL = true;
+ boolean autoFlush = false;
+ boolean oneCon = false;
+ boolean useTags = false;
+ int noOfTags = 1;
+ boolean reportLatency = false;
+ int multiGet = 0;
+ int randomSleep = 0;
+ boolean inMemoryCF = false;
+ int presplitRegions = 0;
+ int replicas = HTableDescriptor.DEFAULT_REGION_REPLICATION;
+ String splitPolicy = null;
+ Compression.Algorithm compression = Compression.Algorithm.NONE;
+ BloomType bloomType = BloomType.ROW;
+ int blockSize = HConstants.DEFAULT_BLOCKSIZE;
+ DataBlockEncoding blockEncoding = DataBlockEncoding.NONE;
+ boolean valueRandom = false;
+ boolean valueZipf = false;
+ int valueSize = DEFAULT_VALUE_LENGTH;
+ int period = (this.perClientRunRows / 10) == 0? perClientRunRows: perClientRunRows / 10;
+ int cycles = 1;
+ int columns = 1;
+ int caching = 30;
+ boolean addColumns = true;
+ MemoryCompactionPolicy inMemoryCompaction =
+ MemoryCompactionPolicy.valueOf(
+ CompactingMemStore.COMPACTING_MEMSTORE_TYPE_DEFAULT);
+ boolean asyncPrefetch = false;
+ boolean cacheBlocks = true;
+ Scan.ReadType scanReadType = Scan.ReadType.DEFAULT;
+
+ public TestOptions() {}
+
+ /**
+ * Clone constructor.
+ * @param that Object to copy from.
+ */
+ public TestOptions(TestOptions that) {
+ this.cmdName = that.cmdName;
+ this.cycles = that.cycles;
+ this.nomapred = that.nomapred;
+ this.startRow = that.startRow;
+ this.size = that.size;
+ this.perClientRunRows = that.perClientRunRows;
+ this.numClientThreads = that.numClientThreads;
+ this.totalRows = that.totalRows;
+ this.sampleRate = that.sampleRate;
+ this.traceRate = that.traceRate;
+ this.tableName = that.tableName;
+ this.flushCommits = that.flushCommits;
+ this.writeToWAL = that.writeToWAL;
+ this.autoFlush = that.autoFlush;
+ this.oneCon = that.oneCon;
+ this.useTags = that.useTags;
+ this.noOfTags = that.noOfTags;
+ this.reportLatency = that.reportLatency;
+ this.multiGet = that.multiGet;
+ this.inMemoryCF = that.inMemoryCF;
+ this.presplitRegions = that.presplitRegions;
+ this.replicas = that.replicas;
+ this.splitPolicy = that.splitPolicy;
+ this.compression = that.compression;
+ this.blockEncoding = that.blockEncoding;
+ this.filterAll = that.filterAll;
+ this.bloomType = that.bloomType;
+ this.blockSize = that.blockSize;
+ this.valueRandom = that.valueRandom;
+ this.valueZipf = that.valueZipf;
+ this.valueSize = that.valueSize;
+ this.period = that.period;
+ this.randomSleep = that.randomSleep;
+ this.measureAfter = that.measureAfter;
+ this.addColumns = that.addColumns;
+ this.columns = that.columns;
+ this.caching = that.caching;
+ this.inMemoryCompaction = that.inMemoryCompaction;
+ this.asyncPrefetch = that.asyncPrefetch;
+ this.cacheBlocks = that.cacheBlocks;
+ this.scanReadType = that.scanReadType;
+ }
+
+ public int getCaching() {
+ return this.caching;
+ }
+
+ public void setCaching(final int caching) {
+ this.caching = caching;
+ }
+
+ public int getColumns() {
+ return this.columns;
+ }
+
+ public void setColumns(final int columns) {
+ this.columns = columns;
+ }
+
+ public int getCycles() {
+ return this.cycles;
+ }
+
+ public void setCycles(final int cycles) {
+ this.cycles = cycles;
+ }
+
+ public boolean isValueZipf() {
+ return valueZipf;
+ }
+
+ public void setValueZipf(boolean valueZipf) {
+ this.valueZipf = valueZipf;
+ }
+
+ public String getCmdName() {
+ return cmdName;
+ }
+
+ public void setCmdName(String cmdName) {
+ this.cmdName = cmdName;
+ }
+
+ public int getRandomSleep() {
+ return randomSleep;
+ }
+
+ public void setRandomSleep(int randomSleep) {
+ this.randomSleep = randomSleep;
+ }
+
+ public int getReplicas() {
+ return replicas;
+ }
+
+ public void setReplicas(int replicas) {
+ this.replicas = replicas;
+ }
+
+ public String getSplitPolicy() {
+ return splitPolicy;
+ }
+
+ public void setSplitPolicy(String splitPolicy) {
+ this.splitPolicy = splitPolicy;
+ }
+
+ public void setNomapred(boolean nomapred) {
+ this.nomapred = nomapred;
+ }
+
+ public void setFilterAll(boolean filterAll) {
+ this.filterAll = filterAll;
+ }
+
+ public void setStartRow(int startRow) {
+ this.startRow = startRow;
+ }
+
+ public void setSize(float size) {
+ this.size = size;
+ }
+
+ public void setPerClientRunRows(int perClientRunRows) {
+ this.perClientRunRows = perClientRunRows;
+ }
+
+ public void setNumClientThreads(int numClientThreads) {
+ this.numClientThreads = numClientThreads;
+ }
+
+ public void setTotalRows(int totalRows) {
+ this.totalRows = totalRows;
+ }
+
+ public void setSampleRate(float sampleRate) {
+ this.sampleRate = sampleRate;
+ }
+
+ public void setTraceRate(double traceRate) {
+ this.traceRate = traceRate;
+ }
+
+ public void setTableName(String tableName) {
+ this.tableName = tableName;
+ }
+
+ public void setFlushCommits(boolean flushCommits) {
+ this.flushCommits = flushCommits;
+ }
+
+ public void setWriteToWAL(boolean writeToWAL) {
+ this.writeToWAL = writeToWAL;
+ }
+
+ public void setAutoFlush(boolean autoFlush) {
+ this.autoFlush = autoFlush;
+ }
+
+ public void setOneCon(boolean oneCon) {
+ this.oneCon = oneCon;
+ }
+
+ public void setUseTags(boolean useTags) {
+ this.useTags = useTags;
+ }
+
+ public void setNoOfTags(int noOfTags) {
+ this.noOfTags = noOfTags;
+ }
+
+ public void setReportLatency(boolean reportLatency) {
+ this.reportLatency = reportLatency;
+ }
+
+ public void setMultiGet(int multiGet) {
+ this.multiGet = multiGet;
+ }
+
+ public void setInMemoryCF(boolean inMemoryCF) {
+ this.inMemoryCF = inMemoryCF;
+ }
+
+ public void setPresplitRegions(int presplitRegions) {
+ this.presplitRegions = presplitRegions;
+ }
+
+ public void setCompression(Compression.Algorithm compression) {
+ this.compression = compression;
+ }
+
+ public void setBloomType(BloomType bloomType) {
+ this.bloomType = bloomType;
+ }
+
+ public void setBlockSize(int blockSize) {
+ this.blockSize = blockSize;
+ }
+
+ public void setBlockEncoding(DataBlockEncoding blockEncoding) {
+ this.blockEncoding = blockEncoding;
+ }
+
+ public void setValueRandom(boolean valueRandom) {
+ this.valueRandom = valueRandom;
+ }
+
+ public void setValueSize(int valueSize) {
+ this.valueSize = valueSize;
+ }
+
+ public void setPeriod(int period) {
+ this.period = period;
+ }
+
+ public boolean isNomapred() {
+ return nomapred;
+ }
+
+ public boolean isFilterAll() {
+ return filterAll;
+ }
+
+ public int getStartRow() {
+ return startRow;
+ }
+
+ public float getSize() {
+ return size;
+ }
+
+ public int getPerClientRunRows() {
+ return perClientRunRows;
+ }
+
+ public int getNumClientThreads() {
+ return numClientThreads;
+ }
+
+ public int getTotalRows() {
+ return totalRows;
+ }
+
+ public float getSampleRate() {
+ return sampleRate;
+ }
+
+ public double getTraceRate() {
+ return traceRate;
+ }
+
+ public String getTableName() {
+ return tableName;
+ }
+
+ public boolean isFlushCommits() {
+ return flushCommits;
+ }
+
+ public boolean isWriteToWAL() {
+ return writeToWAL;
+ }
+
+ public boolean isAutoFlush() {
+ return autoFlush;
+ }
+
+ public boolean isUseTags() {
+ return useTags;
+ }
+
+ public int getNoOfTags() {
+ return noOfTags;
+ }
+
+ public boolean isReportLatency() {
+ return reportLatency;
+ }
+
+ public int getMultiGet() {
+ return multiGet;
+ }
+
+ public boolean isInMemoryCF() {
+ return inMemoryCF;
+ }
+
+ public int getPresplitRegions() {
+ return presplitRegions;
+ }
+
+ public Compression.Algorithm getCompression() {
+ return compression;
+ }
+
+ public DataBlockEncoding getBlockEncoding() {
+ return blockEncoding;
+ }
+
+ public boolean isValueRandom() {
+ return valueRandom;
+ }
+
+ public int getValueSize() {
+ return valueSize;
+ }
+
+ public int getPeriod() {
+ return period;
+ }
+
+ public BloomType getBloomType() {
+ return bloomType;
+ }
+
+ public int getBlockSize() {
+ return blockSize;
+ }
+
+ public boolean isOneCon() {
+ return oneCon;
+ }
+
+ public int getMeasureAfter() {
+ return measureAfter;
+ }
+
+ public void setMeasureAfter(int measureAfter) {
+ this.measureAfter = measureAfter;
+ }
+
+ public boolean getAddColumns() {
+ return addColumns;
+ }
+
+ public void setAddColumns(boolean addColumns) {
+ this.addColumns = addColumns;
+ }
+
+ public void setInMemoryCompaction(MemoryCompactionPolicy inMemoryCompaction) {
+ this.inMemoryCompaction = inMemoryCompaction;
+ }
+
+ public MemoryCompactionPolicy getInMemoryCompaction() {
+ return this.inMemoryCompaction;
+ }
+ }
+
+ /*
+ * A test.
+ * Subclass to particularize what happens per row.
+ */
+ static abstract class TestBase {
+ // The below makes it so that when Tests are all running in the one
+ // JVM, they each have a differently seeded Random.
+ private static final Random randomSeed = new Random(System.currentTimeMillis());
+
+ private static long nextRandomSeed() {
+ return randomSeed.nextLong();
+ }
+ private final int everyN;
+
+ protected final Random rand = new Random(nextRandomSeed());
+ protected final Configuration conf;
+ protected final TestOptions opts;
+
+ private final Status status;
+ private final Sampler<?> traceSampler;
+ private final SpanReceiverHost receiverHost;
+
+ private String testName;
+ private Histogram latencyHistogram;
+ private Histogram valueSizeHistogram;
+ private RandomDistribution.Zipf zipf;
+
+ /**
+ * Note that all subclasses of this class must provide a public constructor
+ * that has the exact same list of arguments.
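+ * (These constructors are looked up reflectively in runOneClient, so a missing or mismatched
+ * constructor is reported from there as an "Invalid command class".)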
+ */
+ TestBase(final Configuration conf, final TestOptions options, final Status status) {
+ this.conf = conf;
+ this.receiverHost = this.conf == null? null: SpanReceiverHost.getInstance(conf);
+ this.opts = options;
+ this.status = status;
+ this.testName = this.getClass().getSimpleName();
+ if (options.traceRate >= 1.0) {
+ this.traceSampler = Sampler.ALWAYS;
+ } else if (options.traceRate > 0.0) {
+ conf.setDouble("hbase.sampler.fraction", options.traceRate);
+ this.traceSampler = new ProbabilitySampler(new HBaseHTraceConfiguration(conf));
+ } else {
+ this.traceSampler = Sampler.NEVER;
+ }
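+ // everyN reduces to (int) (1 / sampleRate): e.g. a sampleRate of 0.25 gives everyN = 4, so
+ // testTimed() below only exercises every 4th row index.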
+ everyN = (int) (opts.totalRows / (opts.totalRows * opts.sampleRate));
+ if (options.isValueZipf()) {
+ this.zipf = new RandomDistribution.Zipf(this.rand, 1, options.getValueSize(), 1.2);
+ }
+ LOG.info("Sampling 1 every " + everyN + " out of " + opts.perClientRunRows + " total rows.");
+ }
+
+ int getValueLength(final Random r) {
+ if (this.opts.isValueRandom()) return Math.abs(r.nextInt() % opts.valueSize);
+ else if (this.opts.isValueZipf()) return Math.abs(this.zipf.nextInt());
+ else return opts.valueSize;
+ }
+
+ void updateValueSize(final Result [] rs) throws IOException {
+ if (rs == null || !isRandomValueSize()) return;
+ for (Result r: rs) updateValueSize(r);
+ }
+
+ void updateValueSize(final Result r) throws IOException {
+ if (r == null || !isRandomValueSize()) return;
+ int size = 0;
+ for (CellScanner scanner = r.cellScanner(); scanner.advance();) {
+ size += scanner.current().getValueLength();
+ }
+ updateValueSize(size);
+ }
+
+ void updateValueSize(final int valueSize) {
+ if (!isRandomValueSize()) return;
+ this.valueSizeHistogram.update(valueSize);
+ }
+
+ String generateStatus(final int sr, final int i, final int lr) {
+ return sr + "/" + i + "/" + lr + ", latency " + getShortLatencyReport() +
+ (!isRandomValueSize()? "": ", value size " + getShortValueSizeReport());
+ }
+
+ boolean isRandomValueSize() {
+ return opts.valueRandom;
+ }
+
+ protected int getReportingPeriod() {
+ return opts.period;
+ }
+
+ /**
+ * Populated by testTakedown. Only implemented by RandomReadTest at the moment.
+ */
+ public Histogram getLatencyHistogram() {
+ return latencyHistogram;
+ }
+
+ void testSetup() throws IOException {
+ createConnection();
+ onStartup();
+ latencyHistogram = YammerHistogramUtils.newHistogram(new UniformReservoir(1024 * 500));
+ valueSizeHistogram = YammerHistogramUtils.newHistogram(new UniformReservoir(1024 * 500));
+ }
+
+ abstract void createConnection() throws IOException;
+
+ abstract void onStartup() throws IOException;
+
+ void testTakedown() throws IOException {
+ onTakedown();
+ // Print all stats for this thread continuously.
+ // Synchronize on Test.class so different threads don't intermingle the
+ // output. We can't use 'this' here because each thread has its own instance of Test class.
+ synchronized (Test.class) {
+ status.setStatus("Test : " + testName + ", Thread : " + Thread.currentThread().getName());
+ status.setStatus("Latency (us) : " + YammerHistogramUtils.getHistogramReport(
+ latencyHistogram));
+ status.setStatus("Num measures (latency) : " + latencyHistogram.getCount());
+ status.setStatus(YammerHistogramUtils.getPrettyHistogramReport(latencyHistogram));
+ status.setStatus("ValueSize (bytes) : "
+ + YammerHistogramUtils.getHistogramReport(valueSizeHistogram));
+ status.setStatus("Num measures (ValueSize): " + valueSizeHistogram.getCount());
+ status.setStatus(YammerHistogramUtils.getPrettyHistogramReport(valueSizeHistogram));
+ }
+ closeConnection();
+ receiverHost.closeReceivers();
+ }
+
+ abstract void onTakedown() throws IOException;
+
+ abstract void closeConnection() throws IOException;
+
+ /*
+ * Run test.
+ * @return Elapsed time in milliseconds.
+ * @throws IOException
+ */
+ long test() throws IOException, InterruptedException {
+ testSetup();
+ LOG.info("Timed test starting in thread " + Thread.currentThread().getName());
+ final long startTime = System.nanoTime();
+ try {
+ testTimed();
+ } finally {
+ testTakedown();
+ }
+ return (System.nanoTime() - startTime) / 1000000;
+ }
+
+ int getStartRow() {
+ return opts.startRow;
+ }
+
+ int getLastRow() {
+ return getStartRow() + opts.perClientRunRows;
+ }
+
+ /**
+ * Provides an extension point for tests that don't want a per row invocation.
+ */
+ void testTimed() throws IOException, InterruptedException {
+ int startRow = getStartRow();
+ int lastRow = getLastRow();
+ // Report on completion of 1/10th of total.
+ for (int ii = 0; ii < opts.cycles; ii++) {
+ if (opts.cycles > 1) LOG.info("Cycle=" + ii + " of " + opts.cycles);
+ for (int i = startRow; i < lastRow; i++) {
+ if (i % everyN != 0) continue;
+ long startTime = System.nanoTime();
+ TraceScope scope = Trace.startSpan("test row", traceSampler);
+ try {
+ testRow(i);
+ } finally {
+ scope.close();
+ }
+ if ( (i - startRow) > opts.measureAfter) {
+ // If multiget is enabled, say set to 10, testRow() returns immediately first 9 times
+ // and sends the actual get request in the 10th iteration. We should only set latency
+ // when actual request is sent because otherwise it turns out to be 0.
+ if (opts.multiGet == 0 || (i - startRow + 1) % opts.multiGet == 0) {
+ latencyHistogram.update((System.nanoTime() - startTime) / 1000);
+ }
+ if (status != null && i > 0 && (i % getReportingPeriod()) == 0) {
+ status.setStatus(generateStatus(startRow, i, lastRow));
+ }
+ }
+ }
+ }
+ }
+
+ /**
+ * @return Subset of the histograms' calculation.
+ */
+ public String getShortLatencyReport() {
+ return YammerHistogramUtils.getShortHistogramReport(this.latencyHistogram);
+ }
+
+ /**
+ * @return Subset of the histograms' calculation.
+ */
+ public String getShortValueSizeReport() {
+ return YammerHistogramUtils.getShortHistogramReport(this.valueSizeHistogram);
+ }
+
+ /*
+ * Test for individual row.
+ * @param i Row index.
+ */
+ abstract void testRow(final int i) throws IOException, InterruptedException;
+ }
+
+ static abstract class Test extends TestBase {
+ protected Connection connection;
+
+ Test(final Connection con, final TestOptions options, final Status status) {
+ super(con == null ? HBaseConfiguration.create() : con.getConfiguration(), options, status);
+ this.connection = con;
+ }
+
+ @Override
+ void createConnection() throws IOException {
+ if (!opts.isOneCon()) {
+ this.connection = ConnectionFactory.createConnection(conf);
+ }
+ }
+
+ @Override
+ void closeConnection() throws IOException {
+ if (!opts.isOneCon()) {
+ this.connection.close();
+ }
+ }
+ }
+
+ static abstract class AsyncTest extends TestBase {
+ protected AsyncConnection connection;
+
+ AsyncTest(final AsyncConnection con, final TestOptions options, final Status status) {
+ super(con == null ? HBaseConfiguration.create() : con.getConfiguration(), options, status);
+ this.connection = con;
+ }
+
+ @Override
+ void createConnection() {
+ if (!opts.isOneCon()) {
+ try {
+ this.connection = ConnectionFactory.createAsyncConnection(conf).get();
+ } catch (InterruptedException | ExecutionException e) {
+ LOG.error("Failed to create async connection", e);
+ }
+ }
+ }
+
+ @Override
+ void closeConnection() throws IOException {
+ if (!opts.isOneCon()) {
+ this.connection.close();
+ }
+ }
+ }
+
+ static abstract class TableTest extends Test {
+ protected Table table;
+
+ TableTest(Connection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ void onStartup() throws IOException {
+ this.table = connection.getTable(TableName.valueOf(opts.tableName));
+ }
+
+ @Override
+ void onTakedown() throws IOException {
+ table.close();
+ }
+ }
+
+ static abstract class AsyncTableTest extends AsyncTest {
+ protected RawAsyncTable table;
+
+ AsyncTableTest(AsyncConnection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ void onStartup() throws IOException {
+ this.table = connection.getRawTable(TableName.valueOf(opts.tableName));
+ }
+
+ @Override
+ void onTakedown() throws IOException {
+ }
+ }
+
+ static class AsyncRandomReadTest extends AsyncTableTest {
+ private final Consistency consistency;
+ private ArrayList<Get> gets;
+ private Random rd = new Random();
+
+ AsyncRandomReadTest(AsyncConnection con, TestOptions options, Status status) {
+ super(con, options, status);
+ consistency = options.replicas == DEFAULT_OPTS.replicas ? null : Consistency.TIMELINE;
+ if (opts.multiGet > 0) {
+ LOG.info("MultiGet enabled. Sending GETs in batches of " + opts.multiGet + ".");
+ this.gets = new ArrayList<>(opts.multiGet);
+ }
+ }
+
+ @Override
+ void testRow(final int i) throws IOException, InterruptedException {
+ if (opts.randomSleep > 0) {
+ Thread.sleep(rd.nextInt(opts.randomSleep));
+ }
+ Get get = new Get(getRandomRow(this.rand, opts.totalRows));
+ if (opts.addColumns) {
+ get.addColumn(FAMILY_NAME, QUALIFIER_NAME);
+ } else {
+ get.addFamily(FAMILY_NAME);
+ }
+ if (opts.filterAll) {
+ get.setFilter(new FilterAllFilter());
+ }
+ get.setConsistency(consistency);
+ if (LOG.isTraceEnabled()) LOG.trace(get.toString());
+ try {
+ if (opts.multiGet > 0) {
+ this.gets.add(get);
+ if (this.gets.size() == opts.multiGet) {
+ Result[] rs =
+ this.table.get(this.gets).stream().map(f -> propagate(f::get)).toArray(Result[]::new);
+ updateValueSize(rs);
+ this.gets.clear();
+ }
+ } else {
+ updateValueSize(this.table.get(get).get());
+ }
+ } catch (ExecutionException e) {
+ throw new IOException(e);
+ }
+ }
+
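+ // runtime()/propagate() below rewrap checked exceptions as RuntimeExceptions so that the
+ // lambda passed to Stream.map in testRow above can call the blocking Future.get.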
+ public static RuntimeException runtime(Throwable e) {
+ if (e instanceof RuntimeException) {
+ return (RuntimeException) e;
+ }
+ return new RuntimeException(e);
+ }
+
+ public static <V> V propagate(Callable<V> callable) {
+ try {
+ return callable.call();
+ } catch (Exception e) {
+ throw runtime(e);
+ }
+ }
+
+ @Override
+ protected int getReportingPeriod() {
+ int period = opts.perClientRunRows / 10;
+ return period == 0 ? opts.perClientRunRows : period;
+ }
+
+ @Override
+ protected void testTakedown() throws IOException {
+ if (this.gets != null && this.gets.size() > 0) {
+ this.table.get(gets);
+ this.gets.clear();
+ }
+ super.testTakedown();
+ }
+ }
+
+ static class AsyncRandomWriteTest extends AsyncTableTest {
+ AsyncRandomWriteTest(AsyncConnection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ void testRow(final int i) throws IOException, InterruptedException {
+ byte[] row = getRandomRow(this.rand, opts.totalRows);
+ Put put = new Put(row);
+ for (int column = 0; column < opts.columns; column++) {
+ byte[] qualifier = column == 0 ? COLUMN_ZERO : Bytes.toBytes("" + column);
+ byte[] value = generateData(this.rand, getValueLength(this.rand));
+ if (opts.useTags) {
+ byte[] tag = generateData(this.rand, TAG_LENGTH);
+ Tag[] tags = new Tag[opts.noOfTags];
+ for (int n = 0; n < opts.noOfTags; n++) {
+ Tag t = new ArrayBackedTag((byte) n, tag);
+ tags[n] = t;
+ }
+ KeyValue kv =
+ new KeyValue(row, FAMILY_NAME, qualifier, HConstants.LATEST_TIMESTAMP, value, tags);
+ put.add(kv);
+ updateValueSize(kv.getValueLength());
+ } else {
+ put.addColumn(FAMILY_NAME, qualifier, value);
+ updateValueSize(value.length);
+ }
+ }
+ put.setDurability(opts.writeToWAL ? Durability.SYNC_WAL : Durability.SKIP_WAL);
+ try {
+ table.put(put).get();
+ } catch (ExecutionException e) {
+ throw new IOException(e);
+ }
+ }
+ }
+
+ static class AsyncScanTest extends AsyncTableTest {
+ private ResultScanner testScanner;
+ private AsyncTable asyncTable;
+
+ AsyncScanTest(AsyncConnection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ void onStartup() throws IOException {
+ this.asyncTable =
+ connection.getTable(TableName.valueOf(opts.tableName),
+ Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()));
+ }
+
+ @Override
+ void testTakedown() throws IOException {
+ if (this.testScanner != null) {
+ this.testScanner.close();
+ }
+ super.testTakedown();
+ }
+
+ @Override
+ void testRow(final int i) throws IOException {
+ if (this.testScanner == null) {
+ Scan scan =
+ new Scan().withStartRow(format(opts.startRow)).setCaching(opts.caching)
+ .setCacheBlocks(opts.cacheBlocks).setAsyncPrefetch(opts.asyncPrefetch)
+ .setReadType(opts.scanReadType);
+ if (opts.addColumns) {
+ scan.addColumn(FAMILY_NAME, QUALIFIER_NAME);
+ } else {
+ scan.addFamily(FAMILY_NAME);
+ }
+ if (opts.filterAll) {
+ scan.setFilter(new FilterAllFilter());
+ }
+ this.testScanner = asyncTable.getScanner(scan);
+ }
+ Result r = testScanner.next();
+ updateValueSize(r);
+ }
+ }
+
+ static class AsyncSequentialReadTest extends AsyncTableTest {
+ AsyncSequentialReadTest(AsyncConnection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ void testRow(final int i) throws IOException, InterruptedException {
+ Get get = new Get(format(i));
+ if (opts.addColumns) {
+ get.addColumn(FAMILY_NAME, QUALIFIER_NAME);
+ }
+ if (opts.filterAll) {
+ get.setFilter(new FilterAllFilter());
+ }
+ try {
+ updateValueSize(table.get(get).get());
+ } catch (ExecutionException e) {
+ throw new IOException(e);
+ }
+ }
+ }
+
+ static class AsyncSequentialWriteTest extends AsyncTableTest {
+ AsyncSequentialWriteTest(AsyncConnection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ void testRow(final int i) throws IOException, InterruptedException {
+ byte[] row = format(i);
+ Put put = new Put(row);
+ for (int column = 0; column < opts.columns; column++) {
+ byte [] qualifier = column == 0? COLUMN_ZERO: Bytes.toBytes("" + column);
+ byte[] value = generateData(this.rand, getValueLength(this.rand));
+ if (opts.useTags) {
+ byte[] tag = generateData(this.rand, TAG_LENGTH);
+ Tag[] tags = new Tag[opts.noOfTags];
+ for (int n = 0; n < opts.noOfTags; n++) {
+ Tag t = new ArrayBackedTag((byte) n, tag);
+ tags[n] = t;
+ }
+ KeyValue kv = new KeyValue(row, FAMILY_NAME, qualifier, HConstants.LATEST_TIMESTAMP,
+ value, tags);
+ put.add(kv);
+ updateValueSize(kv.getValueLength());
+ } else {
+ put.addColumn(FAMILY_NAME, qualifier, value);
+ updateValueSize(value.length);
+ }
+ }
+ put.setDurability(opts.writeToWAL ? Durability.SYNC_WAL : Durability.SKIP_WAL);
+ try {
+ table.put(put).get();
+ } catch (ExecutionException e) {
+ throw new IOException(e);
+ }
+ }
+ }
+
+ static abstract class BufferedMutatorTest extends Test {
+ protected BufferedMutator mutator;
+ protected Table table;
+
+ BufferedMutatorTest(Connection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ void onStartup() throws IOException {
+ this.mutator = connection.getBufferedMutator(TableName.valueOf(opts.tableName));
+ this.table = connection.getTable(TableName.valueOf(opts.tableName));
+ }
+
+ @Override
+ void onTakedown() throws IOException {
+ mutator.close();
+ table.close();
+ }
+ }
+
+ static class RandomSeekScanTest extends TableTest {
+ RandomSeekScanTest(Connection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ void testRow(final int i) throws IOException {
+ Scan scan = new Scan().withStartRow(getRandomRow(this.rand, opts.totalRows))
+ .setCaching(opts.caching).setCacheBlocks(opts.cacheBlocks)
+ .setAsyncPrefetch(opts.asyncPrefetch).setReadType(opts.scanReadType);
+ FilterList list = new FilterList();
+ if (opts.addColumns) {
+ scan.addColumn(FAMILY_NAME, QUALIFIER_NAME);
+ } else {
+ scan.addFamily(FAMILY_NAME);
+ }
+ if (opts.filterAll) {
+ list.addFilter(new FilterAllFilter());
+ }
+ list.addFilter(new WhileMatchFilter(new PageFilter(120)));
+ scan.setFilter(list);
+ ResultScanner s = this.table.getScanner(scan);
+ for (Result rr; (rr = s.next()) != null;) {
+ updateValueSize(rr);
+ }
+ s.close();
+ }
+
+ @Override
+ protected int getReportingPeriod() {
+ int period = opts.perClientRunRows / 100;
+ return period == 0 ? opts.perClientRunRows : period;
+ }
+
+ }
+
+ static abstract class RandomScanWithRangeTest extends TableTest {
+ RandomScanWithRangeTest(Connection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ void testRow(final int i) throws IOException {
+ Pair<byte[], byte[]> startAndStopRow = getStartAndStopRow();
+ Scan scan = new Scan().withStartRow(startAndStopRow.getFirst())
+ .withStopRow(startAndStopRow.getSecond()).setCaching(opts.caching)
+ .setCacheBlocks(opts.cacheBlocks).setAsyncPrefetch(opts.asyncPrefetch)
+ .setReadType(opts.scanReadType);
+ if (opts.filterAll) {
+ scan.setFilter(new FilterAllFilter());
+ }
+ if (opts.addColumns) {
+ scan.addColumn(FAMILY_NAME, QUALIFIER_NAME);
+ } else {
+ scan.addFamily(FAMILY_NAME);
+ }
+ Result r = null;
+ int count = 0;
+ ResultScanner s = this.table.getScanner(scan);
+ for (; (r = s.next()) != null;) {
+ updateValueSize(r);
+ count++;
+ }
+ if (i % 100 == 0) {
+ LOG.info(String.format("Scan for key range %s - %s returned %s rows",
+ Bytes.toString(startAndStopRow.getFirst()),
+ Bytes.toString(startAndStopRow.getSecond()), count));
+ }
+
+ s.close();
+ }
+
+ protected abstract Pair<byte[],byte[]> getStartAndStopRow();
+
+ protected Pair<byte[], byte[]> generateStartAndStopRows(int maxRange) {
+ int start = this.rand.nextInt(Integer.MAX_VALUE) % opts.totalRows;
+ int stop = start + maxRange;
+ return new Pair<>(format(start), format(stop));
+ }
+
+ @Override
+ protected int getReportingPeriod() {
+ int period = opts.perClientRunRows / 100;
+ return period == 0? opts.perClientRunRows: period;
+ }
+ }
+
+ static class RandomScanWithRange10Test extends RandomScanWithRangeTest {
+ RandomScanWithRange10Test(Connection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ protected Pair<byte[], byte[]> getStartAndStopRow() {
+ return generateStartAndStopRows(10);
+ }
+ }
+
+ static class RandomScanWithRange100Test extends RandomScanWithRangeTest {
+ RandomScanWithRange100Test(Connection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ protected Pair<byte[], byte[]> getStartAndStopRow() {
+ return generateStartAndStopRows(100);
+ }
+ }
+
+ static class RandomScanWithRange1000Test extends RandomScanWithRangeTest {
+ RandomScanWithRange1000Test(Connection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ protected Pair<byte[], byte[]> getStartAndStopRow() {
+ return generateStartAndStopRows(1000);
+ }
+ }
+
+ static class RandomScanWithRange10000Test extends RandomScanWithRangeTest {
+ RandomScanWithRange10000Test(Connection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ protected Pair<byte[], byte[]> getStartAndStopRow() {
+ return generateStartAndStopRows(10000);
+ }
+ }
+
+ static class RandomReadTest extends TableTest {
+ private final Consistency consistency;
+ private ArrayList<Get> gets;
+ private Random rd = new Random();
+
+ RandomReadTest(Connection con, TestOptions options, Status status) {
+ super(con, options, status);
+ consistency = options.replicas == DEFAULT_OPTS.replicas ? null : Consistency.TIMELINE;
+ if (opts.multiGet > 0) {
+ LOG.info("MultiGet enabled. Sending GETs in batches of " + opts.multiGet + ".");
+ this.gets = new ArrayList<>(opts.multiGet);
+ }
+ }
+
+ @Override
+ void testRow(final int i) throws IOException, InterruptedException {
+ if (opts.randomSleep > 0) {
+ Thread.sleep(rd.nextInt(opts.randomSleep));
+ }
+ Get get = new Get(getRandomRow(this.rand, opts.totalRows));
+ if (opts.addColumns) {
+ get.addColumn(FAMILY_NAME, QUALIFIER_NAME);
+ } else {
+ get.addFamily(FAMILY_NAME);
+ }
+ if (opts.filterAll) {
+ get.setFilter(new FilterAllFilter());
+ }
+ get.setConsistency(consistency);
+ if (LOG.isTraceEnabled()) LOG.trace(get.toString());
+ if (opts.multiGet > 0) {
+ this.gets.add(get);
+ if (this.gets.size() == opts.multiGet) {
+ Result [] rs = this.table.get(this.gets);
+ updateValueSize(rs);
+ this.gets.clear();
+ }
+ } else {
+ updateValueSize(this.table.get(get));
+ }
+ }
+
+ @Override
+ protected int getReportingPeriod() {
+ int period = opts.perClientRunRows / 10;
+ return period == 0 ? opts.perClientRunRows : period;
+ }
+
+ @Override
+ protected void testTakedown() throws IOException {
+ if (this.gets != null && this.gets.size() > 0) {
+ this.table.get(gets);
+ this.gets.clear();
+ }
+ super.testTakedown();
+ }
+ }
+
+ static class RandomWriteTest extends BufferedMutatorTest {
+ RandomWriteTest(Connection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ void testRow(final int i) throws IOException {
+ byte[] row = getRandomRow(this.rand, opts.totalRows);
+ Put put = new Put(row);
+ for (int column = 0; column < opts.columns; column++) {
+ byte [] qualifier = column == 0? COLUMN_ZERO: Bytes.toBytes("" + column);
+ byte[] value = generateData(this.rand, getValueLength(this.rand));
+ if (opts.useTags) {
+ byte[] tag = generateData(this.rand, TAG_LENGTH);
+ Tag[] tags = new Tag[opts.noOfTags];
+ for (int n = 0; n < opts.noOfTags; n++) {
+ Tag t = new ArrayBackedTag((byte) n, tag);
+ tags[n] = t;
+ }
+ KeyValue kv = new KeyValue(row, FAMILY_NAME, qualifier, HConstants.LATEST_TIMESTAMP,
+ value, tags);
+ put.add(kv);
+ updateValueSize(kv.getValueLength());
+ } else {
+ put.addColumn(FAMILY_NAME, qualifier, value);
+ updateValueSize(value.length);
+ }
+ }
+ put.setDurability(opts.writeToWAL ? Durability.SYNC_WAL : Durability.SKIP_WAL);
+ if (opts.autoFlush) {
+ table.put(put);
+ } else {
+ mutator.mutate(put);
+ }
+ }
+ }
+
+ static class ScanTest extends TableTest {
+ private ResultScanner testScanner;
+
+ ScanTest(Connection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ void testTakedown() throws IOException {
+ if (this.testScanner != null) {
+ this.testScanner.close();
+ }
+ super.testTakedown();
+ }
+
+ @Override
+ void testRow(final int i) throws IOException {
+ if (this.testScanner == null) {
+ Scan scan = new Scan().withStartRow(format(opts.startRow)).setCaching(opts.caching)
+ .setCacheBlocks(opts.cacheBlocks).setAsyncPrefetch(opts.asyncPrefetch)
+ .setReadType(opts.scanReadType);
+ if (opts.addColumns) {
+ scan.addColumn(FAMILY_NAME, QUALIFIER_NAME);
+ } else {
+ scan.addFamily(FAMILY_NAME);
+ }
+ if (opts.filterAll) {
+ scan.setFilter(new FilterAllFilter());
+ }
+ this.testScanner = table.getScanner(scan);
+ }
+ Result r = testScanner.next();
+ updateValueSize(r);
+ }
+ }
+
+ /**
+ * Base class for operations that are CAS-like; that read a value and then set it based off what
+ * they read. In this category is increment, append, checkAndPut, etc.
+ *
+ * <p>These operations also want some concurrency going on. Usually when these tests run, they
+ * operate in their own part of the key range. In CASTest, we will have them all overlap on the
+ * same key space. We do this with our getStartRow and getLastRow overrides.
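+ * For example, with two clients each running 1000 rows, both work rows 0..999 rather than two
+ * disjoint 1000-row ranges, so the CAS-style operations genuinely contend on the same cells.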
+ */
+ static abstract class CASTableTest extends TableTest {
+ private final byte [] qualifier;
+ CASTableTest(Connection con, TestOptions options, Status status) {
+ super(con, options, status);
+ qualifier = Bytes.toBytes(this.getClass().getSimpleName());
+ }
+
+ byte [] getQualifier() {
+ return this.qualifier;
+ }
+
+ @Override
+ int getStartRow() {
+ return 0;
+ }
+
+ @Override
+ int getLastRow() {
+ return opts.perClientRunRows;
+ }
+ }
+
+ static class IncrementTest extends CASTableTest {
+ IncrementTest(Connection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ void testRow(final int i) throws IOException {
+ Increment increment = new Increment(format(i));
+ increment.addColumn(FAMILY_NAME, getQualifier(), 1L);
+ updateValueSize(this.table.increment(increment));
+ }
+ }
+
+ static class AppendTest extends CASTableTest {
+ AppendTest(Connection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ void testRow(final int i) throws IOException {
+ byte [] bytes = format(i);
+ Append append = new Append(bytes);
+ append.addColumn(FAMILY_NAME, getQualifier(), bytes);
+ updateValueSize(this.table.append(append));
+ }
+ }
+
+ static class CheckAndMutateTest extends CASTableTest {
+ CheckAndMutateTest(Connection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ void testRow(final int i) throws IOException {
+ byte [] bytes = format(i);
+ // Put a known value so when we go to check it, it is there.
+ Put put = new Put(bytes);
+ put.addColumn(FAMILY_NAME, getQualifier(), bytes);
+ this.table.put(put);
+ RowMutations mutations = new RowMutations(bytes);
+ mutations.add(put);
+ this.table.checkAndMutate(bytes, FAMILY_NAME, getQualifier(), CompareOp.EQUAL, bytes,
+ mutations);
+ }
+ }
+
+ static class CheckAndPutTest extends CASTableTest {
+ CheckAndPutTest(Connection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ void testRow(final int i) throws IOException {
+ byte [] bytes = format(i);
+ // Put a known value so when we go to check it, it is there.
+ Put put = new Put(bytes);
+ put.addColumn(FAMILY_NAME, getQualifier(), bytes);
+ this.table.put(put);
+ this.table.checkAndPut(bytes, FAMILY_NAME, getQualifier(), CompareOp.EQUAL, bytes, put);
+ }
+ }
+
+ static class CheckAndDeleteTest extends CASTableTest {
+ CheckAndDeleteTest(Connection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ void testRow(final int i) throws IOException {
+ byte [] bytes = format(i);
+ // Put a known value so when we go to check it, it is there.
+ Put put = new Put(bytes);
+ put.addColumn(FAMILY_NAME, getQualifier(), bytes);
+ this.table.put(put);
+ Delete delete = new Delete(put.getRow());
+ delete.addColumn(FAMILY_NAME, getQualifier());
+ this.table.checkAndDelete(bytes, FAMILY_NAME, getQualifier(), CompareOp.EQUAL, bytes, delete);
+ }
+ }
+
+ static class SequentialReadTest extends TableTest {
+ SequentialReadTest(Connection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ void testRow(final int i) throws IOException {
+ Get get = new Get(format(i));
+ if (opts.addColumns) {
+ get.addColumn(FAMILY_NAME, QUALIFIER_NAME);
+ }
+ if (opts.filterAll) {
+ get.setFilter(new FilterAllFilter());
+ }
+ updateValueSize(table.get(get));
+ }
+ }
+
+ static class SequentialWriteTest extends BufferedMutatorTest {
+ SequentialWriteTest(Connection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ void testRow(final int i) throws IOException {
+ byte[] row = format(i);
+ Put put = new Put(row);
+ for (int column = 0; column < opts.columns; column++) {
+ byte [] qualifier = column == 0? COLUMN_ZERO: Bytes.toBytes("" + column);
+ byte[] value = generateData(this.rand, getValueLength(this.rand));
+ if (opts.useTags) {
+ byte[] tag = generateData(this.rand, TAG_LENGTH);
+ Tag[] tags = new Tag[opts.noOfTags];
+ for (int n = 0; n < opts.noOfTags; n++) {
+ Tag t = new ArrayBackedTag((byte) n, tag);
+ tags[n] = t;
+ }
+ KeyValue kv = new KeyValue(row, FAMILY_NAME, qualifier, HConstants.LATEST_TIMESTAMP,
+ value, tags);
+ put.add(kv);
+ updateValueSize(kv.getValueLength());
+ } else {
+ put.addColumn(FAMILY_NAME, qualifier, value);
+ updateValueSize(value.length);
+ }
+ }
+ put.setDurability(opts.writeToWAL ? Durability.SYNC_WAL : Durability.SKIP_WAL);
+ if (opts.autoFlush) {
+ table.put(put);
+ } else {
+ mutator.mutate(put);
+ }
+ }
+ }
+
+ static class FilteredScanTest extends TableTest {
+ protected static final Log LOG = LogFactory.getLog(FilteredScanTest.class.getName());
+
+ FilteredScanTest(Connection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ void testRow(int i) throws IOException {
+ byte[] value = generateData(this.rand, getValueLength(this.rand));
+ Scan scan = constructScan(value);
+ ResultScanner scanner = null;
+ try {
+ scanner = this.table.getScanner(scan);
+ for (Result r = null; (r = scanner.next()) != null;) {
+ updateValueSize(r);
+ }
+ } finally {
+ if (scanner != null) scanner.close();
+ }
+ }
+
+ protected Scan constructScan(byte[] valuePrefix) throws IOException {
+ FilterList list = new FilterList();
+ Filter filter = new SingleColumnValueFilter(
+ FAMILY_NAME, COLUMN_ZERO, CompareFilter.CompareOp.EQUAL,
+ new BinaryComparator(valuePrefix)
+ );
+ list.addFilter(filter);
+ if(opts.filterAll) {
+ list.addFilter(new FilterAllFilter());
+ }
+ Scan scan = new Scan().setCaching(opts.caching).setCacheBlocks(opts.cacheBlocks)
+ .setAsyncPrefetch(opts.asyncPrefetch).setReadType(opts.scanReadType);
+ if (opts.addColumns) {
+ scan.addColumn(FAMILY_NAME, QUALIFIER_NAME);
+ } else {
+ scan.addFamily(FAMILY_NAME);
+ }
+ scan.setFilter(list);
+ return scan;
+ }
+ }
+
+ /**
+ * Compute a throughput rate in MB/s.
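+ * For example (hypothetical numbers): 1,000,000 single-column rows carrying 1,000-byte values
+ * plus roughly 30 bytes of key/family/qualifier overhead, written in 60,000 ms, come to about
+ * 1.03e9 bytes over 60 seconds, i.e. roughly 16 MB/s.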
+ * @param rows Number of records consumed.
+ * @param timeMs Time taken in milliseconds.
+ * @return String value with label, ie '123.76 MB/s'
+ */
+ private static String calculateMbps(int rows, long timeMs, final int valueSize, int columns) {
+ BigDecimal rowSize = BigDecimal.valueOf(ROW_LENGTH +
+ ((valueSize + FAMILY_NAME.length + COLUMN_ZERO.length) * columns));
+ BigDecimal mbps = BigDecimal.valueOf(rows).multiply(rowSize, CXT)
+ .divide(BigDecimal.valueOf(timeMs), CXT).multiply(MS_PER_SEC, CXT)
+ .divide(BYTES_PER_MB, CXT);
+ return FMT.format(mbps) + " MB/s";
+ }
+
+ /*
+ * Format passed integer.
+ * @param number
+ * @return Zero-prefixed, ROW_LENGTH-byte wide decimal version of the passed
+ * number (takes the absolute value in case the number is negative).
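+ * For example, assuming ROW_LENGTH is 26, format(123) yields a 26-byte array of ASCII digits:
+ * twenty-three '0' characters followed by "123".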
+ */
+ public static byte [] format(final int number) {
+ byte [] b = new byte[ROW_LENGTH];
+ int d = Math.abs(number);
+ for (int i = b.length - 1; i >= 0; i--) {
+ b[i] = (byte)((d % 10) + '0');
+ d /= 10;
+ }
+ return b;
+ }
+
+ /*
+ * This method takes some time and is run inline while uploading data. For
+ * example, in the mapfile test, generation of the key and value
+ * consumes about 30% of the CPU time.
+ * @return Generated random value to insert into a table cell.
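+ * The output is built from 8-byte runs of a single random uppercase letter (A-Z); only one
+ * random draw is made per 8-byte run.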
+ */
+ public static byte[] generateData(final Random r, int length) {
+ byte [] b = new byte [length];
+ int i;
+
+ for(i = 0; i < (length-8); i += 8) {
+ b[i] = (byte) (65 + r.nextInt(26));
+ b[i+1] = b[i];
+ b[i+2] = b[i];
+ b[i+3] = b[i];
+ b[i+4] = b[i];
+ b[i+5] = b[i];
+ b[i+6] = b[i];
+ b[i+7] = b[i];
+ }
+
+ byte a = (byte) (65 + r.nextInt(26));
+ for(; i < length; i++) {
+ b[i] = a;
+ }
+ return b;
+ }
+
+ static byte [] getRandomRow(final Random random, final int totalRows) {
+ return format(generateRandomRow(random, totalRows));
+ }
+
+ static int generateRandomRow(final Random random, final int totalRows) {
+ return random.nextInt(Integer.MAX_VALUE) % totalRows;
+ }
+
+ static RunResult runOneClient(final Class<? extends TestBase> cmd, Configuration conf,
+ Connection con, AsyncConnection asyncCon, TestOptions opts, final Status status)
+ throws IOException, InterruptedException {
+ status.setStatus("Start " + cmd + " at offset " + opts.startRow + " for "
+ + opts.perClientRunRows + " rows");
+ long totalElapsedTime;
+
+ final TestBase t;
+ try {
+ if (AsyncTest.class.isAssignableFrom(cmd)) {
+ Class<? extends AsyncTest> newCmd = (Class<? extends AsyncTest>) cmd;
+ Constructor<? extends AsyncTest> constructor =
+ newCmd.getDeclaredConstructor(AsyncConnection.class, TestOptions.class, Status.class);
+ t = constructor.newInstance(asyncCon, opts, status);
+ } else {
+ Class<? extends Test> newCmd = (Class<? extends Test>) cmd;
+ Constructor<? extends Test> constructor =
+ newCmd.getDeclaredConstructor(Connection.class, TestOptions.class, Status.class);
+ t = constructor.newInstance(con, opts, status);
+ }
+ } catch (NoSuchMethodException e) {
+ throw new IllegalArgumentException("Invalid command class: " + cmd.getName()
+ + ". It does not provide a constructor as described by "
+ + "the javadoc comment. Available constructors are: "
+ + Arrays.toString(cmd.getConstructors()));
+ } catch (Exception e) {
+ throw new IllegalStateException("Failed to construct command class", e);
+ }
+ totalElapsedTime = t.test();
+
+ status.setStatus("Finished " + cmd + " in " + totalElapsedTime +
+ "ms at offset " + opts.startRow + " for " + opts.perClientRunRows + " rows" +
+ " (" + calculateMbps((int)(opts.perClientRunRows * opts.sampleRate), totalElapsedTime,
+ getAverageValueLength(opts), opts.columns) + ")");
+
+ return new RunResult(totalElapsedTime, t.getLatencyHistogram());
+ }
+
+ private static int getAverageValueLength(final TestOptions opts) {
+ return opts.valueRandom? opts.valueSize/2: opts.valueSize;
+ }
+
+ private void runTest(final Class<? extends TestBase> cmd, TestOptions opts) throws IOException,
+ InterruptedException, ClassNotFoundException, ExecutionException {
+ // Log the configuration we're going to run with. Uses JSON mapper because lazy. It'll do
+ // the TestOptions introspection for us and dump the output in a readable format.
+ LOG.info(cmd.getSimpleName() + " test run options=" + MAPPER.writeValueAsString(opts));
+ Admin admin = null;
+ Connection connection = null;
+ try {
+ connection = ConnectionFactory.createConnection(getConf());
+ admin = connection.getAdmin();
+ checkTable(admin, opts);
+ } finally {
+ if (admin != null) admin.close();
+ if (connection != null) connection.close();
+ }
+ if (opts.nomapred) {
+ doLocalClients(opts, getConf());
+ } else {
+ doMapReduce(opts, getConf());
+ }
+ }
+
+ protected void printUsage() {
+ printUsage(this.getClass().getName(), null);
+ }
+
+ protected static void printUsage(final String message) {
+ printUsage(PerformanceEvaluation.class.getName(), message);
+ }
+
+ protected static void printUsageAndExit(final String message, final int exitCode) {
+ printUsage(message);
+ System.exit(exitCode);
+ }
+
+ protected static void printUsage(final String className, final String message) {
+ if (message != null && message.length() > 0) {
+ System.err.println(message);
+ }
+ System.err.println("Usage: java " + className + " \\");
+ System.err.println(" <OPTIONS> [-D<property=value>]* <command> <nclients>");
+ System.err.println();
+ System.err.println("General Options:");
+ System.err.println(" nomapred Run multiple clients using threads " +
+ "(rather than use mapreduce)");
+ System.err.println(" oneCon all the threads share the same connection. Default: False");
+ System.err.println(" sampleRate Execute test on a sample of total " +
+ "rows. Only supported by randomRead. Default: 1.0");
+ System.err.println(" period Report every 'period' rows: " +
+ "Default: opts.perClientRunRows / 10 = " + DEFAULT_OPTS.getPerClientRunRows()/10);
+ System.err.println(" cycles How many times to cycle the test. Defaults: 1.");
+ System.err.println(" traceRate Enable HTrace spans. Initiate tracing every N rows. " +
+ "Default: 0");
+ System.err.println(" latency Set to report operation latencies. Default: False");
+ System.err.println(" measureAfter Start to measure the latency once 'measureAfter'" +
+ " rows have been treated. Default: 0");
+ System.err.println(" valueSize Pass value size to use: Default: "
+ + DEFAULT_OPTS.getValueSize());
+ System.err.println(" valueRandom Set if we should vary value size between 0 and " +
+ "'valueSize'; set on read for stats on size: Default: Not set.");
+ System.err.println(" blockEncoding Block encoding to use. Value should be one of "
+ + Arrays.toString(DataBlockEncoding.values()) + ". Default: NONE");
+ System.err.println();
+ System.err.println("Table Creation / Write Tests:");
+ System.err.println(" table Alternate table name. Default: 'TestTable'");
+ System.err.println(" rows Rows each client runs. Default: "
+ + DEFAULT_OPTS.getPerClientRunRows()
+ + ". In case of randomReads and randomSeekScans this could"
+ + " be specified along with --size to specify the number of rows to be scanned within"
+ + " the total range specified by the size.");
+ System.err.println(
+ " size Total size in GiB. Mutually exclusive with --rows for writes and scans"
+ + ". But for randomReads and randomSeekScans when you use size with --rows you could"
+ + " use size to specify the end range and --rows"
+ + " specifies the number of rows within that range. " + "Default: 1.0.");
+ System.err.println(" compress Compression type to use (GZ, LZO, ...). Default: 'NONE'");
+ System.err.println(" flushCommits Used to determine if the test should flush the table. " +
+ "Default: false");
+ System.err.println(" valueZipf Set if we should vary value size between 0 and " +
+ "'valueSize' in zipf form: Default: Not set.");
+ System.err.println(" writeToWAL Set writeToWAL on puts. Default: True");
+ System.err.println(" autoFlush Set autoFlush on htable. Default: False");
+ System.err.println(" presplit Create presplit table. If a table with same name exists,"
+ + " it'll be deleted and recreated (instead of verifying count of its existing regions). "
+ + "Recommended for accurate perf analysis (see guide). Default: disabled");
+ System.err.println(" usetags Writes tags along with KVs. Use with HFile V3. " +
+ "Default: false");
+ System.err.println(" numoftags Specify the no of tags that would be needed. " +
+ "This works only if usetags is true. Default: " + DEFAULT_OPTS.noOfTags);
+ System.err.println(" splitPolicy Specify a custom RegionSplitPolicy for the table.");
+ System.err.println(" columns Columns to write per row. Default: 1");
+ System.err.println();
+ System.err.println("Read Tests:");
+ System.err.println(" filterAll Helps to filter out all the rows on the server side"
+ + " there by not returning any thing back to the client. Helps to check the server side"
+ + " performance. Uses FilterAllFilter internally. ");
+ System.err.println(" multiGet Batch gets together into groups of N. Only supported " +
+ "by randomRead. Default: disabled");
+ System.err.println(" inmemory Tries to keep the HFiles of the CF " +
+ "inmemory as far as possible. Not guaranteed that reads are always served " +
+ "from memory. Default: false");
+ System.err.println(" bloomFilter Bloom filter type, one of "
+ + Arrays.toString(BloomType.values()));
+ System.err.println(" blockSize Blocksize to use when writing out hfiles. ");
+ System.err.println(" inmemoryCompaction Makes the column family to do inmemory flushes/compactions. "
+ + "Uses the CompactingMemstore");
+ System.err.println(" addColumns Adds columns to scans/gets explicitly. Default: true");
+ System.err.println(" replicas Enable region replica testing. Defaults: 1.");
+ System.err.println(" randomSleep Do a random sleep before each get between 0 and entered value. Defaults: 0");
+ System.err.println(" caching Scan caching to use. Default: 30");
+ System.err.println(" asyncPrefetch Enable asyncPrefetch for scan");
+ System.err.println(" cacheBlocks Set the cacheBlocks option for scan. Default: true");
+ System.err.println(" scanReadType Set the readType option for scan, stream/pread/default. Default: default");
+ System.err.println();
+ System.err.println(" Note: -D properties will be applied to the conf used. ");
+ System.err.println(" For example: ");
+ System.err.println(" -Dmapreduce.output.fileoutputformat.compress=true");
+ System.err.println(" -Dmapreduce.task.timeout=60000");
+ System.err.println();
+ System.err.println("Command:");
+ for (CmdDescriptor command : COMMANDS.values()) {
+ System.err.println(String.format(" %-20s %s", command.getName(), command.getDescription()));
+ }
+ System.err.println();
+ System.err.println("Args:");
+ System.err.println(" nclients Integer. Required. Total number of clients "
+ + "(and HRegionServers) running. 1 <= value <= 500");
+ System.err.println("Examples:");
+ System.err.println(" To run a single client doing the default 1M sequentialWrites:");
+ System.err.println(" $ hbase " + className + " sequentialWrite 1");
+ System.err.println(" To run 10 clients doing increments over ten rows:");
+ System.err.println(" $ hbase " + className + " --rows=10 --nomapred increment 10");
+ }
+
+ /**
+ * Parse options passed in via an arguments array. Assumes that array has been split
+ * on white-space and placed into a {@code Queue}. Any unknown arguments will remain
+ * in the queue at the conclusion of this method call. It's up to the caller to deal
+ * with these unrecognized arguments.
+ */
+ static TestOptions parseOpts(Queue<String> args) {
+ TestOptions opts = new TestOptions();
+
+ String cmd = null;
+ while ((cmd = args.poll()) != null) {
+ if (cmd.equals("-h") || cmd.startsWith("--h")) {
+ // place item back onto queue so that caller knows parsing was incomplete
+ args.add(cmd);
+ break;
+ }
+
+ final String nmr = "--nomapred";
+ if (cmd.startsWith(nmr)) {
+ opts.nomapred = true;
+ continue;
+ }
+
+ final String rows = "--rows=";
+ if (cmd.startsWith(rows)) {
+ opts.perClientRunRows = Integer.parseInt(cmd.substring(rows.length()));
+ continue;
+ }
+
+ final String cycles = "--cycles=";
+ if (cmd.startsWith(cycles)) {
+ opts.cycles = Integer.parseInt(cmd.substring(cycles.length()));
+ continue;
+ }
+
+ final String sampleRate = "--sampleRate=";
+ if (cmd.startsWith(sampleRate)) {
+ opts.sampleRate = Float.parseFloat(cmd.substring(sampleRate.length()));
+ continue;
+ }
+
+ final String table = "--table=";
+ if (cmd.startsWith(table)) {
+ opts.tableName = cmd.substring(table.length());
+ continue;
+ }
+
+ final String startRow = "--startRow=";
+ if (cmd.startsWith(startRow)) {
+ opts.startRow = Integer.parseInt(cmd.substring(startRow.length()));
+ continue;
+ }
+
+ final String compress = "--compress=";
+ if (cmd.startsWith(compress)) {
+ opts.compression = Compression.Algorithm.valueOf(cmd.substring(compress.length()));
+ continue;
+ }
+
+ final String traceRate = "--traceRate=";
+ if (cmd.startsWith(traceRate)) {
+ opts.traceRate = Double.parseDouble(cmd.substring(traceRate.length()));
+ continue;
+ }
+
+ final String blockEncoding = "--blockEncoding=";
+ if (cmd.startsWith(blockEncoding)) {
+ opts.blockEncoding = DataBlockEncoding.valueOf(cmd.substring(blockEncoding.length()));
+ continue;
+ }
+
+ final String flushCommits = "--flushCommits=";
+ if (cmd.startsWith(flushCommits)) {
+ opts.flushCommits = Boolean.parseBoolean(cmd.substring(flushCommits.length()));
+ continue;
+ }
+
+ final String writeToWAL = "--writeToWAL=";
+ if (cmd.startsWith(writeToWAL)) {
+ opts.writeToWAL = Boolean.parseBoolean(cmd.substring(writeToWAL.length()));
+ continue;
+ }
+
+ final String presplit = "--presplit=";
+ if (cmd.startsWith(presplit)) {
+ opts.presplitRegions = Integer.parseInt(cmd.substring(presplit.length()));
+ continue;
+ }
+
+ final String inMemory = "--inmemory=";
+ if (cmd.startsWith(inMemory)) {
+ opts.inMemoryCF = Boolean.parseBoolean(cmd.substring(inMemory.length()));
+ continue;
+ }
+
+ final String autoFlush = "--autoFlush=";
+ if (cmd.startsWith(autoFlush)) {
+ opts.autoFlush = Boolean.parseBoolean(cmd.substring(autoFlush.length()));
+ continue;
+ }
+
+ final String onceCon = "--oneCon=";
+ if (cmd.startsWith(onceCon)) {
+ opts.oneCon = Boolean.parseBoolean(cmd.substring(onceCon.length()));
+ continue;
+ }
+
+ final String latency = "--latency";
+ if (cmd.startsWith(latency)) {
+ opts.reportLatency = true;
+ continue;
+ }
+
+ final String multiGet = "--multiGet=";
+ if (cmd.startsWith(multiGet)) {
+ opts.multiGet = Integer.parseInt(cmd.substring(multiGet.length()));
+ continue;
+ }
+
+ final String useTags = "--usetags=";
+ if (cmd.startsWith(useTags)) {
+ opts.useTags = Boolean.parseBoolean(cmd.substring(useTags.length()));
+ continue;
+ }
+
+ final String noOfTags = "--numoftags=";
+ if (cmd.startsWith(noOfTags)) {
+ opts.noOfTags = Integer.parseInt(cmd.substring(noOfTags.length()));
+ continue;
+ }
+
+ final String replicas = "--replicas=";
+ if (cmd.startsWith(replicas)) {
+ opts.replicas = Integer.parseInt(cmd.substring(replicas.length()));
+ continue;
+ }
+
+ final String filterOutAll = "--filterAll";
+ if (cmd.startsWith(filterOutAll)) {
+ opts.filterAll = true;
+ continue;
+ }
+
+ final String size = "--size=";
+ if (cmd.startsWith(size)) {
+ opts.size = Float.parseFloat(cmd.substring(size.length()));
+ if (opts.size <= 1.0f) throw new IllegalStateException("Size must be > 1; i.e. 1GB");
+ continue;
+ }
+
+ final String splitPolicy = "--splitPolicy=";
+ if (cmd.startsWith(splitPolicy)) {
+ opts.splitPolicy = cmd.substring(splitPolicy.length());
+ continue;
+ }
+
+ final String randomSleep = "--randomSleep=";
+ if (cmd.startsWith(randomSleep)) {
+ opts.randomSleep = Integer.parseInt(cmd.substring(randomSleep.length()));
+ continue;
+ }
+
+ final String measureAfter = "--measureAfter=";
+ if (cmd.startsWith(measureAfter)) {
+ opts.measureAfter = Integer.parseInt(cmd.substring(measureAfter.length()));
+ continue;
+ }
+
+ final String bloomFilter = "--bloomFilter=";
+ if (cmd.startsWith(bloomFilter)) {
+ opts.bloomType = BloomType.valueOf(cmd.substring(bloomFilter.length()));
+ continue;
+ }
+
+ final String blockSize = "--blockSize=";
+ if (cmd.startsWith(blockSize)) {
+ opts.blockSize = Integer.parseInt(cmd.substring(blockSize.length()));
+ continue;
+ }
+
+ final String valueSize = "--valueSize=";
+ if (cmd.startsWith(valueSize)) {
+ opts.valueSize = Integer.parseInt(cmd.substring(valueSize.length()));
+ continue;
+ }
+
+ final String valueRandom = "--valueRandom";
+ if (cmd.startsWith(valueRandom)) {
+ opts.valueRandom = true;
+ if (opts.valueZipf) {
+ throw new IllegalStateException("Either valueZipf or valueRandom but not both");
+ }
+ continue;
+ }
+
+ final String valueZipf = "--valueZipf";
+ if (cmd.startsWith(valueZipf)) {
+ opts.valueZipf = true;
+ if (opts.valueRandom) {
+ throw new IllegalStateException("Either valueZipf or valueRandom but not both");
+ }
+ continue;
+ }
+
+ final String period = "--period=";
+ if (cmd.startsWith(period)) {
+ opts.period = Integer.parseInt(cmd.substring(period.length()));
+ continue;
+ }
+
+ final String addColumns = "--addColumns=";
+ if (cmd.startsWith(addColumns)) {
+
<TRUNCATED>
[12/41] hbase git commit: HBASE-18640 Move mapreduce out of
hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/replication/VerifyReplication.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/replication/VerifyReplication.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/replication/VerifyReplication.java
deleted file mode 100644
index 8bb266e..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/replication/VerifyReplication.java
+++ /dev/null
@@ -1,700 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce.replication;
-
-import java.io.IOException;
-import java.util.Arrays;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.Abortable;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Get;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.client.TableSnapshotScanner;
-import org.apache.hadoop.hbase.filter.Filter;
-import org.apache.hadoop.hbase.filter.FilterList;
-import org.apache.hadoop.hbase.filter.PrefixFilter;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
-import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
-import org.apache.hadoop.hbase.mapreduce.TableMapper;
-import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat;
-import org.apache.hadoop.hbase.mapreduce.TableSplit;
-import org.apache.hadoop.hbase.replication.ReplicationException;
-import org.apache.hadoop.hbase.replication.ReplicationFactory;
-import org.apache.hadoop.hbase.replication.ReplicationPeerConfig;
-import org.apache.hadoop.hbase.replication.ReplicationPeerZKImpl;
-import org.apache.hadoop.hbase.replication.ReplicationPeers;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.FSUtils;
-import org.apache.hadoop.hbase.util.Pair;
-import org.apache.hadoop.hbase.util.Threads;
-import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.MRJobConfig;
-import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.annotations.VisibleForTesting;
-
-/**
- * This map-only job compares the data from a local table with a remote one.
- * Every cell is compared and must have exactly the same keys (including timestamp)
- * as well as the same value. It is possible to restrict the job by time range and
- * families. The peer id that's provided must match the one given when the
- * replication stream was set up.
- * <p>
- * Two counters are provided, Verifier.Counters.GOODROWS and BADROWS. The reason
- * why a row is different is shown in the map's log.
- */
-public class VerifyReplication extends Configured implements Tool {
-
- private static final Log LOG =
- LogFactory.getLog(VerifyReplication.class);
-
- public final static String NAME = "verifyrep";
- private final static String PEER_CONFIG_PREFIX = NAME + ".peer.";
- long startTime = 0;
- long endTime = Long.MAX_VALUE;
- int batch = -1;
- int versions = -1;
- String tableName = null;
- String families = null;
- String delimiter = "";
- String peerId = null;
- String rowPrefixes = null;
- int sleepMsBeforeReCompare = 0;
- boolean verbose = false;
- boolean includeDeletedCells = false;
- //Source table snapshot name
- String sourceSnapshotName = null;
- //Temp location in source cluster to restore source snapshot
- String sourceSnapshotTmpDir = null;
- //Peer table snapshot name
- String peerSnapshotName = null;
- //Temp location in peer cluster to restore peer snapshot
- String peerSnapshotTmpDir = null;
- //Peer cluster Hadoop FS address
- String peerFSAddress = null;
- //Peer cluster HBase root dir location
- String peerHBaseRootAddress = null;
-
-
- private final static String JOB_NAME_CONF_KEY = "mapreduce.job.name";
-
- /**
- * Map-only comparator for 2 tables
- */
- public static class Verifier
- extends TableMapper<ImmutableBytesWritable, Put> {
-
-
-
- public static enum Counters {
- GOODROWS, BADROWS, ONLY_IN_SOURCE_TABLE_ROWS, ONLY_IN_PEER_TABLE_ROWS, CONTENT_DIFFERENT_ROWS}
-
- private Connection sourceConnection;
- private Table sourceTable;
- private Connection replicatedConnection;
- private Table replicatedTable;
- private ResultScanner replicatedScanner;
- private Result currentCompareRowInPeerTable;
- private int sleepMsBeforeReCompare;
- private String delimiter = "";
- private boolean verbose = false;
- private int batch = -1;
-
- /**
- * Map method that compares every scanned row with the equivalent from
- * a distant cluster.
- * @param row The current table row key.
- * @param value The columns.
- * @param context The current context.
- * @throws IOException When something is broken with the data.
- */
- @Override
- public void map(ImmutableBytesWritable row, final Result value,
- Context context)
- throws IOException {
- if (replicatedScanner == null) {
- Configuration conf = context.getConfiguration();
- sleepMsBeforeReCompare = conf.getInt(NAME +".sleepMsBeforeReCompare", 0);
- delimiter = conf.get(NAME + ".delimiter", "");
- verbose = conf.getBoolean(NAME +".verbose", false);
- batch = conf.getInt(NAME + ".batch", -1);
- final Scan scan = new Scan();
- if (batch > 0) {
- scan.setBatch(batch);
- }
- scan.setCacheBlocks(false);
- scan.setCaching(conf.getInt(TableInputFormat.SCAN_CACHEDROWS, 1));
- long startTime = conf.getLong(NAME + ".startTime", 0);
- long endTime = conf.getLong(NAME + ".endTime", Long.MAX_VALUE);
- String families = conf.get(NAME + ".families", null);
- if(families != null) {
- String[] fams = families.split(",");
- for(String fam : fams) {
- scan.addFamily(Bytes.toBytes(fam));
- }
- }
- boolean includeDeletedCells = conf.getBoolean(NAME + ".includeDeletedCells", false);
- scan.setRaw(includeDeletedCells);
- String rowPrefixes = conf.get(NAME + ".rowPrefixes", null);
- setRowPrefixFilter(scan, rowPrefixes);
- scan.setTimeRange(startTime, endTime);
- int versions = conf.getInt(NAME+".versions", -1);
- LOG.info("Setting number of version inside map as: " + versions);
- if (versions >= 0) {
- scan.setMaxVersions(versions);
- }
- TableName tableName = TableName.valueOf(conf.get(NAME + ".tableName"));
- sourceConnection = ConnectionFactory.createConnection(conf);
- sourceTable = sourceConnection.getTable(tableName);
-
- final InputSplit tableSplit = context.getInputSplit();
-
- String zkClusterKey = conf.get(NAME + ".peerQuorumAddress");
- Configuration peerConf = HBaseConfiguration.createClusterConf(conf,
- zkClusterKey, PEER_CONFIG_PREFIX);
-
- replicatedConnection = ConnectionFactory.createConnection(peerConf);
- replicatedTable = replicatedConnection.getTable(tableName);
- scan.setStartRow(value.getRow());
-
- byte[] endRow = null;
- if (tableSplit instanceof TableSnapshotInputFormat.TableSnapshotRegionSplit) {
- endRow = ((TableSnapshotInputFormat.TableSnapshotRegionSplit) tableSplit).getRegionInfo()
- .getEndKey();
- } else {
- endRow = ((TableSplit) tableSplit).getEndRow();
- }
-
- scan.setStopRow(endRow);
-
- String peerSnapshotName = conf.get(NAME + ".peerSnapshotName", null);
- if (peerSnapshotName != null) {
- String peerSnapshotTmpDir = conf.get(NAME + ".peerSnapshotTmpDir", null);
- String peerFSAddress = conf.get(NAME + ".peerFSAddress", null);
- String peerHBaseRootAddress = conf.get(NAME + ".peerHBaseRootAddress", null);
- FileSystem.setDefaultUri(peerConf, peerFSAddress);
- FSUtils.setRootDir(peerConf, new Path(peerHBaseRootAddress));
- LOG.info("Using peer snapshot:" + peerSnapshotName + " with temp dir:"
- + peerSnapshotTmpDir + " peer root uri:" + FSUtils.getRootDir(peerConf)
- + " peerFSAddress:" + peerFSAddress);
-
- replicatedScanner = new TableSnapshotScanner(peerConf,
- new Path(peerFSAddress, peerSnapshotTmpDir), peerSnapshotName, scan);
- } else {
- replicatedScanner = replicatedTable.getScanner(scan);
- }
- currentCompareRowInPeerTable = replicatedScanner.next();
- }
- while (true) {
- if (currentCompareRowInPeerTable == null) {
- // reach the region end of peer table, row only in source table
- logFailRowAndIncreaseCounter(context, Counters.ONLY_IN_SOURCE_TABLE_ROWS, value);
- break;
- }
- int rowCmpRet = Bytes.compareTo(value.getRow(), currentCompareRowInPeerTable.getRow());
- if (rowCmpRet == 0) {
- // rowkey is same, need to compare the content of the row
- try {
- Result.compareResults(value, currentCompareRowInPeerTable);
- context.getCounter(Counters.GOODROWS).increment(1);
- if (verbose) {
- LOG.info("Good row key: " + delimiter
- + Bytes.toStringBinary(value.getRow()) + delimiter);
- }
- } catch (Exception e) {
- logFailRowAndIncreaseCounter(context, Counters.CONTENT_DIFFERENT_ROWS, value);
- }
- currentCompareRowInPeerTable = replicatedScanner.next();
- break;
- } else if (rowCmpRet < 0) {
- // row only exists in source table
- logFailRowAndIncreaseCounter(context, Counters.ONLY_IN_SOURCE_TABLE_ROWS, value);
- break;
- } else {
- // row only exists in peer table
- logFailRowAndIncreaseCounter(context, Counters.ONLY_IN_PEER_TABLE_ROWS,
- currentCompareRowInPeerTable);
- currentCompareRowInPeerTable = replicatedScanner.next();
- }
- }
- }
-
- private void logFailRowAndIncreaseCounter(Context context, Counters counter, Result row) {
- if (sleepMsBeforeReCompare > 0) {
- Threads.sleep(sleepMsBeforeReCompare);
- try {
- Result sourceResult = sourceTable.get(new Get(row.getRow()));
- Result replicatedResult = replicatedTable.get(new Get(row.getRow()));
- Result.compareResults(sourceResult, replicatedResult);
- if (!sourceResult.isEmpty()) {
- context.getCounter(Counters.GOODROWS).increment(1);
- if (verbose) {
- LOG.info("Good row key (with recompare): " + delimiter + Bytes.toStringBinary(row.getRow())
- + delimiter);
- }
- }
- return;
- } catch (Exception e) {
- LOG.error("recompare fail after sleep, rowkey=" + delimiter +
- Bytes.toStringBinary(row.getRow()) + delimiter);
- }
- }
- context.getCounter(counter).increment(1);
- context.getCounter(Counters.BADROWS).increment(1);
- LOG.error(counter.toString() + ", rowkey=" + delimiter + Bytes.toStringBinary(row.getRow()) +
- delimiter);
- }
-
- @Override
- protected void cleanup(Context context) {
- if (replicatedScanner != null) {
- try {
- while (currentCompareRowInPeerTable != null) {
- logFailRowAndIncreaseCounter(context, Counters.ONLY_IN_PEER_TABLE_ROWS,
- currentCompareRowInPeerTable);
- currentCompareRowInPeerTable = replicatedScanner.next();
- }
- } catch (Exception e) {
- LOG.error("fail to scan peer table in cleanup", e);
- } finally {
- replicatedScanner.close();
- replicatedScanner = null;
- }
- }
-
- if (sourceTable != null) {
- try {
- sourceTable.close();
- } catch (IOException e) {
- LOG.error("fail to close source table in cleanup", e);
- }
- }
- if(sourceConnection != null){
- try {
- sourceConnection.close();
- } catch (Exception e) {
- LOG.error("fail to close source connection in cleanup", e);
- }
- }
-
- if(replicatedTable != null){
- try{
- replicatedTable.close();
- } catch (Exception e) {
- LOG.error("fail to close replicated table in cleanup", e);
- }
- }
- if(replicatedConnection != null){
- try {
- replicatedConnection.close();
- } catch (Exception e) {
- LOG.error("fail to close replicated connection in cleanup", e);
- }
- }
- }
- }
-
- private static Pair<ReplicationPeerConfig, Configuration> getPeerQuorumConfig(
- final Configuration conf, String peerId) throws IOException {
- ZooKeeperWatcher localZKW = null;
- ReplicationPeerZKImpl peer = null;
- try {
- localZKW = new ZooKeeperWatcher(conf, "VerifyReplication",
- new Abortable() {
- @Override public void abort(String why, Throwable e) {}
- @Override public boolean isAborted() {return false;}
- });
-
- ReplicationPeers rp = ReplicationFactory.getReplicationPeers(localZKW, conf, localZKW);
- rp.init();
-
- Pair<ReplicationPeerConfig, Configuration> pair = rp.getPeerConf(peerId);
- if (pair == null) {
- throw new IOException("Couldn't get peer conf!");
- }
-
- return pair;
- } catch (ReplicationException e) {
- throw new IOException(
- "An error occurred while trying to connect to the remove peer cluster", e);
- } finally {
- if (peer != null) {
- peer.close();
- }
- if (localZKW != null) {
- localZKW.close();
- }
- }
- }
-
- /**
- * Sets up the actual job.
- *
- * @param conf The current configuration.
- * @param args The command line parameters.
- * @return The newly created job.
- * @throws java.io.IOException When setting up the job fails.
- */
- public Job createSubmittableJob(Configuration conf, String[] args)
- throws IOException {
- if (!doCommandLine(args)) {
- return null;
- }
- conf.set(NAME+".peerId", peerId);
- conf.set(NAME+".tableName", tableName);
- conf.setLong(NAME+".startTime", startTime);
- conf.setLong(NAME+".endTime", endTime);
- conf.setInt(NAME +".sleepMsBeforeReCompare", sleepMsBeforeReCompare);
- conf.set(NAME + ".delimiter", delimiter);
- conf.setInt(NAME + ".batch", batch);
- conf.setBoolean(NAME +".verbose", verbose);
- conf.setBoolean(NAME +".includeDeletedCells", includeDeletedCells);
- if (families != null) {
- conf.set(NAME+".families", families);
- }
- if (rowPrefixes != null){
- conf.set(NAME+".rowPrefixes", rowPrefixes);
- }
-
- Pair<ReplicationPeerConfig, Configuration> peerConfigPair = getPeerQuorumConfig(conf, peerId);
- ReplicationPeerConfig peerConfig = peerConfigPair.getFirst();
- String peerQuorumAddress = peerConfig.getClusterKey();
- LOG.info("Peer Quorum Address: " + peerQuorumAddress + ", Peer Configuration: " +
- peerConfig.getConfiguration());
- conf.set(NAME + ".peerQuorumAddress", peerQuorumAddress);
- HBaseConfiguration.setWithPrefix(conf, PEER_CONFIG_PREFIX,
- peerConfig.getConfiguration().entrySet());
-
- conf.setInt(NAME + ".versions", versions);
- LOG.info("Number of version: " + versions);
-
- //Set Snapshot specific parameters
- if (peerSnapshotName != null) {
- conf.set(NAME + ".peerSnapshotName", peerSnapshotName);
- conf.set(NAME + ".peerSnapshotTmpDir", peerSnapshotTmpDir);
- conf.set(NAME + ".peerFSAddress", peerFSAddress);
- conf.set(NAME + ".peerHBaseRootAddress", peerHBaseRootAddress);
-
- // This is to create HDFS delegation token for peer cluster in case of secured
- conf.setStrings(MRJobConfig.JOB_NAMENODES, peerFSAddress);
- }
-
- Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
- job.setJarByClass(VerifyReplication.class);
-
- Scan scan = new Scan();
- scan.setTimeRange(startTime, endTime);
- scan.setRaw(includeDeletedCells);
- scan.setCacheBlocks(false);
- if (batch > 0) {
- scan.setBatch(batch);
- }
- if (versions >= 0) {
- scan.setMaxVersions(versions);
- LOG.info("Number of versions set to " + versions);
- }
- if(families != null) {
- String[] fams = families.split(",");
- for(String fam : fams) {
- scan.addFamily(Bytes.toBytes(fam));
- }
- }
-
- setRowPrefixFilter(scan, rowPrefixes);
-
- if (sourceSnapshotName != null) {
- Path snapshotTempPath = new Path(sourceSnapshotTmpDir);
- LOG.info(
- "Using source snapshot-" + sourceSnapshotName + " with temp dir:" + sourceSnapshotTmpDir);
- TableMapReduceUtil.initTableSnapshotMapperJob(sourceSnapshotName, scan, Verifier.class, null,
- null, job, true, snapshotTempPath);
- } else {
- TableMapReduceUtil.initTableMapperJob(tableName, scan, Verifier.class, null, null, job);
- }
- Configuration peerClusterConf = peerConfigPair.getSecond();
- // Obtain the auth token from peer cluster
- TableMapReduceUtil.initCredentialsForCluster(job, peerClusterConf);
-
- job.setOutputFormatClass(NullOutputFormat.class);
- job.setNumReduceTasks(0);
- return job;
- }
-
- private static void setRowPrefixFilter(Scan scan, String rowPrefixes) {
- if (rowPrefixes != null && !rowPrefixes.isEmpty()) {
- String[] rowPrefixArray = rowPrefixes.split(",");
- Arrays.sort(rowPrefixArray);
- FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ONE);
- for (String prefix : rowPrefixArray) {
- Filter filter = new PrefixFilter(Bytes.toBytes(prefix));
- filterList.addFilter(filter);
- }
- scan.setFilter(filterList);
- byte[] startPrefixRow = Bytes.toBytes(rowPrefixArray[0]);
- byte[] lastPrefixRow = Bytes.toBytes(rowPrefixArray[rowPrefixArray.length -1]);
- setStartAndStopRows(scan, startPrefixRow, lastPrefixRow);
- }
- }
-
- private static void setStartAndStopRows(Scan scan, byte[] startPrefixRow, byte[] lastPrefixRow) {
- scan.setStartRow(startPrefixRow);
- byte[] stopRow = Bytes.add(Bytes.head(lastPrefixRow, lastPrefixRow.length - 1),
- new byte[]{(byte) (lastPrefixRow[lastPrefixRow.length - 1] + 1)});
- scan.setStopRow(stopRow);
- }
-
- @VisibleForTesting
- public boolean doCommandLine(final String[] args) {
- if (args.length < 2) {
- printUsage(null);
- return false;
- }
- try {
- for (int i = 0; i < args.length; i++) {
- String cmd = args[i];
- if (cmd.equals("-h") || cmd.startsWith("--h")) {
- printUsage(null);
- return false;
- }
-
- final String startTimeArgKey = "--starttime=";
- if (cmd.startsWith(startTimeArgKey)) {
- startTime = Long.parseLong(cmd.substring(startTimeArgKey.length()));
- continue;
- }
-
- final String endTimeArgKey = "--endtime=";
- if (cmd.startsWith(endTimeArgKey)) {
- endTime = Long.parseLong(cmd.substring(endTimeArgKey.length()));
- continue;
- }
-
- final String includeDeletedCellsArgKey = "--raw";
- if (cmd.equals(includeDeletedCellsArgKey)) {
- includeDeletedCells = true;
- continue;
- }
-
- final String versionsArgKey = "--versions=";
- if (cmd.startsWith(versionsArgKey)) {
- versions = Integer.parseInt(cmd.substring(versionsArgKey.length()));
- continue;
- }
-
- final String batchArgKey = "--batch=";
- if (cmd.startsWith(batchArgKey)) {
- batch = Integer.parseInt(cmd.substring(batchArgKey.length()));
- continue;
- }
-
- final String familiesArgKey = "--families=";
- if (cmd.startsWith(familiesArgKey)) {
- families = cmd.substring(familiesArgKey.length());
- continue;
- }
-
- final String rowPrefixesKey = "--row-prefixes=";
- if (cmd.startsWith(rowPrefixesKey)){
- rowPrefixes = cmd.substring(rowPrefixesKey.length());
- continue;
- }
-
- final String delimiterArgKey = "--delimiter=";
- if (cmd.startsWith(delimiterArgKey)) {
- delimiter = cmd.substring(delimiterArgKey.length());
- continue;
- }
-
- final String sleepToReCompareKey = "--recomparesleep=";
- if (cmd.startsWith(sleepToReCompareKey)) {
- sleepMsBeforeReCompare = Integer.parseInt(cmd.substring(sleepToReCompareKey.length()));
- continue;
- }
- final String verboseKey = "--verbose";
- if (cmd.startsWith(verboseKey)) {
- verbose = true;
- continue;
- }
-
- final String sourceSnapshotNameArgKey = "--sourceSnapshotName=";
- if (cmd.startsWith(sourceSnapshotNameArgKey)) {
- sourceSnapshotName = cmd.substring(sourceSnapshotNameArgKey.length());
- continue;
- }
-
- final String sourceSnapshotTmpDirArgKey = "--sourceSnapshotTmpDir=";
- if (cmd.startsWith(sourceSnapshotTmpDirArgKey)) {
- sourceSnapshotTmpDir = cmd.substring(sourceSnapshotTmpDirArgKey.length());
- continue;
- }
-
- final String peerSnapshotNameArgKey = "--peerSnapshotName=";
- if (cmd.startsWith(peerSnapshotNameArgKey)) {
- peerSnapshotName = cmd.substring(peerSnapshotNameArgKey.length());
- continue;
- }
-
- final String peerSnapshotTmpDirArgKey = "--peerSnapshotTmpDir=";
- if (cmd.startsWith(peerSnapshotTmpDirArgKey)) {
- peerSnapshotTmpDir = cmd.substring(peerSnapshotTmpDirArgKey.length());
- continue;
- }
-
- final String peerFSAddressArgKey = "--peerFSAddress=";
- if (cmd.startsWith(peerFSAddressArgKey)) {
- peerFSAddress = cmd.substring(peerFSAddressArgKey.length());
- continue;
- }
-
- final String peerHBaseRootAddressArgKey = "--peerHBaseRootAddress=";
- if (cmd.startsWith(peerHBaseRootAddressArgKey)) {
- peerHBaseRootAddress = cmd.substring(peerHBaseRootAddressArgKey.length());
- continue;
- }
-
- if (cmd.startsWith("--")) {
- printUsage("Invalid argument '" + cmd + "'");
- return false;
- }
-
- if (i == args.length-2) {
- peerId = cmd;
- }
-
- if (i == args.length-1) {
- tableName = cmd;
- }
- }
-
- if ((sourceSnapshotName != null && sourceSnapshotTmpDir == null)
- || (sourceSnapshotName == null && sourceSnapshotTmpDir != null)) {
- printUsage("Source snapshot name and snapshot temp location should be provided"
- + " to use snapshots in source cluster");
- return false;
- }
-
- if (peerSnapshotName != null || peerSnapshotTmpDir != null || peerFSAddress != null
- || peerHBaseRootAddress != null) {
- if (peerSnapshotName == null || peerSnapshotTmpDir == null || peerFSAddress == null
- || peerHBaseRootAddress == null) {
- printUsage(
- "Peer snapshot name, peer snapshot temp location, Peer HBase root address and "
- + "peer FSAddress should be provided to use snapshots in peer cluster");
- return false;
- }
- }
-
- // This is to avoid making recompare calls to source/peer tables when snapshots are used
- if ((sourceSnapshotName != null || peerSnapshotName != null) && sleepMsBeforeReCompare > 0) {
- printUsage(
- "Using sleepMsBeforeReCompare along with snapshots is not allowed as snapshots are immutable");
- return false;
- }
-
- } catch (Exception e) {
- e.printStackTrace();
- printUsage("Can't start because " + e.getMessage());
- return false;
- }
- return true;
- }
-
- /*
- * @param errorMsg Error message. Can be null.
- */
- private static void printUsage(final String errorMsg) {
- if (errorMsg != null && errorMsg.length() > 0) {
- System.err.println("ERROR: " + errorMsg);
- }
- System.err.println("Usage: verifyrep [--starttime=X]" +
- " [--endtime=Y] [--families=A] [--row-prefixes=B] [--delimiter=] [--recomparesleep=] " +
- "[--batch=] [--verbose] [--sourceSnapshotName=P] [--sourceSnapshotTmpDir=Q] [--peerSnapshotName=R] "
- + "[--peerSnapshotTmpDir=S] [--peerFSAddress=T] [--peerHBaseRootAddress=U] <peerid> <tablename>");
- System.err.println();
- System.err.println("Options:");
- System.err.println(" starttime beginning of the time range");
- System.err.println(" without endtime means from starttime to forever");
- System.err.println(" endtime end of the time range");
- System.err.println(" versions number of cell versions to verify");
- System.err.println(" batch batch count for scan, " +
- "note that result row counts will no longer be actual number of rows when you use this option");
- System.err.println(" raw includes raw scan if given in options");
- System.err.println(" families comma-separated list of families to copy");
- System.err.println(" row-prefixes comma-separated list of row key prefixes to filter on ");
- System.err.println(" delimiter the delimiter used in display around rowkey");
- System.err.println(" recomparesleep milliseconds to sleep before recompare row, " +
- "default value is 0 which disables the recompare.");
- System.err.println(" verbose logs row keys of good rows");
- System.err.println(" sourceSnapshotName Source Snapshot Name");
- System.err.println(" sourceSnapshotTmpDir Tmp location to restore source table snapshot");
- System.err.println(" peerSnapshotName Peer Snapshot Name");
- System.err.println(" peerSnapshotTmpDir Tmp location to restore peer table snapshot");
- System.err.println(" peerFSAddress Peer cluster Hadoop FS address");
- System.err.println(" peerHBaseRootAddress Peer cluster HBase root location");
- System.err.println();
- System.err.println("Args:");
- System.err.println(" peerid Id of the peer used for verification, must match the one given for replication");
- System.err.println(" tablename Name of the table to verify");
- System.err.println();
- System.err.println("Examples:");
- System.err.println(" To verify the data replicated from TestTable for a 1 hour window with peer #5 ");
- System.err.println(" $ hbase " +
- "org.apache.hadoop.hbase.mapreduce.replication.VerifyReplication" +
- " --starttime=1265875194289 --endtime=1265878794289 5 TestTable ");
- }
-
- @Override
- public int run(String[] args) throws Exception {
- Configuration conf = this.getConf();
- Job job = createSubmittableJob(conf, args);
- if (job != null) {
- return job.waitForCompletion(true) ? 0 : 1;
- }
- return 1;
- }
-
- /**
- * Main entry point.
- *
- * @param args The command line parameters.
- * @throws Exception When running the job fails.
- */
- public static void main(String[] args) throws Exception {
- int res = ToolRunner.run(HBaseConfiguration.create(), new VerifyReplication(), args);
- System.exit(res);
- }
-}
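
For reference while this class changes modules, here is a minimal sketch of launching VerifyReplication programmatically through ToolRunner, mirroring the tool's own main() and the CLI example in printUsage(). The peer id "5", the table name "TestTable", and the timestamps are placeholder values, and the import path is the package shown in the removed file:

  import org.apache.hadoop.hbase.HBaseConfiguration;
  import org.apache.hadoop.hbase.mapreduce.replication.VerifyReplication;
  import org.apache.hadoop.util.ToolRunner;

  public class VerifyReplicationLauncher {
    public static void main(String[] args) throws Exception {
      // Verify a one hour window of TestTable against replication peer 5,
      // exactly as the usage example above does from the command line.
      int exitCode = ToolRunner.run(HBaseConfiguration.create(), new VerifyReplication(),
          new String[] { "--starttime=1265875194289", "--endtime=1265878794289", "5", "TestTable" });
      System.exit(exitCode);
    }
  }
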
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CompactionTool.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CompactionTool.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CompactionTool.java
deleted file mode 100644
index eb9a5f7..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CompactionTool.java
+++ /dev/null
@@ -1,470 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.regionserver;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Set;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.HBaseInterfaceAudience;
-import org.apache.hadoop.hbase.HDFSBlocksDistribution;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.TableDescriptor;
-import org.apache.hadoop.hbase.mapreduce.JobUtil;
-import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
-import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
-import org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
-import org.apache.hadoop.hbase.util.FSTableDescriptors;
-import org.apache.hadoop.hbase.util.FSUtils;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.lib.input.FileSplit;
-import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
-import org.apache.hadoop.util.LineReader;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-/*
- * The CompactionTool allows executing a compaction by specifying a:
- * <ul>
- * <li>table folder (all regions and families will be compacted)
- * <li>region folder (all families in the region will be compacted)
- * <li>family folder (the store files will be compacted)
- * </ul>
- */
-@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
-public class CompactionTool extends Configured implements Tool {
- private static final Log LOG = LogFactory.getLog(CompactionTool.class);
-
- private final static String CONF_TMP_DIR = "hbase.tmp.dir";
- private final static String CONF_COMPACT_ONCE = "hbase.compactiontool.compact.once";
- private final static String CONF_COMPACT_MAJOR = "hbase.compactiontool.compact.major";
- private final static String CONF_DELETE_COMPACTED = "hbase.compactiontool.delete";
- private final static String CONF_COMPLETE_COMPACTION = "hbase.hstore.compaction.complete";
-
- /**
- * Class responsible for executing the compaction on the specified path.
- * The path can be a table, region or family directory.
- */
- private static class CompactionWorker {
- private final boolean keepCompactedFiles;
- private final boolean deleteCompacted;
- private final Configuration conf;
- private final FileSystem fs;
- private final Path tmpDir;
-
- public CompactionWorker(final FileSystem fs, final Configuration conf) {
- this.conf = conf;
- this.keepCompactedFiles = !conf.getBoolean(CONF_COMPLETE_COMPACTION, true);
- this.deleteCompacted = conf.getBoolean(CONF_DELETE_COMPACTED, false);
- this.tmpDir = new Path(conf.get(CONF_TMP_DIR));
- this.fs = fs;
- }
-
- /**
- * Execute the compaction on the specified path.
- *
- * @param path Directory path on which to run compaction.
- * @param compactOnce Execute just a single step of compaction.
- * @param major Request major compaction.
- */
- public void compact(final Path path, final boolean compactOnce, final boolean major) throws IOException {
- if (isFamilyDir(fs, path)) {
- Path regionDir = path.getParent();
- Path tableDir = regionDir.getParent();
- TableDescriptor htd = FSTableDescriptors.getTableDescriptorFromFs(fs, tableDir);
- HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
- compactStoreFiles(tableDir, htd, hri,
- path.getName(), compactOnce, major);
- } else if (isRegionDir(fs, path)) {
- Path tableDir = path.getParent();
- TableDescriptor htd = FSTableDescriptors.getTableDescriptorFromFs(fs, tableDir);
- compactRegion(tableDir, htd, path, compactOnce, major);
- } else if (isTableDir(fs, path)) {
- compactTable(path, compactOnce, major);
- } else {
- throw new IOException(
- "Specified path is not a table, region or family directory. path=" + path);
- }
- }
-
- private void compactTable(final Path tableDir, final boolean compactOnce, final boolean major)
- throws IOException {
- TableDescriptor htd = FSTableDescriptors.getTableDescriptorFromFs(fs, tableDir);
- for (Path regionDir: FSUtils.getRegionDirs(fs, tableDir)) {
- compactRegion(tableDir, htd, regionDir, compactOnce, major);
- }
- }
-
- private void compactRegion(final Path tableDir, final TableDescriptor htd,
- final Path regionDir, final boolean compactOnce, final boolean major)
- throws IOException {
- HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
- for (Path familyDir: FSUtils.getFamilyDirs(fs, regionDir)) {
- compactStoreFiles(tableDir, htd, hri, familyDir.getName(), compactOnce, major);
- }
- }
-
- /**
- * Execute the actual compaction job.
- * If the compact once flag is not specified, execute the compaction until
- * no more compactions are needed. Uses the Configuration settings provided.
- */
- private void compactStoreFiles(final Path tableDir, final TableDescriptor htd,
- final HRegionInfo hri, final String familyName, final boolean compactOnce,
- final boolean major) throws IOException {
- HStore store = getStore(conf, fs, tableDir, htd, hri, familyName, tmpDir);
- LOG.info("Compact table=" + htd.getTableName() +
- " region=" + hri.getRegionNameAsString() +
- " family=" + familyName);
- if (major) {
- store.triggerMajorCompaction();
- }
- do {
- CompactionContext compaction = store.requestCompaction(Store.PRIORITY_USER, null);
- if (compaction == null) break;
- List<StoreFile> storeFiles =
- store.compact(compaction, NoLimitThroughputController.INSTANCE);
- if (storeFiles != null && !storeFiles.isEmpty()) {
- if (keepCompactedFiles && deleteCompacted) {
- for (StoreFile storeFile: storeFiles) {
- fs.delete(storeFile.getPath(), false);
- }
- }
- }
- } while (store.needsCompaction() && !compactOnce);
- }
-
- /**
- * Create a "mock" HStore that uses the tmpDir specified by the user and
- * the store dir to compact as source.
- */
- private static HStore getStore(final Configuration conf, final FileSystem fs,
- final Path tableDir, final TableDescriptor htd, final HRegionInfo hri,
- final String familyName, final Path tempDir) throws IOException {
- HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, hri) {
- @Override
- public Path getTempDir() {
- return tempDir;
- }
- };
- HRegion region = new HRegion(regionFs, null, conf, htd, null);
- return new HStore(region, htd.getColumnFamily(Bytes.toBytes(familyName)), conf);
- }
- }
-
- private static boolean isRegionDir(final FileSystem fs, final Path path) throws IOException {
- Path regionInfo = new Path(path, HRegionFileSystem.REGION_INFO_FILE);
- return fs.exists(regionInfo);
- }
-
- private static boolean isTableDir(final FileSystem fs, final Path path) throws IOException {
- return FSTableDescriptors.getTableInfoPath(fs, path) != null;
- }
-
- private static boolean isFamilyDir(final FileSystem fs, final Path path) throws IOException {
- return isRegionDir(fs, path.getParent());
- }
-
- private static class CompactionMapper
- extends Mapper<LongWritable, Text, NullWritable, NullWritable> {
- private CompactionWorker compactor = null;
- private boolean compactOnce = false;
- private boolean major = false;
-
- @Override
- public void setup(Context context) {
- Configuration conf = context.getConfiguration();
- compactOnce = conf.getBoolean(CONF_COMPACT_ONCE, false);
- major = conf.getBoolean(CONF_COMPACT_MAJOR, false);
-
- try {
- FileSystem fs = FileSystem.get(conf);
- this.compactor = new CompactionWorker(fs, conf);
- } catch (IOException e) {
- throw new RuntimeException("Could not get the input FileSystem", e);
- }
- }
-
- @Override
- public void map(LongWritable key, Text value, Context context)
- throws InterruptedException, IOException {
- Path path = new Path(value.toString());
- this.compactor.compact(path, compactOnce, major);
- }
- }
-
- /**
- * Input format that uses store files block location as input split locality.
- */
- private static class CompactionInputFormat extends TextInputFormat {
- @Override
- protected boolean isSplitable(JobContext context, Path file) {
- return true;
- }
-
- /**
- * Returns a split for each store files directory using the block location
- * of each file as locality reference.
- */
- @Override
- public List<InputSplit> getSplits(JobContext job) throws IOException {
- List<InputSplit> splits = new ArrayList<>();
- List<FileStatus> files = listStatus(job);
-
- Text key = new Text();
- for (FileStatus file: files) {
- Path path = file.getPath();
- FileSystem fs = path.getFileSystem(job.getConfiguration());
- LineReader reader = new LineReader(fs.open(path));
- long pos = 0;
- int n;
- try {
- while ((n = reader.readLine(key)) > 0) {
- String[] hosts = getStoreDirHosts(fs, path);
- splits.add(new FileSplit(path, pos, n, hosts));
- pos += n;
- }
- } finally {
- reader.close();
- }
- }
-
- return splits;
- }
-
- /**
- * Returns the top hosts of the store files, used by the split.
- */
- private static String[] getStoreDirHosts(final FileSystem fs, final Path path)
- throws IOException {
- FileStatus[] files = FSUtils.listStatus(fs, path);
- if (files == null) {
- return new String[] {};
- }
-
- HDFSBlocksDistribution hdfsBlocksDistribution = new HDFSBlocksDistribution();
- for (FileStatus hfileStatus: files) {
- HDFSBlocksDistribution storeFileBlocksDistribution =
- FSUtils.computeHDFSBlocksDistribution(fs, hfileStatus, 0, hfileStatus.getLen());
- hdfsBlocksDistribution.add(storeFileBlocksDistribution);
- }
-
- List<String> hosts = hdfsBlocksDistribution.getTopHosts();
- return hosts.toArray(new String[hosts.size()]);
- }
-
- /**
- * Create the input file for the given directories to compact.
- * The file is a TextFile with each line corresponding to a
- * store files directory to compact.
- */
- public static void createInputFile(final FileSystem fs, final Path path,
- final Set<Path> toCompactDirs) throws IOException {
- // Extract the list of store dirs
- List<Path> storeDirs = new LinkedList<>();
- for (Path compactDir: toCompactDirs) {
- if (isFamilyDir(fs, compactDir)) {
- storeDirs.add(compactDir);
- } else if (isRegionDir(fs, compactDir)) {
- for (Path familyDir: FSUtils.getFamilyDirs(fs, compactDir)) {
- storeDirs.add(familyDir);
- }
- } else if (isTableDir(fs, compactDir)) {
- // Lookup regions
- for (Path regionDir: FSUtils.getRegionDirs(fs, compactDir)) {
- for (Path familyDir: FSUtils.getFamilyDirs(fs, regionDir)) {
- storeDirs.add(familyDir);
- }
- }
- } else {
- throw new IOException(
- "Specified path is not a table, region or family directory. path=" + compactDir);
- }
- }
-
- // Write Input File
- FSDataOutputStream stream = fs.create(path);
- LOG.info("Create input file=" + path + " with " + storeDirs.size() + " dirs to compact.");
- try {
- final byte[] newLine = Bytes.toBytes("\n");
- for (Path storeDir: storeDirs) {
- stream.write(Bytes.toBytes(storeDir.toString()));
- stream.write(newLine);
- }
- } finally {
- stream.close();
- }
- }
- }
-
- /**
- * Execute compaction, using a Map-Reduce job.
- */
- private int doMapReduce(final FileSystem fs, final Set<Path> toCompactDirs,
- final boolean compactOnce, final boolean major) throws Exception {
- Configuration conf = getConf();
- conf.setBoolean(CONF_COMPACT_ONCE, compactOnce);
- conf.setBoolean(CONF_COMPACT_MAJOR, major);
-
- Job job = new Job(conf);
- job.setJobName("CompactionTool");
- job.setJarByClass(CompactionTool.class);
- job.setMapperClass(CompactionMapper.class);
- job.setInputFormatClass(CompactionInputFormat.class);
- job.setOutputFormatClass(NullOutputFormat.class);
- job.setMapSpeculativeExecution(false);
- job.setNumReduceTasks(0);
-
- // add dependencies (including HBase ones)
- TableMapReduceUtil.addDependencyJars(job);
-
- Path stagingDir = JobUtil.getStagingDir(conf);
- try {
- // Create input file with the store dirs
- Path inputPath = new Path(stagingDir, "compact-"+ EnvironmentEdgeManager.currentTime());
- CompactionInputFormat.createInputFile(fs, inputPath, toCompactDirs);
- CompactionInputFormat.addInputPath(job, inputPath);
-
- // Initialize credential for secure cluster
- TableMapReduceUtil.initCredentials(job);
-
- // Start the MR Job and wait
- return job.waitForCompletion(true) ? 0 : 1;
- } finally {
- fs.delete(stagingDir, true);
- }
- }
-
- /**
- * Execute compaction, from this client, one path at the time.
- */
- private int doClient(final FileSystem fs, final Set<Path> toCompactDirs,
- final boolean compactOnce, final boolean major) throws IOException {
- CompactionWorker worker = new CompactionWorker(fs, getConf());
- for (Path path: toCompactDirs) {
- worker.compact(path, compactOnce, major);
- }
- return 0;
- }
-
- @Override
- public int run(String[] args) throws Exception {
- Set<Path> toCompactDirs = new HashSet<>();
- boolean compactOnce = false;
- boolean major = false;
- boolean mapred = false;
-
- Configuration conf = getConf();
- FileSystem fs = FileSystem.get(conf);
-
- try {
- for (int i = 0; i < args.length; ++i) {
- String opt = args[i];
- if (opt.equals("-compactOnce")) {
- compactOnce = true;
- } else if (opt.equals("-major")) {
- major = true;
- } else if (opt.equals("-mapred")) {
- mapred = true;
- } else if (!opt.startsWith("-")) {
- Path path = new Path(opt);
- FileStatus status = fs.getFileStatus(path);
- if (!status.isDirectory()) {
- printUsage("Specified path is not a directory. path=" + path);
- return 1;
- }
- toCompactDirs.add(path);
- } else {
- printUsage();
- }
- }
- } catch (Exception e) {
- printUsage(e.getMessage());
- return 1;
- }
-
- if (toCompactDirs.isEmpty()) {
- printUsage("No directories to compact specified.");
- return 1;
- }
-
- // Execute compaction!
- if (mapred) {
- return doMapReduce(fs, toCompactDirs, compactOnce, major);
- } else {
- return doClient(fs, toCompactDirs, compactOnce, major);
- }
- }
-
- private void printUsage() {
- printUsage(null);
- }
-
- private void printUsage(final String message) {
- if (message != null && message.length() > 0) {
- System.err.println(message);
- }
- System.err.println("Usage: java " + this.getClass().getName() + " \\");
- System.err.println(" [-compactOnce] [-major] [-mapred] [-D<property=value>]* files...");
- System.err.println();
- System.err.println("Options:");
- System.err.println(" mapred Use MapReduce to run compaction.");
- System.err.println(" compactOnce Execute just one compaction step. (default: while needed)");
- System.err.println(" major Trigger major compaction.");
- System.err.println();
- System.err.println("Note: -D properties will be applied to the conf used. ");
- System.err.println("For example: ");
- System.err.println(" To preserve input files, pass -D"+CONF_COMPLETE_COMPACTION+"=false");
- System.err.println(" To stop delete of compacted file, pass -D"+CONF_DELETE_COMPACTED+"=false");
- System.err.println(" To set tmp dir, pass -D"+CONF_TMP_DIR+"=ALTERNATE_DIR");
- System.err.println();
- System.err.println("Examples:");
- System.err.println(" To compact the full 'TestTable' using MapReduce:");
- System.err.println(" $ hbase " + this.getClass().getName() + " -mapred hdfs:///hbase/data/default/TestTable");
- System.err.println();
- System.err.println(" To compact column family 'x' of the table 'TestTable' region 'abc':");
- System.err.println(" $ hbase " + this.getClass().getName() + " hdfs:///hbase/data/default/TestTable/abc/x");
- }
-
- public static void main(String[] args) throws Exception {
- System.exit(ToolRunner.run(HBaseConfiguration.create(), new CompactionTool(), args));
- }
-}
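
Similarly, a minimal sketch of driving CompactionTool through ToolRunner, equivalent to the MapReduce example in its printUsage(). The HDFS path is a placeholder, the -D property is the one documented above for preserving input files, and the import path is the package shown in the removed file:

  import org.apache.hadoop.hbase.HBaseConfiguration;
  import org.apache.hadoop.hbase.regionserver.CompactionTool;
  import org.apache.hadoop.util.ToolRunner;

  public class CompactionToolLauncher {
    public static void main(String[] args) throws Exception {
      // Run a major compaction of TestTable as a MapReduce job, keeping the
      // original store files (hbase.hstore.compaction.complete=false).
      int exitCode = ToolRunner.run(HBaseConfiguration.create(), new CompactionTool(),
          new String[] { "-Dhbase.hstore.compaction.complete=false", "-mapred", "-major",
              "hdfs:///hbase/data/default/TestTable" });
      System.exit(exitCode);
    }
  }
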
[26/41] hbase git commit: HBASE-18640 Move mapreduce out of
hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportExport.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportExport.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportExport.java
new file mode 100644
index 0000000..91d2696
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportExport.java
@@ -0,0 +1,726 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.doAnswer;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.KeepDeletedCells;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Durability;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.filter.Filter;
+import org.apache.hadoop.hbase.filter.FilterBase;
+import org.apache.hadoop.hbase.filter.PrefixFilter;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.Import.KeyValueImporter;
+import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener;
+import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
+import org.apache.hadoop.hbase.wal.WAL;
+import org.apache.hadoop.hbase.wal.WALKey;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.LauncherSecurityManager;
+import org.apache.hadoop.mapreduce.Mapper.Context;
+import org.apache.hadoop.util.ToolRunner;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.stubbing.Answer;
+
+/**
+ * Tests the table import and table export MR job functionality
+ */
+@Category({VerySlowMapReduceTests.class, MediumTests.class})
+public class TestImportExport {
+ private static final Log LOG = LogFactory.getLog(TestImportExport.class);
+ private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
+ private static final byte[] ROW1 = Bytes.toBytesBinary("\\x32row1");
+ private static final byte[] ROW2 = Bytes.toBytesBinary("\\x32row2");
+ private static final byte[] ROW3 = Bytes.toBytesBinary("\\x32row3");
+ private static final String FAMILYA_STRING = "a";
+ private static final String FAMILYB_STRING = "b";
+ private static final byte[] FAMILYA = Bytes.toBytes(FAMILYA_STRING);
+ private static final byte[] FAMILYB = Bytes.toBytes(FAMILYB_STRING);
+ private static final byte[] QUAL = Bytes.toBytes("q");
+ private static final String OUTPUT_DIR = "outputdir";
+ private static String FQ_OUTPUT_DIR;
+ private static final String EXPORT_BATCH_SIZE = "100";
+
+ private static long now = System.currentTimeMillis();
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ // Up the handlers; this test needs more than usual.
+ UTIL.getConfiguration().setInt(HConstants.REGION_SERVER_HIGH_PRIORITY_HANDLER_COUNT, 10);
+ UTIL.startMiniCluster();
+ FQ_OUTPUT_DIR =
+ new Path(OUTPUT_DIR).makeQualified(FileSystem.get(UTIL.getConfiguration())).toString();
+ }
+
+ @AfterClass
+ public static void afterClass() throws Exception {
+ UTIL.shutdownMiniCluster();
+ }
+
+ @Rule
+ public final TestName name = new TestName();
+
+ @Before
+ public void announce() {
+ LOG.info("Running " + name.getMethodName());
+ }
+
+ @Before
+ @After
+ public void cleanup() throws Exception {
+ FileSystem fs = FileSystem.get(UTIL.getConfiguration());
+ fs.delete(new Path(OUTPUT_DIR), true);
+ }
+
+ /**
+ * Runs an export job with the specified command line args
+ * @param args
+ * @return true if job completed successfully
+ * @throws IOException
+ * @throws InterruptedException
+ * @throws ClassNotFoundException
+ */
+ boolean runExport(String[] args) throws Exception {
+ // need to make a copy of the configuration to make sure different temp dirs are used.
+ int status = ToolRunner.run(new Configuration(UTIL.getConfiguration()), new Export(), args);
+ return status == 0;
+ }
+
+ /**
+ * Runs an import job with the specified command line args
+ * @param args
+ * @return true if job completed successfully
+ * @throws IOException
+ * @throws InterruptedException
+ * @throws ClassNotFoundException
+ */
+ boolean runImport(String[] args) throws Exception {
+ // need to make a copy of the configuration to make sure different temp dirs are used.
+ int status = ToolRunner.run(new Configuration(UTIL.getConfiguration()), new Import(), args);
+ return status == 0;
+ }
+
+ /**
+ * Test a simple export/import round trip with column family renaming
+ * @throws Exception
+ */
+ @Test
+ public void testSimpleCase() throws Exception {
+ try (Table t = UTIL.createTable(TableName.valueOf(name.getMethodName()), FAMILYA, 3);) {
+ Put p = new Put(ROW1);
+ p.addColumn(FAMILYA, QUAL, now, QUAL);
+ p.addColumn(FAMILYA, QUAL, now + 1, QUAL);
+ p.addColumn(FAMILYA, QUAL, now + 2, QUAL);
+ t.put(p);
+ p = new Put(ROW2);
+ p.addColumn(FAMILYA, QUAL, now, QUAL);
+ p.addColumn(FAMILYA, QUAL, now + 1, QUAL);
+ p.addColumn(FAMILYA, QUAL, now + 2, QUAL);
+ t.put(p);
+ p = new Put(ROW3);
+ p.addColumn(FAMILYA, QUAL, now, QUAL);
+ p.addColumn(FAMILYA, QUAL, now + 1, QUAL);
+ p.addColumn(FAMILYA, QUAL, now + 2, QUAL);
+ t.put(p);
+ }
+
+ String[] args = new String[] {
+ // Only export row1 & row2.
+ "-D" + TableInputFormat.SCAN_ROW_START + "=\\x32row1",
+ "-D" + TableInputFormat.SCAN_ROW_STOP + "=\\x32row3",
+ name.getMethodName(),
+ FQ_OUTPUT_DIR,
+ "1000", // max number of key versions per key to export
+ };
+ assertTrue(runExport(args));
+
+ final String IMPORT_TABLE = name.getMethodName() + "import";
+ try (Table t = UTIL.createTable(TableName.valueOf(IMPORT_TABLE), FAMILYB, 3);) {
+ args = new String[] {
+ "-D" + Import.CF_RENAME_PROP + "="+FAMILYA_STRING+":"+FAMILYB_STRING,
+ IMPORT_TABLE,
+ FQ_OUTPUT_DIR
+ };
+ assertTrue(runImport(args));
+
+ Get g = new Get(ROW1);
+ g.setMaxVersions();
+ Result r = t.get(g);
+ assertEquals(3, r.size());
+ g = new Get(ROW2);
+ g.setMaxVersions();
+ r = t.get(g);
+ assertEquals(3, r.size());
+ g = new Get(ROW3);
+ r = t.get(g);
+ assertEquals(0, r.size());
+ }
+ }
+
+ /**
+ * Test export hbase:meta table
+ *
+ * @throws Exception
+ */
+ @Test
+ public void testMetaExport() throws Exception {
+ String EXPORT_TABLE = TableName.META_TABLE_NAME.getNameAsString();
+ String[] args = new String[] { EXPORT_TABLE, FQ_OUTPUT_DIR, "1", "0", "0" };
+ assertTrue(runExport(args));
+ }
+
+ /**
+ * Test import data from 0.94 exported file
+ * @throws Exception
+ */
+ @Test
+ public void testImport94Table() throws Exception {
+ final String name = "exportedTableIn94Format";
+ URL url = TestImportExport.class.getResource(name);
+ File f = new File(url.toURI());
+ if (!f.exists()) {
+ LOG.warn("FAILED TO FIND " + f + "; skipping out on test");
+ return;
+ }
+ assertTrue(f.exists());
+ LOG.info("FILE=" + f);
+ Path importPath = new Path(f.toURI());
+ FileSystem fs = FileSystem.get(UTIL.getConfiguration());
+ fs.copyFromLocalFile(importPath, new Path(FQ_OUTPUT_DIR + Path.SEPARATOR + name));
+ String IMPORT_TABLE = name;
+ try (Table t = UTIL.createTable(TableName.valueOf(IMPORT_TABLE), Bytes.toBytes("f1"), 3);) {
+ String[] args = new String[] {
+ "-Dhbase.import.version=0.94" ,
+ IMPORT_TABLE, FQ_OUTPUT_DIR
+ };
+ assertTrue(runImport(args));
+ /* exportedTableIn94Format contains 5 rows
+ ROW COLUMN+CELL
+ r1 column=f1:c1, timestamp=1383766761171, value=val1
+ r2 column=f1:c1, timestamp=1383766771642, value=val2
+ r3 column=f1:c1, timestamp=1383766777615, value=val3
+ r4 column=f1:c1, timestamp=1383766785146, value=val4
+ r5 column=f1:c1, timestamp=1383766791506, value=val5
+ */
+ assertEquals(5, UTIL.countRows(t));
+ }
+ }
+
+ /**
+ * Test export scanner batching
+ */
+ @Test
+ public void testExportScannerBatching() throws Exception {
+ HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(name.getMethodName()));
+ desc.addFamily(new HColumnDescriptor(FAMILYA)
+ .setMaxVersions(1)
+ );
+ UTIL.getAdmin().createTable(desc);
+ try (Table t = UTIL.getConnection().getTable(desc.getTableName());) {
+
+ Put p = new Put(ROW1);
+ p.addColumn(FAMILYA, QUAL, now, QUAL);
+ p.addColumn(FAMILYA, QUAL, now + 1, QUAL);
+ p.addColumn(FAMILYA, QUAL, now + 2, QUAL);
+ p.addColumn(FAMILYA, QUAL, now + 3, QUAL);
+ p.addColumn(FAMILYA, QUAL, now + 4, QUAL);
+ t.put(p);
+
+ String[] args = new String[] {
+ "-D" + Export.EXPORT_BATCHING + "=" + EXPORT_BATCH_SIZE, // added scanner batching arg.
+ name.getMethodName(),
+ FQ_OUTPUT_DIR
+ };
+ assertTrue(runExport(args));
+
+ FileSystem fs = FileSystem.get(UTIL.getConfiguration());
+ fs.delete(new Path(FQ_OUTPUT_DIR), true);
+ }
+ }
+
+ @Test
+ public void testWithDeletes() throws Exception {
+ HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(name.getMethodName()));
+ desc.addFamily(new HColumnDescriptor(FAMILYA)
+ .setMaxVersions(5)
+ .setKeepDeletedCells(KeepDeletedCells.TRUE)
+ );
+ UTIL.getAdmin().createTable(desc);
+ try (Table t = UTIL.getConnection().getTable(desc.getTableName());) {
+
+ Put p = new Put(ROW1);
+ p.addColumn(FAMILYA, QUAL, now, QUAL);
+ p.addColumn(FAMILYA, QUAL, now + 1, QUAL);
+ p.addColumn(FAMILYA, QUAL, now + 2, QUAL);
+ p.addColumn(FAMILYA, QUAL, now + 3, QUAL);
+ p.addColumn(FAMILYA, QUAL, now + 4, QUAL);
+ t.put(p);
+
+ Delete d = new Delete(ROW1, now+3);
+ t.delete(d);
+ d = new Delete(ROW1);
+ d.addColumns(FAMILYA, QUAL, now+2);
+ t.delete(d);
+ }
+
+ String[] args = new String[] {
+ "-D" + Export.RAW_SCAN + "=true",
+ name.getMethodName(),
+ FQ_OUTPUT_DIR,
+ "1000", // max number of key versions per key to export
+ };
+ assertTrue(runExport(args));
+
+ final String IMPORT_TABLE = name.getMethodName() + "import";
+ desc = new HTableDescriptor(TableName.valueOf(IMPORT_TABLE));
+ desc.addFamily(new HColumnDescriptor(FAMILYA)
+ .setMaxVersions(5)
+ .setKeepDeletedCells(KeepDeletedCells.TRUE)
+ );
+ UTIL.getAdmin().createTable(desc);
+ try (Table t = UTIL.getConnection().getTable(desc.getTableName());) {
+ args = new String[] {
+ IMPORT_TABLE,
+ FQ_OUTPUT_DIR
+ };
+ assertTrue(runImport(args));
+
+ Scan s = new Scan();
+ s.setMaxVersions();
+ s.setRaw(true);
+ ResultScanner scanner = t.getScanner(s);
+ Result r = scanner.next();
+ Cell[] res = r.rawCells();
+ assertTrue(CellUtil.isDeleteFamily(res[0]));
+ assertEquals(now+4, res[1].getTimestamp());
+ assertEquals(now+3, res[2].getTimestamp());
+ assertTrue(CellUtil.isDelete(res[3]));
+ assertEquals(now+2, res[4].getTimestamp());
+ assertEquals(now+1, res[5].getTimestamp());
+ assertEquals(now, res[6].getTimestamp());
+ }
+ }
+
+
+ @Test
+ public void testWithMultipleDeleteFamilyMarkersOfSameRowSameFamily() throws Exception {
+ final TableName exportTable = TableName.valueOf(name.getMethodName());
+ HTableDescriptor desc = new HTableDescriptor(exportTable);
+ desc.addFamily(new HColumnDescriptor(FAMILYA)
+ .setMaxVersions(5)
+ .setKeepDeletedCells(KeepDeletedCells.TRUE)
+ );
+ UTIL.getAdmin().createTable(desc);
+
+ Table exportT = UTIL.getConnection().getTable(exportTable);
+
+ //Add first version of QUAL
+ Put p = new Put(ROW1);
+ p.addColumn(FAMILYA, QUAL, now, QUAL);
+ exportT.put(p);
+
+ //Add Delete family marker
+ Delete d = new Delete(ROW1, now+3);
+ exportT.delete(d);
+
+ //Add second version of QUAL
+ p = new Put(ROW1);
+ p.addColumn(FAMILYA, QUAL, now + 5, "s".getBytes());
+ exportT.put(p);
+
+ //Add second Delete family marker
+ d = new Delete(ROW1, now+7);
+ exportT.delete(d);
+
+
+ String[] args = new String[] {
+ "-D" + Export.RAW_SCAN + "=true", exportTable.getNameAsString(),
+ FQ_OUTPUT_DIR,
+ "1000", // max number of key versions per key to export
+ };
+ assertTrue(runExport(args));
+
+ final String importTable = name.getMethodName() + "import";
+ desc = new HTableDescriptor(TableName.valueOf(importTable));
+ desc.addFamily(new HColumnDescriptor(FAMILYA)
+ .setMaxVersions(5)
+ .setKeepDeletedCells(KeepDeletedCells.TRUE)
+ );
+ UTIL.getAdmin().createTable(desc);
+
+ Table importT = UTIL.getConnection().getTable(TableName.valueOf(importTable));
+ args = new String[] {
+ importTable,
+ FQ_OUTPUT_DIR
+ };
+ assertTrue(runImport(args));
+
+ Scan s = new Scan();
+ s.setMaxVersions();
+ s.setRaw(true);
+
+ ResultScanner importedTScanner = importT.getScanner(s);
+ Result importedTResult = importedTScanner.next();
+
+ ResultScanner exportedTScanner = exportT.getScanner(s);
+ Result exportedTResult = exportedTScanner.next();
+ try {
+ Result.compareResults(exportedTResult, importedTResult);
+ } catch (Exception e) {
+ fail("Original and imported tables data comparision failed with error:"+e.getMessage());
+ } finally {
+ exportT.close();
+ importT.close();
+ }
+ }
+
+ /**
+ * Create a simple table, run an Export job on it, Import with filtering on, verify counts,
+ * then attempt an Import with invalid filter values.
+ */
+ @Test
+ public void testWithFilter() throws Exception {
+ // Create simple table to export
+ HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(name.getMethodName()));
+ desc.addFamily(new HColumnDescriptor(FAMILYA).setMaxVersions(5));
+ UTIL.getAdmin().createTable(desc);
+ Table exportTable = UTIL.getConnection().getTable(desc.getTableName());
+
+ Put p1 = new Put(ROW1);
+ p1.addColumn(FAMILYA, QUAL, now, QUAL);
+ p1.addColumn(FAMILYA, QUAL, now + 1, QUAL);
+ p1.addColumn(FAMILYA, QUAL, now + 2, QUAL);
+ p1.addColumn(FAMILYA, QUAL, now + 3, QUAL);
+ p1.addColumn(FAMILYA, QUAL, now + 4, QUAL);
+
+ // Add a second row so the filter actually has something to exclude.
+ Put p2 = new Put(ROW2);
+ p2.addColumn(FAMILYA, QUAL, now, QUAL);
+
+ exportTable.put(Arrays.asList(p1, p2));
+
+ // Export the simple table
+ String[] args = new String[] { name.getMethodName(), FQ_OUTPUT_DIR, "1000" };
+ assertTrue(runExport(args));
+
+ // Import to a new table
+ final String IMPORT_TABLE = name.getMethodName() + "import";
+ desc = new HTableDescriptor(TableName.valueOf(IMPORT_TABLE));
+ desc.addFamily(new HColumnDescriptor(FAMILYA).setMaxVersions(5));
+ UTIL.getAdmin().createTable(desc);
+
+ Table importTable = UTIL.getConnection().getTable(desc.getTableName());
+ args = new String[] { "-D" + Import.FILTER_CLASS_CONF_KEY + "=" + PrefixFilter.class.getName(),
+ "-D" + Import.FILTER_ARGS_CONF_KEY + "=" + Bytes.toString(ROW1), IMPORT_TABLE,
+ FQ_OUTPUT_DIR,
+ "1000" };
+ assertTrue(runImport(args));
+
+ // get the count of the source table for that time range
+ PrefixFilter filter = new PrefixFilter(ROW1);
+ int count = getCount(exportTable, filter);
+
+ Assert.assertEquals("Unexpected row count between export and import tables", count,
+ getCount(importTable, null));
+
+ // then verify that an invalid filter class doesn't break everything - easier here because we
+ // don't need to re-run the export job
+
+ args = new String[] { "-D" + Import.FILTER_CLASS_CONF_KEY + "=" + Filter.class.getName(),
+ "-D" + Import.FILTER_ARGS_CONF_KEY + "=" + Bytes.toString(ROW1) + "", name.getMethodName(),
+ FQ_OUTPUT_DIR, "1000" };
+ assertFalse(runImport(args));
+
+ // cleanup
+ exportTable.close();
+ importTable.close();
+ }
+
+ /**
+ * Count the number of KeyValues in the specified table that match the given filter.
+ * @param table the table to scan
+ * @param filter the filter to apply, or null to count all cells
+ * @return the number of matching KeyValues
+ * @throws IOException if the scan fails
+ */
+ private int getCount(Table table, Filter filter) throws IOException {
+ Scan scan = new Scan();
+ scan.setFilter(filter);
+ ResultScanner results = table.getScanner(scan);
+ int count = 0;
+ for (Result res : results) {
+ count += res.size();
+ }
+ results.close();
+ return count;
+ }
+
+ /**
+ * Test the main method. With no arguments, Import should print help and call System.exit.
+ */
+ @Test
+ public void testImportMain() throws Exception {
+ PrintStream oldPrintStream = System.err;
+ SecurityManager SECURITY_MANAGER = System.getSecurityManager();
+ LauncherSecurityManager newSecurityManager= new LauncherSecurityManager();
+ System.setSecurityManager(newSecurityManager);
+ ByteArrayOutputStream data = new ByteArrayOutputStream();
+ String[] args = {};
+ System.setErr(new PrintStream(data));
+ try {
+ System.setErr(new PrintStream(data));
+ Import.main(args);
+ fail("should be SecurityException");
+ } catch (SecurityException e) {
+ assertEquals(-1, newSecurityManager.getExitCode());
+ assertTrue(data.toString().contains("Wrong number of arguments:"));
+ assertTrue(data.toString().contains("-Dimport.bulk.output=/path/for/output"));
+ assertTrue(data.toString().contains("-Dimport.filter.class=<name of filter class>"));
+ assertTrue(data.toString().contains("-Dimport.bulk.output=/path/for/output"));
+ assertTrue(data.toString().contains("-Dmapreduce.reduce.speculative=false"));
+ } finally {
+ System.setErr(oldPrintStream);
+ System.setSecurityManager(SECURITY_MANAGER);
+ }
+ }
+
+ /**
+ * Test the main method. With no arguments, Export should print help and call System.exit.
+ */
+ @Test
+ public void testExportMain() throws Exception {
+ PrintStream oldPrintStream = System.err;
+ SecurityManager SECURITY_MANAGER = System.getSecurityManager();
+ LauncherSecurityManager newSecurityManager= new LauncherSecurityManager();
+ System.setSecurityManager(newSecurityManager);
+ ByteArrayOutputStream data = new ByteArrayOutputStream();
+ String[] args = {};
+ System.setErr(new PrintStream(data));
+ try {
+ System.setErr(new PrintStream(data));
+ Export.main(args);
+ fail("should be SecurityException");
+ } catch (SecurityException e) {
+ assertEquals(-1, newSecurityManager.getExitCode());
+ String errMsg = data.toString();
+ assertTrue(errMsg.contains("Wrong number of arguments:"));
+ assertTrue(errMsg.contains(
+ "Usage: Export [-D <property=value>]* <tablename> <outputdir> [<versions> " +
+ "[<starttime> [<endtime>]] [^[regex pattern] or [Prefix] to filter]]"));
+ assertTrue(
+ errMsg.contains("-D hbase.mapreduce.scan.column.family=<family1>,<family2>, ..."));
+ assertTrue(errMsg.contains("-D hbase.mapreduce.include.deleted.rows=true"));
+ assertTrue(errMsg.contains("-Dhbase.client.scanner.caching=100"));
+ assertTrue(errMsg.contains("-Dmapreduce.map.speculative=false"));
+ assertTrue(errMsg.contains("-Dmapreduce.reduce.speculative=false"));
+ assertTrue(errMsg.contains("-Dhbase.export.scanner.batch=10"));
+ } finally {
+ System.setErr(oldPrintStream);
+ System.setSecurityManager(SECURITY_MANAGER);
+ }
+ }
+
+ /**
+ * Test the map method of KeyValueImporter.
+ */
+ @SuppressWarnings({ "unchecked", "rawtypes" })
+ @Test
+ public void testKeyValueImporter() throws Exception {
+ KeyValueImporter importer = new KeyValueImporter();
+ Configuration configuration = new Configuration();
+ Context ctx = mock(Context.class);
+ when(ctx.getConfiguration()).thenReturn(configuration);
+
+ doAnswer(new Answer<Void>() {
+
+ @Override
+ public Void answer(InvocationOnMock invocation) throws Throwable {
+ ImmutableBytesWritable writer = (ImmutableBytesWritable) invocation.getArguments()[0];
+ KeyValue key = (KeyValue) invocation.getArguments()[1];
+ assertEquals("Key", Bytes.toString(writer.get()));
+ assertEquals("row", Bytes.toString(CellUtil.cloneRow(key)));
+ return null;
+ }
+ }).when(ctx).write(any(ImmutableBytesWritable.class), any(KeyValue.class));
+
+ importer.setup(ctx);
+ Result value = mock(Result.class);
+ KeyValue[] keys = {
+ new KeyValue(Bytes.toBytes("row"), Bytes.toBytes("family"), Bytes.toBytes("qualifier"),
+ Bytes.toBytes("value")),
+ new KeyValue(Bytes.toBytes("row"), Bytes.toBytes("family"), Bytes.toBytes("qualifier"),
+ Bytes.toBytes("value1")) };
+ when(value.rawCells()).thenReturn(keys);
+ importer.map(new ImmutableBytesWritable(Bytes.toBytes("Key")), value, ctx);
+
+ }
+
+ /**
+ * Test the addFilterAndArguments method of Import. This method sets a couple of
+ * parameters in the Configuration.
+ */
+ @Test
+ public void testAddFilterAndArguments() throws IOException {
+ Configuration configuration = new Configuration();
+
+ List<String> args = new ArrayList<>();
+ args.add("param1");
+ args.add("param2");
+
+ Import.addFilterAndArguments(configuration, FilterBase.class, args);
+ assertEquals("org.apache.hadoop.hbase.filter.FilterBase",
+ configuration.get(Import.FILTER_CLASS_CONF_KEY));
+ assertEquals("param1,param2", configuration.get(Import.FILTER_ARGS_CONF_KEY));
+ }
+
+ @Test
+ public void testDurability() throws Exception {
+ // Create an export table.
+ String exportTableName = name.getMethodName() + "export";
+ try (Table exportTable = UTIL.createTable(TableName.valueOf(exportTableName), FAMILYA, 3);) {
+
+ // Insert some data
+ Put put = new Put(ROW1);
+ put.addColumn(FAMILYA, QUAL, now, QUAL);
+ put.addColumn(FAMILYA, QUAL, now + 1, QUAL);
+ put.addColumn(FAMILYA, QUAL, now + 2, QUAL);
+ exportTable.put(put);
+
+ put = new Put(ROW2);
+ put.addColumn(FAMILYA, QUAL, now, QUAL);
+ put.addColumn(FAMILYA, QUAL, now + 1, QUAL);
+ put.addColumn(FAMILYA, QUAL, now + 2, QUAL);
+ exportTable.put(put);
+
+ // Run the export
+ String[] args = new String[] { exportTableName, FQ_OUTPUT_DIR, "1000"};
+ assertTrue(runExport(args));
+
+ // Create the table for import
+ String importTableName = name.getMethodName() + "import1";
+ Table importTable = UTIL.createTable(TableName.valueOf(importTableName), FAMILYA, 3);
+
+ // Register the wal listener for the import table
+ HRegionInfo region = UTIL.getHBaseCluster().getRegionServerThreads().get(0).getRegionServer()
+ .getOnlineRegions(importTable.getName()).get(0).getRegionInfo();
+ TableWALActionListener walListener = new TableWALActionListener(region);
+ WAL wal = UTIL.getMiniHBaseCluster().getRegionServer(0).getWAL(region);
+ wal.registerWALActionsListener(walListener);
+
+ // Run the import with SKIP_WAL
+ args =
+ new String[] { "-D" + Import.WAL_DURABILITY + "=" + Durability.SKIP_WAL.name(),
+ importTableName, FQ_OUTPUT_DIR };
+ assertTrue(runImport(args));
+ // Assert that the WAL is not visited
+ assertTrue(!walListener.isWALVisited());
+ // Ensure that the count is 2 (only one version of each key value is obtained)
+ assertTrue(getCount(importTable, null) == 2);
+
+ // Run the import with the default durability option
+ importTableName = name.getMethodName() + "import2";
+ importTable = UTIL.createTable(TableName.valueOf(importTableName), FAMILYA, 3);
+ region = UTIL.getHBaseCluster().getRegionServerThreads().get(0).getRegionServer()
+ .getOnlineRegions(importTable.getName()).get(0).getRegionInfo();
+ wal = UTIL.getMiniHBaseCluster().getRegionServer(0).getWAL(region);
+ walListener = new TableWALActionListener(region);
+ wal.registerWALActionsListener(walListener);
+ args = new String[] { importTableName, FQ_OUTPUT_DIR };
+ assertTrue(runImport(args));
+ // Assert that the WAL is visited
+ assertTrue(walListener.isWALVisited());
+ // Ensure that the count is 2 (only one version of each key value is obtained)
+ assertTrue(getCount(importTable, null) == 2);
+ }
+ }
+
+ /**
+ * Listens via {@link #visitLogEntryBeforeWrite(WALKey, WALEdit)} to detect whether an entry
+ * is written to the Write Ahead Log for the given table.
+ */
+ private static class TableWALActionListener extends WALActionsListener.Base {
+
+ private HRegionInfo regionInfo;
+ private boolean isVisited = false;
+
+ public TableWALActionListener(HRegionInfo region) {
+ this.regionInfo = region;
+ }
+
+ @Override
+ public void visitLogEntryBeforeWrite(WALKey logKey, WALEdit logEdit) {
+ if (logKey.getTablename().getNameAsString().equalsIgnoreCase(
+ this.regionInfo.getTable().getNameAsString()) && (!logEdit.isMetaEdit())) {
+ isVisited = true;
+ }
+ }
+
+ public boolean isWALVisited() {
+ return isVisited;
+ }
+ }
+}
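As context for testDurability above, here is a minimal sketch (not part of this patch) of
launching Import with WAL writes skipped outside the test harness. The table name and output
directory are placeholders, and it assumes the export output already exists under that path
and that Import.WAL_DURABILITY is visible to the caller.

    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.Durability;
    import org.apache.hadoop.hbase.mapreduce.Import;
    import org.apache.hadoop.util.ToolRunner;

    public class ImportSkipWalSketch {
      public static void main(String[] ignored) throws Exception {
        String[] args = new String[] {
            "-D" + Import.WAL_DURABILITY + "=" + Durability.SKIP_WAL.name(),
            "myImportTable",        // destination table (placeholder)
            "/tmp/export-output"    // directory written by a prior Export run (placeholder)
        };
        // Import implements Tool, so ToolRunner applies the -D override before the job runs.
        int exitCode = ToolRunner.run(HBaseConfiguration.create(), new Import(), args);
        System.exit(exitCode);
      }
    }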
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithOperationAttributes.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithOperationAttributes.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithOperationAttributes.java
new file mode 100644
index 0000000..7d6d74f
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithOperationAttributes.java
@@ -0,0 +1,266 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.UUID;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.CategoryBasedTimeout;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.coprocessor.RegionObserver;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.client.Durability;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.coprocessor.ObserverContext;
+import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
+import org.apache.hadoop.hbase.regionserver.Region;
+import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+import org.junit.rules.TestRule;
+
+@Category({MapReduceTests.class, LargeTests.class})
+public class TestImportTSVWithOperationAttributes implements Configurable {
+ @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
+ withTimeout(this.getClass()).withLookingForStuckThread(true).build();
+ private static final Log LOG = LogFactory.getLog(TestImportTSVWithOperationAttributes.class);
+ protected static final String NAME = TestImportTsv.class.getSimpleName();
+ protected static HBaseTestingUtility util = new HBaseTestingUtility();
+
+ /**
+ * Delete the tmp directory after running doMROnTableTest. Boolean. Default is
+ * true.
+ */
+ protected static final String DELETE_AFTER_LOAD_CONF = NAME + ".deleteAfterLoad";
+
+ /**
+ * Force use of combiner in doMROnTableTest. Boolean. Default is true.
+ */
+ protected static final String FORCE_COMBINER_CONF = NAME + ".forceCombiner";
+
+ private static Configuration conf;
+
+ private static final String TEST_ATR_KEY = "test";
+
+ private final String FAMILY = "FAM";
+
+ @Rule
+ public TestName name = new TestName();
+
+ public Configuration getConf() {
+ return util.getConfiguration();
+ }
+
+ public void setConf(Configuration conf) {
+ throw new IllegalArgumentException("setConf not supported");
+ }
+
+ @BeforeClass
+ public static void provisionCluster() throws Exception {
+ conf = util.getConfiguration();
+ conf.set("hbase.coprocessor.master.classes", OperationAttributesTestController.class.getName());
+ conf.set("hbase.coprocessor.region.classes", OperationAttributesTestController.class.getName());
+ util.startMiniCluster();
+ }
+
+ @AfterClass
+ public static void releaseCluster() throws Exception {
+ util.shutdownMiniCluster();
+ }
+
+ @Test
+ public void testMROnTable() throws Exception {
+ final TableName tableName = TableName.valueOf(name.getMethodName() + UUID.randomUUID());
+
+ // Prepare the arguments required for the test.
+ String[] args = new String[] {
+ "-D" + ImportTsv.MAPPER_CONF_KEY
+ + "=org.apache.hadoop.hbase.mapreduce.TsvImporterCustomTestMapperForOprAttr",
+ "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_ATTRIBUTES_KEY",
+ "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b", tableName.getNameAsString() };
+ String data = "KEY\u001bVALUE1\u001bVALUE2\u001btest=>myvalue\n";
+ util.createTable(tableName, FAMILY);
+ doMROnTableTest(util, FAMILY, data, args, 1, true);
+ util.deleteTable(tableName);
+ }
+
+ @Test
+ public void testMROnTableWithInvalidOperationAttr() throws Exception {
+ final TableName tableName = TableName.valueOf(name.getMethodName() + UUID.randomUUID());
+
+ // Prepare the arguments required for the test.
+ String[] args = new String[] {
+ "-D" + ImportTsv.MAPPER_CONF_KEY
+ + "=org.apache.hadoop.hbase.mapreduce.TsvImporterCustomTestMapperForOprAttr",
+ "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_ATTRIBUTES_KEY",
+ "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b", tableName.getNameAsString() };
+ String data = "KEY\u001bVALUE1\u001bVALUE2\u001btest1=>myvalue\n";
+ util.createTable(tableName, FAMILY);
+ doMROnTableTest(util, FAMILY, data, args, 1, false);
+ util.deleteTable(tableName);
+ }
+
+ /**
+ * Run an ImportTsv job and perform basic validation on the results. Returns
+ * the ImportTsv <code>Tool</code> instance so that other tests can inspect it
+ * for further validation as necessary.
+ *
+ * @param args
+ * Any arguments to pass BEFORE inputFile path is appended.
+ * @param dataAvailable
+ * whether the imported rows are expected to be present in the table afterwards.
+ * @return The Tool instance used to run the test.
+ */
+ private Tool doMROnTableTest(HBaseTestingUtility util, String family, String data, String[] args,
+ int valueMultiplier, boolean dataAvailable) throws Exception {
+ String table = args[args.length - 1];
+ Configuration conf = new Configuration(util.getConfiguration());
+
+ // populate input file
+ FileSystem fs = FileSystem.get(conf);
+ Path inputPath = fs.makeQualified(new Path(util.getDataTestDirOnTestFS(table), "input.dat"));
+ FSDataOutputStream op = fs.create(inputPath, true);
+ op.write(Bytes.toBytes(data));
+ op.close();
+ LOG.debug(String.format("Wrote test data to file: %s", inputPath));
+
+ if (conf.getBoolean(FORCE_COMBINER_CONF, true)) {
+ LOG.debug("Forcing combiner.");
+ conf.setInt("mapreduce.map.combine.minspills", 1);
+ }
+
+ // run the import
+ List<String> argv = new ArrayList<>(Arrays.asList(args));
+ argv.add(inputPath.toString());
+ Tool tool = new ImportTsv();
+ LOG.debug("Running ImportTsv with arguments: " + argv);
+ assertEquals(0, ToolRunner.run(conf, tool, argv.toArray(args)));
+
+ validateTable(conf, TableName.valueOf(table), family, valueMultiplier, dataAvailable);
+
+ if (conf.getBoolean(DELETE_AFTER_LOAD_CONF, true)) {
+ LOG.debug("Deleting test subdirectory");
+ util.cleanupDataTestDirOnTestFS(table);
+ }
+ return tool;
+ }
+
+ /**
+ * Confirm ImportTsv via data in online table.
+ *
+ * @param dataAvailable
+ * whether rows are expected to be present in the table.
+ */
+ private static void validateTable(Configuration conf, TableName tableName, String family,
+ int valueMultiplier, boolean dataAvailable) throws IOException {
+
+ LOG.debug("Validating table.");
+ Connection connection = ConnectionFactory.createConnection(conf);
+ Table table = connection.getTable(tableName);
+ boolean verified = false;
+ long pause = conf.getLong("hbase.client.pause", 5 * 1000);
+ int numRetries = conf.getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 5);
+ for (int i = 0; i < numRetries; i++) {
+ try {
+ Scan scan = new Scan();
+ // Scan entire family.
+ scan.addFamily(Bytes.toBytes(family));
+ if (dataAvailable) {
+ ResultScanner resScanner = table.getScanner(scan);
+ for (Result res : resScanner) {
+ LOG.debug("Getting results " + res.size());
+ assertTrue(res.size() == 2);
+ List<Cell> kvs = res.listCells();
+ assertTrue(CellUtil.matchingRow(kvs.get(0), Bytes.toBytes("KEY")));
+ assertTrue(CellUtil.matchingRow(kvs.get(1), Bytes.toBytes("KEY")));
+ assertTrue(CellUtil.matchingValue(kvs.get(0), Bytes.toBytes("VALUE" + valueMultiplier)));
+ assertTrue(CellUtil.matchingValue(kvs.get(1),
+ Bytes.toBytes("VALUE" + 2 * valueMultiplier)));
+ // Only one result set is expected, so let it loop.
+ verified = true;
+ }
+ } else {
+ ResultScanner resScanner = table.getScanner(scan);
+ Result[] next = resScanner.next(2);
+ assertEquals(0, next.length);
+ verified = true;
+ }
+
+ break;
+ } catch (NullPointerException e) {
+ // If here, a cell was empty. Presume it's because updates came in
+ // after the scanner had been opened. Wait a while and retry.
+ }
+ try {
+ Thread.sleep(pause);
+ } catch (InterruptedException e) {
+ // continue
+ }
+ }
+ table.close();
+ connection.close();
+ assertTrue(verified);
+ }
+
+ public static class OperationAttributesTestController implements RegionObserver {
+
+ @Override
+ public void prePut(ObserverContext<RegionCoprocessorEnvironment> e, Put put, WALEdit edit,
+ Durability durability) throws IOException {
+ Region region = e.getEnvironment().getRegion();
+ if (!region.getRegionInfo().isMetaTable()
+ && !region.getRegionInfo().getTable().isSystemTable()) {
+ if (put.getAttribute(TEST_ATR_KEY) != null) {
+ LOG.debug("allow any put to happen " + region.getRegionInfo().getRegionNameAsString());
+ } else {
+ e.bypass();
+ }
+ }
+ }
+ }
+}
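The custom mapper named in the arguments above turns the HBASE_ATTRIBUTES_KEY column into an
operation attribute on each Put, which OperationAttributesTestController then checks via
put.getAttribute(TEST_ATR_KEY). A hedged sketch of attaching the same attribute from plain
client code (hypothetical helper; Table, Put and Bytes are the standard client classes already
imported in the test above):

    /** Hypothetical helper: set the "test" operation attribute the observer looks for. */
    static void putWithOperationAttribute(Table table) throws IOException {
      Put p = new Put(Bytes.toBytes("KEY"));
      p.addColumn(Bytes.toBytes("FAM"), Bytes.toBytes("A"), Bytes.toBytes("VALUE1"));
      // Client-side equivalent of the "test=>myvalue" token in the TSV input; without this
      // attribute the coprocessor bypasses the put.
      p.setAttribute("test", Bytes.toBytes("myvalue"));
      table.put(p);
    }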
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithTTLs.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithTTLs.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithTTLs.java
new file mode 100644
index 0000000..4ab3d29
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithTTLs.java
@@ -0,0 +1,175 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.UUID;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.coprocessor.RegionObserver;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.client.Durability;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.coprocessor.ObserverContext;
+import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
+import org.apache.hadoop.hbase.regionserver.Region;
+import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+
+@Category({MapReduceTests.class, LargeTests.class})
+public class TestImportTSVWithTTLs implements Configurable {
+
+ protected static final Log LOG = LogFactory.getLog(TestImportTSVWithTTLs.class);
+ protected static final String NAME = TestImportTsv.class.getSimpleName();
+ protected static HBaseTestingUtility util = new HBaseTestingUtility();
+
+ /**
+ * Delete the tmp directory after running doMROnTableTest. Boolean. Default is
+ * true.
+ */
+ protected static final String DELETE_AFTER_LOAD_CONF = NAME + ".deleteAfterLoad";
+
+ /**
+ * Force use of combiner in doMROnTableTest. Boolean. Default is true.
+ */
+ protected static final String FORCE_COMBINER_CONF = NAME + ".forceCombiner";
+
+ private final String FAMILY = "FAM";
+ private static Configuration conf;
+
+ @Rule
+ public TestName name = new TestName();
+
+ @Override
+ public Configuration getConf() {
+ return util.getConfiguration();
+ }
+
+ @Override
+ public void setConf(Configuration conf) {
+ throw new IllegalArgumentException("setConf not supported");
+ }
+
+ @BeforeClass
+ public static void provisionCluster() throws Exception {
+ conf = util.getConfiguration();
+ // We don't check persistence in HFiles in this test, but if we ever do we will
+ // need this where the default hfile version is not 3 (i.e. 0.98)
+ conf.setInt("hfile.format.version", 3);
+ conf.set("hbase.coprocessor.region.classes", TTLCheckingObserver.class.getName());
+ util.startMiniCluster();
+ }
+
+ @AfterClass
+ public static void releaseCluster() throws Exception {
+ util.shutdownMiniCluster();
+ }
+
+ @Test
+ public void testMROnTable() throws Exception {
+ final TableName tableName = TableName.valueOf(name.getMethodName() + UUID.randomUUID());
+
+ // Prepare the arguments required for the test.
+ String[] args = new String[] {
+ "-D" + ImportTsv.MAPPER_CONF_KEY
+ + "=org.apache.hadoop.hbase.mapreduce.TsvImporterMapper",
+ "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_CELL_TTL",
+ "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b", tableName.getNameAsString() };
+ String data = "KEY\u001bVALUE1\u001bVALUE2\u001b1000000\n";
+ util.createTable(tableName, FAMILY);
+ doMROnTableTest(util, FAMILY, data, args, 1);
+ util.deleteTable(tableName);
+ }
+
+ protected static Tool doMROnTableTest(HBaseTestingUtility util, String family, String data,
+ String[] args, int valueMultiplier) throws Exception {
+ TableName table = TableName.valueOf(args[args.length - 1]);
+ Configuration conf = new Configuration(util.getConfiguration());
+
+ // populate input file
+ FileSystem fs = FileSystem.get(conf);
+ Path inputPath = fs.makeQualified(new Path(util
+ .getDataTestDirOnTestFS(table.getNameAsString()), "input.dat"));
+ FSDataOutputStream op = fs.create(inputPath, true);
+ op.write(Bytes.toBytes(data));
+ op.close();
+ LOG.debug(String.format("Wrote test data to file: %s", inputPath));
+
+ if (conf.getBoolean(FORCE_COMBINER_CONF, true)) {
+ LOG.debug("Forcing combiner.");
+ conf.setInt("mapreduce.map.combine.minspills", 1);
+ }
+
+ // run the import
+ List<String> argv = new ArrayList<>(Arrays.asList(args));
+ argv.add(inputPath.toString());
+ Tool tool = new ImportTsv();
+ LOG.debug("Running ImportTsv with arguments: " + argv);
+ try {
+ // Job will fail if observer rejects entries without TTL
+ assertEquals(0, ToolRunner.run(conf, tool, argv.toArray(args)));
+ } finally {
+ // Clean up
+ if (conf.getBoolean(DELETE_AFTER_LOAD_CONF, true)) {
+ LOG.debug("Deleting test subdirectory");
+ util.cleanupDataTestDirOnTestFS(table.getNameAsString());
+ }
+ }
+
+ return tool;
+ }
+
+ public static class TTLCheckingObserver implements RegionObserver {
+
+ @Override
+ public void prePut(ObserverContext<RegionCoprocessorEnvironment> e, Put put, WALEdit edit,
+ Durability durability) throws IOException {
+ Region region = e.getEnvironment().getRegion();
+ if (!region.getRegionInfo().isMetaTable()
+ && !region.getRegionInfo().getTable().isSystemTable()) {
+ // The put carries the TTL attribute
+ if (put.getTTL() != Long.MAX_VALUE) {
+ return;
+ }
+ throw new IOException("Operation does not have TTL set");
+ }
+ }
+ }
+}
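TTLCheckingObserver above rejects any Put whose TTL is still the Long.MAX_VALUE default, which
is the value ImportTsv overrides from the HBASE_CELL_TTL column. A minimal client-side sketch
that would satisfy the same check (hypothetical helper; assumes Mutation#setTTL is available,
and Table comes from org.apache.hadoop.hbase.client):

    /** Hypothetical helper: write one cell with an explicit per-mutation TTL. */
    static void putWithTtl(Table table) throws IOException {
      Put p = new Put(Bytes.toBytes("KEY"));
      p.addColumn(Bytes.toBytes("FAM"), Bytes.toBytes("A"), Bytes.toBytes("VALUE1"));
      // Same TTL value as the "1000000" field in the TSV test data above (milliseconds).
      p.setTTL(1000000L);
      table.put(p);
    }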
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithVisibilityLabels.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithVisibilityLabels.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithVisibilityLabels.java
new file mode 100644
index 0000000..8967ac7
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithVisibilityLabels.java
@@ -0,0 +1,495 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.security.PrivilegedExceptionAction;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.UUID;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.hfile.CacheConfig;
+import org.apache.hadoop.hbase.io.hfile.HFile;
+import org.apache.hadoop.hbase.io.hfile.HFileScanner;
+import org.apache.hadoop.hbase.protobuf.generated.VisibilityLabelsProtos.VisibilityLabelsResponse;
+import org.apache.hadoop.hbase.security.User;
+import org.apache.hadoop.hbase.security.visibility.Authorizations;
+import org.apache.hadoop.hbase.security.visibility.CellVisibility;
+import org.apache.hadoop.hbase.security.visibility.ScanLabelGenerator;
+import org.apache.hadoop.hbase.security.visibility.SimpleScanLabelGenerator;
+import org.apache.hadoop.hbase.security.visibility.VisibilityClient;
+import org.apache.hadoop.hbase.security.visibility.VisibilityConstants;
+import org.apache.hadoop.hbase.security.visibility.VisibilityController;
+import org.apache.hadoop.hbase.security.visibility.VisibilityUtils;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapred.Utils.OutputFileUtils.OutputFilesFilter;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+
+@Category({MapReduceTests.class, LargeTests.class})
+public class TestImportTSVWithVisibilityLabels implements Configurable {
+
+ private static final Log LOG = LogFactory.getLog(TestImportTSVWithVisibilityLabels.class);
+ protected static final String NAME = TestImportTsv.class.getSimpleName();
+ protected static HBaseTestingUtility util = new HBaseTestingUtility();
+
+ /**
+ * Delete the tmp directory after running doMROnTableTest. Boolean. Default is
+ * true.
+ */
+ protected static final String DELETE_AFTER_LOAD_CONF = NAME + ".deleteAfterLoad";
+
+ /**
+ * Force use of combiner in doMROnTableTest. Boolean. Default is true.
+ */
+ protected static final String FORCE_COMBINER_CONF = NAME + ".forceCombiner";
+
+ private final String FAMILY = "FAM";
+ private final static String TOPSECRET = "topsecret";
+ private final static String PUBLIC = "public";
+ private final static String PRIVATE = "private";
+ private final static String CONFIDENTIAL = "confidential";
+ private final static String SECRET = "secret";
+ private static User SUPERUSER;
+ private static Configuration conf;
+
+ @Rule
+ public TestName name = new TestName();
+
+ @Override
+ public Configuration getConf() {
+ return util.getConfiguration();
+ }
+
+ @Override
+ public void setConf(Configuration conf) {
+ throw new IllegalArgumentException("setConf not supported");
+ }
+
+ @BeforeClass
+ public static void provisionCluster() throws Exception {
+ conf = util.getConfiguration();
+ SUPERUSER = User.createUserForTesting(conf, "admin", new String[] { "supergroup" });
+ conf.set("hbase.superuser", "admin,"+User.getCurrent().getName());
+ conf.setInt("hfile.format.version", 3);
+ conf.set("hbase.coprocessor.master.classes", VisibilityController.class.getName());
+ conf.set("hbase.coprocessor.region.classes", VisibilityController.class.getName());
+ conf.setClass(VisibilityUtils.VISIBILITY_LABEL_GENERATOR_CLASS, SimpleScanLabelGenerator.class,
+ ScanLabelGenerator.class);
+ util.startMiniCluster();
+ // Wait for the labels table to become available
+ util.waitTableEnabled(VisibilityConstants.LABELS_TABLE_NAME.getName(), 50000);
+ createLabels();
+ }
+
+ private static void createLabels() throws IOException, InterruptedException {
+ PrivilegedExceptionAction<VisibilityLabelsResponse> action =
+ new PrivilegedExceptionAction<VisibilityLabelsResponse>() {
+ @Override
+ public VisibilityLabelsResponse run() throws Exception {
+ String[] labels = { SECRET, TOPSECRET, CONFIDENTIAL, PUBLIC, PRIVATE };
+ try (Connection conn = ConnectionFactory.createConnection(conf)) {
+ VisibilityClient.addLabels(conn, labels);
+ LOG.info("Added labels ");
+ } catch (Throwable t) {
+ LOG.error("Error in adding labels" , t);
+ throw new IOException(t);
+ }
+ return null;
+ }
+ };
+ SUPERUSER.runAs(action);
+ }
+
+ @AfterClass
+ public static void releaseCluster() throws Exception {
+ util.shutdownMiniCluster();
+ }
+
+ @Test
+ public void testMROnTable() throws Exception {
+ final TableName tableName = TableName.valueOf(name.getMethodName() + UUID.randomUUID());
+
+ // Prepare the arguments required for the test.
+ String[] args = new String[] {
+ "-D" + ImportTsv.MAPPER_CONF_KEY
+ + "=org.apache.hadoop.hbase.mapreduce.TsvImporterMapper",
+ "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_CELL_VISIBILITY",
+ "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b", tableName.getNameAsString() };
+ String data = "KEY\u001bVALUE1\u001bVALUE2\u001bsecret&private\n";
+ util.createTable(tableName, FAMILY);
+ doMROnTableTest(util, FAMILY, data, args, 1);
+ util.deleteTable(tableName);
+ }
+
+ @Test
+ public void testMROnTableWithDeletes() throws Exception {
+ final TableName tableName = TableName.valueOf(name.getMethodName() + UUID.randomUUID());
+
+ // Prepare the arguments required for the test.
+ String[] args = new String[] {
+ "-D" + ImportTsv.MAPPER_CONF_KEY + "=org.apache.hadoop.hbase.mapreduce.TsvImporterMapper",
+ "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_CELL_VISIBILITY",
+ "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b", tableName.getNameAsString() };
+ String data = "KEY\u001bVALUE1\u001bVALUE2\u001bsecret&private\n";
+ util.createTable(tableName, FAMILY);
+ doMROnTableTest(util, FAMILY, data, args, 1);
+ issueDeleteAndVerifyData(tableName);
+ util.deleteTable(tableName);
+ }
+
+ private void issueDeleteAndVerifyData(TableName tableName) throws IOException {
+ LOG.debug("Validating table after delete.");
+ Table table = util.getConnection().getTable(tableName);
+ boolean verified = false;
+ long pause = conf.getLong("hbase.client.pause", 5 * 1000);
+ int numRetries = conf.getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 5);
+ for (int i = 0; i < numRetries; i++) {
+ try {
+ Delete d = new Delete(Bytes.toBytes("KEY"));
+ d.addFamily(Bytes.toBytes(FAMILY));
+ d.setCellVisibility(new CellVisibility("private&secret"));
+ table.delete(d);
+
+ Scan scan = new Scan();
+ // Scan entire family.
+ scan.addFamily(Bytes.toBytes(FAMILY));
+ scan.setAuthorizations(new Authorizations("secret", "private"));
+ ResultScanner resScanner = table.getScanner(scan);
+ Result[] next = resScanner.next(5);
+ assertEquals(0, next.length);
+ verified = true;
+ break;
+ } catch (NullPointerException e) {
+ // If here, a cell was empty. Presume it's because updates came in
+ // after the scanner had been opened. Wait a while and retry.
+ }
+ try {
+ Thread.sleep(pause);
+ } catch (InterruptedException e) {
+ // continue
+ }
+ }
+ table.close();
+ assertTrue(verified);
+ }
+
+ @Test
+ public void testMROnTableWithBulkload() throws Exception {
+ final TableName tableName = TableName.valueOf(name.getMethodName() + UUID.randomUUID());
+ Path hfiles = new Path(util.getDataTestDirOnTestFS(tableName.getNameAsString()), "hfiles");
+ // Prepare the arguments required for the test.
+ String[] args = new String[] {
+ "-D" + ImportTsv.BULK_OUTPUT_CONF_KEY + "=" + hfiles.toString(),
+ "-D" + ImportTsv.COLUMNS_CONF_KEY
+ + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_CELL_VISIBILITY",
+ "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b", tableName.getNameAsString() };
+ String data = "KEY\u001bVALUE1\u001bVALUE2\u001bsecret&private\n";
+ util.createTable(tableName, FAMILY);
+ doMROnTableTest(util, FAMILY, data, args, 1);
+ util.deleteTable(tableName);
+ }
+
+ @Test
+ public void testBulkOutputWithTsvImporterTextMapper() throws Exception {
+ final TableName table = TableName.valueOf(name.getMethodName() + UUID.randomUUID());
+ String FAMILY = "FAM";
+ Path bulkOutputPath = new Path(util.getDataTestDirOnTestFS(table.getNameAsString()),"hfiles");
+ // Prepare the arguments required for the test.
+ String[] args =
+ new String[] {
+ "-D" + ImportTsv.MAPPER_CONF_KEY
+ + "=org.apache.hadoop.hbase.mapreduce.TsvImporterTextMapper",
+ "-D" + ImportTsv.COLUMNS_CONF_KEY
+ + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_CELL_VISIBILITY",
+ "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b",
+ "-D" + ImportTsv.BULK_OUTPUT_CONF_KEY + "=" + bulkOutputPath.toString(),
+ table.getNameAsString()
+ };
+ String data = "KEY\u001bVALUE4\u001bVALUE8\u001bsecret&private\n";
+ doMROnTableTest(util, FAMILY, data, args, 4);
+ util.deleteTable(table);
+ }
+
+ @Test
+ public void testMRWithOutputFormat() throws Exception {
+ final TableName tableName = TableName.valueOf(name.getMethodName() + UUID.randomUUID());
+ Path hfiles = new Path(util.getDataTestDirOnTestFS(tableName.getNameAsString()), "hfiles");
+ // Prepare the arguments required for the test.
+ String[] args = new String[] {
+ "-D" + ImportTsv.MAPPER_CONF_KEY
+ + "=org.apache.hadoop.hbase.mapreduce.TsvImporterMapper",
+ "-D" + ImportTsv.BULK_OUTPUT_CONF_KEY + "=" + hfiles.toString(),
+ "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_CELL_VISIBILITY",
+ "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b", tableName.getNameAsString() };
+ String data = "KEY\u001bVALUE4\u001bVALUE8\u001bsecret&private\n";
+ util.createTable(tableName, FAMILY);
+ doMROnTableTest(util, FAMILY, data, args, 1);
+ util.deleteTable(tableName);
+ }
+
+ @Test
+ public void testBulkOutputWithInvalidLabels() throws Exception {
+ final TableName tableName = TableName.valueOf(name.getMethodName() + UUID.randomUUID());
+ Path hfiles = new Path(util.getDataTestDirOnTestFS(tableName.getNameAsString()), "hfiles");
+ // Prepare the arguments required for the test.
+ String[] args =
+ new String[] { "-D" + ImportTsv.BULK_OUTPUT_CONF_KEY + "=" + hfiles.toString(),
+ "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_CELL_VISIBILITY",
+ "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b", tableName.getNameAsString() };
+
+ // 2 Data rows, one with valid label and one with invalid label
+ String data =
+ "KEY\u001bVALUE1\u001bVALUE2\u001bprivate\nKEY1\u001bVALUE1\u001bVALUE2\u001binvalid\n";
+ util.createTable(tableName, FAMILY);
+ doMROnTableTest(util, FAMILY, data, args, 1, 2);
+ util.deleteTable(tableName);
+ }
+
+ @Test
+ public void testBulkOutputWithTsvImporterTextMapperWithInvalidLabels() throws Exception {
+ final TableName tableName = TableName.valueOf(name.getMethodName() + UUID.randomUUID());
+ Path hfiles = new Path(util.getDataTestDirOnTestFS(tableName.getNameAsString()), "hfiles");
+ // Prepare the arguments required for the test.
+ String[] args =
+ new String[] {
+ "-D" + ImportTsv.MAPPER_CONF_KEY
+ + "=org.apache.hadoop.hbase.mapreduce.TsvImporterTextMapper",
+ "-D" + ImportTsv.BULK_OUTPUT_CONF_KEY + "=" + hfiles.toString(),
+ "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_CELL_VISIBILITY",
+ "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b", tableName.getNameAsString() };
+
+ // 2 Data rows, one with valid label and one with invalid label
+ String data =
+ "KEY\u001bVALUE1\u001bVALUE2\u001bprivate\nKEY1\u001bVALUE1\u001bVALUE2\u001binvalid\n";
+ util.createTable(tableName, FAMILY);
+ doMROnTableTest(util, FAMILY, data, args, 1, 2);
+ util.deleteTable(tableName);
+ }
+
+ protected static Tool doMROnTableTest(HBaseTestingUtility util, String family, String data,
+ String[] args, int valueMultiplier) throws Exception {
+ return doMROnTableTest(util, family, data, args, valueMultiplier, -1);
+ }
+
+ /**
+ * Run an ImportTsv job and perform basic validation on the results. Returns
+ * the ImportTsv <code>Tool</code> instance so that other tests can inspect it
+ * for further validation as necessary. This method is static to ensure
+ * non-reliance on the instance's util/conf facilities.
+ *
+ * @param args
+ * Any arguments to pass BEFORE inputFile path is appended.
+ *
+ * @param expectedKVCount Expected KV count. pass -1 to skip the kvcount check
+ *
+ * @return The Tool instance used to run the test.
+ */
+ protected static Tool doMROnTableTest(HBaseTestingUtility util, String family, String data,
+ String[] args, int valueMultiplier, int expectedKVCount) throws Exception {
+ TableName table = TableName.valueOf(args[args.length - 1]);
+ Configuration conf = new Configuration(util.getConfiguration());
+
+ // populate input file
+ FileSystem fs = FileSystem.get(conf);
+ Path inputPath = fs.makeQualified(new Path(util
+ .getDataTestDirOnTestFS(table.getNameAsString()), "input.dat"));
+ FSDataOutputStream op = fs.create(inputPath, true);
+ if (data == null) {
+ data = "KEY\u001bVALUE1\u001bVALUE2\n";
+ }
+ op.write(Bytes.toBytes(data));
+ op.close();
+ LOG.debug(String.format("Wrote test data to file: %s", inputPath));
+
+ if (conf.getBoolean(FORCE_COMBINER_CONF, true)) {
+ LOG.debug("Forcing combiner.");
+ conf.setInt("mapreduce.map.combine.minspills", 1);
+ }
+
+ // run the import
+ List<String> argv = new ArrayList<>(Arrays.asList(args));
+ argv.add(inputPath.toString());
+ Tool tool = new ImportTsv();
+ LOG.debug("Running ImportTsv with arguments: " + argv);
+ assertEquals(0, ToolRunner.run(conf, tool, argv.toArray(args)));
+
+ // Perform basic validation. If the input args did not include
+ // ImportTsv.BULK_OUTPUT_CONF_KEY then validate data in the table.
+ // Otherwise, validate presence of hfiles.
+ boolean createdHFiles = false;
+ String outputPath = null;
+ for (String arg : argv) {
+ if (arg.contains(ImportTsv.BULK_OUTPUT_CONF_KEY)) {
+ createdHFiles = true;
+ // split '-Dfoo=bar' on '=' and keep 'bar'
+ outputPath = arg.split("=")[1];
+ break;
+ }
+ }
+ LOG.debug("validating the table " + createdHFiles);
+ if (createdHFiles)
+ validateHFiles(fs, outputPath, family, expectedKVCount);
+ else
+ validateTable(conf, table, family, valueMultiplier);
+
+ if (conf.getBoolean(DELETE_AFTER_LOAD_CONF, true)) {
+ LOG.debug("Deleting test subdirectory");
+ util.cleanupDataTestDirOnTestFS(table.getNameAsString());
+ }
+ return tool;
+ }
+
+ /**
+ * Confirm ImportTsv via HFiles on fs.
+ */
+ private static void validateHFiles(FileSystem fs, String outputPath, String family,
+ int expectedKVCount) throws IOException {
+
+ // validate number and content of output columns
+ LOG.debug("Validating HFiles.");
+ Set<String> configFamilies = new HashSet<>();
+ configFamilies.add(family);
+ Set<String> foundFamilies = new HashSet<>();
+ int actualKVCount = 0;
+ for (FileStatus cfStatus : fs.listStatus(new Path(outputPath), new OutputFilesFilter())) {
+ LOG.debug("The output path has files");
+ String[] elements = cfStatus.getPath().toString().split(Path.SEPARATOR);
+ String cf = elements[elements.length - 1];
+ foundFamilies.add(cf);
+ assertTrue(String.format(
+ "HFile ouput contains a column family (%s) not present in input families (%s)", cf,
+ configFamilies), configFamilies.contains(cf));
+ for (FileStatus hfile : fs.listStatus(cfStatus.getPath())) {
+ assertTrue(String.format("HFile %s appears to contain no data.", hfile.getPath()),
+ hfile.getLen() > 0);
+ if (expectedKVCount > -1) {
+ actualKVCount += getKVCountFromHfile(fs, hfile.getPath());
+ }
+ }
+ }
+ if (expectedKVCount > -1) {
+ assertTrue(String.format(
+ "KV count in output hfile=<%d> doesn't match with expected KV count=<%d>", actualKVCount,
+ expectedKVCount), actualKVCount == expectedKVCount);
+ }
+ }
+
+ /**
+ * Confirm ImportTsv via data in online table.
+ */
+ private static void validateTable(Configuration conf, TableName tableName, String family,
+ int valueMultiplier) throws IOException {
+
+ LOG.debug("Validating table.");
+ Table table = util.getConnection().getTable(tableName);
+ boolean verified = false;
+ long pause = conf.getLong("hbase.client.pause", 5 * 1000);
+ int numRetries = conf.getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 5);
+ for (int i = 0; i < numRetries; i++) {
+ try {
+ Scan scan = new Scan();
+ // Scan entire family.
+ scan.addFamily(Bytes.toBytes(family));
+ scan.setAuthorizations(new Authorizations("secret","private"));
+ ResultScanner resScanner = table.getScanner(scan);
+ Result[] next = resScanner.next(5);
+ assertEquals(1, next.length);
+ for (Result res : resScanner) {
+ LOG.debug("Getting results " + res.size());
+ assertTrue(res.size() == 2);
+ List<Cell> kvs = res.listCells();
+ assertTrue(CellUtil.matchingRow(kvs.get(0), Bytes.toBytes("KEY")));
+ assertTrue(CellUtil.matchingRow(kvs.get(1), Bytes.toBytes("KEY")));
+ assertTrue(CellUtil.matchingValue(kvs.get(0), Bytes.toBytes("VALUE" + valueMultiplier)));
+ assertTrue(CellUtil.matchingValue(kvs.get(1),
+ Bytes.toBytes("VALUE" + 2 * valueMultiplier)));
+ // Only one result set is expected, so let it loop.
+ }
+ verified = true;
+ break;
+ } catch (NullPointerException e) {
+ // If here, a cell was empty. Presume it's because updates came in
+ // after the scanner had been opened. Wait a while and retry.
+ }
+ try {
+ Thread.sleep(pause);
+ } catch (InterruptedException e) {
+ // continue
+ }
+ }
+ table.close();
+ assertTrue(verified);
+ }
+
+ /**
+ * Returns the total number of KVs in the given HFile.
+ * @param fs File System
+ * @param p HFile path
+ * @return KV count in the given hfile
+ * @throws IOException
+ */
+ private static int getKVCountFromHfile(FileSystem fs, Path p) throws IOException {
+ Configuration conf = util.getConfiguration();
+ HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf);
+ reader.loadFileInfo();
+ HFileScanner scanner = reader.getScanner(false, false);
+ scanner.seekTo();
+ int count = 0;
+ do {
+ count++;
+ } while (scanner.next());
+ reader.close();
+ return count;
+ }
+
+}
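The visibility round trip these tests drive through ImportTsv can also be exercised directly
with the client API. A hedged sketch (hypothetical helper; Put comes from
org.apache.hadoop.hbase.client, the other classes are already imported above, and it assumes
the "secret" and "private" labels were added as in createLabels):

    /** Hypothetical helper: write a labelled cell and read it back with matching auths. */
    static void visibilityRoundTrip(Table table) throws IOException {
      Put p = new Put(Bytes.toBytes("KEY"));
      p.addColumn(Bytes.toBytes("FAM"), Bytes.toBytes("A"), Bytes.toBytes("VALUE1"));
      p.setCellVisibility(new CellVisibility("secret&private"));
      table.put(p);

      Scan scan = new Scan();
      scan.addFamily(Bytes.toBytes("FAM"));
      scan.setAuthorizations(new Authorizations("secret", "private"));
      try (ResultScanner scanner = table.getScanner(scan)) {
        for (Result r : scanner) {
          // only cells visible under both labels show up here
        }
      }
    }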
[13/41] hbase git commit: HBASE-18640 Move mapreduce out of
hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatImpl.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatImpl.java
deleted file mode 100644
index bf11473..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatImpl.java
+++ /dev/null
@@ -1,412 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.hadoop.hbase.client.TableDescriptor;
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.HDFSBlocksDistribution;
-import org.apache.hadoop.hbase.HDFSBlocksDistribution.HostAndWeight;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.classification.InterfaceStability;
-import org.apache.hadoop.hbase.client.ClientSideRegionScanner;
-import org.apache.hadoop.hbase.client.IsolationLevel;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.MapReduceProtos.TableSnapshotRegionSplit;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
-import org.apache.hadoop.hbase.regionserver.HRegion;
-import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper;
-import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
-import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.FSUtils;
-import org.apache.hadoop.io.Writable;
-
-import java.io.ByteArrayOutputStream;
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.UUID;
-
-/**
- * Hadoop MR API-agnostic implementation for mapreduce over table snapshots.
- */
-@InterfaceAudience.Private
-@InterfaceStability.Evolving
-public class TableSnapshotInputFormatImpl {
- // TODO: Snapshots files are owned in fs by the hbase user. There is no
- // easy way to delegate access.
-
- public static final Log LOG = LogFactory.getLog(TableSnapshotInputFormatImpl.class);
-
- private static final String SNAPSHOT_NAME_KEY = "hbase.TableSnapshotInputFormat.snapshot.name";
- // key for specifying the root dir of the restored snapshot
- protected static final String RESTORE_DIR_KEY = "hbase.TableSnapshotInputFormat.restore.dir";
-
- /** See {@link #getBestLocations(Configuration, HDFSBlocksDistribution)} */
- private static final String LOCALITY_CUTOFF_MULTIPLIER =
- "hbase.tablesnapshotinputformat.locality.cutoff.multiplier";
- private static final float DEFAULT_LOCALITY_CUTOFF_MULTIPLIER = 0.8f;
-
- /**
- * Implementation class for InputSplit logic common between mapred and mapreduce.
- */
- public static class InputSplit implements Writable {
-
- private TableDescriptor htd;
- private HRegionInfo regionInfo;
- private String[] locations;
- private String scan;
- private String restoreDir;
-
- // constructor for mapreduce framework / Writable
- public InputSplit() {}
-
- public InputSplit(TableDescriptor htd, HRegionInfo regionInfo, List<String> locations,
- Scan scan, Path restoreDir) {
- this.htd = htd;
- this.regionInfo = regionInfo;
- if (locations == null || locations.isEmpty()) {
- this.locations = new String[0];
- } else {
- this.locations = locations.toArray(new String[locations.size()]);
- }
- try {
- this.scan = scan != null ? TableMapReduceUtil.convertScanToString(scan) : "";
- } catch (IOException e) {
- LOG.warn("Failed to convert Scan to String", e);
- }
-
- this.restoreDir = restoreDir.toString();
- }
-
- public TableDescriptor getHtd() {
- return htd;
- }
-
- public String getScan() {
- return scan;
- }
-
- public String getRestoreDir() {
- return restoreDir;
- }
-
- public long getLength() {
- //TODO: We can obtain the file sizes of the snapshot here.
- return 0;
- }
-
- public String[] getLocations() {
- return locations;
- }
-
- public TableDescriptor getTableDescriptor() {
- return htd;
- }
-
- public HRegionInfo getRegionInfo() {
- return regionInfo;
- }
-
- // TODO: We should have ProtobufSerialization in Hadoop, and directly use PB objects instead of
- // doing this wrapping with Writables.
- @Override
- public void write(DataOutput out) throws IOException {
- TableSnapshotRegionSplit.Builder builder = TableSnapshotRegionSplit.newBuilder()
- .setTable(ProtobufUtil.toTableSchema(htd))
- .setRegion(HRegionInfo.convert(regionInfo));
-
- for (String location : locations) {
- builder.addLocations(location);
- }
-
- TableSnapshotRegionSplit split = builder.build();
-
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- split.writeTo(baos);
- baos.close();
- byte[] buf = baos.toByteArray();
- out.writeInt(buf.length);
- out.write(buf);
-
- Bytes.writeByteArray(out, Bytes.toBytes(scan));
- Bytes.writeByteArray(out, Bytes.toBytes(restoreDir));
-
- }
-
- @Override
- public void readFields(DataInput in) throws IOException {
- int len = in.readInt();
- byte[] buf = new byte[len];
- in.readFully(buf);
- TableSnapshotRegionSplit split = TableSnapshotRegionSplit.PARSER.parseFrom(buf);
- this.htd = ProtobufUtil.toTableDescriptor(split.getTable());
- this.regionInfo = HRegionInfo.convert(split.getRegion());
- List<String> locationsList = split.getLocationsList();
- this.locations = locationsList.toArray(new String[locationsList.size()]);
-
- this.scan = Bytes.toString(Bytes.readByteArray(in));
- this.restoreDir = Bytes.toString(Bytes.readByteArray(in));
- }
- }
-
- /**
- * Implementation class for RecordReader logic common between mapred and mapreduce.
- */
- public static class RecordReader {
- private InputSplit split;
- private Scan scan;
- private Result result = null;
- private ImmutableBytesWritable row = null;
- private ClientSideRegionScanner scanner;
-
- public ClientSideRegionScanner getScanner() {
- return scanner;
- }
-
- public void initialize(InputSplit split, Configuration conf) throws IOException {
- this.scan = TableMapReduceUtil.convertStringToScan(split.getScan());
- this.split = split;
- TableDescriptor htd = split.htd;
- HRegionInfo hri = this.split.getRegionInfo();
- FileSystem fs = FSUtils.getCurrentFileSystem(conf);
-
-
- // region is immutable, this should be fine,
- // otherwise we have to set the thread read point
- scan.setIsolationLevel(IsolationLevel.READ_UNCOMMITTED);
- // disable caching of data blocks
- scan.setCacheBlocks(false);
-
- scanner =
- new ClientSideRegionScanner(conf, fs, new Path(split.restoreDir), htd, hri, scan, null);
- }
-
- public boolean nextKeyValue() throws IOException {
- result = scanner.next();
- if (result == null) {
- //we are done
- return false;
- }
-
- if (this.row == null) {
- this.row = new ImmutableBytesWritable();
- }
- this.row.set(result.getRow());
- return true;
- }
-
- public ImmutableBytesWritable getCurrentKey() {
- return row;
- }
-
- public Result getCurrentValue() {
- return result;
- }
-
- public long getPos() {
- return 0;
- }
-
- public float getProgress() {
- return 0; // TODO: use total bytes to estimate
- }
-
- public void close() {
- if (this.scanner != null) {
- this.scanner.close();
- }
- }
- }
-
- public static List<InputSplit> getSplits(Configuration conf) throws IOException {
- String snapshotName = getSnapshotName(conf);
-
- Path rootDir = FSUtils.getRootDir(conf);
- FileSystem fs = rootDir.getFileSystem(conf);
-
- SnapshotManifest manifest = getSnapshotManifest(conf, snapshotName, rootDir, fs);
-
- List<HRegionInfo> regionInfos = getRegionInfosFromManifest(manifest);
-
- // TODO: mapred does not support scan as input API. Work around for now.
- Scan scan = extractScanFromConf(conf);
- // the temp dir where the snapshot is restored
- Path restoreDir = new Path(conf.get(RESTORE_DIR_KEY));
-
- return getSplits(scan, manifest, regionInfos, restoreDir, conf);
- }
-
- public static List<HRegionInfo> getRegionInfosFromManifest(SnapshotManifest manifest) {
- List<SnapshotRegionManifest> regionManifests = manifest.getRegionManifests();
- if (regionManifests == null) {
- throw new IllegalArgumentException("Snapshot seems empty");
- }
-
- List<HRegionInfo> regionInfos = Lists.newArrayListWithCapacity(regionManifests.size());
-
- for (SnapshotRegionManifest regionManifest : regionManifests) {
- HRegionInfo hri = HRegionInfo.convert(regionManifest.getRegionInfo());
- if (hri.isOffline() && (hri.isSplit() || hri.isSplitParent())) {
- continue;
- }
- regionInfos.add(hri);
- }
- return regionInfos;
- }
-
- public static SnapshotManifest getSnapshotManifest(Configuration conf, String snapshotName,
- Path rootDir, FileSystem fs) throws IOException {
- Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir);
- SnapshotDescription snapshotDesc = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
- return SnapshotManifest.open(conf, fs, snapshotDir, snapshotDesc);
- }
-
- public static Scan extractScanFromConf(Configuration conf) throws IOException {
- Scan scan = null;
- if (conf.get(TableInputFormat.SCAN) != null) {
- scan = TableMapReduceUtil.convertStringToScan(conf.get(TableInputFormat.SCAN));
- } else if (conf.get(org.apache.hadoop.hbase.mapred.TableInputFormat.COLUMN_LIST) != null) {
- String[] columns =
- conf.get(org.apache.hadoop.hbase.mapred.TableInputFormat.COLUMN_LIST).split(" ");
- scan = new Scan();
- for (String col : columns) {
- scan.addFamily(Bytes.toBytes(col));
- }
- } else {
- throw new IllegalArgumentException("Unable to create scan");
- }
- return scan;
- }
-
- public static List<InputSplit> getSplits(Scan scan, SnapshotManifest manifest,
- List<HRegionInfo> regionManifests, Path restoreDir, Configuration conf) throws IOException {
- // load table descriptor
- TableDescriptor htd = manifest.getTableDescriptor();
-
- Path tableDir = FSUtils.getTableDir(restoreDir, htd.getTableName());
-
- List<InputSplit> splits = new ArrayList<>();
- for (HRegionInfo hri : regionManifests) {
- // load region descriptor
-
- if (CellUtil.overlappingKeys(scan.getStartRow(), scan.getStopRow(), hri.getStartKey(),
- hri.getEndKey())) {
- // compute HDFS locations from snapshot files (which will get the locations for
- // referred hfiles)
- List<String> hosts = getBestLocations(conf,
- HRegion.computeHDFSBlocksDistribution(conf, htd, hri, tableDir));
-
- int len = Math.min(3, hosts.size());
- hosts = hosts.subList(0, len);
- splits.add(new InputSplit(htd, hri, hosts, scan, restoreDir));
- }
- }
-
- return splits;
-
- }
-
- /**
- * This computes the locations to be passed from the InputSplit. MR/Yarn schedulers do not take
- * weights into account, so they treat every location passed from the input split as equal. We
- * do not want to blindly pass all the locations, since we are creating one split per region, and
- * the region's blocks are distributed throughout the cluster unless favored node assignment
- * is used. In the expected stable case, only one location will contain most of the blocks as
- * local.
- * On the other hand, in favored node assignment, 3 nodes will contain highly local blocks. Here
- * we are doing a simple heuristic, where we will pass all hosts which have at least 80%
- * (hbase.tablesnapshotinputformat.locality.cutoff.multiplier) as much block locality as the top
- * host with the best locality.
- */
- public static List<String> getBestLocations(
- Configuration conf, HDFSBlocksDistribution blockDistribution) {
- List<String> locations = new ArrayList<>(3);
-
- HostAndWeight[] hostAndWeights = blockDistribution.getTopHostsWithWeights();
-
- if (hostAndWeights.length == 0) {
- return locations;
- }
-
- HostAndWeight topHost = hostAndWeights[0];
- locations.add(topHost.getHost());
-
- // Heuristic: filter all hosts which have at least cutoffMultiplier % of block locality
- double cutoffMultiplier
- = conf.getFloat(LOCALITY_CUTOFF_MULTIPLIER, DEFAULT_LOCALITY_CUTOFF_MULTIPLIER);
-
- double filterWeight = topHost.getWeight() * cutoffMultiplier;
-
- for (int i = 1; i < hostAndWeights.length; i++) {
- if (hostAndWeights[i].getWeight() >= filterWeight) {
- locations.add(hostAndWeights[i].getHost());
- } else {
- break;
- }
- }
-
- return locations;
- }
-
- private static String getSnapshotName(Configuration conf) {
- String snapshotName = conf.get(SNAPSHOT_NAME_KEY);
- if (snapshotName == null) {
- throw new IllegalArgumentException("Snapshot name must be provided");
- }
- return snapshotName;
- }
-
- /**
- * Configures the job to use TableSnapshotInputFormat to read from a snapshot.
- * @param conf the job configuration
- * @param snapshotName the name of the snapshot to read from
- * @param restoreDir a temporary directory to restore the snapshot into. Current user should
- * have write permissions to this directory, and this should not be a subdirectory of rootdir.
- * After the job is finished, restoreDir can be deleted.
- * @throws IOException if an error occurs
- */
- public static void setInput(Configuration conf, String snapshotName, Path restoreDir)
- throws IOException {
- conf.set(SNAPSHOT_NAME_KEY, snapshotName);
-
- Path rootDir = FSUtils.getRootDir(conf);
- FileSystem fs = rootDir.getFileSystem(conf);
-
- restoreDir = new Path(restoreDir, UUID.randomUUID().toString());
-
- // TODO: restore from record readers to parallelize.
- RestoreSnapshotHelper.copySnapshotForScanner(conf, fs, rootDir, restoreDir, snapshotName);
-
- conf.set(RESTORE_DIR_KEY, restoreDir.toString());
- }
-}
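
For orientation, a minimal sketch (not part of this patch) of pointing a MapReduce job at a
snapshot through the public TableMapReduceUtil entry point; the snapshot name, restore path,
and RowCountMapper class are illustrative only:

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.Result;
    import org.apache.hadoop.hbase.client.Scan;
    import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
    import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
    import org.apache.hadoop.hbase.mapreduce.TableMapper;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;

    public class SnapshotScanDriver {
      // Illustrative mapper; it just counts rows read from the restored snapshot.
      public static class RowCountMapper extends TableMapper<Text, LongWritable> {
        @Override
        protected void map(ImmutableBytesWritable row, Result value, Context context)
            throws IOException, InterruptedException {
          context.write(new Text("rows"), new LongWritable(1L));
        }
      }

      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        Job job = Job.getInstance(conf, "snapshot-scan");
        Scan scan = new Scan();
        scan.setCacheBlocks(false); // mirrors what the RecordReader above enforces
        TableMapReduceUtil.initTableSnapshotMapperJob(
            "my_snapshot",                        // snapshot to read (illustrative)
            scan,
            RowCountMapper.class,
            Text.class, LongWritable.class,
            job,
            true,                                 // ship HBase jars with the job
            new Path("/tmp/snapshot-restore"));   // scratch dir outside hbase.rootdir
        System.exit(job.waitForCompletion(true) ? 0 : 1);
      }
    }
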
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSplit.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSplit.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSplit.java
deleted file mode 100644
index 13c7c67..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSplit.java
+++ /dev/null
@@ -1,395 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.Arrays;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.io.WritableUtils;
-import org.apache.hadoop.mapreduce.InputSplit;
-
-/**
- * A table split corresponds to a key range (low, high) and an optional scanner.
- * All references to row below refer to the key of the row.
- */
-@InterfaceAudience.Public
-public class TableSplit extends InputSplit
-implements Writable, Comparable<TableSplit> {
- /** @deprecated LOG variable will be made private. Fix in HBase 3.0. */
- @Deprecated
- public static final Log LOG = LogFactory.getLog(TableSplit.class);
-
- // should be < 0 (@see #readFields(DataInput))
- // version 1 supports Scan data member
- enum Version {
- UNVERSIONED(0),
- // Initial number we put on TableSplit when we introduced versioning.
- INITIAL(-1),
- // Added an encoded region name field for easier identification of split -> region
- WITH_ENCODED_REGION_NAME(-2);
-
- final int code;
- static final Version[] byCode;
- static {
- byCode = Version.values();
- for (int i = 0; i < byCode.length; i++) {
- if (byCode[i].code != -1 * i) {
- throw new AssertionError("Values in this enum should be descending by one");
- }
- }
- }
-
- Version(int code) {
- this.code = code;
- }
-
- boolean atLeast(Version other) {
- return code <= other.code;
- }
-
- static Version fromCode(int code) {
- return byCode[code * -1];
- }
- }
-
- private static final Version VERSION = Version.WITH_ENCODED_REGION_NAME;
- private TableName tableName;
- private byte [] startRow;
- private byte [] endRow;
- private String regionLocation;
- private String encodedRegionName = "";
- private String scan = ""; // stores the serialized form of the Scan
- private long length; // Contains estimation of region size in bytes
-
- /** Default constructor. */
- public TableSplit() {
- this((TableName)null, null, HConstants.EMPTY_BYTE_ARRAY,
- HConstants.EMPTY_BYTE_ARRAY, "");
- }
-
- /**
- * Creates a new instance while assigning all variables.
- * The length of the region is set to 0.
- * The encoded name of the region is set to blank.
- *
- * @param tableName The name of the current table.
- * @param scan The scan associated with this split.
- * @param startRow The start row of the split.
- * @param endRow The end row of the split.
- * @param location The location of the region.
- */
- public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow,
- final String location) {
- this(tableName, scan, startRow, endRow, location, 0L);
- }
-
- /**
- * Creates a new instance while assigning all variables.
- * The encoded name of the region is set to blank.
- *
- * @param tableName The name of the current table.
- * @param scan The scan associated with this split.
- * @param startRow The start row of the split.
- * @param endRow The end row of the split.
- * @param location The location of the region.
- */
- public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow,
- final String location, long length) {
- this(tableName, scan, startRow, endRow, location, "", length);
- }
-
- /**
- * Creates a new instance while assigning all variables.
- *
- * @param tableName The name of the current table.
- * @param scan The scan associated with this split.
- * @param startRow The start row of the split.
- * @param endRow The end row of the split.
- * @param location The location of the region.
- * @param encodedRegionName The encoded name of the region.
- * @param length Size of the region in bytes.
- */
- public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow,
- final String location, final String encodedRegionName, long length) {
- this.tableName = tableName;
- try {
- this.scan =
- (null == scan) ? "" : TableMapReduceUtil.convertScanToString(scan);
- } catch (IOException e) {
- LOG.warn("Failed to convert Scan to String", e);
- }
- this.startRow = startRow;
- this.endRow = endRow;
- this.regionLocation = location;
- this.encodedRegionName = encodedRegionName;
- this.length = length;
- }
-
- /**
- * Creates a new instance without a scanner.
- * The length of the region is set to 0.
- *
- * @param tableName The name of the current table.
- * @param startRow The start row of the split.
- * @param endRow The end row of the split.
- * @param location The location of the region.
- */
- public TableSplit(TableName tableName, byte[] startRow, byte[] endRow,
- final String location) {
- this(tableName, null, startRow, endRow, location);
- }
-
- /**
- * Creates a new instance without a scanner.
- *
- * @param tableName The name of the current table.
- * @param startRow The start row of the split.
- * @param endRow The end row of the split.
- * @param location The location of the region.
- * @param length Size of region in bytes
- */
- public TableSplit(TableName tableName, byte[] startRow, byte[] endRow,
- final String location, long length) {
- this(tableName, null, startRow, endRow, location, length);
- }
-
- /**
- * Returns a Scan object from the stored string representation.
- *
- * @return A Scan object based on the stored scanner.
- * @throws IOException if the stored scan string cannot be deserialized.
- */
- public Scan getScan() throws IOException {
- return TableMapReduceUtil.convertStringToScan(this.scan);
- }
-
- /**
- * Returns the table name converted to a byte array.
- * @see #getTable()
- * @return The table name.
- */
- public byte [] getTableName() {
- return tableName.getName();
- }
-
- /**
- * Returns the table name.
- *
- * @return The table name.
- */
- public TableName getTable() {
- // Ideally this accessor would be called getTableName, but that name is already taken by the
- // pre-existing byte[]-returning method above, so both are kept.
- return tableName;
- }
-
- /**
- * Returns the start row.
- *
- * @return The start row.
- */
- public byte [] getStartRow() {
- return startRow;
- }
-
- /**
- * Returns the end row.
- *
- * @return The end row.
- */
- public byte [] getEndRow() {
- return endRow;
- }
-
- /**
- * Returns the region location.
- *
- * @return The region's location.
- */
- public String getRegionLocation() {
- return regionLocation;
- }
-
- /**
- * Returns the region's location as an array.
- *
- * @return The array containing the region location.
- * @see org.apache.hadoop.mapreduce.InputSplit#getLocations()
- */
- @Override
- public String[] getLocations() {
- return new String[] {regionLocation};
- }
-
- /**
- * Returns the region's encoded name.
- *
- * @return The region's encoded name.
- */
- public String getEncodedRegionName() {
- return encodedRegionName;
- }
-
- /**
- * Returns the length of the split.
- *
- * @return The length of the split.
- * @see org.apache.hadoop.mapreduce.InputSplit#getLength()
- */
- @Override
- public long getLength() {
- return length;
- }
-
- /**
- * Reads the values of each field.
- *
- * @param in The input to read from.
- * @throws IOException When reading the input fails.
- */
- @Override
- public void readFields(DataInput in) throws IOException {
- Version version = Version.UNVERSIONED;
- // TableSplit was not versioned in the beginning.
- // In order to introduce it now, we make use of the fact
- // that tableName was written with Bytes.writeByteArray,
- // which encodes the array length as a vint which is >= 0.
- // Hence if the vint is >= 0 we have an old version and the vint
- // encodes the length of tableName.
- // If < 0 we just read the version and the next vint is the length.
- // @see Bytes#readByteArray(DataInput)
- int len = WritableUtils.readVInt(in);
- if (len < 0) {
- // what we just read was the version
- version = Version.fromCode(len);
- len = WritableUtils.readVInt(in);
- }
- byte[] tableNameBytes = new byte[len];
- in.readFully(tableNameBytes);
- tableName = TableName.valueOf(tableNameBytes);
- startRow = Bytes.readByteArray(in);
- endRow = Bytes.readByteArray(in);
- regionLocation = Bytes.toString(Bytes.readByteArray(in));
- if (version.atLeast(Version.INITIAL)) {
- scan = Bytes.toString(Bytes.readByteArray(in));
- }
- length = WritableUtils.readVLong(in);
- if (version.atLeast(Version.WITH_ENCODED_REGION_NAME)) {
- encodedRegionName = Bytes.toString(Bytes.readByteArray(in));
- }
- }
-
- /**
- * Writes the field values to the output.
- *
- * @param out The output to write to.
- * @throws IOException When writing the values to the output fails.
- */
- @Override
- public void write(DataOutput out) throws IOException {
- WritableUtils.writeVInt(out, VERSION.code);
- Bytes.writeByteArray(out, tableName.getName());
- Bytes.writeByteArray(out, startRow);
- Bytes.writeByteArray(out, endRow);
- Bytes.writeByteArray(out, Bytes.toBytes(regionLocation));
- Bytes.writeByteArray(out, Bytes.toBytes(scan));
- WritableUtils.writeVLong(out, length);
- Bytes.writeByteArray(out, Bytes.toBytes(encodedRegionName));
- }
-
- /**
- * Returns the details about this instance as a string.
- *
- * @return The values of this instance as a string.
- * @see java.lang.Object#toString()
- */
- @Override
- public String toString() {
- StringBuilder sb = new StringBuilder();
- sb.append("HBase table split(");
- sb.append("table name: ").append(tableName);
- // null scan input is represented by ""
- String printScan = "";
- if (!scan.equals("")) {
- try {
- // get the real scan here in toString, not the Base64 string
- printScan = TableMapReduceUtil.convertStringToScan(scan).toString();
- }
- catch (IOException e) {
- printScan = "";
- }
- }
- sb.append(", scan: ").append(printScan);
- sb.append(", start row: ").append(Bytes.toStringBinary(startRow));
- sb.append(", end row: ").append(Bytes.toStringBinary(endRow));
- sb.append(", region location: ").append(regionLocation);
- sb.append(", encoded region name: ").append(encodedRegionName);
- sb.append(")");
- return sb.toString();
- }
-
- /**
- * Compares this split against the given one.
- *
- * @param split The split to compare to.
- * @return The result of the comparison.
- * @see java.lang.Comparable#compareTo(java.lang.Object)
- */
- @Override
- public int compareTo(TableSplit split) {
- // If the table names of the two splits are the same, compare the start rows;
- // otherwise compare the table names.
- int tableNameComparison =
- getTable().compareTo(split.getTable());
- return tableNameComparison != 0 ? tableNameComparison : Bytes.compareTo(
- getStartRow(), split.getStartRow());
- }
-
- @Override
- public boolean equals(Object o) {
- if (o == null || !(o instanceof TableSplit)) {
- return false;
- }
- return tableName.equals(((TableSplit)o).tableName) &&
- Bytes.equals(startRow, ((TableSplit)o).startRow) &&
- Bytes.equals(endRow, ((TableSplit)o).endRow) &&
- regionLocation.equals(((TableSplit)o).regionLocation);
- }
-
- @Override
- public int hashCode() {
- int result = tableName != null ? tableName.hashCode() : 0;
- result = 31 * result + (scan != null ? scan.hashCode() : 0);
- result = 31 * result + (startRow != null ? Arrays.hashCode(startRow) : 0);
- result = 31 * result + (endRow != null ? Arrays.hashCode(endRow) : 0);
- result = 31 * result + (regionLocation != null ? regionLocation.hashCode() : 0);
- result = 31 * result + (encodedRegionName != null ? encodedRegionName.hashCode() : 0);
- return result;
- }
-}
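
To make the negative-vint versioning in readFields() above concrete, here is a small, assumed
round-trip sketch using Hadoop's in-memory data buffers; the table name, rows, and location are
made up:

    import org.apache.hadoop.hbase.TableName;
    import org.apache.hadoop.hbase.mapreduce.TableSplit;
    import org.apache.hadoop.hbase.util.Bytes;
    import org.apache.hadoop.io.DataInputBuffer;
    import org.apache.hadoop.io.DataOutputBuffer;

    public class TableSplitRoundTrip {
      public static void main(String[] args) throws Exception {
        TableSplit split = new TableSplit(TableName.valueOf("t1"),
            Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), "rs1.example.com", 1024L);

        // write() first emits VERSION.code (-2) as a vint, then the table name bytes.
        DataOutputBuffer out = new DataOutputBuffer();
        split.write(out);

        // readFields() sees the negative vint, switches to the versioned layout,
        // and reads the table name length from the next vint.
        DataInputBuffer in = new DataInputBuffer();
        in.reset(out.getData(), out.getLength());
        TableSplit copy = new TableSplit();
        copy.readFields(in);

        System.out.println(copy); // prints table, scan, rows, location, encoded region name
      }
    }
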
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TextSortReducer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TextSortReducer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TextSortReducer.java
deleted file mode 100644
index 84324e2..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TextSortReducer.java
+++ /dev/null
@@ -1,213 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Set;
-import java.util.TreeSet;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.ArrayBackedTag;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellComparator;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.KeyValueUtil;
-import org.apache.hadoop.hbase.Tag;
-import org.apache.hadoop.hbase.TagType;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.security.visibility.InvalidLabelException;
-import org.apache.hadoop.hbase.util.Base64;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Counter;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.util.StringUtils;
-
-/**
- * Emits sorted KeyValues. Parses the passed text, creates KeyValues, and sorts them before emitting.
- * @see HFileOutputFormat2
- * @see KeyValueSortReducer
- * @see PutSortReducer
- */
-@InterfaceAudience.Public
-public class TextSortReducer extends
- Reducer<ImmutableBytesWritable, Text, ImmutableBytesWritable, KeyValue> {
-
- /** Timestamp for all inserted rows */
- private long ts;
-
- /** Column separator */
- private String separator;
-
- /** Should skip bad lines */
- private boolean skipBadLines;
-
- private Counter badLineCount;
-
- private ImportTsv.TsvParser parser;
-
- /** Cell visibility expr **/
- private String cellVisibilityExpr;
-
- /** Cell TTL */
- private long ttl;
-
- private CellCreator kvCreator;
-
- public long getTs() {
- return ts;
- }
-
- public boolean getSkipBadLines() {
- return skipBadLines;
- }
-
- public Counter getBadLineCount() {
- return badLineCount;
- }
-
- public void incrementBadLineCount(int count) {
- this.badLineCount.increment(count);
- }
-
- /**
- * Handles initializing this class with objects specific to it (i.e., the parser).
- * Common initialization that might be leveraged by a subclass is done in
- * <code>doSetup</code>. Hence a subclass may choose to override this method
- * and call <code>doSetup</code> as well before handling its own custom params.
- *
- * @param context
- */
- @Override
- protected void setup(Context context) {
- Configuration conf = context.getConfiguration();
- doSetup(context, conf);
-
- parser = new ImportTsv.TsvParser(conf.get(ImportTsv.COLUMNS_CONF_KEY), separator);
- if (parser.getRowKeyColumnIndex() == -1) {
- throw new RuntimeException("No row key column specified");
- }
- this.kvCreator = new CellCreator(conf);
- }
-
- /**
- * Handles common parameter initialization that a subclass might want to leverage.
- * @param context
- * @param conf
- */
- protected void doSetup(Context context, Configuration conf) {
- // If a custom separator has been used,
- // decode it back from Base64 encoding.
- separator = conf.get(ImportTsv.SEPARATOR_CONF_KEY);
- if (separator == null) {
- separator = ImportTsv.DEFAULT_SEPARATOR;
- } else {
- separator = new String(Base64.decode(separator));
- }
-
- // Should never get 0 as we are setting this to a valid value in job configuration.
- ts = conf.getLong(ImportTsv.TIMESTAMP_CONF_KEY, 0);
-
- skipBadLines = context.getConfiguration().getBoolean(ImportTsv.SKIP_LINES_CONF_KEY, true);
- badLineCount = context.getCounter("ImportTsv", "Bad Lines");
- }
-
- @Override
- protected void reduce(
- ImmutableBytesWritable rowKey,
- java.lang.Iterable<Text> lines,
- Reducer<ImmutableBytesWritable, Text,
- ImmutableBytesWritable, KeyValue>.Context context)
- throws java.io.IOException, InterruptedException
- {
- // although reduce() is called per-row, handle pathological case
- long threshold = context.getConfiguration().getLong(
- "reducer.row.threshold", 1L * (1<<30));
- Iterator<Text> iter = lines.iterator();
- while (iter.hasNext()) {
- Set<KeyValue> kvs = new TreeSet<>(CellComparator.COMPARATOR);
- long curSize = 0;
- // stop at the end or the RAM threshold
- while (iter.hasNext() && curSize < threshold) {
- Text line = iter.next();
- byte[] lineBytes = line.getBytes();
- try {
- ImportTsv.TsvParser.ParsedLine parsed = parser.parse(lineBytes, line.getLength());
- // Retrieve timestamp if exists
- ts = parsed.getTimestamp(ts);
- cellVisibilityExpr = parsed.getCellVisibility();
- ttl = parsed.getCellTTL();
-
- // create tags for the parsed line
- List<Tag> tags = new ArrayList<>();
- if (cellVisibilityExpr != null) {
- tags.addAll(kvCreator.getVisibilityExpressionResolver().createVisibilityExpTags(
- cellVisibilityExpr));
- }
- // Add TTL directly to the KV so we can vary them when packing more than one KV
- // into puts
- if (ttl > 0) {
- tags.add(new ArrayBackedTag(TagType.TTL_TAG_TYPE, Bytes.toBytes(ttl)));
- }
- for (int i = 0; i < parsed.getColumnCount(); i++) {
- if (i == parser.getRowKeyColumnIndex() || i == parser.getTimestampKeyColumnIndex()
- || i == parser.getAttributesKeyColumnIndex() || i == parser.getCellVisibilityColumnIndex()
- || i == parser.getCellTTLColumnIndex()) {
- continue;
- }
- // Creating the KV which needs to be directly written to HFiles. Using the Facade
- // KVCreator for creation of kvs.
- Cell cell = this.kvCreator.create(lineBytes, parsed.getRowKeyOffset(),
- parsed.getRowKeyLength(), parser.getFamily(i), 0, parser.getFamily(i).length,
- parser.getQualifier(i), 0, parser.getQualifier(i).length, ts, lineBytes,
- parsed.getColumnOffset(i), parsed.getColumnLength(i), tags);
- KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
- kvs.add(kv);
- curSize += kv.heapSize();
- }
- } catch (ImportTsv.TsvParser.BadTsvLineException | IllegalArgumentException
- | InvalidLabelException badLine) {
- if (skipBadLines) {
- System.err.println("Bad line: " + badLine.getMessage());
- incrementBadLineCount(1);
- continue;
- }
- throw new IOException(badLine);
- }
- }
- context.setStatus("Read " + kvs.size() + " entries of " + kvs.getClass()
- + "(" + StringUtils.humanReadableInt(curSize) + ")");
- int index = 0;
- for (KeyValue kv : kvs) {
- context.write(rowKey, kv);
- if (++index > 0 && index % 100 == 0)
- context.setStatus("Wrote " + index + " key values.");
- }
-
- // if we have more entries to process
- if (iter.hasNext()) {
- // force flush because we cannot guarantee intra-row sorted order
- context.write(null, null);
- }
- }
- }
-}
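
For context, a hedged sketch of the ImportTsv configuration that drives a bulk-load job through
this reducer; the key constants are assumed to keep their usual ImportTsv values, and the table,
paths, and column spec are illustrative:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.mapreduce.ImportTsv;
    import org.apache.hadoop.hbase.mapreduce.TsvImporterTextMapper;
    import org.apache.hadoop.util.ToolRunner;

    public class BulkImportDriver {
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // Column layout of the TSV input; HBASE_ROW_KEY marks the row-key column.
        conf.set(ImportTsv.COLUMNS_CONF_KEY, "HBASE_ROW_KEY,cf:a,cf:b");
        // Asking for HFile output routes the job through the text path
        // (TsvImporterTextMapper feeding TextSortReducer) instead of direct Puts.
        conf.set(ImportTsv.BULK_OUTPUT_CONF_KEY, "/tmp/bulkload-hfiles");
        conf.set(ImportTsv.MAPPER_CONF_KEY, TsvImporterTextMapper.class.getName());
        conf.setBoolean(ImportTsv.SKIP_LINES_CONF_KEY, true); // tallied in the "Bad Lines" counter

        // Remaining args: target table and input directory (both illustrative).
        int exit = ToolRunner.run(conf, new ImportTsv(), new String[] { "mytable", "/input/tsv" });
        System.exit(exit);
      }
    }
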
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TsvImporterMapper.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TsvImporterMapper.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TsvImporterMapper.java
deleted file mode 100644
index a9d8e03..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TsvImporterMapper.java
+++ /dev/null
@@ -1,232 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.ArrayBackedTag;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.Tag;
-import org.apache.hadoop.hbase.TagType;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser.BadTsvLineException;
-import org.apache.hadoop.hbase.security.visibility.CellVisibility;
-import org.apache.hadoop.hbase.security.visibility.InvalidLabelException;
-import org.apache.hadoop.hbase.util.Base64;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Counter;
-import org.apache.hadoop.mapreduce.Mapper;
-
-/**
- * Write table content out to files in hdfs.
- */
-@InterfaceAudience.Public
-public class TsvImporterMapper
-extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put>
-{
-
- /** Timestamp for all inserted rows */
- protected long ts;
-
- /** Column separator */
- private String separator;
-
- /** Should skip bad lines */
- private boolean skipBadLines;
- /** Should skip empty columns*/
- private boolean skipEmptyColumns;
- private Counter badLineCount;
- private boolean logBadLines;
-
- protected ImportTsv.TsvParser parser;
-
- protected Configuration conf;
-
- protected String cellVisibilityExpr;
-
- protected long ttl;
-
- protected CellCreator kvCreator;
-
- private String hfileOutPath;
-
- /** List of cell tags */
- private List<Tag> tags;
-
- public long getTs() {
- return ts;
- }
-
- public boolean getSkipBadLines() {
- return skipBadLines;
- }
-
- public Counter getBadLineCount() {
- return badLineCount;
- }
-
- public void incrementBadLineCount(int count) {
- this.badLineCount.increment(count);
- }
-
- /**
- * Handles initializing this class with objects specific to it (i.e., the parser).
- * Common initialization that might be leveraged by a subclass is done in
- * <code>doSetup</code>. Hence a subclass may choose to override this method
- * and call <code>doSetup</code> as well before handling its own custom params.
- *
- * @param context
- */
- @Override
- protected void setup(Context context) {
- doSetup(context);
-
- conf = context.getConfiguration();
- parser = new ImportTsv.TsvParser(conf.get(ImportTsv.COLUMNS_CONF_KEY),
- separator);
- if (parser.getRowKeyColumnIndex() == -1) {
- throw new RuntimeException("No row key column specified");
- }
- this.kvCreator = new CellCreator(conf);
- tags = new ArrayList<>();
- }
-
- /**
- * Handles common parameter initialization that a subclass might want to leverage.
- * @param context
- */
- protected void doSetup(Context context) {
- Configuration conf = context.getConfiguration();
-
- // If a custom separator has been used,
- // decode it back from Base64 encoding.
- separator = conf.get(ImportTsv.SEPARATOR_CONF_KEY);
- if (separator == null) {
- separator = ImportTsv.DEFAULT_SEPARATOR;
- } else {
- separator = new String(Base64.decode(separator));
- }
- // Should never get 0 as we are setting this to a valid value in job
- // configuration.
- ts = conf.getLong(ImportTsv.TIMESTAMP_CONF_KEY, 0);
-
- skipEmptyColumns = context.getConfiguration().getBoolean(
- ImportTsv.SKIP_EMPTY_COLUMNS, false);
- skipBadLines = context.getConfiguration().getBoolean(
- ImportTsv.SKIP_LINES_CONF_KEY, true);
- badLineCount = context.getCounter("ImportTsv", "Bad Lines");
- logBadLines = context.getConfiguration().getBoolean(ImportTsv.LOG_BAD_LINES_CONF_KEY, false);
- hfileOutPath = conf.get(ImportTsv.BULK_OUTPUT_CONF_KEY);
- }
-
- /**
- * Convert a line of TSV text into an HBase table row.
- */
- @Override
- public void map(LongWritable offset, Text value,
- Context context)
- throws IOException {
- byte[] lineBytes = value.getBytes();
-
- try {
- ImportTsv.TsvParser.ParsedLine parsed = parser.parse(
- lineBytes, value.getLength());
- ImmutableBytesWritable rowKey =
- new ImmutableBytesWritable(lineBytes,
- parsed.getRowKeyOffset(),
- parsed.getRowKeyLength());
- // Retrieve timestamp if exists
- ts = parsed.getTimestamp(ts);
- cellVisibilityExpr = parsed.getCellVisibility();
- ttl = parsed.getCellTTL();
-
- // create tags for the parsed line
- if (hfileOutPath != null) {
- tags.clear();
- if (cellVisibilityExpr != null) {
- tags.addAll(kvCreator.getVisibilityExpressionResolver().createVisibilityExpTags(
- cellVisibilityExpr));
- }
- // Add TTL directly to the KV so we can vary them when packing more than one KV
- // into puts
- if (ttl > 0) {
- tags.add(new ArrayBackedTag(TagType.TTL_TAG_TYPE, Bytes.toBytes(ttl)));
- }
- }
- Put put = new Put(rowKey.copyBytes());
- for (int i = 0; i < parsed.getColumnCount(); i++) {
- if (i == parser.getRowKeyColumnIndex() || i == parser.getTimestampKeyColumnIndex()
- || i == parser.getAttributesKeyColumnIndex() || i == parser.getCellVisibilityColumnIndex()
- || i == parser.getCellTTLColumnIndex() || (skipEmptyColumns
- && parsed.getColumnLength(i) == 0)) {
- continue;
- }
- populatePut(lineBytes, parsed, put, i);
- }
- context.write(rowKey, put);
- } catch (ImportTsv.TsvParser.BadTsvLineException | IllegalArgumentException
- | InvalidLabelException badLine) {
- if (logBadLines) {
- System.err.println(value);
- }
- System.err.println("Bad line at offset: " + offset.get() + ":\n" + badLine.getMessage());
- if (skipBadLines) {
- incrementBadLineCount(1);
- return;
- }
- throw new IOException(badLine);
- } catch (InterruptedException e) {
- e.printStackTrace();
- }
- }
-
- protected void populatePut(byte[] lineBytes, ImportTsv.TsvParser.ParsedLine parsed, Put put,
- int i) throws BadTsvLineException, IOException {
- Cell cell = null;
- if (hfileOutPath == null) {
- cell = new KeyValue(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength(),
- parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0,
- parser.getQualifier(i).length, ts, KeyValue.Type.Put, lineBytes,
- parsed.getColumnOffset(i), parsed.getColumnLength(i));
- if (cellVisibilityExpr != null) {
- // We won't be validating the expression here. The Visibility CP will do
- // the validation
- put.setCellVisibility(new CellVisibility(cellVisibilityExpr));
- }
- if (ttl > 0) {
- put.setTTL(ttl);
- }
- } else {
- // Creating the KV which needs to be directly written to HFiles. Using the Facade
- // KVCreator for creation of kvs.
- cell = this.kvCreator.create(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength(),
- parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0,
- parser.getQualifier(i).length, ts, lineBytes, parsed.getColumnOffset(i),
- parsed.getColumnLength(i), tags);
- }
- put.add(cell);
- }
-}
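
A small, assumed example of the job-side setup that matches doSetup() above: the separator must
be Base64-encoded on the Configuration (because the mapper decodes it), and visibility/TTL columns
are declared through the special column names the parser is usually configured with; all values
shown are illustrative:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.mapreduce.ImportTsv;
    import org.apache.hadoop.hbase.util.Base64;
    import org.apache.hadoop.hbase.util.Bytes;

    public class TsvMapperConfExample {
      public static void main(String[] args) {
        Configuration conf = HBaseConfiguration.create();
        // doSetup() Base64-decodes the separator, so encode a '|' delimiter here.
        conf.set(ImportTsv.SEPARATOR_CONF_KEY, Base64.encodeBytes(Bytes.toBytes("|")));
        // HBASE_CELL_VISIBILITY and HBASE_CELL_TTL feed cellVisibilityExpr and ttl above.
        conf.set(ImportTsv.COLUMNS_CONF_KEY,
            "HBASE_ROW_KEY,cf:c1,HBASE_CELL_VISIBILITY,HBASE_CELL_TTL");
        // A fixed timestamp for all inserted rows (the mapper's default "ts").
        conf.setLong(ImportTsv.TIMESTAMP_CONF_KEY, System.currentTimeMillis());
      }
    }
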
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TsvImporterTextMapper.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TsvImporterTextMapper.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TsvImporterTextMapper.java
deleted file mode 100644
index 581f0d0..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TsvImporterTextMapper.java
+++ /dev/null
@@ -1,128 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Base64;
-import org.apache.hadoop.hbase.util.Pair;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.Counter;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.conf.Configuration;
-
-import java.io.IOException;
-
-/**
- * Write table content out to map output files.
- */
-@InterfaceAudience.Public
-public class TsvImporterTextMapper
-extends Mapper<LongWritable, Text, ImmutableBytesWritable, Text>
-{
-
- /** Column separator */
- private String separator;
-
- /** Should skip bad lines */
- private boolean skipBadLines;
- private Counter badLineCount;
- private boolean logBadLines;
-
- private ImportTsv.TsvParser parser;
-
- public boolean getSkipBadLines() {
- return skipBadLines;
- }
-
- public Counter getBadLineCount() {
- return badLineCount;
- }
-
- public void incrementBadLineCount(int count) {
- this.badLineCount.increment(count);
- }
-
- /**
- * Handles initializing this class with objects specific to it (i.e., the parser).
- * Common initialization that might be leveraged by a subclass is done in
- * <code>doSetup</code>. Hence a subclass may choose to override this method
- * and call <code>doSetup</code> as well before handling its own custom params.
- *
- * @param context
- */
- @Override
- protected void setup(Context context) {
- doSetup(context);
-
- Configuration conf = context.getConfiguration();
-
- parser = new ImportTsv.TsvParser(conf.get(ImportTsv.COLUMNS_CONF_KEY), separator);
- if (parser.getRowKeyColumnIndex() == -1) {
- throw new RuntimeException("No row key column specified");
- }
- }
-
- /**
- * Handles common parameter initialization that a subclass might want to leverage.
- * @param context
- */
- protected void doSetup(Context context) {
- Configuration conf = context.getConfiguration();
-
- // If a custom separator has been used,
- // decode it back from Base64 encoding.
- separator = conf.get(ImportTsv.SEPARATOR_CONF_KEY);
- if (separator == null) {
- separator = ImportTsv.DEFAULT_SEPARATOR;
- } else {
- separator = new String(Base64.decode(separator));
- }
-
- skipBadLines = context.getConfiguration().getBoolean(ImportTsv.SKIP_LINES_CONF_KEY, true);
- logBadLines = context.getConfiguration().getBoolean(ImportTsv.LOG_BAD_LINES_CONF_KEY, false);
- badLineCount = context.getCounter("ImportTsv", "Bad Lines");
- }
-
- /**
- * Convert a line of TSV text into an HBase table row.
- */
- @Override
- public void map(LongWritable offset, Text value, Context context) throws IOException {
- try {
- Pair<Integer,Integer> rowKeyOffsets = parser.parseRowKey(value.getBytes(), value.getLength());
- ImmutableBytesWritable rowKey = new ImmutableBytesWritable(
- value.getBytes(), rowKeyOffsets.getFirst(), rowKeyOffsets.getSecond());
- context.write(rowKey, value);
- } catch (ImportTsv.TsvParser.BadTsvLineException|IllegalArgumentException badLine) {
- if (logBadLines) {
- System.err.println(value);
- }
- System.err.println("Bad line at offset: " + offset.get() + ":\n" + badLine.getMessage());
- if (skipBadLines) {
- incrementBadLineCount(1);
- return;
- }
- throw new IOException(badLine);
- } catch (InterruptedException e) {
- e.printStackTrace();
- Thread.currentThread().interrupt();
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/VisibilityExpressionResolver.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/VisibilityExpressionResolver.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/VisibilityExpressionResolver.java
deleted file mode 100644
index a83a88f..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/VisibilityExpressionResolver.java
+++ /dev/null
@@ -1,45 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-import java.util.List;
-
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.hbase.Tag;
-
-/**
- * Interface to convert visibility expressions into Tags for storing along with Cells in HFiles.
- */
-@InterfaceAudience.Public
-public interface VisibilityExpressionResolver extends Configurable {
-
- /**
- * Gives the implementation a chance to initialize itself.
- */
- void init();
-
- /**
- * Convert visibility expression into tags to be serialized.
- * @param visExpression the label expression
- * @return The list of tags corresponding to the visibility expression. These tags will be stored
- * along with the Cells.
- */
- List<Tag> createVisibilityExpTags(String visExpression) throws IOException;
-}
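
For illustration only, a hedged sketch of what an implementation of this interface can look like;
unlike the default resolver (which resolves label ordinals against the visibility label service),
this toy version just stores the raw expression bytes in a visibility-typed tag:

    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.ArrayBackedTag;
    import org.apache.hadoop.hbase.Tag;
    import org.apache.hadoop.hbase.TagType;
    import org.apache.hadoop.hbase.mapreduce.VisibilityExpressionResolver;
    import org.apache.hadoop.hbase.util.Bytes;

    public class PassthroughVisibilityResolver implements VisibilityExpressionResolver {
      private Configuration conf;

      @Override public void setConf(Configuration conf) { this.conf = conf; }
      @Override public Configuration getConf() { return conf; }

      @Override
      public void init() {
        // Nothing to initialize for this toy resolver.
      }

      @Override
      public List<Tag> createVisibilityExpTags(String visExpression) throws IOException {
        List<Tag> tags = new ArrayList<>(1);
        // Store the expression verbatim; a real resolver would serialize label ordinals.
        tags.add(new ArrayBackedTag(TagType.VISIBILITY_TAG_TYPE, Bytes.toBytes(visExpression)));
        return tags;
      }
    }
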
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/WALInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/WALInputFormat.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/WALInputFormat.java
deleted file mode 100644
index 8b4e967..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/WALInputFormat.java
+++ /dev/null
@@ -1,344 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.EOFException;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.LocatedFileStatus;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.RemoteIterator;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
-import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
-import org.apache.hadoop.hbase.wal.WAL;
-import org.apache.hadoop.hbase.wal.WAL.Entry;
-import org.apache.hadoop.hbase.wal.WAL.Reader;
-import org.apache.hadoop.hbase.wal.WALKey;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapreduce.InputFormat;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.util.StringUtils;
-
-/**
- * Simple {@link InputFormat} for {@link org.apache.hadoop.hbase.wal.WAL} files.
- */
-@InterfaceAudience.Public
-public class WALInputFormat extends InputFormat<WALKey, WALEdit> {
- private static final Log LOG = LogFactory.getLog(WALInputFormat.class);
-
- public static final String START_TIME_KEY = "wal.start.time";
- public static final String END_TIME_KEY = "wal.end.time";
-
- /**
- * {@link InputSplit} for {@link WAL} files. Each split represents
- * exactly one log file.
- */
- static class WALSplit extends InputSplit implements Writable {
- private String logFileName;
- private long fileSize;
- private long startTime;
- private long endTime;
-
- /** for serialization */
- public WALSplit() {}
-
- /**
- * Represents a WALSplit, i.e. a single WAL file.
- * Start and end times are managed by the split, so that WAL files can be
- * filtered before WALEdits are passed to the mapper(s).
- * @param logFileName
- * @param fileSize
- * @param startTime
- * @param endTime
- */
- public WALSplit(String logFileName, long fileSize, long startTime, long endTime) {
- this.logFileName = logFileName;
- this.fileSize = fileSize;
- this.startTime = startTime;
- this.endTime = endTime;
- }
-
- @Override
- public long getLength() throws IOException, InterruptedException {
- return fileSize;
- }
-
- @Override
- public String[] getLocations() throws IOException, InterruptedException {
- // TODO: Find the data node with the most blocks for this WAL?
- return new String[] {};
- }
-
- public String getLogFileName() {
- return logFileName;
- }
-
- public long getStartTime() {
- return startTime;
- }
-
- public long getEndTime() {
- return endTime;
- }
-
- @Override
- public void readFields(DataInput in) throws IOException {
- logFileName = in.readUTF();
- fileSize = in.readLong();
- startTime = in.readLong();
- endTime = in.readLong();
- }
-
- @Override
- public void write(DataOutput out) throws IOException {
- out.writeUTF(logFileName);
- out.writeLong(fileSize);
- out.writeLong(startTime);
- out.writeLong(endTime);
- }
-
- @Override
- public String toString() {
- return logFileName + " (" + startTime + ":" + endTime + ") length:" + fileSize;
- }
- }
-
- /**
- * {@link RecordReader} for a {@link WAL} file.
- * Implementation shared with deprecated HLogInputFormat.
- */
- static abstract class WALRecordReader<K extends WALKey> extends RecordReader<K, WALEdit> {
- private Reader reader = null;
- // visible until we can remove the deprecated HLogInputFormat
- Entry currentEntry = new Entry();
- private long startTime;
- private long endTime;
- private Configuration conf;
- private Path logFile;
- private long currentPos;
-
- @Override
- public void initialize(InputSplit split, TaskAttemptContext context)
- throws IOException, InterruptedException {
- WALSplit hsplit = (WALSplit)split;
- logFile = new Path(hsplit.getLogFileName());
- conf = context.getConfiguration();
- LOG.info("Opening reader for "+split);
- openReader(logFile);
- this.startTime = hsplit.getStartTime();
- this.endTime = hsplit.getEndTime();
- }
-
- private void openReader(Path path) throws IOException
- {
- closeReader();
- reader = AbstractFSWALProvider.openReader(path, conf);
- seek();
- setCurrentPath(path);
- }
-
- private void setCurrentPath(Path path) {
- this.logFile = path;
- }
-
- private void closeReader() throws IOException {
- if (reader != null) {
- reader.close();
- reader = null;
- }
- }
-
- private void seek() throws IOException {
- if (currentPos != 0) {
- reader.seek(currentPos);
- }
- }
-
- @Override
- public boolean nextKeyValue() throws IOException, InterruptedException {
- if (reader == null) return false;
- this.currentPos = reader.getPosition();
- Entry temp;
- long i = -1;
- try {
- do {
- // skip older entries
- try {
- temp = reader.next(currentEntry);
- i++;
- } catch (EOFException x) {
- LOG.warn("Corrupted entry detected. Ignoring the rest of the file."
- + " (This is normal when a RegionServer crashed.)");
- return false;
- }
- } while (temp != null && temp.getKey().getWriteTime() < startTime);
-
- if (temp == null) {
- if (i > 0) LOG.info("Skipped " + i + " entries.");
- LOG.info("Reached end of file.");
- return false;
- } else if (i > 0) {
- LOG.info("Skipped " + i + " entries, until ts: " + temp.getKey().getWriteTime() + ".");
- }
- boolean res = temp.getKey().getWriteTime() <= endTime;
- if (!res) {
- LOG.info("Reached ts: " + temp.getKey().getWriteTime()
- + " ignoring the rest of the file.");
- }
- return res;
- } catch (IOException e) {
- Path archivedLog = AbstractFSWALProvider.getArchivedLogPath(logFile, conf);
- if (logFile != archivedLog) {
- openReader(archivedLog);
- // Retry recursively against the archived log location
- return nextKeyValue();
- } else {
- throw e;
- }
- }
- }
-
- @Override
- public WALEdit getCurrentValue() throws IOException, InterruptedException {
- return currentEntry.getEdit();
- }
-
- @Override
- public float getProgress() throws IOException, InterruptedException {
- // N/A depends on total number of entries, which is unknown
- return 0;
- }
-
- @Override
- public void close() throws IOException {
- LOG.info("Closing reader");
- if (reader != null) this.reader.close();
- }
- }
-
- /**
- * Handler for the non-deprecated WALKey version. Fold into WALRecordReader once we no longer
- * need to support HLogInputFormat.
- */
- static class WALKeyRecordReader extends WALRecordReader<WALKey> {
- @Override
- public WALKey getCurrentKey() throws IOException, InterruptedException {
- return currentEntry.getKey();
- }
- }
-
- @Override
- public List<InputSplit> getSplits(JobContext context) throws IOException,
- InterruptedException {
- return getSplits(context, START_TIME_KEY, END_TIME_KEY);
- }
-
- /**
- * implementation shared with deprecated HLogInputFormat
- */
- List<InputSplit> getSplits(final JobContext context, final String startKey, final String endKey)
- throws IOException, InterruptedException {
- Configuration conf = context.getConfiguration();
- boolean ignoreMissing = conf.getBoolean(WALPlayer.IGNORE_MISSING_FILES, false);
- Path[] inputPaths = getInputPaths(conf);
- long startTime = conf.getLong(startKey, Long.MIN_VALUE);
- long endTime = conf.getLong(endKey, Long.MAX_VALUE);
-
- List<FileStatus> allFiles = new ArrayList<FileStatus>();
- for(Path inputPath: inputPaths){
- FileSystem fs = inputPath.getFileSystem(conf);
- try {
- List<FileStatus> files = getFiles(fs, inputPath, startTime, endTime);
- allFiles.addAll(files);
- } catch (FileNotFoundException e) {
- if (ignoreMissing) {
- LOG.warn("File "+ inputPath +" is missing. Skipping it.");
- continue;
- }
- throw e;
- }
- }
- List<InputSplit> splits = new ArrayList<InputSplit>(allFiles.size());
- for (FileStatus file : allFiles) {
- splits.add(new WALSplit(file.getPath().toString(), file.getLen(), startTime, endTime));
- }
- return splits;
- }
-
- private Path[] getInputPaths(Configuration conf) {
- String inpDirs = conf.get(FileInputFormat.INPUT_DIR);
- return StringUtils.stringToPath(
- inpDirs.split(conf.get(WALPlayer.INPUT_FILES_SEPARATOR_KEY, ",")));
- }
-
- private List<FileStatus> getFiles(FileSystem fs, Path dir, long startTime, long endTime)
- throws IOException {
- List<FileStatus> result = new ArrayList<>();
- LOG.debug("Scanning " + dir.toString() + " for WAL files");
-
- RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(dir);
- if (!iter.hasNext()) return Collections.emptyList();
- while (iter.hasNext()) {
- LocatedFileStatus file = iter.next();
- if (file.isDirectory()) {
- // recurse into sub directories
- result.addAll(getFiles(fs, file.getPath(), startTime, endTime));
- } else {
- String name = file.getPath().toString();
- int idx = name.lastIndexOf('.');
- if (idx > 0) {
- try {
- long fileStartTime = Long.parseLong(name.substring(idx+1));
- if (fileStartTime <= endTime) {
- LOG.info("Found: " + file);
- result.add(file);
- }
- } catch (NumberFormatException x) {
- idx = 0;
- }
- }
- if (idx == 0) {
- LOG.warn("File " + name + " does not appear to be a WAL file. Skipping...");
- }
- }
- }
- return result;
- }
-
- @Override
- public RecordReader<WALKey, WALEdit> createRecordReader(InputSplit split,
- TaskAttemptContext context) throws IOException, InterruptedException {
- return new WALKeyRecordReader();
- }
-}
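
For orientation, a minimal sketch (assumed, not part of this patch) of wiring WALInputFormat into
a job with a time window; the WAL directory and the one-hour window are illustrative, and the
mapper/reducer classes are omitted for brevity:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.mapreduce.WALInputFormat;
    import org.apache.hadoop.hbase.mapreduce.WALPlayer;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

    public class WalScanJobSetup {
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        long now = System.currentTimeMillis();
        // Only WAL entries whose write time falls in the last hour are passed on.
        conf.setLong(WALInputFormat.START_TIME_KEY, now - 3600_000L);
        conf.setLong(WALInputFormat.END_TIME_KEY, now);
        // Tolerate files that were archived away between listing and reading.
        conf.setBoolean(WALPlayer.IGNORE_MISSING_FILES, true);

        Job job = Job.getInstance(conf, "wal-scan");
        job.setInputFormatClass(WALInputFormat.class);
        FileInputFormat.addInputPath(job, new Path("/hbase/oldWALs")); // illustrative WAL dir
      }
    }
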
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/WALPlayer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/WALPlayer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/WALPlayer.java
deleted file mode 100644
index b1e655c..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/WALPlayer.java
+++ /dev/null
@@ -1,384 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.Map;
-import java.util.TreeMap;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.KeyValueUtil;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Delete;
-import org.apache.hadoop.hbase.client.Mutation;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.RegionLocator;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.regionserver.wal.WALCellCodec;
-import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.wal.WALKey;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-/**
- * A tool to replay WAL files as a M/R job.
- * The WAL can be replayed for a set of tables or all tables,
- * and a time range can be provided (in milliseconds).
- * The WAL is filtered to the passed set of tables and the output
- * can optionally be mapped to another set of tables.
- *
- * WAL replay can also generate HFiles for later bulk importing,
- * in which case the WAL is replayed for a single table only.
- */
-@InterfaceAudience.Public
-public class WALPlayer extends Configured implements Tool {
- private static final Log LOG = LogFactory.getLog(WALPlayer.class);
- final static String NAME = "WALPlayer";
- public final static String BULK_OUTPUT_CONF_KEY = "wal.bulk.output";
- public final static String TABLES_KEY = "wal.input.tables";
- public final static String TABLE_MAP_KEY = "wal.input.tablesmap";
- public final static String INPUT_FILES_SEPARATOR_KEY = "wal.input.separator";
- public final static String IGNORE_MISSING_FILES = "wal.input.ignore.missing.files";
-
-
- // This relies on Hadoop Configuration to handle warning about deprecated configs and
- // to set the correct non-deprecated configs when an old one shows up.
- static {
- Configuration.addDeprecation("hlog.bulk.output", BULK_OUTPUT_CONF_KEY);
- Configuration.addDeprecation("hlog.input.tables", TABLES_KEY);
- Configuration.addDeprecation("hlog.input.tablesmap", TABLE_MAP_KEY);
- }
-
- private final static String JOB_NAME_CONF_KEY = "mapreduce.job.name";
-
- public WALPlayer(){
- }
-
- protected WALPlayer(final Configuration c) {
- super(c);
- }
-
- /**
- * A mapper that just writes out KeyValues.
- * This one can be used together with {@link KeyValueSortReducer}
- */
- static class WALKeyValueMapper
- extends Mapper<WALKey, WALEdit, ImmutableBytesWritable, KeyValue> {
- private byte[] table;
-
- @Override
- public void map(WALKey key, WALEdit value,
- Context context)
- throws IOException {
- try {
- // skip all other tables
- if (Bytes.equals(table, key.getTablename().getName())) {
- for (Cell cell : value.getCells()) {
- KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
- if (WALEdit.isMetaEditFamily(kv)) {
- continue;
- }
- context.write(new ImmutableBytesWritable(CellUtil.cloneRow(kv)), kv);
- }
- }
- } catch (InterruptedException e) {
- e.printStackTrace();
- }
- }
-
- @Override
- public void setup(Context context) throws IOException {
- // only a single table is supported when HFiles are generated with HFileOutputFormat
- String[] tables = context.getConfiguration().getStrings(TABLES_KEY);
- if (tables == null || tables.length != 1) {
- // this can only happen when WALKeyValueMapper is used directly by a class other than WALPlayer
- throw new IOException("Exactly one table must be specified for bulk HFile case.");
- }
- table = Bytes.toBytes(tables[0]);
-
- }
-
- }
-
- /**
- * A mapper that writes out {@link Mutation} to be directly applied to
- * a running HBase instance.
- */
- protected static class WALMapper
- extends Mapper<WALKey, WALEdit, ImmutableBytesWritable, Mutation> {
- private Map<TableName, TableName> tables = new TreeMap<>();
-
- @Override
- public void map(WALKey key, WALEdit value, Context context)
- throws IOException {
- try {
- if (tables.isEmpty() || tables.containsKey(key.getTablename())) {
- TableName targetTable = tables.isEmpty() ?
- key.getTablename() :
- tables.get(key.getTablename());
- ImmutableBytesWritable tableOut = new ImmutableBytesWritable(targetTable.getName());
- Put put = null;
- Delete del = null;
- Cell lastCell = null;
- for (Cell cell : value.getCells()) {
- // filtering WAL meta entries
- if (WALEdit.isMetaEditFamily(cell)) {
- continue;
- }
-
- // Allow a subclass to filter out this cell.
- if (filter(context, cell)) {
- // A WALEdit may contain multiple operations (HBASE-3584) and/or
- // multiple rows (HBASE-5229).
- // Aggregate as much as possible into a single Put/Delete
- // operation before writing to the context.
- if (lastCell == null || lastCell.getTypeByte() != cell.getTypeByte()
- || !CellUtil.matchingRow(lastCell, cell)) {
- // row or type changed, write out aggregate KVs.
- if (put != null) {
- context.write(tableOut, put);
- }
- if (del != null) {
- context.write(tableOut, del);
- }
- if (CellUtil.isDelete(cell)) {
- del = new Delete(CellUtil.cloneRow(cell));
- } else {
- put = new Put(CellUtil.cloneRow(cell));
- }
- }
- if (CellUtil.isDelete(cell)) {
- del.add(cell);
- } else {
- put.add(cell);
- }
- }
- lastCell = cell;
- }
- // write residual KVs
- if (put != null) {
- context.write(tableOut, put);
- }
- if (del != null) {
- context.write(tableOut, del);
- }
- }
- } catch (InterruptedException e) {
- e.printStackTrace();
- }
- }
-
- protected boolean filter(Context context, final Cell cell) {
- return true;
- }
-
- @Override
- protected void
- cleanup(Mapper<WALKey, WALEdit, ImmutableBytesWritable, Mutation>.Context context)
- throws IOException, InterruptedException {
- super.cleanup(context);
- }
-
- @Override
- public void setup(Context context) throws IOException {
- String[] tableMap = context.getConfiguration().getStrings(TABLE_MAP_KEY);
- String[] tablesToUse = context.getConfiguration().getStrings(TABLES_KEY);
- if (tableMap == null) {
- tableMap = tablesToUse;
- }
- if (tablesToUse == null) {
- // Then user wants all tables.
- } else if (tablesToUse.length != tableMap.length) {
- // this can only happen when WALMapper is used directly by a class other than WALPlayer
- throw new IOException("Incorrect table mapping specified .");
- }
- int i = 0;
- if (tablesToUse != null) {
- for (String table : tablesToUse) {
- tables.put(TableName.valueOf(table),
- TableName.valueOf(tableMap[i++]));
- }
- }
- }
- }
-
- void setupTime(Configuration conf, String option) throws IOException {
- String val = conf.get(option);
- if (null == val) {
- return;
- }
- long ms;
- try {
- // first try to parse in user friendly form
- ms = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SS").parse(val).getTime();
- } catch (ParseException pe) {
- try {
- // then see if just a number of ms's was specified
- ms = Long.parseLong(val);
- } catch (NumberFormatException nfe) {
- throw new IOException(option
- + " must be specified either in the form 2001-02-20T16:35:06.99 "
- + "or as number of milliseconds");
- }
- }
- conf.setLong(option, ms);
- }
-
- /**
- * Sets up the actual job.
- *
- * @param args The command line parameters.
- * @return The newly created job.
- * @throws IOException When setting up the job fails.
- */
- public Job createSubmittableJob(String[] args) throws IOException {
- Configuration conf = getConf();
- setupTime(conf, WALInputFormat.START_TIME_KEY);
- setupTime(conf, WALInputFormat.END_TIME_KEY);
- String inputDirs = args[0];
- String[] tables = args[1].split(",");
- String[] tableMap;
- if (args.length > 2) {
- tableMap = args[2].split(",");
- if (tableMap.length != tables.length) {
- throw new IOException("The same number of tables and mapping must be provided.");
- }
- } else {
- // if no mapping is specified, map each table to itself
- tableMap = tables;
- }
- conf.setStrings(TABLES_KEY, tables);
- conf.setStrings(TABLE_MAP_KEY, tableMap);
- conf.set(FileInputFormat.INPUT_DIR, inputDirs);
- Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + System.currentTimeMillis()));
- job.setJarByClass(WALPlayer.class);
-
- job.setInputFormatClass(WALInputFormat.class);
- job.setMapOutputKeyClass(ImmutableBytesWritable.class);
-
- String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
- if (hfileOutPath != null) {
- LOG.debug("add incremental job :" + hfileOutPath + " from " + inputDirs);
-
- // the bulk HFile case
- if (tables.length != 1) {
- throw new IOException("Exactly one table must be specified for the bulk export option");
- }
- TableName tableName = TableName.valueOf(tables[0]);
- job.setMapperClass(WALKeyValueMapper.class);
- job.setReducerClass(KeyValueSortReducer.class);
- Path outputDir = new Path(hfileOutPath);
- FileOutputFormat.setOutputPath(job, outputDir);
- job.setMapOutputValueClass(KeyValue.class);
- try (Connection conn = ConnectionFactory.createConnection(conf);
- Table table = conn.getTable(tableName);
- RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
- HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(), regionLocator);
- }
- TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
- org.apache.hadoop.hbase.shaded.com.google.common.base.Preconditions.class);
- } else {
- // output to live cluster
- job.setMapperClass(WALMapper.class);
- job.setOutputFormatClass(MultiTableOutputFormat.class);
- TableMapReduceUtil.addDependencyJars(job);
- TableMapReduceUtil.initCredentials(job);
- // No reducers.
- job.setNumReduceTasks(0);
- }
- String codecCls = WALCellCodec.getWALCellCodecClass(conf);
- try {
- TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), Class.forName(codecCls));
- } catch (Exception e) {
- throw new IOException("Cannot determine wal codec class " + codecCls, e);
- }
- return job;
- }
-
-
- /**
- * Print usage
- * @param errorMsg Error message. Can be null.
- */
- private void usage(final String errorMsg) {
- if (errorMsg != null && errorMsg.length() > 0) {
- System.err.println("ERROR: " + errorMsg);
- }
- System.err.println("Usage: " + NAME + " [options] <wal inputdir> <tables> [<tableMappings>]");
- System.err.println("Read all WAL entries for <tables>.");
- System.err.println("If no tables (\"\") are specific, all tables are imported.");
- System.err.println("(Careful, even hbase:meta entries will be imported"+
- " in that case.)");
- System.err.println("Otherwise <tables> is a comma separated list of tables.\n");
- System.err.println("The WAL entries can be mapped to new set of tables via <tableMapping>.");
- System.err.println("<tableMapping> is a command separated list of targettables.");
- System.err.println("If specified, each table in <tables> must have a mapping.\n");
- System.err.println("By default " + NAME + " will load data directly into HBase.");
- System.err.println("To generate HFiles for a bulk data load instead, pass the option:");
- System.err.println(" -D" + BULK_OUTPUT_CONF_KEY + "=/path/for/output");
- System.err.println(" (Only one table can be specified, and no mapping is allowed!)");
- System.err.println("Other options: (specify time range to WAL edit to consider)");
- System.err.println(" -D" + WALInputFormat.START_TIME_KEY + "=[date|ms]");
- System.err.println(" -D" + WALInputFormat.END_TIME_KEY + "=[date|ms]");
- System.err.println(" -D " + JOB_NAME_CONF_KEY
- + "=jobName - use the specified mapreduce job name for the wal player");
- System.err.println("For performance also consider the following options:\n"
- + " -Dmapreduce.map.speculative=false\n"
- + " -Dmapreduce.reduce.speculative=false");
- }
-
- /**
- * Main entry point.
- *
- * @param args The command line parameters.
- * @throws Exception When running the job fails.
- */
- public static void main(String[] args) throws Exception {
- int ret = ToolRunner.run(new WALPlayer(HBaseConfiguration.create()), args);
- System.exit(ret);
- }
-
- @Override
- public int run(String[] args) throws Exception {
- if (args.length < 2) {
- usage("Wrong number of arguments: " + args.length);
- System.exit(-1);
- }
- Job job = createSubmittableJob(args);
- return job.waitForCompletion(true) ? 0 : 1;
- }
-}
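For context, the sketch below shows one plausible way to drive WALPlayer programmatically rather than from the shell, mirroring what main() above does with ToolRunner. This is not part of this commit; the WAL directory and table names are hypothetical placeholders.

// Minimal, hypothetical sketch of invoking WALPlayer from Java code.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.WALPlayer;
import org.apache.hadoop.util.ToolRunner;

public class WALPlayerExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // Uncomment to emit HFiles for a later bulk load instead of writing to a live
    // cluster (in that mode exactly one table may be named and no mapping is allowed):
    // conf.set(WALPlayer.BULK_OUTPUT_CONF_KEY, "/tmp/walplayer-hfiles");

    // Tool args: <wal inputdir> <tables> [<tableMappings>]; values here are placeholders.
    String[] toolArgs = { "/hbase/oldWALs", "sourceTable", "targetTable" };
    int exitCode = ToolRunner.run(conf, new WALPlayer(), toolArgs);
    System.exit(exitCode);
  }
}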
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/package-info.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/package-info.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/package-info.java
deleted file mode 100644
index 199e168..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/package-info.java
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
-Provides HBase <a href="http://wiki.apache.org/hadoop/HadoopMapReduce">MapReduce</a>
-Input/OutputFormats, a table indexing MapReduce job, and utility methods.
-
-<p>See <a href="http://hbase.apache.org/book.html#mapreduce">HBase and MapReduce</a>
-in the HBase Reference Guide for mapreduce over hbase documentation.
-*/
-package org.apache.hadoop.hbase.mapreduce;
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestTableMapReduceUtil.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestTableMapReduceUtil.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestTableMapReduceUtil.java
deleted file mode 100644
index ac2f20d..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestTableMapReduceUtil.java
+++ /dev/null
@@ -1,272 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Set;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileUtil;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.RunningJob;
-import org.junit.AfterClass;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.ImmutableMap;
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.ImmutableSet;
-
-@Category({MapReduceTests.class, LargeTests.class})
-public class TestTableMapReduceUtil {
-
- private static final Log LOG = LogFactory
- .getLog(TestTableMapReduceUtil.class);
-
- private static Table presidentsTable;
- private static final String TABLE_NAME = "People";
-
- private static final byte[] COLUMN_FAMILY = Bytes.toBytes("info");
- private static final byte[] COLUMN_QUALIFIER = Bytes.toBytes("name");
-
- private static ImmutableSet<String> presidentsRowKeys = ImmutableSet.of(
- "president1", "president2", "president3");
- private static Iterator<String> presidentNames = ImmutableSet.of(
- "John F. Kennedy", "George W. Bush", "Barack Obama").iterator();
-
- private static ImmutableSet<String> actorsRowKeys = ImmutableSet.of("actor1",
- "actor2");
- private static Iterator<String> actorNames = ImmutableSet.of(
- "Jack Nicholson", "Martin Freeman").iterator();
-
- private static String PRESIDENT_PATTERN = "president";
- private static String ACTOR_PATTERN = "actor";
- private static ImmutableMap<String, ImmutableSet<String>> relation = ImmutableMap
- .of(PRESIDENT_PATTERN, presidentsRowKeys, ACTOR_PATTERN, actorsRowKeys);
-
- private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
-
- @BeforeClass
- public static void beforeClass() throws Exception {
- UTIL.startMiniCluster();
- presidentsTable = createAndFillTable(TableName.valueOf(TABLE_NAME));
- }
-
- @AfterClass
- public static void afterClass() throws Exception {
- UTIL.shutdownMiniCluster();
- }
-
- @Before
- public void before() throws IOException {
- LOG.info("before");
- UTIL.ensureSomeRegionServersAvailable(1);
- LOG.info("before done");
- }
-
- public static Table createAndFillTable(TableName tableName) throws IOException {
- Table table = UTIL.createTable(tableName, COLUMN_FAMILY);
- createPutCommand(table);
- return table;
- }
-
- private static void createPutCommand(Table table) throws IOException {
- for (String president : presidentsRowKeys) {
- if (presidentNames.hasNext()) {
- Put p = new Put(Bytes.toBytes(president));
- p.addColumn(COLUMN_FAMILY, COLUMN_QUALIFIER, Bytes.toBytes(presidentNames.next()));
- table.put(p);
- }
- }
-
- for (String actor : actorsRowKeys) {
- if (actorNames.hasNext()) {
- Put p = new Put(Bytes.toBytes(actor));
- p.addColumn(COLUMN_FAMILY, COLUMN_QUALIFIER, Bytes.toBytes(actorNames.next()));
- table.put(p);
- }
- }
- }
-
- /**
- * Check that the given number of reduce tasks for the given job configuration
- * does not exceed the number of regions for the given table.
- */
- @Test
- public void shouldNumberOfReduceTaskNotExceedNumberOfRegionsForGivenTable()
- throws IOException {
- Assert.assertNotNull(presidentsTable);
- Configuration cfg = UTIL.getConfiguration();
- JobConf jobConf = new JobConf(cfg);
- TableMapReduceUtil.setNumReduceTasks(TABLE_NAME, jobConf);
- TableMapReduceUtil.limitNumReduceTasks(TABLE_NAME, jobConf);
- TableMapReduceUtil.setScannerCaching(jobConf, 100);
- assertEquals(1, jobConf.getNumReduceTasks());
- assertEquals(100, jobConf.getInt("hbase.client.scanner.caching", 0));
-
- jobConf.setNumReduceTasks(10);
- TableMapReduceUtil.setNumMapTasks(TABLE_NAME, jobConf);
- TableMapReduceUtil.limitNumReduceTasks(TABLE_NAME, jobConf);
- assertEquals(1, jobConf.getNumReduceTasks());
- }
-
- @Test
- public void shouldNumberOfMapTaskNotExceedNumberOfRegionsForGivenTable()
- throws IOException {
- Configuration cfg = UTIL.getConfiguration();
- JobConf jobConf = new JobConf(cfg);
- TableMapReduceUtil.setNumReduceTasks(TABLE_NAME, jobConf);
- TableMapReduceUtil.limitNumMapTasks(TABLE_NAME, jobConf);
- assertEquals(1, jobConf.getNumMapTasks());
-
- jobConf.setNumMapTasks(10);
- TableMapReduceUtil.setNumMapTasks(TABLE_NAME, jobConf);
- TableMapReduceUtil.limitNumMapTasks(TABLE_NAME, jobConf);
- assertEquals(1, jobConf.getNumMapTasks());
- }
-
- @Test
- @SuppressWarnings("deprecation")
- public void shoudBeValidMapReduceEvaluation() throws Exception {
- Configuration cfg = UTIL.getConfiguration();
- JobConf jobConf = new JobConf(cfg);
- try {
- jobConf.setJobName("process row task");
- jobConf.setNumReduceTasks(1);
- TableMapReduceUtil.initTableMapJob(TABLE_NAME, new String(COLUMN_FAMILY),
- ClassificatorMapper.class, ImmutableBytesWritable.class, Put.class,
- jobConf);
- TableMapReduceUtil.initTableReduceJob(TABLE_NAME,
- ClassificatorRowReduce.class, jobConf);
- RunningJob job = JobClient.runJob(jobConf);
- assertTrue(job.isSuccessful());
- } finally {
- if (jobConf != null)
- FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
- }
- }
-
- @Test
- @SuppressWarnings("deprecation")
- public void shoudBeValidMapReduceWithPartitionerEvaluation()
- throws IOException {
- Configuration cfg = UTIL.getConfiguration();
- JobConf jobConf = new JobConf(cfg);
- try {
- jobConf.setJobName("process row task");
- jobConf.setNumReduceTasks(2);
- TableMapReduceUtil.initTableMapJob(TABLE_NAME, new String(COLUMN_FAMILY),
- ClassificatorMapper.class, ImmutableBytesWritable.class, Put.class,
- jobConf);
-
- TableMapReduceUtil.initTableReduceJob(TABLE_NAME,
- ClassificatorRowReduce.class, jobConf, HRegionPartitioner.class);
- RunningJob job = JobClient.runJob(jobConf);
- assertTrue(job.isSuccessful());
- } finally {
- if (jobConf != null)
- FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
- }
- }
-
- @SuppressWarnings("deprecation")
- static class ClassificatorRowReduce extends MapReduceBase implements
- TableReduce<ImmutableBytesWritable, Put> {
-
- @Override
- public void reduce(ImmutableBytesWritable key, Iterator<Put> values,
- OutputCollector<ImmutableBytesWritable, Put> output, Reporter reporter)
- throws IOException {
- String strKey = Bytes.toString(key.get());
- List<Put> result = new ArrayList<>();
- while (values.hasNext())
- result.add(values.next());
-
- if (relation.keySet().contains(strKey)) {
- Set<String> set = relation.get(strKey);
- if (set != null) {
- assertEquals(set.size(), result.size());
- } else {
- throwAccertionError("Test infrastructure error: set is null");
- }
- } else {
- throwAccertionError("Test infrastructure error: key not found in map");
- }
- }
-
- private void throwAccertionError(String errorMessage) throws AssertionError {
- throw new AssertionError(errorMessage);
- }
- }
-
- @SuppressWarnings("deprecation")
- static class ClassificatorMapper extends MapReduceBase implements
- TableMap<ImmutableBytesWritable, Put> {
-
- @Override
- public void map(ImmutableBytesWritable row, Result result,
- OutputCollector<ImmutableBytesWritable, Put> outCollector,
- Reporter reporter) throws IOException {
- String rowKey = Bytes.toString(result.getRow());
- final ImmutableBytesWritable pKey = new ImmutableBytesWritable(
- Bytes.toBytes(PRESIDENT_PATTERN));
- final ImmutableBytesWritable aKey = new ImmutableBytesWritable(
- Bytes.toBytes(ACTOR_PATTERN));
- ImmutableBytesWritable outKey = null;
-
- if (rowKey.startsWith(PRESIDENT_PATTERN)) {
- outKey = pKey;
- } else if (rowKey.startsWith(ACTOR_PATTERN)) {
- outKey = aKey;
- } else {
- throw new AssertionError("unexpected rowKey");
- }
-
- String name = Bytes.toString(result.getValue(COLUMN_FAMILY,
- COLUMN_QUALIFIER));
- outCollector.collect(outKey,
- new Put(Bytes.toBytes("rowKey2"))
- .addColumn(COLUMN_FAMILY, COLUMN_QUALIFIER, Bytes.toBytes(name)));
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestTableOutputFormatConnectionExhaust.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestTableOutputFormatConnectionExhaust.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestTableOutputFormatConnectionExhaust.java
deleted file mode 100644
index 835117c..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestTableOutputFormatConnectionExhaust.java
+++ /dev/null
@@ -1,104 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.testclassification.MediumTests;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.RecordWriter;
-import org.junit.AfterClass;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-import java.io.IOException;
-
-import static org.junit.Assert.fail;
-
-/**
- * Spark creates many instances of TableOutputFormat within a single process. We need to make
- * sure we can have many instances and not leak connections.
- *
- * This test creates a few TableOutputFormats and shouldn't fail due to ZK connection exhaustion.
- */
-@Category(MediumTests.class)
-public class TestTableOutputFormatConnectionExhaust {
-
- private static final Log LOG =
- LogFactory.getLog(TestTableOutputFormatConnectionExhaust.class);
-
- private final static HBaseTestingUtility UTIL = new HBaseTestingUtility();
- static final String TABLE = "TestTableOutputFormatConnectionExhaust";
- static final String FAMILY = "family";
-
- @BeforeClass
- public static void beforeClass() throws Exception {
- // Default in ZookeeperMiniCluster is 1000, setting artificially low to trigger exhaustion.
- // need min of 7 to properly start the default mini HBase cluster
- UTIL.getConfiguration().setInt(HConstants.ZOOKEEPER_MAX_CLIENT_CNXNS, 10);
- UTIL.startMiniCluster();
- }
-
- @AfterClass
- public static void afterClass() throws Exception {
- UTIL.shutdownMiniCluster();
- }
-
- @Before
- public void before() throws IOException {
- LOG.info("before");
- UTIL.ensureSomeRegionServersAvailable(1);
- LOG.info("before done");
- }
-
- /**
- * Open and close a TableOutputFormat. Closing the RecordWriter should release HBase
- * Connection (ZK) resources, and will throw an exception if they are exhausted.
- */
- static void openCloseTableOutputFormat(int iter) throws IOException {
- LOG.info("Instantiating TableOutputFormat connection " + iter);
- JobConf conf = new JobConf();
- conf.addResource(UTIL.getConfiguration());
- conf.set(TableOutputFormat.OUTPUT_TABLE, TABLE);
- TableMapReduceUtil.initTableMapJob(TABLE, FAMILY, TableMap.class,
- ImmutableBytesWritable.class, ImmutableBytesWritable.class, conf);
- TableOutputFormat tof = new TableOutputFormat();
- RecordWriter rw = tof.getRecordWriter(null, conf, TABLE, null);
- rw.close(null);
- }
-
- @Test
- public void testConnectionExhaustion() throws IOException {
- int MAX_INSTANCES = 5; // fails on iteration 3 if zk connections leak
- for (int i = 0; i < MAX_INSTANCES; i++) {
- final int iter = i;
- try {
- openCloseTableOutputFormat(iter);
- } catch (Exception e) {
- LOG.error("Exception encountered", e);
- fail("Failed on iteration " + i);
- }
- }
- }
-
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestTableSnapshotInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestTableSnapshotInputFormat.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestTableSnapshotInputFormat.java
deleted file mode 100644
index c689c83..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestTableSnapshotInputFormat.java
+++ /dev/null
@@ -1,271 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.mapred;
-
-import static org.mockito.Mockito.mock;
-
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormatTestBase;
-import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapred.InputSplit;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.RecordReader;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.RunningJob;
-import org.apache.hadoop.mapred.lib.NullOutputFormat;
-import org.junit.Assert;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
-
-import java.io.IOException;
-import java.util.Iterator;
-
-@Category({VerySlowMapReduceTests.class, LargeTests.class})
-public class TestTableSnapshotInputFormat extends TableSnapshotInputFormatTestBase {
-
- private static final byte[] aaa = Bytes.toBytes("aaa");
- private static final byte[] after_zzz = Bytes.toBytes("zz{"); // 'z' + 1 => '{'
- private static final String COLUMNS =
- Bytes.toString(FAMILIES[0]) + " " + Bytes.toString(FAMILIES[1]);
-
- @Rule
- public TestName name = new TestName();
-
- @Override
- protected byte[] getStartRow() {
- return aaa;
- }
-
- @Override
- protected byte[] getEndRow() {
- return after_zzz;
- }
-
- static class TestTableSnapshotMapper extends MapReduceBase
- implements TableMap<ImmutableBytesWritable, NullWritable> {
- @Override
- public void map(ImmutableBytesWritable key, Result value,
- OutputCollector<ImmutableBytesWritable, NullWritable> collector, Reporter reporter)
- throws IOException {
- verifyRowFromMap(key, value);
- collector.collect(key, NullWritable.get());
- }
- }
-
- public static class TestTableSnapshotReducer extends MapReduceBase
- implements Reducer<ImmutableBytesWritable, NullWritable, NullWritable, NullWritable> {
- HBaseTestingUtility.SeenRowTracker rowTracker =
- new HBaseTestingUtility.SeenRowTracker(aaa, after_zzz);
-
- @Override
- public void reduce(ImmutableBytesWritable key, Iterator<NullWritable> values,
- OutputCollector<NullWritable, NullWritable> collector, Reporter reporter)
- throws IOException {
- rowTracker.addRow(key.get());
- }
-
- @Override
- public void close() {
- rowTracker.validate();
- }
- }
-
- @Test
- public void testInitTableSnapshotMapperJobConfig() throws Exception {
- setupCluster();
- final TableName tableName = TableName.valueOf(name.getMethodName());
- String snapshotName = "foo";
-
- try {
- createTableAndSnapshot(UTIL, tableName, snapshotName, getStartRow(), getEndRow(), 1);
- JobConf job = new JobConf(UTIL.getConfiguration());
- Path tmpTableDir = UTIL.getDataTestDirOnTestFS(snapshotName);
-
- TableMapReduceUtil.initTableSnapshotMapJob(snapshotName,
- COLUMNS, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
- NullWritable.class, job, false, tmpTableDir);
-
- // TODO: would be better to examine directly the cache instance that results from this
- // config. Currently this is not possible because BlockCache initialization is static.
- Assert.assertEquals(
- "Snapshot job should be configured for default LruBlockCache.",
- HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT,
- job.getFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, -1), 0.01);
- Assert.assertEquals(
- "Snapshot job should not use BucketCache.",
- 0, job.getFloat("hbase.bucketcache.size", -1), 0.01);
- } finally {
- UTIL.getAdmin().deleteSnapshot(snapshotName);
- UTIL.deleteTable(tableName);
- tearDownCluster();
- }
- }
-
- // TODO: mapred does not support limiting input range by startrow, endrow.
- // Thus the following tests must override parameter verification.
-
- @Test
- @Override
- public void testWithMockedMapReduceMultiRegion() throws Exception {
- testWithMockedMapReduce(UTIL, "testWithMockedMapReduceMultiRegion", 10, 10);
- }
-
- @Test
- @Override
- public void testWithMapReduceMultiRegion() throws Exception {
- testWithMapReduce(UTIL, "testWithMapReduceMultiRegion", 10, 10, false);
- }
-
- @Test
- @Override
- // run the MR job while HBase is offline
- public void testWithMapReduceAndOfflineHBaseMultiRegion() throws Exception {
- testWithMapReduce(UTIL, "testWithMapReduceAndOfflineHBaseMultiRegion", 10, 10, true);
- }
-
- @Override
- public void testRestoreSnapshotDoesNotCreateBackRefLinksInit(TableName tableName,
- String snapshotName, Path tmpTableDir) throws Exception {
- JobConf job = new JobConf(UTIL.getConfiguration());
- TableMapReduceUtil.initTableSnapshotMapJob(snapshotName,
- COLUMNS, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
- NullWritable.class, job, false, tmpTableDir);
- }
-
- @Override
- protected void testWithMockedMapReduce(HBaseTestingUtility util, String snapshotName,
- int numRegions, int expectedNumSplits) throws Exception {
- setupCluster();
- final TableName tableName = TableName.valueOf(name.getMethodName());
- try {
- createTableAndSnapshot(
- util, tableName, snapshotName, getStartRow(), getEndRow(), numRegions);
-
- JobConf job = new JobConf(util.getConfiguration());
- Path tmpTableDir = util.getDataTestDirOnTestFS(snapshotName);
-
- TableMapReduceUtil.initTableSnapshotMapJob(snapshotName,
- COLUMNS, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
- NullWritable.class, job, false, tmpTableDir);
-
- // mapred doesn't support start and end keys? o.O
- verifyWithMockedMapReduce(job, numRegions, expectedNumSplits, getStartRow(), getEndRow());
-
- } finally {
- util.getAdmin().deleteSnapshot(snapshotName);
- util.deleteTable(tableName);
- tearDownCluster();
- }
- }
-
- private void verifyWithMockedMapReduce(JobConf job, int numRegions, int expectedNumSplits,
- byte[] startRow, byte[] stopRow) throws IOException, InterruptedException {
- TableSnapshotInputFormat tsif = new TableSnapshotInputFormat();
- InputSplit[] splits = tsif.getSplits(job, 0);
-
- Assert.assertEquals(expectedNumSplits, splits.length);
-
- HBaseTestingUtility.SeenRowTracker rowTracker =
- new HBaseTestingUtility.SeenRowTracker(startRow, stopRow);
-
- for (int i = 0; i < splits.length; i++) {
- // validate input split
- InputSplit split = splits[i];
- Assert.assertTrue(split instanceof TableSnapshotInputFormat.TableSnapshotRegionSplit);
-
- // validate record reader
- OutputCollector collector = mock(OutputCollector.class);
- Reporter reporter = mock(Reporter.class);
- RecordReader<ImmutableBytesWritable, Result> rr = tsif.getRecordReader(split, job, reporter);
-
- // validate we can read all the data back
- ImmutableBytesWritable key = rr.createKey();
- Result value = rr.createValue();
- while (rr.next(key, value)) {
- verifyRowFromMap(key, value);
- rowTracker.addRow(key.copyBytes());
- }
-
- rr.close();
- }
-
- // validate all rows are seen
- rowTracker.validate();
- }
-
- @Override
- protected void testWithMapReduceImpl(HBaseTestingUtility util, TableName tableName,
- String snapshotName, Path tableDir, int numRegions, int expectedNumSplits,
- boolean shutdownCluster) throws Exception {
- doTestWithMapReduce(util, tableName, snapshotName, getStartRow(), getEndRow(), tableDir,
- numRegions, expectedNumSplits, shutdownCluster);
- }
-
- // this is also called by the IntegrationTestTableSnapshotInputFormat
- public static void doTestWithMapReduce(HBaseTestingUtility util, TableName tableName,
- String snapshotName, byte[] startRow, byte[] endRow, Path tableDir, int numRegions,
- int expectedNumSplits, boolean shutdownCluster) throws Exception {
-
- //create the table and snapshot
- createTableAndSnapshot(util, tableName, snapshotName, startRow, endRow, numRegions);
-
- if (shutdownCluster) {
- util.shutdownMiniHBaseCluster();
- }
-
- try {
- // create the job
- JobConf jobConf = new JobConf(util.getConfiguration());
-
- jobConf.setJarByClass(util.getClass());
- org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addDependencyJarsForClasses(jobConf,
- TestTableSnapshotInputFormat.class);
-
- TableMapReduceUtil.initTableSnapshotMapJob(snapshotName, COLUMNS,
- TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
- NullWritable.class, jobConf, true, tableDir);
-
- jobConf.setReducerClass(TestTableSnapshotInputFormat.TestTableSnapshotReducer.class);
- jobConf.setNumReduceTasks(1);
- jobConf.setOutputFormat(NullOutputFormat.class);
-
- RunningJob job = JobClient.runJob(jobConf);
- Assert.assertTrue(job.isSuccessful());
- } finally {
- if (!shutdownCluster) {
- util.getAdmin().deleteSnapshot(snapshotName);
- util.deleteTable(tableName);
- }
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/HadoopSecurityEnabledUserProviderForTesting.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/HadoopSecurityEnabledUserProviderForTesting.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/HadoopSecurityEnabledUserProviderForTesting.java
deleted file mode 100644
index b342f64..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/HadoopSecurityEnabledUserProviderForTesting.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.hadoop.hbase.security.UserProvider;
-
-/**
- * A {@link UserProvider} that always says hadoop security is enabled, regardless of the underlying
- * configuration. HBase security is <i>not enabled</i> as this is used to determine if SASL is used
- * to do the authentication, which requires a Kerberos ticket (which we currently don't have in
- * tests).
- * <p>
- * This should only be used for <b>TESTING</b>.
- */
-public class HadoopSecurityEnabledUserProviderForTesting extends UserProvider {
-
- @Override
- public boolean isHBaseSecurityEnabled() {
- return false;
- }
-
- @Override
- public boolean isHadoopSecurityEnabled() {
- return true;
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatTestBase.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatTestBase.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatTestBase.java
deleted file mode 100644
index c717fa9..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatTestBase.java
+++ /dev/null
@@ -1,277 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileUtil;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.CategoryBasedTimeout;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.junit.After;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TestRule;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.NavigableMap;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-/**
- * Base set of tests and setup for input formats touching multiple tables.
- */
-public abstract class MultiTableInputFormatTestBase {
- @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
- withTimeout(this.getClass()).withLookingForStuckThread(true).build();
- static final Log LOG = LogFactory.getLog(TestMultiTableInputFormat.class);
- public static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
- static final String TABLE_NAME = "scantest";
- static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
- static final String KEY_STARTROW = "startRow";
- static final String KEY_LASTROW = "stpRow";
-
- static List<String> TABLES = Lists.newArrayList();
-
- static {
- for (int i = 0; i < 3; i++) {
- TABLES.add(TABLE_NAME + String.valueOf(i));
- }
- }
-
- @BeforeClass
- public static void setUpBeforeClass() throws Exception {
- // switch TIF to log at DEBUG level
- TEST_UTIL.enableDebug(MultiTableInputFormatBase.class);
- // start mini hbase cluster
- TEST_UTIL.startMiniCluster(3);
- // create and fill table
- for (String tableName : TABLES) {
- try (Table table =
- TEST_UTIL.createMultiRegionTable(TableName.valueOf(tableName),
- INPUT_FAMILY, 4)) {
- TEST_UTIL.loadTable(table, INPUT_FAMILY, false);
- }
- }
- }
-
- @AfterClass
- public static void tearDownAfterClass() throws Exception {
- TEST_UTIL.shutdownMiniCluster();
- }
-
- @After
- public void tearDown() throws Exception {
- Configuration c = TEST_UTIL.getConfiguration();
- FileUtil.fullyDelete(new File(c.get("hadoop.tmp.dir")));
- }
-
- /**
- * Pass the key and value to reducer.
- */
- public static class ScanMapper extends
- TableMapper<ImmutableBytesWritable, ImmutableBytesWritable> {
- /**
- * Pass the key and value to reduce.
- *
- * @param key The key, here "aaa", "aab" etc.
- * @param value The value is the same as the key.
- * @param context The task context.
- * @throws IOException When reading the rows fails.
- */
- @Override
- public void map(ImmutableBytesWritable key, Result value, Context context)
- throws IOException, InterruptedException {
- makeAssertions(key, value);
- context.write(key, key);
- }
-
- public void makeAssertions(ImmutableBytesWritable key, Result value) throws IOException {
- if (value.size() != 1) {
- throw new IOException("There should only be one input column");
- }
- Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> cf =
- value.getMap();
- if (!cf.containsKey(INPUT_FAMILY)) {
- throw new IOException("Wrong input columns. Missing: '" +
- Bytes.toString(INPUT_FAMILY) + "'.");
- }
- String val = Bytes.toStringBinary(value.getValue(INPUT_FAMILY, null));
- LOG.debug("map: key -> " + Bytes.toStringBinary(key.get()) +
- ", value -> " + val);
- }
- }
-
- /**
- * Checks the last and first keys seen against the scanner boundaries.
- */
- public static class ScanReducer
- extends
- Reducer<ImmutableBytesWritable, ImmutableBytesWritable,
- NullWritable, NullWritable> {
- private String first = null;
- private String last = null;
-
- @Override
- protected void reduce(ImmutableBytesWritable key,
- Iterable<ImmutableBytesWritable> values, Context context)
- throws IOException, InterruptedException {
- makeAssertions(key, values);
- }
-
- protected void makeAssertions(ImmutableBytesWritable key,
- Iterable<ImmutableBytesWritable> values) {
- int count = 0;
- for (ImmutableBytesWritable value : values) {
- String val = Bytes.toStringBinary(value.get());
- LOG.debug("reduce: key[" + count + "] -> " +
- Bytes.toStringBinary(key.get()) + ", value -> " + val);
- if (first == null) first = val;
- last = val;
- count++;
- }
- assertEquals(3, count);
- }
-
- @Override
- protected void cleanup(Context context) throws IOException,
- InterruptedException {
- Configuration c = context.getConfiguration();
- cleanup(c);
- }
-
- protected void cleanup(Configuration c) {
- String startRow = c.get(KEY_STARTROW);
- String lastRow = c.get(KEY_LASTROW);
- LOG.info("cleanup: first -> \"" + first + "\", start row -> \"" +
- startRow + "\"");
- LOG.info("cleanup: last -> \"" + last + "\", last row -> \"" + lastRow +
- "\"");
- if (startRow != null && startRow.length() > 0) {
- assertEquals(startRow, first);
- }
- if (lastRow != null && lastRow.length() > 0) {
- assertEquals(lastRow, last);
- }
- }
- }
-
- @Test
- public void testScanEmptyToEmpty() throws IOException, InterruptedException,
- ClassNotFoundException {
- testScan(null, null, null);
- }
-
- @Test
- public void testScanEmptyToAPP() throws IOException, InterruptedException,
- ClassNotFoundException {
- testScan(null, "app", "apo");
- }
-
- @Test
- public void testScanOBBToOPP() throws IOException, InterruptedException,
- ClassNotFoundException {
- testScan("obb", "opp", "opo");
- }
-
- @Test
- public void testScanYZYToEmpty() throws IOException, InterruptedException,
- ClassNotFoundException {
- testScan("yzy", null, "zzz");
- }
-
- /**
- * Tests a MR scan using specific start and stop rows.
- *
- * @throws IOException
- * @throws ClassNotFoundException
- * @throws InterruptedException
- */
- private void testScan(String start, String stop, String last)
- throws IOException, InterruptedException, ClassNotFoundException {
- String jobName =
- "Scan" + (start != null ? start.toUpperCase(Locale.ROOT) : "Empty") + "To" +
- (stop != null ? stop.toUpperCase(Locale.ROOT) : "Empty");
- LOG.info("Before map/reduce startup - job " + jobName);
- Configuration c = new Configuration(TEST_UTIL.getConfiguration());
-
- c.set(KEY_STARTROW, start != null ? start : "");
- c.set(KEY_LASTROW, last != null ? last : "");
-
- List<Scan> scans = new ArrayList<>();
-
- for (String tableName : TABLES) {
- Scan scan = new Scan();
-
- scan.addFamily(INPUT_FAMILY);
- scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(tableName));
-
- if (start != null) {
- scan.setStartRow(Bytes.toBytes(start));
- }
- if (stop != null) {
- scan.setStopRow(Bytes.toBytes(stop));
- }
-
- scans.add(scan);
-
- LOG.info("scan before: " + scan);
- }
-
- runJob(jobName, c, scans);
- }
-
- protected void runJob(String jobName, Configuration c, List<Scan> scans)
- throws IOException, InterruptedException, ClassNotFoundException {
- Job job = new Job(c, jobName);
-
- initJob(scans, job);
- job.setReducerClass(ScanReducer.class);
- job.setNumReduceTasks(1); // one to get final "first" and "last" key
- FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
- LOG.info("Started " + job.getJobName());
- job.waitForCompletion(true);
- assertTrue(job.isSuccessful());
- LOG.info("After map/reduce completion - job " + jobName);
- }
-
- protected abstract void initJob(List<Scan> scans, Job job) throws IOException;
-
-
-}
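The base class above leaves initJob abstract. For orientation, here is a minimal, hypothetical sketch of what a concrete subclass could look like, assuming the scan-list overload of TableMapReduceUtil.initTableMapperJob (which feeds MultiTableInputFormat); it is not part of this commit. Each Scan already carries its table name in Scan.SCAN_ATTRIBUTES_TABLE_NAME, as testScan() sets up.

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.MultiTableInputFormatTestBase;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.mapreduce.Job;

public class ExampleMultiTableInputFormatTest extends MultiTableInputFormatTestBase {
  @Override
  protected void initJob(List<Scan> scans, Job job) throws IOException {
    // Wire all per-table scans into one mapper job; the input format resolves the
    // target table of each Scan from Scan.SCAN_ATTRIBUTES_TABLE_NAME.
    TableMapReduceUtil.initTableMapperJob(scans,
        MultiTableInputFormatTestBase.ScanMapper.class,
        ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
  }
}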
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/NMapInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/NMapInputFormat.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/NMapInputFormat.java
deleted file mode 100644
index efacca9..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/NMapInputFormat.java
+++ /dev/null
@@ -1,134 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapreduce.InputFormat;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-
-/**
- * Input format that creates a configurable number of map tasks
- * each provided with a single row of NullWritables. This can be
- * useful when trying to write mappers which don't have any real
- * input (e.g. when the mapper is simply producing random data as output)
- */
-public class NMapInputFormat extends InputFormat<NullWritable, NullWritable> {
- private static final String NMAPS_KEY = "nmapinputformat.num.maps";
-
- @Override
- public RecordReader<NullWritable, NullWritable> createRecordReader(
- InputSplit split,
- TaskAttemptContext tac) throws IOException, InterruptedException {
- return new SingleRecordReader<>(NullWritable.get(), NullWritable.get());
- }
-
- @Override
- public List<InputSplit> getSplits(JobContext context) throws IOException,
- InterruptedException {
- int count = getNumMapTasks(context.getConfiguration());
- List<InputSplit> splits = new ArrayList<>(count);
- for (int i = 0; i < count; i++) {
- splits.add(new NullInputSplit());
- }
- return splits;
- }
-
- public static void setNumMapTasks(Configuration conf, int numTasks) {
- conf.setInt(NMAPS_KEY, numTasks);
- }
-
- public static int getNumMapTasks(Configuration conf) {
- return conf.getInt(NMAPS_KEY, 1);
- }
-
- private static class NullInputSplit extends InputSplit implements Writable {
- @Override
- public long getLength() throws IOException, InterruptedException {
- return 0;
- }
-
- @Override
- public String[] getLocations() throws IOException, InterruptedException {
- return new String[] {};
- }
-
- @Override
- public void readFields(DataInput in) throws IOException {
- }
-
- @Override
- public void write(DataOutput out) throws IOException {
- }
- }
-
- private static class SingleRecordReader<K, V>
- extends RecordReader<K, V> {
-
- private final K key;
- private final V value;
- boolean providedKey = false;
-
- SingleRecordReader(K key, V value) {
- this.key = key;
- this.value = value;
- }
-
- @Override
- public void close() {
- }
-
- @Override
- public K getCurrentKey() {
- return key;
- }
-
- @Override
- public V getCurrentValue(){
- return value;
- }
-
- @Override
- public float getProgress() {
- return 0;
- }
-
- @Override
- public void initialize(InputSplit split, TaskAttemptContext tac) {
- }
-
- @Override
- public boolean nextKeyValue() {
- if (providedKey) return false;
- providedKey = true;
- return true;
- }
-
- }
-}
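For context, a minimal, hypothetical sketch of a job that uses NMapInputFormat to run mappers with no real input; the mapper, task count, and output path below are placeholders rather than anything from this change.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.NMapInputFormat;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class RandomDataJob {
  // Each map task receives exactly one NullWritable record and emits whatever it likes.
  static class RandomDataMapper extends Mapper<NullWritable, NullWritable, Text, NullWritable> {
    @Override
    protected void map(NullWritable key, NullWritable value, Context context)
        throws java.io.IOException, InterruptedException {
      context.write(new Text("row-" + context.getTaskAttemptID().getTaskID().getId()),
          NullWritable.get());
    }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    NMapInputFormat.setNumMapTasks(conf, 10);   // ten map tasks, no input files needed
    Job job = Job.getInstance(conf, "random-data");
    job.setJarByClass(RandomDataJob.class);
    job.setInputFormatClass(NMapInputFormat.class);
    job.setMapperClass(RandomDataMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(0);                   // map-only job
    FileOutputFormat.setOutputPath(job, new Path("/tmp/random-data-out"));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}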
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatTestBase.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatTestBase.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatTestBase.java
deleted file mode 100644
index fa47253..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatTestBase.java
+++ /dev/null
@@ -1,231 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.CategoryBasedTimeout;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellScanner;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Admin;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.io.HFileLink;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
-import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
-import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.FSUtils;
-import org.apache.hadoop.hbase.util.HFileArchiveUtil;
-import org.junit.Assert;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TestRule;
-
-import static org.junit.Assert.assertFalse;
-
-import java.io.IOException;
-import java.util.Arrays;
-
-public abstract class TableSnapshotInputFormatTestBase {
- private static final Log LOG = LogFactory.getLog(TableSnapshotInputFormatTestBase.class);
- @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
- withTimeout(this.getClass()).withLookingForStuckThread(true).build();
- protected final HBaseTestingUtility UTIL = new HBaseTestingUtility();
- protected static final int NUM_REGION_SERVERS = 2;
- protected static final byte[][] FAMILIES = {Bytes.toBytes("f1"), Bytes.toBytes("f2")};
-
- protected FileSystem fs;
- protected Path rootDir;
-
- public void setupCluster() throws Exception {
- setupConf(UTIL.getConfiguration());
- UTIL.startMiniCluster(NUM_REGION_SERVERS, true);
- rootDir = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir();
- fs = rootDir.getFileSystem(UTIL.getConfiguration());
- }
-
- public void tearDownCluster() throws Exception {
- UTIL.shutdownMiniCluster();
- }
-
- private static void setupConf(Configuration conf) {
- // Enable snapshot
- conf.setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true);
- }
-
- protected abstract void testWithMockedMapReduce(HBaseTestingUtility util, String snapshotName,
- int numRegions, int expectedNumSplits) throws Exception;
-
- protected abstract void testWithMapReduceImpl(HBaseTestingUtility util, TableName tableName,
- String snapshotName, Path tableDir, int numRegions, int expectedNumSplits,
- boolean shutdownCluster) throws Exception;
-
- protected abstract byte[] getStartRow();
-
- protected abstract byte[] getEndRow();
-
- @Test
- public void testWithMockedMapReduceSingleRegion() throws Exception {
- testWithMockedMapReduce(UTIL, "testWithMockedMapReduceSingleRegion", 1, 1);
- }
-
- @Test
- public void testWithMockedMapReduceMultiRegion() throws Exception {
- testWithMockedMapReduce(UTIL, "testWithMockedMapReduceMultiRegion", 10, 8);
- }
-
- @Test
- public void testWithMapReduceSingleRegion() throws Exception {
- testWithMapReduce(UTIL, "testWithMapReduceSingleRegion", 1, 1, false);
- }
-
- @Test
- public void testWithMapReduceMultiRegion() throws Exception {
- testWithMapReduce(UTIL, "testWithMapReduceMultiRegion", 10, 8, false);
- }
-
- @Test
- // run the MR job while HBase is offline
- public void testWithMapReduceAndOfflineHBaseMultiRegion() throws Exception {
- testWithMapReduce(UTIL, "testWithMapReduceAndOfflineHBaseMultiRegion", 10, 8, true);
- }
-
- // Test that snapshot restore does not create back references in the HBase root dir.
- @Test
- public void testRestoreSnapshotDoesNotCreateBackRefLinks() throws Exception {
- setupCluster();
- TableName tableName = TableName.valueOf("testRestoreSnapshotDoesNotCreateBackRefLinks");
- String snapshotName = "foo";
-
- try {
- createTableAndSnapshot(UTIL, tableName, snapshotName, getStartRow(), getEndRow(), 1);
-
- Path tmpTableDir = UTIL.getDataTestDirOnTestFS(snapshotName);
-
- testRestoreSnapshotDoesNotCreateBackRefLinksInit(tableName, snapshotName,tmpTableDir);
-
- Path rootDir = FSUtils.getRootDir(UTIL.getConfiguration());
- for (Path regionDir : FSUtils.getRegionDirs(fs, FSUtils.getTableDir(rootDir, tableName))) {
- for (Path storeDir : FSUtils.getFamilyDirs(fs, regionDir)) {
- for (FileStatus status : fs.listStatus(storeDir)) {
- System.out.println(status.getPath());
- if (StoreFileInfo.isValid(status)) {
- Path archiveStoreDir = HFileArchiveUtil.getStoreArchivePath(UTIL.getConfiguration(),
- tableName, regionDir.getName(), storeDir.getName());
-
- Path path = HFileLink.getBackReferencesDir(storeDir, status.getPath().getName());
- // assert back references directory is empty
- assertFalse("There is a back reference in " + path, fs.exists(path));
-
- path = HFileLink.getBackReferencesDir(archiveStoreDir, status.getPath().getName());
- // assert back references directory is empty
- assertFalse("There is a back reference in " + path, fs.exists(path));
- }
- }
- }
- }
- } finally {
- UTIL.getAdmin().deleteSnapshot(snapshotName);
- UTIL.deleteTable(tableName);
- tearDownCluster();
- }
- }
-
- public abstract void testRestoreSnapshotDoesNotCreateBackRefLinksInit(TableName tableName,
- String snapshotName, Path tmpTableDir) throws Exception;
-
- protected void testWithMapReduce(HBaseTestingUtility util, String snapshotName,
- int numRegions, int expectedNumSplits, boolean shutdownCluster) throws Exception {
- setupCluster();
- try {
- Path tableDir = util.getDataTestDirOnTestFS(snapshotName);
- TableName tableName = TableName.valueOf("testWithMapReduce");
- testWithMapReduceImpl(util, tableName, snapshotName, tableDir, numRegions,
- expectedNumSplits, shutdownCluster);
- } finally {
- tearDownCluster();
- }
- }
-
- protected static void verifyRowFromMap(ImmutableBytesWritable key, Result result)
- throws IOException {
- byte[] row = key.get();
- CellScanner scanner = result.cellScanner();
- while (scanner.advance()) {
- Cell cell = scanner.current();
-
- //assert that all Cells in the Result have the same key
- Assert.assertEquals(0, Bytes.compareTo(row, 0, row.length,
- cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()));
- }
-
- for (int j = 0; j < FAMILIES.length; j++) {
- byte[] actual = result.getValue(FAMILIES[j], FAMILIES[j]);
- Assert.assertArrayEquals("Row in snapshot does not match, expected:" + Bytes.toString(row)
- + " ,actual:" + Bytes.toString(actual), row, actual);
- }
- }
-
- protected static void createTableAndSnapshot(HBaseTestingUtility util, TableName tableName,
- String snapshotName, byte[] startRow, byte[] endRow, int numRegions)
- throws Exception {
- try {
- LOG.debug("Ensuring table doesn't exist.");
- util.deleteTable(tableName);
- } catch(Exception ex) {
- // ignore
- }
-
- LOG.info("creating table '" + tableName + "'");
- if (numRegions > 1) {
- util.createTable(tableName, FAMILIES, 1, startRow, endRow, numRegions);
- } else {
- util.createTable(tableName, FAMILIES);
- }
- Admin admin = util.getAdmin();
-
- LOG.info("put some stuff in the table");
- Table table = util.getConnection().getTable(tableName);
- util.loadTable(table, FAMILIES);
-
- Path rootDir = FSUtils.getRootDir(util.getConfiguration());
- FileSystem fs = rootDir.getFileSystem(util.getConfiguration());
-
- LOG.info("snapshot");
- SnapshotTestingUtils.createSnapshotAndValidate(admin, tableName,
- Arrays.asList(FAMILIES), null, snapshotName, rootDir, fs, true);
-
- LOG.info("load different values");
- byte[] value = Bytes.toBytes("after_snapshot_value");
- util.loadTable(table, FAMILIES, value);
-
- LOG.info("cause flush to create new files in the region");
- admin.flush(tableName);
- table.close();
- }
-
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCellCounter.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCellCounter.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCellCounter.java
deleted file mode 100644
index ff623cb..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCellCounter.java
+++ /dev/null
@@ -1,376 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileUtil;
-import org.apache.hadoop.fs.LocalFileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.LauncherSecurityManager;
-import org.apache.hadoop.util.ToolRunner;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
-
-import java.io.*;
-
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.fail;
-
-@Category({MapReduceTests.class, LargeTests.class})
-public class TestCellCounter {
- private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
- private static final byte[] ROW1 = Bytes.toBytesBinary("\\x01row1");
- private static final byte[] ROW2 = Bytes.toBytesBinary("\\x01row2");
- private static final String FAMILY_A_STRING = "a";
- private static final String FAMILY_B_STRING = "b";
- private static final byte[] FAMILY_A = Bytes.toBytes(FAMILY_A_STRING);
- private static final byte[] FAMILY_B = Bytes.toBytes(FAMILY_B_STRING);
- private static final byte[] QUALIFIER = Bytes.toBytes("q");
-
- private static Path FQ_OUTPUT_DIR;
- private static final String OUTPUT_DIR = "target" + File.separator + "test-data" + File.separator
- + "output";
- private static long now = System.currentTimeMillis();
-
- @Rule
- public TestName name = new TestName();
-
- @BeforeClass
- public static void beforeClass() throws Exception {
- UTIL.startMiniCluster();
- FQ_OUTPUT_DIR = new Path(OUTPUT_DIR).makeQualified(new LocalFileSystem());
- FileUtil.fullyDelete(new File(OUTPUT_DIR));
- }
-
- @AfterClass
- public static void afterClass() throws Exception {
- UTIL.shutdownMiniCluster();
- }
-
- /**
- * Test CellCounter; data matching the row regex should be written to the output.
- *
- */
- @Test (timeout=300000)
- public void testCellCounter() throws Exception {
- final TableName sourceTable = TableName.valueOf(name.getMethodName());
- byte[][] families = { FAMILY_A, FAMILY_B };
- Table t = UTIL.createTable(sourceTable, families);
- try{
- Put p = new Put(ROW1);
- p.addColumn(FAMILY_A, QUALIFIER, now, Bytes.toBytes("Data11"));
- p.addColumn(FAMILY_B, QUALIFIER, now + 1, Bytes.toBytes("Data12"));
- p.addColumn(FAMILY_A, QUALIFIER, now + 2, Bytes.toBytes("Data13"));
- t.put(p);
- p = new Put(ROW2);
- p.addColumn(FAMILY_B, QUALIFIER, now, Bytes.toBytes("Dat21"));
- p.addColumn(FAMILY_A, QUALIFIER, now + 1, Bytes.toBytes("Data22"));
- p.addColumn(FAMILY_B, QUALIFIER, now + 2, Bytes.toBytes("Data23"));
- t.put(p);
- String[] args = { sourceTable.getNameAsString(), FQ_OUTPUT_DIR.toString(), ";", "^row1" };
- runCount(args);
- FileInputStream inputStream = new FileInputStream(OUTPUT_DIR + File.separator +
- "part-r-00000");
- String data = IOUtils.toString(inputStream);
- inputStream.close();
- assertTrue(data.contains("Total Families Across all Rows" + "\t" + "2"));
- assertTrue(data.contains("Total Qualifiers across all Rows" + "\t" + "2"));
- assertTrue(data.contains("Total ROWS" + "\t" + "1"));
- assertTrue(data.contains("b;q" + "\t" + "1"));
- assertTrue(data.contains("a;q" + "\t" + "1"));
- assertTrue(data.contains("row1;a;q_Versions" + "\t" + "1"));
- assertTrue(data.contains("row1;b;q_Versions" + "\t" + "1"));
- }finally{
- t.close();
- FileUtil.fullyDelete(new File(OUTPUT_DIR));
- }
- }
-
- /**
- * Test CellCounter with a row-key prefix; matching data should be written to the output.
- */
- @Test(timeout = 300000)
- public void testCellCounterPrefix() throws Exception {
- final TableName sourceTable = TableName.valueOf(name.getMethodName());
- byte[][] families = { FAMILY_A, FAMILY_B };
- Table t = UTIL.createTable(sourceTable, families);
- try {
- Put p = new Put(ROW1);
- p.addColumn(FAMILY_A, QUALIFIER, now, Bytes.toBytes("Data11"));
- p.addColumn(FAMILY_B, QUALIFIER, now + 1, Bytes.toBytes("Data12"));
- p.addColumn(FAMILY_A, QUALIFIER, now + 2, Bytes.toBytes("Data13"));
- t.put(p);
- p = new Put(ROW2);
- p.addColumn(FAMILY_B, QUALIFIER, now, Bytes.toBytes("Dat21"));
- p.addColumn(FAMILY_A, QUALIFIER, now + 1, Bytes.toBytes("Data22"));
- p.addColumn(FAMILY_B, QUALIFIER, now + 2, Bytes.toBytes("Data23"));
- t.put(p);
- String[] args = { sourceTable.getNameAsString(), FQ_OUTPUT_DIR.toString(), ";", "\\x01row1" };
- runCount(args);
- FileInputStream inputStream =
- new FileInputStream(OUTPUT_DIR + File.separator + "part-r-00000");
- String data = IOUtils.toString(inputStream);
- inputStream.close();
- assertTrue(data.contains("Total Families Across all Rows" + "\t" + "2"));
- assertTrue(data.contains("Total Qualifiers across all Rows" + "\t" + "2"));
- assertTrue(data.contains("Total ROWS" + "\t" + "1"));
- assertTrue(data.contains("b;q" + "\t" + "1"));
- assertTrue(data.contains("a;q" + "\t" + "1"));
- assertTrue(data.contains("row1;a;q_Versions" + "\t" + "1"));
- assertTrue(data.contains("row1;b;q_Versions" + "\t" + "1"));
- } finally {
- t.close();
- FileUtil.fullyDelete(new File(OUTPUT_DIR));
- }
- }
-
- /**
- * Test CellCounter with start and end time bounds; data within the range should be written to the output.
- */
- @Test (timeout=300000)
- public void testCellCounterStartTimeRange() throws Exception {
- final TableName sourceTable = TableName.valueOf(name.getMethodName());
- byte[][] families = { FAMILY_A, FAMILY_B };
- Table t = UTIL.createTable(sourceTable, families);
- try{
- Put p = new Put(ROW1);
- p.addColumn(FAMILY_A, QUALIFIER, now, Bytes.toBytes("Data11"));
- p.addColumn(FAMILY_B, QUALIFIER, now + 1, Bytes.toBytes("Data12"));
- p.addColumn(FAMILY_A, QUALIFIER, now + 2, Bytes.toBytes("Data13"));
- t.put(p);
- p = new Put(ROW2);
- p.addColumn(FAMILY_B, QUALIFIER, now, Bytes.toBytes("Dat21"));
- p.addColumn(FAMILY_A, QUALIFIER, now + 1, Bytes.toBytes("Data22"));
- p.addColumn(FAMILY_B, QUALIFIER, now + 2, Bytes.toBytes("Data23"));
- t.put(p);
- String[] args = {
- sourceTable.getNameAsString(), FQ_OUTPUT_DIR.toString(), ";", "^row1",
- "--starttime=" + now,
- "--endtime=" + now + 2 };
- runCount(args);
- FileInputStream inputStream = new FileInputStream(OUTPUT_DIR + File.separator +
- "part-r-00000");
- String data = IOUtils.toString(inputStream);
- inputStream.close();
- assertTrue(data.contains("Total Families Across all Rows" + "\t" + "2"));
- assertTrue(data.contains("Total Qualifiers across all Rows" + "\t" + "2"));
- assertTrue(data.contains("Total ROWS" + "\t" + "1"));
- assertTrue(data.contains("b;q" + "\t" + "1"));
- assertTrue(data.contains("a;q" + "\t" + "1"));
- assertTrue(data.contains("row1;a;q_Versions" + "\t" + "1"));
- assertTrue(data.contains("row1;b;q_Versions" + "\t" + "1"));
- }finally{
- t.close();
- FileUtil.fullyDelete(new File(OUTPUT_DIR));
- }
- }
-
- /**
- * Test CellCounter with an end time bound; data within the range should be written to the output.
- */
- @Test (timeout=300000)
- public void testCellCounteEndTimeRange() throws Exception {
- final TableName sourceTable = TableName.valueOf(name.getMethodName());
- byte[][] families = { FAMILY_A, FAMILY_B };
- Table t = UTIL.createTable(sourceTable, families);
- try{
- Put p = new Put(ROW1);
- p.addColumn(FAMILY_A, QUALIFIER, now, Bytes.toBytes("Data11"));
- p.addColumn(FAMILY_B, QUALIFIER, now + 1, Bytes.toBytes("Data12"));
- p.addColumn(FAMILY_A, QUALIFIER, now + 2, Bytes.toBytes("Data13"));
- t.put(p);
- p = new Put(ROW2);
- p.addColumn(FAMILY_B, QUALIFIER, now, Bytes.toBytes("Dat21"));
- p.addColumn(FAMILY_A, QUALIFIER, now + 1, Bytes.toBytes("Data22"));
- p.addColumn(FAMILY_B, QUALIFIER, now + 2, Bytes.toBytes("Data23"));
- t.put(p);
- String[] args = {
- sourceTable.getNameAsString(), FQ_OUTPUT_DIR.toString(), ";", "^row1",
- "--endtime=" + now + 1 };
- runCount(args);
- FileInputStream inputStream = new FileInputStream(OUTPUT_DIR + File.separator +
- "part-r-00000");
- String data = IOUtils.toString(inputStream);
- inputStream.close();
- assertTrue(data.contains("Total Families Across all Rows" + "\t" + "2"));
- assertTrue(data.contains("Total Qualifiers across all Rows" + "\t" + "2"));
- assertTrue(data.contains("Total ROWS" + "\t" + "1"));
- assertTrue(data.contains("b;q" + "\t" + "1"));
- assertTrue(data.contains("a;q" + "\t" + "1"));
- assertTrue(data.contains("row1;a;q_Versions" + "\t" + "1"));
- assertTrue(data.contains("row1;b;q_Versions" + "\t" + "1"));
- }finally{
- t.close();
- FileUtil.fullyDelete(new File(OUTPUT_DIR));
- }
- }
-
- /**
- * Test CellCounter with a time range that matches no data; nothing should be emitted.
- */
- @Test (timeout=300000)
- public void testCellCounteOutOfTimeRange() throws Exception {
- final TableName sourceTable = TableName.valueOf(name.getMethodName());
- byte[][] families = { FAMILY_A, FAMILY_B };
- Table t = UTIL.createTable(sourceTable, families);
- try{
- Put p = new Put(ROW1);
- p.addColumn(FAMILY_A, QUALIFIER, now, Bytes.toBytes("Data11"));
- p.addColumn(FAMILY_B, QUALIFIER, now + 1, Bytes.toBytes("Data12"));
- p.addColumn(FAMILY_A, QUALIFIER, now + 2, Bytes.toBytes("Data13"));
- t.put(p);
- p = new Put(ROW2);
- p.addColumn(FAMILY_B, QUALIFIER, now, Bytes.toBytes("Dat21"));
- p.addColumn(FAMILY_A, QUALIFIER, now + 1, Bytes.toBytes("Data22"));
- p.addColumn(FAMILY_B, QUALIFIER, now + 2, Bytes.toBytes("Data23"));
- t.put(p);
- String[] args = {
- sourceTable.getNameAsString(), FQ_OUTPUT_DIR.toString(), ";", "--starttime=" + now + 1,
- "--endtime=" + now + 2 };
-
- runCount(args);
- FileInputStream inputStream = new FileInputStream(OUTPUT_DIR + File.separator +
- "part-r-00000");
- String data = IOUtils.toString(inputStream);
- inputStream.close();
- // nothing should have been emitted to the reducer
- assertTrue(data.isEmpty());
- }finally{
- t.close();
- FileUtil.fullyDelete(new File(OUTPUT_DIR));
- }
- }
-
-
- private boolean runCount(String[] args) throws Exception {
- // need to make a copy of the configuration to make sure
- // different temp dirs are used.
- int status = ToolRunner.run(new Configuration(UTIL.getConfiguration()), new CellCounter(),
- args);
- return status == 0;
- }
-
- /**
- * Test main method of CellCounter
- */
- @Test (timeout=300000)
- public void testCellCounterMain() throws Exception {
-
- PrintStream oldPrintStream = System.err;
- SecurityManager SECURITY_MANAGER = System.getSecurityManager();
- LauncherSecurityManager newSecurityManager= new LauncherSecurityManager();
- System.setSecurityManager(newSecurityManager);
- ByteArrayOutputStream data = new ByteArrayOutputStream();
- String[] args = {};
- System.setErr(new PrintStream(data));
- try {
- System.setErr(new PrintStream(data));
-
- try {
- CellCounter.main(args);
- fail("should be SecurityException");
- } catch (SecurityException e) {
- assertEquals(-1, newSecurityManager.getExitCode());
- assertTrue(data.toString().contains("ERROR: Wrong number of parameters:"));
- // should be information about usage
- assertTrue(data.toString().contains("Usage:"));
- }
-
- } finally {
- System.setErr(oldPrintStream);
- System.setSecurityManager(SECURITY_MANAGER);
- }
- }
-
- /**
- * Test CellCounter for the complete table; all data should be written to the output.
- */
- @Test(timeout = 600000)
- public void testCellCounterForCompleteTable() throws Exception {
- final TableName sourceTable = TableName.valueOf(name.getMethodName());
- String outputPath = OUTPUT_DIR + sourceTable;
- LocalFileSystem localFileSystem = new LocalFileSystem();
- Path outputDir =
- new Path(outputPath).makeQualified(localFileSystem.getUri(),
- localFileSystem.getWorkingDirectory());
- byte[][] families = { FAMILY_A, FAMILY_B };
- Table t = UTIL.createTable(sourceTable, families);
- try {
- Put p = new Put(ROW1);
- p.addColumn(FAMILY_A, QUALIFIER, now, Bytes.toBytes("Data11"));
- p.addColumn(FAMILY_B, QUALIFIER, now + 1, Bytes.toBytes("Data12"));
- p.addColumn(FAMILY_A, QUALIFIER, now + 2, Bytes.toBytes("Data13"));
- t.put(p);
- p = new Put(ROW2);
- p.addColumn(FAMILY_B, QUALIFIER, now, Bytes.toBytes("Dat21"));
- p.addColumn(FAMILY_A, QUALIFIER, now + 1, Bytes.toBytes("Data22"));
- p.addColumn(FAMILY_B, QUALIFIER, now + 2, Bytes.toBytes("Data23"));
- t.put(p);
- String[] args = { sourceTable.getNameAsString(), outputDir.toString(), ";" };
- runCount(args);
- FileInputStream inputStream =
- new FileInputStream(outputPath + File.separator + "part-r-00000");
- String data = IOUtils.toString(inputStream);
- inputStream.close();
- assertTrue(data.contains("Total Families Across all Rows" + "\t" + "2"));
- assertTrue(data.contains("Total Qualifiers across all Rows" + "\t" + "4"));
- assertTrue(data.contains("Total ROWS" + "\t" + "2"));
- assertTrue(data.contains("b;q" + "\t" + "2"));
- assertTrue(data.contains("a;q" + "\t" + "2"));
- assertTrue(data.contains("row1;a;q_Versions" + "\t" + "1"));
- assertTrue(data.contains("row1;b;q_Versions" + "\t" + "1"));
- assertTrue(data.contains("row2;a;q_Versions" + "\t" + "1"));
- assertTrue(data.contains("row2;b;q_Versions" + "\t" + "1"));
-
- FileUtil.fullyDelete(new File(outputPath));
- args = new String[] { "-D " + TableInputFormat.SCAN_COLUMN_FAMILY + "=a, b",
- sourceTable.getNameAsString(), outputDir.toString(), ";"};
- runCount(args);
- inputStream = new FileInputStream(outputPath + File.separator + "part-r-00000");
- String data2 = IOUtils.toString(inputStream);
- inputStream.close();
- assertEquals(data, data2);
- } finally {
- t.close();
- localFileSystem.close();
- FileUtil.fullyDelete(new File(outputPath));
- }
- }
-
- @Test
- public void TestCellCounterWithoutOutputDir() throws Exception {
- String[] args = new String[] { "tableName" };
- assertEquals("CellCounter should exit with -1 as output directory is not specified.", -1,
- ToolRunner.run(HBaseConfiguration.create(), new CellCounter(), args));
- }
-}
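For readers following the CellCounter tests above: the tool is driven with positional arguments (source table, output directory, reporting separator, optional row regex or prefix) plus optional --starttime/--endtime flags. A standalone sketch of that invocation outside the mini-cluster harness (table name and output path are hypothetical; not part of this patch):

    Configuration conf = HBaseConfiguration.create();
    String[] args = {
        "myTable",                        // source table (hypothetical)
        "file:///tmp/cellcounter-out",    // output directory (hypothetical)
        ";",                              // reporting separator used in the counter keys
        "^row1",                          // optional row regex/prefix filter
        "--starttime=0",
        "--endtime=" + Long.MAX_VALUE
    };
    int exitCode = ToolRunner.run(conf, new CellCounter(), args);  // 0 on success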
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCopyTable.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCopyTable.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCopyTable.java
deleted file mode 100644
index 0bec03b..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCopyTable.java
+++ /dev/null
@@ -1,262 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import java.io.ByteArrayOutputStream;
-import java.io.PrintStream;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Get;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.LauncherSecurityManager;
-import org.apache.hadoop.util.ToolRunner;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
-
-/**
- * Basic test for the CopyTable M/R tool
- */
-@Category({MapReduceTests.class, LargeTests.class})
-public class TestCopyTable {
- private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
- private static final byte[] ROW1 = Bytes.toBytes("row1");
- private static final byte[] ROW2 = Bytes.toBytes("row2");
- private static final String FAMILY_A_STRING = "a";
- private static final String FAMILY_B_STRING = "b";
- private static final byte[] FAMILY_A = Bytes.toBytes(FAMILY_A_STRING);
- private static final byte[] FAMILY_B = Bytes.toBytes(FAMILY_B_STRING);
- private static final byte[] QUALIFIER = Bytes.toBytes("q");
-
- @Rule
- public TestName name = new TestName();
-
- @BeforeClass
- public static void beforeClass() throws Exception {
- TEST_UTIL.startMiniCluster(3);
- }
-
- @AfterClass
- public static void afterClass() throws Exception {
- TEST_UTIL.shutdownMiniCluster();
- }
-
- private void doCopyTableTest(boolean bulkload) throws Exception {
- final TableName tableName1 = TableName.valueOf(name.getMethodName() + "1");
- final TableName tableName2 = TableName.valueOf(name.getMethodName() + "2");
- final byte[] FAMILY = Bytes.toBytes("family");
- final byte[] COLUMN1 = Bytes.toBytes("c1");
-
- try (Table t1 = TEST_UTIL.createTable(tableName1, FAMILY);
- Table t2 = TEST_UTIL.createTable(tableName2, FAMILY);) {
- // put rows into the first table
- for (int i = 0; i < 10; i++) {
- Put p = new Put(Bytes.toBytes("row" + i));
- p.addColumn(FAMILY, COLUMN1, COLUMN1);
- t1.put(p);
- }
-
- CopyTable copy = new CopyTable();
-
- int code;
- if (bulkload) {
- code = ToolRunner.run(new Configuration(TEST_UTIL.getConfiguration()),
- copy, new String[] { "--new.name=" + tableName2.getNameAsString(),
- "--bulkload", tableName1.getNameAsString() });
- } else {
- code = ToolRunner.run(new Configuration(TEST_UTIL.getConfiguration()),
- copy, new String[] { "--new.name=" + tableName2.getNameAsString(),
- tableName1.getNameAsString() });
- }
- assertEquals("copy job failed", 0, code);
-
- // verify the data was copied into table 2
- for (int i = 0; i < 10; i++) {
- Get g = new Get(Bytes.toBytes("row" + i));
- Result r = t2.get(g);
- assertEquals(1, r.size());
- assertTrue(CellUtil.matchingQualifier(r.rawCells()[0], COLUMN1));
- }
- } finally {
- TEST_UTIL.deleteTable(tableName1);
- TEST_UTIL.deleteTable(tableName2);
- }
- }
-
- /**
- * Simple end-to-end test
- * @throws Exception
- */
- @Test
- public void testCopyTable() throws Exception {
- doCopyTableTest(false);
- }
-
- /**
- * Simple end-to-end test with bulkload.
- */
- @Test
- public void testCopyTableWithBulkload() throws Exception {
- doCopyTableTest(true);
- }
-
- @Test
- public void testStartStopRow() throws Exception {
- final TableName tableName1 = TableName.valueOf(name.getMethodName() + "1");
- final TableName tableName2 = TableName.valueOf(name.getMethodName() + "2");
- final byte[] FAMILY = Bytes.toBytes("family");
- final byte[] COLUMN1 = Bytes.toBytes("c1");
- final byte[] ROW0 = Bytes.toBytesBinary("\\x01row0");
- final byte[] ROW1 = Bytes.toBytesBinary("\\x01row1");
- final byte[] ROW2 = Bytes.toBytesBinary("\\x01row2");
-
- Table t1 = TEST_UTIL.createTable(tableName1, FAMILY);
- Table t2 = TEST_UTIL.createTable(tableName2, FAMILY);
-
- // put rows into the first table
- Put p = new Put(ROW0);
- p.addColumn(FAMILY, COLUMN1, COLUMN1);
- t1.put(p);
- p = new Put(ROW1);
- p.addColumn(FAMILY, COLUMN1, COLUMN1);
- t1.put(p);
- p = new Put(ROW2);
- p.addColumn(FAMILY, COLUMN1, COLUMN1);
- t1.put(p);
-
- CopyTable copy = new CopyTable();
- assertEquals(
- 0,
- ToolRunner.run(new Configuration(TEST_UTIL.getConfiguration()),
- copy, new String[] { "--new.name=" + tableName2, "--startrow=\\x01row1",
- "--stoprow=\\x01row2", tableName1.getNameAsString() }));
-
- // verify the data was copied into table 2
- // row1 exist, row0, row2 do not exist
- Get g = new Get(ROW1);
- Result r = t2.get(g);
- assertEquals(1, r.size());
- assertTrue(CellUtil.matchingQualifier(r.rawCells()[0], COLUMN1));
-
- g = new Get(ROW0);
- r = t2.get(g);
- assertEquals(0, r.size());
-
- g = new Get(ROW2);
- r = t2.get(g);
- assertEquals(0, r.size());
-
- t1.close();
- t2.close();
- TEST_UTIL.deleteTable(tableName1);
- TEST_UTIL.deleteTable(tableName2);
- }
-
- /**
- * Test copying all rows from family a of sourceTable into family b of targetTable (--families=a:b).
- */
- @Test
- public void testRenameFamily() throws Exception {
- final TableName sourceTable = TableName.valueOf(name.getMethodName() + "source");
- final TableName targetTable = TableName.valueOf(name.getMethodName() + "-target");
-
- byte[][] families = { FAMILY_A, FAMILY_B };
-
- Table t = TEST_UTIL.createTable(sourceTable, families);
- Table t2 = TEST_UTIL.createTable(targetTable, families);
- Put p = new Put(ROW1);
- p.addColumn(FAMILY_A, QUALIFIER, Bytes.toBytes("Data11"));
- p.addColumn(FAMILY_B, QUALIFIER, Bytes.toBytes("Data12"));
- p.addColumn(FAMILY_A, QUALIFIER, Bytes.toBytes("Data13"));
- t.put(p);
- p = new Put(ROW2);
- p.addColumn(FAMILY_B, QUALIFIER, Bytes.toBytes("Dat21"));
- p.addColumn(FAMILY_A, QUALIFIER, Bytes.toBytes("Data22"));
- p.addColumn(FAMILY_B, QUALIFIER, Bytes.toBytes("Data23"));
- t.put(p);
-
- long currentTime = System.currentTimeMillis();
- String[] args = new String[] { "--new.name=" + targetTable, "--families=a:b", "--all.cells",
- "--starttime=" + (currentTime - 100000), "--endtime=" + (currentTime + 100000),
- "--versions=1", sourceTable.getNameAsString() };
- assertNull(t2.get(new Get(ROW1)).getRow());
-
- assertTrue(runCopy(args));
-
- assertNotNull(t2.get(new Get(ROW1)).getRow());
- Result res = t2.get(new Get(ROW1));
- byte[] b1 = res.getValue(FAMILY_B, QUALIFIER);
- assertEquals("Data13", new String(b1));
- assertNotNull(t2.get(new Get(ROW2)).getRow());
- res = t2.get(new Get(ROW2));
- b1 = res.getValue(FAMILY_A, QUALIFIER);
- // Data from family B is not copied
- assertNull(b1);
-
- }
-
- /**
- * Test main method of CopyTable.
- */
- @Test
- public void testMainMethod() throws Exception {
- String[] emptyArgs = { "-h" };
- PrintStream oldWriter = System.err;
- ByteArrayOutputStream data = new ByteArrayOutputStream();
- PrintStream writer = new PrintStream(data);
- System.setErr(writer);
- SecurityManager SECURITY_MANAGER = System.getSecurityManager();
- LauncherSecurityManager newSecurityManager= new LauncherSecurityManager();
- System.setSecurityManager(newSecurityManager);
- try {
- CopyTable.main(emptyArgs);
- fail("should be exit");
- } catch (SecurityException e) {
- assertEquals(1, newSecurityManager.getExitCode());
- } finally {
- System.setErr(oldWriter);
- System.setSecurityManager(SECURITY_MANAGER);
- }
- assertTrue(data.toString().contains("rs.class"));
- // should print usage information
- assertTrue(data.toString().contains("Usage:"));
- }
-
- private boolean runCopy(String[] args) throws Exception {
- int status = ToolRunner.run(new Configuration(TEST_UTIL.getConfiguration()), new CopyTable(),
- args);
- return status == 0;
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestGroupingTableMapper.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestGroupingTableMapper.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestGroupingTableMapper.java
deleted file mode 100644
index b7fdb47..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestGroupingTableMapper.java
+++ /dev/null
@@ -1,68 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
- * agreements. See the NOTICE file distributed with this work for additional information regarding
- * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with the License. You may
- * obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software distributed under the
- * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
- * either express or implied. See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.testclassification.SmallTests;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-import static org.mockito.Mockito.*;
-
-@Category({MapReduceTests.class, SmallTests.class})
-public class TestGroupingTableMapper {
-
- /**
- * Test GroupingTableMapper class
- */
- @Test
- public void testGroupingTableMapper() throws Exception {
-
- GroupingTableMapper mapper = new GroupingTableMapper();
- Configuration configuration = new Configuration();
- configuration.set(GroupingTableMapper.GROUP_COLUMNS, "family1:clm family2:clm");
- mapper.setConf(configuration);
-
- Result result = mock(Result.class);
- @SuppressWarnings("unchecked")
- Mapper<ImmutableBytesWritable, Result, ImmutableBytesWritable, Result>.Context context =
- mock(Mapper.Context.class);
- context.write(any(ImmutableBytesWritable.class), any(Result.class));
- List<Cell> keyValue = new ArrayList<>();
- byte[] row = {};
- keyValue.add(new KeyValue(row, Bytes.toBytes("family2"), Bytes.toBytes("clm"), Bytes
- .toBytes("value1")));
- keyValue.add(new KeyValue(row, Bytes.toBytes("family1"), Bytes.toBytes("clm"), Bytes
- .toBytes("value2")));
- when(result.listCells()).thenReturn(keyValue);
- mapper.map(null, result, context);
- // template data
- byte[][] data = { Bytes.toBytes("value1"), Bytes.toBytes("value2") };
- ImmutableBytesWritable ibw = mapper.createGroupKey(data);
- verify(context).write(ibw, result);
- }
-
-}
[37/41] hbase git commit: HBASE-18640 Move mapreduce out of
hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java
new file mode 100644
index 0000000..b64271e
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java
@@ -0,0 +1,793 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import static java.lang.String.format;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.TableNotEnabledException;
+import org.apache.hadoop.hbase.TableNotFoundException;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Base64;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
+import org.apache.hadoop.security.Credentials;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.shaded.com.google.common.base.Preconditions;
+import org.apache.hadoop.hbase.shaded.com.google.common.base.Splitter;
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
+
+/**
+ * Tool to import data from a TSV file.
+ *
+ * This tool is rather simplistic - it doesn't do any quoting or
+ * escaping, but is useful for many data loads.
+ *
+ * @see ImportTsv#usage(String)
+ */
+@InterfaceAudience.Public
+public class ImportTsv extends Configured implements Tool {
+
+ protected static final Log LOG = LogFactory.getLog(ImportTsv.class);
+
+ final static String NAME = "importtsv";
+
+ public final static String MAPPER_CONF_KEY = "importtsv.mapper.class";
+ public final static String BULK_OUTPUT_CONF_KEY = "importtsv.bulk.output";
+ public final static String TIMESTAMP_CONF_KEY = "importtsv.timestamp";
+ public final static String JOB_NAME_CONF_KEY = "mapreduce.job.name";
+ // TODO: the rest of these configs are used exclusively by TsvImporterMapper.
+ // Move them out of the tool and let the mapper handle its own validation.
+ public final static String DRY_RUN_CONF_KEY = "importtsv.dry.run";
+ // If true, bad lines are logged to stderr. Default: false.
+ public final static String LOG_BAD_LINES_CONF_KEY = "importtsv.log.bad.lines";
+ public final static String SKIP_LINES_CONF_KEY = "importtsv.skip.bad.lines";
+ public final static String SKIP_EMPTY_COLUMNS = "importtsv.skip.empty.columns";
+ public final static String COLUMNS_CONF_KEY = "importtsv.columns";
+ public final static String SEPARATOR_CONF_KEY = "importtsv.separator";
+ public final static String ATTRIBUTE_SEPERATOR_CONF_KEY = "attributes.seperator";
+ //This config is used to propagate credentials from parent MR jobs which launch
+ //ImportTSV jobs. SEE IntegrationTestImportTsv.
+ public final static String CREDENTIALS_LOCATION = "credentials_location";
+ final static String DEFAULT_SEPARATOR = "\t";
+ final static String DEFAULT_ATTRIBUTES_SEPERATOR = "=>";
+ final static String DEFAULT_MULTIPLE_ATTRIBUTES_SEPERATOR = ",";
+ final static Class DEFAULT_MAPPER = TsvImporterMapper.class;
+ public final static String CREATE_TABLE_CONF_KEY = "create.table";
+ public final static String NO_STRICT_COL_FAMILY = "no.strict";
+ /**
+ * If table didn't exist and was created in dry-run mode, this flag is
+ * flipped to delete it when MR ends.
+ */
+ private static boolean DRY_RUN_TABLE_CREATED;
+
+ public static class TsvParser {
+ /**
+ * Column families and qualifiers mapped to the TSV columns
+ */
+ private final byte[][] families;
+ private final byte[][] qualifiers;
+
+ private final byte separatorByte;
+
+ private int rowKeyColumnIndex;
+
+ private int maxColumnCount;
+
+ // Default value must be negative
+ public static final int DEFAULT_TIMESTAMP_COLUMN_INDEX = -1;
+
+ private int timestampKeyColumnIndex = DEFAULT_TIMESTAMP_COLUMN_INDEX;
+
+ public static final String ROWKEY_COLUMN_SPEC = "HBASE_ROW_KEY";
+
+ public static final String TIMESTAMPKEY_COLUMN_SPEC = "HBASE_TS_KEY";
+
+ public static final String ATTRIBUTES_COLUMN_SPEC = "HBASE_ATTRIBUTES_KEY";
+
+ public static final String CELL_VISIBILITY_COLUMN_SPEC = "HBASE_CELL_VISIBILITY";
+
+ public static final String CELL_TTL_COLUMN_SPEC = "HBASE_CELL_TTL";
+
+ private int attrKeyColumnIndex = DEFAULT_ATTRIBUTES_COLUMN_INDEX;
+
+ public static final int DEFAULT_ATTRIBUTES_COLUMN_INDEX = -1;
+
+ public static final int DEFAULT_CELL_VISIBILITY_COLUMN_INDEX = -1;
+
+ public static final int DEFAULT_CELL_TTL_COLUMN_INDEX = -1;
+
+ private int cellVisibilityColumnIndex = DEFAULT_CELL_VISIBILITY_COLUMN_INDEX;
+
+ private int cellTTLColumnIndex = DEFAULT_CELL_TTL_COLUMN_INDEX;
+
+ /**
+ * @param columnsSpecification the list of columns to parse out, comma separated.
+ * The row key should be the special token TsvParser.ROWKEY_COLUMN_SPEC
+ * @param separatorStr
+ */
+ public TsvParser(String columnsSpecification, String separatorStr) {
+ // Configure separator
+ byte[] separator = Bytes.toBytes(separatorStr);
+ Preconditions.checkArgument(separator.length == 1,
+ "TsvParser only supports single-byte separators");
+ separatorByte = separator[0];
+
+ // Configure columns
+ ArrayList<String> columnStrings = Lists.newArrayList(
+ Splitter.on(',').trimResults().split(columnsSpecification));
+
+ maxColumnCount = columnStrings.size();
+ families = new byte[maxColumnCount][];
+ qualifiers = new byte[maxColumnCount][];
+
+ for (int i = 0; i < columnStrings.size(); i++) {
+ String str = columnStrings.get(i);
+ if (ROWKEY_COLUMN_SPEC.equals(str)) {
+ rowKeyColumnIndex = i;
+ continue;
+ }
+ if (TIMESTAMPKEY_COLUMN_SPEC.equals(str)) {
+ timestampKeyColumnIndex = i;
+ continue;
+ }
+ if (ATTRIBUTES_COLUMN_SPEC.equals(str)) {
+ attrKeyColumnIndex = i;
+ continue;
+ }
+ if (CELL_VISIBILITY_COLUMN_SPEC.equals(str)) {
+ cellVisibilityColumnIndex = i;
+ continue;
+ }
+ if (CELL_TTL_COLUMN_SPEC.equals(str)) {
+ cellTTLColumnIndex = i;
+ continue;
+ }
+ String[] parts = str.split(":", 2);
+ if (parts.length == 1) {
+ families[i] = str.getBytes();
+ qualifiers[i] = HConstants.EMPTY_BYTE_ARRAY;
+ } else {
+ families[i] = parts[0].getBytes();
+ qualifiers[i] = parts[1].getBytes();
+ }
+ }
+ }
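As a brief illustration of the column specification described above (a hypothetical sketch, not part of this patch): the special HBASE_ROW_KEY token marks the row-key column, and every other entry maps a TSV column to a family or family:qualifier. The family and qualifier names below are assumptions.

    // "d:c1" and "d:c2" are hypothetical family:qualifier names.
    ImportTsv.TsvParser parser =
        new ImportTsv.TsvParser("HBASE_ROW_KEY,d:c1,d:c2", "\t");
    // parser.getRowKeyColumnIndex() == 0
    // Bytes.toString(parser.getFamily(1))    -> "d"
    // Bytes.toString(parser.getQualifier(1)) -> "c1"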
+
+ public boolean hasTimestamp() {
+ return timestampKeyColumnIndex != DEFAULT_TIMESTAMP_COLUMN_INDEX;
+ }
+
+ public int getTimestampKeyColumnIndex() {
+ return timestampKeyColumnIndex;
+ }
+
+ public boolean hasAttributes() {
+ return attrKeyColumnIndex != DEFAULT_ATTRIBUTES_COLUMN_INDEX;
+ }
+
+ public boolean hasCellVisibility() {
+ return cellVisibilityColumnIndex != DEFAULT_CELL_VISIBILITY_COLUMN_INDEX;
+ }
+
+ public boolean hasCellTTL() {
+ return cellTTLColumnIndex != DEFAULT_CELL_VISIBILITY_COLUMN_INDEX;
+ }
+
+ public int getAttributesKeyColumnIndex() {
+ return attrKeyColumnIndex;
+ }
+
+ public int getCellVisibilityColumnIndex() {
+ return cellVisibilityColumnIndex;
+ }
+
+ public int getCellTTLColumnIndex() {
+ return cellTTLColumnIndex;
+ }
+
+ public int getRowKeyColumnIndex() {
+ return rowKeyColumnIndex;
+ }
+
+ public byte[] getFamily(int idx) {
+ return families[idx];
+ }
+ public byte[] getQualifier(int idx) {
+ return qualifiers[idx];
+ }
+
+ public ParsedLine parse(byte[] lineBytes, int length)
+ throws BadTsvLineException {
+ // Enumerate separator offsets
+ ArrayList<Integer> tabOffsets = new ArrayList<>(maxColumnCount);
+ for (int i = 0; i < length; i++) {
+ if (lineBytes[i] == separatorByte) {
+ tabOffsets.add(i);
+ }
+ }
+ if (tabOffsets.isEmpty()) {
+ throw new BadTsvLineException("No delimiter");
+ }
+
+ tabOffsets.add(length);
+
+ if (tabOffsets.size() > maxColumnCount) {
+ throw new BadTsvLineException("Excessive columns");
+ } else if (tabOffsets.size() <= getRowKeyColumnIndex()) {
+ throw new BadTsvLineException("No row key");
+ } else if (hasTimestamp()
+ && tabOffsets.size() <= getTimestampKeyColumnIndex()) {
+ throw new BadTsvLineException("No timestamp");
+ } else if (hasAttributes() && tabOffsets.size() <= getAttributesKeyColumnIndex()) {
+ throw new BadTsvLineException("No attributes specified");
+ } else if (hasCellVisibility() && tabOffsets.size() <= getCellVisibilityColumnIndex()) {
+ throw new BadTsvLineException("No cell visibility specified");
+ } else if (hasCellTTL() && tabOffsets.size() <= getCellTTLColumnIndex()) {
+ throw new BadTsvLineException("No cell TTL specified");
+ }
+ return new ParsedLine(tabOffsets, lineBytes);
+ }
+
+ class ParsedLine {
+ private final ArrayList<Integer> tabOffsets;
+ private byte[] lineBytes;
+
+ ParsedLine(ArrayList<Integer> tabOffsets, byte[] lineBytes) {
+ this.tabOffsets = tabOffsets;
+ this.lineBytes = lineBytes;
+ }
+
+ public int getRowKeyOffset() {
+ return getColumnOffset(rowKeyColumnIndex);
+ }
+ public int getRowKeyLength() {
+ return getColumnLength(rowKeyColumnIndex);
+ }
+
+ public long getTimestamp(long ts) throws BadTsvLineException {
+ // Return ts if HBASE_TS_KEY is not configured in column spec
+ if (!hasTimestamp()) {
+ return ts;
+ }
+
+ String timeStampStr = Bytes.toString(lineBytes,
+ getColumnOffset(timestampKeyColumnIndex),
+ getColumnLength(timestampKeyColumnIndex));
+ try {
+ return Long.parseLong(timeStampStr);
+ } catch (NumberFormatException nfe) {
+ // treat this record as bad record
+ throw new BadTsvLineException("Invalid timestamp " + timeStampStr);
+ }
+ }
+
+ private String getAttributes() {
+ if (!hasAttributes()) {
+ return null;
+ } else {
+ return Bytes.toString(lineBytes, getColumnOffset(attrKeyColumnIndex),
+ getColumnLength(attrKeyColumnIndex));
+ }
+ }
+
+ public String[] getIndividualAttributes() {
+ String attributes = getAttributes();
+ if (attributes != null) {
+ return attributes.split(DEFAULT_MULTIPLE_ATTRIBUTES_SEPERATOR);
+ } else {
+ return null;
+ }
+ }
+
+ public int getAttributeKeyOffset() {
+ if (hasAttributes()) {
+ return getColumnOffset(attrKeyColumnIndex);
+ } else {
+ return DEFAULT_ATTRIBUTES_COLUMN_INDEX;
+ }
+ }
+
+ public int getAttributeKeyLength() {
+ if (hasAttributes()) {
+ return getColumnLength(attrKeyColumnIndex);
+ } else {
+ return DEFAULT_ATTRIBUTES_COLUMN_INDEX;
+ }
+ }
+
+ public int getCellVisibilityColumnOffset() {
+ if (hasCellVisibility()) {
+ return getColumnOffset(cellVisibilityColumnIndex);
+ } else {
+ return DEFAULT_CELL_VISIBILITY_COLUMN_INDEX;
+ }
+ }
+
+ public int getCellVisibilityColumnLength() {
+ if (hasCellVisibility()) {
+ return getColumnLength(cellVisibilityColumnIndex);
+ } else {
+ return DEFAULT_CELL_VISIBILITY_COLUMN_INDEX;
+ }
+ }
+
+ public String getCellVisibility() {
+ if (!hasCellVisibility()) {
+ return null;
+ } else {
+ return Bytes.toString(lineBytes, getColumnOffset(cellVisibilityColumnIndex),
+ getColumnLength(cellVisibilityColumnIndex));
+ }
+ }
+
+ public int getCellTTLColumnOffset() {
+ if (hasCellTTL()) {
+ return getColumnOffset(cellTTLColumnIndex);
+ } else {
+ return DEFAULT_CELL_TTL_COLUMN_INDEX;
+ }
+ }
+
+ public int getCellTTLColumnLength() {
+ if (hasCellTTL()) {
+ return getColumnLength(cellTTLColumnIndex);
+ } else {
+ return DEFAULT_CELL_TTL_COLUMN_INDEX;
+ }
+ }
+
+ public long getCellTTL() {
+ if (!hasCellTTL()) {
+ return 0;
+ } else {
+ return Bytes.toLong(lineBytes, getColumnOffset(cellTTLColumnIndex),
+ getColumnLength(cellTTLColumnIndex));
+ }
+ }
+
+ public int getColumnOffset(int idx) {
+ if (idx > 0)
+ return tabOffsets.get(idx - 1) + 1;
+ else
+ return 0;
+ }
+ public int getColumnLength(int idx) {
+ return tabOffsets.get(idx) - getColumnOffset(idx);
+ }
+ public int getColumnCount() {
+ return tabOffsets.size();
+ }
+ public byte[] getLineBytes() {
+ return lineBytes;
+ }
+ }
+
+ public static class BadTsvLineException extends Exception {
+ public BadTsvLineException(String err) {
+ super(err);
+ }
+ private static final long serialVersionUID = 1L;
+ }
+
+ /**
+ * Return starting position and length of row key from the specified line bytes.
+ * @param lineBytes
+ * @param length
+ * @return Pair of row key offset and length.
+ * @throws BadTsvLineException
+ */
+ public Pair<Integer, Integer> parseRowKey(byte[] lineBytes, int length)
+ throws BadTsvLineException {
+ int rkColumnIndex = 0;
+ int startPos = 0, endPos = 0;
+ for (int i = 0; i <= length; i++) {
+ if (i == length || lineBytes[i] == separatorByte) {
+ endPos = i - 1;
+ if (rkColumnIndex++ == getRowKeyColumnIndex()) {
+ if ((endPos + 1) == startPos) {
+ throw new BadTsvLineException("Empty value for ROW KEY.");
+ }
+ break;
+ } else {
+ startPos = endPos + 2;
+ }
+ }
+ if (i == length) {
+ throw new BadTsvLineException(
+ "Row key does not exist as number of columns in the line"
+ + " are less than row key position.");
+ }
+ }
+ return new Pair<>(startPos, endPos - startPos + 1);
+ }
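A small worked example of parseRowKey() under the column spec sketched earlier (row key in the first column, tab separator; illustrative only, not part of this patch):

    // parser as in the earlier sketch: new ImportTsv.TsvParser("HBASE_ROW_KEY,d:c1,d:c2", "\t")
    byte[] line = Bytes.toBytes("r1\tval1");            // hypothetical input line
    Pair<Integer, Integer> rk = parser.parseRowKey(line, line.length);
    // rk.getFirst() == 0 (offset), rk.getSecond() == 2 (length of "r1")
    // parseRowKey throws the checked BadTsvLineException for malformed lines.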
+ }
+
+ /**
+ * Sets up the actual job.
+ *
+ * @param conf The current configuration.
+ * @param args The command line parameters.
+ * @return The newly created job.
+ * @throws IOException When setting up the job fails.
+ */
+ protected static Job createSubmittableJob(Configuration conf, String[] args)
+ throws IOException, ClassNotFoundException {
+ Job job = null;
+ boolean isDryRun = conf.getBoolean(DRY_RUN_CONF_KEY, false);
+ try (Connection connection = ConnectionFactory.createConnection(conf)) {
+ try (Admin admin = connection.getAdmin()) {
+ // Support separator characters that are not valid in XML configuration files
+ // by re-encoding the passed separator as a Base64 string.
+ String actualSeparator = conf.get(SEPARATOR_CONF_KEY);
+ if (actualSeparator != null) {
+ conf.set(SEPARATOR_CONF_KEY,
+ Base64.encodeBytes(actualSeparator.getBytes()));
+ }
+
+ // See if a non-default Mapper was set
+ String mapperClassName = conf.get(MAPPER_CONF_KEY);
+ Class mapperClass = mapperClassName != null? Class.forName(mapperClassName): DEFAULT_MAPPER;
+
+ TableName tableName = TableName.valueOf(args[0]);
+ Path inputDir = new Path(args[1]);
+ String jobName = conf.get(JOB_NAME_CONF_KEY,NAME + "_" + tableName.getNameAsString());
+ job = Job.getInstance(conf, jobName);
+ job.setJarByClass(mapperClass);
+ FileInputFormat.setInputPaths(job, inputDir);
+ job.setInputFormatClass(TextInputFormat.class);
+ job.setMapperClass(mapperClass);
+ job.setMapOutputKeyClass(ImmutableBytesWritable.class);
+ String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
+ String[] columns = conf.getStrings(COLUMNS_CONF_KEY);
+ if(StringUtils.isNotEmpty(conf.get(CREDENTIALS_LOCATION))) {
+ String fileLoc = conf.get(CREDENTIALS_LOCATION);
+ Credentials cred = Credentials.readTokenStorageFile(new File(fileLoc), conf);
+ job.getCredentials().addAll(cred);
+ }
+
+ if (hfileOutPath != null) {
+ if (!admin.tableExists(tableName)) {
+ LOG.warn(format("Table '%s' does not exist.", tableName));
+ if ("yes".equalsIgnoreCase(conf.get(CREATE_TABLE_CONF_KEY, "yes"))) {
+ // TODO: this is backwards. Instead of depending on the existence of a table,
+ // create a sane splits file for HFileOutputFormat based on data sampling.
+ createTable(admin, tableName, columns);
+ if (isDryRun) {
+ LOG.warn("Dry run: Table will be deleted at end of dry run.");
+ synchronized (ImportTsv.class) {
+ DRY_RUN_TABLE_CREATED = true;
+ }
+ }
+ } else {
+ String errorMsg =
+ format("Table '%s' does not exist and '%s' is set to no.", tableName,
+ CREATE_TABLE_CONF_KEY);
+ LOG.error(errorMsg);
+ throw new TableNotFoundException(errorMsg);
+ }
+ }
+ try (Table table = connection.getTable(tableName);
+ RegionLocator regionLocator = connection.getRegionLocator(tableName)) {
+ boolean noStrict = conf.getBoolean(NO_STRICT_COL_FAMILY, false);
+ // if no.strict is false then check column family
+ if(!noStrict) {
+ ArrayList<String> unmatchedFamilies = new ArrayList<>();
+ Set<String> cfSet = getColumnFamilies(columns);
+ TableDescriptor tDesc = table.getDescriptor();
+ for (String cf : cfSet) {
+ if(!tDesc.hasColumnFamily(Bytes.toBytes(cf))) {
+ unmatchedFamilies.add(cf);
+ }
+ }
+ if(unmatchedFamilies.size() > 0) {
+ ArrayList<String> familyNames = new ArrayList<>();
+ for (ColumnFamilyDescriptor family : table.getDescriptor().getColumnFamilies()) {
+ familyNames.add(family.getNameAsString());
+ }
+ String msg =
+ "Column Families " + unmatchedFamilies + " specified in " + COLUMNS_CONF_KEY
+ + " does not match with any of the table " + tableName
+ + " column families " + familyNames + ".\n"
+ + "To disable column family check, use -D" + NO_STRICT_COL_FAMILY
+ + "=true.\n";
+ usage(msg);
+ System.exit(-1);
+ }
+ }
+ if (mapperClass.equals(TsvImporterTextMapper.class)) {
+ job.setMapOutputValueClass(Text.class);
+ job.setReducerClass(TextSortReducer.class);
+ } else {
+ job.setMapOutputValueClass(Put.class);
+ job.setCombinerClass(PutCombiner.class);
+ job.setReducerClass(PutSortReducer.class);
+ }
+ if (!isDryRun) {
+ Path outputDir = new Path(hfileOutPath);
+ FileOutputFormat.setOutputPath(job, outputDir);
+ HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(),
+ regionLocator);
+ }
+ }
+ } else {
+ if (!admin.tableExists(tableName)) {
+ String errorMsg = format("Table '%s' does not exist.", tableName);
+ LOG.error(errorMsg);
+ throw new TableNotFoundException(errorMsg);
+ }
+ if (mapperClass.equals(TsvImporterTextMapper.class)) {
+ usage(TsvImporterTextMapper.class.toString()
+ + " should not be used for non bulkloading case. use "
+ + TsvImporterMapper.class.toString()
+ + " or custom mapper whose value type is Put.");
+ System.exit(-1);
+ }
+ if (!isDryRun) {
+ // No reducers. Just write straight to table. Call initTableReducerJob
+ // to set up the TableOutputFormat.
+ TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job);
+ }
+ job.setNumReduceTasks(0);
+ }
+ if (isDryRun) {
+ job.setOutputFormatClass(NullOutputFormat.class);
+ job.getConfiguration().setStrings("io.serializations",
+ job.getConfiguration().get("io.serializations"),
+ MutationSerialization.class.getName(), ResultSerialization.class.getName(),
+ KeyValueSerialization.class.getName());
+ }
+ TableMapReduceUtil.addDependencyJars(job);
+ TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
+ org.apache.hadoop.hbase.shaded.com.google.common.base.Function.class /* Guava used by TsvParser */);
+ }
+ }
+ return job;
+ }
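Since ImportTsv is a Hadoop Tool, it is normally launched through ToolRunner, which ultimately builds the job via the setup above. A minimal sketch with hypothetical table, column, and path names (the bulk-load output key is optional; without it Puts are written directly to the table). This is an illustration, not part of this patch:

    Configuration conf = HBaseConfiguration.create();
    // First TSV column is the row key; the rest map to family:qualifier columns.
    conf.set(ImportTsv.COLUMNS_CONF_KEY, "HBASE_ROW_KEY,d:c1,d:c2");
    // Optional: emit HFiles for a subsequent bulk load instead of live Puts.
    conf.set(ImportTsv.BULK_OUTPUT_CONF_KEY, "/tmp/importtsv-hfiles");   // hypothetical path
    int exit = ToolRunner.run(conf, new ImportTsv(),
        new String[] { "myTable", "/tmp/input.tsv" });                   // hypothetical args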
+
+ private static void createTable(Admin admin, TableName tableName, String[] columns)
+ throws IOException {
+ HTableDescriptor htd = new HTableDescriptor(tableName);
+ Set<String> cfSet = getColumnFamilies(columns);
+ for (String cf : cfSet) {
+ HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toBytes(cf));
+ htd.addFamily(hcd);
+ }
+ LOG.warn(format("Creating table '%s' with '%s' columns and default descriptors.",
+ tableName, cfSet));
+ admin.createTable(htd);
+ }
+
+ private static void deleteTable(Configuration conf, String[] args) {
+ TableName tableName = TableName.valueOf(args[0]);
+ try (Connection connection = ConnectionFactory.createConnection(conf);
+ Admin admin = connection.getAdmin()) {
+ try {
+ admin.disableTable(tableName);
+ } catch (TableNotEnabledException e) {
+ LOG.debug("Dry mode: Table: " + tableName + " already disabled, so just deleting it.");
+ }
+ admin.deleteTable(tableName);
+ } catch (IOException e) {
+ LOG.error(format("***Dry run: Failed to delete table '%s'.***%n%s", tableName,
+ e.toString()));
+ return;
+ }
+ LOG.info(format("Dry run: Deleted table '%s'.", tableName));
+ }
+
+ private static Set<String> getColumnFamilies(String[] columns) {
+ Set<String> cfSet = new HashSet<>();
+ for (String aColumn : columns) {
+ if (TsvParser.ROWKEY_COLUMN_SPEC.equals(aColumn)
+ || TsvParser.TIMESTAMPKEY_COLUMN_SPEC.equals(aColumn)
+ || TsvParser.CELL_VISIBILITY_COLUMN_SPEC.equals(aColumn)
+ || TsvParser.CELL_TTL_COLUMN_SPEC.equals(aColumn)
+ || TsvParser.ATTRIBUTES_COLUMN_SPEC.equals(aColumn))
+ continue;
+ // we are only concerned with the first one (in case this is a cf:cq)
+ cfSet.add(aColumn.split(":", 2)[0]);
+ }
+ return cfSet;
+ }
+
+ /*
+ * @param errorMsg Error message. Can be null.
+ */
+ private static void usage(final String errorMsg) {
+ if (errorMsg != null && errorMsg.length() > 0) {
+ System.err.println("ERROR: " + errorMsg);
+ }
+ String usage =
+ "Usage: " + NAME + " -D"+ COLUMNS_CONF_KEY + "=a,b,c <tablename> <inputdir>\n" +
+ "\n" +
+ "Imports the given input directory of TSV data into the specified table.\n" +
+ "\n" +
+ "The column names of the TSV data must be specified using the -D" + COLUMNS_CONF_KEY + "\n" +
+ "option. This option takes the form of comma-separated column names, where each\n" +
+ "column name is either a simple column family, or a columnfamily:qualifier. The special\n" +
+ "column name " + TsvParser.ROWKEY_COLUMN_SPEC + " is used to designate that this column should be used\n" +
+ "as the row key for each imported record. You must specify exactly one column\n" +
+ "to be the row key, and you must specify a column name for every column that exists in the\n" +
+ "input data. Another special column" + TsvParser.TIMESTAMPKEY_COLUMN_SPEC +
+ " designates that this column should be\n" +
+ "used as timestamp for each record. Unlike " + TsvParser.ROWKEY_COLUMN_SPEC + ", " +
+ TsvParser.TIMESTAMPKEY_COLUMN_SPEC + " is optional." + "\n" +
+ "You must specify at most one column as timestamp key for each imported record.\n" +
+ "Record with invalid timestamps (blank, non-numeric) will be treated as bad record.\n" +
+ "Note: if you use this option, then '" + TIMESTAMP_CONF_KEY + "' option will be ignored.\n" +
+ "\n" +
+ "Other special columns that can be specified are " + TsvParser.CELL_TTL_COLUMN_SPEC +
+ " and " + TsvParser.CELL_VISIBILITY_COLUMN_SPEC + ".\n" +
+ TsvParser.CELL_TTL_COLUMN_SPEC + " designates that this column will be used " +
+ "as a Cell's Time To Live (TTL) attribute.\n" +
+ TsvParser.CELL_VISIBILITY_COLUMN_SPEC + " designates that this column contains the " +
+ "visibility label expression.\n" +
+ "\n" +
+ TsvParser.ATTRIBUTES_COLUMN_SPEC + " can be used to specify Operation Attributes per record.\n" +
+ " They should be specified as key=>value where " + TsvParser.DEFAULT_ATTRIBUTES_COLUMN_INDEX + " is used\n" +
+ " as the separator. Note that more than one OperationAttribute can be specified.\n" +
+ "By default importtsv will load data directly into HBase. To instead generate\n" +
+ "HFiles of data to prepare for a bulk data load, pass the option:\n" +
+ " -D" + BULK_OUTPUT_CONF_KEY + "=/path/for/output\n" +
+ " Note: if you do not use this option, then the target table must already exist in HBase\n" +
+ "\n" +
+ "Other options that may be specified with -D include:\n" +
+ " -D" + DRY_RUN_CONF_KEY + "=true - Dry run mode. Data is not actually populated into" +
+ " table. If table does not exist, it is created but deleted in the end.\n" +
+ " -D" + SKIP_LINES_CONF_KEY + "=false - fail if encountering an invalid line\n" +
+ " -D" + LOG_BAD_LINES_CONF_KEY + "=true - logs invalid lines to stderr\n" +
+ " -D" + SKIP_EMPTY_COLUMNS + "=false - If true then skip empty columns in bulk import\n" +
+ " '-D" + SEPARATOR_CONF_KEY + "=|' - eg separate on pipes instead of tabs\n" +
+ " -D" + TIMESTAMP_CONF_KEY + "=currentTimeAsLong - use the specified timestamp for the import\n" +
+ " -D" + MAPPER_CONF_KEY + "=my.Mapper - A user-defined Mapper to use instead of " +
+ DEFAULT_MAPPER.getName() + "\n" +
+ " -D" + JOB_NAME_CONF_KEY + "=jobName - use the specified mapreduce job name for the import\n" +
+ " -D" + CREATE_TABLE_CONF_KEY + "=no - can be used to avoid creation of table by this tool\n" +
+ " Note: if you set this to 'no', then the target table must already exist in HBase\n" +
+ " -D" + NO_STRICT_COL_FAMILY + "=true - ignore column family check in hbase table. " +
+ "Default is false\n\n" +
+ "For performance consider the following options:\n" +
+ " -Dmapreduce.map.speculative=false\n" +
+ " -Dmapreduce.reduce.speculative=false";
+
+ System.err.println(usage);
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+ if (args.length < 2) {
+ usage("Wrong number of arguments: " + args.length);
+ return -1;
+ }
+
+ // When MAPPER_CONF_KEY is null, the user wants to use the provided TsvImporterMapper, so
+ // perform validation on these additional args. When it's not null, the user has provided their
+ // own mapper, so these validations are not relevant.
+ // TODO: validation for TsvImporterMapper, not this tool. Move elsewhere.
+ if (null == getConf().get(MAPPER_CONF_KEY)) {
+ // Make sure columns are specified
+ String[] columns = getConf().getStrings(COLUMNS_CONF_KEY);
+ if (columns == null) {
+ usage("No columns specified. Please specify with -D" +
+ COLUMNS_CONF_KEY+"=...");
+ return -1;
+ }
+
+ // Make sure they specify exactly one column as the row key
+ int rowkeysFound = 0;
+ for (String col : columns) {
+ if (col.equals(TsvParser.ROWKEY_COLUMN_SPEC)) rowkeysFound++;
+ }
+ if (rowkeysFound != 1) {
+ usage("Must specify exactly one column as " + TsvParser.ROWKEY_COLUMN_SPEC);
+ return -1;
+ }
+
+ // Make sure we have at most one column as the timestamp key
+ int tskeysFound = 0;
+ for (String col : columns) {
+ if (col.equals(TsvParser.TIMESTAMPKEY_COLUMN_SPEC))
+ tskeysFound++;
+ }
+ if (tskeysFound > 1) {
+ usage("Must specify at most one column as "
+ + TsvParser.TIMESTAMPKEY_COLUMN_SPEC);
+ return -1;
+ }
+
+ int attrKeysFound = 0;
+ for (String col : columns) {
+ if (col.equals(TsvParser.ATTRIBUTES_COLUMN_SPEC))
+ attrKeysFound++;
+ }
+ if (attrKeysFound > 1) {
+ usage("Must specify at most one column as "
+ + TsvParser.ATTRIBUTES_COLUMN_SPEC);
+ return -1;
+ }
+
+ // Make sure one or more columns are specified excluding rowkey and
+ // timestamp key
+ if (columns.length - (rowkeysFound + tskeysFound + attrKeysFound) < 1) {
+ usage("One or more columns in addition to the row key and timestamp(optional) are required");
+ return -1;
+ }
+ }
+
+ // If timestamp option is not specified, use current system time.
+ long timestamp = getConf().getLong(TIMESTAMP_CONF_KEY, System.currentTimeMillis());
+
+ // Set it back to replace invalid timestamp (non-numeric) with current
+ // system time
+ getConf().setLong(TIMESTAMP_CONF_KEY, timestamp);
+
+ synchronized (ImportTsv.class) {
+ DRY_RUN_TABLE_CREATED = false;
+ }
+ Job job = createSubmittableJob(getConf(), args);
+ boolean success = job.waitForCompletion(true);
+ boolean delete = false;
+ synchronized (ImportTsv.class) {
+ delete = DRY_RUN_TABLE_CREATED;
+ }
+ if (delete) {
+ deleteTable(getConf(), args);
+ }
+ return success ? 0 : 1;
+ }
+
+ public static void main(String[] args) throws Exception {
+ int status = ToolRunner.run(HBaseConfiguration.create(), new ImportTsv(), args);
+ System.exit(status);
+ }
+}
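
For reference, a minimal sketch of driving the tool programmatically, mirroring the main() above. The table name, column spec, and paths are illustrative, and the -D keys are assumed to match the importtsv.* names printed by the usage text.

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.ImportTsv;
import org.apache.hadoop.util.ToolRunner;

public class ImportTsvDriver {
  public static void main(String[] args) throws Exception {
    // ToolRunner parses the -D options via GenericOptionsParser before run() is invoked.
    String[] toolArgs = new String[] {
        "-Dimporttsv.columns=HBASE_ROW_KEY,info:name,info:age",  // assumed key and column spec
        "-Dimporttsv.bulk.output=/tmp/importtsv-out",            // omit to write directly to the table
        "myTable",                                               // hypothetical table name
        "/data/input-tsv"                                        // hypothetical input directory
    };
    int status = ToolRunner.run(HBaseConfiguration.create(), new ImportTsv(), toolArgs);
    System.exit(status);
  }
}
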
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/JarFinder.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/JarFinder.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/JarFinder.java
new file mode 100644
index 0000000..953df62
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/JarFinder.java
@@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.base.Preconditions;
+
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.net.URLDecoder;
+import java.text.MessageFormat;
+import java.util.Enumeration;
+import java.util.jar.JarFile;
+import java.util.jar.JarOutputStream;
+import java.util.jar.Manifest;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipOutputStream;
+
+/**
+ * Finds the Jar for a class. If the class is in a directory in the
+ * classpath, it creates a Jar on the fly with the contents of the directory
+ * and returns the path to that Jar. If a Jar is created, it is created in
+ * the system temporary directory.
+ *
+ * This file was forked from hadoop/common/branches/branch-2@1377176.
+ */
+public class JarFinder {
+
+ private static void copyToZipStream(File file, ZipEntry entry,
+ ZipOutputStream zos) throws IOException {
+ InputStream is = new FileInputStream(file);
+ try {
+ zos.putNextEntry(entry);
+ byte[] arr = new byte[4096];
+ int read = is.read(arr);
+ while (read > -1) {
+ zos.write(arr, 0, read);
+ read = is.read(arr);
+ }
+ } finally {
+ try {
+ is.close();
+ } finally {
+ zos.closeEntry();
+ }
+ }
+ }
+
+ public static void jarDir(File dir, String relativePath, ZipOutputStream zos)
+ throws IOException {
+ Preconditions.checkNotNull(relativePath, "relativePath");
+ Preconditions.checkNotNull(zos, "zos");
+
+ // by JAR spec, if there is a manifest, it must be the first entry in the
+ // ZIP.
+ File manifestFile = new File(dir, JarFile.MANIFEST_NAME);
+ ZipEntry manifestEntry = new ZipEntry(JarFile.MANIFEST_NAME);
+ if (!manifestFile.exists()) {
+ zos.putNextEntry(manifestEntry);
+ new Manifest().write(new BufferedOutputStream(zos));
+ zos.closeEntry();
+ } else {
+ copyToZipStream(manifestFile, manifestEntry, zos);
+ }
+ zos.closeEntry();
+ zipDir(dir, relativePath, zos, true);
+ zos.close();
+ }
+
+ private static void zipDir(File dir, String relativePath, ZipOutputStream zos,
+ boolean start) throws IOException {
+ String[] dirList = dir.list();
+ if (dirList == null) {
+ return;
+ }
+ for (String aDirList : dirList) {
+ File f = new File(dir, aDirList);
+ if (!f.isHidden()) {
+ if (f.isDirectory()) {
+ if (!start) {
+ ZipEntry dirEntry = new ZipEntry(relativePath + f.getName() + "/");
+ zos.putNextEntry(dirEntry);
+ zos.closeEntry();
+ }
+ String filePath = f.getPath();
+ File file = new File(filePath);
+ zipDir(file, relativePath + f.getName() + "/", zos, false);
+ }
+ else {
+ String path = relativePath + f.getName();
+ if (!path.equals(JarFile.MANIFEST_NAME)) {
+ ZipEntry anEntry = new ZipEntry(path);
+ copyToZipStream(f, anEntry, zos);
+ }
+ }
+ }
+ }
+ }
+
+ private static void createJar(File dir, File jarFile) throws IOException {
+ Preconditions.checkNotNull(dir, "dir");
+ Preconditions.checkNotNull(jarFile, "jarFile");
+ File jarDir = jarFile.getParentFile();
+ if (!jarDir.exists()) {
+ if (!jarDir.mkdirs()) {
+ throw new IOException(MessageFormat.format("could not create dir [{0}]",
+ jarDir));
+ }
+ }
+ try (FileOutputStream fos = new FileOutputStream(jarFile);
+ JarOutputStream jos = new JarOutputStream(fos)) {
+ jarDir(dir, "", jos);
+ }
+ }
+
+ /**
+ * Returns the full path to the Jar containing the class. It always returns a
+ * JAR.
+ *
+ * @param klass class.
+ *
+ * @return path to the Jar containing the class.
+ */
+ public static String getJar(Class klass) {
+ Preconditions.checkNotNull(klass, "klass");
+ ClassLoader loader = klass.getClassLoader();
+ if (loader != null) {
+ String class_file = klass.getName().replaceAll("\\.", "/") + ".class";
+ try {
+ for (Enumeration itr = loader.getResources(class_file);
+ itr.hasMoreElements(); ) {
+ URL url = (URL) itr.nextElement();
+ String path = url.getPath();
+ if (path.startsWith("file:")) {
+ path = path.substring("file:".length());
+ }
+ path = URLDecoder.decode(path, "UTF-8");
+ if ("jar".equals(url.getProtocol())) {
+ path = URLDecoder.decode(path, "UTF-8");
+ return path.replaceAll("!.*$", "");
+ }
+ else if ("file".equals(url.getProtocol())) {
+ String klassName = klass.getName();
+ klassName = klassName.replace(".", "/") + ".class";
+ path = path.substring(0, path.length() - klassName.length());
+ File baseDir = new File(path);
+ File testDir = new File(System.getProperty("test.build.dir", "target/test-dir"));
+ testDir = testDir.getAbsoluteFile();
+ if (!testDir.exists()) {
+ testDir.mkdirs();
+ }
+ File tempJar = File.createTempFile("hadoop-", "", testDir);
+ tempJar = new File(tempJar.getAbsolutePath() + ".jar");
+ tempJar.deleteOnExit();
+ createJar(baseDir, tempJar);
+ return tempJar.getAbsolutePath();
+ }
+ }
+ }
+ catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+ return null;
+ }
+}
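
A small usage sketch for the class above: it resolves the jar that contains a given class, creating a temporary jar under test.build.dir (default target/test-dir) when the class was loaded from a directory on the classpath.

import org.apache.hadoop.hbase.mapreduce.JarFinder;

public class JarFinderDemo {
  public static void main(String[] args) {
    // Resolve the jar (or freshly created temporary jar) containing JarFinder itself.
    String jarPath = JarFinder.getJar(JarFinder.class);
    System.out.println("Jar for JarFinder: " + jarPath);
  }
}
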
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/KeyValueSerialization.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/KeyValueSerialization.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/KeyValueSerialization.java
new file mode 100644
index 0000000..241608b
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/KeyValueSerialization.java
@@ -0,0 +1,88 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.KeyValueUtil;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.io.serializer.Deserializer;
+import org.apache.hadoop.io.serializer.Serialization;
+import org.apache.hadoop.io.serializer.Serializer;
+
+@InterfaceAudience.Public
+public class KeyValueSerialization implements Serialization<KeyValue> {
+ @Override
+ public boolean accept(Class<?> c) {
+ return KeyValue.class.isAssignableFrom(c);
+ }
+
+ @Override
+ public KeyValueDeserializer getDeserializer(Class<KeyValue> t) {
+ return new KeyValueDeserializer();
+ }
+
+ @Override
+ public KeyValueSerializer getSerializer(Class<KeyValue> c) {
+ return new KeyValueSerializer();
+ }
+
+ public static class KeyValueDeserializer implements Deserializer<KeyValue> {
+ private DataInputStream dis;
+
+ @Override
+ public void close() throws IOException {
+ this.dis.close();
+ }
+
+ @Override
+ public KeyValue deserialize(KeyValue ignore) throws IOException {
+ // I can't overwrite the passed in KV, not from a proto kv, not just yet. TODO
+ return KeyValueUtil.create(this.dis);
+ }
+
+ @Override
+ public void open(InputStream is) throws IOException {
+ this.dis = new DataInputStream(is);
+ }
+ }
+
+ public static class KeyValueSerializer implements Serializer<KeyValue> {
+ private DataOutputStream dos;
+
+ @Override
+ public void close() throws IOException {
+ this.dos.close();
+ }
+
+ @Override
+ public void open(OutputStream os) throws IOException {
+ this.dos = new DataOutputStream(os);
+ }
+
+ @Override
+ public void serialize(KeyValue kv) throws IOException {
+ KeyValueUtil.write(kv, this.dos);
+ }
+ }
+}
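
A minimal sketch of registering this serialization on a job, alongside the Mutation and Result serializations, in the same way ImportTsv extends io.serializations above; the job name is illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.KeyValueSerialization;
import org.apache.hadoop.hbase.mapreduce.MutationSerialization;
import org.apache.hadoop.hbase.mapreduce.ResultSerialization;
import org.apache.hadoop.mapreduce.Job;

public class SerializationSetup {
  public static Job newJob() throws Exception {
    Configuration conf = HBaseConfiguration.create();
    Job job = Job.getInstance(conf, "kv-serialization-example");
    // Append the HBase serializations to whatever is already configured.
    job.getConfiguration().setStrings("io.serializations",
        job.getConfiguration().get("io.serializations"),
        MutationSerialization.class.getName(),
        ResultSerialization.class.getName(),
        KeyValueSerialization.class.getName());
    return job;
  }
}
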
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/KeyValueSortReducer.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/KeyValueSortReducer.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/KeyValueSortReducer.java
new file mode 100644
index 0000000..997e5a8
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/KeyValueSortReducer.java
@@ -0,0 +1,57 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.util.TreeSet;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.CellComparator;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.mapreduce.Reducer;
+
+/**
+ * Emits sorted KeyValues.
+ * Reads in all KeyValues from passed Iterator, sorts them, then emits
+ * KeyValues in sorted order. If lots of columns per row, it will use lots of
+ * memory sorting.
+ * @see HFileOutputFormat2
+ */
+@InterfaceAudience.Public
+public class KeyValueSortReducer
+ extends Reducer<ImmutableBytesWritable, KeyValue, ImmutableBytesWritable, KeyValue> {
+ protected void reduce(ImmutableBytesWritable row, Iterable<KeyValue> kvs,
+ Reducer<ImmutableBytesWritable, KeyValue, ImmutableBytesWritable, KeyValue>.Context context)
+ throws java.io.IOException, InterruptedException {
+ TreeSet<KeyValue> map = new TreeSet<>(CellComparator.COMPARATOR);
+ for (KeyValue kv: kvs) {
+ try {
+ map.add(kv.clone());
+ } catch (CloneNotSupportedException e) {
+ throw new java.io.IOException(e);
+ }
+ }
+ context.setStatus("Read " + map.getClass());
+ int index = 0;
+ for (KeyValue kv: map) {
+ context.write(row, kv);
+ if (++index % 100 == 0) context.setStatus("Wrote " + index);
+ }
+ }
+}
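
A minimal wiring sketch, assuming a job whose mappers emit (row, KeyValue) pairs that should be re-emitted in sorted order, typically ahead of HFileOutputFormat2.

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.KeyValueSortReducer;
import org.apache.hadoop.mapreduce.Job;

public class SortReducerWiring {
  // Hypothetical helper: the caller is assumed to have set the mapper and input format already.
  public static void configure(Job job) {
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
    job.setReducerClass(KeyValueSortReducer.class);
  }
}
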
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableHFileOutputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableHFileOutputFormat.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableHFileOutputFormat.java
new file mode 100644
index 0000000..9f783f1
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableHFileOutputFormat.java
@@ -0,0 +1,122 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapreduce.Job;
+
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.util.List;
+import org.apache.hadoop.hbase.shaded.com.google.common.annotations.VisibleForTesting;
+
+/**
+ * Creates a three-level directory tree: the first level is the table name, the
+ * second level is the column family name, and all HFiles for one family are
+ * written under that family's directory:
+ * -tableName1
+ *   -columnFamilyName1
+ *   -columnFamilyName2
+ *     -HFiles
+ * -tableName2
+ *   -columnFamilyName1
+ *     -HFiles
+ *   -columnFamilyName2
+ */
+@InterfaceAudience.Public
+@VisibleForTesting
+public class MultiTableHFileOutputFormat extends HFileOutputFormat2 {
+ private static final Log LOG = LogFactory.getLog(MultiTableHFileOutputFormat.class);
+
+ /**
+ * Creates a composite key to use as a mapper output key when using
+ * MultiTableHFileOutputFormat.configureIncrementalLoad to set up a bulk ingest job
+ *
+ * @param tableName Name of the Table - Eg: TableName.getNameAsString()
+ * @param suffix Usually represents a rowkey when creating a mapper key or column family
+ * @return byte[] representation of composite key
+ */
+ public static byte[] createCompositeKey(byte[] tableName,
+ byte[] suffix) {
+ return combineTableNameSuffix(tableName, suffix);
+ }
+
+ /**
+ * Alternate api which accepts an ImmutableBytesWritable for the suffix
+ * @see MultiTableHFileOutputFormat#createCompositeKey(byte[], byte[])
+ */
+ public static byte[] createCompositeKey(byte[] tableName,
+ ImmutableBytesWritable suffix) {
+ return combineTableNameSuffix(tableName, suffix.get());
+ }
+
+ /**
+ * Alternate api which accepts a String for the tableName and ImmutableBytesWritable for the
+ * suffix
+ * @see MultiTableHFileOutputFormat#createCompositeKey(byte[], byte[])
+ */
+ public static byte[] createCompositeKey(String tableName,
+ ImmutableBytesWritable suffix) {
+ return combineTableNameSuffix(tableName.getBytes(Charset.forName("UTF-8")), suffix.get());
+ }
+
+ /**
+ * Analogous to
+ * {@link HFileOutputFormat2#configureIncrementalLoad(Job, TableDescriptor, RegionLocator)},
+ * this function will configure the requisite number of reducers to write HFiles for multiple
+ * tables simultaneously
+ *
+ * @param job See {@link org.apache.hadoop.mapreduce.Job}
+ * @param multiTableDescriptors Table descriptor and region locator pairs
+ * @throws IOException
+ */
+ public static void configureIncrementalLoad(Job job, List<TableInfo>
+ multiTableDescriptors)
+ throws IOException {
+ MultiTableHFileOutputFormat.configureIncrementalLoad(job, multiTableDescriptors,
+ MultiTableHFileOutputFormat.class);
+ }
+
+ final private static int validateCompositeKey(byte[] keyBytes) {
+
+ int separatorIdx = Bytes.indexOf(keyBytes, tableSeparator);
+
+ // Either the separator was not found or a tablename wasn't present or a key wasn't present
+ if (separatorIdx == -1) {
+ throw new IllegalArgumentException("Invalid format for composite key [" + Bytes
+ .toStringBinary(keyBytes) + "]. Cannot extract tablename and suffix from key");
+ }
+ return separatorIdx;
+ }
+
+ protected static byte[] getTableName(byte[] keyBytes) {
+ int separatorIdx = validateCompositeKey(keyBytes);
+ return Bytes.copy(keyBytes, 0, separatorIdx);
+ }
+
+ protected static byte[] getSuffix(byte[] keyBytes) {
+ int separatorIdx = validateCompositeKey(keyBytes);
+ return Bytes.copy(keyBytes, separatorIdx+1, keyBytes.length - separatorIdx - 1);
+ }
+}
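
A hypothetical mapper sketch showing createCompositeKey in use: the destination table name is prefixed onto each row key so the output format can route HFiles into the per-table/per-family directory tree described above; table, family, and qualifier names are illustrative.

import java.io.IOException;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.MultiTableHFileOutputFormat;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class CompositeKeyMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
  private static final byte[] TABLE = Bytes.toBytes("table1");  // assumed destination table
  private static final byte[] FAMILY = Bytes.toBytes("cf");     // assumed column family

  @Override
  protected void map(LongWritable offset, Text line, Context context)
      throws IOException, InterruptedException {
    byte[] row = Bytes.toBytes(line.toString());
    // Composite key = table name + separator + row key, as assembled by the output format.
    byte[] compositeKey = MultiTableHFileOutputFormat.createCompositeKey(TABLE, row);
    Put put = new Put(row);
    put.addColumn(FAMILY, Bytes.toBytes("q"), Bytes.toBytes(line.toString()));
    context.write(new ImmutableBytesWritable(compositeKey), put);
  }
}
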
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormat.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormat.java
new file mode 100644
index 0000000..f8fb6dc
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormat.java
@@ -0,0 +1,104 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.client.Scan;
+
+/**
+ * Convert HBase tabular data from multiple scanners into a format that
+ * is consumable by Map/Reduce.
+ *
+ * <p>
+ * Usage example
+ * </p>
+ *
+ * <pre>
+ * List<Scan> scans = new ArrayList<Scan>();
+ *
+ * Scan scan1 = new Scan();
+ * scan1.setStartRow(firstRow1);
+ * scan1.setStopRow(lastRow1);
+ * scan1.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, table1);
+ * scans.add(scan1);
+ *
+ * Scan scan2 = new Scan();
+ * scan2.setStartRow(firstRow2);
+ * scan2.setStopRow(lastRow2);
+ * scan2.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, table2);
+ * scans.add(scan2);
+ *
+ * TableMapReduceUtil.initTableMapperJob(scans, TableMapper.class, Text.class,
+ * IntWritable.class, job);
+ * </pre>
+ */
+@InterfaceAudience.Public
+public class MultiTableInputFormat extends MultiTableInputFormatBase implements
+ Configurable {
+
+ /** Job parameter that specifies the scan list. */
+ public static final String SCANS = "hbase.mapreduce.scans";
+
+ /** The configuration. */
+ private Configuration conf = null;
+
+ /**
+ * Returns the current configuration.
+ *
+ * @return The current configuration.
+ * @see org.apache.hadoop.conf.Configurable#getConf()
+ */
+ @Override
+ public Configuration getConf() {
+ return conf;
+ }
+
+ /**
+ * Sets the configuration. This is used to set the details for the tables to
+ * be scanned.
+ *
+ * @param configuration The configuration to set.
+ * @see org.apache.hadoop.conf.Configurable#setConf(
+ * org.apache.hadoop.conf.Configuration)
+ */
+ @Override
+ public void setConf(Configuration configuration) {
+ this.conf = configuration;
+ String[] rawScans = conf.getStrings(SCANS);
+ if (rawScans == null || rawScans.length <= 0) {
+ throw new IllegalArgumentException("There must be at least 1 scan configuration set via "
+ + SCANS);
+ }
+ List<Scan> scans = new ArrayList<>();
+
+ for (int i = 0; i < rawScans.length; i++) {
+ try {
+ scans.add(TableMapReduceUtil.convertStringToScan(rawScans[i]));
+ } catch (IOException e) {
+ throw new RuntimeException("Failed to convert Scan : " + rawScans[i] + " to string", e);
+ }
+ }
+ this.setScans(scans);
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatBase.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatBase.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatBase.java
new file mode 100644
index 0000000..5d541a6
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatBase.java
@@ -0,0 +1,296 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.text.MessageFormat;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HRegionLocation;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+import java.util.Map;
+import java.util.HashMap;
+import java.util.Iterator;
+/**
+ * A base for {@link MultiTableInputFormat}s. Receives a list of
+ * {@link Scan} instances that define the input tables and
+ * filters etc. Subclasses may use other TableRecordReader implementations.
+ */
+@InterfaceAudience.Public
+public abstract class MultiTableInputFormatBase extends
+ InputFormat<ImmutableBytesWritable, Result> {
+
+ private static final Log LOG = LogFactory.getLog(MultiTableInputFormatBase.class);
+
+ /** Holds the set of scans used to define the input. */
+ private List<Scan> scans;
+
+ /** The reader scanning the table, can be a custom one. */
+ private TableRecordReader tableRecordReader = null;
+
+ /**
+ * Builds a TableRecordReader. If no TableRecordReader was provided, uses the
+ * default.
+ *
+ * @param split The split to work with.
+ * @param context The current context.
+ * @return The newly created record reader.
+ * @throws IOException When creating the reader fails.
+ * @throws InterruptedException when record reader initialization fails
+ * @see org.apache.hadoop.mapreduce.InputFormat#createRecordReader(
+ * org.apache.hadoop.mapreduce.InputSplit,
+ * org.apache.hadoop.mapreduce.TaskAttemptContext)
+ */
+ @Override
+ public RecordReader<ImmutableBytesWritable, Result> createRecordReader(
+ InputSplit split, TaskAttemptContext context)
+ throws IOException, InterruptedException {
+ TableSplit tSplit = (TableSplit) split;
+ LOG.info(MessageFormat.format("Input split length: {0} bytes.", tSplit.getLength()));
+
+ if (tSplit.getTable() == null) {
+ throw new IOException("Cannot create a record reader because of a"
+ + " previous error. Please look at the previous logs lines from"
+ + " the task's full log for more details.");
+ }
+ final Connection connection = ConnectionFactory.createConnection(context.getConfiguration());
+ Table table = connection.getTable(tSplit.getTable());
+
+ if (this.tableRecordReader == null) {
+ this.tableRecordReader = new TableRecordReader();
+ }
+ final TableRecordReader trr = this.tableRecordReader;
+
+ try {
+ Scan sc = tSplit.getScan();
+ sc.setStartRow(tSplit.getStartRow());
+ sc.setStopRow(tSplit.getEndRow());
+ trr.setScan(sc);
+ trr.setTable(table);
+ return new RecordReader<ImmutableBytesWritable, Result>() {
+
+ @Override
+ public void close() throws IOException {
+ trr.close();
+ connection.close();
+ }
+
+ @Override
+ public ImmutableBytesWritable getCurrentKey() throws IOException, InterruptedException {
+ return trr.getCurrentKey();
+ }
+
+ @Override
+ public Result getCurrentValue() throws IOException, InterruptedException {
+ return trr.getCurrentValue();
+ }
+
+ @Override
+ public float getProgress() throws IOException, InterruptedException {
+ return trr.getProgress();
+ }
+
+ @Override
+ public void initialize(InputSplit inputsplit, TaskAttemptContext context)
+ throws IOException, InterruptedException {
+ trr.initialize(inputsplit, context);
+ }
+
+ @Override
+ public boolean nextKeyValue() throws IOException, InterruptedException {
+ return trr.nextKeyValue();
+ }
+ };
+ } catch (IOException ioe) {
+ // If there is an exception make sure that all
+ // resources are closed and released.
+ trr.close();
+ connection.close();
+ throw ioe;
+ }
+ }
+
+ /**
+ * Calculates the splits that will serve as input for the map tasks. The
+ * number of splits matches the number of regions in a table.
+ *
+ * @param context The current job context.
+ * @return The list of input splits.
+ * @throws IOException When creating the list of splits fails.
+ * @see org.apache.hadoop.mapreduce.InputFormat#getSplits(org.apache.hadoop.mapreduce.JobContext)
+ */
+ @Override
+ public List<InputSplit> getSplits(JobContext context) throws IOException {
+ if (scans.isEmpty()) {
+ throw new IOException("No scans were provided.");
+ }
+
+ Map<TableName, List<Scan>> tableMaps = new HashMap<>();
+ for (Scan scan : scans) {
+ byte[] tableNameBytes = scan.getAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME);
+ if (tableNameBytes == null)
+ throw new IOException("A scan object did not have a table name");
+
+ TableName tableName = TableName.valueOf(tableNameBytes);
+
+ List<Scan> scanList = tableMaps.get(tableName);
+ if (scanList == null) {
+ scanList = new ArrayList<>();
+ tableMaps.put(tableName, scanList);
+ }
+ scanList.add(scan);
+ }
+
+ List<InputSplit> splits = new ArrayList<>();
+ Iterator iter = tableMaps.entrySet().iterator();
+ while (iter.hasNext()) {
+ Map.Entry<TableName, List<Scan>> entry = (Map.Entry<TableName, List<Scan>>) iter.next();
+ TableName tableName = entry.getKey();
+ List<Scan> scanList = entry.getValue();
+
+ try (Connection conn = ConnectionFactory.createConnection(context.getConfiguration());
+ Table table = conn.getTable(tableName);
+ RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
+ RegionSizeCalculator sizeCalculator = new RegionSizeCalculator(
+ regionLocator, conn.getAdmin());
+ Pair<byte[][], byte[][]> keys = regionLocator.getStartEndKeys();
+ for (Scan scan : scanList) {
+ if (keys == null || keys.getFirst() == null || keys.getFirst().length == 0) {
+ throw new IOException("Expecting at least one region for table : "
+ + tableName.getNameAsString());
+ }
+ int count = 0;
+
+ byte[] startRow = scan.getStartRow();
+ byte[] stopRow = scan.getStopRow();
+
+ for (int i = 0; i < keys.getFirst().length; i++) {
+ if (!includeRegionInSplit(keys.getFirst()[i], keys.getSecond()[i])) {
+ continue;
+ }
+
+ if ((startRow.length == 0 || keys.getSecond()[i].length == 0 ||
+ Bytes.compareTo(startRow, keys.getSecond()[i]) < 0) &&
+ (stopRow.length == 0 || Bytes.compareTo(stopRow,
+ keys.getFirst()[i]) > 0)) {
+ byte[] splitStart = startRow.length == 0 ||
+ Bytes.compareTo(keys.getFirst()[i], startRow) >= 0 ?
+ keys.getFirst()[i] : startRow;
+ byte[] splitStop = (stopRow.length == 0 ||
+ Bytes.compareTo(keys.getSecond()[i], stopRow) <= 0) &&
+ keys.getSecond()[i].length > 0 ?
+ keys.getSecond()[i] : stopRow;
+
+ HRegionLocation hregionLocation = regionLocator.getRegionLocation(
+ keys.getFirst()[i], false);
+ String regionHostname = hregionLocation.getHostname();
+ HRegionInfo regionInfo = hregionLocation.getRegionInfo();
+ String encodedRegionName = regionInfo.getEncodedName();
+ long regionSize = sizeCalculator.getRegionSize(
+ regionInfo.getRegionName());
+
+ TableSplit split = new TableSplit(table.getName(),
+ scan, splitStart, splitStop, regionHostname,
+ encodedRegionName, regionSize);
+
+ splits.add(split);
+
+ if (LOG.isDebugEnabled())
+ LOG.debug("getSplits: split -> " + (count++) + " -> " + split);
+ }
+ }
+ }
+ }
+ }
+
+ return splits;
+ }
+
+ /**
+ * Test if the given region is to be included in the InputSplit while
+ * splitting the regions of a table.
+ * <p>
+ * This optimization is effective when there is a specific reason to
+ * exclude an entire region from the M-R job (and hence it contributes no
+ * InputSplit), based on the region's start and end keys. <br>
+ * Useful when we need to remember the last-processed top record and revisit
+ * the [last, current) interval for M-R processing, continuously. In addition
+ * to reducing InputSplits, this also reduces the load on the region server, due
+ * to the ordering of the keys. <br>
+ * <br>
+ * Note: It is possible that <code>endKey.length() == 0</code>, for the last
+ * (recent) region. <br>
+ * Override this method if you want to bulk exclude regions altogether from
+ * M-R. By default, no region is excluded (i.e. all regions are included).
+ *
+ * @param startKey Start key of the region
+ * @param endKey End key of the region
+ * @return true, if this region needs to be included as part of the input
+ * (default).
+ */
+ protected boolean includeRegionInSplit(final byte[] startKey,
+ final byte[] endKey) {
+ return true;
+ }
+
+ /**
+ * Allows subclasses to get the list of {@link Scan} objects.
+ */
+ protected List<Scan> getScans() {
+ return this.scans;
+ }
+
+ /**
+ * Allows subclasses to set the list of {@link Scan} objects.
+ *
+ * @param scans The list of {@link Scan} used to define the input
+ */
+ protected void setScans(List<Scan> scans) {
+ this.scans = scans;
+ }
+
+ /**
+ * Allows subclasses to set the {@link TableRecordReader}.
+ *
+ * @param tableRecordReader A different {@link TableRecordReader}
+ * implementation.
+ */
+ protected void setTableRecordReader(TableRecordReader tableRecordReader) {
+ this.tableRecordReader = tableRecordReader;
+ }
+}
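
A sketch of the includeRegionInSplit override described in the javadoc above, assuming a subclass of MultiTableInputFormat and an illustrative cutoff key: regions whose end key sorts at or below the cutoff contribute no InputSplits.

import org.apache.hadoop.hbase.mapreduce.MultiTableInputFormat;
import org.apache.hadoop.hbase.util.Bytes;

public class CutoffMultiTableInputFormat extends MultiTableInputFormat {
  private static final byte[] CUTOFF = Bytes.toBytes("row-2017");  // hypothetical cutoff key

  @Override
  protected boolean includeRegionInSplit(byte[] startKey, byte[] endKey) {
    // An empty end key marks the last region of the table; always include it.
    return endKey.length == 0 || Bytes.compareTo(endKey, CUTOFF) > 0;
  }
}
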
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableOutputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableOutputFormat.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableOutputFormat.java
new file mode 100644
index 0000000..4cc784f
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableOutputFormat.java
@@ -0,0 +1,176 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.BufferedMutator;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Mutation;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Durability;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.OutputCommitter;
+import org.apache.hadoop.mapreduce.OutputFormat;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+/**
+ * <p>
+ * Hadoop output format that writes to one or more HBase tables. The key is
+ * taken to be the table name while the output value <em>must</em> be either a
+ * {@link Put} or a {@link Delete} instance. All tables must already exist, and
+ * all Puts and Deletes must reference only valid column families.
+ * </p>
+ *
+ * <p>
+ * Write-ahead logging (WAL) for Puts can be disabled by setting
+ * {@link #WAL_PROPERTY} to {@link #WAL_OFF}. Default value is {@link #WAL_ON}.
+ * Note that disabling write-ahead logging is only appropriate for jobs where
+ * loss of data due to region server failure can be tolerated (for example,
+ * because it is easy to rerun a bulk import).
+ * </p>
+ */
+@InterfaceAudience.Public
+public class MultiTableOutputFormat extends OutputFormat<ImmutableBytesWritable, Mutation> {
+ /** Set this to {@link #WAL_OFF} to turn off write-ahead logging (WAL) */
+ public static final String WAL_PROPERTY = "hbase.mapreduce.multitableoutputformat.wal";
+ /** Property value to use write-ahead logging */
+ public static final boolean WAL_ON = true;
+ /** Property value to disable write-ahead logging */
+ public static final boolean WAL_OFF = false;
+ /**
+ * Record writer for outputting to multiple HTables.
+ */
+ protected static class MultiTableRecordWriter extends
+ RecordWriter<ImmutableBytesWritable, Mutation> {
+ private static final Log LOG = LogFactory.getLog(MultiTableRecordWriter.class);
+ Connection connection;
+ Map<ImmutableBytesWritable, BufferedMutator> mutatorMap = new HashMap<>();
+ Configuration conf;
+ boolean useWriteAheadLogging;
+
+ /**
+ * @param conf
+ * HBaseConfiguration to used
+ * @param useWriteAheadLogging
+ * whether to use write ahead logging. This can be turned off (
+ * <tt>false</tt>) to improve performance when bulk loading data.
+ */
+ public MultiTableRecordWriter(Configuration conf,
+ boolean useWriteAheadLogging) throws IOException {
+ LOG.debug("Created new MultiTableRecordReader with WAL "
+ + (useWriteAheadLogging ? "on" : "off"));
+ this.conf = conf;
+ this.useWriteAheadLogging = useWriteAheadLogging;
+ }
+
+ /**
+ * @param tableName
+ * the name of the table, as a string
+ * @return the named mutator
+ * @throws IOException
+ * if there is a problem opening a table
+ */
+ BufferedMutator getBufferedMutator(ImmutableBytesWritable tableName) throws IOException {
+ if(this.connection == null){
+ this.connection = ConnectionFactory.createConnection(conf);
+ }
+ if (!mutatorMap.containsKey(tableName)) {
+ LOG.debug("Opening HTable \"" + Bytes.toString(tableName.get())+ "\" for writing");
+
+ BufferedMutator mutator =
+ connection.getBufferedMutator(TableName.valueOf(tableName.get()));
+ mutatorMap.put(tableName, mutator);
+ }
+ return mutatorMap.get(tableName);
+ }
+
+ @Override
+ public void close(TaskAttemptContext context) throws IOException {
+ for (BufferedMutator mutator : mutatorMap.values()) {
+ mutator.close();
+ }
+ if (connection != null) {
+ connection.close();
+ }
+ }
+
+ /**
+ * Writes an action (Put or Delete) to the specified table.
+ *
+ * @param tableName
+ * the table being updated.
+ * @param action
+ * the update, either a put or a delete.
+ * @throws IllegalArgumentException
+ * if the action is not a put or a delete.
+ */
+ @Override
+ public void write(ImmutableBytesWritable tableName, Mutation action) throws IOException {
+ BufferedMutator mutator = getBufferedMutator(tableName);
+ // The actions are not immutable, so we defensively copy them
+ if (action instanceof Put) {
+ Put put = new Put((Put) action);
+ put.setDurability(useWriteAheadLogging ? Durability.SYNC_WAL
+ : Durability.SKIP_WAL);
+ mutator.mutate(put);
+ } else if (action instanceof Delete) {
+ Delete delete = new Delete((Delete) action);
+ mutator.mutate(delete);
+ } else
+ throw new IllegalArgumentException(
+ "action must be either Delete or Put");
+ }
+ }
+
+ @Override
+ public void checkOutputSpecs(JobContext context) throws IOException,
+ InterruptedException {
+ // we can't know ahead of time if it's going to blow up when the user
+ // passes a table name that doesn't exist, so nothing useful here.
+ }
+
+ @Override
+ public OutputCommitter getOutputCommitter(TaskAttemptContext context)
+ throws IOException, InterruptedException {
+ return new TableOutputCommitter();
+ }
+
+ @Override
+ public RecordWriter<ImmutableBytesWritable, Mutation> getRecordWriter(TaskAttemptContext context)
+ throws IOException, InterruptedException {
+ Configuration conf = context.getConfiguration();
+ return new MultiTableRecordWriter(HBaseConfiguration.create(conf),
+ conf.getBoolean(WAL_PROPERTY, WAL_ON));
+ }
+
+}
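
A hypothetical mapper sketch for this output format: the emitted key names the destination table (which must already exist) and the value is a Put or Delete; table and family names are illustrative, and reducers are assumed to be disabled.

import java.io.IOException;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class MultiTableWriteMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
  private static final ImmutableBytesWritable TABLE_A =
      new ImmutableBytesWritable(Bytes.toBytes("tableA"));  // assumed existing table
  private static final ImmutableBytesWritable TABLE_B =
      new ImmutableBytesWritable(Bytes.toBytes("tableB"));  // assumed existing table

  @Override
  protected void map(LongWritable offset, Text line, Context context)
      throws IOException, InterruptedException {
    String value = line.toString();
    Put put = new Put(Bytes.toBytes(value));
    put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q"), Bytes.toBytes(value));
    // The key selects the destination table; a BufferedMutator is opened per table.
    context.write(value.length() % 2 == 0 ? TABLE_A : TABLE_B, put);
  }
}
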
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableSnapshotInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableSnapshotInputFormat.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableSnapshotInputFormat.java
new file mode 100644
index 0000000..e7538a8
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableSnapshotInputFormat.java
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.JobContext;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * MultiTableSnapshotInputFormat generalizes
+ * {@link TableSnapshotInputFormat}
+ * allowing a MapReduce job to run over one or more table snapshots, with one or more scans
+ * configured for each.
+ * Internally, the input format delegates to
+ * {@link TableSnapshotInputFormat}
+ * and thus has the same performance advantages;
+ * see {@link TableSnapshotInputFormat} for
+ * more details.
+ * Usage is similar to TableSnapshotInputFormat, with the following exception:
+ * initMultiTableSnapshotMapperJob takes in a map
+ * from snapshot name to a collection of scans. For each snapshot in the map, each corresponding
+ * scan will be applied;
+ * the overall dataset for the job is defined by the concatenation of the regions and tables
+ * included in each snapshot/scan
+ * pair.
+ * {@link TableMapReduceUtil#initMultiTableSnapshotMapperJob
+ * (java.util.Map, Class, Class, Class, org.apache.hadoop.mapreduce.Job, boolean, org.apache
+ * .hadoop.fs.Path)}
+ * can be used to configure the job.
+ * <pre>{@code
+ * Job job = new Job(conf);
+ * Map<String, Collection<Scan>> snapshotScans = ImmutableMap.of(
+ * "snapshot1", ImmutableList.of(new Scan(Bytes.toBytes("a"), Bytes.toBytes("b"))),
+ * "snapshot2", ImmutableList.of(new Scan(Bytes.toBytes("1"), Bytes.toBytes("2")))
+ * );
+ * Path restoreDir = new Path("/tmp/snapshot_restore_dir");
+ * TableMapReduceUtil.initTableSnapshotMapperJob(
+ * snapshotScans, MyTableMapper.class, MyMapKeyOutput.class,
+ * MyMapOutputValueWritable.class, job, true, restoreDir);
+ * }
+ * </pre>
+ * Internally, this input format restores each snapshot into a subdirectory of the given tmp
+ * directory. Input splits and
+ * record readers are created as described in {@link org.apache.hadoop.hbase.mapreduce
+ * .TableSnapshotInputFormat}
+ * (one per region).
+ * See {@link TableSnapshotInputFormat} for more notes on
+ * permissioning; the
+ * same caveats apply here.
+ *
+ * @see TableSnapshotInputFormat
+ * @see org.apache.hadoop.hbase.client.TableSnapshotScanner
+ */
+@InterfaceAudience.Public
+public class MultiTableSnapshotInputFormat extends TableSnapshotInputFormat {
+
+ private final MultiTableSnapshotInputFormatImpl delegate;
+
+ public MultiTableSnapshotInputFormat() {
+ this.delegate = new MultiTableSnapshotInputFormatImpl();
+ }
+
+ @Override
+ public List<InputSplit> getSplits(JobContext jobContext)
+ throws IOException, InterruptedException {
+ List<TableSnapshotInputFormatImpl.InputSplit> splits =
+ delegate.getSplits(jobContext.getConfiguration());
+ List<InputSplit> rtn = Lists.newArrayListWithCapacity(splits.size());
+
+ for (TableSnapshotInputFormatImpl.InputSplit split : splits) {
+ rtn.add(new TableSnapshotInputFormat.TableSnapshotRegionSplit(split));
+ }
+
+ return rtn;
+ }
+
+ public static void setInput(Configuration configuration,
+ Map<String, Collection<Scan>> snapshotScans, Path tmpRestoreDir) throws IOException {
+ new MultiTableSnapshotInputFormatImpl().setInput(configuration, snapshotScans, tmpRestoreDir);
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java
new file mode 100644
index 0000000..9cccf8c
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java
@@ -0,0 +1,386 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Random;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * Tool used to copy a table to another one, which can be on a different cluster.
+ * It is also configurable with a start and end time as well as a specification
+ * of the region server implementation if different from the local cluster.
+ */
+@InterfaceAudience.Public
+public class CopyTable extends Configured implements Tool {
+ private static final Log LOG = LogFactory.getLog(CopyTable.class);
+
+ final static String NAME = "copytable";
+ long startTime = 0;
+ long endTime = HConstants.LATEST_TIMESTAMP;
+ int batch = Integer.MAX_VALUE;
+ int cacheRow = -1;
+ int versions = -1;
+ String tableName = null;
+ String startRow = null;
+ String stopRow = null;
+ String dstTableName = null;
+ String peerAddress = null;
+ String families = null;
+ boolean allCells = false;
+ static boolean shuffle = false;
+
+ boolean bulkload = false;
+ Path bulkloadDir = null;
+
+ private final static String JOB_NAME_CONF_KEY = "mapreduce.job.name";
+
+ /**
+ * Sets up the actual job.
+ *
+ * @param args The command line parameters.
+ * @return The newly created job.
+ * @throws IOException When setting up the job fails.
+ */
+ public Job createSubmittableJob(String[] args)
+ throws IOException {
+ if (!doCommandLine(args)) {
+ return null;
+ }
+
+ Job job = Job.getInstance(getConf(), getConf().get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
+ job.setJarByClass(CopyTable.class);
+ Scan scan = new Scan();
+
+ scan.setBatch(batch);
+ scan.setCacheBlocks(false);
+
+ if (cacheRow > 0) {
+ scan.setCaching(cacheRow);
+ } else {
+ scan.setCaching(getConf().getInt(HConstants.HBASE_CLIENT_SCANNER_CACHING, 100));
+ }
+
+ scan.setTimeRange(startTime, endTime);
+
+ if (allCells) {
+ scan.setRaw(true);
+ }
+ if (shuffle) {
+ job.getConfiguration().set(TableInputFormat.SHUFFLE_MAPS, "true");
+ }
+ if (versions >= 0) {
+ scan.setMaxVersions(versions);
+ }
+
+ if (startRow != null) {
+ scan.setStartRow(Bytes.toBytesBinary(startRow));
+ }
+
+ if (stopRow != null) {
+ scan.setStopRow(Bytes.toBytesBinary(stopRow));
+ }
+
+ if(families != null) {
+ String[] fams = families.split(",");
+ Map<String,String> cfRenameMap = new HashMap<>();
+ for(String fam : fams) {
+ String sourceCf;
+ if(fam.contains(":")) {
+ // fam looks like "sourceCfName:destCfName"
+ String[] srcAndDest = fam.split(":", 2);
+ sourceCf = srcAndDest[0];
+ String destCf = srcAndDest[1];
+ cfRenameMap.put(sourceCf, destCf);
+ } else {
+ // fam is just "sourceCf"
+ sourceCf = fam;
+ }
+ scan.addFamily(Bytes.toBytes(sourceCf));
+ }
+ Import.configureCfRenaming(job.getConfiguration(), cfRenameMap);
+ }
+ job.setNumReduceTasks(0);
+
+ if (bulkload) {
+ TableMapReduceUtil.initTableMapperJob(tableName, scan, Import.KeyValueImporter.class, null,
+ null, job);
+
+ // We need to split the inputs by destination tables so that output of Map can be bulk-loaded.
+ TableInputFormat.configureSplitTable(job, TableName.valueOf(dstTableName));
+
+ FileSystem fs = FileSystem.get(getConf());
+ Random rand = new Random();
+ Path root = new Path(fs.getWorkingDirectory(), "copytable");
+ fs.mkdirs(root);
+ while (true) {
+ bulkloadDir = new Path(root, "" + rand.nextLong());
+ if (!fs.exists(bulkloadDir)) {
+ break;
+ }
+ }
+
+ System.out.println("HFiles will be stored at " + this.bulkloadDir);
+ HFileOutputFormat2.setOutputPath(job, bulkloadDir);
+ try (Connection conn = ConnectionFactory.createConnection(getConf());
+ Admin admin = conn.getAdmin()) {
+ HFileOutputFormat2.configureIncrementalLoadMap(job,
+ admin.listTableDescriptor((TableName.valueOf(dstTableName))));
+ }
+ } else {
+ TableMapReduceUtil.initTableMapperJob(tableName, scan,
+ Import.Importer.class, null, null, job);
+
+ TableMapReduceUtil.initTableReducerJob(dstTableName, null, job, null, peerAddress, null,
+ null);
+ }
+
+ return job;
+ }
+
+ /*
+ * @param errorMsg Error message. Can be null.
+ */
+ private static void printUsage(final String errorMsg) {
+ if (errorMsg != null && errorMsg.length() > 0) {
+ System.err.println("ERROR: " + errorMsg);
+ }
+ System.err.println("Usage: CopyTable [general options] [--starttime=X] [--endtime=Y] " +
+ "[--new.name=NEW] [--peer.adr=ADR] <tablename>");
+ System.err.println();
+ System.err.println("Options:");
+ System.err.println(" rs.class hbase.regionserver.class of the peer cluster");
+ System.err.println(" specify if different from current cluster");
+ System.err.println(" rs.impl hbase.regionserver.impl of the peer cluster");
+ System.err.println(" startrow the start row");
+ System.err.println(" stoprow the stop row");
+ System.err.println(" starttime beginning of the time range (unixtime in millis)");
+ System.err.println(" without endtime means from starttime to forever");
+ System.err.println(" endtime end of the time range. Ignored if no starttime specified.");
+ System.err.println(" versions number of cell versions to copy");
+ System.err.println(" new.name new table's name");
+ System.err.println(" peer.adr Address of the peer cluster given in the format");
+ System.err.println(" hbase.zookeeper.quorum:hbase.zookeeper.client"
+ + ".port:zookeeper.znode.parent");
+ System.err.println(" families comma-separated list of families to copy");
+ System.err.println(" To copy from cf1 to cf2, give sourceCfName:destCfName. ");
+ System.err.println(" To keep the same name, just give \"cfName\"");
+ System.err.println(" all.cells also copy delete markers and deleted cells");
+ System.err.println(" bulkload Write input into HFiles and bulk load to the destination "
+ + "table");
+ System.err.println();
+ System.err.println("Args:");
+ System.err.println(" tablename Name of the table to copy");
+ System.err.println();
+ System.err.println("Examples:");
+ System.err.println(" To copy 'TestTable' to a cluster that uses replication for a 1 hour window:");
+ System.err.println(" $ hbase " +
+ "org.apache.hadoop.hbase.mapreduce.CopyTable --starttime=1265875194289 --endtime=1265878794289 " +
+ "--peer.adr=server1,server2,server3:2181:/hbase --families=myOldCf:myNewCf,cf2,cf3 TestTable ");
+ System.err.println("For performance consider the following general option:\n"
+ + " It is recommended that you set the following to >=100. A higher value uses more memory but\n"
+ + " decreases the round trip time to the server and may increase performance.\n"
+ + " -Dhbase.client.scanner.caching=100\n"
+ + " The following should always be set to false, to prevent writing data twice, which may produce \n"
+ + " inaccurate results.\n"
+ + " -Dmapreduce.map.speculative=false");
+ }
+
+ private boolean doCommandLine(final String[] args) {
+ // Process command-line args. TODO: Better cmd-line processing
+ // (but hopefully something not as painful as cli options).
+ if (args.length < 1) {
+ printUsage(null);
+ return false;
+ }
+ try {
+ for (int i = 0; i < args.length; i++) {
+ String cmd = args[i];
+ if (cmd.equals("-h") || cmd.startsWith("--h")) {
+ printUsage(null);
+ return false;
+ }
+
+ final String startRowArgKey = "--startrow=";
+ if (cmd.startsWith(startRowArgKey)) {
+ startRow = cmd.substring(startRowArgKey.length());
+ continue;
+ }
+
+ final String stopRowArgKey = "--stoprow=";
+ if (cmd.startsWith(stopRowArgKey)) {
+ stopRow = cmd.substring(stopRowArgKey.length());
+ continue;
+ }
+
+ final String startTimeArgKey = "--starttime=";
+ if (cmd.startsWith(startTimeArgKey)) {
+ startTime = Long.parseLong(cmd.substring(startTimeArgKey.length()));
+ continue;
+ }
+
+ final String endTimeArgKey = "--endtime=";
+ if (cmd.startsWith(endTimeArgKey)) {
+ endTime = Long.parseLong(cmd.substring(endTimeArgKey.length()));
+ continue;
+ }
+
+ final String batchArgKey = "--batch=";
+ if (cmd.startsWith(batchArgKey)) {
+ batch = Integer.parseInt(cmd.substring(batchArgKey.length()));
+ continue;
+ }
+
+ final String cacheRowArgKey = "--cacheRow=";
+ if (cmd.startsWith(cacheRowArgKey)) {
+ cacheRow = Integer.parseInt(cmd.substring(cacheRowArgKey.length()));
+ continue;
+ }
+
+ final String versionsArgKey = "--versions=";
+ if (cmd.startsWith(versionsArgKey)) {
+ versions = Integer.parseInt(cmd.substring(versionsArgKey.length()));
+ continue;
+ }
+
+ final String newNameArgKey = "--new.name=";
+ if (cmd.startsWith(newNameArgKey)) {
+ dstTableName = cmd.substring(newNameArgKey.length());
+ continue;
+ }
+
+ final String peerAdrArgKey = "--peer.adr=";
+ if (cmd.startsWith(peerAdrArgKey)) {
+ peerAddress = cmd.substring(peerAdrArgKey.length());
+ continue;
+ }
+
+ final String familiesArgKey = "--families=";
+ if (cmd.startsWith(familiesArgKey)) {
+ families = cmd.substring(familiesArgKey.length());
+ continue;
+ }
+
+ if (cmd.startsWith("--all.cells")) {
+ allCells = true;
+ continue;
+ }
+
+ if (cmd.startsWith("--bulkload")) {
+ bulkload = true;
+ continue;
+ }
+
+ if (cmd.startsWith("--shuffle")) {
+ shuffle = true;
+ continue;
+ }
+
+ if (i == args.length-1) {
+ tableName = cmd;
+ } else {
+ printUsage("Invalid argument '" + cmd + "'");
+ return false;
+ }
+ }
+ if (dstTableName == null && peerAddress == null) {
+ printUsage("At least a new table name or a " +
+ "peer address must be specified");
+ return false;
+ }
+ if ((endTime != 0) && (startTime > endTime)) {
+ printUsage("Invalid time range filter: starttime=" + startTime + " > endtime=" + endTime);
+ return false;
+ }
+
+ if (bulkload && peerAddress != null) {
+ printUsage("Remote bulkload is not supported!");
+ return false;
+ }
+
+ // set dstTableName if necessary
+ if (dstTableName == null) {
+ dstTableName = tableName;
+ }
+ } catch (Exception e) {
+ e.printStackTrace();
+ printUsage("Can't start because " + e.getMessage());
+ return false;
+ }
+ return true;
+ }
+
+ /**
+ * Main entry point.
+ *
+ * @param args The command line parameters.
+ * @throws Exception When running the job fails.
+ */
+ public static void main(String[] args) throws Exception {
+ int ret = ToolRunner.run(HBaseConfiguration.create(), new CopyTable(), args);
+ System.exit(ret);
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+ Job job = createSubmittableJob(args);
+ if (job == null) return 1;
+ if (!job.waitForCompletion(true)) {
+ LOG.info("Map-reduce job failed!");
+ if (bulkload) {
+ LOG.info("Files are not bulkloaded!");
+ }
+ return 1;
+ }
+ int code = 0;
+ if (bulkload) {
+ code = new LoadIncrementalHFiles(this.getConf()).run(new String[]{this.bulkloadDir.toString(),
+ this.dstTableName});
+ if (code == 0) {
+ // bulkloadDir is deleted only if LoadIncrementalHFiles was successful, so that one can rerun
+ // LoadIncrementalHFiles if it failed.
+ FileSystem fs = FileSystem.get(this.getConf());
+ if (!fs.delete(this.bulkloadDir, true)) {
+ LOG.error("Deleting folder " + bulkloadDir + " failed!");
+ code = 1;
+ }
+ }
+ }
+ return code;
+ }
+}
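
For readers following the moved code, a minimal sketch (not part of the patch) of driving CopyTable programmatically through ToolRunner; the table names are hypothetical and the flags map directly onto the options parsed in doCommandLine above:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.CopyTable;
import org.apache.hadoop.util.ToolRunner;

public class CopyTableExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // Copy the (hypothetical) table 'SourceTable' into 'DestTable' on the same cluster.
    // Both tables must already exist; add --peer.adr=... to write to a remote cluster instead.
    int exitCode = ToolRunner.run(conf, new CopyTable(),
        new String[] { "--new.name=DestTable", "SourceTable" });
    System.exit(exitCode);
  }
}
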
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/DefaultVisibilityExpressionResolver.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/DefaultVisibilityExpressionResolver.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/DefaultVisibilityExpressionResolver.java
new file mode 100644
index 0000000..004ee5c
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/DefaultVisibilityExpressionResolver.java
@@ -0,0 +1,144 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.apache.hadoop.hbase.security.visibility.VisibilityConstants.LABELS_TABLE_FAMILY;
+import static org.apache.hadoop.hbase.security.visibility.VisibilityConstants.LABELS_TABLE_NAME;
+import static org.apache.hadoop.hbase.security.visibility.VisibilityConstants.LABEL_QUALIFIER;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.TableNotFoundException;
+import org.apache.hadoop.hbase.Tag;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.security.visibility.Authorizations;
+import org.apache.hadoop.hbase.security.visibility.VisibilityConstants;
+import org.apache.hadoop.hbase.security.visibility.VisibilityLabelOrdinalProvider;
+import org.apache.hadoop.hbase.security.visibility.VisibilityUtils;
+import org.apache.hadoop.hbase.util.Bytes;
+
+/**
+ * This implementation creates tags by expanding the expression using label ordinals. Labels are
+ * serialized in sorted order of their ordinals.
+ */
+@InterfaceAudience.Private
+public class DefaultVisibilityExpressionResolver implements VisibilityExpressionResolver {
+ private static final Log LOG = LogFactory.getLog(DefaultVisibilityExpressionResolver.class);
+
+ private Configuration conf;
+ private final Map<String, Integer> labels = new HashMap<>();
+
+ @Override
+ public Configuration getConf() {
+ return this.conf;
+ }
+
+ @Override
+ public void setConf(Configuration conf) {
+ this.conf = conf;
+ }
+
+ @Override
+ public void init() {
+ // Read all the labels and their ordinals.
+ // This scan should be done by a user with global_admin privileges. Ensure that it works.
+ Table labelsTable = null;
+ Connection connection = null;
+ try {
+ connection = ConnectionFactory.createConnection(conf);
+ try {
+ labelsTable = connection.getTable(LABELS_TABLE_NAME);
+ } catch (IOException e) {
+ LOG.error("Error opening 'labels' table", e);
+ return;
+ }
+ Scan scan = new Scan();
+ scan.setAuthorizations(new Authorizations(VisibilityUtils.SYSTEM_LABEL));
+ scan.addColumn(LABELS_TABLE_FAMILY, LABEL_QUALIFIER);
+ ResultScanner scanner = null;
+ try {
+ scanner = labelsTable.getScanner(scan);
+ Result next = null;
+ while ((next = scanner.next()) != null) {
+ byte[] row = next.getRow();
+ byte[] value = next.getValue(LABELS_TABLE_FAMILY, LABEL_QUALIFIER);
+ labels.put(Bytes.toString(value), Bytes.toInt(row));
+ }
+ } catch (TableNotFoundException e) {
+ // Table not found. So just return
+ return;
+ } catch (IOException e) {
+ LOG.error("Error scanning 'labels' table", e);
+ } finally {
+ if (scanner != null) scanner.close();
+ }
+ } catch (IOException ioe) {
+ LOG.error("Failed reading 'labels' tags", ioe);
+ return;
+ } finally {
+ if (labelsTable != null) {
+ try {
+ labelsTable.close();
+ } catch (IOException ioe) {
+ LOG.warn("Error closing 'labels' table", ioe);
+ }
+ }
+ if (connection != null)
+ try {
+ connection.close();
+ } catch (IOException ioe) {
+ LOG.warn("Failed close of temporary connection", ioe);
+ }
+ }
+ }
+
+ @Override
+ public List<Tag> createVisibilityExpTags(String visExpression) throws IOException {
+ VisibilityLabelOrdinalProvider provider = new VisibilityLabelOrdinalProvider() {
+ @Override
+ public int getLabelOrdinal(String label) {
+ Integer ordinal = labels.get(label);
+ if (ordinal != null) {
+ return ordinal.intValue();
+ }
+ return VisibilityConstants.NON_EXIST_LABEL_ORDINAL;
+ }
+
+ @Override
+ public String getLabel(int ordinal) {
+ // Unused
+ throw new UnsupportedOperationException(
+ "getLabel should not be used in VisibilityExpressionResolver");
+ }
+ };
+ return VisibilityUtils.createVisibilityExpTags(visExpression, true, false, null, provider);
+ }
+}
\ No newline at end of file
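
The resolver above is marked @InterfaceAudience.Private, so the following is only an illustrative sketch of its lifecycle (setConf, init, createVisibilityExpTags), not part of the patch; the label names are hypothetical and must already be defined in the cluster's labels table:

import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.mapreduce.DefaultVisibilityExpressionResolver;

public class VisibilityTagExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    DefaultVisibilityExpressionResolver resolver = new DefaultVisibilityExpressionResolver();
    resolver.setConf(conf);
    // Loads label ordinals from the labels table; requires a sufficiently privileged user.
    resolver.init();
    // 'secret' and 'confidential' are hypothetical labels.
    List<Tag> tags = resolver.createVisibilityExpTags("secret&confidential");
    System.out.println("Created " + tags.size() + " visibility tag(s)");
  }
}
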
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/Driver.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/Driver.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/Driver.java
new file mode 100644
index 0000000..9737b55
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/Driver.java
@@ -0,0 +1,64 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.hadoop.hbase.HBaseInterfaceAudience;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.classification.InterfaceStability;
+import org.apache.hadoop.hbase.mapreduce.replication.VerifyReplication;
+import org.apache.hadoop.hbase.snapshot.ExportSnapshot;
+import org.apache.hadoop.util.ProgramDriver;
+
+/**
+ * Driver for HBase MapReduce jobs. Select which job to run by passing
+ * its name to this main.
+ */
+@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
+@InterfaceStability.Stable
+public class Driver {
+ /**
+ * @param args
+ * @throws Throwable
+ */
+ public static void main(String[] args) throws Throwable {
+ ProgramDriver pgd = new ProgramDriver();
+
+ pgd.addClass(RowCounter.NAME, RowCounter.class,
+ "Count rows in HBase table.");
+ pgd.addClass(CellCounter.NAME, CellCounter.class,
+ "Count cells in HBase table.");
+ pgd.addClass(Export.NAME, Export.class, "Write table data to HDFS.");
+ pgd.addClass(Import.NAME, Import.class, "Import data written by Export.");
+ pgd.addClass(ImportTsv.NAME, ImportTsv.class, "Import data in TSV format.");
+ pgd.addClass(LoadIncrementalHFiles.NAME, LoadIncrementalHFiles.class,
+ "Complete a bulk data load.");
+ pgd.addClass(CopyTable.NAME, CopyTable.class,
+ "Export a table from local cluster to peer cluster.");
+ pgd.addClass(VerifyReplication.NAME, VerifyReplication.class, "Compare" +
+ " the data from tables in two different clusters. WARNING: It" +
+ " doesn't work for incrementColumnValues'd cells since the" +
+ " timestamp is changed after being appended to the log.");
+ pgd.addClass(WALPlayer.NAME, WALPlayer.class, "Replay WAL files.");
+ pgd.addClass(ExportSnapshot.NAME, ExportSnapshot.class, "Export" +
+ " the specific snapshot to a given FileSystem.");
+
+ ProgramDriver.class.getMethod("driver", new Class [] {String[].class}).
+ invoke(pgd, new Object[]{args});
+ }
+}
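
A short sketch, not part of the patch, of selecting one of the registered programs through the driver; "copytable" is the name registered above via CopyTable.NAME, and the table names are hypothetical:

import org.apache.hadoop.hbase.mapreduce.Driver;

public class DriverExample {
  public static void main(String[] args) throws Throwable {
    // Equivalent to: hbase org.apache.hadoop.hbase.mapreduce.Driver copytable --new.name=DestTable SourceTable
    Driver.main(new String[] { "copytable", "--new.name=DestTable", "SourceTable" });
  }
}
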
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java
new file mode 100644
index 0000000..de6cf3a
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java
@@ -0,0 +1,197 @@
+/**
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
+import org.apache.hadoop.hbase.filter.Filter;
+import org.apache.hadoop.hbase.filter.IncompatibleFilterException;
+import org.apache.hadoop.hbase.filter.PrefixFilter;
+import org.apache.hadoop.hbase.filter.RegexStringComparator;
+import org.apache.hadoop.hbase.filter.RowFilter;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * Export an HBase table.
+ * Writes content to sequence files in HDFS. Use {@link Import} to read it
+ * back in again.
+ */
+@InterfaceAudience.Public
+public class Export extends Configured implements Tool {
+ private static final Log LOG = LogFactory.getLog(Export.class);
+ final static String NAME = "export";
+ final static String RAW_SCAN = "hbase.mapreduce.include.deleted.rows";
+ final static String EXPORT_BATCHING = "hbase.export.scanner.batch";
+
+ private final static String JOB_NAME_CONF_KEY = "mapreduce.job.name";
+
+ /**
+ * Sets up the actual job.
+ *
+ * @param conf The current configuration.
+ * @param args The command line parameters.
+ * @return The newly created job.
+ * @throws IOException When setting up the job fails.
+ */
+ public static Job createSubmittableJob(Configuration conf, String[] args)
+ throws IOException {
+ String tableName = args[0];
+ Path outputDir = new Path(args[1]);
+ Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
+ job.setJobName(NAME + "_" + tableName);
+ job.setJarByClass(Export.class);
+ // Set optional scan parameters
+ Scan s = getConfiguredScanForJob(conf, args);
+ IdentityTableMapper.initJob(tableName, s, IdentityTableMapper.class, job);
+ // No reducers. Just write straight to output files.
+ job.setNumReduceTasks(0);
+ job.setOutputFormatClass(SequenceFileOutputFormat.class);
+ job.setOutputKeyClass(ImmutableBytesWritable.class);
+ job.setOutputValueClass(Result.class);
+ FileOutputFormat.setOutputPath(job, outputDir); // job conf doesn't contain the conf so doesn't have a default fs.
+ return job;
+ }
+
+ private static Scan getConfiguredScanForJob(Configuration conf, String[] args) throws IOException {
+ Scan s = new Scan();
+ // Optional arguments.
+ // Set Scan Versions
+ int versions = args.length > 2? Integer.parseInt(args[2]): 1;
+ s.setMaxVersions(versions);
+ // Set Scan Range
+ long startTime = args.length > 3? Long.parseLong(args[3]): 0L;
+ long endTime = args.length > 4? Long.parseLong(args[4]): Long.MAX_VALUE;
+ s.setTimeRange(startTime, endTime);
+ // Set cache blocks
+ s.setCacheBlocks(false);
+ // set Start and Stop row
+ if (conf.get(TableInputFormat.SCAN_ROW_START) != null) {
+ s.setStartRow(Bytes.toBytesBinary(conf.get(TableInputFormat.SCAN_ROW_START)));
+ }
+ if (conf.get(TableInputFormat.SCAN_ROW_STOP) != null) {
+ s.setStopRow(Bytes.toBytesBinary(conf.get(TableInputFormat.SCAN_ROW_STOP)));
+ }
+ // Set Scan Column Family
+ boolean raw = Boolean.parseBoolean(conf.get(RAW_SCAN));
+ if (raw) {
+ s.setRaw(raw);
+ }
+ for (String columnFamily : conf.getTrimmedStrings(TableInputFormat.SCAN_COLUMN_FAMILY)) {
+ s.addFamily(Bytes.toBytes(columnFamily));
+ }
+ // Set RowFilter or Prefix Filter if applicable.
+ Filter exportFilter = getExportFilter(args);
+ if (exportFilter!= null) {
+ LOG.info("Setting Scan Filter for Export.");
+ s.setFilter(exportFilter);
+ }
+
+ int batching = conf.getInt(EXPORT_BATCHING, -1);
+ if (batching != -1){
+ try {
+ s.setBatch(batching);
+ } catch (IncompatibleFilterException e) {
+ LOG.error("Batching could not be set", e);
+ }
+ }
+ LOG.info("versions=" + versions + ", starttime=" + startTime +
+ ", endtime=" + endTime + ", keepDeletedCells=" + raw);
+ return s;
+ }
+
+ private static Filter getExportFilter(String[] args) {
+ Filter exportFilter = null;
+ String filterCriteria = (args.length > 5) ? args[5]: null;
+ if (filterCriteria == null) return null;
+ if (filterCriteria.startsWith("^")) {
+ String regexPattern = filterCriteria.substring(1, filterCriteria.length());
+ exportFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator(regexPattern));
+ } else {
+ exportFilter = new PrefixFilter(Bytes.toBytesBinary(filterCriteria));
+ }
+ return exportFilter;
+ }
+
+ /*
+ * @param errorMsg Error message. Can be null.
+ */
+ private static void usage(final String errorMsg) {
+ if (errorMsg != null && errorMsg.length() > 0) {
+ System.err.println("ERROR: " + errorMsg);
+ }
+ System.err.println("Usage: Export [-D <property=value>]* <tablename> <outputdir> [<versions> " +
+ "[<starttime> [<endtime>]] [^[regex pattern] or [Prefix] to filter]]\n");
+ System.err.println(" Note: -D properties will be applied to the conf used. ");
+ System.err.println(" For example: ");
+ System.err.println(" -D mapreduce.output.fileoutputformat.compress=true");
+ System.err.println(" -D mapreduce.output.fileoutputformat.compress.codec=org.apache.hadoop.io.compress.GzipCodec");
+ System.err.println(" -D mapreduce.output.fileoutputformat.compress.type=BLOCK");
+ System.err.println(" Additionally, the following SCAN properties can be specified");
+ System.err.println(" to control/limit what is exported..");
+ System.err.println(" -D " + TableInputFormat.SCAN_COLUMN_FAMILY + "=<family1>,<family2>, ...");
+ System.err.println(" -D " + RAW_SCAN + "=true");
+ System.err.println(" -D " + TableInputFormat.SCAN_ROW_START + "=<ROWSTART>");
+ System.err.println(" -D " + TableInputFormat.SCAN_ROW_STOP + "=<ROWSTOP>");
+ System.err.println(" -D " + JOB_NAME_CONF_KEY
+ + "=jobName - use the specified mapreduce job name for the export");
+ System.err.println("For performance consider the following properties:\n"
+ + " -Dhbase.client.scanner.caching=100\n"
+ + " -Dmapreduce.map.speculative=false\n"
+ + " -Dmapreduce.reduce.speculative=false");
+ System.err.println("For tables with very wide rows consider setting the batch size as below:\n"
+ + " -D" + EXPORT_BATCHING + "=10");
+ }
+
+
+ @Override
+ public int run(String[] args) throws Exception {
+ if (args.length < 2) {
+ usage("Wrong number of arguments: " + args.length);
+ return -1;
+ }
+ Job job = createSubmittableJob(getConf(), args);
+ return (job.waitForCompletion(true) ? 0 : 1);
+ }
+
+ /**
+ * Main entry point.
+ * @param args The command line parameters.
+ * @throws Exception When running the job fails.
+ */
+ public static void main(String[] args) throws Exception {
+ int errCode = ToolRunner.run(HBaseConfiguration.create(), new Export(), args);
+ System.exit(errCode);
+ }
+}
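
A small sketch (not part of the patch) of running the Export tool with the tuning keys shown in its usage text; the table name, output path, and version count are hypothetical:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.Export;
import org.apache.hadoop.util.ToolRunner;

public class ExportExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // Larger scanner caching, as recommended in the usage text above.
    conf.setInt("hbase.client.scanner.caching", 100);
    // Include delete markers and deleted cells (the RAW_SCAN key defined above).
    conf.setBoolean("hbase.mapreduce.include.deleted.rows", true);
    // Export 'SourceTable' to an HDFS directory, keeping up to 3 versions of each cell.
    int exitCode = ToolRunner.run(conf, new Export(),
        new String[] { "SourceTable", "/tmp/export-output", "3" });
    System.exit(exitCode);
  }
}
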
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/GroupingTableMapper.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/GroupingTableMapper.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/GroupingTableMapper.java
new file mode 100644
index 0000000..dc30c6e
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/GroupingTableMapper.java
@@ -0,0 +1,177 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapreduce.Job;
+
+/**
+ * Extract grouping columns from the input record.
+ */
+@InterfaceAudience.Public
+public class GroupingTableMapper
+extends TableMapper<ImmutableBytesWritable,Result> implements Configurable {
+
+ /**
+ * JobConf parameter to specify the columns used to produce the key passed to
+ * collect from the map phase.
+ */
+ public static final String GROUP_COLUMNS =
+ "hbase.mapred.groupingtablemap.columns";
+
+ /** The grouping columns. */
+ protected byte [][] columns;
+ /** The current configuration. */
+ private Configuration conf = null;
+
+ /**
+ * Use this before submitting a TableMap job. It will appropriately set up
+ * the job.
+ *
+ * @param table The table to be processed.
+ * @param scan The scan with the columns etc.
+ * @param groupColumns A space separated list of columns used to form the
+ * key used in collect.
+ * @param mapper The mapper class.
+ * @param job The current job.
+ * @throws IOException When setting up the job fails.
+ */
+ @SuppressWarnings("unchecked")
+ public static void initJob(String table, Scan scan, String groupColumns,
+ Class<? extends TableMapper> mapper, Job job) throws IOException {
+ TableMapReduceUtil.initTableMapperJob(table, scan, mapper,
+ ImmutableBytesWritable.class, Result.class, job);
+ job.getConfiguration().set(GROUP_COLUMNS, groupColumns);
+ }
+
+ /**
+ * Extract the grouping columns from value to construct a new key. Pass the
+ * new key and value to reduce. If any of the grouping columns are not found
+ * in the value, the record is skipped.
+ *
+ * @param key The current key.
+ * @param value The current value.
+ * @param context The current context.
+ * @throws IOException When writing the record fails.
+ * @throws InterruptedException When the job is aborted.
+ */
+ @Override
+ public void map(ImmutableBytesWritable key, Result value, Context context)
+ throws IOException, InterruptedException {
+ byte[][] keyVals = extractKeyValues(value);
+ if(keyVals != null) {
+ ImmutableBytesWritable tKey = createGroupKey(keyVals);
+ context.write(tKey, value);
+ }
+ }
+
+ /**
+ * Extract column values from the current record. This method returns
+ * null if any of the columns are not found.
+ * <p>
+ * Override this method if you want to deal with nulls differently.
+ *
+ * @param r The current values.
+ * @return Array of byte values.
+ */
+ protected byte[][] extractKeyValues(Result r) {
+ byte[][] keyVals = null;
+ ArrayList<byte[]> foundList = new ArrayList<>();
+ int numCols = columns.length;
+ if (numCols > 0) {
+ for (Cell value: r.listCells()) {
+ byte [] column = KeyValue.makeColumn(CellUtil.cloneFamily(value),
+ CellUtil.cloneQualifier(value));
+ for (int i = 0; i < numCols; i++) {
+ if (Bytes.equals(column, columns[i])) {
+ foundList.add(CellUtil.cloneValue(value));
+ break;
+ }
+ }
+ }
+ if(foundList.size() == numCols) {
+ keyVals = foundList.toArray(new byte[numCols][]);
+ }
+ }
+ return keyVals;
+ }
+
+ /**
+ * Create a key by concatenating multiple column values.
+ * <p>
+ * Override this function in order to produce different types of keys.
+ *
+ * @param vals The current key/values.
+ * @return A key generated by concatenating multiple column values.
+ */
+ protected ImmutableBytesWritable createGroupKey(byte[][] vals) {
+ if(vals == null) {
+ return null;
+ }
+ StringBuilder sb = new StringBuilder();
+ for(int i = 0; i < vals.length; i++) {
+ if(i > 0) {
+ sb.append(" ");
+ }
+ sb.append(Bytes.toString(vals[i]));
+ }
+ return new ImmutableBytesWritable(Bytes.toBytesBinary(sb.toString()));
+ }
+
+ /**
+ * Returns the current configuration.
+ *
+ * @return The current configuration.
+ * @see org.apache.hadoop.conf.Configurable#getConf()
+ */
+ @Override
+ public Configuration getConf() {
+ return conf;
+ }
+
+ /**
+ * Sets the configuration. This is used to set up the grouping details.
+ *
+ * @param configuration The configuration to set.
+ * @see org.apache.hadoop.conf.Configurable#setConf(
+ * org.apache.hadoop.conf.Configuration)
+ */
+ @Override
+ public void setConf(Configuration configuration) {
+ this.conf = configuration;
+ String[] cols = conf.get(GROUP_COLUMNS, "").split(" ");
+ columns = new byte[cols.length][];
+ for(int i = 0; i < cols.length; i++) {
+ columns[i] = Bytes.toBytes(cols[i]);
+ }
+ }
+
+}
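
To make the initJob contract above concrete, a minimal sketch (not part of the patch) that wires the mapper into a job; the table and column names are hypothetical:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.GroupingTableMapper;
import org.apache.hadoop.mapreduce.Job;

public class GroupingExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    Job job = Job.getInstance(conf, "grouping-example");
    Scan scan = new Scan();
    scan.setCacheBlocks(false);
    // Group rows of 'SourceTable' by two columns; the map output key is the
    // space-separated concatenation of their values.
    GroupingTableMapper.initJob("SourceTable", scan, "cf1:country cf1:city",
        GroupingTableMapper.class, job);
    // A reducer and an output format still need to be configured before submitting.
  }
}
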
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileInputFormat.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileInputFormat.java
new file mode 100644
index 0000000..e90d5c1
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileInputFormat.java
@@ -0,0 +1,174 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.io.hfile.HFile;
+import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
+import org.apache.hadoop.hbase.io.hfile.HFileScanner;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.input.FileSplit;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Simple MR input format for HFiles.
+ * This code was borrowed from Apache Crunch project.
+ * Updated to the recent version of HBase.
+ */
+public class HFileInputFormat extends FileInputFormat<NullWritable, Cell> {
+
+ private static final Logger LOG = LoggerFactory.getLogger(HFileInputFormat.class);
+
+ /**
+ * File filter that removes all "hidden" files. This might be something worth removing from
+ * a more general purpose utility; it accounts for the presence of metadata files created
+ * in the way we're doing exports.
+ */
+ static final PathFilter HIDDEN_FILE_FILTER = new PathFilter() {
+ @Override
+ public boolean accept(Path p) {
+ String name = p.getName();
+ return !name.startsWith("_") && !name.startsWith(".");
+ }
+ };
+
+ /**
+ * Record reader for HFiles.
+ */
+ private static class HFileRecordReader extends RecordReader<NullWritable, Cell> {
+
+ private Reader in;
+ protected Configuration conf;
+ private HFileScanner scanner;
+
+ /**
+ * A private cache of the key value so it doesn't need to be loaded twice from the scanner.
+ */
+ private Cell value = null;
+ private long count;
+ private boolean seeked = false;
+
+ @Override
+ public void initialize(InputSplit split, TaskAttemptContext context)
+ throws IOException, InterruptedException {
+ FileSplit fileSplit = (FileSplit) split;
+ conf = context.getConfiguration();
+ Path path = fileSplit.getPath();
+ FileSystem fs = path.getFileSystem(conf);
+ LOG.info("Initialize HFileRecordReader for {}", path);
+ this.in = HFile.createReader(fs, path, conf);
+
+ // The file info must be loaded before the scanner can be used.
+ // This seems like a bug in HBase, but it's easily worked around.
+ this.in.loadFileInfo();
+ this.scanner = in.getScanner(false, false);
+
+ }
+
+
+ @Override
+ public boolean nextKeyValue() throws IOException, InterruptedException {
+ boolean hasNext;
+ if (!seeked) {
+ LOG.info("Seeking to start");
+ hasNext = scanner.seekTo();
+ seeked = true;
+ } else {
+ hasNext = scanner.next();
+ }
+ if (!hasNext) {
+ return false;
+ }
+ value = scanner.getCell();
+ count++;
+ return true;
+ }
+
+ @Override
+ public NullWritable getCurrentKey() throws IOException, InterruptedException {
+ return NullWritable.get();
+ }
+
+ @Override
+ public Cell getCurrentValue() throws IOException, InterruptedException {
+ return value;
+ }
+
+ @Override
+ public float getProgress() throws IOException, InterruptedException {
+ // This would be inaccurate if KVs are not uniformly-sized or we have performed a seek to
+ // the start row, but better than nothing anyway.
+ return 1.0f * count / in.getEntries();
+ }
+
+ @Override
+ public void close() throws IOException {
+ if (in != null) {
+ in.close();
+ in = null;
+ }
+ }
+ }
+
+ @Override
+ protected List<FileStatus> listStatus(JobContext job) throws IOException {
+ List<FileStatus> result = new ArrayList<FileStatus>();
+
+ // Explode out directories that match the original FileInputFormat filters
+ // since HFiles are written to directories where the
+ // directory name is the column name
+ for (FileStatus status : super.listStatus(job)) {
+ if (status.isDirectory()) {
+ FileSystem fs = status.getPath().getFileSystem(job.getConfiguration());
+ for (FileStatus match : fs.listStatus(status.getPath(), HIDDEN_FILE_FILTER)) {
+ result.add(match);
+ }
+ } else {
+ result.add(status);
+ }
+ }
+ return result;
+ }
+
+ @Override
+ public RecordReader<NullWritable, Cell> createRecordReader(InputSplit split, TaskAttemptContext context)
+ throws IOException, InterruptedException {
+ return new HFileRecordReader();
+ }
+
+ @Override
+ protected boolean isSplitable(JobContext context, Path filename) {
+ // This file isn't splittable.
+ return false;
+ }
+}
\ No newline at end of file
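
For orientation, a minimal sketch (not part of the patch) of pointing a MapReduce job at a directory of HFiles using the input format above; the path is hypothetical:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.HFileInputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class HFileReadExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    Job job = Job.getInstance(conf, "hfile-read-example");
    job.setInputFormatClass(HFileInputFormat.class);
    // Hypothetical directory of HFiles, e.g. one column-family directory of a bulk-load output.
    FileInputFormat.addInputPath(job, new Path("/tmp/hfiles/cf1"));
    // Each map input record is a NullWritable key and a Cell value, one per KeyValue in the files.
    // A mapper and an output format still need to be configured before submitting.
  }
}
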
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat2.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat2.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat2.java
new file mode 100644
index 0000000..7fea254
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat2.java
@@ -0,0 +1,902 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.net.InetSocketAddress;
+import java.net.URLDecoder;
+import java.net.URLEncoder;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.TreeSet;
+import java.util.UUID;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellComparator;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.fs.HFileSystem;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionLocation;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.io.compress.Compression;
+import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
+import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
+import org.apache.hadoop.hbase.io.hfile.CacheConfig;
+import org.apache.hadoop.hbase.io.hfile.HFile;
+import org.apache.hadoop.hbase.io.hfile.HFileContext;
+import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
+import org.apache.hadoop.hbase.io.hfile.HFileWriterImpl;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.KeyValueUtil;
+import org.apache.hadoop.hbase.regionserver.BloomType;
+import org.apache.hadoop.hbase.regionserver.HStore;
+import org.apache.hadoop.hbase.regionserver.StoreFile;
+import org.apache.hadoop.hbase.regionserver.StoreFileWriter;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.OutputFormat;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.annotations.VisibleForTesting;
+
+/**
+ * Writes HFiles. Passed Cells must arrive in order.
+ * Writes current time as the sequence id for the file. Sets the major compacted
+ * attribute on created {@link HFile}s. Calling write(null,null) will forcibly roll
+ * all HFiles being written.
+ * <p>
+ * Using this class as part of a MapReduce job is best done
+ * using {@link #configureIncrementalLoad(Job, TableDescriptor, RegionLocator)}.
+ */
+@InterfaceAudience.Public
+public class HFileOutputFormat2
+ extends FileOutputFormat<ImmutableBytesWritable, Cell> {
+ private static final Log LOG = LogFactory.getLog(HFileOutputFormat2.class);
+ static class TableInfo {
+ private TableDescriptor tableDesctiptor;
+ private RegionLocator regionLocator;
+
+ public TableInfo(TableDescriptor tableDesctiptor, RegionLocator regionLocator) {
+ this.tableDesctiptor = tableDesctiptor;
+ this.regionLocator = regionLocator;
+ }
+
+ /**
+ * Modifications to the returned HTD do not affect the inner TableDescriptor.
+ * @return A clone of inner table descriptor
+ * @deprecated use {@link #getTableDescriptor}
+ */
+ @Deprecated
+ public HTableDescriptor getHTableDescriptor() {
+ return new HTableDescriptor(tableDesctiptor);
+ }
+
+ public TableDescriptor getTableDescriptor() {
+ return tableDesctiptor;
+ }
+
+ public RegionLocator getRegionLocator() {
+ return regionLocator;
+ }
+ }
+
+ protected static final byte[] tableSeparator = ";".getBytes(StandardCharsets.UTF_8);
+
+ protected static byte[] combineTableNameSuffix(byte[] tableName,
+ byte[] suffix ) {
+ return Bytes.add(tableName, tableSeparator, suffix);
+ }
+
+ // The following constants are private since these are used by
+ // HFileOutputFormat2 to internally transfer data between job setup and
+ // reducer run using conf.
+ // These should not be changed by the client.
+ static final String COMPRESSION_FAMILIES_CONF_KEY =
+ "hbase.hfileoutputformat.families.compression";
+ static final String BLOOM_TYPE_FAMILIES_CONF_KEY =
+ "hbase.hfileoutputformat.families.bloomtype";
+ static final String BLOCK_SIZE_FAMILIES_CONF_KEY =
+ "hbase.mapreduce.hfileoutputformat.blocksize";
+ static final String DATABLOCK_ENCODING_FAMILIES_CONF_KEY =
+ "hbase.mapreduce.hfileoutputformat.families.datablock.encoding";
+
+ // This constant is public since the client can modify this when setting
+ // up their conf object and thus refer to this symbol.
+ // It is present for backwards compatibility reasons. Use it only to
+ // override the auto-detection of datablock encoding.
+ public static final String DATABLOCK_ENCODING_OVERRIDE_CONF_KEY =
+ "hbase.mapreduce.hfileoutputformat.datablock.encoding";
+
+ /**
+ * Keep locality while generating HFiles for bulkload. See HBASE-12596
+ */
+ public static final String LOCALITY_SENSITIVE_CONF_KEY =
+ "hbase.bulkload.locality.sensitive.enabled";
+ private static final boolean DEFAULT_LOCALITY_SENSITIVE = true;
+ static final String OUTPUT_TABLE_NAME_CONF_KEY =
+ "hbase.mapreduce.hfileoutputformat.table.name";
+ static final String MULTI_TABLE_HFILEOUTPUTFORMAT_CONF_KEY =
+ "hbase.mapreduce.use.multi.table.hfileoutputformat";
+
+ public static final String STORAGE_POLICY_PROPERTY = "hbase.hstore.storagepolicy";
+ public static final String STORAGE_POLICY_PROPERTY_CF_PREFIX = STORAGE_POLICY_PROPERTY + ".";
+
+ @Override
+ public RecordWriter<ImmutableBytesWritable, Cell> getRecordWriter(
+ final TaskAttemptContext context) throws IOException, InterruptedException {
+ return createRecordWriter(context);
+ }
+
+ protected static byte[] getTableNameSuffixedWithFamily(byte[] tableName, byte[] family) {
+ return combineTableNameSuffix(tableName, family);
+ }
+
+ static <V extends Cell> RecordWriter<ImmutableBytesWritable, V>
+ createRecordWriter(final TaskAttemptContext context)
+ throws IOException {
+
+ // Get the path of the temporary output file
+ final Path outputPath = FileOutputFormat.getOutputPath(context);
+ final Path outputDir = new FileOutputCommitter(outputPath, context).getWorkPath();
+ final Configuration conf = context.getConfiguration();
+ final boolean writeMultipleTables = conf.getBoolean(MULTI_TABLE_HFILEOUTPUTFORMAT_CONF_KEY, false) ;
+ final String writeTableNames = conf.get(OUTPUT_TABLE_NAME_CONF_KEY);
+ if (writeTableNames==null || writeTableNames.isEmpty()) {
+ throw new IllegalArgumentException("Configuration parameter " + OUTPUT_TABLE_NAME_CONF_KEY
+ + " cannot be empty");
+ }
+ final FileSystem fs = outputDir.getFileSystem(conf);
+ // These configs. are from hbase-*.xml
+ final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE,
+ HConstants.DEFAULT_MAX_FILE_SIZE);
+ // Invented config. Add to hbase-*.xml if other than default compression.
+ final String defaultCompressionStr = conf.get("hfile.compression",
+ Compression.Algorithm.NONE.getName());
+ final Algorithm defaultCompression = HFileWriterImpl
+ .compressionByName(defaultCompressionStr);
+ final boolean compactionExclude = conf.getBoolean(
+ "hbase.mapreduce.hfileoutputformat.compaction.exclude", false);
+
+ final Set<String> allTableNames = Arrays.stream(writeTableNames.split(
+ Bytes.toString(tableSeparator))).collect(Collectors.toSet());
+
+ // create a map from column family to the compression algorithm
+ final Map<byte[], Algorithm> compressionMap = createFamilyCompressionMap(conf);
+ final Map<byte[], BloomType> bloomTypeMap = createFamilyBloomTypeMap(conf);
+ final Map<byte[], Integer> blockSizeMap = createFamilyBlockSizeMap(conf);
+
+ String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_OVERRIDE_CONF_KEY);
+ final Map<byte[], DataBlockEncoding> datablockEncodingMap
+ = createFamilyDataBlockEncodingMap(conf);
+ final DataBlockEncoding overriddenEncoding;
+ if (dataBlockEncodingStr != null) {
+ overriddenEncoding = DataBlockEncoding.valueOf(dataBlockEncodingStr);
+ } else {
+ overriddenEncoding = null;
+ }
+
+ return new RecordWriter<ImmutableBytesWritable, V>() {
+ // Map of families to writers and how much has been output on the writer.
+ private final Map<byte[], WriterLength> writers =
+ new TreeMap<>(Bytes.BYTES_COMPARATOR);
+ private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
+ private final byte[] now = Bytes.toBytes(EnvironmentEdgeManager.currentTime());
+ private boolean rollRequested = false;
+
+ @Override
+ public void write(ImmutableBytesWritable row, V cell)
+ throws IOException {
+ KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
+
+ // null input == user explicitly wants to flush
+ if (row == null && kv == null) {
+ rollWriters();
+ return;
+ }
+
+ byte[] rowKey = CellUtil.cloneRow(kv);
+ long length = kv.getLength();
+ byte[] family = CellUtil.cloneFamily(kv);
+ byte[] tableNameBytes = null;
+ if (writeMultipleTables) {
+ tableNameBytes = MultiTableHFileOutputFormat.getTableName(row.get());
+ if (!allTableNames.contains(Bytes.toString(tableNameBytes))) {
+ throw new IllegalArgumentException("TableName '" + Bytes.toString(tableNameBytes) +
+ "' not" + " expected");
+ }
+ } else {
+ tableNameBytes = writeTableNames.getBytes(StandardCharsets.UTF_8);
+ }
+ byte[] tableAndFamily = getTableNameSuffixedWithFamily(tableNameBytes, family);
+ WriterLength wl = this.writers.get(tableAndFamily);
+
+ // If this is a new column family, verify that the directory exists
+ if (wl == null) {
+ Path writerPath = null;
+ if (writeMultipleTables) {
+ writerPath = new Path(outputDir, new Path(Bytes.toString(tableNameBytes), Bytes
+ .toString(family)));
+ }
+ else {
+ writerPath = new Path(outputDir, Bytes.toString(family));
+ }
+ fs.mkdirs(writerPath);
+ configureStoragePolicy(conf, fs, tableAndFamily, writerPath);
+ }
+
+ // If any of the HFiles for the column families has reached
+ // maxsize, we need to roll all the writers
+ if (wl != null && wl.written + length >= maxsize) {
+ this.rollRequested = true;
+ }
+
+ // This can only happen once a row is finished though
+ if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
+ rollWriters();
+ }
+
+ // create a new HFile writer, if necessary
+ if (wl == null || wl.writer == null) {
+ if (conf.getBoolean(LOCALITY_SENSITIVE_CONF_KEY, DEFAULT_LOCALITY_SENSITIVE)) {
+ HRegionLocation loc = null;
+
+ String tableName = Bytes.toString(tableNameBytes);
+ if (tableName != null) {
+ try (Connection connection = ConnectionFactory.createConnection(conf);
+ RegionLocator locator =
+ connection.getRegionLocator(TableName.valueOf(tableName))) {
+ loc = locator.getRegionLocation(rowKey);
+ } catch (Throwable e) {
+ LOG.warn("There's something wrong when locating rowkey: " +
+ Bytes.toString(rowKey) + " for tablename: " + tableName, e);
+ loc = null;
+ } }
+
+ if (null == loc) {
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("failed to get region location, so use default writer for rowkey: " +
+ Bytes.toString(rowKey));
+ }
+ wl = getNewWriter(tableNameBytes, family, conf, null);
+ } else {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("first rowkey: [" + Bytes.toString(rowKey) + "]");
+ }
+ InetSocketAddress initialIsa =
+ new InetSocketAddress(loc.getHostname(), loc.getPort());
+ if (initialIsa.isUnresolved()) {
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("failed to resolve bind address: " + loc.getHostname() + ":"
+ + loc.getPort() + ", so use default writer");
+ }
+ wl = getNewWriter(tableNameBytes, family, conf, null);
+ } else {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("use favored nodes writer: " + initialIsa.getHostString());
+ }
+ wl = getNewWriter(tableNameBytes, family, conf, new InetSocketAddress[] { initialIsa
+ });
+ }
+ }
+ } else {
+ wl = getNewWriter(tableNameBytes, family, conf, null);
+ }
+ }
+
+ // we now have the proper HFile writer. full steam ahead
+ kv.updateLatestStamp(this.now);
+ wl.writer.append(kv);
+ wl.written += length;
+
+ // Copy the row so we know when a row transition occurs.
+ this.previousRow = rowKey;
+ }
+
+ private void rollWriters() throws IOException {
+ for (WriterLength wl : this.writers.values()) {
+ if (wl.writer != null) {
+ LOG.info(
+ "Writer=" + wl.writer.getPath() + ((wl.written == 0)? "": ", wrote=" + wl.written));
+ close(wl.writer);
+ }
+ wl.writer = null;
+ wl.written = 0;
+ }
+ this.rollRequested = false;
+ }
+
+ /*
+ * Create a new StoreFile.Writer.
+ * @param family
+ * @return A WriterLength, containing a new StoreFile.Writer.
+ * @throws IOException
+ */
+ @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="BX_UNBOXING_IMMEDIATELY_REBOXED",
+ justification="Not important")
+ private WriterLength getNewWriter(byte[] tableName, byte[] family, Configuration
+ conf, InetSocketAddress[] favoredNodes) throws IOException {
+ byte[] tableAndFamily = getTableNameSuffixedWithFamily(tableName, family);
+ Path familydir = new Path(outputDir, Bytes.toString(family));
+ if (writeMultipleTables) {
+ familydir = new Path(outputDir,
+ new Path(Bytes.toString(tableName), Bytes.toString(family)));
+ }
+ WriterLength wl = new WriterLength();
+ Algorithm compression = compressionMap.get(tableAndFamily);
+ compression = compression == null ? defaultCompression : compression;
+ BloomType bloomType = bloomTypeMap.get(tableAndFamily);
+ bloomType = bloomType == null ? BloomType.NONE : bloomType;
+ Integer blockSize = blockSizeMap.get(tableAndFamily);
+ blockSize = blockSize == null ? HConstants.DEFAULT_BLOCKSIZE : blockSize;
+ DataBlockEncoding encoding = overriddenEncoding;
+ encoding = encoding == null ? datablockEncodingMap.get(tableAndFamily) : encoding;
+ encoding = encoding == null ? DataBlockEncoding.NONE : encoding;
+ Configuration tempConf = new Configuration(conf);
+ tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
+ HFileContextBuilder contextBuilder = new HFileContextBuilder()
+ .withCompression(compression)
+ .withChecksumType(HStore.getChecksumType(conf))
+ .withBytesPerCheckSum(HStore.getBytesPerChecksum(conf))
+ .withBlockSize(blockSize);
+
+ if (HFile.getFormatVersion(conf) >= HFile.MIN_FORMAT_VERSION_WITH_TAGS) {
+ contextBuilder.withIncludesTags(true);
+ }
+
+ contextBuilder.withDataBlockEncoding(encoding);
+ HFileContext hFileContext = contextBuilder.build();
+ if (null == favoredNodes) {
+ wl.writer =
+ new StoreFileWriter.Builder(conf, new CacheConfig(tempConf), fs)
+ .withOutputDir(familydir).withBloomType(bloomType)
+ .withComparator(CellComparator.COMPARATOR).withFileContext(hFileContext).build();
+ } else {
+ wl.writer =
+ new StoreFileWriter.Builder(conf, new CacheConfig(tempConf), new HFileSystem(fs))
+ .withOutputDir(familydir).withBloomType(bloomType)
+ .withComparator(CellComparator.COMPARATOR).withFileContext(hFileContext)
+ .withFavoredNodes(favoredNodes).build();
+ }
+
+ this.writers.put(tableAndFamily, wl);
+ return wl;
+ }
+
+ private void close(final StoreFileWriter w) throws IOException {
+ if (w != null) {
+ w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY,
+ Bytes.toBytes(System.currentTimeMillis()));
+ w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
+ Bytes.toBytes(context.getTaskAttemptID().toString()));
+ w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY,
+ Bytes.toBytes(true));
+ w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY,
+ Bytes.toBytes(compactionExclude));
+ w.appendTrackedTimestampsToMetadata();
+ w.close();
+ }
+ }
+
+ @Override
+ public void close(TaskAttemptContext c)
+ throws IOException, InterruptedException {
+ for (WriterLength wl: this.writers.values()) {
+ close(wl.writer);
+ }
+ }
+ };
+ }
+
+ /**
+ * Configure block storage policy for CF after the directory is created.
+ */
+ static void configureStoragePolicy(final Configuration conf, final FileSystem fs,
+ byte[] tableAndFamily, Path cfPath) {
+ if (null == conf || null == fs || null == tableAndFamily || null == cfPath) {
+ return;
+ }
+
+ String policy =
+ conf.get(STORAGE_POLICY_PROPERTY_CF_PREFIX + Bytes.toString(tableAndFamily),
+ conf.get(STORAGE_POLICY_PROPERTY));
+ FSUtils.setStoragePolicy(fs, cfPath, policy);
+ }
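For orientation, the method above only resolves the policy name from the job configuration (a per-column-family key first, falling back to the table-wide key) and delegates to FSUtils. A minimal stand-alone sketch of that terminal call, with a hypothetical output path and policy name, not part of this patch:

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.hbase.HBaseConfiguration;
  import org.apache.hadoop.hbase.util.FSUtils;

  public class StoragePolicyDemo {
    public static void main(String[] args) throws Exception {
      Configuration conf = HBaseConfiguration.create();
      FileSystem fs = FileSystem.get(conf);
      // Hypothetical column family output directory; in the output format this is cfPath.
      Path cfDir = new Path("/tmp/bulkload-output/cf1");
      fs.mkdirs(cfDir);
      // Same call as configureStoragePolicy above; the policy name is illustrative.
      FSUtils.setStoragePolicy(fs, cfDir, "ALL_SSD");
    }
  }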
+
+ /*
+ * Data structure to hold a Writer and amount of data written on it.
+ */
+ static class WriterLength {
+ long written = 0;
+ StoreFileWriter writer = null;
+ }
+
+ /**
+ * Return the start keys of all of the regions of the given tables,
+ * as a list of ImmutableBytesWritable.
+ */
+ private static List<ImmutableBytesWritable> getRegionStartKeys(List<RegionLocator> regionLocators,
+ boolean writeMultipleTables)
+ throws IOException {
+
+ ArrayList<ImmutableBytesWritable> ret = new ArrayList<>();
+ for(RegionLocator regionLocator : regionLocators)
+ {
+ TableName tableName = regionLocator.getName();
+ LOG.info("Looking up current regions for table " + tableName);
+ byte[][] byteKeys = regionLocator.getStartKeys();
+ for (byte[] byteKey : byteKeys) {
+ byte[] fullKey = byteKey; //HFileOutputFormat2 use case
+ if (writeMultipleTables)
+ {
+ //MultiTableHFileOutputFormat use case
+ fullKey = combineTableNameSuffix(tableName.getName(), byteKey);
+ }
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("SplitPoint startkey for table [" + tableName + "]: [" + Bytes.toStringBinary
+ (fullKey) + "]");
+ }
+ ret.add(new ImmutableBytesWritable(fullKey));
+ }
+ }
+ return ret;
+ }
+
+ /**
+ * Write out a {@link SequenceFile} that can be read by
+ * {@link TotalOrderPartitioner} that contains the split points in startKeys.
+ */
+ @SuppressWarnings("deprecation")
+ private static void writePartitions(Configuration conf, Path partitionsPath,
+ List<ImmutableBytesWritable> startKeys, boolean writeMultipleTables) throws IOException {
+ LOG.info("Writing partition information to " + partitionsPath);
+ if (startKeys.isEmpty()) {
+ throw new IllegalArgumentException("No regions passed");
+ }
+
+ // We're generating a list of split points, and we don't ever
+ // have keys < the first region (which has an empty start key)
+ // so we need to remove it. Otherwise we would end up with an
+ // empty reducer with index 0
+ TreeSet<ImmutableBytesWritable> sorted = new TreeSet<>(startKeys);
+ ImmutableBytesWritable first = sorted.first();
+ if (writeMultipleTables) {
+ first = new ImmutableBytesWritable(MultiTableHFileOutputFormat.getSuffix(sorted.first
+ ().get()));
+ }
+ if (!first.equals(HConstants.EMPTY_BYTE_ARRAY)) {
+ throw new IllegalArgumentException(
+ "First region of table should have empty start key. Instead has: "
+ + Bytes.toStringBinary(first.get()));
+ }
+ sorted.remove(sorted.first());
+
+ // Write the actual file
+ FileSystem fs = partitionsPath.getFileSystem(conf);
+ SequenceFile.Writer writer = SequenceFile.createWriter(
+ fs, conf, partitionsPath, ImmutableBytesWritable.class,
+ NullWritable.class);
+
+ try {
+ for (ImmutableBytesWritable startKey : sorted) {
+ writer.append(startKey, NullWritable.get());
+ }
+ } finally {
+ writer.close();
+ }
+ }
+
+ /**
+ * Configure a MapReduce Job to perform an incremental load into the given
+ * table. This
+ * <ul>
+ * <li>Inspects the table to configure a total order partitioner</li>
+ * <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
+ * <li>Sets the number of reduce tasks to match the current number of regions</li>
+ * <li>Sets the output key/value class to match HFileOutputFormat2's requirements</li>
+ * <li>Sets the reducer up to perform the appropriate sorting (KeyValueSortReducer,
+ * PutSortReducer or TextSortReducer, depending on the map output value class)</li>
+ * </ul>
+ * The user should be sure to set the map output value class to KeyValue, Put or Text before
+ * running this function.
+ */
+ public static void configureIncrementalLoad(Job job, Table table, RegionLocator regionLocator)
+ throws IOException {
+ configureIncrementalLoad(job, table.getDescriptor(), regionLocator);
+ }
+
+ /**
+ * Configure a MapReduce Job to perform an incremental load into the given
+ * table. This
+ * <ul>
+ * <li>Inspects the table to configure a total order partitioner</li>
+ * <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
+ * <li>Sets the number of reduce tasks to match the current number of regions</li>
+ * <li>Sets the output key/value class to match HFileOutputFormat2's requirements</li>
+ * <li>Sets the reducer up to perform the appropriate sorting (KeyValueSortReducer,
+ * PutSortReducer or TextSortReducer, depending on the map output value class)</li>
+ * </ul>
+ * The user should be sure to set the map output value class to KeyValue, Put or Text before
+ * running this function.
+ */
+ public static void configureIncrementalLoad(Job job, TableDescriptor tableDescriptor,
+ RegionLocator regionLocator) throws IOException {
+ ArrayList<TableInfo> singleTableInfo = new ArrayList<>();
+ singleTableInfo.add(new TableInfo(tableDescriptor, regionLocator));
+ configureIncrementalLoad(job, singleTableInfo, HFileOutputFormat2.class);
+ }
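For context, the overloads above are normally invoked from a job driver after the map output key and value classes have been set, because the value class determines which sort reducer is installed. A minimal, self-contained sketch of that pattern follows; the driver class, table name and column family are hypothetical and not part of this patch.

  import java.io.IOException;
  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.hbase.HBaseConfiguration;
  import org.apache.hadoop.hbase.TableName;
  import org.apache.hadoop.hbase.client.Connection;
  import org.apache.hadoop.hbase.client.ConnectionFactory;
  import org.apache.hadoop.hbase.client.Put;
  import org.apache.hadoop.hbase.client.RegionLocator;
  import org.apache.hadoop.hbase.client.Table;
  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
  import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
  import org.apache.hadoop.hbase.util.Bytes;
  import org.apache.hadoop.io.LongWritable;
  import org.apache.hadoop.io.Text;
  import org.apache.hadoop.mapreduce.Job;
  import org.apache.hadoop.mapreduce.Mapper;
  import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
  import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
  import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

  public class BulkLoadDriver {
    // Emits one Put per input line of the form "rowkey<TAB>value" into a hypothetical family "f".
    static class LineToPutMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
      @Override
      protected void map(LongWritable offset, Text line, Context ctx)
          throws IOException, InterruptedException {
        String[] parts = line.toString().split("\t", 2);
        if (parts.length < 2) {
          return; // skip malformed lines
        }
        byte[] row = Bytes.toBytes(parts[0]);
        Put put = new Put(row);
        put.addColumn(Bytes.toBytes("f"), Bytes.toBytes("q"), Bytes.toBytes(parts[1]));
        ctx.write(new ImmutableBytesWritable(row), put);
      }
    }

    public static void main(String[] args) throws Exception {
      Configuration conf = HBaseConfiguration.create();
      Job job = Job.getInstance(conf, "bulk-load-sketch");
      job.setJarByClass(BulkLoadDriver.class);
      job.setInputFormatClass(TextInputFormat.class);
      job.setMapperClass(LineToPutMapper.class);
      job.setMapOutputKeyClass(ImmutableBytesWritable.class);
      job.setMapOutputValueClass(Put.class); // Put map output => PutSortReducer is selected
      FileInputFormat.addInputPath(job, new Path(args[0]));
      FileOutputFormat.setOutputPath(job, new Path(args[1])); // HFiles are written under here
      try (Connection conn = ConnectionFactory.createConnection(conf);
           Table table = conn.getTable(TableName.valueOf("example_table"));
           RegionLocator locator = conn.getRegionLocator(table.getName())) {
        // Configures partitioner, reducer, reduce count and output key/value classes.
        HFileOutputFormat2.configureIncrementalLoad(job, table, locator);
      }
      System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
  }

After such a job completes, the HFiles under the output directory are typically handed to the completebulkload step (LoadIncrementalHFiles); that step is outside the scope of this class.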
+
+ static void configureIncrementalLoad(Job job, List<TableInfo> multiTableInfo, Class<? extends OutputFormat<?, ?>> cls) throws IOException {
+ Configuration conf = job.getConfiguration();
+ job.setOutputKeyClass(ImmutableBytesWritable.class);
+ job.setOutputValueClass(KeyValue.class);
+ job.setOutputFormatClass(cls);
+
+ if (multiTableInfo.stream().distinct().count() != multiTableInfo.size()) {
+ throw new IllegalArgumentException("Duplicate entries found in TableInfo argument");
+ }
+ boolean writeMultipleTables = false;
+ if (MultiTableHFileOutputFormat.class.equals(cls)) {
+ writeMultipleTables = true;
+ conf.setBoolean(MULTI_TABLE_HFILEOUTPUTFORMAT_CONF_KEY, true);
+ }
+ // Based on the configured map output class, set the correct reducer to properly
+ // sort the incoming values.
+ // TODO it would be nice to pick one or the other of these formats.
+ if (KeyValue.class.equals(job.getMapOutputValueClass())) {
+ job.setReducerClass(KeyValueSortReducer.class);
+ } else if (Put.class.equals(job.getMapOutputValueClass())) {
+ job.setReducerClass(PutSortReducer.class);
+ } else if (Text.class.equals(job.getMapOutputValueClass())) {
+ job.setReducerClass(TextSortReducer.class);
+ } else {
+ LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
+ }
+
+ conf.setStrings("io.serializations", conf.get("io.serializations"),
+ MutationSerialization.class.getName(), ResultSerialization.class.getName(),
+ KeyValueSerialization.class.getName());
+
+ if (conf.getBoolean(LOCALITY_SENSITIVE_CONF_KEY, DEFAULT_LOCALITY_SENSITIVE)) {
+ LOG.info("bulkload locality sensitive enabled");
+ }
+
+ /* Now get the region start keys for every table required */
+ List<String> allTableNames = new ArrayList<>(multiTableInfo.size());
+ List<RegionLocator> regionLocators = new ArrayList<>( multiTableInfo.size());
+ List<TableDescriptor> tableDescriptors = new ArrayList<>( multiTableInfo.size());
+
+ for( TableInfo tableInfo : multiTableInfo )
+ {
+ regionLocators.add(tableInfo.getRegionLocator());
+ allTableNames.add(tableInfo.getRegionLocator().getName().getNameAsString());
+ tableDescriptors.add(tableInfo.getTableDescriptor());
+ }
+ // Record table names so that writers can be created with favored nodes, and so that compression, block size and other per-column-family attributes can be decoded for each table
+ conf.set(OUTPUT_TABLE_NAME_CONF_KEY, StringUtils.join(allTableNames, Bytes
+ .toString(tableSeparator)));
+ List<ImmutableBytesWritable> startKeys = getRegionStartKeys(regionLocators, writeMultipleTables);
+ // Use table's region boundaries for TOP split points.
+ LOG.info("Configuring " + startKeys.size() + " reduce partitions " +
+ "to match current region count for all tables");
+ job.setNumReduceTasks(startKeys.size());
+
+ configurePartitioner(job, startKeys, writeMultipleTables);
+ // Set compression algorithms based on column families
+
+ conf.set(COMPRESSION_FAMILIES_CONF_KEY, serializeColumnFamilyAttribute(compressionDetails,
+ tableDescriptors));
+ conf.set(BLOCK_SIZE_FAMILIES_CONF_KEY, serializeColumnFamilyAttribute(blockSizeDetails,
+ tableDescriptors));
+ conf.set(BLOOM_TYPE_FAMILIES_CONF_KEY, serializeColumnFamilyAttribute(bloomTypeDetails,
+ tableDescriptors));
+ conf.set(DATABLOCK_ENCODING_FAMILIES_CONF_KEY,
+ serializeColumnFamilyAttribute(dataBlockEncodingDetails, tableDescriptors));
+
+ TableMapReduceUtil.addDependencyJars(job);
+ TableMapReduceUtil.initCredentials(job);
+ LOG.info("Incremental output configured for tables: " + StringUtils.join(allTableNames, ","));
+ }
+
+ public static void configureIncrementalLoadMap(Job job, TableDescriptor tableDescriptor) throws
+ IOException {
+ Configuration conf = job.getConfiguration();
+
+ job.setOutputKeyClass(ImmutableBytesWritable.class);
+ job.setOutputValueClass(KeyValue.class);
+ job.setOutputFormatClass(HFileOutputFormat2.class);
+
+ ArrayList<TableDescriptor> singleTableDescriptor = new ArrayList<>(1);
+ singleTableDescriptor.add(tableDescriptor);
+
+ conf.set(OUTPUT_TABLE_NAME_CONF_KEY, tableDescriptor.getTableName().getNameAsString());
+ // Set compression algorithms based on column families
+ conf.set(COMPRESSION_FAMILIES_CONF_KEY,
+ serializeColumnFamilyAttribute(compressionDetails, singleTableDescriptor));
+ conf.set(BLOCK_SIZE_FAMILIES_CONF_KEY,
+ serializeColumnFamilyAttribute(blockSizeDetails, singleTableDescriptor));
+ conf.set(BLOOM_TYPE_FAMILIES_CONF_KEY,
+ serializeColumnFamilyAttribute(bloomTypeDetails, singleTableDescriptor));
+ conf.set(DATABLOCK_ENCODING_FAMILIES_CONF_KEY,
+ serializeColumnFamilyAttribute(dataBlockEncodingDetails, singleTableDescriptor));
+
+ TableMapReduceUtil.addDependencyJars(job);
+ TableMapReduceUtil.initCredentials(job);
+ LOG.info("Incremental table " + tableDescriptor.getTableName() + " output configured.");
+ }
+
+ /**
+ * Runs inside the task to deserialize column family to compression algorithm
+ * map from the configuration.
+ *
+ * @param conf to read the serialized values from
+ * @return a map from column family to the configured compression algorithm
+ */
+ @VisibleForTesting
+ static Map<byte[], Algorithm> createFamilyCompressionMap(Configuration
+ conf) {
+ Map<byte[], String> stringMap = createFamilyConfValueMap(conf,
+ COMPRESSION_FAMILIES_CONF_KEY);
+ Map<byte[], Algorithm> compressionMap = new TreeMap<>(Bytes.BYTES_COMPARATOR);
+ for (Map.Entry<byte[], String> e : stringMap.entrySet()) {
+ Algorithm algorithm = HFileWriterImpl.compressionByName(e.getValue());
+ compressionMap.put(e.getKey(), algorithm);
+ }
+ return compressionMap;
+ }
+
+ /**
+ * Runs inside the task to deserialize column family to bloom filter type
+ * map from the configuration.
+ *
+ * @param conf to read the serialized values from
+ * @return a map from column family to the configured bloom filter type
+ */
+ @VisibleForTesting
+ static Map<byte[], BloomType> createFamilyBloomTypeMap(Configuration conf) {
+ Map<byte[], String> stringMap = createFamilyConfValueMap(conf,
+ BLOOM_TYPE_FAMILIES_CONF_KEY);
+ Map<byte[], BloomType> bloomTypeMap = new TreeMap<>(Bytes.BYTES_COMPARATOR);
+ for (Map.Entry<byte[], String> e : stringMap.entrySet()) {
+ BloomType bloomType = BloomType.valueOf(e.getValue());
+ bloomTypeMap.put(e.getKey(), bloomType);
+ }
+ return bloomTypeMap;
+ }
+
+ /**
+ * Runs inside the task to deserialize column family to block size
+ * map from the configuration.
+ *
+ * @param conf to read the serialized values from
+ * @return a map from column family to the configured block size
+ */
+ @VisibleForTesting
+ static Map<byte[], Integer> createFamilyBlockSizeMap(Configuration conf) {
+ Map<byte[], String> stringMap = createFamilyConfValueMap(conf,
+ BLOCK_SIZE_FAMILIES_CONF_KEY);
+ Map<byte[], Integer> blockSizeMap = new TreeMap<>(Bytes.BYTES_COMPARATOR);
+ for (Map.Entry<byte[], String> e : stringMap.entrySet()) {
+ Integer blockSize = Integer.parseInt(e.getValue());
+ blockSizeMap.put(e.getKey(), blockSize);
+ }
+ return blockSizeMap;
+ }
+
+ /**
+ * Runs inside the task to deserialize column family to data block encoding
+ * type map from the configuration.
+ *
+ * @param conf to read the serialized values from
+ * @return a map from column family to the configured data block
+ * encoding for the family
+ */
+ @VisibleForTesting
+ static Map<byte[], DataBlockEncoding> createFamilyDataBlockEncodingMap(
+ Configuration conf) {
+ Map<byte[], String> stringMap = createFamilyConfValueMap(conf,
+ DATABLOCK_ENCODING_FAMILIES_CONF_KEY);
+ Map<byte[], DataBlockEncoding> encoderMap = new TreeMap<>(Bytes.BYTES_COMPARATOR);
+ for (Map.Entry<byte[], String> e : stringMap.entrySet()) {
+ encoderMap.put(e.getKey(), DataBlockEncoding.valueOf((e.getValue())));
+ }
+ return encoderMap;
+ }
+
+
+ /**
+ * Run inside the task to deserialize column family to given conf value map.
+ *
+ * @param conf to read the serialized values from
+ * @param confName conf key to read from the configuration
+ * @return a map of column family to the given configuration value
+ */
+ private static Map<byte[], String> createFamilyConfValueMap(
+ Configuration conf, String confName) {
+ Map<byte[], String> confValMap = new TreeMap<>(Bytes.BYTES_COMPARATOR);
+ String confVal = conf.get(confName, "");
+ for (String familyConf : confVal.split("&")) {
+ String[] familySplit = familyConf.split("=");
+ if (familySplit.length != 2) {
+ continue;
+ }
+ try {
+ confValMap.put(URLDecoder.decode(familySplit[0], "UTF-8").getBytes(StandardCharsets.UTF_8),
+ URLDecoder.decode(familySplit[1], "UTF-8"));
+ } catch (UnsupportedEncodingException e) {
+ // will not happen with UTF-8 encoding
+ throw new AssertionError(e);
+ }
+ }
+ return confValMap;
+ }
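To make the round trip concrete: serializeColumnFamilyAttribute (further down) emits '&'-separated, URL-encoded key=value pairs keyed by the table-suffixed family name, and the method above splits them back apart. A small stand-alone sketch of that format, with hypothetical family names and values:

  import java.net.URLDecoder;
  import java.util.Map;
  import java.util.TreeMap;

  public class FamilyConfValueFormatDemo {
    public static void main(String[] args) throws Exception {
      // Hypothetical serialized value, as written by serializeColumnFamilyAttribute below:
      // URL-encoded key=value pairs joined with '&'.
      String serialized = "cf1=GZ&cf2=NONE";
      Map<String, String> parsed = new TreeMap<>();
      for (String pair : serialized.split("&")) {
        String[] kv = pair.split("=");
        if (kv.length != 2) {
          continue; // skip malformed pairs, mirroring createFamilyConfValueMap above
        }
        parsed.put(URLDecoder.decode(kv[0], "UTF-8"), URLDecoder.decode(kv[1], "UTF-8"));
      }
      System.out.println(parsed); // {cf1=GZ, cf2=NONE}
    }
  }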
+
+ /**
+ * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
+ * <code>splitPoints</code>. Cleans up the partitions file after the job exits.
+ */
+ static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints, boolean
+ writeMultipleTables)
+ throws IOException {
+ Configuration conf = job.getConfiguration();
+ // create the partitions file
+ FileSystem fs = FileSystem.get(conf);
+ String hbaseTmpFsDir =
+ conf.get(HConstants.TEMPORARY_FS_DIRECTORY_KEY,
+ HConstants.DEFAULT_TEMPORARY_HDFS_DIRECTORY);
+ Path partitionsPath = new Path(hbaseTmpFsDir, "partitions_" + UUID.randomUUID());
+ fs.makeQualified(partitionsPath);
+ writePartitions(conf, partitionsPath, splitPoints, writeMultipleTables);
+ fs.deleteOnExit(partitionsPath);
+
+ // configure job to use it
+ job.setPartitionerClass(TotalOrderPartitioner.class);
+ TotalOrderPartitioner.setPartitionFile(conf, partitionsPath);
+ }
+
+ @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "RCN_REDUNDANT_NULLCHECK_OF_NONNULL_VALUE")
+ @VisibleForTesting
+ static String serializeColumnFamilyAttribute(Function<ColumnFamilyDescriptor, String> fn, List<TableDescriptor> allTables)
+ throws UnsupportedEncodingException {
+ StringBuilder attributeValue = new StringBuilder();
+ int i = 0;
+ for (TableDescriptor tableDescriptor : allTables) {
+ if (tableDescriptor == null) {
+ // could happen with mock table instance
+ // CODEREVIEW: Can I set an empty string in conf if mock table instance?
+ return "";
+ }
+ for (ColumnFamilyDescriptor familyDescriptor : tableDescriptor.getColumnFamilies()) {
+ if (i++ > 0) {
+ attributeValue.append('&');
+ }
+ attributeValue.append(URLEncoder.encode(
+ Bytes.toString(combineTableNameSuffix(tableDescriptor.getTableName().getName(), familyDescriptor.getName())),
+ "UTF-8"));
+ attributeValue.append('=');
+ attributeValue.append(URLEncoder.encode(fn.apply(familyDescriptor), "UTF-8"));
+ }
+ }
+ // Pairs are joined with '&' above, so there is no trailing ampersand to strip
+ return attributeValue.toString();
+ }
+
+ /**
+ * Serialize column family to compression algorithm map to configuration.
+ * Invoked while configuring the MR job for incremental load.
+ *
+ * @param tableDescriptor to read the properties from
+ * @param conf to persist serialized values into
+ * @throws IOException
+ * on failure to read column family descriptors
+ */
+ @VisibleForTesting
+ static Function<ColumnFamilyDescriptor, String> compressionDetails = familyDescriptor ->
+ familyDescriptor.getCompressionType().getName();
+
+ /**
+ * Serialize column family to block size map to configuration. Invoked while
+ * configuring the MR job for incremental load.
+ *
+ * @param tableDescriptor
+ * to read the properties from
+ * @param conf
+ * to persist serialized values into
+ *
+ * @throws IOException
+ * on failure to read column family descriptors
+ */
+ @VisibleForTesting
+ static Function<ColumnFamilyDescriptor, String> blockSizeDetails = familyDescriptor -> String
+ .valueOf(familyDescriptor.getBlocksize());
+
+ /**
+ * Serialize column family to bloom type map to configuration. Invoked while
+ * configuring the MR job for incremental load.
+ *
+ * @param tableDescriptor
+ * to read the properties from
+ * @param conf
+ * to persist serialized values into
+ *
+ * @throws IOException
+ * on failure to read column family descriptors
+ */
+ @VisibleForTesting
+ static Function<ColumnFamilyDescriptor, String> bloomTypeDetails = familyDescriptor -> {
+ String bloomType = familyDescriptor.getBloomFilterType().toString();
+ if (bloomType == null) {
+ bloomType = ColumnFamilyDescriptorBuilder.DEFAULT_BLOOMFILTER.name();
+ }
+ return bloomType;
+ };
+
+ /**
+ * Serialize column family to data block encoding map to configuration.
+ * Invoked while configuring the MR job for incremental load.
+ *
+ * @param tableDescriptor
+ * to read the properties from
+ * @param conf
+ * to persist serialized values into
+ * @throws IOException
+ * on failure to read column family descriptors
+ */
+ @VisibleForTesting
+ static Function<ColumnFamilyDescriptor, String> dataBlockEncodingDetails = familyDescriptor -> {
+ DataBlockEncoding encoding = familyDescriptor.getDataBlockEncoding();
+ if (encoding == null) {
+ encoding = DataBlockEncoding.NONE;
+ }
+ return encoding.toString();
+ };
+
+}
[06/41] hbase git commit: HBASE-18640 Move mapreduce out of
hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportExport.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportExport.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportExport.java
deleted file mode 100644
index dc59817..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportExport.java
+++ /dev/null
@@ -1,727 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-import static org.mockito.Matchers.any;
-import static org.mockito.Mockito.doAnswer;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.when;
-
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.net.URL;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.NavigableMap;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.HColumnDescriptor;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.KeepDeletedCells;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Delete;
-import org.apache.hadoop.hbase.client.Durability;
-import org.apache.hadoop.hbase.client.Get;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.filter.Filter;
-import org.apache.hadoop.hbase.filter.FilterBase;
-import org.apache.hadoop.hbase.filter.PrefixFilter;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.mapreduce.Import.KeyValueImporter;
-import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener;
-import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
-import org.apache.hadoop.hbase.wal.WAL;
-import org.apache.hadoop.hbase.wal.WALKey;
-import org.apache.hadoop.hbase.testclassification.MediumTests;
-import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.LauncherSecurityManager;
-import org.apache.hadoop.mapreduce.Mapper.Context;
-import org.apache.hadoop.util.ToolRunner;
-import org.junit.After;
-import org.junit.AfterClass;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
-import org.mockito.invocation.InvocationOnMock;
-import org.mockito.stubbing.Answer;
-
-/**
- * Tests the table import and table export MR job functionality
- */
-@Category({VerySlowMapReduceTests.class, MediumTests.class})
-public class TestImportExport {
- private static final Log LOG = LogFactory.getLog(TestImportExport.class);
- private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
- private static final byte[] ROW1 = Bytes.toBytesBinary("\\x32row1");
- private static final byte[] ROW2 = Bytes.toBytesBinary("\\x32row2");
- private static final byte[] ROW3 = Bytes.toBytesBinary("\\x32row3");
- private static final String FAMILYA_STRING = "a";
- private static final String FAMILYB_STRING = "b";
- private static final byte[] FAMILYA = Bytes.toBytes(FAMILYA_STRING);
- private static final byte[] FAMILYB = Bytes.toBytes(FAMILYB_STRING);
- private static final byte[] QUAL = Bytes.toBytes("q");
- private static final String OUTPUT_DIR = "outputdir";
- private static String FQ_OUTPUT_DIR;
- private static final String EXPORT_BATCH_SIZE = "100";
-
- private static long now = System.currentTimeMillis();
-
- @BeforeClass
- public static void beforeClass() throws Exception {
- // Up the handlers; this test needs more than usual.
- UTIL.getConfiguration().setInt(HConstants.REGION_SERVER_HIGH_PRIORITY_HANDLER_COUNT, 10);
- UTIL.startMiniCluster();
- FQ_OUTPUT_DIR =
- new Path(OUTPUT_DIR).makeQualified(FileSystem.get(UTIL.getConfiguration())).toString();
- }
-
- @AfterClass
- public static void afterClass() throws Exception {
- UTIL.shutdownMiniCluster();
- }
-
- @Rule
- public final TestName name = new TestName();
-
- @Before
- public void announce() {
- LOG.info("Running " + name.getMethodName());
- }
-
- @Before
- @After
- public void cleanup() throws Exception {
- FileSystem fs = FileSystem.get(UTIL.getConfiguration());
- fs.delete(new Path(OUTPUT_DIR), true);
- }
-
- /**
- * Runs an export job with the specified command line args
- * @param args
- * @return true if job completed successfully
- * @throws IOException
- * @throws InterruptedException
- * @throws ClassNotFoundException
- */
- boolean runExport(String[] args) throws Exception {
- // need to make a copy of the configuration to make sure different temp dirs are used.
- int status = ToolRunner.run(new Configuration(UTIL.getConfiguration()), new Export(), args);
- return status == 0;
- }
-
- /**
- * Runs an import job with the specified command line args
- * @param args
- * @return true if job completed successfully
- * @throws IOException
- * @throws InterruptedException
- * @throws ClassNotFoundException
- */
- boolean runImport(String[] args) throws Exception {
- // need to make a copy of the configuration to make sure different temp dirs are used.
- int status = ToolRunner.run(new Configuration(UTIL.getConfiguration()), new Import(), args);
- return status == 0;
- }
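The two helpers above mirror how Export and Import are driven outside of tests: each is a Tool run via ToolRunner with positional arguments. A hedged stand-alone sketch of the same round trip (table names and paths are illustrative; the argument order follows the usage string asserted in testExportMain further down):

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.hbase.HBaseConfiguration;
  import org.apache.hadoop.hbase.mapreduce.Export;
  import org.apache.hadoop.hbase.mapreduce.Import;
  import org.apache.hadoop.util.ToolRunner;

  public class ExportImportRoundTrip {
    public static void main(String[] args) throws Exception {
      Configuration conf = HBaseConfiguration.create();
      // Export: <tablename> <outputdir> [<versions> [<starttime> [<endtime>]]]
      int exportRc = ToolRunner.run(new Configuration(conf), new Export(),
          new String[] { "my_table", "/tmp/export-out", "1000" });
      // Import: <tablename> <inputdir>
      int importRc = ToolRunner.run(new Configuration(conf), new Import(),
          new String[] { "my_table_copy", "/tmp/export-out" });
      System.exit(exportRc == 0 && importRc == 0 ? 0 : 1);
    }
  }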
-
- /**
- * Test simple replication case with column mapping
- * @throws Exception
- */
- @Test
- public void testSimpleCase() throws Exception {
- try (Table t = UTIL.createTable(TableName.valueOf(name.getMethodName()), FAMILYA, 3);) {
- Put p = new Put(ROW1);
- p.addColumn(FAMILYA, QUAL, now, QUAL);
- p.addColumn(FAMILYA, QUAL, now + 1, QUAL);
- p.addColumn(FAMILYA, QUAL, now + 2, QUAL);
- t.put(p);
- p = new Put(ROW2);
- p.addColumn(FAMILYA, QUAL, now, QUAL);
- p.addColumn(FAMILYA, QUAL, now + 1, QUAL);
- p.addColumn(FAMILYA, QUAL, now + 2, QUAL);
- t.put(p);
- p = new Put(ROW3);
- p.addColumn(FAMILYA, QUAL, now, QUAL);
- p.addColumn(FAMILYA, QUAL, now + 1, QUAL);
- p.addColumn(FAMILYA, QUAL, now + 2, QUAL);
- t.put(p);
- }
-
- String[] args = new String[] {
- // Only export row1 & row2.
- "-D" + TableInputFormat.SCAN_ROW_START + "=\\x32row1",
- "-D" + TableInputFormat.SCAN_ROW_STOP + "=\\x32row3",
- name.getMethodName(),
- FQ_OUTPUT_DIR,
- "1000", // max number of key versions per key to export
- };
- assertTrue(runExport(args));
-
- final String IMPORT_TABLE = name.getMethodName() + "import";
- try (Table t = UTIL.createTable(TableName.valueOf(IMPORT_TABLE), FAMILYB, 3);) {
- args = new String[] {
- "-D" + Import.CF_RENAME_PROP + "="+FAMILYA_STRING+":"+FAMILYB_STRING,
- IMPORT_TABLE,
- FQ_OUTPUT_DIR
- };
- assertTrue(runImport(args));
-
- Get g = new Get(ROW1);
- g.setMaxVersions();
- Result r = t.get(g);
- assertEquals(3, r.size());
- g = new Get(ROW2);
- g.setMaxVersions();
- r = t.get(g);
- assertEquals(3, r.size());
- g = new Get(ROW3);
- r = t.get(g);
- assertEquals(0, r.size());
- }
- }
-
- /**
- * Test export hbase:meta table
- *
- * @throws Exception
- */
- @Test
- public void testMetaExport() throws Exception {
- String EXPORT_TABLE = TableName.META_TABLE_NAME.getNameAsString();
- String[] args = new String[] { EXPORT_TABLE, FQ_OUTPUT_DIR, "1", "0", "0" };
- assertTrue(runExport(args));
- }
-
- /**
- * Test import data from 0.94 exported file
- * @throws Exception
- */
- @Test
- public void testImport94Table() throws Exception {
- final String name = "exportedTableIn94Format";
- URL url = TestImportExport.class.getResource(name);
- File f = new File(url.toURI());
- if (!f.exists()) {
- LOG.warn("FAILED TO FIND " + f + "; skipping out on test");
- return;
- }
- assertTrue(f.exists());
- LOG.info("FILE=" + f);
- Path importPath = new Path(f.toURI());
- FileSystem fs = FileSystem.get(UTIL.getConfiguration());
- fs.copyFromLocalFile(importPath, new Path(FQ_OUTPUT_DIR + Path.SEPARATOR + name));
- String IMPORT_TABLE = name;
- try (Table t = UTIL.createTable(TableName.valueOf(IMPORT_TABLE), Bytes.toBytes("f1"), 3);) {
- String[] args = new String[] {
- "-Dhbase.import.version=0.94" ,
- IMPORT_TABLE, FQ_OUTPUT_DIR
- };
- assertTrue(runImport(args));
- /* exportedTableIn94Format contains 5 rows
- ROW COLUMN+CELL
- r1 column=f1:c1, timestamp=1383766761171, value=val1
- r2 column=f1:c1, timestamp=1383766771642, value=val2
- r3 column=f1:c1, timestamp=1383766777615, value=val3
- r4 column=f1:c1, timestamp=1383766785146, value=val4
- r5 column=f1:c1, timestamp=1383766791506, value=val5
- */
- assertEquals(5, UTIL.countRows(t));
- }
- }
-
- /**
- * Test export scanner batching
- */
- @Test
- public void testExportScannerBatching() throws Exception {
- HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(name.getMethodName()));
- desc.addFamily(new HColumnDescriptor(FAMILYA)
- .setMaxVersions(1)
- );
- UTIL.getAdmin().createTable(desc);
- try (Table t = UTIL.getConnection().getTable(desc.getTableName());) {
-
- Put p = new Put(ROW1);
- p.addColumn(FAMILYA, QUAL, now, QUAL);
- p.addColumn(FAMILYA, QUAL, now + 1, QUAL);
- p.addColumn(FAMILYA, QUAL, now + 2, QUAL);
- p.addColumn(FAMILYA, QUAL, now + 3, QUAL);
- p.addColumn(FAMILYA, QUAL, now + 4, QUAL);
- t.put(p);
-
- String[] args = new String[] {
- "-D" + Export.EXPORT_BATCHING + "=" + EXPORT_BATCH_SIZE, // added scanner batching arg.
- name.getMethodName(),
- FQ_OUTPUT_DIR
- };
- assertTrue(runExport(args));
-
- FileSystem fs = FileSystem.get(UTIL.getConfiguration());
- fs.delete(new Path(FQ_OUTPUT_DIR), true);
- }
- }
-
- @Test
- public void testWithDeletes() throws Exception {
- HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(name.getMethodName()));
- desc.addFamily(new HColumnDescriptor(FAMILYA)
- .setMaxVersions(5)
- .setKeepDeletedCells(KeepDeletedCells.TRUE)
- );
- UTIL.getAdmin().createTable(desc);
- try (Table t = UTIL.getConnection().getTable(desc.getTableName());) {
-
- Put p = new Put(ROW1);
- p.addColumn(FAMILYA, QUAL, now, QUAL);
- p.addColumn(FAMILYA, QUAL, now + 1, QUAL);
- p.addColumn(FAMILYA, QUAL, now + 2, QUAL);
- p.addColumn(FAMILYA, QUAL, now + 3, QUAL);
- p.addColumn(FAMILYA, QUAL, now + 4, QUAL);
- t.put(p);
-
- Delete d = new Delete(ROW1, now+3);
- t.delete(d);
- d = new Delete(ROW1);
- d.addColumns(FAMILYA, QUAL, now+2);
- t.delete(d);
- }
-
- String[] args = new String[] {
- "-D" + Export.RAW_SCAN + "=true",
- name.getMethodName(),
- FQ_OUTPUT_DIR,
- "1000", // max number of key versions per key to export
- };
- assertTrue(runExport(args));
-
- final String IMPORT_TABLE = name.getMethodName() + "import";
- desc = new HTableDescriptor(TableName.valueOf(IMPORT_TABLE));
- desc.addFamily(new HColumnDescriptor(FAMILYA)
- .setMaxVersions(5)
- .setKeepDeletedCells(KeepDeletedCells.TRUE)
- );
- UTIL.getAdmin().createTable(desc);
- try (Table t = UTIL.getConnection().getTable(desc.getTableName());) {
- args = new String[] {
- IMPORT_TABLE,
- FQ_OUTPUT_DIR
- };
- assertTrue(runImport(args));
-
- Scan s = new Scan();
- s.setMaxVersions();
- s.setRaw(true);
- ResultScanner scanner = t.getScanner(s);
- Result r = scanner.next();
- Cell[] res = r.rawCells();
- assertTrue(CellUtil.isDeleteFamily(res[0]));
- assertEquals(now+4, res[1].getTimestamp());
- assertEquals(now+3, res[2].getTimestamp());
- assertTrue(CellUtil.isDelete(res[3]));
- assertEquals(now+2, res[4].getTimestamp());
- assertEquals(now+1, res[5].getTimestamp());
- assertEquals(now, res[6].getTimestamp());
- }
- }
-
-
- @Test
- public void testWithMultipleDeleteFamilyMarkersOfSameRowSameFamily() throws Exception {
- final TableName exportTable = TableName.valueOf(name.getMethodName());
- HTableDescriptor desc = new HTableDescriptor(exportTable);
- desc.addFamily(new HColumnDescriptor(FAMILYA)
- .setMaxVersions(5)
- .setKeepDeletedCells(KeepDeletedCells.TRUE)
- );
- UTIL.getAdmin().createTable(desc);
-
- Table exportT = UTIL.getConnection().getTable(exportTable);
-
- //Add first version of QUAL
- Put p = new Put(ROW1);
- p.addColumn(FAMILYA, QUAL, now, QUAL);
- exportT.put(p);
-
- //Add Delete family marker
- Delete d = new Delete(ROW1, now+3);
- exportT.delete(d);
-
- //Add second version of QUAL
- p = new Put(ROW1);
- p.addColumn(FAMILYA, QUAL, now + 5, "s".getBytes());
- exportT.put(p);
-
- //Add second Delete family marker
- d = new Delete(ROW1, now+7);
- exportT.delete(d);
-
-
- String[] args = new String[] {
- "-D" + Export.RAW_SCAN + "=true", exportTable.getNameAsString(),
- FQ_OUTPUT_DIR,
- "1000", // max number of key versions per key to export
- };
- assertTrue(runExport(args));
-
- final String importTable = name.getMethodName() + "import";
- desc = new HTableDescriptor(TableName.valueOf(importTable));
- desc.addFamily(new HColumnDescriptor(FAMILYA)
- .setMaxVersions(5)
- .setKeepDeletedCells(KeepDeletedCells.TRUE)
- );
- UTIL.getAdmin().createTable(desc);
-
- Table importT = UTIL.getConnection().getTable(TableName.valueOf(importTable));
- args = new String[] {
- importTable,
- FQ_OUTPUT_DIR
- };
- assertTrue(runImport(args));
-
- Scan s = new Scan();
- s.setMaxVersions();
- s.setRaw(true);
-
- ResultScanner importedTScanner = importT.getScanner(s);
- Result importedTResult = importedTScanner.next();
-
- ResultScanner exportedTScanner = exportT.getScanner(s);
- Result exportedTResult = exportedTScanner.next();
- try {
- Result.compareResults(exportedTResult, importedTResult);
- } catch (Exception e) {
- fail("Original and imported tables data comparision failed with error:"+e.getMessage());
- } finally {
- exportT.close();
- importT.close();
- }
- }
-
- /**
- * Create a simple table, run an Export Job on it, Import with filtering on, verify counts,
- * attempt with invalid values.
- */
- @Test
- public void testWithFilter() throws Exception {
- // Create simple table to export
- HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(name.getMethodName()));
- desc.addFamily(new HColumnDescriptor(FAMILYA).setMaxVersions(5));
- UTIL.getAdmin().createTable(desc);
- Table exportTable = UTIL.getConnection().getTable(desc.getTableName());
-
- Put p1 = new Put(ROW1);
- p1.addColumn(FAMILYA, QUAL, now, QUAL);
- p1.addColumn(FAMILYA, QUAL, now + 1, QUAL);
- p1.addColumn(FAMILYA, QUAL, now + 2, QUAL);
- p1.addColumn(FAMILYA, QUAL, now + 3, QUAL);
- p1.addColumn(FAMILYA, QUAL, now + 4, QUAL);
-
- // Having another row would actually test the filter.
- Put p2 = new Put(ROW2);
- p2.addColumn(FAMILYA, QUAL, now, QUAL);
-
- exportTable.put(Arrays.asList(p1, p2));
-
- // Export the simple table
- String[] args = new String[] { name.getMethodName(), FQ_OUTPUT_DIR, "1000" };
- assertTrue(runExport(args));
-
- // Import to a new table
- final String IMPORT_TABLE = name.getMethodName() + "import";
- desc = new HTableDescriptor(TableName.valueOf(IMPORT_TABLE));
- desc.addFamily(new HColumnDescriptor(FAMILYA).setMaxVersions(5));
- UTIL.getAdmin().createTable(desc);
-
- Table importTable = UTIL.getConnection().getTable(desc.getTableName());
- args = new String[] { "-D" + Import.FILTER_CLASS_CONF_KEY + "=" + PrefixFilter.class.getName(),
- "-D" + Import.FILTER_ARGS_CONF_KEY + "=" + Bytes.toString(ROW1), IMPORT_TABLE,
- FQ_OUTPUT_DIR,
- "1000" };
- assertTrue(runImport(args));
-
- // get the count of the source table for that time range
- PrefixFilter filter = new PrefixFilter(ROW1);
- int count = getCount(exportTable, filter);
-
- Assert.assertEquals("Unexpected row count between export and import tables", count,
- getCount(importTable, null));
-
- // and then test that a broken command doesn't bork everything - easier here because we don't
- // need to re-run the export job
-
- args = new String[] { "-D" + Import.FILTER_CLASS_CONF_KEY + "=" + Filter.class.getName(),
- "-D" + Import.FILTER_ARGS_CONF_KEY + "=" + Bytes.toString(ROW1) + "", name.getMethodName(),
- FQ_OUTPUT_DIR, "1000" };
- assertFalse(runImport(args));
-
- // cleanup
- exportTable.close();
- importTable.close();
- }
-
- /**
- * Count the number of KeyValues in the specified table that match the given filter
- * @param table the table to scan
- * @param filter the filter to apply, or null to count every cell
- * @return the number of matching KeyValues
- * @throws IOException
- */
- private int getCount(Table table, Filter filter) throws IOException {
- Scan scan = new Scan();
- scan.setFilter(filter);
- ResultScanner results = table.getScanner(scan);
- int count = 0;
- for (Result res : results) {
- count += res.size();
- }
- results.close();
- return count;
- }
-
- /**
- * test main method. Import should print help and call System.exit
- */
- @Test
- public void testImportMain() throws Exception {
- PrintStream oldPrintStream = System.err;
- SecurityManager SECURITY_MANAGER = System.getSecurityManager();
- LauncherSecurityManager newSecurityManager= new LauncherSecurityManager();
- System.setSecurityManager(newSecurityManager);
- ByteArrayOutputStream data = new ByteArrayOutputStream();
- String[] args = {};
- System.setErr(new PrintStream(data));
- try {
- System.setErr(new PrintStream(data));
- Import.main(args);
- fail("should be SecurityException");
- } catch (SecurityException e) {
- assertEquals(-1, newSecurityManager.getExitCode());
- assertTrue(data.toString().contains("Wrong number of arguments:"));
- assertTrue(data.toString().contains("-Dimport.bulk.output=/path/for/output"));
- assertTrue(data.toString().contains("-Dimport.filter.class=<name of filter class>"));
- assertTrue(data.toString().contains("-Dimport.bulk.output=/path/for/output"));
- assertTrue(data.toString().contains("-Dmapreduce.reduce.speculative=false"));
- } finally {
- System.setErr(oldPrintStream);
- System.setSecurityManager(SECURITY_MANAGER);
- }
- }
-
- /**
- * test main method. Export should print help and call System.exit
- */
- @Test
- public void testExportMain() throws Exception {
- PrintStream oldPrintStream = System.err;
- SecurityManager SECURITY_MANAGER = System.getSecurityManager();
- LauncherSecurityManager newSecurityManager= new LauncherSecurityManager();
- System.setSecurityManager(newSecurityManager);
- ByteArrayOutputStream data = new ByteArrayOutputStream();
- String[] args = {};
- System.setErr(new PrintStream(data));
- try {
- System.setErr(new PrintStream(data));
- Export.main(args);
- fail("should be SecurityException");
- } catch (SecurityException e) {
- assertEquals(-1, newSecurityManager.getExitCode());
- String errMsg = data.toString();
- assertTrue(errMsg.contains("Wrong number of arguments:"));
- assertTrue(errMsg.contains(
- "Usage: Export [-D <property=value>]* <tablename> <outputdir> [<versions> " +
- "[<starttime> [<endtime>]] [^[regex pattern] or [Prefix] to filter]]"));
- assertTrue(
- errMsg.contains("-D hbase.mapreduce.scan.column.family=<family1>,<family2>, ..."));
- assertTrue(errMsg.contains("-D hbase.mapreduce.include.deleted.rows=true"));
- assertTrue(errMsg.contains("-Dhbase.client.scanner.caching=100"));
- assertTrue(errMsg.contains("-Dmapreduce.map.speculative=false"));
- assertTrue(errMsg.contains("-Dmapreduce.reduce.speculative=false"));
- assertTrue(errMsg.contains("-Dhbase.export.scanner.batch=10"));
- } finally {
- System.setErr(oldPrintStream);
- System.setSecurityManager(SECURITY_MANAGER);
- }
- }
-
- /**
- * Test map method of Importer
- */
- @SuppressWarnings({ "unchecked", "rawtypes" })
- @Test
- public void testKeyValueImporter() throws Exception {
- KeyValueImporter importer = new KeyValueImporter();
- Configuration configuration = new Configuration();
- Context ctx = mock(Context.class);
- when(ctx.getConfiguration()).thenReturn(configuration);
-
- doAnswer(new Answer<Void>() {
-
- @Override
- public Void answer(InvocationOnMock invocation) throws Throwable {
- ImmutableBytesWritable writer = (ImmutableBytesWritable) invocation.getArguments()[0];
- KeyValue key = (KeyValue) invocation.getArguments()[1];
- assertEquals("Key", Bytes.toString(writer.get()));
- assertEquals("row", Bytes.toString(CellUtil.cloneRow(key)));
- return null;
- }
- }).when(ctx).write(any(ImmutableBytesWritable.class), any(KeyValue.class));
-
- importer.setup(ctx);
- Result value = mock(Result.class);
- KeyValue[] keys = {
- new KeyValue(Bytes.toBytes("row"), Bytes.toBytes("family"), Bytes.toBytes("qualifier"),
- Bytes.toBytes("value")),
- new KeyValue(Bytes.toBytes("row"), Bytes.toBytes("family"), Bytes.toBytes("qualifier"),
- Bytes.toBytes("value1")) };
- when(value.rawCells()).thenReturn(keys);
- importer.map(new ImmutableBytesWritable(Bytes.toBytes("Key")), value, ctx);
-
- }
-
- /**
- * Test addFilterAndArguments method of Import This method set couple
- * parameters into Configuration
- */
- @Test
- public void testAddFilterAndArguments() throws IOException {
- Configuration configuration = new Configuration();
-
- List<String> args = new ArrayList<>();
- args.add("param1");
- args.add("param2");
-
- Import.addFilterAndArguments(configuration, FilterBase.class, args);
- assertEquals("org.apache.hadoop.hbase.filter.FilterBase",
- configuration.get(Import.FILTER_CLASS_CONF_KEY));
- assertEquals("param1,param2", configuration.get(Import.FILTER_ARGS_CONF_KEY));
- }
-
- @Test
- public void testDurability() throws Exception {
- // Create an export table.
- String exportTableName = name.getMethodName() + "export";
- try (Table exportTable = UTIL.createTable(TableName.valueOf(exportTableName), FAMILYA, 3);) {
-
- // Insert some data
- Put put = new Put(ROW1);
- put.addColumn(FAMILYA, QUAL, now, QUAL);
- put.addColumn(FAMILYA, QUAL, now + 1, QUAL);
- put.addColumn(FAMILYA, QUAL, now + 2, QUAL);
- exportTable.put(put);
-
- put = new Put(ROW2);
- put.addColumn(FAMILYA, QUAL, now, QUAL);
- put.addColumn(FAMILYA, QUAL, now + 1, QUAL);
- put.addColumn(FAMILYA, QUAL, now + 2, QUAL);
- exportTable.put(put);
-
- // Run the export
- String[] args = new String[] { exportTableName, FQ_OUTPUT_DIR, "1000"};
- assertTrue(runExport(args));
-
- // Create the table for import
- String importTableName = name.getMethodName() + "import1";
- Table importTable = UTIL.createTable(TableName.valueOf(importTableName), FAMILYA, 3);
-
- // Register the wal listener for the import table
- HRegionInfo region = UTIL.getHBaseCluster().getRegionServerThreads().get(0).getRegionServer()
- .getOnlineRegions(importTable.getName()).get(0).getRegionInfo();
- TableWALActionListener walListener = new TableWALActionListener(region);
- WAL wal = UTIL.getMiniHBaseCluster().getRegionServer(0).getWAL(region);
- wal.registerWALActionsListener(walListener);
-
- // Run the import with SKIP_WAL
- args =
- new String[] { "-D" + Import.WAL_DURABILITY + "=" + Durability.SKIP_WAL.name(),
- importTableName, FQ_OUTPUT_DIR };
- assertTrue(runImport(args));
- //Assert that the wal is not visited
- assertTrue(!walListener.isWALVisited());
- //Ensure that the count is 2 (only one version of key value is obtained)
- assertTrue(getCount(importTable, null) == 2);
-
- // Run the import with the default durability option
- importTableName = name.getMethodName() + "import2";
- importTable = UTIL.createTable(TableName.valueOf(importTableName), FAMILYA, 3);
- region = UTIL.getHBaseCluster().getRegionServerThreads().get(0).getRegionServer()
- .getOnlineRegions(importTable.getName()).get(0).getRegionInfo();
- wal = UTIL.getMiniHBaseCluster().getRegionServer(0).getWAL(region);
- walListener = new TableWALActionListener(region);
- wal.registerWALActionsListener(walListener);
- args = new String[] { importTableName, FQ_OUTPUT_DIR };
- assertTrue(runImport(args));
- //Assert that the wal is visited
- assertTrue(walListener.isWALVisited());
- //Ensure that the count is 2 (only one version of key value is obtained)
- assertTrue(getCount(importTable, null) == 2);
- }
- }
-
- /**
- * This listens to the {@link #visitLogEntryBeforeWrite(HRegionInfo, WALKey, WALEdit)} to
- * identify that an entry is written to the Write Ahead Log for the given table.
- */
- private static class TableWALActionListener extends WALActionsListener.Base {
-
- private HRegionInfo regionInfo;
- private boolean isVisited = false;
-
- public TableWALActionListener(HRegionInfo region) {
- this.regionInfo = region;
- }
-
- @Override
- public void visitLogEntryBeforeWrite(WALKey logKey, WALEdit logEdit) {
- if (logKey.getTablename().getNameAsString().equalsIgnoreCase(
- this.regionInfo.getTable().getNameAsString()) && (!logEdit.isMetaEdit())) {
- isVisited = true;
- }
- }
-
- public boolean isWALVisited() {
- return isVisited;
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithOperationAttributes.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithOperationAttributes.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithOperationAttributes.java
deleted file mode 100644
index 6d9b05b..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithOperationAttributes.java
+++ /dev/null
@@ -1,266 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.UUID;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.CategoryBasedTimeout;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.coprocessor.RegionObserver;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.client.Durability;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.coprocessor.ObserverContext;
-import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
-import org.apache.hadoop.hbase.regionserver.Region;
-import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
-import org.junit.rules.TestRule;
-
-@Category({MapReduceTests.class, LargeTests.class})
-public class TestImportTSVWithOperationAttributes implements Configurable {
- @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
- withTimeout(this.getClass()).withLookingForStuckThread(true).build();
- private static final Log LOG = LogFactory.getLog(TestImportTSVWithOperationAttributes.class);
- protected static final String NAME = TestImportTsv.class.getSimpleName();
- protected static HBaseTestingUtility util = new HBaseTestingUtility();
-
- /**
- * Delete the tmp directory after running doMROnTableTest. Boolean. Default is
- * false.
- */
- protected static final String DELETE_AFTER_LOAD_CONF = NAME + ".deleteAfterLoad";
-
- /**
- * Force use of combiner in doMROnTableTest. Boolean. Default is true.
- */
- protected static final String FORCE_COMBINER_CONF = NAME + ".forceCombiner";
-
- private static Configuration conf;
-
- private static final String TEST_ATR_KEY = "test";
-
- private final String FAMILY = "FAM";
-
- @Rule
- public TestName name = new TestName();
-
- public Configuration getConf() {
- return util.getConfiguration();
- }
-
- public void setConf(Configuration conf) {
- throw new IllegalArgumentException("setConf not supported");
- }
-
- @BeforeClass
- public static void provisionCluster() throws Exception {
- conf = util.getConfiguration();
- conf.set("hbase.coprocessor.master.classes", OperationAttributesTestController.class.getName());
- conf.set("hbase.coprocessor.region.classes", OperationAttributesTestController.class.getName());
- util.startMiniCluster();
- }
-
- @AfterClass
- public static void releaseCluster() throws Exception {
- util.shutdownMiniCluster();
- }
-
- @Test
- public void testMROnTable() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName() + UUID.randomUUID());
-
- // Prepare the arguments required for the test.
- String[] args = new String[] {
- "-D" + ImportTsv.MAPPER_CONF_KEY
- + "=org.apache.hadoop.hbase.mapreduce.TsvImporterCustomTestMapperForOprAttr",
- "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_ATTRIBUTES_KEY",
- "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b", tableName.getNameAsString() };
- String data = "KEY\u001bVALUE1\u001bVALUE2\u001btest=>myvalue\n";
- util.createTable(tableName, FAMILY);
- doMROnTableTest(util, FAMILY, data, args, 1, true);
- util.deleteTable(tableName);
- }
-
- @Test
- public void testMROnTableWithInvalidOperationAttr() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName() + UUID.randomUUID());
-
- // Prepare the arguments required for the test.
- String[] args = new String[] {
- "-D" + ImportTsv.MAPPER_CONF_KEY
- + "=org.apache.hadoop.hbase.mapreduce.TsvImporterCustomTestMapperForOprAttr",
- "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_ATTRIBUTES_KEY",
- "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b", tableName.getNameAsString() };
- String data = "KEY\u001bVALUE1\u001bVALUE2\u001btest1=>myvalue\n";
- util.createTable(tableName, FAMILY);
- doMROnTableTest(util, FAMILY, data, args, 1, false);
- util.deleteTable(tableName);
- }
-
- /**
- * Run an ImportTsv job and perform basic validation on the results. Returns
- * the ImportTsv <code>Tool</code> instance so that other tests can inspect it
- * for further validation as necessary. This method is static to ensure
- * non-reliance on instance's util/conf facilities.
- *
- * @param args
- * Any arguments to pass BEFORE inputFile path is appended.
- * @param dataAvailable
- * @return The Tool instance used to run the test.
- */
- private Tool doMROnTableTest(HBaseTestingUtility util, String family, String data, String[] args,
- int valueMultiplier, boolean dataAvailable) throws Exception {
- String table = args[args.length - 1];
- Configuration conf = new Configuration(util.getConfiguration());
-
- // populate input file
- FileSystem fs = FileSystem.get(conf);
- Path inputPath = fs.makeQualified(new Path(util.getDataTestDirOnTestFS(table), "input.dat"));
- FSDataOutputStream op = fs.create(inputPath, true);
- op.write(Bytes.toBytes(data));
- op.close();
- LOG.debug(String.format("Wrote test data to file: %s", inputPath));
-
- if (conf.getBoolean(FORCE_COMBINER_CONF, true)) {
- LOG.debug("Forcing combiner.");
- conf.setInt("mapreduce.map.combine.minspills", 1);
- }
-
- // run the import
- List<String> argv = new ArrayList<>(Arrays.asList(args));
- argv.add(inputPath.toString());
- Tool tool = new ImportTsv();
- LOG.debug("Running ImportTsv with arguments: " + argv);
- assertEquals(0, ToolRunner.run(conf, tool, argv.toArray(args)));
-
- validateTable(conf, TableName.valueOf(table), family, valueMultiplier, dataAvailable);
-
- if (conf.getBoolean(DELETE_AFTER_LOAD_CONF, true)) {
- LOG.debug("Deleting test subdirectory");
- util.cleanupDataTestDirOnTestFS(table);
- }
- return tool;
- }
-
- /**
- * Confirm ImportTsv via data in online table.
- *
- * @param dataAvailable
- */
- private static void validateTable(Configuration conf, TableName tableName, String family,
- int valueMultiplier, boolean dataAvailable) throws IOException {
-
- LOG.debug("Validating table.");
- Connection connection = ConnectionFactory.createConnection(conf);
- Table table = connection.getTable(tableName);
- boolean verified = false;
- long pause = conf.getLong("hbase.client.pause", 5 * 1000);
- int numRetries = conf.getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 5);
- for (int i = 0; i < numRetries; i++) {
- try {
- Scan scan = new Scan();
- // Scan entire family.
- scan.addFamily(Bytes.toBytes(family));
- if (dataAvailable) {
- ResultScanner resScanner = table.getScanner(scan);
- for (Result res : resScanner) {
- LOG.debug("Getting results " + res.size());
- assertTrue(res.size() == 2);
- List<Cell> kvs = res.listCells();
- assertTrue(CellUtil.matchingRow(kvs.get(0), Bytes.toBytes("KEY")));
- assertTrue(CellUtil.matchingRow(kvs.get(1), Bytes.toBytes("KEY")));
- assertTrue(CellUtil.matchingValue(kvs.get(0), Bytes.toBytes("VALUE" + valueMultiplier)));
- assertTrue(CellUtil.matchingValue(kvs.get(1),
- Bytes.toBytes("VALUE" + 2 * valueMultiplier)));
- // Only one result set is expected, so let it loop.
- verified = true;
- }
- } else {
- ResultScanner resScanner = table.getScanner(scan);
- Result[] next = resScanner.next(2);
- assertEquals(0, next.length);
- verified = true;
- }
-
- break;
- } catch (NullPointerException e) {
- // If here, a cell was empty. Presume it's because updates came in
- // after the scanner had been opened. Wait a while and retry.
- }
- try {
- Thread.sleep(pause);
- } catch (InterruptedException e) {
- // continue
- }
- }
- table.close();
- connection.close();
- assertTrue(verified);
- }
-
- public static class OperationAttributesTestController implements RegionObserver {
-
- @Override
- public void prePut(ObserverContext<RegionCoprocessorEnvironment> e, Put put, WALEdit edit,
- Durability durability) throws IOException {
- Region region = e.getEnvironment().getRegion();
- if (!region.getRegionInfo().isMetaTable()
- && !region.getRegionInfo().getTable().isSystemTable()) {
- if (put.getAttribute(TEST_ATR_KEY) != null) {
- LOG.debug("allow any put to happen " + region.getRegionInfo().getRegionNameAsString());
- } else {
- e.bypass();
- }
- }
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithTTLs.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithTTLs.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithTTLs.java
deleted file mode 100644
index 4ab3d29..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithTTLs.java
+++ /dev/null
@@ -1,175 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.UUID;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.coprocessor.RegionObserver;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.client.Durability;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.coprocessor.ObserverContext;
-import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
-import org.apache.hadoop.hbase.regionserver.Region;
-import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
-
-@Category({MapReduceTests.class, LargeTests.class})
-public class TestImportTSVWithTTLs implements Configurable {
-
- protected static final Log LOG = LogFactory.getLog(TestImportTSVWithTTLs.class);
- protected static final String NAME = TestImportTsv.class.getSimpleName();
- protected static HBaseTestingUtility util = new HBaseTestingUtility();
-
- /**
- * Delete the tmp directory after running doMROnTableTest. Boolean. Default is
- * true.
- */
- protected static final String DELETE_AFTER_LOAD_CONF = NAME + ".deleteAfterLoad";
-
- /**
- * Force use of combiner in doMROnTableTest. Boolean. Default is true.
- */
- protected static final String FORCE_COMBINER_CONF = NAME + ".forceCombiner";
-
- private final String FAMILY = "FAM";
- private static Configuration conf;
-
- @Rule
- public TestName name = new TestName();
-
- @Override
- public Configuration getConf() {
- return util.getConfiguration();
- }
-
- @Override
- public void setConf(Configuration conf) {
- throw new IllegalArgumentException("setConf not supported");
- }
-
- @BeforeClass
- public static void provisionCluster() throws Exception {
- conf = util.getConfiguration();
- // We don't check persistence in HFiles in this test, but if we ever do we will
- // need this where the default hfile version is not 3 (i.e. 0.98)
- conf.setInt("hfile.format.version", 3);
- conf.set("hbase.coprocessor.region.classes", TTLCheckingObserver.class.getName());
- util.startMiniCluster();
- }
-
- @AfterClass
- public static void releaseCluster() throws Exception {
- util.shutdownMiniCluster();
- }
-
- @Test
- public void testMROnTable() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName() + UUID.randomUUID());
-
- // Prepare the arguments required for the test.
- String[] args = new String[] {
- "-D" + ImportTsv.MAPPER_CONF_KEY
- + "=org.apache.hadoop.hbase.mapreduce.TsvImporterMapper",
- "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_CELL_TTL",
- "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b", tableName.getNameAsString() };
- String data = "KEY\u001bVALUE1\u001bVALUE2\u001b1000000\n";
- util.createTable(tableName, FAMILY);
- doMROnTableTest(util, FAMILY, data, args, 1);
- util.deleteTable(tableName);
- }
-
- protected static Tool doMROnTableTest(HBaseTestingUtility util, String family, String data,
- String[] args, int valueMultiplier) throws Exception {
- TableName table = TableName.valueOf(args[args.length - 1]);
- Configuration conf = new Configuration(util.getConfiguration());
-
- // populate input file
- FileSystem fs = FileSystem.get(conf);
- Path inputPath = fs.makeQualified(new Path(util
- .getDataTestDirOnTestFS(table.getNameAsString()), "input.dat"));
- FSDataOutputStream op = fs.create(inputPath, true);
- op.write(Bytes.toBytes(data));
- op.close();
- LOG.debug(String.format("Wrote test data to file: %s", inputPath));
-
- if (conf.getBoolean(FORCE_COMBINER_CONF, true)) {
- LOG.debug("Forcing combiner.");
- conf.setInt("mapreduce.map.combine.minspills", 1);
- }
-
- // run the import
- List<String> argv = new ArrayList<>(Arrays.asList(args));
- argv.add(inputPath.toString());
- Tool tool = new ImportTsv();
- LOG.debug("Running ImportTsv with arguments: " + argv);
- try {
- // Job will fail if observer rejects entries without TTL
- assertEquals(0, ToolRunner.run(conf, tool, argv.toArray(args)));
- } finally {
- // Clean up
- if (conf.getBoolean(DELETE_AFTER_LOAD_CONF, true)) {
- LOG.debug("Deleting test subdirectory");
- util.cleanupDataTestDirOnTestFS(table.getNameAsString());
- }
- }
-
- return tool;
- }
-
- public static class TTLCheckingObserver implements RegionObserver {
-
- @Override
- public void prePut(ObserverContext<RegionCoprocessorEnvironment> e, Put put, WALEdit edit,
- Durability durability) throws IOException {
- Region region = e.getEnvironment().getRegion();
- if (!region.getRegionInfo().isMetaTable()
- && !region.getRegionInfo().getTable().isSystemTable()) {
- // The put carries the TTL attribute
- if (put.getTTL() != Long.MAX_VALUE) {
- return;
- }
- throw new IOException("Operation does not have TTL set");
- }
- }
- }
-}
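As background for the TTLCheckingObserver above: ImportTsv maps the HBASE_CELL_TTL column onto a mutation-level TTL, and the observer rejects any Put whose TTL is still the default Long.MAX_VALUE. A minimal sketch of setting that TTL directly from client code, with an illustrative table, family, and value (not taken from this commit):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class PutWithTtlExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    try (Connection conn = ConnectionFactory.createConnection(conf);
         Table table = conn.getTable(TableName.valueOf("demo"))) { // hypothetical table
      Put put = new Put(Bytes.toBytes("KEY"));
      put.addColumn(Bytes.toBytes("FAM"), Bytes.toBytes("A"), Bytes.toBytes("VALUE1"));
      // The TTL is in milliseconds; without this call Put#getTTL() returns
      // Long.MAX_VALUE, which the observer above treats as "no TTL set".
      put.setTTL(1000000L);
      table.put(put);
    }
  }
}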
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithVisibilityLabels.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithVisibilityLabels.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithVisibilityLabels.java
deleted file mode 100644
index 8967ac7..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithVisibilityLabels.java
+++ /dev/null
@@ -1,495 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-import java.io.IOException;
-import java.security.PrivilegedExceptionAction;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-import java.util.UUID;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Delete;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.io.hfile.CacheConfig;
-import org.apache.hadoop.hbase.io.hfile.HFile;
-import org.apache.hadoop.hbase.io.hfile.HFileScanner;
-import org.apache.hadoop.hbase.protobuf.generated.VisibilityLabelsProtos.VisibilityLabelsResponse;
-import org.apache.hadoop.hbase.security.User;
-import org.apache.hadoop.hbase.security.visibility.Authorizations;
-import org.apache.hadoop.hbase.security.visibility.CellVisibility;
-import org.apache.hadoop.hbase.security.visibility.ScanLabelGenerator;
-import org.apache.hadoop.hbase.security.visibility.SimpleScanLabelGenerator;
-import org.apache.hadoop.hbase.security.visibility.VisibilityClient;
-import org.apache.hadoop.hbase.security.visibility.VisibilityConstants;
-import org.apache.hadoop.hbase.security.visibility.VisibilityController;
-import org.apache.hadoop.hbase.security.visibility.VisibilityUtils;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapred.Utils.OutputFileUtils.OutputFilesFilter;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
-
-@Category({MapReduceTests.class, LargeTests.class})
-public class TestImportTSVWithVisibilityLabels implements Configurable {
-
- private static final Log LOG = LogFactory.getLog(TestImportTSVWithVisibilityLabels.class);
- protected static final String NAME = TestImportTsv.class.getSimpleName();
- protected static HBaseTestingUtility util = new HBaseTestingUtility();
-
- /**
- * Delete the tmp directory after running doMROnTableTest. Boolean. Default is
- * true.
- */
- protected static final String DELETE_AFTER_LOAD_CONF = NAME + ".deleteAfterLoad";
-
- /**
- * Force use of combiner in doMROnTableTest. Boolean. Default is true.
- */
- protected static final String FORCE_COMBINER_CONF = NAME + ".forceCombiner";
-
- private final String FAMILY = "FAM";
- private final static String TOPSECRET = "topsecret";
- private final static String PUBLIC = "public";
- private final static String PRIVATE = "private";
- private final static String CONFIDENTIAL = "confidential";
- private final static String SECRET = "secret";
- private static User SUPERUSER;
- private static Configuration conf;
-
- @Rule
- public TestName name = new TestName();
-
- @Override
- public Configuration getConf() {
- return util.getConfiguration();
- }
-
- @Override
- public void setConf(Configuration conf) {
- throw new IllegalArgumentException("setConf not supported");
- }
-
- @BeforeClass
- public static void provisionCluster() throws Exception {
- conf = util.getConfiguration();
- SUPERUSER = User.createUserForTesting(conf, "admin", new String[] { "supergroup" });
- conf.set("hbase.superuser", "admin,"+User.getCurrent().getName());
- conf.setInt("hfile.format.version", 3);
- conf.set("hbase.coprocessor.master.classes", VisibilityController.class.getName());
- conf.set("hbase.coprocessor.region.classes", VisibilityController.class.getName());
- conf.setClass(VisibilityUtils.VISIBILITY_LABEL_GENERATOR_CLASS, SimpleScanLabelGenerator.class,
- ScanLabelGenerator.class);
- util.startMiniCluster();
- // Wait for the labels table to become available
- util.waitTableEnabled(VisibilityConstants.LABELS_TABLE_NAME.getName(), 50000);
- createLabels();
- }
-
- private static void createLabels() throws IOException, InterruptedException {
- PrivilegedExceptionAction<VisibilityLabelsResponse> action =
- new PrivilegedExceptionAction<VisibilityLabelsResponse>() {
- @Override
- public VisibilityLabelsResponse run() throws Exception {
- String[] labels = { SECRET, TOPSECRET, CONFIDENTIAL, PUBLIC, PRIVATE };
- try (Connection conn = ConnectionFactory.createConnection(conf)) {
- VisibilityClient.addLabels(conn, labels);
- LOG.info("Added labels ");
- } catch (Throwable t) {
- LOG.error("Error in adding labels" , t);
- throw new IOException(t);
- }
- return null;
- }
- };
- SUPERUSER.runAs(action);
- }
-
- @AfterClass
- public static void releaseCluster() throws Exception {
- util.shutdownMiniCluster();
- }
-
- @Test
- public void testMROnTable() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName() + UUID.randomUUID());
-
- // Prepare the arguments required for the test.
- String[] args = new String[] {
- "-D" + ImportTsv.MAPPER_CONF_KEY
- + "=org.apache.hadoop.hbase.mapreduce.TsvImporterMapper",
- "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_CELL_VISIBILITY",
- "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b", tableName.getNameAsString() };
- String data = "KEY\u001bVALUE1\u001bVALUE2\u001bsecret&private\n";
- util.createTable(tableName, FAMILY);
- doMROnTableTest(util, FAMILY, data, args, 1);
- util.deleteTable(tableName);
- }
-
- @Test
- public void testMROnTableWithDeletes() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName() + UUID.randomUUID());
-
- // Prepare the arguments required for the test.
- String[] args = new String[] {
- "-D" + ImportTsv.MAPPER_CONF_KEY + "=org.apache.hadoop.hbase.mapreduce.TsvImporterMapper",
- "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_CELL_VISIBILITY",
- "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b", tableName.getNameAsString() };
- String data = "KEY\u001bVALUE1\u001bVALUE2\u001bsecret&private\n";
- util.createTable(tableName, FAMILY);
- doMROnTableTest(util, FAMILY, data, args, 1);
- issueDeleteAndVerifyData(tableName);
- util.deleteTable(tableName);
- }
-
- private void issueDeleteAndVerifyData(TableName tableName) throws IOException {
- LOG.debug("Validating table after delete.");
- Table table = util.getConnection().getTable(tableName);
- boolean verified = false;
- long pause = conf.getLong("hbase.client.pause", 5 * 1000);
- int numRetries = conf.getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 5);
- for (int i = 0; i < numRetries; i++) {
- try {
- Delete d = new Delete(Bytes.toBytes("KEY"));
- d.addFamily(Bytes.toBytes(FAMILY));
- d.setCellVisibility(new CellVisibility("private&secret"));
- table.delete(d);
-
- Scan scan = new Scan();
- // Scan entire family.
- scan.addFamily(Bytes.toBytes(FAMILY));
- scan.setAuthorizations(new Authorizations("secret", "private"));
- ResultScanner resScanner = table.getScanner(scan);
- Result[] next = resScanner.next(5);
- assertEquals(0, next.length);
- verified = true;
- break;
- } catch (NullPointerException e) {
- // If here, a cell was empty. Presume it's because updates came in
- // after the scanner had been opened. Wait a while and retry.
- }
- try {
- Thread.sleep(pause);
- } catch (InterruptedException e) {
- // continue
- }
- }
- table.close();
- assertTrue(verified);
- }
-
- @Test
- public void testMROnTableWithBulkload() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName() + UUID.randomUUID());
- Path hfiles = new Path(util.getDataTestDirOnTestFS(tableName.getNameAsString()), "hfiles");
- // Prepare the arguments required for the test.
- String[] args = new String[] {
- "-D" + ImportTsv.BULK_OUTPUT_CONF_KEY + "=" + hfiles.toString(),
- "-D" + ImportTsv.COLUMNS_CONF_KEY
- + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_CELL_VISIBILITY",
- "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b", tableName.getNameAsString() };
- String data = "KEY\u001bVALUE1\u001bVALUE2\u001bsecret&private\n";
- util.createTable(tableName, FAMILY);
- doMROnTableTest(util, FAMILY, data, args, 1);
- util.deleteTable(tableName);
- }
-
- @Test
- public void testBulkOutputWithTsvImporterTextMapper() throws Exception {
- final TableName table = TableName.valueOf(name.getMethodName() + UUID.randomUUID());
- String FAMILY = "FAM";
- Path bulkOutputPath = new Path(util.getDataTestDirOnTestFS(table.getNameAsString()),"hfiles");
- // Prepare the arguments required for the test.
- String[] args =
- new String[] {
- "-D" + ImportTsv.MAPPER_CONF_KEY
- + "=org.apache.hadoop.hbase.mapreduce.TsvImporterTextMapper",
- "-D" + ImportTsv.COLUMNS_CONF_KEY
- + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_CELL_VISIBILITY",
- "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b",
- "-D" + ImportTsv.BULK_OUTPUT_CONF_KEY + "=" + bulkOutputPath.toString(),
- table.getNameAsString()
- };
- String data = "KEY\u001bVALUE4\u001bVALUE8\u001bsecret&private\n";
- doMROnTableTest(util, FAMILY, data, args, 4);
- util.deleteTable(table);
- }
-
- @Test
- public void testMRWithOutputFormat() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName() + UUID.randomUUID());
- Path hfiles = new Path(util.getDataTestDirOnTestFS(tableName.getNameAsString()), "hfiles");
- // Prepare the arguments required for the test.
- String[] args = new String[] {
- "-D" + ImportTsv.MAPPER_CONF_KEY
- + "=org.apache.hadoop.hbase.mapreduce.TsvImporterMapper",
- "-D" + ImportTsv.BULK_OUTPUT_CONF_KEY + "=" + hfiles.toString(),
- "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_CELL_VISIBILITY",
- "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b", tableName.getNameAsString() };
- String data = "KEY\u001bVALUE4\u001bVALUE8\u001bsecret&private\n";
- util.createTable(tableName, FAMILY);
- doMROnTableTest(util, FAMILY, data, args, 1);
- util.deleteTable(tableName);
- }
-
- @Test
- public void testBulkOutputWithInvalidLabels() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName() + UUID.randomUUID());
- Path hfiles = new Path(util.getDataTestDirOnTestFS(tableName.getNameAsString()), "hfiles");
- // Prepare the arguments required for the test.
- String[] args =
- new String[] { "-D" + ImportTsv.BULK_OUTPUT_CONF_KEY + "=" + hfiles.toString(),
- "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_CELL_VISIBILITY",
- "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b", tableName.getNameAsString() };
-
- // 2 Data rows, one with valid label and one with invalid label
- String data =
- "KEY\u001bVALUE1\u001bVALUE2\u001bprivate\nKEY1\u001bVALUE1\u001bVALUE2\u001binvalid\n";
- util.createTable(tableName, FAMILY);
- doMROnTableTest(util, FAMILY, data, args, 1, 2);
- util.deleteTable(tableName);
- }
-
- @Test
- public void testBulkOutputWithTsvImporterTextMapperWithInvalidLabels() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName() + UUID.randomUUID());
- Path hfiles = new Path(util.getDataTestDirOnTestFS(tableName.getNameAsString()), "hfiles");
- // Prepare the arguments required for the test.
- String[] args =
- new String[] {
- "-D" + ImportTsv.MAPPER_CONF_KEY
- + "=org.apache.hadoop.hbase.mapreduce.TsvImporterTextMapper",
- "-D" + ImportTsv.BULK_OUTPUT_CONF_KEY + "=" + hfiles.toString(),
- "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_CELL_VISIBILITY",
- "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b", tableName.getNameAsString() };
-
- // 2 Data rows, one with valid label and one with invalid label
- String data =
- "KEY\u001bVALUE1\u001bVALUE2\u001bprivate\nKEY1\u001bVALUE1\u001bVALUE2\u001binvalid\n";
- util.createTable(tableName, FAMILY);
- doMROnTableTest(util, FAMILY, data, args, 1, 2);
- util.deleteTable(tableName);
- }
-
- protected static Tool doMROnTableTest(HBaseTestingUtility util, String family, String data,
- String[] args, int valueMultiplier) throws Exception {
- return doMROnTableTest(util, family, data, args, valueMultiplier, -1);
- }
-
- /**
- * Run an ImportTsv job and perform basic validation on the results. Returns
- * the ImportTsv <code>Tool</code> instance so that other tests can inspect it
- * for further validation as necessary. This method is static to ensure
- * non-reliance on the instance's util/conf facilities.
- *
- * @param args
- * Any arguments to pass BEFORE inputFile path is appended.
- *
- * @param expectedKVCount Expected KV count. Pass -1 to skip the KV count check.
- *
- * @return The Tool instance used to run the test.
- */
- protected static Tool doMROnTableTest(HBaseTestingUtility util, String family, String data,
- String[] args, int valueMultiplier,int expectedKVCount) throws Exception {
- TableName table = TableName.valueOf(args[args.length - 1]);
- Configuration conf = new Configuration(util.getConfiguration());
-
- // populate input file
- FileSystem fs = FileSystem.get(conf);
- Path inputPath = fs.makeQualified(new Path(util
- .getDataTestDirOnTestFS(table.getNameAsString()), "input.dat"));
- FSDataOutputStream op = fs.create(inputPath, true);
- if (data == null) {
- data = "KEY\u001bVALUE1\u001bVALUE2\n";
- }
- op.write(Bytes.toBytes(data));
- op.close();
- LOG.debug(String.format("Wrote test data to file: %s", inputPath));
-
- if (conf.getBoolean(FORCE_COMBINER_CONF, true)) {
- LOG.debug("Forcing combiner.");
- conf.setInt("mapreduce.map.combine.minspills", 1);
- }
-
- // run the import
- List<String> argv = new ArrayList<>(Arrays.asList(args));
- argv.add(inputPath.toString());
- Tool tool = new ImportTsv();
- LOG.debug("Running ImportTsv with arguments: " + argv);
- assertEquals(0, ToolRunner.run(conf, tool, argv.toArray(args)));
-
- // Perform basic validation. If the input args did not include
- // ImportTsv.BULK_OUTPUT_CONF_KEY then validate data in the table.
- // Otherwise, validate presence of hfiles.
- boolean createdHFiles = false;
- String outputPath = null;
- for (String arg : argv) {
- if (arg.contains(ImportTsv.BULK_OUTPUT_CONF_KEY)) {
- createdHFiles = true;
- // split '-Dfoo=bar' on '=' and keep 'bar'
- outputPath = arg.split("=")[1];
- break;
- }
- }
- LOG.debug("validating the table " + createdHFiles);
- if (createdHFiles)
- validateHFiles(fs, outputPath, family,expectedKVCount);
- else
- validateTable(conf, table, family, valueMultiplier);
-
- if (conf.getBoolean(DELETE_AFTER_LOAD_CONF, true)) {
- LOG.debug("Deleting test subdirectory");
- util.cleanupDataTestDirOnTestFS(table.getNameAsString());
- }
- return tool;
- }
-
- /**
- * Confirm ImportTsv via HFiles on fs.
- */
- private static void validateHFiles(FileSystem fs, String outputPath, String family,
- int expectedKVCount) throws IOException {
-
- // validate number and content of output columns
- LOG.debug("Validating HFiles.");
- Set<String> configFamilies = new HashSet<>();
- configFamilies.add(family);
- Set<String> foundFamilies = new HashSet<>();
- int actualKVCount = 0;
- for (FileStatus cfStatus : fs.listStatus(new Path(outputPath), new OutputFilesFilter())) {
- LOG.debug("The output path has files");
- String[] elements = cfStatus.getPath().toString().split(Path.SEPARATOR);
- String cf = elements[elements.length - 1];
- foundFamilies.add(cf);
- assertTrue(String.format(
- "HFile ouput contains a column family (%s) not present in input families (%s)", cf,
- configFamilies), configFamilies.contains(cf));
- for (FileStatus hfile : fs.listStatus(cfStatus.getPath())) {
- assertTrue(String.format("HFile %s appears to contain no data.", hfile.getPath()),
- hfile.getLen() > 0);
- if (expectedKVCount > -1) {
- actualKVCount += getKVCountFromHfile(fs, hfile.getPath());
- }
- }
- }
- if (expectedKVCount > -1) {
- assertTrue(String.format(
- "KV count in output hfile=<%d> doesn't match with expected KV count=<%d>", actualKVCount,
- expectedKVCount), actualKVCount == expectedKVCount);
- }
- }
-
- /**
- * Confirm ImportTsv via data in online table.
- */
- private static void validateTable(Configuration conf, TableName tableName, String family,
- int valueMultiplier) throws IOException {
-
- LOG.debug("Validating table.");
- Table table = util.getConnection().getTable(tableName);
- boolean verified = false;
- long pause = conf.getLong("hbase.client.pause", 5 * 1000);
- int numRetries = conf.getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 5);
- for (int i = 0; i < numRetries; i++) {
- try {
- Scan scan = new Scan();
- // Scan entire family.
- scan.addFamily(Bytes.toBytes(family));
- scan.setAuthorizations(new Authorizations("secret","private"));
- ResultScanner resScanner = table.getScanner(scan);
- Result[] next = resScanner.next(5);
- assertEquals(1, next.length);
- for (Result res : resScanner) {
- LOG.debug("Getting results " + res.size());
- assertTrue(res.size() == 2);
- List<Cell> kvs = res.listCells();
- assertTrue(CellUtil.matchingRow(kvs.get(0), Bytes.toBytes("KEY")));
- assertTrue(CellUtil.matchingRow(kvs.get(1), Bytes.toBytes("KEY")));
- assertTrue(CellUtil.matchingValue(kvs.get(0), Bytes.toBytes("VALUE" + valueMultiplier)));
- assertTrue(CellUtil.matchingValue(kvs.get(1),
- Bytes.toBytes("VALUE" + 2 * valueMultiplier)));
- // Only one result set is expected, so let it loop.
- }
- verified = true;
- break;
- } catch (NullPointerException e) {
- // If here, a cell was empty. Presume it's because updates came in
- // after the scanner had been opened. Wait a while and retry.
- }
- try {
- Thread.sleep(pause);
- } catch (InterruptedException e) {
- // continue
- }
- }
- table.close();
- assertTrue(verified);
- }
-
- /**
- * Returns the total number of KVs in the given HFile.
- * @param fs File System
- * @param p HFile path
- * @return KV count in the given hfile
- * @throws IOException
- */
- private static int getKVCountFromHfile(FileSystem fs, Path p) throws IOException {
- Configuration conf = util.getConfiguration();
- HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf);
- reader.loadFileInfo();
- HFileScanner scanner = reader.getScanner(false, false);
- scanner.seekTo();
- int count = 0;
- do {
- count++;
- } while (scanner.next());
- reader.close();
- return count;
- }
-
-}
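To summarize what the visibility-label tests above exercise: cells are written with a visibility expression and can only be read back under matching authorizations. A minimal sketch of that round trip, assuming labels like those defined in the test have already been added via VisibilityClient.addLabels and using an illustrative table and values:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.security.visibility.Authorizations;
import org.apache.hadoop.hbase.security.visibility.CellVisibility;
import org.apache.hadoop.hbase.util.Bytes;

public class VisibilityRoundTripExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    try (Connection conn = ConnectionFactory.createConnection(conf);
         Table table = conn.getTable(TableName.valueOf("demo"))) { // hypothetical table
      Put put = new Put(Bytes.toBytes("KEY"));
      put.addColumn(Bytes.toBytes("FAM"), Bytes.toBytes("A"), Bytes.toBytes("VALUE1"));
      // Only reads whose authorizations satisfy this expression can see the cell.
      put.setCellVisibility(new CellVisibility("secret&private"));
      table.put(put);

      Scan scan = new Scan();
      scan.addFamily(Bytes.toBytes("FAM"));
      // Request the authorizations to apply to this read.
      scan.setAuthorizations(new Authorizations("secret", "private"));
      try (ResultScanner scanner = table.getScanner(scan)) {
        for (Result r : scanner) {
          System.out.println(Bytes.toString(r.getRow()));
        }
      }
    }
  }
}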
[02/41] hbase git commit: HBASE-18640 Move mapreduce out of hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TsvImporterCustomTestMapperForOprAttr.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TsvImporterCustomTestMapperForOprAttr.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TsvImporterCustomTestMapperForOprAttr.java
deleted file mode 100644
index 9d8b8f0..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TsvImporterCustomTestMapperForOprAttr.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser.BadTsvLineException;
-import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser.ParsedLine;
-import org.apache.hadoop.hbase.util.Bytes;
-
-/**
- *
- * Just shows a simple example of how the attributes can be extracted and added
- * to the puts
- */
-public class TsvImporterCustomTestMapperForOprAttr extends TsvImporterMapper {
- @Override
- protected void populatePut(byte[] lineBytes, ParsedLine parsed, Put put, int i)
- throws BadTsvLineException, IOException {
- KeyValue kv;
- kv = new KeyValue(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength(),
- parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0,
- parser.getQualifier(i).length, ts, KeyValue.Type.Put, lineBytes, parsed.getColumnOffset(i),
- parsed.getColumnLength(i));
- if (parsed.getIndividualAttributes() != null) {
- String[] attributes = parsed.getIndividualAttributes();
- for (String attr : attributes) {
- String[] split = attr.split(ImportTsv.DEFAULT_ATTRIBUTES_SEPERATOR);
- if (split == null || split.length <= 1) {
- throw new BadTsvLineException("Invalid attributes seperator specified" + attributes);
- } else {
- if (split[0].length() <= 0 || split[1].length() <= 0) {
- throw new BadTsvLineException("Invalid attributes seperator specified" + attributes);
- }
- put.setAttribute(split[0], Bytes.toBytes(split[1]));
- }
- }
- }
- put.add(kv);
- }
-}
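A custom mapper like the one above is plugged into ImportTsv through its mapper configuration key, in the same style as the test arguments earlier in this commit. A rough sketch follows; the table name and input path are placeholders, and the HBASE_ATTRIBUTES_KEY column spec is an assumption based on the operation-attributes test, not something stated in this file:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.ImportTsv;
import org.apache.hadoop.hbase.mapreduce.TsvImporterCustomTestMapperForOprAttr;
import org.apache.hadoop.util.ToolRunner;

public class CustomMapperImportExample {
  public static void main(String[] cmdArgs) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    String[] args = new String[] {
        "-D" + ImportTsv.MAPPER_CONF_KEY + "="
            + TsvImporterCustomTestMapperForOprAttr.class.getName(),
        // HBASE_ATTRIBUTES_KEY is assumed here; the other column names are illustrative.
        "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_ATTRIBUTES_KEY",
        "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b",
        "demoTable",            // placeholder table name
        "/path/to/input.dat" }; // placeholder input path
    int exitCode = ToolRunner.run(conf, new ImportTsv(), args);
    System.exit(exitCode);
  }
}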
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/namespace/TestNamespaceAuditor.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/namespace/TestNamespaceAuditor.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/namespace/TestNamespaceAuditor.java
index f641887..a81d268 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/namespace/TestNamespaceAuditor.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/namespace/TestNamespaceAuditor.java
@@ -65,7 +65,6 @@ import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
import org.apache.hadoop.hbase.coprocessor.RegionObserver;
import org.apache.hadoop.hbase.coprocessor.RegionServerCoprocessorEnvironment;
import org.apache.hadoop.hbase.coprocessor.RegionServerObserver;
-import org.apache.hadoop.hbase.mapreduce.TableInputFormatBase;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
import org.apache.hadoop.hbase.master.TableNamespaceManager;
@@ -336,7 +335,7 @@ public class TestNamespaceAuditor {
byte[] columnFamily = Bytes.toBytes("info");
HTableDescriptor tableDescOne = new HTableDescriptor(tableTwo);
tableDescOne.addFamily(new HColumnDescriptor(columnFamily));
- ADMIN.createTable(tableDescOne, Bytes.toBytes("1"), Bytes.toBytes("2000"), initialRegions);
+ ADMIN.createTable(tableDescOne, Bytes.toBytes("0"), Bytes.toBytes("9"), initialRegions);
Connection connection = ConnectionFactory.createConnection(UTIL.getConfiguration());
try (Table table = connection.getTable(tableTwo)) {
UTIL.loadNumericRows(table, Bytes.toBytes("info"), 1000, 1999);
@@ -354,7 +353,7 @@ public class TestNamespaceAuditor {
hris = ADMIN.getTableRegions(tableTwo);
assertEquals(initialRegions - 1, hris.size());
Collections.sort(hris);
- ADMIN.split(tableTwo, Bytes.toBytes("500"));
+ ADMIN.split(tableTwo, Bytes.toBytes("3"));
// Not much we can do here until we have split return a Future.
Threads.sleep(5000);
hris = ADMIN.getTableRegions(tableTwo);
@@ -383,8 +382,7 @@ public class TestNamespaceAuditor {
Collections.sort(hris);
// verify that we cannot split
HRegionInfo hriToSplit2 = hris.get(1);
- ADMIN.split(tableTwo,
- TableInputFormatBase.getSplitKey(hriToSplit2.getStartKey(), hriToSplit2.getEndKey(), true));
+ ADMIN.split(tableTwo, Bytes.toBytes("6"));
Thread.sleep(2000);
assertEquals(initialRegions, ADMIN.getTableRegions(tableTwo).size());
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionFileSystem.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionFileSystem.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionFileSystem.java
index 0aa39f6..477c870 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionFileSystem.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionFileSystem.java
@@ -42,7 +42,6 @@ import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.PerformanceEvaluation;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.HTable;
@@ -62,9 +61,11 @@ import org.junit.rules.TestName;
public class TestHRegionFileSystem {
private static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
private static final Log LOG = LogFactory.getLog(TestHRegionFileSystem.class);
+
+ public static final byte[] FAMILY_NAME = Bytes.toBytes("info");
private static final byte[][] FAMILIES = {
- Bytes.add(PerformanceEvaluation.FAMILY_NAME, Bytes.toBytes("-A")),
- Bytes.add(PerformanceEvaluation.FAMILY_NAME, Bytes.toBytes("-B")) };
+ Bytes.add(FAMILY_NAME, Bytes.toBytes("-A")),
+ Bytes.add(FAMILY_NAME, Bytes.toBytes("-B")) };
private static final TableName TABLE_NAME = TableName.valueOf("TestTable");
@Rule
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationSmallTests.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationSmallTests.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationSmallTests.java
deleted file mode 100644
index e1cb8ba..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationSmallTests.java
+++ /dev/null
@@ -1,1059 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.replication;
-
-import static org.junit.Assert.*;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.NavigableMap;
-import java.util.TreeMap;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.HColumnDescriptor;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.Waiter;
-import org.apache.hadoop.hbase.client.Admin;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Delete;
-import org.apache.hadoop.hbase.client.Get;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.client.replication.ReplicationAdmin;
-import org.apache.hadoop.hbase.mapreduce.replication.VerifyReplication;
-import org.apache.hadoop.hbase.regionserver.HRegion;
-import org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl;
-import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
-import org.apache.hadoop.hbase.replication.regionserver.Replication;
-import org.apache.hadoop.hbase.replication.regionserver.ReplicationSource;
-import org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceInterface;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos;
-import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.ReplicationTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
-import org.apache.hadoop.hbase.util.FSUtils;
-import org.apache.hadoop.hbase.util.JVMClusterUtil;
-import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
-import org.apache.hadoop.hbase.wal.WAL;
-import org.apache.hadoop.hbase.wal.WALKey;
-import org.apache.hadoop.mapreduce.Job;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
-
-@Category({ReplicationTests.class, LargeTests.class})
-public class TestReplicationSmallTests extends TestReplicationBase {
-
- private static final Log LOG = LogFactory.getLog(TestReplicationSmallTests.class);
- private static final String PEER_ID = "2";
-
- @Rule
- public TestName name = new TestName();
-
- /**
- * @throws java.lang.Exception
- */
- @Before
- public void setUp() throws Exception {
- // Starting and stopping replication can make us miss new logs;
- // rolling like this makes sure the most recent one gets added to the queue.
- for ( JVMClusterUtil.RegionServerThread r :
- utility1.getHBaseCluster().getRegionServerThreads()) {
- utility1.getAdmin().rollWALWriter(r.getRegionServer().getServerName());
- }
- int rowCount = utility1.countRows(tableName);
- utility1.deleteTableData(tableName);
- // truncating the table will send one Delete per row to the slave cluster
- // in an async fashion, which is why we cannot just call deleteTableData on
- // utility2 since late writes could make it to the slave in some way.
- // Instead, we truncate the first table and wait for all the Deletes to
- // make it to the slave.
- Scan scan = new Scan();
- int lastCount = 0;
- for (int i = 0; i < NB_RETRIES; i++) {
- if (i==NB_RETRIES-1) {
- fail("Waited too much time for truncate");
- }
- ResultScanner scanner = htable2.getScanner(scan);
- Result[] res = scanner.next(rowCount);
- scanner.close();
- if (res.length != 0) {
- if (res.length < lastCount) {
- i--; // Don't increment timeout if we make progress
- }
- lastCount = res.length;
- LOG.info("Still got " + res.length + " rows");
- Thread.sleep(SLEEP_TIME);
- } else {
- break;
- }
- }
- }
-
- /**
- * Verify that version and column delete marker types are replicated
- * correctly.
- * @throws Exception
- */
- @Test(timeout=300000)
- public void testDeleteTypes() throws Exception {
- LOG.info("testDeleteTypes");
- final byte[] v1 = Bytes.toBytes("v1");
- final byte[] v2 = Bytes.toBytes("v2");
- final byte[] v3 = Bytes.toBytes("v3");
- htable1 = utility1.getConnection().getTable(tableName);
-
- long t = EnvironmentEdgeManager.currentTime();
- // create three versions for "row"
- Put put = new Put(row);
- put.addColumn(famName, row, t, v1);
- htable1.put(put);
-
- put = new Put(row);
- put.addColumn(famName, row, t + 1, v2);
- htable1.put(put);
-
- put = new Put(row);
- put.addColumn(famName, row, t + 2, v3);
- htable1.put(put);
-
- Get get = new Get(row);
- get.setMaxVersions();
- for (int i = 0; i < NB_RETRIES; i++) {
- if (i==NB_RETRIES-1) {
- fail("Waited too much time for put replication");
- }
- Result res = htable2.get(get);
- if (res.size() < 3) {
- LOG.info("Rows not available");
- Thread.sleep(SLEEP_TIME);
- } else {
- assertArrayEquals(CellUtil.cloneValue(res.rawCells()[0]), v3);
- assertArrayEquals(CellUtil.cloneValue(res.rawCells()[1]), v2);
- assertArrayEquals(CellUtil.cloneValue(res.rawCells()[2]), v1);
- break;
- }
- }
- // place a version delete marker (delete last version)
- Delete d = new Delete(row);
- d.addColumn(famName, row, t);
- htable1.delete(d);
-
- get = new Get(row);
- get.setMaxVersions();
- for (int i = 0; i < NB_RETRIES; i++) {
- if (i==NB_RETRIES-1) {
- fail("Waited too much time for put replication");
- }
- Result res = htable2.get(get);
- if (res.size() > 2) {
- LOG.info("Version not deleted");
- Thread.sleep(SLEEP_TIME);
- } else {
- assertArrayEquals(CellUtil.cloneValue(res.rawCells()[0]), v3);
- assertArrayEquals(CellUtil.cloneValue(res.rawCells()[1]), v2);
- break;
- }
- }
-
- // place a column delete marker
- d = new Delete(row);
- d.addColumns(famName, row, t+2);
- htable1.delete(d);
-
- // now *both* of the remaining version should be deleted
- // at the replica
- get = new Get(row);
- for (int i = 0; i < NB_RETRIES; i++) {
- if (i==NB_RETRIES-1) {
- fail("Waited too much time for del replication");
- }
- Result res = htable2.get(get);
- if (res.size() >= 1) {
- LOG.info("Rows not deleted");
- Thread.sleep(SLEEP_TIME);
- } else {
- break;
- }
- }
- }
-
- /**
- * Add a row, check it's replicated, delete it, check it's gone.
- * @throws Exception
- */
- @Test(timeout=300000)
- public void testSimplePutDelete() throws Exception {
- LOG.info("testSimplePutDelete");
- Put put = new Put(row);
- put.addColumn(famName, row, row);
-
- htable1 = utility1.getConnection().getTable(tableName);
- htable1.put(put);
-
- Get get = new Get(row);
- for (int i = 0; i < NB_RETRIES; i++) {
- if (i==NB_RETRIES-1) {
- fail("Waited too much time for put replication");
- }
- Result res = htable2.get(get);
- if (res.isEmpty()) {
- LOG.info("Row not available");
- Thread.sleep(SLEEP_TIME);
- } else {
- assertArrayEquals(res.value(), row);
- break;
- }
- }
-
- Delete del = new Delete(row);
- htable1.delete(del);
-
- get = new Get(row);
- for (int i = 0; i < NB_RETRIES; i++) {
- if (i==NB_RETRIES-1) {
- fail("Waited too much time for del replication");
- }
- Result res = htable2.get(get);
- if (res.size() >= 1) {
- LOG.info("Row not deleted");
- Thread.sleep(SLEEP_TIME);
- } else {
- break;
- }
- }
- }
-
- /**
- * Try a small batch upload using the write buffer, check it's replicated
- * @throws Exception
- */
- @Test(timeout=300000)
- public void testSmallBatch() throws Exception {
- LOG.info("testSmallBatch");
- // normal Batch tests
- loadData("", row);
-
- Scan scan = new Scan();
-
- ResultScanner scanner1 = htable1.getScanner(scan);
- Result[] res1 = scanner1.next(NB_ROWS_IN_BATCH);
- scanner1.close();
- assertEquals(NB_ROWS_IN_BATCH, res1.length);
-
- waitForReplication(NB_ROWS_IN_BATCH, NB_RETRIES);
- }
-
- private void waitForReplication(int expectedRows, int retries) throws IOException, InterruptedException {
- Scan scan;
- for (int i = 0; i < retries; i++) {
- scan = new Scan();
- if (i== retries -1) {
- fail("Waited too much time for normal batch replication");
- }
- ResultScanner scanner = htable2.getScanner(scan);
- Result[] res = scanner.next(expectedRows);
- scanner.close();
- if (res.length != expectedRows) {
- LOG.info("Only got " + res.length + " rows");
- Thread.sleep(SLEEP_TIME);
- } else {
- break;
- }
- }
- }
-
- private void loadData(String prefix, byte[] row) throws IOException {
- List<Put> puts = new ArrayList<>(NB_ROWS_IN_BATCH);
- for (int i = 0; i < NB_ROWS_IN_BATCH; i++) {
- Put put = new Put(Bytes.toBytes(prefix + Integer.toString(i)));
- put.addColumn(famName, row, row);
- puts.add(put);
- }
- htable1.put(puts);
- }
-
- /**
- * Test disabling and enabling replication: disable the peer, insert and make sure
- * nothing is replicated, then enable the peer and verify the insert is replicated.
- *
- * @throws Exception
- */
- @Test(timeout = 300000)
- public void testDisableEnable() throws Exception {
-
- // Test disabling replication
- admin.disablePeer(PEER_ID);
-
- byte[] rowkey = Bytes.toBytes("disable enable");
- Put put = new Put(rowkey);
- put.addColumn(famName, row, row);
- htable1.put(put);
-
- Get get = new Get(rowkey);
- for (int i = 0; i < NB_RETRIES; i++) {
- Result res = htable2.get(get);
- if (res.size() >= 1) {
- fail("Replication wasn't disabled");
- } else {
- LOG.info("Row not replicated, let's wait a bit more...");
- Thread.sleep(SLEEP_TIME);
- }
- }
-
- // Test enable replication
- admin.enablePeer(PEER_ID);
-
- for (int i = 0; i < NB_RETRIES; i++) {
- Result res = htable2.get(get);
- if (res.isEmpty()) {
- LOG.info("Row not available");
- Thread.sleep(SLEEP_TIME);
- } else {
- assertArrayEquals(res.value(), row);
- return;
- }
- }
- fail("Waited too much time for put replication");
- }
-
- /**
- * Integration test for TestReplicationAdmin: removes and re-adds a peer
- * cluster.
- *
- * @throws Exception
- */
- @Test(timeout=300000)
- public void testAddAndRemoveClusters() throws Exception {
- LOG.info("testAddAndRemoveClusters");
- admin.removePeer(PEER_ID);
- Thread.sleep(SLEEP_TIME);
- byte[] rowKey = Bytes.toBytes("Won't be replicated");
- Put put = new Put(rowKey);
- put.addColumn(famName, row, row);
- htable1.put(put);
-
- Get get = new Get(rowKey);
- for (int i = 0; i < NB_RETRIES; i++) {
- if (i == NB_RETRIES-1) {
- break;
- }
- Result res = htable2.get(get);
- if (res.size() >= 1) {
- fail("Not supposed to be replicated");
- } else {
- LOG.info("Row not replicated, let's wait a bit more...");
- Thread.sleep(SLEEP_TIME);
- }
- }
- ReplicationPeerConfig rpc = new ReplicationPeerConfig();
- rpc.setClusterKey(utility2.getClusterKey());
- admin.addPeer(PEER_ID, rpc, null);
- Thread.sleep(SLEEP_TIME);
- rowKey = Bytes.toBytes("do rep");
- put = new Put(rowKey);
- put.addColumn(famName, row, row);
- LOG.info("Adding new row");
- htable1.put(put);
-
- get = new Get(rowKey);
- for (int i = 0; i < NB_RETRIES; i++) {
- if (i==NB_RETRIES-1) {
- fail("Waited too much time for put replication");
- }
- Result res = htable2.get(get);
- if (res.isEmpty()) {
- LOG.info("Row not available");
- Thread.sleep(SLEEP_TIME*i);
- } else {
- assertArrayEquals(res.value(), row);
- break;
- }
- }
- }
-
-
- /**
- * Do a more intense version of testSmallBatch, one that will trigger
- * WAL rolling and other non-trivial code paths.
- * @throws Exception
- */
- @Test(timeout=300000)
- public void testLoading() throws Exception {
- LOG.info("Writing out rows to table1 in testLoading");
- List<Put> puts = new ArrayList<>(NB_ROWS_IN_BIG_BATCH);
- for (int i = 0; i < NB_ROWS_IN_BIG_BATCH; i++) {
- Put put = new Put(Bytes.toBytes(i));
- put.addColumn(famName, row, row);
- puts.add(put);
- }
- // The puts will be iterated through and flushed only when the buffer
- // size is reached.
- htable1.put(puts);
-
- Scan scan = new Scan();
-
- ResultScanner scanner = htable1.getScanner(scan);
- Result[] res = scanner.next(NB_ROWS_IN_BIG_BATCH);
- scanner.close();
-
- assertEquals(NB_ROWS_IN_BIG_BATCH, res.length);
-
- LOG.info("Looking in table2 for replicated rows in testLoading");
- long start = System.currentTimeMillis();
- // Retry more than NB_RETRIES. As it was, retries were done in 5 seconds and we'd fail
- // sometimes.
- final long retries = NB_RETRIES * 10;
- for (int i = 0; i < retries; i++) {
- scan = new Scan();
- scanner = htable2.getScanner(scan);
- res = scanner.next(NB_ROWS_IN_BIG_BATCH);
- scanner.close();
- if (res.length != NB_ROWS_IN_BIG_BATCH) {
- if (i == retries - 1) {
- int lastRow = -1;
- for (Result result : res) {
- int currentRow = Bytes.toInt(result.getRow());
- for (int row = lastRow+1; row < currentRow; row++) {
- LOG.error("Row missing: " + row);
- }
- lastRow = currentRow;
- }
- LOG.error("Last row: " + lastRow);
- fail("Waited too much time for normal batch replication, " +
- res.length + " instead of " + NB_ROWS_IN_BIG_BATCH + "; waited=" +
- (System.currentTimeMillis() - start) + "ms");
- } else {
- LOG.info("Only got " + res.length + " rows... retrying");
- Thread.sleep(SLEEP_TIME);
- }
- } else {
- break;
- }
- }
- }
-
- /**
- * Load a small batch into a table, make sure the data is really the same,
- * then run the VerifyReplication job to check the results. Do a second
- * comparison where all the cells are different.
- * @throws Exception
- */
- @Test(timeout=300000)
- public void testVerifyRepJob() throws Exception {
- // Populate the tables, at the same time it guarantees that the tables are
- // identical since it does the check
- testSmallBatch();
-
- String[] args = new String[] {PEER_ID, tableName.getNameAsString()};
- runVerifyReplication(args, NB_ROWS_IN_BATCH, 0);
-
- Scan scan = new Scan();
- ResultScanner rs = htable2.getScanner(scan);
- Put put = null;
- for (Result result : rs) {
- put = new Put(result.getRow());
- Cell firstVal = result.rawCells()[0];
- put.addColumn(CellUtil.cloneFamily(firstVal), CellUtil.cloneQualifier(firstVal),
- Bytes.toBytes("diff data"));
- htable2.put(put);
- }
- Delete delete = new Delete(put.getRow());
- htable2.delete(delete);
- runVerifyReplication(args, 0, NB_ROWS_IN_BATCH);
- }
-
- /**
- * Load a row into a table, make sure the data is really the same,
- * delete the row, make sure the delete marker is replicated,
- * run verify replication with and without raw to check the results.
- * @throws Exception
- */
- @Test(timeout=300000)
- public void testVerifyRepJobWithRawOptions() throws Exception {
- LOG.info(name.getMethodName());
-
- final TableName tableName = TableName.valueOf(name.getMethodName());
- byte[] familyname = Bytes.toBytes("fam_raw");
- byte[] row = Bytes.toBytes("row_raw");
-
- Table lHtable1 = null;
- Table lHtable2 = null;
-
- try {
- HTableDescriptor table = new HTableDescriptor(tableName);
- HColumnDescriptor fam = new HColumnDescriptor(familyname);
- fam.setMaxVersions(100);
- fam.setScope(HConstants.REPLICATION_SCOPE_GLOBAL);
- table.addFamily(fam);
- scopes = new TreeMap<>(Bytes.BYTES_COMPARATOR);
- for (HColumnDescriptor f : table.getColumnFamilies()) {
- scopes.put(f.getName(), f.getScope());
- }
-
- Connection connection1 = ConnectionFactory.createConnection(conf1);
- Connection connection2 = ConnectionFactory.createConnection(conf2);
- try (Admin admin1 = connection1.getAdmin()) {
- admin1.createTable(table, HBaseTestingUtility.KEYS_FOR_HBA_CREATE_TABLE);
- }
- try (Admin admin2 = connection2.getAdmin()) {
- admin2.createTable(table, HBaseTestingUtility.KEYS_FOR_HBA_CREATE_TABLE);
- }
- utility1.waitUntilAllRegionsAssigned(tableName);
- utility2.waitUntilAllRegionsAssigned(tableName);
-
- lHtable1 = utility1.getConnection().getTable(tableName);
- lHtable2 = utility2.getConnection().getTable(tableName);
-
- Put put = new Put(row);
- put.addColumn(familyname, row, row);
- lHtable1.put(put);
-
- Get get = new Get(row);
- for (int i = 0; i < NB_RETRIES; i++) {
- if (i==NB_RETRIES-1) {
- fail("Waited too much time for put replication");
- }
- Result res = lHtable2.get(get);
- if (res.isEmpty()) {
- LOG.info("Row not available");
- Thread.sleep(SLEEP_TIME);
- } else {
- assertArrayEquals(res.value(), row);
- break;
- }
- }
-
- Delete del = new Delete(row);
- lHtable1.delete(del);
-
- get = new Get(row);
- for (int i = 0; i < NB_RETRIES; i++) {
- if (i==NB_RETRIES-1) {
- fail("Waited too much time for del replication");
- }
- Result res = lHtable2.get(get);
- if (res.size() >= 1) {
- LOG.info("Row not deleted");
- Thread.sleep(SLEEP_TIME);
- } else {
- break;
- }
- }
-
- // Checking verifyReplication for the default behavior.
- String[] argsWithoutRaw = new String[] {PEER_ID, tableName.getNameAsString()};
- runVerifyReplication(argsWithoutRaw, 0, 0);
-
- // Checking verifyReplication with raw
- String[] argsWithRawAsTrue = new String[] {"--raw", PEER_ID, tableName.getNameAsString()};
- runVerifyReplication(argsWithRawAsTrue, 1, 0);
- } finally {
- if (lHtable1 != null) {
- lHtable1.close();
- }
- if (lHtable2 != null) {
- lHtable2.close();
- }
- }
- }
-
- private void runVerifyReplication(String[] args, int expectedGoodRows, int expectedBadRows)
- throws IOException, InterruptedException, ClassNotFoundException {
- Job job = new VerifyReplication().createSubmittableJob(new Configuration(conf1), args);
- if (job == null) {
- fail("Job wasn't created, see the log");
- }
- if (!job.waitForCompletion(true)) {
- fail("Job failed, see the log");
- }
- assertEquals(expectedGoodRows, job.getCounters().
- findCounter(VerifyReplication.Verifier.Counters.GOODROWS).getValue());
- assertEquals(expectedBadRows, job.getCounters().
- findCounter(VerifyReplication.Verifier.Counters.BADROWS).getValue());
- }
-
- @Test(timeout=300000)
- // VerifyReplication should honor versions option
- public void testHBase14905() throws Exception {
- // normal Batch tests
- byte[] qualifierName = Bytes.toBytes("f1");
- Put put = new Put(Bytes.toBytes("r1"));
- put.addColumn(famName, qualifierName, Bytes.toBytes("v1002"));
- htable1.put(put);
- put.addColumn(famName, qualifierName, Bytes.toBytes("v1001"));
- htable1.put(put);
- put.addColumn(famName, qualifierName, Bytes.toBytes("v1112"));
- htable1.put(put);
-
- Scan scan = new Scan();
- scan.setMaxVersions(100);
- ResultScanner scanner1 = htable1.getScanner(scan);
- Result[] res1 = scanner1.next(1);
- scanner1.close();
-
- assertEquals(1, res1.length);
- assertEquals(3, res1[0].getColumnCells(famName, qualifierName).size());
-
- for (int i = 0; i < NB_RETRIES; i++) {
- scan = new Scan();
- scan.setMaxVersions(100);
- scanner1 = htable2.getScanner(scan);
- res1 = scanner1.next(1);
- scanner1.close();
- if (res1.length != 1) {
- LOG.info("Only got " + res1.length + " rows");
- Thread.sleep(SLEEP_TIME);
- } else {
- int cellNumber = res1[0].getColumnCells(famName, Bytes.toBytes("f1")).size();
- if (cellNumber != 3) {
- LOG.info("Only got " + cellNumber + " cells");
- Thread.sleep(SLEEP_TIME);
- } else {
- break;
- }
- }
- if (i == NB_RETRIES-1) {
- fail("Waited too much time for normal batch replication");
- }
- }
-
- put.addColumn(famName, qualifierName, Bytes.toBytes("v1111"));
- htable2.put(put);
- put.addColumn(famName, qualifierName, Bytes.toBytes("v1112"));
- htable2.put(put);
-
- scan = new Scan();
- scan.setMaxVersions(100);
- scanner1 = htable2.getScanner(scan);
- res1 = scanner1.next(NB_ROWS_IN_BATCH);
- scanner1.close();
-
- assertEquals(1, res1.length);
- assertEquals(5, res1[0].getColumnCells(famName, qualifierName).size());
-
- String[] args = new String[] {"--versions=100", PEER_ID, tableName.getNameAsString()};
- runVerifyReplication(args, 0, 1);
- }
-
- @Test(timeout=300000)
- // VerifyReplication should honor versions option
- public void testVersionMismatchHBase14905() throws Exception {
- // normal Batch tests
- byte[] qualifierName = Bytes.toBytes("f1");
- Put put = new Put(Bytes.toBytes("r1"));
- long ts = System.currentTimeMillis();
- put.addColumn(famName, qualifierName, ts + 1, Bytes.toBytes("v1"));
- htable1.put(put);
- put.addColumn(famName, qualifierName, ts + 2, Bytes.toBytes("v2"));
- htable1.put(put);
- put.addColumn(famName, qualifierName, ts + 3, Bytes.toBytes("v3"));
- htable1.put(put);
-
- Scan scan = new Scan();
- scan.setMaxVersions(100);
- ResultScanner scanner1 = htable1.getScanner(scan);
- Result[] res1 = scanner1.next(1);
- scanner1.close();
-
- assertEquals(1, res1.length);
- assertEquals(3, res1[0].getColumnCells(famName, qualifierName).size());
-
- for (int i = 0; i < NB_RETRIES; i++) {
- scan = new Scan();
- scan.setMaxVersions(100);
- scanner1 = htable2.getScanner(scan);
- res1 = scanner1.next(1);
- scanner1.close();
- if (res1.length != 1) {
- LOG.info("Only got " + res1.length + " rows");
- Thread.sleep(SLEEP_TIME);
- } else {
- int cellNumber = res1[0].getColumnCells(famName, Bytes.toBytes("f1")).size();
- if (cellNumber != 3) {
- LOG.info("Only got " + cellNumber + " cells");
- Thread.sleep(SLEEP_TIME);
- } else {
- break;
- }
- }
- if (i == NB_RETRIES-1) {
- fail("Waited too much time for normal batch replication");
- }
- }
-
- try {
- // Disabling replication and modifying the particular version of the cell to validate the feature.
- admin.disablePeer(PEER_ID);
- Put put2 = new Put(Bytes.toBytes("r1"));
- put2.addColumn(famName, qualifierName, ts +2, Bytes.toBytes("v99"));
- htable2.put(put2);
-
- scan = new Scan();
- scan.setMaxVersions(100);
- scanner1 = htable2.getScanner(scan);
- res1 = scanner1.next(NB_ROWS_IN_BATCH);
- scanner1.close();
- assertEquals(1, res1.length);
- assertEquals(3, res1[0].getColumnCells(famName, qualifierName).size());
-
- String[] args = new String[] {"--versions=100", PEER_ID, tableName.getNameAsString()};
- runVerifyReplication(args, 0, 1);
- }
- finally {
- admin.enablePeer(PEER_ID);
- }
- }
-
- /**
- * Test for HBASE-9038, Replication.scopeWALEdits would NPE if it wasn't filtering out
- * the compaction WALEdit
- * @throws Exception
- */
- @Test(timeout=300000)
- public void testCompactionWALEdits() throws Exception {
- WALProtos.CompactionDescriptor compactionDescriptor =
- WALProtos.CompactionDescriptor.getDefaultInstance();
- HRegionInfo hri = new HRegionInfo(htable1.getName(),
- HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW);
- WALEdit edit = WALEdit.createCompaction(hri, compactionDescriptor);
- Replication.scopeWALEdits(new WALKey(), edit,
- htable1.getConfiguration(), null);
- }
-
- /**
- * Test for HBASE-8663
- * Create three new tables with colfamilies enabled for replication, then run
- * ReplicationAdmin.listReplicated(). Finally verify the table:colfamily pairs. Note:
- * TestReplicationAdmin is a better place for this testing but it would need mocks.
- * @throws Exception
- */
- @Test(timeout = 300000)
- public void testVerifyListReplicatedTable() throws Exception {
- LOG.info("testVerifyListReplicatedTable");
-
- final String tName = "VerifyListReplicated_";
- final String colFam = "cf1";
- final int numOfTables = 3;
-
- Admin hadmin = utility1.getAdmin();
-
- // Create Tables
- for (int i = 0; i < numOfTables; i++) {
- HTableDescriptor ht = new HTableDescriptor(TableName.valueOf(tName + i));
- HColumnDescriptor cfd = new HColumnDescriptor(colFam);
- cfd.setScope(HConstants.REPLICATION_SCOPE_GLOBAL);
- ht.addFamily(cfd);
- hadmin.createTable(ht);
- }
-
- // verify the result
- List<HashMap<String, String>> replicationColFams = admin.listReplicated();
- int[] match = new int[numOfTables]; // array of 3 with init value of zero
-
- for (int i = 0; i < replicationColFams.size(); i++) {
- HashMap<String, String> replicationEntry = replicationColFams.get(i);
- String tn = replicationEntry.get(ReplicationAdmin.TNAME);
- if ((tn.startsWith(tName)) && replicationEntry.get(ReplicationAdmin.CFNAME).equals(colFam)) {
- int m = Integer.parseInt(tn.substring(tn.length() - 1)); // get the last digit
- match[m]++; // should only increase once
- }
- }
-
- // check the matching result
- for (int i = 0; i < match.length; i++) {
- assertTrue("listReplicated() does not match table " + i, (match[i] == 1));
- }
-
- // drop tables
- for (int i = 0; i < numOfTables; i++) {
- TableName tableName = TableName.valueOf(tName + i);
- hadmin.disableTable(tableName);
- hadmin.deleteTable(tableName);
- }
-
- hadmin.close();
- }
-
- /**
- * Test for HBASE-15259 (WALEdits under replay will also be replicated):
- * verify that edits written during region replay are not replicated to the peer.
- */
- @Test
- public void testReplicationInReplay() throws Exception {
- final TableName tableName = htable1.getName();
-
- HRegion region = utility1.getMiniHBaseCluster().getRegions(tableName).get(0);
- HRegionInfo hri = region.getRegionInfo();
- NavigableMap<byte[], Integer> scopes = new TreeMap<>(Bytes.BYTES_COMPARATOR);
- for (byte[] fam : htable1.getTableDescriptor().getFamiliesKeys()) {
- scopes.put(fam, 1);
- }
- final MultiVersionConcurrencyControl mvcc = new MultiVersionConcurrencyControl();
- int index = utility1.getMiniHBaseCluster().getServerWith(hri.getRegionName());
- WAL wal = utility1.getMiniHBaseCluster().getRegionServer(index).getWAL(region.getRegionInfo());
- final byte[] rowName = Bytes.toBytes("testReplicationInReplay");
- final byte[] qualifier = Bytes.toBytes("q");
- final byte[] value = Bytes.toBytes("v");
- WALEdit edit = new WALEdit(true);
- long now = EnvironmentEdgeManager.currentTime();
- edit.add(new KeyValue(rowName, famName, qualifier,
- now, value));
- WALKey walKey = new WALKey(hri.getEncodedNameAsBytes(), tableName, now, mvcc, scopes);
- wal.append(hri, walKey, edit, true);
- wal.sync();
-
- Get get = new Get(rowName);
- for (int i = 0; i < NB_RETRIES; i++) {
- if (i == NB_RETRIES-1) {
- break;
- }
- Result res = htable2.get(get);
- if (res.size() >= 1) {
- fail("Not supposed to be replicated for " + Bytes.toString(res.getRow()));
- } else {
- LOG.info("Row not replicated, let's wait a bit more...");
- Thread.sleep(SLEEP_TIME);
- }
- }
- }
-
- @Test(timeout=300000)
- public void testVerifyReplicationPrefixFiltering() throws Exception {
- final byte[] prefixRow = Bytes.toBytes("prefixrow");
- final byte[] prefixRow2 = Bytes.toBytes("secondrow");
- loadData("prefixrow", prefixRow);
- loadData("secondrow", prefixRow2);
- loadData("aaa", row);
- loadData("zzz", row);
- waitForReplication(NB_ROWS_IN_BATCH * 4, NB_RETRIES * 4);
- String[] args = new String[] {"--row-prefixes=prefixrow,secondrow", PEER_ID,
- tableName.getNameAsString()};
- runVerifyReplication(args, NB_ROWS_IN_BATCH *2, 0);
- }
-
- @Test(timeout = 300000)
- public void testVerifyReplicationSnapshotArguments() {
- String[] args =
- new String[] { "--sourceSnapshotName=snapshot1", "2", tableName.getNameAsString() };
- assertFalse(Lists.newArrayList(args).toString(), new VerifyReplication().doCommandLine(args));
-
- args = new String[] { "--sourceSnapshotTmpDir=tmp", "2", tableName.getNameAsString() };
- assertFalse(Lists.newArrayList(args).toString(), new VerifyReplication().doCommandLine(args));
-
- args = new String[] { "--sourceSnapshotName=snapshot1", "--sourceSnapshotTmpDir=tmp", "2",
- tableName.getNameAsString() };
- assertTrue(Lists.newArrayList(args).toString(), new VerifyReplication().doCommandLine(args));
-
- args = new String[] { "--peerSnapshotName=snapshot1", "2", tableName.getNameAsString() };
- assertFalse(Lists.newArrayList(args).toString(), new VerifyReplication().doCommandLine(args));
-
- args = new String[] { "--peerSnapshotTmpDir=/tmp/", "2", tableName.getNameAsString() };
- assertFalse(Lists.newArrayList(args).toString(), new VerifyReplication().doCommandLine(args));
-
- args = new String[] { "--peerSnapshotName=snapshot1", "--peerSnapshotTmpDir=/tmp/",
- "--peerFSAddress=tempfs", "--peerHBaseRootAddress=hdfs://tempfs:50070/hbase/", "2",
- tableName.getNameAsString() };
- assertTrue(Lists.newArrayList(args).toString(), new VerifyReplication().doCommandLine(args));
-
- args = new String[] { "--sourceSnapshotName=snapshot1", "--sourceSnapshotTmpDir=/tmp/",
- "--peerSnapshotName=snapshot2", "--peerSnapshotTmpDir=/tmp/", "--peerFSAddress=tempfs",
- "--peerHBaseRootAddress=hdfs://tempfs:50070/hbase/", "2", tableName.getNameAsString() };
-
- assertTrue(Lists.newArrayList(args).toString(), new VerifyReplication().doCommandLine(args));
- }
-
- @Test(timeout = 300000)
- public void testVerifyReplicationWithSnapshotSupport() throws Exception {
- // Populate the tables; testSmallBatch() also verifies that the source and peer
- // tables are identical, so they are known to match before the snapshots are taken
- testSmallBatch();
-
- // Take source and target tables snapshot
- Path rootDir = FSUtils.getRootDir(conf1);
- FileSystem fs = rootDir.getFileSystem(conf1);
- String sourceSnapshotName = "sourceSnapshot-" + System.currentTimeMillis();
- SnapshotTestingUtils.createSnapshotAndValidate(utility1.getHBaseAdmin(), tableName,
- new String(famName), sourceSnapshotName, rootDir, fs, true);
-
- // Take target snapshot
- Path peerRootDir = FSUtils.getRootDir(conf2);
- FileSystem peerFs = peerRootDir.getFileSystem(conf2);
- String peerSnapshotName = "peerSnapshot-" + System.currentTimeMillis();
- SnapshotTestingUtils.createSnapshotAndValidate(utility2.getHBaseAdmin(), tableName,
- new String(famName), peerSnapshotName, peerRootDir, peerFs, true);
-
- String peerFSAddress = peerFs.getUri().toString();
- String temPath1 = utility1.getRandomDir().toString();
- String temPath2 = "/tmp2";
-
- String[] args = new String[] { "--sourceSnapshotName=" + sourceSnapshotName,
- "--sourceSnapshotTmpDir=" + temPath1, "--peerSnapshotName=" + peerSnapshotName,
- "--peerSnapshotTmpDir=" + temPath2, "--peerFSAddress=" + peerFSAddress,
- "--peerHBaseRootAddress=" + FSUtils.getRootDir(conf2), "2", tableName.getNameAsString() };
-
- Job job = new VerifyReplication().createSubmittableJob(conf1, args);
- if (job == null) {
- fail("Job wasn't created, see the log");
- }
- if (!job.waitForCompletion(true)) {
- fail("Job failed, see the log");
- }
- assertEquals(NB_ROWS_IN_BATCH,
- job.getCounters().findCounter(VerifyReplication.Verifier.Counters.GOODROWS).getValue());
- assertEquals(0,
- job.getCounters().findCounter(VerifyReplication.Verifier.Counters.BADROWS).getValue());
-
- Scan scan = new Scan();
- ResultScanner rs = htable2.getScanner(scan);
- Put put = null;
- for (Result result : rs) {
- put = new Put(result.getRow());
- Cell firstVal = result.rawCells()[0];
- put.addColumn(CellUtil.cloneFamily(firstVal), CellUtil.cloneQualifier(firstVal),
- Bytes.toBytes("diff data"));
- htable2.put(put);
- }
- Delete delete = new Delete(put.getRow());
- htable2.delete(delete);
-
- sourceSnapshotName = "sourceSnapshot-" + System.currentTimeMillis();
- SnapshotTestingUtils.createSnapshotAndValidate(utility1.getHBaseAdmin(), tableName,
- new String(famName), sourceSnapshotName, rootDir, fs, true);
-
- peerSnapshotName = "peerSnapshot-" + System.currentTimeMillis();
- SnapshotTestingUtils.createSnapshotAndValidate(utility2.getHBaseAdmin(), tableName,
- new String(famName), peerSnapshotName, peerRootDir, peerFs, true);
-
- args = new String[] { "--sourceSnapshotName=" + sourceSnapshotName,
- "--sourceSnapshotTmpDir=" + temPath1, "--peerSnapshotName=" + peerSnapshotName,
- "--peerSnapshotTmpDir=" + temPath2, "--peerFSAddress=" + peerFSAddress,
- "--peerHBaseRootAddress=" + FSUtils.getRootDir(conf2), "2", tableName.getNameAsString() };
-
- job = new VerifyReplication().createSubmittableJob(conf1, args);
- if (job == null) {
- fail("Job wasn't created, see the log");
- }
- if (!job.waitForCompletion(true)) {
- fail("Job failed, see the log");
- }
- assertEquals(0,
- job.getCounters().findCounter(VerifyReplication.Verifier.Counters.GOODROWS).getValue());
- assertEquals(NB_ROWS_IN_BATCH,
- job.getCounters().findCounter(VerifyReplication.Verifier.Counters.BADROWS).getValue());
- }
-
- @Test
- public void testEmptyWALRecovery() throws Exception {
- final int numRs = utility1.getHBaseCluster().getRegionServerThreads().size();
-
- // for each RS, create an empty wal with same walGroupId
- final List<Path> emptyWalPaths = new ArrayList<>();
- long ts = System.currentTimeMillis();
- for (int i = 0; i < numRs; i++) {
- HRegionInfo regionInfo =
- utility1.getHBaseCluster().getRegions(htable1.getName()).get(0).getRegionInfo();
- WAL wal = utility1.getHBaseCluster().getRegionServer(i).getWAL(regionInfo);
- Path currentWalPath = AbstractFSWALProvider.getCurrentFileName(wal);
- String walGroupId = AbstractFSWALProvider.getWALPrefixFromWALName(currentWalPath.getName());
- Path emptyWalPath = new Path(utility1.getDataTestDir(), walGroupId + "." + ts);
- utility1.getTestFileSystem().create(emptyWalPath).close();
- emptyWalPaths.add(emptyWalPath);
- }
-
- // inject our empty wal into the replication queue
- for (int i = 0; i < numRs; i++) {
- Replication replicationService =
- (Replication) utility1.getHBaseCluster().getRegionServer(i).getReplicationSourceService();
- replicationService.preLogRoll(null, emptyWalPaths.get(i));
- replicationService.postLogRoll(null, emptyWalPaths.get(i));
- }
-
- // wait for ReplicationSource to start reading from our empty wal
- waitForLogAdvance(numRs, emptyWalPaths, false);
-
- // roll the original wal, which enqueues a new wal behind our empty wal
- for (int i = 0; i < numRs; i++) {
- HRegionInfo regionInfo =
- utility1.getHBaseCluster().getRegions(htable1.getName()).get(0).getRegionInfo();
- WAL wal = utility1.getHBaseCluster().getRegionServer(i).getWAL(regionInfo);
- wal.rollWriter(true);
- }
-
- // ReplicationSource should advance past the empty wal, or else the test will fail
- waitForLogAdvance(numRs, emptyWalPaths, true);
-
- // we're now writing to the new wal
- // if everything works, the source should have stopped reading from the empty wal and
- // started replicating from the new wal
- testSimplePutDelete();
- }
-
- /**
- * Waits for the ReplicationSource to start reading from the given paths
- * @param numRs number of regionservers
- * @param emptyWalPaths path for each regionserver
- * @param invert if true, waits until ReplicationSource is NOT reading from the given paths
- */
- private void waitForLogAdvance(final int numRs, final List<Path> emptyWalPaths,
- final boolean invert) throws Exception {
- Waiter.waitFor(conf1, 10000, new Waiter.Predicate<Exception>() {
- @Override
- public boolean evaluate() throws Exception {
- for (int i = 0; i < numRs; i++) {
- Replication replicationService = (Replication) utility1.getHBaseCluster()
- .getRegionServer(i).getReplicationSourceService();
- for (ReplicationSourceInterface rsi : replicationService.getReplicationManager()
- .getSources()) {
- ReplicationSource source = (ReplicationSource) rsi;
- if (!invert && !emptyWalPaths.get(i).equals(source.getCurrentPath())) {
- return false;
- }
- if (invert && emptyWalPaths.get(i).equals(source.getCurrentPath())) {
- return false;
- }
- }
- }
- return true;
- }
- });
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/security/HBaseKerberosUtils.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/security/HBaseKerberosUtils.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/security/HBaseKerberosUtils.java
index 07bb2b7..94991e1 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/security/HBaseKerberosUtils.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/security/HBaseKerberosUtils.java
@@ -17,15 +17,22 @@
*/
package org.apache.hadoop.hbase.security;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
-
import org.apache.hadoop.hbase.shaded.com.google.common.base.Strings;
+import org.apache.hadoop.security.UserGroupInformation;
+
+import java.io.IOException;
+import java.net.InetAddress;
@InterfaceAudience.Private
public class HBaseKerberosUtils {
+ private static final Log LOG = LogFactory.getLog(HBaseKerberosUtils.class);
+
public static final String KRB_PRINCIPAL = "hbase.regionserver.kerberos.principal";
public static final String MASTER_KRB_PRINCIPAL = "hbase.master.kerberos.principal";
public static final String KRB_KEYTAB_FILE = "hbase.regionserver.keytab.file";
@@ -81,4 +88,21 @@ public class HBaseKerberosUtils {
conf.set(KRB_PRINCIPAL, System.getProperty(KRB_PRINCIPAL));
conf.set(MASTER_KRB_PRINCIPAL, System.getProperty(KRB_PRINCIPAL));
}
+
+ public static UserGroupInformation loginAndReturnUGI(Configuration conf, String username)
+ throws IOException {
+ String hostname = InetAddress.getLocalHost().getHostName();
+ String keyTabFileConfKey = "hbase." + username + ".keytab.file";
+ String keyTabFileLocation = conf.get(keyTabFileConfKey);
+ String principalConfKey = "hbase." + username + ".kerberos.principal";
+ String principal = org.apache.hadoop.security.SecurityUtil
+ .getServerPrincipal(conf.get(principalConfKey), hostname);
+ if (keyTabFileLocation == null || principal == null) {
+ LOG.warn("Principal or key tab file null for : " + principalConfKey + ", "
+ + keyTabFileConfKey);
+ }
+ UserGroupInformation ugi =
+ UserGroupInformation.loginUserFromKeytabAndReturnUGI(principal, keyTabFileLocation);
+ return ugi;
+ }
}
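A minimal usage sketch for the loginAndReturnUGI() helper added above, assuming the test configuration defines "hbase.superuser.keytab.file" and "hbase.superuser.kerberos.principal"; the "superuser" short name and the KerberosLoginSketch class are illustrative only, not part of the patch.

import java.security.PrivilegedExceptionAction;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.security.HBaseKerberosUtils;
import org.apache.hadoop.security.UserGroupInformation;

public class KerberosLoginSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // Logs in from the keytab named by hbase.superuser.keytab.file as the principal
    // from hbase.superuser.kerberos.principal (property names assumed for illustration).
    UserGroupInformation ugi = HBaseKerberosUtils.loginAndReturnUGI(conf, "superuser");
    ugi.doAs(new PrivilegedExceptionAction<Void>() {
      @Override
      public Void run() {
        // Privileged test actions would run here as the keytab-based user.
        return null;
      }
    });
  }
}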
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
deleted file mode 100644
index 2e3cb5e..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
+++ /dev/null
@@ -1,381 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.snapshot;
-
-import static org.apache.hadoop.util.ToolRunner.run;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-
-import java.io.IOException;
-import java.net.URI;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.CategoryBasedTimeout;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Admin;
-import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.FSUtils;
-import org.junit.After;
-import org.junit.AfterClass;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.Ignore;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
-import org.junit.rules.TestRule;
-
-/**
- * Test Export Snapshot Tool
- */
-@Ignore
-@Category({VerySlowMapReduceTests.class, LargeTests.class})
-public class TestExportSnapshot {
- @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
- withTimeout(this.getClass()).withLookingForStuckThread(true).build();
- private static final Log LOG = LogFactory.getLog(TestExportSnapshot.class);
-
- protected final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
-
- protected final static byte[] FAMILY = Bytes.toBytes("cf");
-
- @Rule
- public final TestName testName = new TestName();
-
- protected TableName tableName;
- private byte[] emptySnapshotName;
- private byte[] snapshotName;
- private int tableNumFiles;
- private Admin admin;
-
- public static void setUpBaseConf(Configuration conf) {
- conf.setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true);
- conf.setInt("hbase.regionserver.msginterval", 100);
- conf.setInt("hbase.client.pause", 250);
- conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 6);
- conf.setBoolean("hbase.master.enabletable.roundrobin", true);
- conf.setInt("mapreduce.map.maxattempts", 10);
- // If a single node has enough failures (default 3), resource manager will blacklist it.
- // With only 2 nodes and tests injecting faults, we don't want that.
- conf.setInt("mapreduce.job.maxtaskfailures.per.tracker", 100);
- }
-
- @BeforeClass
- public static void setUpBeforeClass() throws Exception {
- setUpBaseConf(TEST_UTIL.getConfiguration());
- TEST_UTIL.startMiniCluster(1, 3);
- TEST_UTIL.startMiniMapReduceCluster();
- }
-
- @AfterClass
- public static void tearDownAfterClass() throws Exception {
- TEST_UTIL.shutdownMiniMapReduceCluster();
- TEST_UTIL.shutdownMiniCluster();
- }
-
- /**
- * Create a table and take a snapshot of the table used by the export test.
- */
- @Before
- public void setUp() throws Exception {
- this.admin = TEST_UTIL.getAdmin();
-
- tableName = TableName.valueOf("testtb-" + testName.getMethodName());
- snapshotName = Bytes.toBytes("snaptb0-" + testName.getMethodName());
- emptySnapshotName = Bytes.toBytes("emptySnaptb0-" + testName.getMethodName());
-
- // create Table
- createTable();
-
- // Take an empty snapshot
- admin.snapshot(emptySnapshotName, tableName);
-
- // Add some rows
- SnapshotTestingUtils.loadData(TEST_UTIL, tableName, 50, FAMILY);
- tableNumFiles = admin.getTableRegions(tableName).size();
-
- // take a snapshot
- admin.snapshot(snapshotName, tableName);
- }
-
- protected void createTable() throws Exception {
- SnapshotTestingUtils.createPreSplitTable(TEST_UTIL, tableName, 2, FAMILY);
- }
-
- protected interface RegionPredicate {
- boolean evaluate(final HRegionInfo regionInfo);
- }
-
- protected RegionPredicate getBypassRegionPredicate() {
- return null;
- }
-
- @After
- public void tearDown() throws Exception {
- TEST_UTIL.deleteTable(tableName);
- SnapshotTestingUtils.deleteAllSnapshots(TEST_UTIL.getAdmin());
- SnapshotTestingUtils.deleteArchiveDirectory(TEST_UTIL);
- }
-
- /**
- * Verify that the exported snapshot and copied files match the original ones.
- */
- @Test
- public void testExportFileSystemState() throws Exception {
- testExportFileSystemState(tableName, snapshotName, snapshotName, tableNumFiles);
- }
-
- @Test
- public void testExportFileSystemStateWithSkipTmp() throws Exception {
- TEST_UTIL.getConfiguration().setBoolean(ExportSnapshot.CONF_SKIP_TMP, true);
- try {
- testExportFileSystemState(tableName, snapshotName, snapshotName, tableNumFiles);
- } finally {
- TEST_UTIL.getConfiguration().setBoolean(ExportSnapshot.CONF_SKIP_TMP, false);
- }
- }
-
- @Test
- public void testEmptyExportFileSystemState() throws Exception {
- testExportFileSystemState(tableName, emptySnapshotName, emptySnapshotName, 0);
- }
-
- @Test
- public void testConsecutiveExports() throws Exception {
- Path copyDir = getLocalDestinationDir();
- testExportFileSystemState(tableName, snapshotName, snapshotName, tableNumFiles, copyDir, false);
- testExportFileSystemState(tableName, snapshotName, snapshotName, tableNumFiles, copyDir, true);
- removeExportDir(copyDir);
- }
-
- @Test
- public void testExportWithTargetName() throws Exception {
- final byte[] targetName = Bytes.toBytes("testExportWithTargetName");
- testExportFileSystemState(tableName, snapshotName, targetName, tableNumFiles);
- }
-
- private void testExportFileSystemState(final TableName tableName, final byte[] snapshotName,
- final byte[] targetName, int filesExpected) throws Exception {
- testExportFileSystemState(tableName, snapshotName, targetName,
- filesExpected, getHdfsDestinationDir(), false);
- }
-
- protected void testExportFileSystemState(final TableName tableName,
- final byte[] snapshotName, final byte[] targetName, int filesExpected,
- Path copyDir, boolean overwrite) throws Exception {
- testExportFileSystemState(TEST_UTIL.getConfiguration(), tableName, snapshotName, targetName,
- filesExpected, TEST_UTIL.getDefaultRootDirPath(), copyDir,
- overwrite, getBypassRegionPredicate(), true);
- }
-
- /**
- * Creates the destination directory, runs the ExportSnapshot tool, and runs some verifications.
- */
- protected static void testExportFileSystemState(final Configuration conf, final TableName tableName,
- final byte[] snapshotName, final byte[] targetName, final int filesExpected,
- final Path sourceDir, Path copyDir, final boolean overwrite,
- final RegionPredicate bypassregionPredicate, boolean success) throws Exception {
- URI hdfsUri = FileSystem.get(conf).getUri();
- FileSystem fs = FileSystem.get(copyDir.toUri(), new Configuration());
- copyDir = copyDir.makeQualified(fs);
-
- List<String> opts = new ArrayList<>();
- opts.add("--snapshot");
- opts.add(Bytes.toString(snapshotName));
- opts.add("--copy-to");
- opts.add(copyDir.toString());
- if (targetName != snapshotName) {
- opts.add("--target");
- opts.add(Bytes.toString(targetName));
- }
- if (overwrite) opts.add("--overwrite");
-
- // Export Snapshot
- int res = run(conf, new ExportSnapshot(), opts.toArray(new String[opts.size()]));
- assertEquals(success ? 0 : 1, res);
- if (!success) {
- final Path targetDir = new Path(HConstants.SNAPSHOT_DIR_NAME, Bytes.toString(targetName));
- assertFalse(fs.exists(new Path(copyDir, targetDir)));
- return;
- }
-
- // Verify File-System state
- FileStatus[] rootFiles = fs.listStatus(copyDir);
- assertEquals(filesExpected > 0 ? 2 : 1, rootFiles.length);
- for (FileStatus fileStatus: rootFiles) {
- String name = fileStatus.getPath().getName();
- assertTrue(fileStatus.isDirectory());
- assertTrue(name.equals(HConstants.SNAPSHOT_DIR_NAME) ||
- name.equals(HConstants.HFILE_ARCHIVE_DIRECTORY));
- }
-
- // compare the snapshot metadata and verify the hfiles
- final FileSystem hdfs = FileSystem.get(hdfsUri, conf);
- final Path snapshotDir = new Path(HConstants.SNAPSHOT_DIR_NAME, Bytes.toString(snapshotName));
- final Path targetDir = new Path(HConstants.SNAPSHOT_DIR_NAME, Bytes.toString(targetName));
- verifySnapshotDir(hdfs, new Path(sourceDir, snapshotDir),
- fs, new Path(copyDir, targetDir));
- Set<String> snapshotFiles = verifySnapshot(conf, fs, copyDir, tableName,
- Bytes.toString(targetName), bypassregionPredicate);
- assertEquals(filesExpected, snapshotFiles.size());
- }
-
- /**
- * Check that ExportSnapshot will succeed if something fails but the retry succeeds.
- */
- @Test
- public void testExportRetry() throws Exception {
- Path copyDir = getLocalDestinationDir();
- FileSystem fs = FileSystem.get(copyDir.toUri(), new Configuration());
- copyDir = copyDir.makeQualified(fs);
- Configuration conf = new Configuration(TEST_UTIL.getConfiguration());
- conf.setBoolean(ExportSnapshot.Testing.CONF_TEST_FAILURE, true);
- conf.setInt(ExportSnapshot.Testing.CONF_TEST_FAILURE_COUNT, 2);
- conf.setInt("mapreduce.map.maxattempts", 3);
- testExportFileSystemState(conf, tableName, snapshotName, snapshotName, tableNumFiles,
- TEST_UTIL.getDefaultRootDirPath(), copyDir, true, getBypassRegionPredicate(), true);
- }
-
- /**
- * Check that ExportSnapshot will fail if we inject failure more times than MR will retry.
- */
- @Test
- public void testExportFailure() throws Exception {
- Path copyDir = getLocalDestinationDir();
- FileSystem fs = FileSystem.get(copyDir.toUri(), new Configuration());
- copyDir = copyDir.makeQualified(fs);
- Configuration conf = new Configuration(TEST_UTIL.getConfiguration());
- conf.setBoolean(ExportSnapshot.Testing.CONF_TEST_FAILURE, true);
- conf.setInt(ExportSnapshot.Testing.CONF_TEST_FAILURE_COUNT, 4);
- conf.setInt("mapreduce.map.maxattempts", 3);
- testExportFileSystemState(conf, tableName, snapshotName, snapshotName, tableNumFiles,
- TEST_UTIL.getDefaultRootDirPath(), copyDir, true, getBypassRegionPredicate(), false);
- }
-
- /*
- * Verify that the snapshot folder on file-system 1 matches the one on file-system 2
- */
- protected static void verifySnapshotDir(final FileSystem fs1, final Path root1,
- final FileSystem fs2, final Path root2) throws IOException {
- assertEquals(listFiles(fs1, root1, root1), listFiles(fs2, root2, root2));
- }
-
- protected Set<String> verifySnapshot(final FileSystem fs, final Path rootDir,
- final TableName tableName, final String snapshotName) throws IOException {
- return verifySnapshot(TEST_UTIL.getConfiguration(), fs, rootDir, tableName,
- snapshotName, getBypassRegionPredicate());
- }
-
- /*
- * Verify that the files exist
- */
- protected static Set<String> verifySnapshot(final Configuration conf, final FileSystem fs,
- final Path rootDir, final TableName tableName, final String snapshotName,
- final RegionPredicate bypassregionPredicate) throws IOException {
- final Path exportedSnapshot = new Path(rootDir,
- new Path(HConstants.SNAPSHOT_DIR_NAME, snapshotName));
- final Set<String> snapshotFiles = new HashSet<>();
- final Path exportedArchive = new Path(rootDir, HConstants.HFILE_ARCHIVE_DIRECTORY);
- SnapshotReferenceUtil.visitReferencedFiles(conf, fs, exportedSnapshot,
- new SnapshotReferenceUtil.SnapshotVisitor() {
- @Override
- public void storeFile(final HRegionInfo regionInfo, final String family,
- final SnapshotRegionManifest.StoreFile storeFile) throws IOException {
- if (bypassregionPredicate != null && bypassregionPredicate.evaluate(regionInfo))
- return;
-
- String hfile = storeFile.getName();
- snapshotFiles.add(hfile);
- if (storeFile.hasReference()) {
- // Nothing to do here, we already have the reference embedded
- } else {
- verifyNonEmptyFile(new Path(exportedArchive,
- new Path(FSUtils.getTableDir(new Path("./"), tableName),
- new Path(regionInfo.getEncodedName(), new Path(family, hfile)))));
- }
- }
-
- private void verifyNonEmptyFile(final Path path) throws IOException {
- assertTrue(path + " should exist", fs.exists(path));
- assertTrue(path + " should not be empty", fs.getFileStatus(path).getLen() > 0);
- }
- });
-
- // Verify Snapshot description
- SnapshotDescription desc = SnapshotDescriptionUtils.readSnapshotInfo(fs, exportedSnapshot);
- assertTrue(desc.getName().equals(snapshotName));
- assertTrue(desc.getTable().equals(tableName.getNameAsString()));
- return snapshotFiles;
- }
-
- private static Set<String> listFiles(final FileSystem fs, final Path root, final Path dir)
- throws IOException {
- Set<String> files = new HashSet<>();
- int rootPrefix = root.makeQualified(fs).toString().length();
- FileStatus[] list = FSUtils.listStatus(fs, dir);
- if (list != null) {
- for (FileStatus fstat: list) {
- LOG.debug(fstat.getPath());
- if (fstat.isDirectory()) {
- files.addAll(listFiles(fs, root, fstat.getPath()));
- } else {
- files.add(fstat.getPath().makeQualified(fs).toString().substring(rootPrefix));
- }
- }
- }
- return files;
- }
-
- private Path getHdfsDestinationDir() {
- Path rootDir = TEST_UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir();
- Path path = new Path(new Path(rootDir, "export-test"), "export-" + System.currentTimeMillis());
- LOG.info("HDFS export destination path: " + path);
- return path;
- }
-
- private Path getLocalDestinationDir() {
- Path path = TEST_UTIL.getDataTestDir("local-export-" + System.currentTimeMillis());
- LOG.info("Local export destination path: " + path);
- return path;
- }
-
- private static void removeExportDir(final Path path) throws IOException {
- FileSystem fs = FileSystem.get(path.toUri(), new Configuration());
- fs.delete(path, true);
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotHelpers.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotHelpers.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotHelpers.java
deleted file mode 100644
index e31e81e..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotHelpers.java
+++ /dev/null
@@ -1,91 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.snapshot;
-
-import static org.junit.Assert.assertEquals;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotFileInfo;
-import org.apache.hadoop.hbase.testclassification.RegionServerTests;
-import org.apache.hadoop.hbase.testclassification.SmallTests;
-import org.apache.hadoop.hbase.util.Pair;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-/**
- * Test Export Snapshot Tool helpers
- */
-@Category({RegionServerTests.class, SmallTests.class})
-public class TestExportSnapshotHelpers {
- /**
- * Verify the result of the getBalancedSplits() method.
- * The result is a set of groups of files, used as the input list for the "export" mappers.
- * All the groups should hold a similar amount of data.
- *
- * The input list is a list of (file path, length) pairs.
- * The getBalancedSplits() function sorts it by length,
- * and assigns a file to each group, going back and forth through the groups.
- */
- @Test
- public void testBalanceSplit() throws Exception {
- // Create a list of files
- List<Pair<SnapshotFileInfo, Long>> files = new ArrayList<>(21);
- for (long i = 0; i <= 20; i++) {
- SnapshotFileInfo fileInfo = SnapshotFileInfo.newBuilder()
- .setType(SnapshotFileInfo.Type.HFILE)
- .setHfile("file-" + i)
- .build();
- files.add(new Pair<>(fileInfo, i));
- }
-
- // Create 5 groups (total size 210)
- // group 0: 20, 11, 10, 1, 0 (total size: 42)
- // group 1: 19, 12, 9, 2 (total size: 42)
- // group 2: 18, 13, 8, 3 (total size: 42)
- // group 3: 17, 14, 7, 4 (total size: 42)
- // group 4: 16, 15, 6, 5 (total size: 42)
- List<List<Pair<SnapshotFileInfo, Long>>> splits = ExportSnapshot.getBalancedSplits(files, 5);
- assertEquals(5, splits.size());
-
- String[] split0 = new String[] {"file-20", "file-11", "file-10", "file-1", "file-0"};
- verifyBalanceSplit(splits.get(0), split0, 42);
- String[] split1 = new String[] {"file-19", "file-12", "file-9", "file-2"};
- verifyBalanceSplit(splits.get(1), split1, 42);
- String[] split2 = new String[] {"file-18", "file-13", "file-8", "file-3"};
- verifyBalanceSplit(splits.get(2), split2, 42);
- String[] split3 = new String[] {"file-17", "file-14", "file-7", "file-4"};
- verifyBalanceSplit(splits.get(3), split3, 42);
- String[] split4 = new String[] {"file-16", "file-15", "file-6", "file-5"};
- verifyBalanceSplit(splits.get(4), split4, 42);
- }
-
- private void verifyBalanceSplit(final List<Pair<SnapshotFileInfo, Long>> split,
- final String[] expected, final long expectedSize) {
- assertEquals(expected.length, split.size());
- long totalSize = 0;
- for (int i = 0; i < expected.length; ++i) {
- Pair<SnapshotFileInfo, Long> fileInfo = split.get(i);
- assertEquals(expected[i], fileInfo.getFirst().getHfile());
- totalSize += fileInfo.getSecond();
- }
- assertEquals(expectedSize, totalSize);
- }
-}
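The grouping above can be illustrated with a small self-contained sketch of the back-and-forth assignment the javadoc describes. This is not the real ExportSnapshot.getBalancedSplits() implementation; the BalancedSplitSketch class and the use of plain longs for file lengths are assumptions for illustration, but with lengths 0..20 and 5 groups it reproduces the groups asserted in testBalanceSplit().

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public class BalancedSplitSketch {
  // Distributes file lengths over the given number of groups, walking the groups
  // back and forth so every group ends up with a similar total size.
  public static List<List<Long>> split(List<Long> lengths, int groups) {
    List<Long> sorted = new ArrayList<>(lengths);
    sorted.sort(Collections.reverseOrder());   // largest files first
    List<List<Long>> result = new ArrayList<>(groups);
    for (int g = 0; g < groups; g++) {
      result.add(new ArrayList<Long>());
    }
    int i = 0;
    int dir = 1;                               // +1 walking forward, -1 walking backward
    for (Long length : sorted) {
      result.get(i).add(length);
      if (i + dir < 0 || i + dir >= groups) {
        dir = -dir;                            // bounce at either end, reusing the edge group
      } else {
        i += dir;
      }
    }
    return result;
  }
}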
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotNoCluster.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotNoCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotNoCluster.java
deleted file mode 100644
index 00778502..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotNoCluster.java
+++ /dev/null
@@ -1,112 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.snapshot;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.CategoryBasedTimeout;
-import org.apache.hadoop.hbase.HBaseCommonTestingUtility;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
-import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils.SnapshotMock;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.testclassification.MediumTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestRule;
-
-/**
- * Test Export Snapshot Tool
- */
-@Category({MapReduceTests.class, MediumTests.class})
-public class TestExportSnapshotNoCluster {
- @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
- withTimeout(this.getClass()).withLookingForStuckThread(true).build();
- private static final Log LOG = LogFactory.getLog(TestExportSnapshotNoCluster.class);
-
- protected final static HBaseCommonTestingUtility TEST_UTIL = new HBaseCommonTestingUtility();
-
- private static FileSystem fs;
- private static Path testDir;
-
- public static void setUpBaseConf(Configuration conf) {
- conf.setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true);
- conf.setInt("hbase.regionserver.msginterval", 100);
- conf.setInt("hbase.client.pause", 250);
- conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 6);
- conf.setBoolean("hbase.master.enabletable.roundrobin", true);
- conf.setInt("mapreduce.map.maxattempts", 10);
- conf.set(HConstants.HBASE_DIR, testDir.toString());
- }
-
- @BeforeClass
- public static void setUpBeforeClass() throws Exception {
- testDir = TEST_UTIL.getDataTestDir();
- fs = testDir.getFileSystem(TEST_UTIL.getConfiguration());
-
- setUpBaseConf(TEST_UTIL.getConfiguration());
- }
-
- /**
- * Mock a snapshot with files in the archive dir,
- * two regions, and one reference file.
- */
- @Test
- public void testSnapshotWithRefsExportFileSystemState() throws Exception {
- SnapshotMock snapshotMock = new SnapshotMock(TEST_UTIL.getConfiguration(), fs, testDir);
- SnapshotMock.SnapshotBuilder builder = snapshotMock.createSnapshotV2("tableWithRefsV1",
- "tableWithRefsV1");
- testSnapshotWithRefsExportFileSystemState(builder);
-
- snapshotMock = new SnapshotMock(TEST_UTIL.getConfiguration(), fs, testDir);
- builder = snapshotMock.createSnapshotV2("tableWithRefsV2", "tableWithRefsV2");
- testSnapshotWithRefsExportFileSystemState(builder);
- }
-
- /**
- * Generates a couple of regions for the specified SnapshotMock,
- * then runs the export and verification.
- */
- private void testSnapshotWithRefsExportFileSystemState(SnapshotMock.SnapshotBuilder builder)
- throws Exception {
- Path[] r1Files = builder.addRegion();
- Path[] r2Files = builder.addRegion();
- builder.commit();
- int snapshotFilesCount = r1Files.length + r2Files.length;
-
- byte[] snapshotName = Bytes.toBytes(builder.getSnapshotDescription().getName());
- TableName tableName = builder.getTableDescriptor().getTableName();
- TestExportSnapshot.testExportFileSystemState(TEST_UTIL.getConfiguration(),
- tableName, snapshotName, snapshotName, snapshotFilesCount,
- testDir, getDestinationDir(), false, null, true);
- }
-
- private Path getDestinationDir() {
- Path path = new Path(new Path(testDir, "export-test"), "export-" + System.currentTimeMillis());
- LOG.info("HDFS export destination path: " + path);
- return path;
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobExportSnapshot.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobExportSnapshot.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobExportSnapshot.java
deleted file mode 100644
index 7407a7d..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobExportSnapshot.java
+++ /dev/null
@@ -1,65 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.snapshot;
-
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.mob.MobConstants;
-import org.apache.hadoop.hbase.mob.MobUtils;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.VerySlowRegionServerTests;
-import org.junit.BeforeClass;
-import org.junit.Ignore;
-import org.junit.experimental.categories.Category;
-
-/**
- * Test Export Snapshot Tool
- */
-@Ignore
-@Category({VerySlowRegionServerTests.class, LargeTests.class})
-public class TestMobExportSnapshot extends TestExportSnapshot {
-
- public static void setUpBaseConf(Configuration conf) {
- TestExportSnapshot.setUpBaseConf(conf);
- conf.setInt(MobConstants.MOB_FILE_CACHE_SIZE_KEY, 0);
- }
-
- @BeforeClass
- public static void setUpBeforeClass() throws Exception {
- setUpBaseConf(TEST_UTIL.getConfiguration());
- TEST_UTIL.startMiniCluster(1, 3);
- TEST_UTIL.startMiniMapReduceCluster();
- }
-
- @Override
- protected void createTable() throws Exception {
- MobSnapshotTestingUtils.createPreSplitMobTable(TEST_UTIL, tableName, 2, FAMILY);
- }
-
- @Override
- protected RegionPredicate getBypassRegionPredicate() {
- return new RegionPredicate() {
- @Override
- public boolean evaluate(final HRegionInfo regionInfo) {
- return MobUtils.isMobRegionInfo(regionInfo);
- }
- };
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobSecureExportSnapshot.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobSecureExportSnapshot.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobSecureExportSnapshot.java
deleted file mode 100644
index 98d03c0..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobSecureExportSnapshot.java
+++ /dev/null
@@ -1,59 +0,0 @@
-/**
- * Copyright The Apache Software Foundation
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.snapshot;
-
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.VerySlowRegionServerTests;
-import org.apache.hadoop.hbase.mapreduce.HadoopSecurityEnabledUserProviderForTesting;
-import org.apache.hadoop.hbase.security.UserProvider;
-import org.apache.hadoop.hbase.security.access.AccessControlLists;
-import org.apache.hadoop.hbase.security.access.SecureTestUtil;
-
-import org.junit.BeforeClass;
-import org.junit.Ignore;
-import org.junit.experimental.categories.Category;
-
-/**
- * Reruns TestMobExportSnapshot using ExportSnapshot in secure mode.
- */
-@Ignore
-@Category({VerySlowRegionServerTests.class, LargeTests.class})
-public class TestMobSecureExportSnapshot extends TestMobExportSnapshot {
- @BeforeClass
- public static void setUpBeforeClass() throws Exception {
- setUpBaseConf(TEST_UTIL.getConfiguration());
- // Setup separate test-data directory for MR cluster and set corresponding configurations.
- // Otherwise, different test classes running MR cluster can step on each other.
- TEST_UTIL.getDataTestDir();
-
- // set the always on security provider
- UserProvider.setUserProviderForTesting(TEST_UTIL.getConfiguration(),
- HadoopSecurityEnabledUserProviderForTesting.class);
-
- // setup configuration
- SecureTestUtil.enableSecurity(TEST_UTIL.getConfiguration());
-
- TEST_UTIL.startMiniCluster(1, 3);
- TEST_UTIL.startMiniMapReduceCluster();
-
- // Wait for the ACL table to become available
- TEST_UTIL.waitTableEnabled(AccessControlLists.ACL_TABLE_NAME);
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestSecureExportSnapshot.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestSecureExportSnapshot.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestSecureExportSnapshot.java
deleted file mode 100644
index 7d4832c..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestSecureExportSnapshot.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/**
- * Copyright The Apache Software Foundation
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.snapshot;
-
-import org.apache.hadoop.hbase.CategoryBasedTimeout;
-import org.apache.hadoop.hbase.mapreduce.HadoopSecurityEnabledUserProviderForTesting;
-import org.apache.hadoop.hbase.security.UserProvider;
-import org.apache.hadoop.hbase.security.access.AccessControlLists;
-import org.apache.hadoop.hbase.security.access.SecureTestUtil;
-
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.VerySlowRegionServerTests;
-import org.junit.BeforeClass;
-import org.junit.Ignore;
-import org.junit.Rule;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestRule;
-
-/**
- * Reruns TestExportSnapshot using ExportSnapshot in secure mode.
- */
-@Ignore
-@Category({VerySlowRegionServerTests.class, LargeTests.class})
-public class TestSecureExportSnapshot extends TestExportSnapshot {
- @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
- withTimeout(this.getClass()).withLookingForStuckThread(true).build();
- @BeforeClass
- public static void setUpBeforeClass() throws Exception {
- setUpBaseConf(TEST_UTIL.getConfiguration());
- // Setup separate test-data directory for MR cluster and set corresponding configurations.
- // Otherwise, different test classes running MR cluster can step on each other.
- TEST_UTIL.getDataTestDir();
-
- // set the always on security provider
- UserProvider.setUserProviderForTesting(TEST_UTIL.getConfiguration(),
- HadoopSecurityEnabledUserProviderForTesting.class);
-
- // setup configuration
- SecureTestUtil.enableSecurity(TEST_UTIL.getConfiguration());
-
- TEST_UTIL.startMiniCluster(1, 3);
- TEST_UTIL.startMiniMapReduceCluster();
-
- // Wait for the ACL table to become available
- TEST_UTIL.waitTableEnabled(AccessControlLists.ACL_TABLE_NAME);
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/util/HFileTestUtil.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/HFileTestUtil.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/HFileTestUtil.java
index 236994a..0487bf4 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/HFileTestUtil.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/HFileTestUtil.java
@@ -22,6 +22,8 @@ import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.fail;
import java.io.IOException;
+import java.util.Arrays;
+import java.util.Locale;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
@@ -29,6 +31,7 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.ArrayBackedTag;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.TagType;
@@ -50,6 +53,17 @@ import org.apache.hadoop.hbase.regionserver.StoreFile;
*/
public class HFileTestUtil {
+ public static final String OPT_DATA_BLOCK_ENCODING_USAGE =
+ "Encoding algorithm (e.g. prefix "
+ + "compression) to use for data blocks in the test column family, "
+ + "one of " + Arrays.toString(DataBlockEncoding.values()) + ".";
+ public static final String OPT_DATA_BLOCK_ENCODING =
+ HColumnDescriptor.DATA_BLOCK_ENCODING.toLowerCase(Locale.ROOT);
+ /** Column family used by the test */
+ public static byte[] DEFAULT_COLUMN_FAMILY = Bytes.toBytes("test_cf");
+ /** Column families used by the test */
+ public static final byte[][] DEFAULT_COLUMN_FAMILIES = { DEFAULT_COLUMN_FAMILY };
+
/**
* Create an HFile with the given number of rows between a given
* start key and end key @ family:qualifier. The value will be the key value.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/util/LoadTestDataGeneratorWithTags.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/LoadTestDataGeneratorWithTags.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/LoadTestDataGeneratorWithTags.java
index 2ea01bb..0b3c612 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/LoadTestDataGeneratorWithTags.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/LoadTestDataGeneratorWithTags.java
@@ -31,6 +31,7 @@ import org.apache.hadoop.hbase.ArrayBackedTag;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.MultiThreadedAction.DefaultDataGenerator;
+import org.apache.hadoop.hbase.util.test.LoadTestDataGenerator;
@InterfaceAudience.Private
public class LoadTestDataGeneratorWithTags extends DefaultDataGenerator {
@@ -74,7 +75,7 @@ public class LoadTestDataGeneratorWithTags extends DefaultDataGenerator {
List<Tag> tags;
for (CellScanner cellScanner = m.cellScanner(); cellScanner.advance();) {
Cell cell = cellScanner.current();
- byte[] tag = LoadTestTool.generateData(random,
+ byte[] tag = LoadTestDataGenerator.generateData(random,
minTagLength + random.nextInt(maxTagLength - minTagLength));
tags = new ArrayList<>();
for (int n = 0; n < numTags; n++) {
[38/41] hbase git commit: HBASE-18640 Move mapreduce out of
hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/HRegionPartitioner.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/HRegionPartitioner.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/HRegionPartitioner.java
new file mode 100644
index 0000000..3c3060b
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/HRegionPartitioner.java
@@ -0,0 +1,140 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapred.TableOutputFormat;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapreduce.Partitioner;
+
+/**
+ * This is used to partition the output keys into groups of keys.
+ * Keys are grouped according to the regions that currently exist,
+ * so that each reducer fills a single region and the load is distributed.
+ *
+ * <p>This class is not suitable as a partitioner for creating hfiles
+ * for incremental bulk loads, as the region spread will likely change between the time of
+ * hfile creation and load time. See {@link LoadIncrementalHFiles}
+ * and <a href="http://hbase.apache.org/book.html#arch.bulk.load">Bulk Load</a>.</p>
+ *
+ * @param <KEY> The type of the key.
+ * @param <VALUE> The type of the value.
+ */
+@InterfaceAudience.Public
+public class HRegionPartitioner<KEY, VALUE>
+extends Partitioner<ImmutableBytesWritable, VALUE>
+implements Configurable {
+
+ private static final Log LOG = LogFactory.getLog(HRegionPartitioner.class);
+ private Configuration conf = null;
+ // Connection and locator are not cleaned up; they just die when partitioner is done.
+ private Connection connection;
+ private RegionLocator locator;
+ private byte[][] startKeys;
+
+ /**
+ * Gets the partition number for a given key (hence record) given the total
+ * number of partitions i.e. number of reduce-tasks for the job.
+ *
+ * <p>Typically a hash function on all or a subset of the key.</p>
+ *
+ * @param key The key to be partitioned.
+ * @param value The entry value.
+ * @param numPartitions The total number of partitions.
+ * @return The partition number for the <code>key</code>.
+ * @see org.apache.hadoop.mapreduce.Partitioner#getPartition(
+ * java.lang.Object, java.lang.Object, int)
+ */
+ @Override
+ public int getPartition(ImmutableBytesWritable key,
+ VALUE value, int numPartitions) {
+ byte[] region = null;
+ // Only one region, so everything goes to partition 0
+ if (this.startKeys.length == 1){
+ return 0;
+ }
+ try {
+ // Not sure if this is cached after a split, so we could have problems
+ // here if a region splits while mapping
+ region = this.locator.getRegionLocation(key.get()).getRegionInfo().getStartKey();
+ } catch (IOException e) {
+ LOG.error(e);
+ }
+ for (int i = 0; i < this.startKeys.length; i++){
+ if (Bytes.compareTo(region, this.startKeys[i]) == 0 ){
+ if (i >= numPartitions-1){
+ // cover the case where we have fewer reducers than regions.
+ return (Integer.toString(i).hashCode()
+ & Integer.MAX_VALUE) % numPartitions;
+ }
+ return i;
+ }
+ }
+ // if the above fails to find a matching start key, we still need to return something
+ return 0;
+ }
+
+ /**
+ * Returns the current configuration.
+ *
+ * @return The current configuration.
+ * @see org.apache.hadoop.conf.Configurable#getConf()
+ */
+ @Override
+ public Configuration getConf() {
+ return conf;
+ }
+
+ /**
+ * Sets the configuration. This is used to determine the start keys for the
+ * given table.
+ *
+ * @param configuration The configuration to set.
+ * @see org.apache.hadoop.conf.Configurable#setConf(
+ * org.apache.hadoop.conf.Configuration)
+ */
+ @Override
+ public void setConf(Configuration configuration) {
+ this.conf = HBaseConfiguration.create(configuration);
+ try {
+ this.connection = ConnectionFactory.createConnection(HBaseConfiguration.create(conf));
+ TableName tableName = TableName.valueOf(conf.get(TableOutputFormat.OUTPUT_TABLE));
+ this.locator = this.connection.getRegionLocator(tableName);
+ } catch (IOException e) {
+ LOG.error(e);
+ }
+ try {
+ this.startKeys = this.locator.getStartKeys();
+ } catch (IOException e) {
+ LOG.error(e);
+ }
+ }
+}
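
For readers following along, a minimal sketch of how this partitioner might be wired into a job by hand; the table name "MyTable" and the reducer count are assumptions for illustration, not part of this change:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.mapred.TableOutputFormat;
    import org.apache.hadoop.hbase.mapreduce.HRegionPartitioner;
    import org.apache.hadoop.mapreduce.Job;

    public class PartitionByRegionExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // HRegionPartitioner.setConf() reads the output table from this key and
        // looks up that table's region start keys through a RegionLocator.
        conf.set(TableOutputFormat.OUTPUT_TABLE, "MyTable");
        Job job = Job.getInstance(conf, "partition-by-region");
        job.setPartitionerClass(HRegionPartitioner.class);
        // Keys belonging to the same region go to the same reducer; with fewer
        // reducers than regions, the overflow regions are hashed across them.
        job.setNumReduceTasks(4);
      }
    }

In practice this is often done through TableMapReduceUtil, which also sets OUTPUT_TABLE; the manual wiring above just makes that dependency visible.
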
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/HashTable.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/HashTable.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/HashTable.java
new file mode 100644
index 0000000..2c8caf5
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/HashTable.java
@@ -0,0 +1,747 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Properties;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.io.MapFile;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.base.Charsets;
+import org.apache.hadoop.hbase.shaded.com.google.common.base.Throwables;
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.Ordering;
+
+public class HashTable extends Configured implements Tool {
+
+ private static final Log LOG = LogFactory.getLog(HashTable.class);
+
+ private static final int DEFAULT_BATCH_SIZE = 8000;
+
+ private final static String HASH_BATCH_SIZE_CONF_KEY = "hash.batch.size";
+ final static String PARTITIONS_FILE_NAME = "partitions";
+ final static String MANIFEST_FILE_NAME = "manifest";
+ final static String HASH_DATA_DIR = "hashes";
+ final static String OUTPUT_DATA_FILE_PREFIX = "part-r-";
+ private final static String TMP_MANIFEST_FILE_NAME = "manifest.tmp";
+
+ TableHash tableHash = new TableHash();
+ Path destPath;
+
+ public HashTable(Configuration conf) {
+ super(conf);
+ }
+
+ public static class TableHash {
+
+ Path hashDir;
+
+ String tableName;
+ String families = null;
+ long batchSize = DEFAULT_BATCH_SIZE;
+ int numHashFiles = 0;
+ byte[] startRow = HConstants.EMPTY_START_ROW;
+ byte[] stopRow = HConstants.EMPTY_END_ROW;
+ int scanBatch = 0;
+ int versions = -1;
+ long startTime = 0;
+ long endTime = 0;
+
+ List<ImmutableBytesWritable> partitions;
+
+ public static TableHash read(Configuration conf, Path hashDir) throws IOException {
+ TableHash tableHash = new TableHash();
+ FileSystem fs = hashDir.getFileSystem(conf);
+ tableHash.hashDir = hashDir;
+ tableHash.readPropertiesFile(fs, new Path(hashDir, MANIFEST_FILE_NAME));
+ tableHash.readPartitionFile(fs, conf, new Path(hashDir, PARTITIONS_FILE_NAME));
+ return tableHash;
+ }
+
+ void writePropertiesFile(FileSystem fs, Path path) throws IOException {
+ Properties p = new Properties();
+ p.setProperty("table", tableName);
+ if (families != null) {
+ p.setProperty("columnFamilies", families);
+ }
+ p.setProperty("targetBatchSize", Long.toString(batchSize));
+ p.setProperty("numHashFiles", Integer.toString(numHashFiles));
+ if (!isTableStartRow(startRow)) {
+ p.setProperty("startRowHex", Bytes.toHex(startRow));
+ }
+ if (!isTableEndRow(stopRow)) {
+ p.setProperty("stopRowHex", Bytes.toHex(stopRow));
+ }
+ if (scanBatch > 0) {
+ p.setProperty("scanBatch", Integer.toString(scanBatch));
+ }
+ if (versions >= 0) {
+ p.setProperty("versions", Integer.toString(versions));
+ }
+ if (startTime != 0) {
+ p.setProperty("startTimestamp", Long.toString(startTime));
+ }
+ if (endTime != 0) {
+ p.setProperty("endTimestamp", Long.toString(endTime));
+ }
+
+ try (OutputStreamWriter osw = new OutputStreamWriter(fs.create(path), Charsets.UTF_8)) {
+ p.store(osw, null);
+ }
+ }
+
+ void readPropertiesFile(FileSystem fs, Path path) throws IOException {
+ Properties p = new Properties();
+ try (FSDataInputStream in = fs.open(path)) {
+ try (InputStreamReader isr = new InputStreamReader(in, Charsets.UTF_8)) {
+ p.load(isr);
+ }
+ }
+ tableName = p.getProperty("table");
+ families = p.getProperty("columnFamilies");
+ batchSize = Long.parseLong(p.getProperty("targetBatchSize"));
+ numHashFiles = Integer.parseInt(p.getProperty("numHashFiles"));
+
+ String startRowHex = p.getProperty("startRowHex");
+ if (startRowHex != null) {
+ startRow = Bytes.fromHex(startRowHex);
+ }
+ String stopRowHex = p.getProperty("stopRowHex");
+ if (stopRowHex != null) {
+ stopRow = Bytes.fromHex(stopRowHex);
+ }
+
+ String scanBatchString = p.getProperty("scanBatch");
+ if (scanBatchString != null) {
+ scanBatch = Integer.parseInt(scanBatchString);
+ }
+
+ String versionString = p.getProperty("versions");
+ if (versionString != null) {
+ versions = Integer.parseInt(versionString);
+ }
+
+ String startTimeString = p.getProperty("startTimestamp");
+ if (startTimeString != null) {
+ startTime = Long.parseLong(startTimeString);
+ }
+
+ String endTimeString = p.getProperty("endTimestamp");
+ if (endTimeString != null) {
+ endTime = Long.parseLong(endTimeString);
+ }
+ }
+
+ Scan initScan() throws IOException {
+ Scan scan = new Scan();
+ scan.setCacheBlocks(false);
+ if (startTime != 0 || endTime != 0) {
+ scan.setTimeRange(startTime, endTime == 0 ? HConstants.LATEST_TIMESTAMP : endTime);
+ }
+ if (scanBatch > 0) {
+ scan.setBatch(scanBatch);
+ }
+ if (versions >= 0) {
+ scan.setMaxVersions(versions);
+ }
+ if (!isTableStartRow(startRow)) {
+ scan.setStartRow(startRow);
+ }
+ if (!isTableEndRow(stopRow)) {
+ scan.setStopRow(stopRow);
+ }
+ if(families != null) {
+ for(String fam : families.split(",")) {
+ scan.addFamily(Bytes.toBytes(fam));
+ }
+ }
+ return scan;
+ }
+
+ /**
+ * Choose partitions between row ranges to hash to a single output file.
+ * Selects region boundaries that fall within the scan range and groups them
+ * into the desired number of partitions.
+ */
+ void selectPartitions(Pair<byte[][], byte[][]> regionStartEndKeys) {
+ List<byte[]> startKeys = new ArrayList<>();
+ for (int i = 0; i < regionStartEndKeys.getFirst().length; i++) {
+ byte[] regionStartKey = regionStartEndKeys.getFirst()[i];
+ byte[] regionEndKey = regionStartEndKeys.getSecond()[i];
+
+ // keep this region only if it overlaps the scan range;
+ // in other words:
+ // IF (scan begins before the end of this region
+ // AND scan ends after the start of this region)
+ // THEN include this region
+ if ((isTableStartRow(startRow) || isTableEndRow(regionEndKey)
+ || Bytes.compareTo(startRow, regionEndKey) < 0)
+ && (isTableEndRow(stopRow) || isTableStartRow(regionStartKey)
+ || Bytes.compareTo(stopRow, regionStartKey) > 0)) {
+ startKeys.add(regionStartKey);
+ }
+ }
+
+ int numRegions = startKeys.size();
+ if (numHashFiles == 0) {
+ numHashFiles = numRegions / 100;
+ }
+ if (numHashFiles == 0) {
+ numHashFiles = 1;
+ }
+ if (numHashFiles > numRegions) {
+ // can't partition within regions
+ numHashFiles = numRegions;
+ }
+
+ // choose a subset of start keys to group regions into ranges
+ partitions = new ArrayList<>(numHashFiles - 1);
+ // skip the first start key as it is not a partition between ranges.
+ for (long i = 1; i < numHashFiles; i++) {
+ int splitIndex = (int) (numRegions * i / numHashFiles);
+ partitions.add(new ImmutableBytesWritable(startKeys.get(splitIndex)));
+ }
+ }
+
+ void writePartitionFile(Configuration conf, Path path) throws IOException {
+ FileSystem fs = path.getFileSystem(conf);
+ @SuppressWarnings("deprecation")
+ SequenceFile.Writer writer = SequenceFile.createWriter(
+ fs, conf, path, ImmutableBytesWritable.class, NullWritable.class);
+
+ for (int i = 0; i < partitions.size(); i++) {
+ writer.append(partitions.get(i), NullWritable.get());
+ }
+ writer.close();
+ }
+
+ private void readPartitionFile(FileSystem fs, Configuration conf, Path path)
+ throws IOException {
+ @SuppressWarnings("deprecation")
+ SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
+ ImmutableBytesWritable key = new ImmutableBytesWritable();
+ partitions = new ArrayList<>();
+ while (reader.next(key)) {
+ partitions.add(new ImmutableBytesWritable(key.copyBytes()));
+ }
+ reader.close();
+
+ if (!Ordering.natural().isOrdered(partitions)) {
+ throw new IOException("Partitions are not ordered!");
+ }
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("tableName=").append(tableName);
+ if (families != null) {
+ sb.append(", families=").append(families);
+ }
+ sb.append(", batchSize=").append(batchSize);
+ sb.append(", numHashFiles=").append(numHashFiles);
+ if (!isTableStartRow(startRow)) {
+ sb.append(", startRowHex=").append(Bytes.toHex(startRow));
+ }
+ if (!isTableEndRow(stopRow)) {
+ sb.append(", stopRowHex=").append(Bytes.toHex(stopRow));
+ }
+ if (scanBatch >= 0) {
+ sb.append(", scanBatch=").append(scanBatch);
+ }
+ if (versions >= 0) {
+ sb.append(", versions=").append(versions);
+ }
+ if (startTime != 0) {
+ sb.append("startTime=").append(startTime);
+ }
+ if (endTime != 0) {
+ sb.append("endTime=").append(endTime);
+ }
+ return sb.toString();
+ }
+
+ static String getDataFileName(int hashFileIndex) {
+ return String.format(HashTable.OUTPUT_DATA_FILE_PREFIX + "%05d", hashFileIndex);
+ }
+
+ /**
+ * Open a TableHash.Reader starting at the first hash at or after the given key.
+ * @throws IOException
+ */
+ public Reader newReader(Configuration conf, ImmutableBytesWritable startKey)
+ throws IOException {
+ return new Reader(conf, startKey);
+ }
+
+ public class Reader implements java.io.Closeable {
+ private final Configuration conf;
+
+ private int hashFileIndex;
+ private MapFile.Reader mapFileReader;
+
+ private boolean cachedNext;
+ private ImmutableBytesWritable key;
+ private ImmutableBytesWritable hash;
+
+ Reader(Configuration conf, ImmutableBytesWritable startKey) throws IOException {
+ this.conf = conf;
+ int partitionIndex = Collections.binarySearch(partitions, startKey);
+ if (partitionIndex >= 0) {
+ // if the key is equal to a partition, then go to the file after that partition
+ hashFileIndex = partitionIndex+1;
+ } else {
+ // if the key is between partitions, then go to the file between those partitions
+ hashFileIndex = -1-partitionIndex;
+ }
+ openHashFile();
+
+ // MapFiles don't make it easy to seek() so that the subsequent next() returns
+ // the desired key/value pair. So we cache it for the first call of next().
+ hash = new ImmutableBytesWritable();
+ key = (ImmutableBytesWritable) mapFileReader.getClosest(startKey, hash);
+ if (key == null) {
+ cachedNext = false;
+ hash = null;
+ } else {
+ cachedNext = true;
+ }
+ }
+
+ /**
+ * Read the next key/hash pair.
+ * Returns true if such a pair exists and false when at the end of the data.
+ */
+ public boolean next() throws IOException {
+ if (cachedNext) {
+ cachedNext = false;
+ return true;
+ }
+ key = new ImmutableBytesWritable();
+ hash = new ImmutableBytesWritable();
+ while (true) {
+ boolean hasNext = mapFileReader.next(key, hash);
+ if (hasNext) {
+ return true;
+ }
+ hashFileIndex++;
+ if (hashFileIndex < TableHash.this.numHashFiles) {
+ mapFileReader.close();
+ openHashFile();
+ } else {
+ key = null;
+ hash = null;
+ return false;
+ }
+ }
+ }
+
+ /**
+ * Get the current key
+ * @return the current key or null if there is no current key
+ */
+ public ImmutableBytesWritable getCurrentKey() {
+ return key;
+ }
+
+ /**
+ * Get the current hash
+ * @return the current hash or null if there is no current hash
+ */
+ public ImmutableBytesWritable getCurrentHash() {
+ return hash;
+ }
+
+ private void openHashFile() throws IOException {
+ if (mapFileReader != null) {
+ mapFileReader.close();
+ }
+ Path dataDir = new Path(TableHash.this.hashDir, HASH_DATA_DIR);
+ Path dataFile = new Path(dataDir, getDataFileName(hashFileIndex));
+ mapFileReader = new MapFile.Reader(dataFile, conf);
+ }
+
+ @Override
+ public void close() throws IOException {
+ mapFileReader.close();
+ }
+ }
+ }
+
+ static boolean isTableStartRow(byte[] row) {
+ return Bytes.equals(HConstants.EMPTY_START_ROW, row);
+ }
+
+ static boolean isTableEndRow(byte[] row) {
+ return Bytes.equals(HConstants.EMPTY_END_ROW, row);
+ }
+
+ public Job createSubmittableJob(String[] args) throws IOException {
+ Path partitionsPath = new Path(destPath, PARTITIONS_FILE_NAME);
+ generatePartitions(partitionsPath);
+
+ Job job = Job.getInstance(getConf(),
+ getConf().get("mapreduce.job.name", "hashTable_" + tableHash.tableName));
+ Configuration jobConf = job.getConfiguration();
+ jobConf.setLong(HASH_BATCH_SIZE_CONF_KEY, tableHash.batchSize);
+ job.setJarByClass(HashTable.class);
+
+ TableMapReduceUtil.initTableMapperJob(tableHash.tableName, tableHash.initScan(),
+ HashMapper.class, ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
+
+ // use a TotalOrderPartitioner and reducers to group region output into hash files
+ job.setPartitionerClass(TotalOrderPartitioner.class);
+ TotalOrderPartitioner.setPartitionFile(jobConf, partitionsPath);
+ job.setReducerClass(Reducer.class); // identity reducer
+ job.setNumReduceTasks(tableHash.numHashFiles);
+ job.setOutputKeyClass(ImmutableBytesWritable.class);
+ job.setOutputValueClass(ImmutableBytesWritable.class);
+ job.setOutputFormatClass(MapFileOutputFormat.class);
+ FileOutputFormat.setOutputPath(job, new Path(destPath, HASH_DATA_DIR));
+
+ return job;
+ }
+
+ private void generatePartitions(Path partitionsPath) throws IOException {
+ Connection connection = ConnectionFactory.createConnection(getConf());
+ Pair<byte[][], byte[][]> regionKeys
+ = connection.getRegionLocator(TableName.valueOf(tableHash.tableName)).getStartEndKeys();
+ connection.close();
+
+ tableHash.selectPartitions(regionKeys);
+ LOG.info("Writing " + tableHash.partitions.size() + " partition keys to " + partitionsPath);
+
+ tableHash.writePartitionFile(getConf(), partitionsPath);
+ }
+
+ static class ResultHasher {
+ private MessageDigest digest;
+
+ private boolean batchStarted = false;
+ private ImmutableBytesWritable batchStartKey;
+ private ImmutableBytesWritable batchHash;
+ private long batchSize = 0;
+
+
+ public ResultHasher() {
+ try {
+ digest = MessageDigest.getInstance("MD5");
+ } catch (NoSuchAlgorithmException e) {
+ Throwables.propagate(e);
+ }
+ }
+
+ public void startBatch(ImmutableBytesWritable row) {
+ if (batchStarted) {
+ throw new RuntimeException("Cannot start new batch without finishing existing one.");
+ }
+ batchStarted = true;
+ batchSize = 0;
+ batchStartKey = row;
+ batchHash = null;
+ }
+
+ public void hashResult(Result result) {
+ if (!batchStarted) {
+ throw new RuntimeException("Cannot add to batch that has not been started.");
+ }
+ for (Cell cell : result.rawCells()) {
+ int rowLength = cell.getRowLength();
+ int familyLength = cell.getFamilyLength();
+ int qualifierLength = cell.getQualifierLength();
+ int valueLength = cell.getValueLength();
+ digest.update(cell.getRowArray(), cell.getRowOffset(), rowLength);
+ digest.update(cell.getFamilyArray(), cell.getFamilyOffset(), familyLength);
+ digest.update(cell.getQualifierArray(), cell.getQualifierOffset(), qualifierLength);
+ long ts = cell.getTimestamp();
+ for (int i = 8; i > 0; i--) {
+ digest.update((byte) ts);
+ ts >>>= 8;
+ }
+ digest.update(cell.getValueArray(), cell.getValueOffset(), valueLength);
+
+ batchSize += rowLength + familyLength + qualifierLength + 8 + valueLength;
+ }
+ }
+
+ public void finishBatch() {
+ if (!batchStarted) {
+ throw new RuntimeException("Cannot finish batch that has not started.");
+ }
+ batchStarted = false;
+ batchHash = new ImmutableBytesWritable(digest.digest());
+ }
+
+ public boolean isBatchStarted() {
+ return batchStarted;
+ }
+
+ public ImmutableBytesWritable getBatchStartKey() {
+ return batchStartKey;
+ }
+
+ public ImmutableBytesWritable getBatchHash() {
+ return batchHash;
+ }
+
+ public long getBatchSize() {
+ return batchSize;
+ }
+ }
+
+ public static class HashMapper
+ extends TableMapper<ImmutableBytesWritable, ImmutableBytesWritable> {
+
+ private ResultHasher hasher;
+ private long targetBatchSize;
+
+ private ImmutableBytesWritable currentRow;
+
+ @Override
+ protected void setup(Context context) throws IOException, InterruptedException {
+ targetBatchSize = context.getConfiguration()
+ .getLong(HASH_BATCH_SIZE_CONF_KEY, DEFAULT_BATCH_SIZE);
+ hasher = new ResultHasher();
+
+ TableSplit split = (TableSplit) context.getInputSplit();
+ hasher.startBatch(new ImmutableBytesWritable(split.getStartRow()));
+ }
+
+ @Override
+ protected void map(ImmutableBytesWritable key, Result value, Context context)
+ throws IOException, InterruptedException {
+
+ if (currentRow == null || !currentRow.equals(key)) {
+ currentRow = new ImmutableBytesWritable(key); // the framework reuses the key object, so copy it
+
+ if (hasher.getBatchSize() >= targetBatchSize) {
+ hasher.finishBatch();
+ context.write(hasher.getBatchStartKey(), hasher.getBatchHash());
+ hasher.startBatch(currentRow);
+ }
+ }
+
+ hasher.hashResult(value);
+ }
+
+ @Override
+ protected void cleanup(Context context) throws IOException, InterruptedException {
+ hasher.finishBatch();
+ context.write(hasher.getBatchStartKey(), hasher.getBatchHash());
+ }
+ }
+
+ private void writeTempManifestFile() throws IOException {
+ Path tempManifestPath = new Path(destPath, TMP_MANIFEST_FILE_NAME);
+ FileSystem fs = tempManifestPath.getFileSystem(getConf());
+ tableHash.writePropertiesFile(fs, tempManifestPath);
+ }
+
+ private void completeManifest() throws IOException {
+ Path tempManifestPath = new Path(destPath, TMP_MANIFEST_FILE_NAME);
+ Path manifestPath = new Path(destPath, MANIFEST_FILE_NAME);
+ FileSystem fs = tempManifestPath.getFileSystem(getConf());
+ fs.rename(tempManifestPath, manifestPath);
+ }
+
+ private static final int NUM_ARGS = 2;
+ private static void printUsage(final String errorMsg) {
+ if (errorMsg != null && errorMsg.length() > 0) {
+ System.err.println("ERROR: " + errorMsg);
+ System.err.println();
+ }
+ System.err.println("Usage: HashTable [options] <tablename> <outputpath>");
+ System.err.println();
+ System.err.println("Options:");
+ System.err.println(" batchsize the target amount of bytes to hash in each batch");
+ System.err.println(" rows are added to the batch until this size is reached");
+ System.err.println(" (defaults to " + DEFAULT_BATCH_SIZE + " bytes)");
+ System.err.println(" numhashfiles the number of hash files to create");
+ System.err.println(" if set to fewer than number of regions then");
+ System.err.println(" the job will create this number of reducers");
+ System.err.println(" (defaults to 1/100 of regions -- at least 1)");
+ System.err.println(" startrow the start row");
+ System.err.println(" stoprow the stop row");
+ System.err.println(" starttime beginning of the time range (unixtime in millis)");
+ System.err.println(" without endtime means from starttime to forever");
+ System.err.println(" endtime end of the time range. Ignored if no starttime specified.");
+ System.err.println(" scanbatch scanner batch size to support intra row scans");
+ System.err.println(" versions number of cell versions to include");
+ System.err.println(" families comma-separated list of families to include");
+ System.err.println();
+ System.err.println("Args:");
+ System.err.println(" tablename Name of the table to hash");
+ System.err.println(" outputpath Filesystem path to put the output data");
+ System.err.println();
+ System.err.println("Examples:");
+ System.err.println(" To hash 'TestTable' in 32kB batches for a 1 hour window into 50 files:");
+ System.err.println(" $ hbase " +
+ "org.apache.hadoop.hbase.mapreduce.HashTable --batchsize=32000 --numhashfiles=50"
+ + " --starttime=1265875194289 --endtime=1265878794289 --families=cf2,cf3"
+ + " TestTable /hashes/testTable");
+ }
+
+ private boolean doCommandLine(final String[] args) {
+ if (args.length < NUM_ARGS) {
+ printUsage(null);
+ return false;
+ }
+ try {
+
+ tableHash.tableName = args[args.length-2];
+ destPath = new Path(args[args.length-1]);
+
+ for (int i = 0; i < args.length - NUM_ARGS; i++) {
+ String cmd = args[i];
+ if (cmd.equals("-h") || cmd.startsWith("--h")) {
+ printUsage(null);
+ return false;
+ }
+
+ final String batchSizeArgKey = "--batchsize=";
+ if (cmd.startsWith(batchSizeArgKey)) {
+ tableHash.batchSize = Long.parseLong(cmd.substring(batchSizeArgKey.length()));
+ continue;
+ }
+
+ final String numHashFilesArgKey = "--numhashfiles=";
+ if (cmd.startsWith(numHashFilesArgKey)) {
+ tableHash.numHashFiles = Integer.parseInt(cmd.substring(numHashFilesArgKey.length()));
+ continue;
+ }
+
+ final String startRowArgKey = "--startrow=";
+ if (cmd.startsWith(startRowArgKey)) {
+ tableHash.startRow = Bytes.fromHex(cmd.substring(startRowArgKey.length()));
+ continue;
+ }
+
+ final String stopRowArgKey = "--stoprow=";
+ if (cmd.startsWith(stopRowArgKey)) {
+ tableHash.stopRow = Bytes.fromHex(cmd.substring(stopRowArgKey.length()));
+ continue;
+ }
+
+ final String startTimeArgKey = "--starttime=";
+ if (cmd.startsWith(startTimeArgKey)) {
+ tableHash.startTime = Long.parseLong(cmd.substring(startTimeArgKey.length()));
+ continue;
+ }
+
+ final String endTimeArgKey = "--endtime=";
+ if (cmd.startsWith(endTimeArgKey)) {
+ tableHash.endTime = Long.parseLong(cmd.substring(endTimeArgKey.length()));
+ continue;
+ }
+
+ final String scanBatchArgKey = "--scanbatch=";
+ if (cmd.startsWith(scanBatchArgKey)) {
+ tableHash.scanBatch = Integer.parseInt(cmd.substring(scanBatchArgKey.length()));
+ continue;
+ }
+
+ final String versionsArgKey = "--versions=";
+ if (cmd.startsWith(versionsArgKey)) {
+ tableHash.versions = Integer.parseInt(cmd.substring(versionsArgKey.length()));
+ continue;
+ }
+
+ final String familiesArgKey = "--families=";
+ if (cmd.startsWith(familiesArgKey)) {
+ tableHash.families = cmd.substring(familiesArgKey.length());
+ continue;
+ }
+
+ printUsage("Invalid argument '" + cmd + "'");
+ return false;
+ }
+ if ((tableHash.startTime != 0 || tableHash.endTime != 0)
+ && (tableHash.startTime >= tableHash.endTime)) {
+ printUsage("Invalid time range filter: starttime="
+ + tableHash.startTime + " >= endtime=" + tableHash.endTime);
+ return false;
+ }
+
+ } catch (Exception e) {
+ e.printStackTrace();
+ printUsage("Can't start because " + e.getMessage());
+ return false;
+ }
+ return true;
+ }
+
+ /**
+ * Main entry point.
+ */
+ public static void main(String[] args) throws Exception {
+ int ret = ToolRunner.run(new HashTable(HBaseConfiguration.create()), args);
+ System.exit(ret);
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+ String[] otherArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs();
+ if (!doCommandLine(otherArgs)) {
+ return 1;
+ }
+
+ Job job = createSubmittableJob(otherArgs);
+ writeTempManifestFile();
+ if (!job.waitForCompletion(true)) {
+ LOG.info("Map-reduce job failed!");
+ return 1;
+ }
+ completeManifest();
+ return 0;
+ }
+
+}
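
Beyond the command-line example in the usage text above, the manifest and hash files that HashTable writes can also be read back programmatically through TableHash, which is how a downstream comparison job can consume them. A minimal sketch, assuming the output was written to /hashes/TestTable (that path is an assumption for illustration):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.HConstants;
    import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
    import org.apache.hadoop.hbase.mapreduce.HashTable;
    import org.apache.hadoop.hbase.util.Bytes;

    public class ReadHashesExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // Reads the manifest (scan properties) and the partitions file.
        HashTable.TableHash hash =
            HashTable.TableHash.read(conf, new Path("/hashes/TestTable"));
        System.out.println(hash); // toString() echoes the recorded scan settings
        // Iterate the key -> hash pairs starting from the beginning of the table.
        try (HashTable.TableHash.Reader reader =
            hash.newReader(conf, new ImmutableBytesWritable(HConstants.EMPTY_START_ROW))) {
          while (reader.next()) {
            System.out.println(Bytes.toStringBinary(reader.getCurrentKey().copyBytes())
                + " -> " + Bytes.toHex(reader.getCurrentHash().copyBytes()));
          }
        }
      }
    }
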
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/IdentityTableMapper.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/IdentityTableMapper.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/IdentityTableMapper.java
new file mode 100644
index 0000000..7103ef8
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/IdentityTableMapper.java
@@ -0,0 +1,67 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.mapreduce.Job;
+
+/**
+ * Pass the given key and record as-is to the reduce phase.
+ */
+@InterfaceAudience.Public
+public class IdentityTableMapper
+extends TableMapper<ImmutableBytesWritable, Result> {
+
+ /**
+ * Use this before submitting a TableMap job. It will appropriately set up
+ * the job.
+ *
+ * @param table The table name.
+ * @param scan The scan with the columns to scan.
+ * @param mapper The mapper class.
+ * @param job The job configuration.
+ * @throws IOException When setting up the job fails.
+ */
+ @SuppressWarnings("rawtypes")
+ public static void initJob(String table, Scan scan,
+ Class<? extends TableMapper> mapper, Job job) throws IOException {
+ TableMapReduceUtil.initTableMapperJob(table, scan, mapper,
+ ImmutableBytesWritable.class, Result.class, job);
+ }
+
+ /**
+ * Pass the key, value to reduce.
+ *
+ * @param key The current key.
+ * @param value The current value.
+ * @param context The current context.
+ * @throws IOException When writing the record fails.
+ * @throws InterruptedException When the job is aborted.
+ */
+ public void map(ImmutableBytesWritable key, Result value, Context context)
+ throws IOException, InterruptedException {
+ context.write(key, value);
+ }
+
+}
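
A minimal sketch of initJob in use, assuming a table named "MyTable"; the job name and scan settings are illustrative only:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.Scan;
    import org.apache.hadoop.hbase.mapreduce.IdentityTableMapper;
    import org.apache.hadoop.mapreduce.Job;

    public class IdentityScanExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        Job job = Job.getInstance(conf, "identity-scan");
        Scan scan = new Scan();
        scan.setCacheBlocks(false); // commonly disabled for full-table MapReduce scans
        // Configures TableInputFormat plus the map output key/value classes
        // (ImmutableBytesWritable / Result) so rows flow through unchanged.
        IdentityTableMapper.initJob("MyTable", scan, IdentityTableMapper.class, job);
      }
    }
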
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/IdentityTableReducer.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/IdentityTableReducer.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/IdentityTableReducer.java
new file mode 100644
index 0000000..73475db
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/IdentityTableReducer.java
@@ -0,0 +1,79 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Mutation;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * Convenience class that simply writes all values (which must be
+ * {@link org.apache.hadoop.hbase.client.Put Put} or
+ * {@link org.apache.hadoop.hbase.client.Delete Delete} instances)
+ * passed to it out to the configured HBase table. This works in combination
+ * with {@link TableOutputFormat} which actually does the writing to HBase.<p>
+ *
+ * Keys are passed along but ignored in TableOutputFormat. However, they can
+ * be used to control how your values will be divided up amongst the specified
+ * number of reducers. <p>
+ *
+ * You can also use the {@link TableMapReduceUtil} class to set up the two
+ * classes in one step:
+ * <blockquote><code>
+ * TableMapReduceUtil.initTableReducerJob("table", IdentityTableReducer.class, job);
+ * </code></blockquote>
+ * This will also set the proper {@link TableOutputFormat} which is given the
+ * <code>table</code> parameter. The
+ * {@link org.apache.hadoop.hbase.client.Put Put} or
+ * {@link org.apache.hadoop.hbase.client.Delete Delete} define the
+ * row and columns implicitly.
+ */
+@InterfaceAudience.Public
+public class IdentityTableReducer
+extends TableReducer<Writable, Mutation, Writable> {
+
+ @SuppressWarnings("unused")
+ private static final Log LOG = LogFactory.getLog(IdentityTableReducer.class);
+
+ /**
+ * Writes each given record, consisting of the row key and the given values,
+ * to the configured {@link org.apache.hadoop.mapreduce.OutputFormat}.
+ * It emits the row key and each {@link org.apache.hadoop.hbase.client.Put Put}
+ * or {@link org.apache.hadoop.hbase.client.Delete Delete} as a separate pair.
+ *
+ * @param key The current row key.
+ * @param values The {@link org.apache.hadoop.hbase.client.Put Put} or
+ * {@link org.apache.hadoop.hbase.client.Delete Delete} list for the given
+ * row.
+ * @param context The context of the reduce.
+ * @throws IOException When writing the record fails.
+ * @throws InterruptedException When the job gets interrupted.
+ */
+ @Override
+ public void reduce(Writable key, Iterable<Mutation> values, Context context)
+ throws IOException, InterruptedException {
+ for(Mutation putOrDelete : values) {
+ context.write(key, putOrDelete);
+ }
+ }
+}
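
To make the setup described in the class comment concrete, a minimal sketch of a copy-style job that pairs a Put-emitting mapper with this reducer; the table names "SourceTable" and "DestTable" and the mapper itself are assumptions for illustration:

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.Cell;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.Put;
    import org.apache.hadoop.hbase.client.Result;
    import org.apache.hadoop.hbase.client.Scan;
    import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
    import org.apache.hadoop.hbase.mapreduce.IdentityTableReducer;
    import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
    import org.apache.hadoop.hbase.mapreduce.TableMapper;
    import org.apache.hadoop.mapreduce.Job;

    public class CopyWithIdentityReducerExample {
      // Emits one Put per source row, copying every cell as-is.
      static class CopyMapper extends TableMapper<ImmutableBytesWritable, Put> {
        @Override
        protected void map(ImmutableBytesWritable row, Result value, Context context)
            throws IOException, InterruptedException {
          Put put = new Put(row.copyBytes());
          for (Cell cell : value.rawCells()) {
            put.add(cell);
          }
          context.write(row, put);
        }
      }

      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        Job job = Job.getInstance(conf, "copy-with-identity-reducer");
        TableMapReduceUtil.initTableMapperJob("SourceTable", new Scan(), CopyMapper.class,
            ImmutableBytesWritable.class, Put.class, job);
        // Wires up TableOutputFormat for "DestTable"; the reducer forwards each Put unchanged.
        TableMapReduceUtil.initTableReducerJob("DestTable", IdentityTableReducer.class, job);
      }
    }
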
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/Import.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/Import.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/Import.java
new file mode 100644
index 0000000..18dcf35
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/Import.java
@@ -0,0 +1,780 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInput;
+import java.io.DataInputStream;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.TreeMap;
+import java.util.UUID;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellComparator;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.KeyValueUtil;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.ZooKeeperConnectionException;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Durability;
+import org.apache.hadoop.hbase.client.Mutation;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.filter.Filter;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.zookeeper.ZKClusterId;
+import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
+import org.apache.hadoop.io.RawComparator;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.WritableComparator;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Partitioner;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.TaskCounter;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.zookeeper.KeeperException;
+
+
+/**
+ * Import data written by {@link Export}.
+ */
+@InterfaceAudience.Public
+public class Import extends Configured implements Tool {
+ private static final Log LOG = LogFactory.getLog(Import.class);
+ final static String NAME = "import";
+ public final static String CF_RENAME_PROP = "HBASE_IMPORTER_RENAME_CFS";
+ public final static String BULK_OUTPUT_CONF_KEY = "import.bulk.output";
+ public final static String FILTER_CLASS_CONF_KEY = "import.filter.class";
+ public final static String FILTER_ARGS_CONF_KEY = "import.filter.args";
+ public final static String TABLE_NAME = "import.table.name";
+ public final static String WAL_DURABILITY = "import.wal.durability";
+ public final static String HAS_LARGE_RESULT= "import.bulk.hasLargeResult";
+
+ private final static String JOB_NAME_CONF_KEY = "mapreduce.job.name";
+
+ public static class KeyValueWritableComparablePartitioner
+ extends Partitioner<KeyValueWritableComparable, KeyValue> {
+ private static KeyValueWritableComparable[] START_KEYS = null;
+ @Override
+ public int getPartition(KeyValueWritableComparable key, KeyValue value,
+ int numPartitions) {
+ for (int i = 0; i < START_KEYS.length; ++i) {
+ if (key.compareTo(START_KEYS[i]) <= 0) {
+ return i;
+ }
+ }
+ return START_KEYS.length;
+ }
+
+ }
+
+ public static class KeyValueWritableComparable
+ implements WritableComparable<KeyValueWritableComparable> {
+
+ private KeyValue kv = null;
+
+ static {
+ // register this comparator
+ WritableComparator.define(KeyValueWritableComparable.class,
+ new KeyValueWritableComparator());
+ }
+
+ public KeyValueWritableComparable() {
+ }
+
+ public KeyValueWritableComparable(KeyValue kv) {
+ this.kv = kv;
+ }
+
+ @Override
+ public void write(DataOutput out) throws IOException {
+ KeyValue.write(kv, out);
+ }
+
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ kv = KeyValue.create(in);
+ }
+
+ @Override
+ @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="EQ_COMPARETO_USE_OBJECT_EQUALS",
+ justification="This is wrong, yes, but we should be purging Writables, not fixing them")
+ public int compareTo(KeyValueWritableComparable o) {
+ return CellComparator.COMPARATOR.compare(this.kv, ((KeyValueWritableComparable)o).kv);
+ }
+
+ public static class KeyValueWritableComparator extends WritableComparator {
+
+ @Override
+ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+ try {
+ KeyValueWritableComparable kv1 = new KeyValueWritableComparable();
+ kv1.readFields(new DataInputStream(new ByteArrayInputStream(b1, s1, l1)));
+ KeyValueWritableComparable kv2 = new KeyValueWritableComparable();
+ kv2.readFields(new DataInputStream(new ByteArrayInputStream(b2, s2, l2)));
+ return compare(kv1, kv2);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ }
+
+ }
+
+ public static class KeyValueReducer
+ extends
+ Reducer<KeyValueWritableComparable, KeyValue, ImmutableBytesWritable, KeyValue> {
+ protected void reduce(
+ KeyValueWritableComparable row,
+ Iterable<KeyValue> kvs,
+ Reducer<KeyValueWritableComparable,
+ KeyValue, ImmutableBytesWritable, KeyValue>.Context context)
+ throws java.io.IOException, InterruptedException {
+ int index = 0;
+ for (KeyValue kv : kvs) {
+ context.write(new ImmutableBytesWritable(kv.getRowArray()), kv);
+ if (++index % 100 == 0)
+ context.setStatus("Wrote " + index + " KeyValues, "
+ + "and the rowkey whose is being wrote is " + Bytes.toString(kv.getRowArray()));
+ }
+ }
+ }
+
+ public static class KeyValueSortImporter
+ extends TableMapper<KeyValueWritableComparable, KeyValue> {
+ private Map<byte[], byte[]> cfRenameMap;
+ private Filter filter;
+ private static final Log LOG = LogFactory.getLog(KeyValueImporter.class);
+
+ /**
+ * @param row The current table row key.
+ * @param value The columns.
+ * @param context The current context.
+ * @throws IOException When something is broken with the data.
+ */
+ @Override
+ public void map(ImmutableBytesWritable row, Result value,
+ Context context)
+ throws IOException {
+ try {
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Considering the row."
+ + Bytes.toString(row.get(), row.getOffset(), row.getLength()));
+ }
+ if (filter == null
+ || !filter.filterRowKey(CellUtil.createFirstOnRow(row.get(), row.getOffset(),
+ (short) row.getLength()))) {
+ for (Cell kv : value.rawCells()) {
+ kv = filterKv(filter, kv);
+ // skip if we filtered it out
+ if (kv == null) continue;
+ // TODO get rid of ensureKeyValue
+ KeyValue ret = KeyValueUtil.ensureKeyValue(convertKv(kv, cfRenameMap));
+ context.write(new KeyValueWritableComparable(ret.createKeyOnly(false)), ret);
+ }
+ }
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ }
+ }
+
+ @Override
+ public void setup(Context context) throws IOException {
+ cfRenameMap = createCfRenameMap(context.getConfiguration());
+ filter = instantiateFilter(context.getConfiguration());
+ int reduceNum = context.getNumReduceTasks();
+ Configuration conf = context.getConfiguration();
+ TableName tableName = TableName.valueOf(context.getConfiguration().get(TABLE_NAME));
+ try (Connection conn = ConnectionFactory.createConnection(conf);
+ RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
+ byte[][] startKeys = regionLocator.getStartKeys();
+ if (startKeys.length != reduceNum) {
+ throw new IOException("Region split after job initialization");
+ }
+ KeyValueWritableComparable[] startKeyWraps =
+ new KeyValueWritableComparable[startKeys.length - 1];
+ for (int i = 1; i < startKeys.length; ++i) {
+ startKeyWraps[i - 1] =
+ new KeyValueWritableComparable(KeyValueUtil.createFirstOnRow(startKeys[i]));
+ }
+ KeyValueWritableComparablePartitioner.START_KEYS = startKeyWraps;
+ }
+ }
+ }
+
+ /**
+ * A mapper that just writes out KeyValues.
+ */
+ @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="EQ_COMPARETO_USE_OBJECT_EQUALS",
+ justification="Writables are going away and this has been this way forever")
+ public static class KeyValueImporter extends TableMapper<ImmutableBytesWritable, KeyValue> {
+ private Map<byte[], byte[]> cfRenameMap;
+ private Filter filter;
+ private static final Log LOG = LogFactory.getLog(KeyValueImporter.class);
+
+ /**
+ * @param row The current table row key.
+ * @param value The columns.
+ * @param context The current context.
+ * @throws IOException When something is broken with the data.
+ */
+ @Override
+ public void map(ImmutableBytesWritable row, Result value,
+ Context context)
+ throws IOException {
+ try {
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Considering the row."
+ + Bytes.toString(row.get(), row.getOffset(), row.getLength()));
+ }
+ if (filter == null
+ || !filter.filterRowKey(CellUtil.createFirstOnRow(row.get(), row.getOffset(),
+ (short) row.getLength()))) {
+ for (Cell kv : value.rawCells()) {
+ kv = filterKv(filter, kv);
+ // skip if we filtered it out
+ if (kv == null) continue;
+ // TODO get rid of ensureKeyValue
+ context.write(row, KeyValueUtil.ensureKeyValue(convertKv(kv, cfRenameMap)));
+ }
+ }
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ }
+ }
+
+ @Override
+ public void setup(Context context) {
+ cfRenameMap = createCfRenameMap(context.getConfiguration());
+ filter = instantiateFilter(context.getConfiguration());
+ }
+ }
+
+ /**
+ * Write table content back out as {@link Mutation}s to the configured table.
+ */
+ public static class Importer extends TableMapper<ImmutableBytesWritable, Mutation> {
+ private Map<byte[], byte[]> cfRenameMap;
+ private List<UUID> clusterIds;
+ private Filter filter;
+ private Durability durability;
+
+ /**
+ * @param row The current table row key.
+ * @param value The columns.
+ * @param context The current context.
+ * @throws IOException When something is broken with the data.
+ */
+ @Override
+ public void map(ImmutableBytesWritable row, Result value,
+ Context context)
+ throws IOException {
+ try {
+ writeResult(row, value, context);
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ }
+ }
+
+ private void writeResult(ImmutableBytesWritable key, Result result, Context context)
+ throws IOException, InterruptedException {
+ Put put = null;
+ Delete delete = null;
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Considering the row."
+ + Bytes.toString(key.get(), key.getOffset(), key.getLength()));
+ }
+ if (filter == null
+ || !filter.filterRowKey(CellUtil.createFirstOnRow(key.get(), key.getOffset(),
+ (short) key.getLength()))) {
+ processKV(key, result, context, put, delete);
+ }
+ }
+
+ protected void processKV(ImmutableBytesWritable key, Result result, Context context, Put put,
+ Delete delete) throws IOException, InterruptedException {
+ for (Cell kv : result.rawCells()) {
+ kv = filterKv(filter, kv);
+ // skip if we filter it out
+ if (kv == null) continue;
+
+ kv = convertKv(kv, cfRenameMap);
+ // Deletes and Puts are gathered and written when finished
+ /*
+ * If an Export contains a sequence of mutations and tombstones, the Import should restore
+ * that same sequence as it is. If we combined all Delete tombstones into a single request,
+ * some DeleteFamily tombstones could be dropped: when multiple DeleteFamily tombstones are
+ * submitted in a single Delete request, only the newest one is kept in the HBase table and
+ * the others are ignored. See HBASE-12065.
+ */
+ if (CellUtil.isDeleteFamily(kv)) {
+ Delete deleteFamily = new Delete(key.get());
+ deleteFamily.add(kv);
+ if (durability != null) {
+ deleteFamily.setDurability(durability);
+ }
+ deleteFamily.setClusterIds(clusterIds);
+ context.write(key, deleteFamily);
+ } else if (CellUtil.isDelete(kv)) {
+ if (delete == null) {
+ delete = new Delete(key.get());
+ }
+ delete.add(kv);
+ } else {
+ if (put == null) {
+ put = new Put(key.get());
+ }
+ addPutToKv(put, kv);
+ }
+ }
+ if (put != null) {
+ if (durability != null) {
+ put.setDurability(durability);
+ }
+ put.setClusterIds(clusterIds);
+ context.write(key, put);
+ }
+ if (delete != null) {
+ if (durability != null) {
+ delete.setDurability(durability);
+ }
+ delete.setClusterIds(clusterIds);
+ context.write(key, delete);
+ }
+ }
+
+ protected void addPutToKv(Put put, Cell kv) throws IOException {
+ put.add(kv);
+ }
+
+ @Override
+ public void setup(Context context) {
+ LOG.info("Setting up " + getClass() + " mapper.");
+ Configuration conf = context.getConfiguration();
+ cfRenameMap = createCfRenameMap(conf);
+ filter = instantiateFilter(conf);
+ String durabilityStr = conf.get(WAL_DURABILITY);
+ if(durabilityStr != null){
+ durability = Durability.valueOf(durabilityStr.toUpperCase(Locale.ROOT));
+ LOG.info("setting WAL durability to " + durability);
+ } else {
+ LOG.info("setting WAL durability to default.");
+ }
+ // TODO: This is kind of ugly doing setup of ZKW just to read the clusterid.
+ ZooKeeperWatcher zkw = null;
+ Exception ex = null;
+ try {
+ zkw = new ZooKeeperWatcher(conf, context.getTaskAttemptID().toString(), null);
+ clusterIds = Collections.singletonList(ZKClusterId.getUUIDForCluster(zkw));
+ } catch (ZooKeeperConnectionException e) {
+ ex = e;
+ LOG.error("Problem connecting to ZooKeper during task setup", e);
+ } catch (KeeperException e) {
+ ex = e;
+ LOG.error("Problem reading ZooKeeper data during task setup", e);
+ } catch (IOException e) {
+ ex = e;
+ LOG.error("Problem setting up task", e);
+ } finally {
+ if (zkw != null) zkw.close();
+ }
+ if (clusterIds == null) {
+ // exit early if setup fails
+ throw new RuntimeException(ex);
+ }
+ }
+ }
+
+ /**
+ * Create a {@link Filter} to apply to all incoming keys ({@link KeyValue KeyValues}) so
+ * that some of them can optionally be excluded from the job output.
+ * @param conf {@link Configuration} from which to load the filter
+ * @return the filter to use for the task, or <tt>null</tt> if no filter should be used
+ * @throws IllegalArgumentException if the filter is misconfigured
+ */
+ public static Filter instantiateFilter(Configuration conf) {
+ // get the filter, if it was configured
+ Class<? extends Filter> filterClass = conf.getClass(FILTER_CLASS_CONF_KEY, null, Filter.class);
+ if (filterClass == null) {
+ LOG.debug("No configured filter class, accepting all keyvalues.");
+ return null;
+ }
+ LOG.debug("Attempting to create filter:" + filterClass);
+ String[] filterArgs = conf.getStrings(FILTER_ARGS_CONF_KEY);
+ ArrayList<byte[]> quotedArgs = toQuotedByteArrays(filterArgs);
+ try {
+ Method m = filterClass.getMethod("createFilterFromArguments", ArrayList.class);
+ return (Filter) m.invoke(null, quotedArgs);
+ } catch (IllegalAccessException e) {
+ LOG.error("Couldn't instantiate filter!", e);
+ throw new RuntimeException(e);
+ } catch (SecurityException e) {
+ LOG.error("Couldn't instantiate filter!", e);
+ throw new RuntimeException(e);
+ } catch (NoSuchMethodException e) {
+ LOG.error("Couldn't instantiate filter!", e);
+ throw new RuntimeException(e);
+ } catch (IllegalArgumentException e) {
+ LOG.error("Couldn't instantiate filter!", e);
+ throw new RuntimeException(e);
+ } catch (InvocationTargetException e) {
+ LOG.error("Couldn't instantiate filter!", e);
+ throw new RuntimeException(e);
+ }
+ }
+
+ private static ArrayList<byte[]> toQuotedByteArrays(String... stringArgs) {
+ ArrayList<byte[]> quotedArgs = new ArrayList<>();
+ for (String stringArg : stringArgs) {
+ // all the filters' instantiation methods expect quoted args since they are coming from
+ // the shell, so add them here, though it shouldn't really be needed :-/
+ quotedArgs.add(Bytes.toBytes("'" + stringArg + "'"));
+ }
+ return quotedArgs;
+ }
+
+ /**
+ * Attempt to filter out the keyvalue
+ * @param kv {@link KeyValue} on which to apply the filter
+ * @return <tt>null</tt> if the key should not be written, otherwise returns the original
+ * {@link KeyValue}
+ */
+ public static Cell filterKv(Filter filter, Cell kv) throws IOException {
+ // apply the filter and skip this kv if the filter doesn't apply
+ if (filter != null) {
+ Filter.ReturnCode code = filter.filterKeyValue(kv);
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Filter returned:" + code + " for the key value:" + kv);
+ }
+ // if it's not an accept type, then skip this kv
+ if (!(code.equals(Filter.ReturnCode.INCLUDE) || code
+ .equals(Filter.ReturnCode.INCLUDE_AND_NEXT_COL))) {
+ return null;
+ }
+ }
+ return kv;
+ }
+
+ // helper: create a new KeyValue based on CF rename map
+ private static Cell convertKv(Cell kv, Map<byte[], byte[]> cfRenameMap) {
+ if(cfRenameMap != null) {
+ // If there's a rename mapping for this CF, create a new KeyValue
+ byte[] newCfName = cfRenameMap.get(CellUtil.cloneFamily(kv));
+ if(newCfName != null) {
+ kv = new KeyValue(kv.getRowArray(), // row buffer
+ kv.getRowOffset(), // row offset
+ kv.getRowLength(), // row length
+ newCfName, // CF buffer
+ 0, // CF offset
+ newCfName.length, // CF length
+ kv.getQualifierArray(), // qualifier buffer
+ kv.getQualifierOffset(), // qualifier offset
+ kv.getQualifierLength(), // qualifier length
+ kv.getTimestamp(), // timestamp
+ KeyValue.Type.codeToType(kv.getTypeByte()), // KV Type
+ kv.getValueArray(), // value buffer
+ kv.getValueOffset(), // value offset
+ kv.getValueLength()); // value length
+ }
+ }
+ return kv;
+ }
+
+ // helper: make a map from sourceCfName to destCfName by parsing a config key
+ private static Map<byte[], byte[]> createCfRenameMap(Configuration conf) {
+ Map<byte[], byte[]> cfRenameMap = null;
+ String allMappingsPropVal = conf.get(CF_RENAME_PROP);
+ if(allMappingsPropVal != null) {
+ // The conf value format should be sourceCf1:destCf1,sourceCf2:destCf2,...
+ String[] allMappings = allMappingsPropVal.split(",");
+ for (String mapping: allMappings) {
+ if(cfRenameMap == null) {
+ cfRenameMap = new TreeMap<>(Bytes.BYTES_COMPARATOR);
+ }
+ String [] srcAndDest = mapping.split(":");
+ if(srcAndDest.length != 2) {
+ continue;
+ }
+ cfRenameMap.put(srcAndDest[0].getBytes(), srcAndDest[1].getBytes());
+ }
+ }
+ return cfRenameMap;
+ }
+
+ /**
+ * <p>Sets a configuration property with key {@link #CF_RENAME_PROP} in conf that tells
+ * the mapper how to rename column families.
+ *
+ * <p>Alternately, instead of calling this function, you could set the configuration key
+ * {@link #CF_RENAME_PROP} yourself. The value should look like
+ * <pre>srcCf1:destCf1,srcCf2:destCf2,....</pre>. This would have the same effect on
+ * the mapper behavior.
+ *
+ * @param conf the Configuration in which the {@link #CF_RENAME_PROP} key will be
+ * set
+ * @param renameMap a mapping from source CF names to destination CF names
+ */
+ static public void configureCfRenaming(Configuration conf,
+ Map<String, String> renameMap) {
+ StringBuilder sb = new StringBuilder();
+ for(Map.Entry<String,String> entry: renameMap.entrySet()) {
+ String sourceCf = entry.getKey();
+ String destCf = entry.getValue();
+
+ if(sourceCf.contains(":") || sourceCf.contains(",") ||
+ destCf.contains(":") || destCf.contains(",")) {
+ throw new IllegalArgumentException("Illegal character in CF names: "
+ + sourceCf + ", " + destCf);
+ }
+
+ if(sb.length() != 0) {
+ sb.append(",");
+ }
+ sb.append(sourceCf + ":" + destCf);
+ }
+ conf.set(CF_RENAME_PROP, sb.toString());
+ }
+
+ /**
+ * Add a Filter to be instantiated on import
+ * @param conf Configuration to update (will be passed to the job)
+ * @param clazz {@link Filter} subclass to instantiate on the server.
+ * @param filterArgs List of arguments to pass to the filter on instantiation
+ */
+ public static void addFilterAndArguments(Configuration conf, Class<? extends Filter> clazz,
+ List<String> filterArgs) throws IOException {
+ conf.set(Import.FILTER_CLASS_CONF_KEY, clazz.getName());
+ conf.setStrings(Import.FILTER_ARGS_CONF_KEY, filterArgs.toArray(new String[filterArgs.size()]));
+ }
+
+ /**
+ * Sets up the actual job.
+ * @param conf The current configuration.
+ * @param args The command line parameters.
+ * @return The newly created job.
+ * @throws IOException When setting up the job fails.
+ */
+ public static Job createSubmittableJob(Configuration conf, String[] args)
+ throws IOException {
+ TableName tableName = TableName.valueOf(args[0]);
+ conf.set(TABLE_NAME, tableName.getNameAsString());
+ Path inputDir = new Path(args[1]);
+ Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
+ job.setJarByClass(Importer.class);
+ FileInputFormat.setInputPaths(job, inputDir);
+ job.setInputFormatClass(SequenceFileInputFormat.class);
+ String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
+
+ // make sure we get the filter in the jars
+ try {
+ Class<? extends Filter> filter = conf.getClass(FILTER_CLASS_CONF_KEY, null, Filter.class);
+ if (filter != null) {
+ TableMapReduceUtil.addDependencyJarsForClasses(conf, filter);
+ }
+ } catch (Exception e) {
+ throw new IOException(e);
+ }
+
+ if (hfileOutPath != null && conf.getBoolean(HAS_LARGE_RESULT, false)) {
+ LOG.info("Use Large Result!!");
+ try (Connection conn = ConnectionFactory.createConnection(conf);
+ Table table = conn.getTable(tableName);
+ RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
+ HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(), regionLocator);
+ job.setMapperClass(KeyValueSortImporter.class);
+ job.setReducerClass(KeyValueReducer.class);
+ Path outputDir = new Path(hfileOutPath);
+ FileOutputFormat.setOutputPath(job, outputDir);
+ job.setMapOutputKeyClass(KeyValueWritableComparable.class);
+ job.setMapOutputValueClass(KeyValue.class);
+ job.getConfiguration().setClass("mapreduce.job.output.key.comparator.class",
+ KeyValueWritableComparable.KeyValueWritableComparator.class,
+ RawComparator.class);
+ Path partitionsPath =
+ new Path(TotalOrderPartitioner.getPartitionFile(job.getConfiguration()));
+ FileSystem fs = FileSystem.get(job.getConfiguration());
+ fs.deleteOnExit(partitionsPath);
+ job.setPartitionerClass(KeyValueWritableComparablePartitioner.class);
+ job.setNumReduceTasks(regionLocator.getStartKeys().length);
+ TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
+ org.apache.hadoop.hbase.shaded.com.google.common.base.Preconditions.class);
+ }
+ } else if (hfileOutPath != null) {
+ LOG.info("writing to hfiles for bulk load.");
+ job.setMapperClass(KeyValueImporter.class);
+ try (Connection conn = ConnectionFactory.createConnection(conf);
+ Table table = conn.getTable(tableName);
+ RegionLocator regionLocator = conn.getRegionLocator(tableName)){
+ job.setReducerClass(KeyValueSortReducer.class);
+ Path outputDir = new Path(hfileOutPath);
+ FileOutputFormat.setOutputPath(job, outputDir);
+ job.setMapOutputKeyClass(ImmutableBytesWritable.class);
+ job.setMapOutputValueClass(KeyValue.class);
+ HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(), regionLocator);
+ TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
+ org.apache.hadoop.hbase.shaded.com.google.common.base.Preconditions.class);
+ }
+ } else {
+ LOG.info("writing directly to table from Mapper.");
+ // No reducers. Just write straight to table. Call initTableReducerJob
+ // because it sets up the TableOutputFormat.
+ job.setMapperClass(Importer.class);
+ TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job);
+ job.setNumReduceTasks(0);
+ }
+ return job;
+ }
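+
+ // Illustrative programmatic use, not part of this patch (table name and paths are hypothetical):
+ //   conf.set(BULK_OUTPUT_CONF_KEY, "/tmp/import-hfiles");   // write HFiles instead of live Puts
+ //   Job job = Import.createSubmittableJob(conf, new String[] { "myTable", "/export/myTable" });
+ //   job.waitForCompletion(true);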
+
+ /*
+ * @param errorMsg Error message. Can be null.
+ */
+ private static void usage(final String errorMsg) {
+ if (errorMsg != null && errorMsg.length() > 0) {
+ System.err.println("ERROR: " + errorMsg);
+ }
+ System.err.println("Usage: Import [options] <tablename> <inputdir>");
+ System.err.println("By default Import will load data directly into HBase. To instead generate");
+ System.err.println("HFiles of data to prepare for a bulk data load, pass the option:");
+ System.err.println(" -D" + BULK_OUTPUT_CONF_KEY + "=/path/for/output");
+ System.err.println("If there is a large result that includes too much KeyValue "
+ + "whitch can occur OOME caused by the memery sort in reducer, pass the option:");
+ System.err.println(" -D" + HAS_LARGE_RESULT + "=true");
+ System.err
+ .println(" To apply a generic org.apache.hadoop.hbase.filter.Filter to the input, use");
+ System.err.println(" -D" + FILTER_CLASS_CONF_KEY + "=<name of filter class>");
+ System.err.println(" -D" + FILTER_ARGS_CONF_KEY + "=<comma separated list of args for filter");
+ System.err.println(" NOTE: The filter will be applied BEFORE doing key renames via the "
+ + CF_RENAME_PROP + " property. Futher, filters will only use the"
+ + " Filter#filterRowKey(byte[] buffer, int offset, int length) method to identify "
+ + " whether the current row needs to be ignored completely for processing and "
+ + " Filter#filterKeyValue(KeyValue) method to determine if the KeyValue should be added;"
+ + " Filter.ReturnCode#INCLUDE and #INCLUDE_AND_NEXT_COL will be considered as including"
+ + " the KeyValue.");
+ System.err.println("To import data exported from HBase 0.94, use");
+ System.err.println(" -Dhbase.import.version=0.94");
+ System.err.println(" -D " + JOB_NAME_CONF_KEY
+ + "=jobName - use the specified mapreduce job name for the import");
+ System.err.println("For performance consider the following options:\n"
+ + " -Dmapreduce.map.speculative=false\n"
+ + " -Dmapreduce.reduce.speculative=false\n"
+ + " -D" + WAL_DURABILITY + "=<Used while writing data to hbase."
+ +" Allowed values are the supported durability values"
+ +" like SKIP_WAL/ASYNC_WAL/SYNC_WAL/...>");
+ }
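+
+ // Example invocation, not part of this patch (table name and paths are hypothetical; the -D
+ // option uses the key printed by usage() above, shown here only as a placeholder):
+ //   hbase org.apache.hadoop.hbase.mapreduce.Import \
+ //       -D<BULK_OUTPUT_CONF_KEY>=/tmp/import-hfiles myTable /export/myTable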
+
+ /**
+ * If the durability is set to {@link Durability#SKIP_WAL} and the data is imported to hbase, we
+ * need to flush all the regions of the table as the data is held in memory and is also not
+ * present in the Write Ahead Log to replay in scenarios of a crash. This method flushes all the
+ * regions of the table in the scenarios of import data to hbase with {@link Durability#SKIP_WAL}
+ */
+ public static void flushRegionsIfNecessary(Configuration conf) throws IOException,
+ InterruptedException {
+ String tableName = conf.get(TABLE_NAME);
+ Admin hAdmin = null;
+ Connection connection = null;
+ String durability = conf.get(WAL_DURABILITY);
+ // Need to flush if the data is written to hbase and skip wal is enabled.
+ if (conf.get(BULK_OUTPUT_CONF_KEY) == null && durability != null
+ && Durability.SKIP_WAL.name().equalsIgnoreCase(durability)) {
+ LOG.info("Flushing all data that skipped the WAL.");
+ try {
+ connection = ConnectionFactory.createConnection(conf);
+ hAdmin = connection.getAdmin();
+ hAdmin.flush(TableName.valueOf(tableName));
+ } finally {
+ if (hAdmin != null) {
+ hAdmin.close();
+ }
+ if (connection != null) {
+ connection.close();
+ }
+ }
+ }
+ }
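+
+ // Sketch of the configuration that triggers the flush above (values are examples only):
+ //   conf.set(WAL_DURABILITY, Durability.SKIP_WAL.name());
+ //   // ... and no BULK_OUTPUT_CONF_KEY set, i.e. the mappers write directly to the table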
+
+ @Override
+ public int run(String[] args) throws Exception {
+ if (args.length < 2) {
+ usage("Wrong number of arguments: " + args.length);
+ return -1;
+ }
+ String inputVersionString = System.getProperty(ResultSerialization.IMPORT_FORMAT_VER);
+ if (inputVersionString != null) {
+ getConf().set(ResultSerialization.IMPORT_FORMAT_VER, inputVersionString);
+ }
+ Job job = createSubmittableJob(getConf(), args);
+ boolean isJobSuccessful = job.waitForCompletion(true);
+ if(isJobSuccessful){
+ // Flush all the regions of the table
+ flushRegionsIfNecessary(getConf());
+ }
+ long inputRecords = job.getCounters().findCounter(TaskCounter.MAP_INPUT_RECORDS).getValue();
+ long outputRecords = job.getCounters().findCounter(TaskCounter.MAP_OUTPUT_RECORDS).getValue();
+ if (outputRecords < inputRecords) {
+ System.err.println("Warning, not all records were imported (maybe filtered out).");
+ if (outputRecords == 0) {
+ System.err.println("If the data was exported from HBase 0.94 "+
+ "consider using -Dhbase.import.version=0.94.");
+ }
+ }
+
+ return (isJobSuccessful ? 0 : 1);
+ }
+
+ /**
+ * Main entry point.
+ * @param args The command line parameters.
+ * @throws Exception When running the job fails.
+ */
+ public static void main(String[] args) throws Exception {
+ int errCode = ToolRunner.run(HBaseConfiguration.create(), new Import(), args);
+ System.exit(errCode);
+ }
+
+}
[14/41] hbase git commit: HBASE-18640 Move mapreduce out of
hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java
deleted file mode 100644
index ff458ff..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java
+++ /dev/null
@@ -1,1027 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.File;
-import java.io.IOException;
-import java.net.URL;
-import java.net.URLDecoder;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Enumeration;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.zip.ZipEntry;
-import java.util.zip.ZipFile;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.MetaTableAccessor;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos;
-import org.apache.hadoop.hbase.security.User;
-import org.apache.hadoop.hbase.security.UserProvider;
-import org.apache.hadoop.hbase.security.token.TokenUtil;
-import org.apache.hadoop.hbase.util.Base64;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.zookeeper.ZKConfig;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapreduce.InputFormat;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.util.StringUtils;
-
-import com.codahale.metrics.MetricRegistry;
-
-/**
- * Utility for {@link TableMapper} and {@link TableReducer}
- */
-@SuppressWarnings({ "rawtypes", "unchecked" })
-@InterfaceAudience.Public
-public class TableMapReduceUtil {
- private static final Log LOG = LogFactory.getLog(TableMapReduceUtil.class);
-
- /**
- * Use this before submitting a TableMap job. It will appropriately set up
- * the job.
- *
- * @param table The table name to read from.
- * @param scan The scan instance with the columns, time range etc.
- * @param mapper The mapper class to use.
- * @param outputKeyClass The class of the output key.
- * @param outputValueClass The class of the output value.
- * @param job The current job to adjust. Make sure the passed job is
- * carrying all necessary HBase configuration.
- * @throws IOException When setting up the details fails.
- */
- public static void initTableMapperJob(String table, Scan scan,
- Class<? extends TableMapper> mapper,
- Class<?> outputKeyClass,
- Class<?> outputValueClass, Job job)
- throws IOException {
- initTableMapperJob(table, scan, mapper, outputKeyClass, outputValueClass,
- job, true);
- }
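-
-  // Illustrative usage sketch, not part of this file (table, mapper, and output types are
-  // hypothetical):
-  //   Scan scan = new Scan();
-  //   scan.setCaching(500);          // larger caching for MR scans
-  //   scan.setCacheBlocks(false);    // avoid polluting the region server block cache
-  //   TableMapReduceUtil.initTableMapperJob("myTable", scan, MyMapper.class,
-  //       Text.class, IntWritable.class, job);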
-
-
- /**
- * Use this before submitting a TableMap job. It will appropriately set up
- * the job.
- *
- * @param table The table name to read from.
- * @param scan The scan instance with the columns, time range etc.
- * @param mapper The mapper class to use.
- * @param outputKeyClass The class of the output key.
- * @param outputValueClass The class of the output value.
- * @param job The current job to adjust. Make sure the passed job is
- * carrying all necessary HBase configuration.
- * @throws IOException When setting up the details fails.
- */
- public static void initTableMapperJob(TableName table,
- Scan scan,
- Class<? extends TableMapper> mapper,
- Class<?> outputKeyClass,
- Class<?> outputValueClass,
- Job job) throws IOException {
- initTableMapperJob(table.getNameAsString(),
- scan,
- mapper,
- outputKeyClass,
- outputValueClass,
- job,
- true);
- }
-
- /**
- * Use this before submitting a TableMap job. It will appropriately set up
- * the job.
- *
- * @param table Binary representation of the table name to read from.
- * @param scan The scan instance with the columns, time range etc.
- * @param mapper The mapper class to use.
- * @param outputKeyClass The class of the output key.
- * @param outputValueClass The class of the output value.
- * @param job The current job to adjust. Make sure the passed job is
- * carrying all necessary HBase configuration.
- * @throws IOException When setting up the details fails.
- */
- public static void initTableMapperJob(byte[] table, Scan scan,
- Class<? extends TableMapper> mapper,
- Class<?> outputKeyClass,
- Class<?> outputValueClass, Job job)
- throws IOException {
- initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass, outputValueClass,
- job, true);
- }
-
- /**
- * Use this before submitting a TableMap job. It will appropriately set up
- * the job.
- *
- * @param table The table name to read from.
- * @param scan The scan instance with the columns, time range etc.
- * @param mapper The mapper class to use.
- * @param outputKeyClass The class of the output key.
- * @param outputValueClass The class of the output value.
- * @param job The current job to adjust. Make sure the passed job is
- * carrying all necessary HBase configuration.
- * @param addDependencyJars upload HBase jars and jars for any of the configured
- * job classes via the distributed cache (tmpjars).
- * @throws IOException When setting up the details fails.
- */
- public static void initTableMapperJob(String table, Scan scan,
- Class<? extends TableMapper> mapper,
- Class<?> outputKeyClass,
- Class<?> outputValueClass, Job job,
- boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass)
- throws IOException {
- initTableMapperJob(table, scan, mapper, outputKeyClass, outputValueClass, job,
- addDependencyJars, true, inputFormatClass);
- }
-
-
- /**
- * Use this before submitting a TableMap job. It will appropriately set up
- * the job.
- *
- * @param table The table name to read from.
- * @param scan The scan instance with the columns, time range etc.
- * @param mapper The mapper class to use.
- * @param outputKeyClass The class of the output key.
- * @param outputValueClass The class of the output value.
- * @param job The current job to adjust. Make sure the passed job is
- * carrying all necessary HBase configuration.
- * @param addDependencyJars upload HBase jars and jars for any of the configured
- * job classes via the distributed cache (tmpjars).
- * @param initCredentials whether to initialize hbase auth credentials for the job
- * @param inputFormatClass the input format
- * @throws IOException When setting up the details fails.
- */
- public static void initTableMapperJob(String table, Scan scan,
- Class<? extends TableMapper> mapper,
- Class<?> outputKeyClass,
- Class<?> outputValueClass, Job job,
- boolean addDependencyJars, boolean initCredentials,
- Class<? extends InputFormat> inputFormatClass)
- throws IOException {
- job.setInputFormatClass(inputFormatClass);
- if (outputValueClass != null) job.setMapOutputValueClass(outputValueClass);
- if (outputKeyClass != null) job.setMapOutputKeyClass(outputKeyClass);
- job.setMapperClass(mapper);
- if (Put.class.equals(outputValueClass)) {
- job.setCombinerClass(PutCombiner.class);
- }
- Configuration conf = job.getConfiguration();
- HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
- conf.set(TableInputFormat.INPUT_TABLE, table);
- conf.set(TableInputFormat.SCAN, convertScanToString(scan));
- conf.setStrings("io.serializations", conf.get("io.serializations"),
- MutationSerialization.class.getName(), ResultSerialization.class.getName(),
- KeyValueSerialization.class.getName());
- if (addDependencyJars) {
- addDependencyJars(job);
- }
- if (initCredentials) {
- initCredentials(job);
- }
- }
-
- /**
- * Use this before submitting a TableMap job. It will appropriately set up
- * the job.
- *
- * @param table Binary representation of the table name to read from.
- * @param scan The scan instance with the columns, time range etc.
- * @param mapper The mapper class to use.
- * @param outputKeyClass The class of the output key.
- * @param outputValueClass The class of the output value.
- * @param job The current job to adjust. Make sure the passed job is
- * carrying all necessary HBase configuration.
- * @param addDependencyJars upload HBase jars and jars for any of the configured
- * job classes via the distributed cache (tmpjars).
- * @param inputFormatClass The class of the input format
- * @throws IOException When setting up the details fails.
- */
- public static void initTableMapperJob(byte[] table, Scan scan,
- Class<? extends TableMapper> mapper,
- Class<?> outputKeyClass,
- Class<?> outputValueClass, Job job,
- boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass)
- throws IOException {
- initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass,
- outputValueClass, job, addDependencyJars, inputFormatClass);
- }
-
- /**
- * Use this before submitting a TableMap job. It will appropriately set up
- * the job.
- *
- * @param table Binary representation of the table name to read from.
- * @param scan The scan instance with the columns, time range etc.
- * @param mapper The mapper class to use.
- * @param outputKeyClass The class of the output key.
- * @param outputValueClass The class of the output value.
- * @param job The current job to adjust. Make sure the passed job is
- * carrying all necessary HBase configuration.
- * @param addDependencyJars upload HBase jars and jars for any of the configured
- * job classes via the distributed cache (tmpjars).
- * @throws IOException When setting up the details fails.
- */
- public static void initTableMapperJob(byte[] table, Scan scan,
- Class<? extends TableMapper> mapper,
- Class<?> outputKeyClass,
- Class<?> outputValueClass, Job job,
- boolean addDependencyJars)
- throws IOException {
- initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass,
- outputValueClass, job, addDependencyJars, TableInputFormat.class);
- }
-
- /**
- * Use this before submitting a TableMap job. It will appropriately set up
- * the job.
- *
- * @param table The table name to read from.
- * @param scan The scan instance with the columns, time range etc.
- * @param mapper The mapper class to use.
- * @param outputKeyClass The class of the output key.
- * @param outputValueClass The class of the output value.
- * @param job The current job to adjust. Make sure the passed job is
- * carrying all necessary HBase configuration.
- * @param addDependencyJars upload HBase jars and jars for any of the configured
- * job classes via the distributed cache (tmpjars).
- * @throws IOException When setting up the details fails.
- */
- public static void initTableMapperJob(String table, Scan scan,
- Class<? extends TableMapper> mapper,
- Class<?> outputKeyClass,
- Class<?> outputValueClass, Job job,
- boolean addDependencyJars)
- throws IOException {
- initTableMapperJob(table, scan, mapper, outputKeyClass,
- outputValueClass, job, addDependencyJars, TableInputFormat.class);
- }
-
- /**
- * Enable a basic on-heap cache for these jobs. Any BlockCache implementation based on
- * direct memory will likely cause the map tasks to OOM when opening the region. This
- * is done here instead of in TableSnapshotRegionRecordReader in case an advanced user
- * wants to override this behavior in their job.
- */
- public static void resetCacheConfig(Configuration conf) {
- conf.setFloat(
- HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT);
- conf.setFloat(HConstants.BUCKET_CACHE_SIZE_KEY, 0f);
- conf.unset(HConstants.BUCKET_CACHE_IOENGINE_KEY);
- }
-
- /**
- * Sets up the job for reading from one or more table snapshots, with one or more scans
- * per snapshot.
- * It bypasses HBase servers and reads directly from snapshot files.
- *
- * @param snapshotScans map of snapshot name to scans on that snapshot.
- * @param mapper The mapper class to use.
- * @param outputKeyClass The class of the output key.
- * @param outputValueClass The class of the output value.
- * @param job The current job to adjust. Make sure the passed job is
- * carrying all necessary HBase configuration.
- * @param addDependencyJars upload HBase jars and jars for any of the configured
- * job classes via the distributed cache (tmpjars).
- */
- public static void initMultiTableSnapshotMapperJob(Map<String, Collection<Scan>> snapshotScans,
- Class<? extends TableMapper> mapper, Class<?> outputKeyClass, Class<?> outputValueClass,
- Job job, boolean addDependencyJars, Path tmpRestoreDir) throws IOException {
- MultiTableSnapshotInputFormat.setInput(job.getConfiguration(), snapshotScans, tmpRestoreDir);
-
- job.setInputFormatClass(MultiTableSnapshotInputFormat.class);
- if (outputValueClass != null) {
- job.setMapOutputValueClass(outputValueClass);
- }
- if (outputKeyClass != null) {
- job.setMapOutputKeyClass(outputKeyClass);
- }
- job.setMapperClass(mapper);
- Configuration conf = job.getConfiguration();
- HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
-
- if (addDependencyJars) {
- addDependencyJars(job);
- addDependencyJarsForClasses(job.getConfiguration(), MetricRegistry.class);
- }
-
- resetCacheConfig(job.getConfiguration());
- }
-
- /**
- * Sets up the job for reading from a table snapshot. It bypasses HBase servers
- * and reads directly from snapshot files.
- *
- * @param snapshotName The name of the snapshot (of a table) to read from.
- * @param scan The scan instance with the columns, time range etc.
- * @param mapper The mapper class to use.
- * @param outputKeyClass The class of the output key.
- * @param outputValueClass The class of the output value.
- * @param job The current job to adjust. Make sure the passed job is
- * carrying all necessary HBase configuration.
- * @param addDependencyJars upload HBase jars and jars for any of the configured
- * job classes via the distributed cache (tmpjars).
- *
- * @param tmpRestoreDir a temporary directory to copy the snapshot files into. The current user
- * should have write permissions to this directory, and it should not be a subdirectory of
- * rootdir. After the job is finished, the restore directory can be deleted.
- * @throws IOException When setting up the details fails.
- * @see TableSnapshotInputFormat
- */
- public static void initTableSnapshotMapperJob(String snapshotName, Scan scan,
- Class<? extends TableMapper> mapper,
- Class<?> outputKeyClass,
- Class<?> outputValueClass, Job job,
- boolean addDependencyJars, Path tmpRestoreDir)
- throws IOException {
- TableSnapshotInputFormat.setInput(job, snapshotName, tmpRestoreDir);
- initTableMapperJob(snapshotName, scan, mapper, outputKeyClass,
- outputValueClass, job, addDependencyJars, false, TableSnapshotInputFormat.class);
- resetCacheConfig(job.getConfiguration());
- }
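-
-  // Illustrative sketch, not part of this file (snapshot name, mapper, and restore directory
-  // are hypothetical):
-  //   TableMapReduceUtil.initTableSnapshotMapperJob("mySnapshot", new Scan(), MyMapper.class,
-  //       Text.class, IntWritable.class, job, true, new Path("/tmp/snapshot-restore"));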
-
- /**
- * Use this before submitting a Multi TableMap job. It will appropriately set
- * up the job.
- *
- * @param scans The list of {@link Scan} objects to read from.
- * @param mapper The mapper class to use.
- * @param outputKeyClass The class of the output key.
- * @param outputValueClass The class of the output value.
- * @param job The current job to adjust. Make sure the passed job is carrying
- * all necessary HBase configuration.
- * @throws IOException When setting up the details fails.
- */
- public static void initTableMapperJob(List<Scan> scans,
- Class<? extends TableMapper> mapper,
- Class<?> outputKeyClass,
- Class<?> outputValueClass, Job job) throws IOException {
- initTableMapperJob(scans, mapper, outputKeyClass, outputValueClass, job,
- true);
- }
-
- /**
- * Use this before submitting a Multi TableMap job. It will appropriately set
- * up the job.
- *
- * @param scans The list of {@link Scan} objects to read from.
- * @param mapper The mapper class to use.
- * @param outputKeyClass The class of the output key.
- * @param outputValueClass The class of the output value.
- * @param job The current job to adjust. Make sure the passed job is carrying
- * all necessary HBase configuration.
- * @param addDependencyJars upload HBase jars and jars for any of the
- * configured job classes via the distributed cache (tmpjars).
- * @throws IOException When setting up the details fails.
- */
- public static void initTableMapperJob(List<Scan> scans,
- Class<? extends TableMapper> mapper,
- Class<?> outputKeyClass,
- Class<?> outputValueClass, Job job,
- boolean addDependencyJars) throws IOException {
- initTableMapperJob(scans, mapper, outputKeyClass, outputValueClass, job,
- addDependencyJars, true);
- }
-
- /**
- * Use this before submitting a Multi TableMap job. It will appropriately set
- * up the job.
- *
- * @param scans The list of {@link Scan} objects to read from.
- * @param mapper The mapper class to use.
- * @param outputKeyClass The class of the output key.
- * @param outputValueClass The class of the output value.
- * @param job The current job to adjust. Make sure the passed job is carrying
- * all necessary HBase configuration.
- * @param addDependencyJars upload HBase jars and jars for any of the
- * configured job classes via the distributed cache (tmpjars).
- * @param initCredentials whether to initialize hbase auth credentials for the job
- * @throws IOException When setting up the details fails.
- */
- public static void initTableMapperJob(List<Scan> scans,
- Class<? extends TableMapper> mapper,
- Class<?> outputKeyClass,
- Class<?> outputValueClass, Job job,
- boolean addDependencyJars,
- boolean initCredentials) throws IOException {
- job.setInputFormatClass(MultiTableInputFormat.class);
- if (outputValueClass != null) {
- job.setMapOutputValueClass(outputValueClass);
- }
- if (outputKeyClass != null) {
- job.setMapOutputKeyClass(outputKeyClass);
- }
- job.setMapperClass(mapper);
- Configuration conf = job.getConfiguration();
- HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
- List<String> scanStrings = new ArrayList<>();
-
- for (Scan scan : scans) {
- scanStrings.add(convertScanToString(scan));
- }
- job.getConfiguration().setStrings(MultiTableInputFormat.SCANS,
- scanStrings.toArray(new String[scanStrings.size()]));
-
- if (addDependencyJars) {
- addDependencyJars(job);
- }
-
- if (initCredentials) {
- initCredentials(job);
- }
- }
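-
-  // Illustrative sketch, not part of this file: each Scan names its table via the table-name
-  // scan attribute read by MultiTableInputFormat (table and mapper names are hypothetical):
-  //   Scan scanA = new Scan();
-  //   scanA.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes("tableA"));
-  //   Scan scanB = new Scan();
-  //   scanB.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes("tableB"));
-  //   TableMapReduceUtil.initTableMapperJob(Arrays.asList(scanA, scanB), MyMapper.class,
-  //       Text.class, IntWritable.class, job);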
-
- public static void initCredentials(Job job) throws IOException {
- UserProvider userProvider = UserProvider.instantiate(job.getConfiguration());
- if (userProvider.isHadoopSecurityEnabled()) {
- // propagate delegation related props from launcher job to MR job
- if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
- job.getConfiguration().set("mapreduce.job.credentials.binary",
- System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
- }
- }
-
- if (userProvider.isHBaseSecurityEnabled()) {
- try {
- // init credentials for remote cluster
- String quorumAddress = job.getConfiguration().get(TableOutputFormat.QUORUM_ADDRESS);
- User user = userProvider.getCurrent();
- if (quorumAddress != null) {
- Configuration peerConf = HBaseConfiguration.createClusterConf(job.getConfiguration(),
- quorumAddress, TableOutputFormat.OUTPUT_CONF_PREFIX);
- Connection peerConn = ConnectionFactory.createConnection(peerConf);
- try {
- TokenUtil.addTokenForJob(peerConn, user, job);
- } finally {
- peerConn.close();
- }
- }
-
- Connection conn = ConnectionFactory.createConnection(job.getConfiguration());
- try {
- TokenUtil.addTokenForJob(conn, user, job);
- } finally {
- conn.close();
- }
- } catch (InterruptedException ie) {
- LOG.info("Interrupted obtaining user authentication token");
- Thread.currentThread().interrupt();
- }
- }
- }
-
- /**
- * Obtain an authentication token, for the specified cluster, on behalf of the current user
- * and add it to the credentials for the given map reduce job.
- *
- * The quorumAddress is the key to the ZK ensemble, which contains:
- * hbase.zookeeper.quorum, hbase.zookeeper.client.port and
- * zookeeper.znode.parent
- *
- * @param job The job that requires the permission.
- * @param quorumAddress string that contains the 3 required configurations
- * @throws IOException When the authentication token cannot be obtained.
- * @deprecated Since 1.2.0, use {@link #initCredentialsForCluster(Job, Configuration)} instead.
- */
- @Deprecated
- public static void initCredentialsForCluster(Job job, String quorumAddress)
- throws IOException {
- Configuration peerConf = HBaseConfiguration.createClusterConf(job.getConfiguration(),
- quorumAddress);
- initCredentialsForCluster(job, peerConf);
- }
-
- /**
- * Obtain an authentication token, for the specified cluster, on behalf of the current user
- * and add it to the credentials for the given map reduce job.
- *
- * @param job The job that requires the permission.
- * @param conf The configuration to use in connecting to the peer cluster
- * @throws IOException When the authentication token cannot be obtained.
- */
- public static void initCredentialsForCluster(Job job, Configuration conf)
- throws IOException {
- UserProvider userProvider = UserProvider.instantiate(job.getConfiguration());
- if (userProvider.isHBaseSecurityEnabled()) {
- try {
- Connection peerConn = ConnectionFactory.createConnection(conf);
- try {
- TokenUtil.addTokenForJob(peerConn, userProvider.getCurrent(), job);
- } finally {
- peerConn.close();
- }
- } catch (InterruptedException e) {
- LOG.info("Interrupted obtaining user authentication token");
- Thread.interrupted();
- }
- }
- }
-
- /**
- * Writes the given scan into a Base64 encoded string.
- *
- * @param scan The scan to write out.
- * @return The scan saved in a Base64 encoded string.
- * @throws IOException When writing the scan fails.
- */
- public static String convertScanToString(Scan scan) throws IOException {
- ClientProtos.Scan proto = ProtobufUtil.toScan(scan);
- return Base64.encodeBytes(proto.toByteArray());
- }
-
- /**
- * Converts the given Base64 string back into a Scan instance.
- *
- * @param base64 The scan details.
- * @return The newly created Scan instance.
- * @throws IOException When reading the scan instance fails.
- */
- public static Scan convertStringToScan(String base64) throws IOException {
- byte [] decoded = Base64.decode(base64);
- return ProtobufUtil.toScan(ClientProtos.Scan.parseFrom(decoded));
- }
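-
-  // Sketch, not part of this file: the two helpers are inverses, so a Scan can travel inside
-  // the job configuration:
-  //   String encoded = TableMapReduceUtil.convertScanToString(new Scan());
-  //   Scan restored = TableMapReduceUtil.convertStringToScan(encoded);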
-
- /**
- * Use this before submitting a TableReduce job. It will
- * appropriately set up the JobConf.
- *
- * @param table The output table.
- * @param reducer The reducer class to use.
- * @param job The current job to adjust.
- * @throws IOException When determining the region count fails.
- */
- public static void initTableReducerJob(String table,
- Class<? extends TableReducer> reducer, Job job)
- throws IOException {
- initTableReducerJob(table, reducer, job, null);
- }
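-
-  // Illustrative usage sketch, not part of this file (table and reducer names are hypothetical):
-  //   TableMapReduceUtil.initTableReducerJob("myOutputTable", MyTableReducer.class, job);
-  //   // the reducer emits Put/Delete mutations keyed by ImmutableBytesWritable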
-
- /**
- * Use this before submitting a TableReduce job. It will
- * appropriately set up the JobConf.
- *
- * @param table The output table.
- * @param reducer The reducer class to use.
- * @param job The current job to adjust.
- * @param partitioner Partitioner to use. Pass <code>null</code> to use
- * default partitioner.
- * @throws IOException When determining the region count fails.
- */
- public static void initTableReducerJob(String table,
- Class<? extends TableReducer> reducer, Job job,
- Class partitioner) throws IOException {
- initTableReducerJob(table, reducer, job, partitioner, null, null, null);
- }
-
- /**
- * Use this before submitting a TableReduce job. It will
- * appropriately set up the JobConf.
- *
- * @param table The output table.
- * @param reducer The reducer class to use.
- * @param job The current job to adjust. Make sure the passed job is
- * carrying all necessary HBase configuration.
- * @param partitioner Partitioner to use. Pass <code>null</code> to use
- * default partitioner.
- * @param quorumAddress Distant cluster to write to; default is null for
- * output to the cluster that is designated in <code>hbase-site.xml</code>.
- * Set this String to the zookeeper ensemble of an alternate remote cluster
- * when you would have the reduce write a cluster that is other than the
- * default; e.g. copying tables between clusters, the source would be
- * designated by <code>hbase-site.xml</code> and this param would have the
- * ensemble address of the remote cluster. The format to pass is particular.
- * Pass <code> <hbase.zookeeper.quorum>:<
- * hbase.zookeeper.client.port>:<zookeeper.znode.parent>
- * </code> such as <code>server,server2,server3:2181:/hbase</code>.
- * @param serverClass redefined hbase.regionserver.class
- * @param serverImpl redefined hbase.regionserver.impl
- * @throws IOException When determining the region count fails.
- */
- public static void initTableReducerJob(String table,
- Class<? extends TableReducer> reducer, Job job,
- Class partitioner, String quorumAddress, String serverClass,
- String serverImpl) throws IOException {
- initTableReducerJob(table, reducer, job, partitioner, quorumAddress,
- serverClass, serverImpl, true);
- }
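-
-  // Sketch, not part of this file: writing to a peer cluster (the ensemble address and reducer
-  // name below are hypothetical):
-  //   TableMapReduceUtil.initTableReducerJob("myTable", MyReducer.class, job, null,
-  //       "zk1,zk2,zk3:2181:/hbase", null, null);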
-
- /**
- * Use this before submitting a TableReduce job. It will
- * appropriately set up the JobConf.
- *
- * @param table The output table.
- * @param reducer The reducer class to use.
- * @param job The current job to adjust. Make sure the passed job is
- * carrying all necessary HBase configuration.
- * @param partitioner Partitioner to use. Pass <code>null</code> to use
- * default partitioner.
- * @param quorumAddress Distant cluster to write to; default is null for
- * output to the cluster that is designated in <code>hbase-site.xml</code>.
- * Set this String to the zookeeper ensemble of an alternate remote cluster
- * when you would have the reduce write a cluster that is other than the
- * default; e.g. copying tables between clusters, the source would be
- * designated by <code>hbase-site.xml</code> and this param would have the
- * ensemble address of the remote cluster. The format to pass is particular.
- * Pass <code> <hbase.zookeeper.quorum>:<
- * hbase.zookeeper.client.port>:<zookeeper.znode.parent>
- * </code> such as <code>server,server2,server3:2181:/hbase</code>.
- * @param serverClass redefined hbase.regionserver.class
- * @param serverImpl redefined hbase.regionserver.impl
- * @param addDependencyJars upload HBase jars and jars for any of the configured
- * job classes via the distributed cache (tmpjars).
- * @throws IOException When determining the region count fails.
- */
- public static void initTableReducerJob(String table,
- Class<? extends TableReducer> reducer, Job job,
- Class partitioner, String quorumAddress, String serverClass,
- String serverImpl, boolean addDependencyJars) throws IOException {
-
- Configuration conf = job.getConfiguration();
- HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
- job.setOutputFormatClass(TableOutputFormat.class);
- if (reducer != null) job.setReducerClass(reducer);
- conf.set(TableOutputFormat.OUTPUT_TABLE, table);
- conf.setStrings("io.serializations", conf.get("io.serializations"),
- MutationSerialization.class.getName(), ResultSerialization.class.getName());
- // If passed a quorum/ensemble address, pass it on to TableOutputFormat.
- if (quorumAddress != null) {
- // Calling this will validate the format
- ZKConfig.validateClusterKey(quorumAddress);
- conf.set(TableOutputFormat.QUORUM_ADDRESS,quorumAddress);
- }
- if (serverClass != null && serverImpl != null) {
- conf.set(TableOutputFormat.REGION_SERVER_CLASS, serverClass);
- conf.set(TableOutputFormat.REGION_SERVER_IMPL, serverImpl);
- }
- job.setOutputKeyClass(ImmutableBytesWritable.class);
- job.setOutputValueClass(Writable.class);
- if (partitioner == HRegionPartitioner.class) {
- job.setPartitionerClass(HRegionPartitioner.class);
- int regions = MetaTableAccessor.getRegionCount(conf, TableName.valueOf(table));
- if (job.getNumReduceTasks() > regions) {
- job.setNumReduceTasks(regions);
- }
- } else if (partitioner != null) {
- job.setPartitionerClass(partitioner);
- }
-
- if (addDependencyJars) {
- addDependencyJars(job);
- }
-
- initCredentials(job);
- }
-
- /**
- * Ensures that the given number of reduce tasks for the given job
- * configuration does not exceed the number of regions for the given table.
- *
- * @param table The table to get the region count for.
- * @param job The current job to adjust.
- * @throws IOException When retrieving the table details fails.
- */
- public static void limitNumReduceTasks(String table, Job job)
- throws IOException {
- int regions =
- MetaTableAccessor.getRegionCount(job.getConfiguration(), TableName.valueOf(table));
- if (job.getNumReduceTasks() > regions)
- job.setNumReduceTasks(regions);
- }
-
- /**
- * Sets the number of reduce tasks for the given job configuration to the
- * number of regions the given table has.
- *
- * @param table The table to get the region count for.
- * @param job The current job to adjust.
- * @throws IOException When retrieving the table details fails.
- */
- public static void setNumReduceTasks(String table, Job job)
- throws IOException {
- job.setNumReduceTasks(MetaTableAccessor.getRegionCount(job.getConfiguration(),
- TableName.valueOf(table)));
- }
-
- /**
- * Sets the number of rows to return and cache with each scanner iteration.
- * Higher caching values will enable faster mapreduce jobs at the expense of
- * requiring more heap to contain the cached rows.
- *
- * @param job The current job to adjust.
- * @param batchSize The number of rows to return in batch with each scanner
- * iteration.
- */
- public static void setScannerCaching(Job job, int batchSize) {
- job.getConfiguration().setInt("hbase.client.scanner.caching", batchSize);
- }
-
- /**
- * Add HBase and its dependencies (only) to the job configuration.
- * <p>
- * This is intended as a low-level API, facilitating code reuse between this
- * class and its mapred counterpart. It is also of use to external tools that
- * need to build a MapReduce job that interacts with HBase but want
- * fine-grained control over the jars shipped to the cluster.
- * </p>
- * @param conf The Configuration object to extend with dependencies.
- * @see org.apache.hadoop.hbase.mapred.TableMapReduceUtil
- * @see <a href="https://issues.apache.org/jira/browse/PIG-3285">PIG-3285</a>
- */
- public static void addHBaseDependencyJars(Configuration conf) throws IOException {
-
- // PrefixTreeCodec is part of the hbase-prefix-tree module. If not included in MR jobs jar
- // dependencies, MR jobs that write encoded hfiles will fail.
- // We used reflection here so to prevent a circular module dependency.
- // TODO - if we extract the MR into a module, make it depend on hbase-prefix-tree.
- Class prefixTreeCodecClass = null;
- try {
- prefixTreeCodecClass =
- Class.forName("org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeCodec");
- } catch (ClassNotFoundException e) {
- // this will show up in unit tests but should not show in real deployments
- LOG.warn("The hbase-prefix-tree module jar containing PrefixTreeCodec is not present." +
- " Continuing without it.");
- }
-
- addDependencyJarsForClasses(conf,
- // explicitly pull a class from each module
- org.apache.hadoop.hbase.HConstants.class, // hbase-common
- org.apache.hadoop.hbase.protobuf.generated.ClientProtos.class, // hbase-protocol
- org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.class, // hbase-protocol-shaded
- org.apache.hadoop.hbase.client.Put.class, // hbase-client
- org.apache.hadoop.hbase.CompatibilityFactory.class, // hbase-hadoop-compat
- org.apache.hadoop.hbase.mapreduce.JobUtil.class, // hbase-hadoop2-compat
- org.apache.hadoop.hbase.mapreduce.TableMapper.class, // hbase-server
- org.apache.hadoop.hbase.metrics.impl.FastLongHistogram.class, // hbase-metrics
- org.apache.hadoop.hbase.metrics.Snapshot.class, // hbase-metrics-api
- prefixTreeCodecClass, // hbase-prefix-tree (if null will be skipped)
- // pull necessary dependencies
- org.apache.zookeeper.ZooKeeper.class,
- org.apache.hadoop.hbase.shaded.io.netty.channel.Channel.class,
- com.google.protobuf.Message.class,
- org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists.class,
- org.apache.htrace.Trace.class,
- com.codahale.metrics.MetricRegistry.class);
- }
-
- /**
- * Returns a classpath string built from the content of the "tmpjars" value in {@code conf}.
- * Also exposed to shell scripts via `bin/hbase mapredcp`.
- */
- public static String buildDependencyClasspath(Configuration conf) {
- if (conf == null) {
- throw new IllegalArgumentException("Must provide a configuration object.");
- }
- Set<String> paths = new HashSet<>(conf.getStringCollection("tmpjars"));
- if (paths.isEmpty()) {
- throw new IllegalArgumentException("Configuration contains no tmpjars.");
- }
- StringBuilder sb = new StringBuilder();
- for (String s : paths) {
- // entries can take the form 'file:/path/to/file.jar'.
- int idx = s.indexOf(":");
- if (idx != -1) s = s.substring(idx + 1);
- if (sb.length() > 0) sb.append(File.pathSeparator);
- sb.append(s);
- }
- return sb.toString();
- }
-
- /**
- * Add the HBase dependency jars as well as jars for any of the configured
- * job classes to the job configuration, so that JobClient will ship them
- * to the cluster and add them to the DistributedCache.
- */
- public static void addDependencyJars(Job job) throws IOException {
- addHBaseDependencyJars(job.getConfiguration());
- try {
- addDependencyJarsForClasses(job.getConfiguration(),
- // when making changes here, consider also mapred.TableMapReduceUtil
- // pull job classes
- job.getMapOutputKeyClass(),
- job.getMapOutputValueClass(),
- job.getInputFormatClass(),
- job.getOutputKeyClass(),
- job.getOutputValueClass(),
- job.getOutputFormatClass(),
- job.getPartitionerClass(),
- job.getCombinerClass());
- } catch (ClassNotFoundException e) {
- throw new IOException(e);
- }
- }
-
- /**
- * Add the jars containing the given classes to the job's configuration
- * such that JobClient will ship them to the cluster and add them to
- * the DistributedCache.
- * @deprecated rely on {@link #addDependencyJars(Job)} instead.
- */
- @Deprecated
- public static void addDependencyJars(Configuration conf,
- Class<?>... classes) throws IOException {
- LOG.warn("The addDependencyJars(Configuration, Class<?>...) method has been deprecated since it"
- + " is easy to use incorrectly. Most users should rely on addDependencyJars(Job) " +
- "instead. See HBASE-8386 for more details.");
- addDependencyJarsForClasses(conf, classes);
- }
-
- /**
- * Add the jars containing the given classes to the job's configuration
- * such that JobClient will ship them to the cluster and add them to
- * the DistributedCache.
- *
- * N.B. that this method at most adds one jar per class given. If there is more than one
- * jar available containing a class with the same name as a given class, we don't define
- * which of those jars might be chosen.
- *
- * @param conf The Hadoop Configuration to modify
- * @param classes will add just those dependencies needed to find the given classes
- * @throws IOException if an underlying library call fails.
- */
- @InterfaceAudience.Private
- public static void addDependencyJarsForClasses(Configuration conf,
- Class<?>... classes) throws IOException {
-
- FileSystem localFs = FileSystem.getLocal(conf);
- Set<String> jars = new HashSet<>();
- // Add jars that are already in the tmpjars variable
- jars.addAll(conf.getStringCollection("tmpjars"));
-
- // add jars as we find them to a map of contents jar name so that we can avoid
- // creating new jars for classes that have already been packaged.
- Map<String, String> packagedClasses = new HashMap<>();
-
- // Add jars containing the specified classes
- for (Class<?> clazz : classes) {
- if (clazz == null) continue;
-
- Path path = findOrCreateJar(clazz, localFs, packagedClasses);
- if (path == null) {
- LOG.warn("Could not find jar for class " + clazz +
- " in order to ship it to the cluster.");
- continue;
- }
- if (!localFs.exists(path)) {
- LOG.warn("Could not validate jar file " + path + " for class "
- + clazz);
- continue;
- }
- jars.add(path.toString());
- }
- if (jars.isEmpty()) return;
-
- conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[jars.size()])));
- }
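-
-  // Illustrative sketch, not part of this file: shipping the jar that contains a job-specific
-  // class (the class name is hypothetical). Note this method is @InterfaceAudience.Private;
-  // external code would normally go through addDependencyJars(Job) instead.
-  //   TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), MyCustomFilter.class);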
-
- /**
- * Finds the Jar for a class or creates it if it doesn't exist. If the class is in
- * a directory in the classpath, it creates a Jar on the fly with the
- * contents of the directory and returns the path to that Jar. If a Jar is
- * created, it is created in the system temporary directory. Otherwise,
- * returns an existing jar that contains a class of the same name. Maintains
- * a mapping from jar contents to the tmp jar created.
- * @param my_class the class to find.
- * @param fs the FileSystem with which to qualify the returned path.
- * @param packagedClasses a map of class name to path.
- * @return a jar file that contains the class.
- * @throws IOException
- */
- private static Path findOrCreateJar(Class<?> my_class, FileSystem fs,
- Map<String, String> packagedClasses)
- throws IOException {
- // attempt to locate an existing jar for the class.
- String jar = findContainingJar(my_class, packagedClasses);
- if (null == jar || jar.isEmpty()) {
- jar = getJar(my_class);
- updateMap(jar, packagedClasses);
- }
-
- if (null == jar || jar.isEmpty()) {
- return null;
- }
-
- LOG.debug(String.format("For class %s, using jar %s", my_class.getName(), jar));
- return new Path(jar).makeQualified(fs);
- }
-
- /**
- * Add entries to <code>packagedClasses</code> corresponding to class files
- * contained in <code>jar</code>.
- * @param jar The jar whose contents to list.
- * @param packagedClasses map[class -> jar]
- */
- private static void updateMap(String jar, Map<String, String> packagedClasses) throws IOException {
- if (null == jar || jar.isEmpty()) {
- return;
- }
- ZipFile zip = null;
- try {
- zip = new ZipFile(jar);
- for (Enumeration<? extends ZipEntry> iter = zip.entries(); iter.hasMoreElements();) {
- ZipEntry entry = iter.nextElement();
- if (entry.getName().endsWith("class")) {
- packagedClasses.put(entry.getName(), jar);
- }
- }
- } finally {
- if (null != zip) zip.close();
- }
- }
-
- /**
- * Find a jar that contains a class of the same name, if any. It will return
- * a jar file, even if that is not the first thing on the class path that
- * has a class with the same name. Looks first on the classpath and then in
- * the <code>packagedClasses</code> map.
- * @param my_class the class to find.
- * @return a jar file that contains the class, or null.
- * @throws IOException
- */
- private static String findContainingJar(Class<?> my_class, Map<String, String> packagedClasses)
- throws IOException {
- ClassLoader loader = my_class.getClassLoader();
-
- String class_file = my_class.getName().replaceAll("\\.", "/") + ".class";
-
- if (loader != null) {
- // first search the classpath
- for (Enumeration<URL> itr = loader.getResources(class_file); itr.hasMoreElements();) {
- URL url = itr.nextElement();
- if ("jar".equals(url.getProtocol())) {
- String toReturn = url.getPath();
- if (toReturn.startsWith("file:")) {
- toReturn = toReturn.substring("file:".length());
- }
- // URLDecoder is a misnamed class, since it actually decodes
- // x-www-form-urlencoded MIME type rather than actual
- // URL encoding (which the file path has). Therefore it would
- // decode +s to ' 's which is incorrect (spaces are actually
- // either unencoded or encoded as "%20"). Replace +s first, so
- // that they are kept sacred during the decoding process.
- toReturn = toReturn.replaceAll("\\+", "%2B");
- toReturn = URLDecoder.decode(toReturn, "UTF-8");
- return toReturn.replaceAll("!.*$", "");
- }
- }
- }
-
- // now look in any jars we've packaged using JarFinder. Returns null when
- // no jar is found.
- return packagedClasses.get(class_file);
- }
-
- /**
- * Invoke 'getJar' on a custom JarFinder implementation. Useful for some job
- * configuration contexts (HBASE-8140) and also for testing on MRv2.
- * check if we have HADOOP-9426.
- * @param my_class the class to find.
- * @return a jar file that contains the class, or null.
- */
- private static String getJar(Class<?> my_class) {
- String ret = null;
- try {
- ret = JarFinder.getJar(my_class);
- } catch (Exception e) {
- // toss all other exceptions, related to reflection failure
- throw new RuntimeException("getJar invocation failed.", e);
- }
-
- return ret;
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapper.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapper.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapper.java
deleted file mode 100644
index 9a7dcb7..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapper.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.mapreduce.Mapper;
-
-/**
- * Extends the base <code>Mapper</code> class to add the required input key
- * and value classes.
- *
- * @param <KEYOUT> The type of the key.
- * @param <VALUEOUT> The type of the value.
- * @see org.apache.hadoop.mapreduce.Mapper
- */
-@InterfaceAudience.Public
-public abstract class TableMapper<KEYOUT, VALUEOUT>
-extends Mapper<ImmutableBytesWritable, Result, KEYOUT, VALUEOUT> {
-
-}
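-
-// Illustrative subclass sketch, not part of this file (class name and emitted types are
-// hypothetical):
-//   public class MyMapper extends TableMapper<Text, IntWritable> {
-//     @Override
-//     protected void map(ImmutableBytesWritable row, Result value, Context context)
-//         throws IOException, InterruptedException {
-//       // emit one record per row: row key -> number of cells in the Result
-//       context.write(new Text(Bytes.toString(row.get())), new IntWritable(value.size()));
-//     }
-//   }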
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableOutputCommitter.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableOutputCommitter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableOutputCommitter.java
deleted file mode 100644
index 749fd85..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableOutputCommitter.java
+++ /dev/null
@@ -1,67 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.OutputCommitter;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-
-/**
- * Small committer class that does not do anything.
- */
-@InterfaceAudience.Public
-public class TableOutputCommitter extends OutputCommitter {
-
- @Override
- public void abortTask(TaskAttemptContext arg0) throws IOException {
- }
-
- @Override
- public void cleanupJob(JobContext arg0) throws IOException {
- }
-
- @Override
- public void commitTask(TaskAttemptContext arg0) throws IOException {
- }
-
- @Override
- public boolean needsTaskCommit(TaskAttemptContext arg0) throws IOException {
- return false;
- }
-
- @Override
- public void setupJob(JobContext arg0) throws IOException {
- }
-
- @Override
- public void setupTask(TaskAttemptContext arg0) throws IOException {
- }
-
- public boolean isRecoverySupported() {
- return true;
- }
-
- public void recoverTask(TaskAttemptContext taskContext)
- throws IOException
- {
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableOutputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableOutputFormat.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableOutputFormat.java
deleted file mode 100644
index 5986df8..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableOutputFormat.java
+++ /dev/null
@@ -1,239 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.TableNotEnabledException;
-import org.apache.hadoop.hbase.TableNotFoundException;
-import org.apache.hadoop.hbase.client.Admin;
-import org.apache.hadoop.hbase.client.BufferedMutator;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Delete;
-import org.apache.hadoop.hbase.client.Mutation;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.OutputCommitter;
-import org.apache.hadoop.mapreduce.OutputFormat;
-import org.apache.hadoop.mapreduce.RecordWriter;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-
-/**
- * Convert Map/Reduce output and write it to an HBase table. The KEY is ignored
- * while the output value <u>must</u> be either a {@link Put} or a
- * {@link Delete} instance.
- */
-@InterfaceAudience.Public
-public class TableOutputFormat<KEY> extends OutputFormat<KEY, Mutation>
-implements Configurable {
-
- private static final Log LOG = LogFactory.getLog(TableOutputFormat.class);
-
- /** Job parameter that specifies the output table. */
- public static final String OUTPUT_TABLE = "hbase.mapred.outputtable";
-
- /**
- * Prefix for configuration property overrides to apply in {@link #setConf(Configuration)}.
- * For keys matching this prefix, the prefix is stripped, and the value is set in the
- * configuration with the resulting key, ie. the entry "hbase.mapred.output.key1 = value1"
- * would be set in the configuration as "key1 = value1". Use this to set properties
- * which should only be applied to the {@code TableOutputFormat} configuration and not the
- * input configuration.
- */
- public static final String OUTPUT_CONF_PREFIX = "hbase.mapred.output.";
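-
-  // Sketch, not part of this file: a prefixed override is copied into the output configuration
-  // with the prefix stripped when setConf() runs (the quorum value below is hypothetical):
-  //   conf.set(TableOutputFormat.OUTPUT_CONF_PREFIX + "hbase.zookeeper.quorum", "zk1,zk2,zk3");
-  //   // applied to the output cluster as hbase.zookeeper.quorum=zk1,zk2,zk3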
-
- /**
- * Optional job parameter to specify a peer cluster.
- * Used to specify the remote cluster when copying between HBase clusters (the
- * source is picked up from <code>hbase-site.xml</code>).
- * @see TableMapReduceUtil#initTableReducerJob(String, Class, org.apache.hadoop.mapreduce.Job, Class, String, String, String)
- */
- public static final String QUORUM_ADDRESS = OUTPUT_CONF_PREFIX + "quorum";
-
- /** Optional job parameter to specify peer cluster's ZK client port */
- public static final String QUORUM_PORT = OUTPUT_CONF_PREFIX + "quorum.port";
-
- /** Optional specification of the rs class name of the peer cluster */
- public static final String
- REGION_SERVER_CLASS = OUTPUT_CONF_PREFIX + "rs.class";
- /** Optional specification of the rs impl name of the peer cluster */
- public static final String
- REGION_SERVER_IMPL = OUTPUT_CONF_PREFIX + "rs.impl";
-
- /** The configuration. */
- private Configuration conf = null;
-
- /**
- * Writes the reducer output to an HBase table.
- */
- protected class TableRecordWriter
- extends RecordWriter<KEY, Mutation> {
-
- private Connection connection;
- private BufferedMutator mutator;
-
- /**
- * @throws IOException When the connection or the buffered mutator cannot be created.
- */
- public TableRecordWriter() throws IOException {
- String tableName = conf.get(OUTPUT_TABLE);
- this.connection = ConnectionFactory.createConnection(conf);
- this.mutator = connection.getBufferedMutator(TableName.valueOf(tableName));
- LOG.info("Created table instance for " + tableName);
- }
- /**
- * Closes the writer, in this case flushing any pending table commits.
- *
- * @param context The context.
- * @throws IOException When closing the writer fails.
- * @see RecordWriter#close(TaskAttemptContext)
- */
- @Override
- public void close(TaskAttemptContext context) throws IOException {
- try {
- if (mutator != null) {
- mutator.close();
- }
- } finally {
- if (connection != null) {
- connection.close();
- }
- }
- }
-
- /**
- * Writes a key/value pair into the table.
- *
- * @param key The key.
- * @param value The value.
- * @throws IOException When writing fails.
- * @see RecordWriter#write(Object, Object)
- */
- @Override
- public void write(KEY key, Mutation value)
- throws IOException {
- if (!(value instanceof Put) && !(value instanceof Delete)) {
- throw new IOException("Pass a Delete or a Put");
- }
- mutator.mutate(value);
- }
- }
-
- /**
- * Creates a new record writer.
- *
- * Be aware that the baseline javadoc gives the impression that there is a single
- * {@link RecordWriter} per job, but in HBase it is more natural to hand out a new
- * RecordWriter per call of this method. You must close the returned RecordWriter when done.
- * Failure to do so will drop writes.
- *
- * @param context The current task context.
- * @return The newly created writer instance.
- * @throws IOException When creating the writer fails.
- * @throws InterruptedException When the job is cancelled.
- */
- @Override
- public RecordWriter<KEY, Mutation> getRecordWriter(TaskAttemptContext context)
- throws IOException, InterruptedException {
- return new TableRecordWriter();
- }
-
- /**
- * Checks if the output table exists and is enabled.
- *
- * @param context The current context.
- * @throws IOException When the check fails.
- * @throws InterruptedException When the job is aborted.
- * @see OutputFormat#checkOutputSpecs(JobContext)
- */
- @Override
- public void checkOutputSpecs(JobContext context) throws IOException,
- InterruptedException {
-
- try (Admin admin = ConnectionFactory.createConnection(getConf()).getAdmin()) {
- TableName tableName = TableName.valueOf(this.conf.get(OUTPUT_TABLE));
- if (!admin.tableExists(tableName)) {
- throw new TableNotFoundException("Can't write, table does not exist: " +
- tableName.getNameAsString());
- }
-
- if (!admin.isTableEnabled(tableName)) {
- throw new TableNotEnabledException("Can't write, table is not enabled: " +
- tableName.getNameAsString());
- }
- }
- }
-
- /**
- * Returns the output committer.
- *
- * @param context The current context.
- * @return The committer.
- * @throws IOException When creating the committer fails.
- * @throws InterruptedException When the job is aborted.
- * @see OutputFormat#getOutputCommitter(TaskAttemptContext)
- */
- @Override
- public OutputCommitter getOutputCommitter(TaskAttemptContext context)
- throws IOException, InterruptedException {
- return new TableOutputCommitter();
- }
-
- @Override
- public Configuration getConf() {
- return conf;
- }
-
- @Override
- public void setConf(Configuration otherConf) {
- String tableName = otherConf.get(OUTPUT_TABLE);
- if(tableName == null || tableName.length() <= 0) {
- throw new IllegalArgumentException("Must specify table name");
- }
-
- String address = otherConf.get(QUORUM_ADDRESS);
- int zkClientPort = otherConf.getInt(QUORUM_PORT, 0);
- String serverClass = otherConf.get(REGION_SERVER_CLASS);
- String serverImpl = otherConf.get(REGION_SERVER_IMPL);
-
- try {
- this.conf = HBaseConfiguration.createClusterConf(otherConf, address, OUTPUT_CONF_PREFIX);
-
- if (serverClass != null) {
- this.conf.set(HConstants.REGION_SERVER_IMPL, serverImpl);
- }
- if (zkClientPort != 0) {
- this.conf.setInt(HConstants.ZOOKEEPER_CLIENT_PORT, zkClientPort);
- }
- } catch(IOException e) {
- LOG.error(e);
- throw new RuntimeException(e);
- }
- }
-}
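
For context while reviewing the removal: jobs usually reach this class through TableMapReduceUtil.initTableReducerJob (referenced in the QUORUM_ADDRESS javadoc above), but the format can also be wired by hand. The following is a minimal, hedged sketch of that manual path; the table name "my_table", the job name, and the commented-out peer-cluster quorum are illustrative assumptions, not part of the patch.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.mapreduce.Job;

public class TableOutputFormatUsage {
  public static Job createJob() throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // Table the reducers write to; read back in setConf() and TableRecordWriter.
    conf.set(TableOutputFormat.OUTPUT_TABLE, "my_table");
    // Optional: write to a peer cluster instead of the one in hbase-site.xml.
    // The "hbase.mapred.output." prefix keeps the override on the output side only.
    // conf.set(TableOutputFormat.QUORUM_ADDRESS, "zk1,zk2,zk3:2181:/hbase");
    Job job = Job.getInstance(conf, "table-output-format-example");
    job.setOutputFormatClass(TableOutputFormat.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Mutation.class);
    return job;
  }
}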
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableRecordReader.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableRecordReader.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableRecordReader.java
deleted file mode 100644
index f66520b..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableRecordReader.java
+++ /dev/null
@@ -1,147 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-
-/**
- * Iterates over HBase table data, returning (ImmutableBytesWritable, Result)
- * pairs.
- */
-@InterfaceAudience.Public
-public class TableRecordReader
-extends RecordReader<ImmutableBytesWritable, Result> {
-
- private TableRecordReaderImpl recordReaderImpl = new TableRecordReaderImpl();
-
- /**
- * Restart from survivable exceptions by creating a new scanner.
- *
- * @param firstRow The first row to start at.
- * @throws IOException When restarting fails.
- */
- public void restart(byte[] firstRow) throws IOException {
- this.recordReaderImpl.restart(firstRow);
- }
-
- /**
- * @param table the {@link Table} to scan.
- */
- public void setTable(Table table) {
- this.recordReaderImpl.setHTable(table);
- }
-
- /**
- * Sets the scan defining the actual details like columns etc.
- *
- * @param scan The scan to set.
- */
- public void setScan(Scan scan) {
- this.recordReaderImpl.setScan(scan);
- }
-
- /**
- * Closes the split.
- *
- * @see org.apache.hadoop.mapreduce.RecordReader#close()
- */
- @Override
- public void close() {
- this.recordReaderImpl.close();
- }
-
- /**
- * Returns the current key.
- *
- * @return The current key.
- * @throws IOException
- * @throws InterruptedException When the job is aborted.
- * @see org.apache.hadoop.mapreduce.RecordReader#getCurrentKey()
- */
- @Override
- public ImmutableBytesWritable getCurrentKey() throws IOException,
- InterruptedException {
- return this.recordReaderImpl.getCurrentKey();
- }
-
- /**
- * Returns the current value.
- *
- * @return The current value.
- * @throws IOException When the value is faulty.
- * @throws InterruptedException When the job is aborted.
- * @see org.apache.hadoop.mapreduce.RecordReader#getCurrentValue()
- */
- @Override
- public Result getCurrentValue() throws IOException, InterruptedException {
- return this.recordReaderImpl.getCurrentValue();
- }
-
- /**
- * Initializes the reader.
- *
- * @param inputsplit The split to work with.
- * @param context The current task context.
- * @throws IOException When setting up the reader fails.
- * @throws InterruptedException When the job is aborted.
- * @see org.apache.hadoop.mapreduce.RecordReader#initialize(
- * org.apache.hadoop.mapreduce.InputSplit,
- * org.apache.hadoop.mapreduce.TaskAttemptContext)
- */
- @Override
- public void initialize(InputSplit inputsplit,
- TaskAttemptContext context) throws IOException,
- InterruptedException {
- this.recordReaderImpl.initialize(inputsplit, context);
- }
-
- /**
- * Positions the record reader to the next record.
- *
- * @return <code>true</code> if there was another record.
- * @throws IOException When reading the record failed.
- * @throws InterruptedException When the job was aborted.
- * @see org.apache.hadoop.mapreduce.RecordReader#nextKeyValue()
- */
- @Override
- public boolean nextKeyValue() throws IOException, InterruptedException {
- return this.recordReaderImpl.nextKeyValue();
- }
-
- /**
- * The current progress of the record reader through its data.
- *
- * @return A number between 0.0 and 1.0, the fraction of the data read.
- * @see org.apache.hadoop.mapreduce.RecordReader#getProgress()
- */
- @Override
- public float getProgress() {
- return this.recordReaderImpl.getProgress();
- }
-
-}
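
As a reading aid for the class above, a hedged sketch of the call sequence an input format drives on this reader. The table, split, and context arguments are assumed to be supplied by the caller; the counting logic is only a placeholder for real per-row processing.

import java.io.IOException;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableRecordReader;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

class TableRecordReaderUsage {
  static long countRows(Table table, InputSplit split, TaskAttemptContext context)
      throws IOException, InterruptedException {
    TableRecordReader reader = new TableRecordReader();
    reader.setTable(table);        // the Table to scan
    reader.setScan(new Scan());    // columns, ranges, caching, etc. go here
    reader.initialize(split, context);
    long rows = 0;
    try {
      while (reader.nextKeyValue()) {
        ImmutableBytesWritable rowKey = reader.getCurrentKey();
        Result row = reader.getCurrentValue();
        rows++;                    // process rowKey/row here
      }
    } finally {
      reader.close();
    }
    return rows;
  }
}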
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableRecordReaderImpl.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableRecordReaderImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableRecordReaderImpl.java
deleted file mode 100644
index 9a1c98e..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableRecordReaderImpl.java
+++ /dev/null
@@ -1,315 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-import java.lang.reflect.Method;
-import java.util.Map;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.ScannerCallable;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
-import org.apache.hadoop.hbase.DoNotRetryIOException;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapreduce.Counter;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.util.StringUtils;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.annotations.VisibleForTesting;
-
-/**
- * Iterates over HBase table data, returning (ImmutableBytesWritable, Result)
- * pairs.
- */
-@InterfaceAudience.Public
-public class TableRecordReaderImpl {
- public static final String LOG_PER_ROW_COUNT
- = "hbase.mapreduce.log.scanner.rowcount";
-
- private static final Log LOG = LogFactory.getLog(TableRecordReaderImpl.class);
-
- // HBASE_COUNTER_GROUP_NAME is the name of mapreduce counter group for HBase
- @VisibleForTesting
- static final String HBASE_COUNTER_GROUP_NAME = "HBase Counters";
- private ResultScanner scanner = null;
- private Scan scan = null;
- private Scan currentScan = null;
- private Table htable = null;
- private byte[] lastSuccessfulRow = null;
- private ImmutableBytesWritable key = null;
- private Result value = null;
- private TaskAttemptContext context = null;
- private Method getCounter = null;
- private long numRestarts = 0;
- private long numStale = 0;
- private long timestamp;
- private int rowcount;
- private boolean logScannerActivity = false;
- private int logPerRowCount = 100;
-
- /**
- * Restart from survivable exceptions by creating a new scanner.
- *
- * @param firstRow The first row to start at.
- * @throws IOException When restarting fails.
- */
- public void restart(byte[] firstRow) throws IOException {
- currentScan = new Scan(scan);
- currentScan.withStartRow(firstRow);
- currentScan.setScanMetricsEnabled(true);
- if (this.scanner != null) {
- if (logScannerActivity) {
- LOG.info("Closing the previously opened scanner object.");
- }
- this.scanner.close();
- }
- this.scanner = this.htable.getScanner(currentScan);
- if (logScannerActivity) {
- LOG.info("Current scan=" + currentScan.toString());
- timestamp = System.currentTimeMillis();
- rowcount = 0;
- }
- }
-
- /**
- * In the new mapreduce APIs, TaskAttemptContext has two getCounter methods.
- * Check whether the getCounter(String, String) method is available.
- * @return The getCounter method or null if not available.
- * @throws IOException
- */
- protected static Method retrieveGetCounterWithStringsParams(TaskAttemptContext context)
- throws IOException {
- Method m = null;
- try {
- m = context.getClass().getMethod("getCounter",
- new Class [] {String.class, String.class});
- } catch (SecurityException e) {
- throw new IOException("Failed test for getCounter", e);
- } catch (NoSuchMethodException e) {
- // Ignore
- }
- return m;
- }
-
- /**
- * Sets the HBase table.
- *
- * @param htable The {@link org.apache.hadoop.hbase.client.Table} to scan.
- */
- public void setHTable(Table htable) {
- Configuration conf = htable.getConfiguration();
- logScannerActivity = conf.getBoolean(
- ScannerCallable.LOG_SCANNER_ACTIVITY, false);
- logPerRowCount = conf.getInt(LOG_PER_ROW_COUNT, 100);
- this.htable = htable;
- }
-
- /**
- * Sets the scan defining the actual details like columns etc.
- *
- * @param scan The scan to set.
- */
- public void setScan(Scan scan) {
- this.scan = scan;
- }
-
- /**
- * Build the scanner. Not done in constructor to allow for extension.
- *
- * @throws IOException
- * @throws InterruptedException
- */
- public void initialize(InputSplit inputsplit,
- TaskAttemptContext context) throws IOException,
- InterruptedException {
- if (context != null) {
- this.context = context;
- getCounter = retrieveGetCounterWithStringsParams(context);
- }
- restart(scan.getStartRow());
- }
-
- /**
- * Closes the split.
- */
- public void close() {
- if (this.scanner != null) {
- this.scanner.close();
- }
- try {
- this.htable.close();
- } catch (IOException ioe) {
- LOG.warn("Error closing table", ioe);
- }
- }
-
- /**
- * Returns the current key.
- *
- * @return The current key.
- * @throws IOException
- * @throws InterruptedException When the job is aborted.
- */
- public ImmutableBytesWritable getCurrentKey() throws IOException,
- InterruptedException {
- return key;
- }
-
- /**
- * Returns the current value.
- *
- * @return The current value.
- * @throws IOException When the value is faulty.
- * @throws InterruptedException When the job is aborted.
- */
- public Result getCurrentValue() throws IOException, InterruptedException {
- return value;
- }
-
-
- /**
- * Positions the record reader to the next record.
- *
- * @return <code>true</code> if there was another record.
- * @throws IOException When reading the record failed.
- * @throws InterruptedException When the job was aborted.
- */
- public boolean nextKeyValue() throws IOException, InterruptedException {
- if (key == null) key = new ImmutableBytesWritable();
- if (value == null) value = new Result();
- try {
- try {
- value = this.scanner.next();
- if (value != null && value.isStale()) numStale++;
- if (logScannerActivity) {
- rowcount ++;
- if (rowcount >= logPerRowCount) {
- long now = System.currentTimeMillis();
- LOG.info("Mapper took " + (now-timestamp)
- + "ms to process " + rowcount + " rows");
- timestamp = now;
- rowcount = 0;
- }
- }
- } catch (IOException e) {
- // do not retry if the exception tells us not to do so
- if (e instanceof DoNotRetryIOException) {
- throw e;
- }
- // try to handle all other IOExceptions by restarting
- // the scanner, if the second call fails, it will be rethrown
- LOG.info("recovered from " + StringUtils.stringifyException(e));
- if (lastSuccessfulRow == null) {
- LOG.warn("We are restarting the first next() invocation," +
- " if your mapper has restarted a few other times like this" +
- " then you should consider killing this job and investigate" +
- " why it's taking so long.");
- }
- if (lastSuccessfulRow == null) {
- restart(scan.getStartRow());
- } else {
- restart(lastSuccessfulRow);
- scanner.next(); // skip presumed already mapped row
- }
- value = scanner.next();
- if (value != null && value.isStale()) numStale++;
- numRestarts++;
- }
- if (value != null && value.size() > 0) {
- key.set(value.getRow());
- lastSuccessfulRow = key.get();
- return true;
- }
-
- updateCounters();
- return false;
- } catch (IOException ioe) {
- if (logScannerActivity) {
- long now = System.currentTimeMillis();
- LOG.info("Mapper took " + (now-timestamp)
- + "ms to process " + rowcount + " rows");
- LOG.info(ioe);
- String lastRow = lastSuccessfulRow == null ?
- "null" : Bytes.toStringBinary(lastSuccessfulRow);
- LOG.info("lastSuccessfulRow=" + lastRow);
- }
- throw ioe;
- }
- }
-
- /**
- * If HBase runs on a new version of mapreduce, the RecordReader has access to
- * counters and can update them based on the scan metrics.
- * If HBase runs on an old version of mapreduce, it won't be able to get
- * access to counters, and TableRecordReader can't update counter values.
- * @throws IOException
- */
- private void updateCounters() throws IOException {
- ScanMetrics scanMetrics = scanner.getScanMetrics();
- if (scanMetrics == null) {
- return;
- }
-
- updateCounters(scanMetrics, numRestarts, getCounter, context, numStale);
- }
-
- protected static void updateCounters(ScanMetrics scanMetrics, long numScannerRestarts,
- Method getCounter, TaskAttemptContext context, long numStale) {
- // we can get access to counters only if hbase uses new mapreduce APIs
- if (getCounter == null) {
- return;
- }
-
- try {
- for (Map.Entry<String, Long> entry:scanMetrics.getMetricsMap().entrySet()) {
- Counter ct = (Counter)getCounter.invoke(context,
- HBASE_COUNTER_GROUP_NAME, entry.getKey());
-
- ct.increment(entry.getValue());
- }
- ((Counter) getCounter.invoke(context, HBASE_COUNTER_GROUP_NAME,
- "NUM_SCANNER_RESTARTS")).increment(numScannerRestarts);
- ((Counter) getCounter.invoke(context, HBASE_COUNTER_GROUP_NAME,
- "NUM_SCAN_RESULTS_STALE")).increment(numStale);
- } catch (Exception e) {
- LOG.debug("can't update counter." + StringUtils.stringifyException(e));
- }
- }
-
- /**
- * The current progress of the record reader through its data.
- *
- * @return A number between 0.0 and 1.0, the fraction of the data read.
- */
- public float getProgress() {
- // Depends on the total number of tuples
- return 0;
- }
-
-}
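
A small, hedged configuration sketch of the two knobs setHTable() reads above; the caching cadence of 500 rows is an illustrative value, not a recommendation from the patch.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.ScannerCallable;
import org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl;

public class ScannerLoggingConfig {
  public static Configuration create() {
    Configuration conf = HBaseConfiguration.create();
    // Log scanner activity (scan restarts, per-batch timings) from the reader.
    conf.setBoolean(ScannerCallable.LOG_SCANNER_ACTIVITY, true);
    // Report mapper timings every 500 rows instead of the default 100.
    conf.setInt(TableRecordReaderImpl.LOG_PER_ROW_COUNT, 500);
    return conf;
  }
}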
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableReducer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableReducer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableReducer.java
deleted file mode 100644
index f0bfc74..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableReducer.java
+++ /dev/null
@@ -1,45 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Mutation;
-import org.apache.hadoop.mapreduce.Reducer;
-
-/**
- * Extends the basic <code>Reducer</code> class to add the required key and
- * value input/output classes. While the input key and value as well as the
- * output key can be anything handed in from the previous map phase, the output
- * value <u>must</u> be either a {@link org.apache.hadoop.hbase.client.Put Put}
- * or a {@link org.apache.hadoop.hbase.client.Delete Delete} instance when
- * using the {@link TableOutputFormat} class.
- * <p>
- * This class is extended by {@link IdentityTableReducer} but can also be
- * subclassed to implement similar features or any custom code needed. It has
- * the advantage of enforcing the output value to a specific basic type.
- *
- * @param <KEYIN> The type of the input key.
- * @param <VALUEIN> The type of the input value.
- * @param <KEYOUT> The type of the output key.
- * @see org.apache.hadoop.mapreduce.Reducer
- */
-@InterfaceAudience.Public
-public abstract class TableReducer<KEYIN, VALUEIN, KEYOUT>
-extends Reducer<KEYIN, VALUEIN, KEYOUT, Mutation> {
-}
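
To make the KEYIN/VALUEIN/KEYOUT generics above concrete, a hypothetical reducer that sums integer values per key and emits one Put per row; the column family "f" and qualifier "sum" are assumptions for illustration only.

import java.io.IOException;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

public class SumTableReducer extends TableReducer<Text, IntWritable, ImmutableBytesWritable> {
  private static final byte[] CF = Bytes.toBytes("f");
  private static final byte[] QUALIFIER = Bytes.toBytes("sum");

  @Override
  protected void reduce(Text key, Iterable<IntWritable> values, Context context)
      throws IOException, InterruptedException {
    long sum = 0;
    for (IntWritable v : values) {
      sum += v.get();
    }
    byte[] row = Bytes.toBytes(key.toString());
    Put put = new Put(row);
    put.addColumn(CF, QUALIFIER, Bytes.toBytes(sum));
    // TableOutputFormat ignores the key; the Put is what gets written.
    context.write(new ImmutableBytesWritable(row), put);
  }
}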
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormat.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormat.java
deleted file mode 100644
index 7e59c3b..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormat.java
+++ /dev/null
@@ -1,210 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.annotations.VisibleForTesting;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapreduce.InputFormat;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.lang.reflect.Method;
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * TableSnapshotInputFormat allows a MapReduce job to run over a table snapshot. The job
- * bypasses the HBase servers and accesses the underlying files (hfiles, recovered edits,
- * WALs, etc.) directly to provide maximum performance. The snapshot is not required to be
- * restored to the live cluster or cloned. This also allows running the mapreduce job against an
- * online or offline hbase cluster. The snapshot files can be exported by using the
- * {@link org.apache.hadoop.hbase.snapshot.ExportSnapshot} tool, to a pure-hdfs cluster,
- * and this InputFormat can be used to run the mapreduce job directly over the snapshot files.
- * The snapshot should not be deleted while there are jobs reading from snapshot files.
- * <p>
- * Usage is similar to TableInputFormat, and
- * {@link TableMapReduceUtil#initTableSnapshotMapperJob(String, Scan, Class, Class, Class, Job,
- * boolean, Path)}
- * can be used to configure the job.
- * <pre>{@code
- * Job job = new Job(conf);
- * Scan scan = new Scan();
- * TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
- * scan, MyTableMapper.class, MyMapKeyOutput.class,
- * MyMapOutputValueWritable.class, job, true);
- * }
- * </pre>
- * <p>
- * Internally, this input format restores the snapshot into the given tmp directory. Similar to
- * {@link TableInputFormat}, an InputSplit is created per region. The region is opened for reading
- * from each RecordReader. An internal RegionScanner is used to execute the
- * {@link org.apache.hadoop.hbase.CellScanner} obtained from the user.
- * <p>
- * HBase owns all the data and snapshot files on the filesystem. Only the 'hbase' user can read from
- * snapshot files and data files.
- * To read from snapshot files directly from the file system, the user who is running the MR job
- * must have sufficient permissions to access snapshot and reference files.
- * This means that to run mapreduce over snapshot files, the MR job has to be run as the HBase
- * user or the user must have group or other privileges in the filesystem (See HBASE-8369).
- * Note that granting other users read access to snapshot/data files completely circumvents
- * the access control enforced by HBase.
- * @see org.apache.hadoop.hbase.client.TableSnapshotScanner
- */
-@InterfaceAudience.Public
-public class TableSnapshotInputFormat extends InputFormat<ImmutableBytesWritable, Result> {
-
- public static class TableSnapshotRegionSplit extends InputSplit implements Writable {
- private TableSnapshotInputFormatImpl.InputSplit delegate;
-
- // constructor for mapreduce framework / Writable
- public TableSnapshotRegionSplit() {
- this.delegate = new TableSnapshotInputFormatImpl.InputSplit();
- }
-
- public TableSnapshotRegionSplit(TableSnapshotInputFormatImpl.InputSplit delegate) {
- this.delegate = delegate;
- }
-
- public TableSnapshotRegionSplit(HTableDescriptor htd, HRegionInfo regionInfo,
- List<String> locations, Scan scan, Path restoreDir) {
- this.delegate =
- new TableSnapshotInputFormatImpl.InputSplit(htd, regionInfo, locations, scan, restoreDir);
- }
-
- @Override
- public long getLength() throws IOException, InterruptedException {
- return delegate.getLength();
- }
-
- @Override
- public String[] getLocations() throws IOException, InterruptedException {
- return delegate.getLocations();
- }
-
- @Override
- public void write(DataOutput out) throws IOException {
- delegate.write(out);
- }
-
- @Override
- public void readFields(DataInput in) throws IOException {
- delegate.readFields(in);
- }
-
- public HRegionInfo getRegionInfo() {
- return delegate.getRegionInfo();
- }
-
- }
-
- @VisibleForTesting
- static class TableSnapshotRegionRecordReader extends
- RecordReader<ImmutableBytesWritable, Result> {
- private TableSnapshotInputFormatImpl.RecordReader delegate =
- new TableSnapshotInputFormatImpl.RecordReader();
- private TaskAttemptContext context;
- private Method getCounter;
-
- @Override
- public void initialize(InputSplit split, TaskAttemptContext context) throws IOException,
- InterruptedException {
- this.context = context;
- getCounter = TableRecordReaderImpl.retrieveGetCounterWithStringsParams(context);
- delegate.initialize(
- ((TableSnapshotRegionSplit) split).delegate,
- context.getConfiguration());
- }
-
- @Override
- public boolean nextKeyValue() throws IOException, InterruptedException {
- boolean result = delegate.nextKeyValue();
- if (result) {
- ScanMetrics scanMetrics = delegate.getScanner().getScanMetrics();
- if (scanMetrics != null && context != null) {
- TableRecordReaderImpl.updateCounters(scanMetrics, 0, getCounter, context, 0);
- }
- }
- return result;
- }
-
- @Override
- public ImmutableBytesWritable getCurrentKey() throws IOException, InterruptedException {
- return delegate.getCurrentKey();
- }
-
- @Override
- public Result getCurrentValue() throws IOException, InterruptedException {
- return delegate.getCurrentValue();
- }
-
- @Override
- public float getProgress() throws IOException, InterruptedException {
- return delegate.getProgress();
- }
-
- @Override
- public void close() throws IOException {
- delegate.close();
- }
- }
-
- @Override
- public RecordReader<ImmutableBytesWritable, Result> createRecordReader(
- InputSplit split, TaskAttemptContext context) throws IOException {
- return new TableSnapshotRegionRecordReader();
- }
-
- @Override
- public List<InputSplit> getSplits(JobContext job) throws IOException, InterruptedException {
- List<InputSplit> results = new ArrayList<>();
- for (TableSnapshotInputFormatImpl.InputSplit split :
- TableSnapshotInputFormatImpl.getSplits(job.getConfiguration())) {
- results.add(new TableSnapshotRegionSplit(split));
- }
- return results;
- }
-
- /**
- * Configures the job to use TableSnapshotInputFormat to read from a snapshot.
- * @param job the job to configure
- * @param snapshotName the name of the snapshot to read from
- * @param restoreDir a temporary directory to restore the snapshot into. Current user should
- * have write permissions to this directory, and this should not be a subdirectory of rootdir.
- * After the job is finished, restoreDir can be deleted.
- * @throws IOException if an error occurs
- */
- public static void setInput(Job job, String snapshotName, Path restoreDir)
- throws IOException {
- TableSnapshotInputFormatImpl.setInput(job.getConfiguration(), snapshotName, restoreDir);
- }
-}
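
Beyond the TableMapReduceUtil helper shown in the class javadoc, a minimal, hedged sketch of configuring the format directly through setInput(); the snapshot name "my_snapshot" and the restore directory are placeholders.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat;
import org.apache.hadoop.mapreduce.Job;

public class SnapshotJobSetup {
  public static void configure(Job job) throws Exception {
    // The restore directory must be writable by the job submitter and must not
    // be a subdirectory of the HBase rootdir; it can be deleted after the job.
    TableSnapshotInputFormat.setInput(job, "my_snapshot", new Path("/tmp/snapshot-restore"));
    job.setInputFormatClass(TableSnapshotInputFormat.class);
  }
}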
[09/41] hbase git commit: HBASE-18640 Move mapreduce out of
hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/ScanPerformanceEvaluation.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/ScanPerformanceEvaluation.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/ScanPerformanceEvaluation.java
deleted file mode 100644
index e669f14..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/ScanPerformanceEvaluation.java
+++ /dev/null
@@ -1,406 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase;
-
-import java.io.IOException;
-import java.util.concurrent.TimeUnit;
-
-import org.apache.commons.cli.CommandLine;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.client.TableSnapshotScanner;
-import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
-import org.apache.hadoop.hbase.mapreduce.TableMapper;
-import org.apache.hadoop.hbase.util.AbstractHBaseTool;
-import org.apache.hadoop.hbase.util.FSUtils;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.Counters;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
-import org.apache.hadoop.util.StringUtils;
-import org.apache.hadoop.util.ToolRunner;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.base.Stopwatch;
-
-/**
- * A simple performance evaluation tool for single client and MR scans
- * and snapshot scans.
- */
-@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
-public class ScanPerformanceEvaluation extends AbstractHBaseTool {
-
- private static final String HBASE_COUNTER_GROUP_NAME = "HBase Counters";
-
- private String type;
- private String file;
- private String tablename;
- private String snapshotName;
- private String restoreDir;
- private String caching;
-
- @Override
- public void setConf(Configuration conf) {
- super.setConf(conf);
- Path rootDir;
- try {
- rootDir = FSUtils.getRootDir(conf);
- rootDir.getFileSystem(conf);
- } catch (IOException ex) {
- throw new RuntimeException(ex);
- }
- }
-
- @Override
- protected void addOptions() {
- this.addRequiredOptWithArg("t", "type", "the type of the test. One of the following: streaming|scan|snapshotscan|scanmapreduce|snapshotscanmapreduce");
- this.addOptWithArg("f", "file", "the filename to read from");
- this.addOptWithArg("tn", "table", "the tablename to read from");
- this.addOptWithArg("sn", "snapshot", "the snapshot name to read from");
- this.addOptWithArg("rs", "restoredir", "the directory to restore the snapshot");
- this.addOptWithArg("ch", "caching", "scanner caching value");
- }
-
- @Override
- protected void processOptions(CommandLine cmd) {
- type = cmd.getOptionValue("type");
- file = cmd.getOptionValue("file");
- tablename = cmd.getOptionValue("table");
- snapshotName = cmd.getOptionValue("snapshot");
- restoreDir = cmd.getOptionValue("restoredir");
- caching = cmd.getOptionValue("caching");
- }
-
- protected void testHdfsStreaming(Path filename) throws IOException {
- byte[] buf = new byte[1024];
- FileSystem fs = filename.getFileSystem(getConf());
-
- // read the file from start to finish
- Stopwatch fileOpenTimer = Stopwatch.createUnstarted();
- Stopwatch streamTimer = Stopwatch.createUnstarted();
-
- fileOpenTimer.start();
- FSDataInputStream in = fs.open(filename);
- fileOpenTimer.stop();
-
- long totalBytes = 0;
- streamTimer.start();
- while (true) {
- int read = in.read(buf);
- if (read < 0) {
- break;
- }
- totalBytes += read;
- }
- streamTimer.stop();
-
- double throughput = (double)totalBytes / streamTimer.elapsed(TimeUnit.SECONDS);
-
- System.out.println("HDFS streaming: ");
- System.out.println("total time to open: " +
- fileOpenTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
- System.out.println("total time to read: " + streamTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
- System.out.println("total bytes: " + totalBytes + " bytes ("
- + StringUtils.humanReadableInt(totalBytes) + ")");
- System.out.println("throghput : " + StringUtils.humanReadableInt((long)throughput) + "B/s");
- }
-
- private Scan getScan() {
- Scan scan = new Scan(); // default scan settings
- scan.setCacheBlocks(false);
- scan.setMaxVersions(1);
- scan.setScanMetricsEnabled(true);
- if (caching != null) {
- scan.setCaching(Integer.parseInt(caching));
- }
-
- return scan;
- }
-
- public void testScan() throws IOException {
- Stopwatch tableOpenTimer = Stopwatch.createUnstarted();
- Stopwatch scanOpenTimer = Stopwatch.createUnstarted();
- Stopwatch scanTimer = Stopwatch.createUnstarted();
-
- tableOpenTimer.start();
- Connection connection = ConnectionFactory.createConnection(getConf());
- Table table = connection.getTable(TableName.valueOf(tablename));
- tableOpenTimer.stop();
-
- Scan scan = getScan();
- scanOpenTimer.start();
- ResultScanner scanner = table.getScanner(scan);
- scanOpenTimer.stop();
-
- long numRows = 0;
- long numCells = 0;
- scanTimer.start();
- while (true) {
- Result result = scanner.next();
- if (result == null) {
- break;
- }
- numRows++;
-
- numCells += result.rawCells().length;
- }
- scanTimer.stop();
- scanner.close();
- table.close();
- connection.close();
-
- ScanMetrics metrics = scan.getScanMetrics();
- long totalBytes = metrics.countOfBytesInResults.get();
- double throughput = (double)totalBytes / scanTimer.elapsed(TimeUnit.SECONDS);
- double throughputRows = (double)numRows / scanTimer.elapsed(TimeUnit.SECONDS);
- double throughputCells = (double)numCells / scanTimer.elapsed(TimeUnit.SECONDS);
-
- System.out.println("HBase scan: ");
- System.out.println("total time to open table: " +
- tableOpenTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
- System.out.println("total time to open scanner: " +
- scanOpenTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
- System.out.println("total time to scan: " +
- scanTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
-
- System.out.println("Scan metrics:\n" + metrics.getMetricsMap());
-
- System.out.println("total bytes: " + totalBytes + " bytes ("
- + StringUtils.humanReadableInt(totalBytes) + ")");
- System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughput) + "B/s");
- System.out.println("total rows : " + numRows);
- System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughputRows) + " rows/s");
- System.out.println("total cells : " + numCells);
- System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughputCells) + " cells/s");
- }
-
-
- public void testSnapshotScan() throws IOException {
- Stopwatch snapshotRestoreTimer = Stopwatch.createUnstarted();
- Stopwatch scanOpenTimer = Stopwatch.createUnstarted();
- Stopwatch scanTimer = Stopwatch.createUnstarted();
-
- Path restoreDir = new Path(this.restoreDir);
-
- snapshotRestoreTimer.start();
- restoreDir.getFileSystem(conf).delete(restoreDir, true);
- snapshotRestoreTimer.stop();
-
- Scan scan = getScan();
- scanOpenTimer.start();
- TableSnapshotScanner scanner = new TableSnapshotScanner(conf, restoreDir, snapshotName, scan);
- scanOpenTimer.stop();
-
- long numRows = 0;
- long numCells = 0;
- scanTimer.start();
- while (true) {
- Result result = scanner.next();
- if (result == null) {
- break;
- }
- numRows++;
-
- numCells += result.rawCells().length;
- }
- scanTimer.stop();
- scanner.close();
-
- ScanMetrics metrics = scanner.getScanMetrics();
- long totalBytes = metrics.countOfBytesInResults.get();
- double throughput = (double)totalBytes / scanTimer.elapsed(TimeUnit.SECONDS);
- double throughputRows = (double)numRows / scanTimer.elapsed(TimeUnit.SECONDS);
- double throughputCells = (double)numCells / scanTimer.elapsed(TimeUnit.SECONDS);
-
- System.out.println("HBase scan snapshot: ");
- System.out.println("total time to restore snapshot: " +
- snapshotRestoreTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
- System.out.println("total time to open scanner: " +
- scanOpenTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
- System.out.println("total time to scan: " +
- scanTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
-
- System.out.println("Scan metrics:\n" + metrics.getMetricsMap());
-
- System.out.println("total bytes: " + totalBytes + " bytes ("
- + StringUtils.humanReadableInt(totalBytes) + ")");
- System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughput) + "B/s");
- System.out.println("total rows : " + numRows);
- System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughputRows) + " rows/s");
- System.out.println("total cells : " + numCells);
- System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughputCells) + " cells/s");
-
- }
-
- public static enum ScanCounter {
- NUM_ROWS,
- NUM_CELLS,
- }
-
- public static class MyMapper<KEYOUT, VALUEOUT> extends TableMapper<KEYOUT, VALUEOUT> {
- @Override
- protected void map(ImmutableBytesWritable key, Result value,
- Context context) throws IOException,
- InterruptedException {
- context.getCounter(ScanCounter.NUM_ROWS).increment(1);
- context.getCounter(ScanCounter.NUM_CELLS).increment(value.rawCells().length);
- }
- }
-
- public void testScanMapReduce() throws IOException, InterruptedException, ClassNotFoundException {
- Stopwatch scanOpenTimer = Stopwatch.createUnstarted();
- Stopwatch scanTimer = Stopwatch.createUnstarted();
-
- Scan scan = getScan();
-
- String jobName = "testScanMapReduce";
-
- Job job = new Job(conf);
- job.setJobName(jobName);
-
- job.setJarByClass(getClass());
-
- TableMapReduceUtil.initTableMapperJob(
- this.tablename,
- scan,
- MyMapper.class,
- NullWritable.class,
- NullWritable.class,
- job
- );
-
- job.setNumReduceTasks(0);
- job.setOutputKeyClass(NullWritable.class);
- job.setOutputValueClass(NullWritable.class);
- job.setOutputFormatClass(NullOutputFormat.class);
-
- scanTimer.start();
- job.waitForCompletion(true);
- scanTimer.stop();
-
- Counters counters = job.getCounters();
- long numRows = counters.findCounter(ScanCounter.NUM_ROWS).getValue();
- long numCells = counters.findCounter(ScanCounter.NUM_CELLS).getValue();
-
- long totalBytes = counters.findCounter(HBASE_COUNTER_GROUP_NAME, "BYTES_IN_RESULTS").getValue();
- double throughput = (double)totalBytes / scanTimer.elapsed(TimeUnit.SECONDS);
- double throughputRows = (double)numRows / scanTimer.elapsed(TimeUnit.SECONDS);
- double throughputCells = (double)numCells / scanTimer.elapsed(TimeUnit.SECONDS);
-
- System.out.println("HBase scan mapreduce: ");
- System.out.println("total time to open scanner: " +
- scanOpenTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
- System.out.println("total time to scan: " + scanTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
-
- System.out.println("total bytes: " + totalBytes + " bytes ("
- + StringUtils.humanReadableInt(totalBytes) + ")");
- System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughput) + "B/s");
- System.out.println("total rows : " + numRows);
- System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughputRows) + " rows/s");
- System.out.println("total cells : " + numCells);
- System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughputCells) + " cells/s");
- }
-
- public void testSnapshotScanMapReduce() throws IOException, InterruptedException, ClassNotFoundException {
- Stopwatch scanOpenTimer = Stopwatch.createUnstarted();
- Stopwatch scanTimer = Stopwatch.createUnstarted();
-
- Scan scan = getScan();
-
- String jobName = "testSnapshotScanMapReduce";
-
- Job job = new Job(conf);
- job.setJobName(jobName);
-
- job.setJarByClass(getClass());
-
- TableMapReduceUtil.initTableSnapshotMapperJob(
- this.snapshotName,
- scan,
- MyMapper.class,
- NullWritable.class,
- NullWritable.class,
- job,
- true,
- new Path(restoreDir)
- );
-
- job.setNumReduceTasks(0);
- job.setOutputKeyClass(NullWritable.class);
- job.setOutputValueClass(NullWritable.class);
- job.setOutputFormatClass(NullOutputFormat.class);
-
- scanTimer.start();
- job.waitForCompletion(true);
- scanTimer.stop();
-
- Counters counters = job.getCounters();
- long numRows = counters.findCounter(ScanCounter.NUM_ROWS).getValue();
- long numCells = counters.findCounter(ScanCounter.NUM_CELLS).getValue();
-
- long totalBytes = counters.findCounter(HBASE_COUNTER_GROUP_NAME, "BYTES_IN_RESULTS").getValue();
- double throughput = (double)totalBytes / scanTimer.elapsed(TimeUnit.SECONDS);
- double throughputRows = (double)numRows / scanTimer.elapsed(TimeUnit.SECONDS);
- double throughputCells = (double)numCells / scanTimer.elapsed(TimeUnit.SECONDS);
-
- System.out.println("HBase scan mapreduce: ");
- System.out.println("total time to open scanner: " +
- scanOpenTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
- System.out.println("total time to scan: " + scanTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
-
- System.out.println("total bytes: " + totalBytes + " bytes ("
- + StringUtils.humanReadableInt(totalBytes) + ")");
- System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughput) + "B/s");
- System.out.println("total rows : " + numRows);
- System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughputRows) + " rows/s");
- System.out.println("total cells : " + numCells);
- System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughputCells) + " cells/s");
- }
-
- @Override
- protected int doWork() throws Exception {
- if (type.equals("streaming")) {
- testHdfsStreaming(new Path(file));
- } else if (type.equals("scan")){
- testScan();
- } else if (type.equals("snapshotscan")) {
- testSnapshotScan();
- } else if (type.equals("scanmapreduce")) {
- testScanMapReduce();
- } else if (type.equals("snapshotscanmapreduce")) {
- testSnapshotScanMapReduce();
- }
- return 0;
- }
-
- public static void main (String[] args) throws Exception {
- int ret = ToolRunner.run(HBaseConfiguration.create(), new ScanPerformanceEvaluation(), args);
- System.exit(ret);
- }
-}
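
For completeness, a hedged example of driving the tool programmatically with the options defined in addOptions() above; the table name and caching value are illustrative, and the class must be on the test classpath.

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.ScanPerformanceEvaluation;
import org.apache.hadoop.util.ToolRunner;

public class RunScanEvaluation {
  public static void main(String[] args) throws Exception {
    // Runs the single-client scan test against "my_table" with scanner caching of 1000.
    int ret = ToolRunner.run(HBaseConfiguration.create(), new ScanPerformanceEvaluation(),
        new String[] { "--type", "scan", "--table", "my_table", "--caching", "1000" });
    System.exit(ret);
  }
}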
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPerformanceEvaluation.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPerformanceEvaluation.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPerformanceEvaluation.java
deleted file mode 100644
index 86a3d3f..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPerformanceEvaluation.java
+++ /dev/null
@@ -1,218 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase;
-
-import static org.junit.Assert.*;
-
-import java.io.BufferedReader;
-import java.io.ByteArrayInputStream;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.lang.reflect.Constructor;
-import java.lang.reflect.InvocationTargetException;
-import java.util.NoSuchElementException;
-import java.util.Queue;
-import java.util.Random;
-import java.util.LinkedList;
-
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.PerformanceEvaluation.RandomReadTest;
-import org.apache.hadoop.hbase.PerformanceEvaluation.TestOptions;
-import org.apache.hadoop.hbase.testclassification.MiscTests;
-import org.apache.hadoop.hbase.testclassification.SmallTests;
-import org.codehaus.jackson.JsonGenerationException;
-import org.codehaus.jackson.map.JsonMappingException;
-import org.codehaus.jackson.map.ObjectMapper;
-import org.junit.Ignore;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-import com.codahale.metrics.Histogram;
-import com.codahale.metrics.Snapshot;
-import com.codahale.metrics.UniformReservoir;
-
-@Category({MiscTests.class, SmallTests.class})
-public class TestPerformanceEvaluation {
- private static final HBaseTestingUtility HTU = new HBaseTestingUtility();
-
- @Test
- public void testSerialization()
- throws JsonGenerationException, JsonMappingException, IOException {
- PerformanceEvaluation.TestOptions options = new PerformanceEvaluation.TestOptions();
- assertTrue(!options.isAutoFlush());
- options.setAutoFlush(true);
- ObjectMapper mapper = new ObjectMapper();
- String optionsString = mapper.writeValueAsString(options);
- PerformanceEvaluation.TestOptions optionsDeserialized =
- mapper.readValue(optionsString, PerformanceEvaluation.TestOptions.class);
- assertTrue(optionsDeserialized.isAutoFlush());
- }
-
- /**
- * Exercise the mr spec writing. Simple assertions to make sure it is basically working.
- * @throws IOException
- */
- @Ignore @Test
- public void testWriteInputFile() throws IOException {
- TestOptions opts = new PerformanceEvaluation.TestOptions();
- final int clients = 10;
- opts.setNumClientThreads(clients);
- opts.setPerClientRunRows(10);
- Path dir =
- PerformanceEvaluation.writeInputFile(HTU.getConfiguration(), opts, HTU.getDataTestDir());
- FileSystem fs = FileSystem.get(HTU.getConfiguration());
- Path p = new Path(dir, PerformanceEvaluation.JOB_INPUT_FILENAME);
- long len = fs.getFileStatus(p).getLen();
- assertTrue(len > 0);
- byte [] content = new byte[(int)len];
- FSDataInputStream dis = fs.open(p);
- try {
- dis.readFully(content);
- BufferedReader br =
- new BufferedReader(new InputStreamReader(new ByteArrayInputStream(content)));
- int count = 0;
- while (br.readLine() != null) {
- count++;
- }
- assertEquals(clients, count);
- } finally {
- dis.close();
- }
- }
-
- @Test
- public void testSizeCalculation() {
- TestOptions opts = new PerformanceEvaluation.TestOptions();
- opts = PerformanceEvaluation.calculateRowsAndSize(opts);
- int rows = opts.getPerClientRunRows();
- // Default row count
- final int defaultPerClientRunRows = 1024 * 1024;
- assertEquals(defaultPerClientRunRows, rows);
- // If size is 2G, then twice the row count.
- opts.setSize(2.0f);
- opts = PerformanceEvaluation.calculateRowsAndSize(opts);
- assertEquals(defaultPerClientRunRows * 2, opts.getPerClientRunRows());
- // If two clients, then they get half the rows each.
- opts.setNumClientThreads(2);
- opts = PerformanceEvaluation.calculateRowsAndSize(opts);
- assertEquals(defaultPerClientRunRows, opts.getPerClientRunRows());
- // What if valueSize is 'random'? Then half of the valueSize so twice the rows.
- opts.valueRandom = true;
- opts = PerformanceEvaluation.calculateRowsAndSize(opts);
- assertEquals(defaultPerClientRunRows * 2, opts.getPerClientRunRows());
- }
-
- @Test
- public void testRandomReadCalculation() {
- TestOptions opts = new PerformanceEvaluation.TestOptions();
- opts = PerformanceEvaluation.calculateRowsAndSize(opts);
- int rows = opts.getPerClientRunRows();
- // Default row count
- final int defaultPerClientRunRows = 1024 * 1024;
- assertEquals(defaultPerClientRunRows, rows);
- // If size is 2G, then twice the row count.
- opts.setSize(2.0f);
- opts.setPerClientRunRows(1000);
- opts.setCmdName(PerformanceEvaluation.RANDOM_READ);
- opts = PerformanceEvaluation.calculateRowsAndSize(opts);
- assertEquals(1000, opts.getPerClientRunRows());
- // If two clients, then they get half the rows each.
- opts.setNumClientThreads(2);
- opts = PerformanceEvaluation.calculateRowsAndSize(opts);
- assertEquals(1000, opts.getPerClientRunRows());
- Random random = new Random();
- // assuming we will get one before this loop expires
- boolean foundValue = false;
- for (int i = 0; i < 10000000; i++) {
- int randomRow = PerformanceEvaluation.generateRandomRow(random, opts.totalRows);
- if (randomRow > 1000) {
- foundValue = true;
- break;
- }
- }
- assertTrue("We need to get a value more than 1000", foundValue);
- }
-
- @Test
- public void testZipfian()
- throws NoSuchMethodException, SecurityException, InstantiationException, IllegalAccessException,
- IllegalArgumentException, InvocationTargetException {
- TestOptions opts = new PerformanceEvaluation.TestOptions();
- opts.setValueZipf(true);
- final int valueSize = 1024;
- opts.setValueSize(valueSize);
- RandomReadTest rrt = new RandomReadTest(null, opts, null);
- Constructor<?> ctor =
- Histogram.class.getDeclaredConstructor(com.codahale.metrics.Reservoir.class);
- ctor.setAccessible(true);
- Histogram histogram = (Histogram)ctor.newInstance(new UniformReservoir(1024 * 500));
- for (int i = 0; i < 100; i++) {
- histogram.update(rrt.getValueLength(null));
- }
- Snapshot snapshot = histogram.getSnapshot();
- double stddev = snapshot.getStdDev();
- assertTrue(stddev != 0 && stddev != 1.0);
- assertTrue(snapshot.getStdDev() != 0);
- double median = snapshot.getMedian();
- assertTrue(median != 0 && median != 1 && median != valueSize);
- }
-
- @Test
- public void testParseOptsWithThreads() {
- Queue<String> opts = new LinkedList<>();
- String cmdName = "sequentialWrite";
- int threads = 1;
- opts.offer(cmdName);
- opts.offer(String.valueOf(threads));
- PerformanceEvaluation.TestOptions options = PerformanceEvaluation.parseOpts(opts);
- assertNotNull(options);
- assertNotNull(options.getCmdName());
- assertEquals(cmdName, options.getCmdName());
- assertEquals(threads, options.getNumClientThreads());
- }
-
- @Test
- public void testParseOptsWrongThreads() {
- Queue<String> opts = new LinkedList<>();
- String cmdName = "sequentialWrite";
- opts.offer(cmdName);
- opts.offer("qq");
- try {
- PerformanceEvaluation.parseOpts(opts);
- } catch (IllegalArgumentException e) {
- System.out.println(e.getMessage());
- assertEquals("Command " + cmdName + " does not have threads number", e.getMessage());
- assertTrue(e.getCause() instanceof NumberFormatException);
- }
- }
-
- @Test
- public void testParseOptsNoThreads() {
- Queue<String> opts = new LinkedList<>();
- String cmdName = "sequentialWrite";
- try {
- PerformanceEvaluation.parseOpts(opts);
- } catch (IllegalArgumentException e) {
- System.out.println(e.getMessage());
- assertEquals("Command " + cmdName + " does not have threads number", e.getMessage());
- assertTrue(e.getCause() instanceof NoSuchElementException);
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestTableSnapshotScanner.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestTableSnapshotScanner.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestTableSnapshotScanner.java
index 3322e6c..535a34d 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestTableSnapshotScanner.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestTableSnapshotScanner.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.client;
import java.io.IOException;
import java.util.Arrays;
+import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -29,8 +30,8 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellScanner;
import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.mapreduce.TestTableSnapshotInputFormat;
import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
import org.apache.hadoop.hbase.testclassification.ClientTests;
@@ -45,7 +46,7 @@ import org.junit.experimental.categories.Category;
@Category({LargeTests.class, ClientTests.class})
public class TestTableSnapshotScanner {
- private static final Log LOG = LogFactory.getLog(TestTableSnapshotInputFormat.class);
+ private static final Log LOG = LogFactory.getLog(TestTableSnapshotScanner.class);
private final HBaseTestingUtility UTIL = new HBaseTestingUtility();
private static final int NUM_REGION_SERVERS = 2;
private static final byte[][] FAMILIES = {Bytes.toBytes("f1"), Bytes.toBytes("f2")};
@@ -55,6 +56,17 @@ public class TestTableSnapshotScanner {
private FileSystem fs;
private Path rootDir;
+ public static void blockUntilSplitFinished(HBaseTestingUtility util, TableName tableName,
+ int expectedRegionSize) throws Exception {
+ for (int i = 0; i < 100; i++) {
+ List<HRegionInfo> hRegionInfoList = util.getAdmin().getTableRegions(tableName);
+ if (hRegionInfoList.size() >= expectedRegionSize) {
+ break;
+ }
+ Thread.sleep(1000);
+ }
+ }
+
public void setupCluster() throws Exception {
setupConf(UTIL.getConfiguration());
UTIL.startMiniCluster(NUM_REGION_SERVERS, true);
@@ -129,7 +141,7 @@ public class TestTableSnapshotScanner {
// split to 2 regions
admin.split(tableName, Bytes.toBytes("eee"));
- TestTableSnapshotInputFormat.blockUntilSplitFinished(UTIL, tableName, 2);
+ blockUntilSplitFinished(UTIL, tableName, 2);
Path rootDir = FSUtils.getRootDir(UTIL.getConfiguration());
FileSystem fs = rootDir.getFileSystem(UTIL.getConfiguration());
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestDriver.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestDriver.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestDriver.java
deleted file mode 100644
index ab6a86d..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestDriver.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.testclassification.SmallTests;
-import org.apache.hadoop.util.ProgramDriver;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.mockito.Mockito;
-
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.verify;
-
-@Category({MapReduceTests.class, SmallTests.class})
-public class TestDriver {
-
- @Test
- public void testDriverMainMethod() throws Throwable {
- ProgramDriver programDriverMock = mock(ProgramDriver.class);
- Driver.setProgramDriver(programDriverMock);
- Driver.main(new String[]{});
- verify(programDriverMock).driver(Mockito.any(String[].class));
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestGroupingTableMap.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestGroupingTableMap.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestGroupingTableMap.java
deleted file mode 100644
index 36e45e4..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestGroupingTableMap.java
+++ /dev/null
@@ -1,181 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import static org.junit.Assert.assertArrayEquals;
-import static org.junit.Assert.assertNull;
-import static org.mockito.Matchers.any;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.times;
-import static org.mockito.Mockito.verify;
-import static org.mockito.Mockito.verifyZeroInteractions;
-import static org.mockito.Mockito.verifyNoMoreInteractions;
-import static org.mockito.Mockito.when;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.concurrent.atomic.AtomicBoolean;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.testclassification.SmallTests;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import org.junit.Assert;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.ImmutableList;
-
-@Category({MapReduceTests.class, SmallTests.class})
-public class TestGroupingTableMap {
-
- @Test
- @SuppressWarnings({ "deprecation", "unchecked" })
- public void shouldNotCallCollectonSinceFindUniqueKeyValueMoreThanOnes()
- throws Exception {
- GroupingTableMap gTableMap = null;
- try {
- Result result = mock(Result.class);
- Reporter reporter = mock(Reporter.class);
- gTableMap = new GroupingTableMap();
- Configuration cfg = new Configuration();
- cfg.set(GroupingTableMap.GROUP_COLUMNS, "familyA:qualifierA familyB:qualifierB");
- JobConf jobConf = new JobConf(cfg);
- gTableMap.configure(jobConf);
-
- byte[] row = {};
- List<Cell> keyValues = ImmutableList.<Cell>of(
- new KeyValue(row, "familyA".getBytes(), "qualifierA".getBytes(), Bytes.toBytes("1111")),
- new KeyValue(row, "familyA".getBytes(), "qualifierA".getBytes(), Bytes.toBytes("2222")),
- new KeyValue(row, "familyB".getBytes(), "qualifierB".getBytes(), Bytes.toBytes("3333")));
- when(result.listCells()).thenReturn(keyValues);
- OutputCollector<ImmutableBytesWritable, Result> outputCollectorMock =
- mock(OutputCollector.class);
- gTableMap.map(null, result, outputCollectorMock, reporter);
- verify(result).listCells();
- verifyZeroInteractions(outputCollectorMock);
- } finally {
- if (gTableMap != null)
- gTableMap.close();
- }
- }
-
- @Test
- @SuppressWarnings({ "deprecation", "unchecked" })
- public void shouldCreateNewKeyAlthoughExtraKey() throws Exception {
- GroupingTableMap gTableMap = null;
- try {
- Result result = mock(Result.class);
- Reporter reporter = mock(Reporter.class);
- gTableMap = new GroupingTableMap();
- Configuration cfg = new Configuration();
- cfg.set(GroupingTableMap.GROUP_COLUMNS, "familyA:qualifierA familyB:qualifierB");
- JobConf jobConf = new JobConf(cfg);
- gTableMap.configure(jobConf);
-
- byte[] row = {};
- List<Cell> keyValues = ImmutableList.<Cell>of(
- new KeyValue(row, "familyA".getBytes(), "qualifierA".getBytes(), Bytes.toBytes("1111")),
- new KeyValue(row, "familyB".getBytes(), "qualifierB".getBytes(), Bytes.toBytes("2222")),
- new KeyValue(row, "familyC".getBytes(), "qualifierC".getBytes(), Bytes.toBytes("3333")));
- when(result.listCells()).thenReturn(keyValues);
- OutputCollector<ImmutableBytesWritable, Result> outputCollectorMock =
- mock(OutputCollector.class);
- gTableMap.map(null, result, outputCollectorMock, reporter);
- verify(result).listCells();
- verify(outputCollectorMock, times(1))
- .collect(any(ImmutableBytesWritable.class), any(Result.class));
- verifyNoMoreInteractions(outputCollectorMock);
- } finally {
- if (gTableMap != null)
- gTableMap.close();
- }
- }
-
- @Test
- @SuppressWarnings({ "deprecation" })
- public void shouldCreateNewKey() throws Exception {
- GroupingTableMap gTableMap = null;
- try {
- Result result = mock(Result.class);
- Reporter reporter = mock(Reporter.class);
- final byte[] bSeparator = Bytes.toBytes(" ");
- gTableMap = new GroupingTableMap();
- Configuration cfg = new Configuration();
- cfg.set(GroupingTableMap.GROUP_COLUMNS, "familyA:qualifierA familyB:qualifierB");
- JobConf jobConf = new JobConf(cfg);
- gTableMap.configure(jobConf);
-
- final byte[] firstPartKeyValue = Bytes.toBytes("34879512738945");
- final byte[] secondPartKeyValue = Bytes.toBytes("35245142671437");
- byte[] row = {};
- List<Cell> cells = ImmutableList.<Cell>of(
- new KeyValue(row, "familyA".getBytes(), "qualifierA".getBytes(), firstPartKeyValue),
- new KeyValue(row, "familyB".getBytes(), "qualifierB".getBytes(), secondPartKeyValue));
- when(result.listCells()).thenReturn(cells);
-
- final AtomicBoolean outputCollected = new AtomicBoolean();
- OutputCollector<ImmutableBytesWritable, Result> outputCollector =
- new OutputCollector<ImmutableBytesWritable, Result>() {
- @Override
- public void collect(ImmutableBytesWritable arg, Result result) throws IOException {
- assertArrayEquals(org.apache.hadoop.hbase.shaded.com.google.common.primitives.
- Bytes.concat(firstPartKeyValue, bSeparator,
- secondPartKeyValue), arg.copyBytes());
- outputCollected.set(true);
- }
- };
-
- gTableMap.map(null, result, outputCollector, reporter);
- verify(result).listCells();
- Assert.assertTrue("Output not received", outputCollected.get());
-
- final byte[] firstPartValue = Bytes.toBytes("238947928");
- final byte[] secondPartValue = Bytes.toBytes("4678456942345");
- byte[][] data = { firstPartValue, secondPartValue };
- ImmutableBytesWritable byteWritable = gTableMap.createGroupKey(data);
- assertArrayEquals(org.apache.hadoop.hbase.shaded.com.google.common.primitives.
- Bytes.concat(firstPartValue,
- bSeparator, secondPartValue), byteWritable.get());
- } finally {
- if (gTableMap != null)
- gTableMap.close();
- }
- }
-
- @Test
- @SuppressWarnings({ "deprecation" })
- public void shouldReturnNullFromCreateGroupKey() throws Exception {
- GroupingTableMap gTableMap = null;
- try {
- gTableMap = new GroupingTableMap();
- assertNull(gTableMap.createGroupKey(null));
- } finally {
- if(gTableMap != null)
- gTableMap.close();
- }
- }
-}
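A minimal sketch of the group-key behaviour these tests assert (Java; the column names and values are illustrative, and only the GroupingTableMap API shown above is assumed):

    GroupingTableMap groupingMap = new GroupingTableMap();
    Configuration conf = new Configuration();
    // The grouping columns are given as space-separated family:qualifier pairs.
    conf.set(GroupingTableMap.GROUP_COLUMNS, "familyA:qualifierA familyB:qualifierB");
    groupingMap.configure(new JobConf(conf));
    byte[][] parts = { Bytes.toBytes("238947928"), Bytes.toBytes("4678456942345") };
    // As asserted in shouldCreateNewKey(), the parts are joined with a single space.
    ImmutableBytesWritable groupKey = groupingMap.createGroupKey(parts);
    groupingMap.close();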
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestIdentityTableMap.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestIdentityTableMap.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestIdentityTableMap.java
deleted file mode 100644
index 3fad1fe..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestIdentityTableMap.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.times;
-import static org.mockito.Mockito.verify;
-
-import java.io.IOException;
-
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.testclassification.SmallTests;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.mockito.Mockito;
-
-@Category({MapReduceTests.class, SmallTests.class})
-public class TestIdentityTableMap {
-
- @Test
- @SuppressWarnings({ "deprecation", "unchecked" })
- public void shouldCollectPredefinedTimes() throws IOException {
- int recordNumber = 999;
- Result resultMock = mock(Result.class);
- IdentityTableMap identityTableMap = null;
- try {
- Reporter reporterMock = mock(Reporter.class);
- identityTableMap = new IdentityTableMap();
- ImmutableBytesWritable bytesWritableMock = mock(ImmutableBytesWritable.class);
- OutputCollector<ImmutableBytesWritable, Result> outputCollectorMock =
- mock(OutputCollector.class);
-
- for (int i = 0; i < recordNumber; i++)
- identityTableMap.map(bytesWritableMock, resultMock, outputCollectorMock,
- reporterMock);
-
- verify(outputCollectorMock, times(recordNumber)).collect(
- Mockito.any(ImmutableBytesWritable.class), Mockito.any(Result.class));
- } finally {
- if (identityTableMap != null)
- identityTableMap.close();
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestMultiTableSnapshotInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestMultiTableSnapshotInputFormat.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestMultiTableSnapshotInputFormat.java
deleted file mode 100644
index 665c547..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestMultiTableSnapshotInputFormat.java
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.mapred;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapred.FileOutputFormat;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.RunningJob;
-import org.junit.experimental.categories.Category;
-
-import java.io.IOException;
-import java.util.Iterator;
-import java.util.List;
-
-import static org.junit.Assert.assertTrue;
-
-@Category({ VerySlowMapReduceTests.class, LargeTests.class })
-public class TestMultiTableSnapshotInputFormat
- extends org.apache.hadoop.hbase.mapreduce.TestMultiTableSnapshotInputFormat {
-
- private static final Log LOG = LogFactory.getLog(TestMultiTableSnapshotInputFormat.class);
-
- @Override
- protected void runJob(String jobName, Configuration c, List<Scan> scans)
- throws IOException, InterruptedException, ClassNotFoundException {
- JobConf job = new JobConf(TEST_UTIL.getConfiguration());
-
- job.setJobName(jobName);
- job.setMapperClass(Mapper.class);
- job.setReducerClass(Reducer.class);
-
- TableMapReduceUtil.initMultiTableSnapshotMapperJob(getSnapshotScanMapping(scans), Mapper.class,
- ImmutableBytesWritable.class, ImmutableBytesWritable.class, job, true, restoreDir);
-
- TableMapReduceUtil.addDependencyJars(job);
-
- job.setReducerClass(Reducer.class);
- job.setNumReduceTasks(1); // one to get final "first" and "last" key
- FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
- LOG.info("Started " + job.getJobName());
-
- RunningJob runningJob = JobClient.runJob(job);
- runningJob.waitForCompletion();
- assertTrue(runningJob.isSuccessful());
- LOG.info("After map/reduce completion - job " + jobName);
- }
-
- public static class Mapper extends TestMultiTableSnapshotInputFormat.ScanMapper
- implements TableMap<ImmutableBytesWritable, ImmutableBytesWritable> {
-
- @Override
- public void map(ImmutableBytesWritable key, Result value,
- OutputCollector<ImmutableBytesWritable, ImmutableBytesWritable> outputCollector,
- Reporter reporter) throws IOException {
- makeAssertions(key, value);
- outputCollector.collect(key, key);
- }
-
- /**
- * Closes this stream and releases any system resources associated
- * with it. If the stream is already closed then invoking this
- * method has no effect.
- *
- * @throws IOException if an I/O error occurs
- */
- @Override
- public void close() throws IOException {
- }
-
- @Override
- public void configure(JobConf jobConf) {
-
- }
- }
-
- public static class Reducer extends TestMultiTableSnapshotInputFormat.ScanReducer implements
- org.apache.hadoop.mapred.Reducer<ImmutableBytesWritable, ImmutableBytesWritable,
- NullWritable, NullWritable> {
-
- private JobConf jobConf;
-
- @Override
- public void reduce(ImmutableBytesWritable key, Iterator<ImmutableBytesWritable> values,
- OutputCollector<NullWritable, NullWritable> outputCollector, Reporter reporter)
- throws IOException {
- makeAssertions(key, Lists.newArrayList(values));
- }
-
- /**
- * Closes this stream and releases any system resources associated
- * with it. If the stream is already closed then invoking this
- * method has no effect.
- *
- * @throws IOException if an I/O error occurs
- */
- @Override
- public void close() throws IOException {
- super.cleanup(this.jobConf);
- }
-
- @Override
- public void configure(JobConf jobConf) {
- this.jobConf = jobConf;
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestRowCounter.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestRowCounter.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestRowCounter.java
deleted file mode 100644
index 4ebd8bf..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestRowCounter.java
+++ /dev/null
@@ -1,163 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
-import static org.mockito.Matchers.any;
-import static org.mockito.Matchers.anyInt;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.times;
-
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.PrintStream;
-
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.testclassification.SmallTests;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.mapred.RowCounter.RowCounterMapper;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.mockito.Mockito;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.base.Joiner;
-
-@Category({MapReduceTests.class, SmallTests.class})
-public class TestRowCounter {
-
- @Test
- @SuppressWarnings("deprecation")
- public void shouldPrintUsage() throws Exception {
- String expectedOutput = "rowcounter <outputdir> <tablename> <column1> [<column2>...]";
- String result = new OutputReader(System.out) {
- @Override
- void doRead() {
- assertEquals(-1, RowCounter.printUsage());
- }
- }.read();
-
- assertTrue(result.startsWith(expectedOutput));
- }
-
- @Test
- @SuppressWarnings("deprecation")
- public void shouldExitAndPrintUsageSinceParameterNumberLessThanThree()
- throws Exception {
- final String[] args = new String[] { "one", "two" };
- String line = "ERROR: Wrong number of parameters: " + args.length;
- String result = new OutputReader(System.err) {
- @Override
- void doRead() throws Exception {
- assertEquals(-1, new RowCounter().run(args));
- }
- }.read();
-
- assertTrue(result.startsWith(line));
- }
-
- @Test
- @SuppressWarnings({ "deprecation", "unchecked" })
- public void shouldRegInReportEveryIncomingRow() throws IOException {
- int iterationNumber = 999;
- RowCounter.RowCounterMapper mapper = new RowCounter.RowCounterMapper();
- Reporter reporter = mock(Reporter.class);
- for (int i = 0; i < iterationNumber; i++)
- mapper.map(mock(ImmutableBytesWritable.class), mock(Result.class),
- mock(OutputCollector.class), reporter);
-
- Mockito.verify(reporter, times(iterationNumber)).incrCounter(
- any(Enum.class), anyInt());
- }
-
- @Test
- @SuppressWarnings({ "deprecation" })
- public void shouldCreateAndRunSubmittableJob() throws Exception {
- RowCounter rCounter = new RowCounter();
- rCounter.setConf(HBaseConfiguration.create());
- String[] args = new String[] { "\temp", "tableA", "column1", "column2",
- "column3" };
- JobConf jobConfig = rCounter.createSubmittableJob(args);
-
- assertNotNull(jobConfig);
- assertEquals(0, jobConfig.getNumReduceTasks());
- assertEquals("rowcounter", jobConfig.getJobName());
- assertEquals(jobConfig.getMapOutputValueClass(), Result.class);
- assertEquals(jobConfig.getMapperClass(), RowCounterMapper.class);
- assertEquals(jobConfig.get(TableInputFormat.COLUMN_LIST), Joiner.on(' ')
- .join("column1", "column2", "column3"));
- assertEquals(jobConfig.getMapOutputKeyClass(), ImmutableBytesWritable.class);
- }
-
- enum Outs {
- OUT, ERR
- }
-
- private static abstract class OutputReader {
- private final PrintStream ps;
- private PrintStream oldPrintStream;
- private Outs outs;
-
- protected OutputReader(PrintStream ps) {
- this.ps = ps;
- }
-
- protected String read() throws Exception {
- ByteArrayOutputStream outBytes = new ByteArrayOutputStream();
- if (ps == System.out) {
- oldPrintStream = System.out;
- outs = Outs.OUT;
- System.setOut(new PrintStream(outBytes));
- } else if (ps == System.err) {
- oldPrintStream = System.err;
- outs = Outs.ERR;
- System.setErr(new PrintStream(outBytes));
- } else {
- throw new IllegalStateException("OutputReader: unsupported PrintStream");
- }
-
- try {
- doRead();
- return new String(outBytes.toByteArray());
- } finally {
- switch (outs) {
- case OUT: {
- System.setOut(oldPrintStream);
- break;
- }
- case ERR: {
- System.setErr(oldPrintStream);
- break;
- }
- default:
- throw new IllegalStateException(
- "OutputReader: unsupported PrintStream");
- }
- }
- }
-
- abstract void doRead() throws Exception;
- }
-}
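A minimal sketch of driving this mapred RowCounter the way shouldCreateAndRunSubmittableJob() does (Java; the output directory, table, and column names are placeholders):

    RowCounter rowCounter = new RowCounter();
    rowCounter.setConf(HBaseConfiguration.create());
    // Argument order per the usage string: <outputdir> <tablename> <column1> [<column2>...]
    String[] args = { "/tmp/rowcounter-out", "myTable", "column1", "column2" };
    JobConf job = rowCounter.createSubmittableJob(args);     // a map-only job: zero reduce tasks
    assertEquals("rowcounter", job.getJobName());
    // rowCounter.run(args) would actually submit the job; the sketch only inspects the JobConf.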
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestSplitTable.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestSplitTable.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestSplitTable.java
deleted file mode 100644
index 2655ac2..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestSplitTable.java
+++ /dev/null
@@ -1,116 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotEquals;
-import static org.junit.Assert.assertTrue;
-
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.testclassification.SmallTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.junit.Assert;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
-
-@Category({MapReduceTests.class, SmallTests.class})
-public class TestSplitTable {
- @Rule
- public TestName name = new TestName();
-
- @Test
- @SuppressWarnings("deprecation")
- public void testSplitTableCompareTo() {
- TableSplit aTableSplit = new TableSplit(Bytes.toBytes("tableA"),
- Bytes.toBytes("aaa"), Bytes.toBytes("ddd"), "locationA");
-
- TableSplit bTableSplit = new TableSplit(Bytes.toBytes("tableA"),
- Bytes.toBytes("iii"), Bytes.toBytes("kkk"), "locationA");
-
- TableSplit cTableSplit = new TableSplit(Bytes.toBytes("tableA"),
- Bytes.toBytes("lll"), Bytes.toBytes("zzz"), "locationA");
-
- assertTrue(aTableSplit.compareTo(aTableSplit) == 0);
- assertTrue(bTableSplit.compareTo(bTableSplit) == 0);
- assertTrue(cTableSplit.compareTo(cTableSplit) == 0);
-
- assertTrue(aTableSplit.compareTo(bTableSplit) < 0);
- assertTrue(bTableSplit.compareTo(aTableSplit) > 0);
-
- assertTrue(aTableSplit.compareTo(cTableSplit) < 0);
- assertTrue(cTableSplit.compareTo(aTableSplit) > 0);
-
- assertTrue(bTableSplit.compareTo(cTableSplit) < 0);
- assertTrue(cTableSplit.compareTo(bTableSplit) > 0);
-
- assertTrue(cTableSplit.compareTo(aTableSplit) > 0);
- }
-
- @Test
- @SuppressWarnings("deprecation")
- public void testSplitTableEquals() {
- byte[] tableA = Bytes.toBytes("tableA");
- byte[] aaa = Bytes.toBytes("aaa");
- byte[] ddd = Bytes.toBytes("ddd");
- String locationA = "locationA";
-
- TableSplit tablesplit = new TableSplit(tableA, aaa, ddd, locationA);
-
- TableSplit tableB = new TableSplit(Bytes.toBytes("tableB"), aaa, ddd, locationA);
- assertNotEquals(tablesplit.hashCode(), tableB.hashCode());
- assertNotEquals(tablesplit, tableB);
-
- TableSplit startBbb = new TableSplit(tableA, Bytes.toBytes("bbb"), ddd, locationA);
- assertNotEquals(tablesplit.hashCode(), startBbb.hashCode());
- assertNotEquals(tablesplit, startBbb);
-
- TableSplit endEee = new TableSplit(tableA, aaa, Bytes.toBytes("eee"), locationA);
- assertNotEquals(tablesplit.hashCode(), endEee.hashCode());
- assertNotEquals(tablesplit, endEee);
-
- TableSplit locationB = new TableSplit(tableA, aaa, ddd, "locationB");
- assertNotEquals(tablesplit.hashCode(), locationB.hashCode());
- assertNotEquals(tablesplit, locationB);
-
- TableSplit same = new TableSplit(tableA, aaa, ddd, locationA);
- assertEquals(tablesplit.hashCode(), same.hashCode());
- assertEquals(tablesplit, same);
- }
-
- @Test
- @SuppressWarnings("deprecation")
- public void testToString() {
- TableSplit split =
- new TableSplit(TableName.valueOf(name.getMethodName()), "row-start".getBytes(), "row-end".getBytes(),
- "location");
- String str =
- "HBase table split(table name: " + name.getMethodName() + ", start row: row-start, "
- + "end row: row-end, region location: location)";
- Assert.assertEquals(str, split.toString());
-
- split = new TableSplit((TableName) null, null, null, null);
- str =
- "HBase table split(table name: null, start row: null, "
- + "end row: null, region location: null)";
- Assert.assertEquals(str, split.toString());
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestTableInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestTableInputFormat.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestTableInputFormat.java
deleted file mode 100644
index 4b93843..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestTableInputFormat.java
+++ /dev/null
@@ -1,461 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-import static org.mockito.Matchers.anyObject;
-import static org.mockito.Mockito.doAnswer;
-import static org.mockito.Mockito.doReturn;
-import static org.mockito.Mockito.doThrow;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.spy;
-
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.Map;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.*;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
-import org.apache.hadoop.hbase.filter.Filter;
-import org.apache.hadoop.hbase.filter.RegexStringComparator;
-import org.apache.hadoop.hbase.filter.RowFilter;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapred.InputFormat;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.JobConfigurable;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.RunningJob;
-import org.apache.hadoop.mapred.lib.NullOutputFormat;
-import org.junit.AfterClass;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.mockito.invocation.InvocationOnMock;
-import org.mockito.stubbing.Answer;
-
-/**
- * This tests the TableInputFormat and its recovery semantics
- *
- */
-@Category({MapReduceTests.class, LargeTests.class})
-public class TestTableInputFormat {
-
- private static final Log LOG = LogFactory.getLog(TestTableInputFormat.class);
-
- private final static HBaseTestingUtility UTIL = new HBaseTestingUtility();
-
- static final byte[] FAMILY = Bytes.toBytes("family");
-
- private static final byte[][] columns = new byte[][] { FAMILY };
-
- @BeforeClass
- public static void beforeClass() throws Exception {
- UTIL.startMiniCluster();
- }
-
- @AfterClass
- public static void afterClass() throws Exception {
- UTIL.shutdownMiniCluster();
- }
-
- @Before
- public void before() throws IOException {
- LOG.info("before");
- UTIL.ensureSomeRegionServersAvailable(1);
- LOG.info("before done");
- }
-
- /**
- * Setup a table with two rows and values.
- *
- * @param tableName
- * @return
- * @throws IOException
- */
- public static Table createTable(byte[] tableName) throws IOException {
- return createTable(tableName, new byte[][] { FAMILY });
- }
-
- /**
- * Setup a table with two rows and values per column family.
- *
- * @param tableName
- * @return
- * @throws IOException
- */
- public static Table createTable(byte[] tableName, byte[][] families) throws IOException {
- Table table = UTIL.createTable(TableName.valueOf(tableName), families);
- Put p = new Put("aaa".getBytes());
- for (byte[] family : families) {
- p.addColumn(family, null, "value aaa".getBytes());
- }
- table.put(p);
- p = new Put("bbb".getBytes());
- for (byte[] family : families) {
- p.addColumn(family, null, "value bbb".getBytes());
- }
- table.put(p);
- return table;
- }
-
- /**
- * Verify that the result and key have expected values.
- *
- * @param r
- * @param key
- * @param expectedKey
- * @param expectedValue
- * @return
- */
- static boolean checkResult(Result r, ImmutableBytesWritable key,
- byte[] expectedKey, byte[] expectedValue) {
- assertEquals(0, key.compareTo(expectedKey));
- Map<byte[], byte[]> vals = r.getFamilyMap(FAMILY);
- byte[] value = vals.values().iterator().next();
- assertTrue(Arrays.equals(value, expectedValue));
- return true; // if succeed
- }
-
- /**
- * Create table data and run tests on specified htable using the
- * o.a.h.hbase.mapred API.
- *
- * @param table
- * @throws IOException
- */
- static void runTestMapred(Table table) throws IOException {
- org.apache.hadoop.hbase.mapred.TableRecordReader trr =
- new org.apache.hadoop.hbase.mapred.TableRecordReader();
- trr.setStartRow("aaa".getBytes());
- trr.setEndRow("zzz".getBytes());
- trr.setHTable(table);
- trr.setInputColumns(columns);
-
- trr.init();
- Result r = new Result();
- ImmutableBytesWritable key = new ImmutableBytesWritable();
-
- boolean more = trr.next(key, r);
- assertTrue(more);
- checkResult(r, key, "aaa".getBytes(), "value aaa".getBytes());
-
- more = trr.next(key, r);
- assertTrue(more);
- checkResult(r, key, "bbb".getBytes(), "value bbb".getBytes());
-
- // no more data
- more = trr.next(key, r);
- assertFalse(more);
- }
-
- /**
- * Create a table that IOE's on first scanner next call
- *
- * @throws IOException
- */
- static Table createIOEScannerTable(byte[] name, final int failCnt)
- throws IOException {
- // build up a mock scanner stuff to fail the first time
- Answer<ResultScanner> a = new Answer<ResultScanner>() {
- int cnt = 0;
-
- @Override
- public ResultScanner answer(InvocationOnMock invocation) throws Throwable {
- // first invocation return the busted mock scanner
- if (cnt++ < failCnt) {
- // create mock ResultScanner that always fails.
- Scan scan = mock(Scan.class);
- doReturn("bogus".getBytes()).when(scan).getStartRow(); // avoid npe
- ResultScanner scanner = mock(ResultScanner.class);
- // simulate TimeoutException / IOException
- doThrow(new IOException("Injected exception")).when(scanner).next();
- return scanner;
- }
-
- // otherwise return the real scanner.
- return (ResultScanner) invocation.callRealMethod();
- }
- };
-
- Table htable = spy(createTable(name));
- doAnswer(a).when(htable).getScanner((Scan) anyObject());
- return htable;
- }
-
- /**
- * Create a table that throws a DoNotRetryIOException on first scanner next
- * call
- *
- * @throws IOException
- */
- static Table createDNRIOEScannerTable(byte[] name, final int failCnt)
- throws IOException {
- // build up a mock scanner stuff to fail the first time
- Answer<ResultScanner> a = new Answer<ResultScanner>() {
- int cnt = 0;
-
- @Override
- public ResultScanner answer(InvocationOnMock invocation) throws Throwable {
- // first invocation return the busted mock scanner
- if (cnt++ < failCnt) {
- // create mock ResultScanner that always fails.
- Scan scan = mock(Scan.class);
- doReturn("bogus".getBytes()).when(scan).getStartRow(); // avoid npe
- ResultScanner scanner = mock(ResultScanner.class);
-
- invocation.callRealMethod(); // simulate NotServingRegionException
- doThrow(
- new NotServingRegionException("Injected simulated TimeoutException"))
- .when(scanner).next();
- return scanner;
- }
-
- // otherwise return the real scanner.
- return (ResultScanner) invocation.callRealMethod();
- }
- };
-
- Table htable = spy(createTable(name));
- doAnswer(a).when(htable).getScanner((Scan) anyObject());
- return htable;
- }
-
- /**
- * Run test assuming no errors using mapred api.
- *
- * @throws IOException
- */
- @Test
- public void testTableRecordReader() throws IOException {
- Table table = createTable("table1".getBytes());
- runTestMapred(table);
- }
-
- /**
- * Run test assuming Scanner IOException failure using mapred api.
- *
- * @throws IOException
- */
- @Test
- public void testTableRecordReaderScannerFail() throws IOException {
- Table htable = createIOEScannerTable("table2".getBytes(), 1);
- runTestMapred(htable);
- }
-
- /**
- * Run test assuming Scanner IOException failure using mapred api.
- *
- * @throws IOException
- */
- @Test(expected = IOException.class)
- public void testTableRecordReaderScannerFailTwice() throws IOException {
- Table htable = createIOEScannerTable("table3".getBytes(), 2);
- runTestMapred(htable);
- }
-
- /**
- * Run test assuming NotServingRegionException using mapred api.
- *
- * @throws org.apache.hadoop.hbase.DoNotRetryIOException
- */
- @Test
- public void testTableRecordReaderScannerTimeout() throws IOException {
- Table htable = createDNRIOEScannerTable("table4".getBytes(), 1);
- runTestMapred(htable);
- }
-
- /**
- * Run test assuming NotServingRegionException using mapred api.
- *
- * @throws org.apache.hadoop.hbase.DoNotRetryIOException
- */
- @Test(expected = org.apache.hadoop.hbase.NotServingRegionException.class)
- public void testTableRecordReaderScannerTimeoutTwice() throws IOException {
- Table htable = createDNRIOEScannerTable("table5".getBytes(), 2);
- runTestMapred(htable);
- }
-
- /**
- * Verify the example we present in javadocs on TableInputFormatBase
- */
- @Test
- public void testExtensionOfTableInputFormatBase() throws IOException {
- LOG.info("testing use of an InputFormat taht extends InputFormatBase");
- final Table table = createTable(Bytes.toBytes("exampleTable"),
- new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") });
- testInputFormat(ExampleTIF.class);
- }
-
- @Test
- public void testDeprecatedExtensionOfTableInputFormatBase() throws IOException {
- LOG.info("testing use of an InputFormat taht extends InputFormatBase, "
- + "as it was given in 0.98.");
- final Table table = createTable(Bytes.toBytes("exampleDeprecatedTable"),
- new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") });
- testInputFormat(ExampleDeprecatedTIF.class);
- }
-
- @Test
- public void testJobConfigurableExtensionOfTableInputFormatBase() throws IOException {
- LOG.info("testing use of an InputFormat taht extends InputFormatBase, "
- + "using JobConfigurable.");
- final Table table = createTable(Bytes.toBytes("exampleJobConfigurableTable"),
- new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") });
- testInputFormat(ExampleJobConfigurableTIF.class);
- }
-
- void testInputFormat(Class<? extends InputFormat> clazz) throws IOException {
- Configuration conf = UTIL.getConfiguration();
- final JobConf job = new JobConf(conf);
- job.setInputFormat(clazz);
- job.setOutputFormat(NullOutputFormat.class);
- job.setMapperClass(ExampleVerifier.class);
- job.setNumReduceTasks(0);
- LOG.debug("submitting job.");
- final RunningJob run = JobClient.runJob(job);
- assertTrue("job failed!", run.isSuccessful());
- assertEquals("Saw the wrong number of instances of the filtered-for row.", 2, run.getCounters()
- .findCounter(TestTableInputFormat.class.getName() + ":row", "aaa").getCounter());
- assertEquals("Saw any instances of the filtered out row.", 0, run.getCounters()
- .findCounter(TestTableInputFormat.class.getName() + ":row", "bbb").getCounter());
- assertEquals("Saw the wrong number of instances of columnA.", 1, run.getCounters()
- .findCounter(TestTableInputFormat.class.getName() + ":family", "columnA").getCounter());
- assertEquals("Saw the wrong number of instances of columnB.", 1, run.getCounters()
- .findCounter(TestTableInputFormat.class.getName() + ":family", "columnB").getCounter());
- assertEquals("Saw the wrong count of values for the filtered-for row.", 2, run.getCounters()
- .findCounter(TestTableInputFormat.class.getName() + ":value", "value aaa").getCounter());
- assertEquals("Saw the wrong count of values for the filtered-out row.", 0, run.getCounters()
- .findCounter(TestTableInputFormat.class.getName() + ":value", "value bbb").getCounter());
- }
-
- public static class ExampleVerifier implements TableMap<NullWritable, NullWritable> {
-
- @Override
- public void configure(JobConf conf) {
- }
-
- @Override
- public void map(ImmutableBytesWritable key, Result value,
- OutputCollector<NullWritable,NullWritable> output,
- Reporter reporter) throws IOException {
- for (Cell cell : value.listCells()) {
- reporter.getCounter(TestTableInputFormat.class.getName() + ":row",
- Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()))
- .increment(1l);
- reporter.getCounter(TestTableInputFormat.class.getName() + ":family",
- Bytes.toString(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength()))
- .increment(1l);
- reporter.getCounter(TestTableInputFormat.class.getName() + ":value",
- Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()))
- .increment(1l);
- }
- }
-
- @Override
- public void close() {
- }
-
- }
-
- public static class ExampleDeprecatedTIF extends TableInputFormatBase implements JobConfigurable {
-
- @Override
- public void configure(JobConf job) {
- try {
- Connection connection = ConnectionFactory.createConnection(job);
- Table exampleTable = connection.getTable(TableName.valueOf("exampleDeprecatedTable"));
- // mandatory
- initializeTable(connection, exampleTable.getName());
- byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
- Bytes.toBytes("columnB") };
- // mandatory
- setInputColumns(inputColumns);
- Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*"));
- // optional
- setRowFilter(exampleFilter);
- } catch (IOException exception) {
- throw new RuntimeException("Failed to configure for job.", exception);
- }
- }
-
- }
-
- public static class ExampleJobConfigurableTIF extends ExampleTIF implements JobConfigurable {
-
- @Override
- public void configure(JobConf job) {
- try {
- initialize(job);
- } catch (IOException exception) {
- throw new RuntimeException("Failed to initialize.", exception);
- }
- }
-
- @Override
- protected void initialize(JobConf job) throws IOException {
- initialize(job, "exampleJobConfigurableTable");
- }
- }
-
-
- public static class ExampleTIF extends TableInputFormatBase {
-
- @Override
- protected void initialize(JobConf job) throws IOException {
- initialize(job, "exampleTable");
- }
-
- protected void initialize(JobConf job, String table) throws IOException {
- Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create(job));
- TableName tableName = TableName.valueOf(table);
- // mandatory
- initializeTable(connection, tableName);
- byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
- Bytes.toBytes("columnB") };
- // mandatory
- setInputColumns(inputColumns);
- Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*"));
- // optional
- setRowFilter(exampleFilter);
- }
-
- }
-
-}
-
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestTableMapReduce.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestTableMapReduce.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestTableMapReduce.java
deleted file mode 100644
index 3f905cf..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestTableMapReduce.java
+++ /dev/null
@@ -1,103 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import static org.junit.Assert.assertTrue;
-
-import java.io.File;
-import java.io.IOException;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.fs.FileUtil;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.mapreduce.TestTableMapReduceBase;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.RunningJob;
-import org.junit.experimental.categories.Category;
-
-/**
- * Test Map/Reduce job over HBase tables. The map/reduce process we're testing
- * on our tables is simple - take every row in the table, reverse the value of
- * a particular cell, and write it back to the table.
- */
-@Category({MapReduceTests.class, LargeTests.class})
-@SuppressWarnings("deprecation")
-public class TestTableMapReduce extends TestTableMapReduceBase {
- private static final Log LOG =
- LogFactory.getLog(TestTableMapReduce.class.getName());
-
- protected Log getLog() { return LOG; }
-
- /**
- * Pass the given key and processed record reduce
- */
- static class ProcessContentsMapper extends MapReduceBase implements
- TableMap<ImmutableBytesWritable, Put> {
-
- /**
- * Pass the key, and reversed value to reduce
- */
- public void map(ImmutableBytesWritable key, Result value,
- OutputCollector<ImmutableBytesWritable, Put> output,
- Reporter reporter)
- throws IOException {
- output.collect(key, TestTableMapReduceBase.map(key, value));
- }
- }
-
- @Override
- protected void runTestOnTable(Table table) throws IOException {
- JobConf jobConf = null;
- try {
- LOG.info("Before map/reduce startup");
- jobConf = new JobConf(UTIL.getConfiguration(), TestTableMapReduce.class);
- jobConf.setJobName("process column contents");
- jobConf.setNumReduceTasks(1);
- TableMapReduceUtil.initTableMapJob(table.getName().getNameAsString(),
- Bytes.toString(INPUT_FAMILY), ProcessContentsMapper.class,
- ImmutableBytesWritable.class, Put.class, jobConf);
- TableMapReduceUtil.initTableReduceJob(table.getName().getNameAsString(),
- IdentityTableReduce.class, jobConf);
-
- LOG.info("Started " + table.getName());
- RunningJob job = JobClient.runJob(jobConf);
- assertTrue(job.isSuccessful());
- LOG.info("After map/reduce completion");
-
- // verify map-reduce results
- verify(table.getName());
- } finally {
- if (jobConf != null) {
- FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
- }
- }
- }
-}
-
[07/41] hbase git commit: HBASE-18640 Move mapreduce out of
hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat2.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat2.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat2.java
deleted file mode 100644
index 87522b6..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat2.java
+++ /dev/null
@@ -1,1495 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertNotSame;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import java.io.IOException;
-import java.lang.reflect.Field;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Random;
-import java.util.Set;
-import java.util.concurrent.Callable;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.LocatedFileStatus;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.RemoteIterator;
-import org.apache.hadoop.hbase.ArrayBackedTag;
-import org.apache.hadoop.hbase.CategoryBasedTimeout;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.CompatibilitySingletonFactory;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.HColumnDescriptor;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.HDFSBlocksDistribution;
-import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.HadoopShims;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.PerformanceEvaluation;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.Tag;
-import org.apache.hadoop.hbase.TagType;
-import org.apache.hadoop.hbase.TagUtil;
-import org.apache.hadoop.hbase.client.Admin;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.RegionLocator;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.io.compress.Compression;
-import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
-import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
-import org.apache.hadoop.hbase.io.hfile.CacheConfig;
-import org.apache.hadoop.hbase.io.hfile.HFile;
-import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
-import org.apache.hadoop.hbase.io.hfile.HFileScanner;
-import org.apache.hadoop.hbase.regionserver.BloomType;
-import org.apache.hadoop.hbase.regionserver.HRegion;
-import org.apache.hadoop.hbase.regionserver.Store;
-import org.apache.hadoop.hbase.regionserver.StoreFile;
-import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.FSUtils;
-import org.apache.hadoop.hbase.util.ReflectionUtils;
-import org.apache.hadoop.hbase.util.Writables;
-import org.apache.hadoop.hdfs.DistributedFileSystem;
-import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy;
-import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
-import org.apache.hadoop.hdfs.server.blockmanagement.BlockStoragePolicySuite;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.RecordWriter;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.junit.Ignore;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestRule;
-import org.mockito.Mockito;
-
-import java.util.stream.Collectors;
-import java.util.stream.Stream;
-
-/**
- * Simple test for {@link HFileOutputFormat2}.
- * Sets up and runs a mapreduce job that writes hfile output.
- * Creates a few inner classes to implement splits and an inputformat that
- * emits keys and values like those of {@link PerformanceEvaluation}.
- */
-@Category({VerySlowMapReduceTests.class, LargeTests.class})
-public class TestHFileOutputFormat2 {
- @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
- withTimeout(this.getClass()).withLookingForStuckThread(true).build();
- private final static int ROWSPERSPLIT = 1024;
-
- private static final byte[][] FAMILIES
- = { Bytes.add(PerformanceEvaluation.FAMILY_NAME, Bytes.toBytes("-A"))
- , Bytes.add(PerformanceEvaluation.FAMILY_NAME, Bytes.toBytes("-B"))};
- private static final TableName[] TABLE_NAMES = Stream.of("TestTable", "TestTable2",
- "TestTable3").map(TableName::valueOf).toArray(TableName[]::new);
-
- private HBaseTestingUtility util = new HBaseTestingUtility();
-
- private static final Log LOG = LogFactory.getLog(TestHFileOutputFormat2.class);
-
- /**
- * Simple mapper that makes KeyValue output.
- */
- static class RandomKVGeneratingMapper
- extends Mapper<NullWritable, NullWritable,
- ImmutableBytesWritable, Cell> {
-
- private int keyLength;
- private static final int KEYLEN_DEFAULT=10;
- private static final String KEYLEN_CONF="randomkv.key.length";
-
- private int valLength;
- private static final int VALLEN_DEFAULT=10;
- private static final String VALLEN_CONF="randomkv.val.length";
- private static final byte [] QUALIFIER = Bytes.toBytes("data");
- private boolean multiTableMapper = false;
- private TableName[] tables = null;
-
-
- @Override
- protected void setup(Context context) throws IOException,
- InterruptedException {
- super.setup(context);
-
- Configuration conf = context.getConfiguration();
- keyLength = conf.getInt(KEYLEN_CONF, KEYLEN_DEFAULT);
- valLength = conf.getInt(VALLEN_CONF, VALLEN_DEFAULT);
- multiTableMapper = conf.getBoolean(HFileOutputFormat2.MULTI_TABLE_HFILEOUTPUTFORMAT_CONF_KEY,
- false);
- if (multiTableMapper) {
- tables = TABLE_NAMES;
- } else {
- tables = new TableName[]{TABLE_NAMES[0]};
- }
- }
-
- @Override
- protected void map(
- NullWritable n1, NullWritable n2,
- Mapper<NullWritable, NullWritable,
- ImmutableBytesWritable,Cell>.Context context)
- throws java.io.IOException ,InterruptedException
- {
-
- byte keyBytes[] = new byte[keyLength];
- byte valBytes[] = new byte[valLength];
-
- int taskId = context.getTaskAttemptID().getTaskID().getId();
- assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";
- Random random = new Random();
- byte[] key;
- for (int j = 0; j < tables.length; ++j) {
- for (int i = 0; i < ROWSPERSPLIT; i++) {
- random.nextBytes(keyBytes);
- // Ensure that unique tasks generate unique keys
- keyBytes[keyLength - 1] = (byte) (taskId & 0xFF);
- random.nextBytes(valBytes);
- key = keyBytes;
- if (multiTableMapper) {
- key = MultiTableHFileOutputFormat.createCompositeKey(tables[j].getName(), keyBytes);
- }
-
- for (byte[] family : TestHFileOutputFormat2.FAMILIES) {
- Cell kv = new KeyValue(keyBytes, family, QUALIFIER, valBytes);
- context.write(new ImmutableBytesWritable(key), kv);
- }
- }
- }
- }
- }
-
- /**
- * Simple mapper that makes Put output.
- */
- static class RandomPutGeneratingMapper
- extends Mapper<NullWritable, NullWritable,
- ImmutableBytesWritable, Put> {
-
- private int keyLength;
- private static final int KEYLEN_DEFAULT = 10;
- private static final String KEYLEN_CONF = "randomkv.key.length";
-
- private int valLength;
- private static final int VALLEN_DEFAULT = 10;
- private static final String VALLEN_CONF = "randomkv.val.length";
- private static final byte[] QUALIFIER = Bytes.toBytes("data");
- private boolean multiTableMapper = false;
- private TableName[] tables = null;
-
- @Override
- protected void setup(Context context) throws IOException,
- InterruptedException {
- super.setup(context);
-
- Configuration conf = context.getConfiguration();
- keyLength = conf.getInt(KEYLEN_CONF, KEYLEN_DEFAULT);
- valLength = conf.getInt(VALLEN_CONF, VALLEN_DEFAULT);
- multiTableMapper = conf.getBoolean(HFileOutputFormat2.MULTI_TABLE_HFILEOUTPUTFORMAT_CONF_KEY,
- false);
- if (multiTableMapper) {
- tables = TABLE_NAMES;
- } else {
- tables = new TableName[]{TABLE_NAMES[0]};
- }
- }
-
- @Override
- protected void map(
- NullWritable n1, NullWritable n2,
- Mapper<NullWritable, NullWritable,
- ImmutableBytesWritable, Put>.Context context)
- throws java.io.IOException, InterruptedException {
-
- byte keyBytes[] = new byte[keyLength];
- byte valBytes[] = new byte[valLength];
-
- int taskId = context.getTaskAttemptID().getTaskID().getId();
- assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";
-
- Random random = new Random();
- byte[] key;
- for (int j = 0; j < tables.length; ++j) {
- for (int i = 0; i < ROWSPERSPLIT; i++) {
- random.nextBytes(keyBytes);
- // Ensure that unique tasks generate unique keys
- keyBytes[keyLength - 1] = (byte) (taskId & 0xFF);
- random.nextBytes(valBytes);
- key = keyBytes;
- if (multiTableMapper) {
- key = MultiTableHFileOutputFormat.createCompositeKey(tables[j].getName(), keyBytes);
- }
-
- for (byte[] family : TestHFileOutputFormat2.FAMILIES) {
- Put p = new Put(keyBytes);
- p.addColumn(family, QUALIFIER, valBytes);
- // set the TTL very low so that the scan does not return any value
- p.setTTL(1L);
- context.write(new ImmutableBytesWritable(key), p);
- }
- }
- }
- }
- }
-
- private void setupRandomGeneratorMapper(Job job, boolean putSortReducer) {
- if (putSortReducer) {
- job.setInputFormatClass(NMapInputFormat.class);
- job.setMapperClass(RandomPutGeneratingMapper.class);
- job.setMapOutputKeyClass(ImmutableBytesWritable.class);
- job.setMapOutputValueClass(Put.class);
- } else {
- job.setInputFormatClass(NMapInputFormat.class);
- job.setMapperClass(RandomKVGeneratingMapper.class);
- job.setMapOutputKeyClass(ImmutableBytesWritable.class);
- job.setMapOutputValueClass(KeyValue.class);
- }
- }
-
- /**
- * Test that {@link HFileOutputFormat2} RecordWriter amends timestamps if
- * passed a keyvalue whose timestamp is {@link HConstants#LATEST_TIMESTAMP}.
- * @see <a href="https://issues.apache.org/jira/browse/HBASE-2615">HBASE-2615</a>
- */
- @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
- public void test_LATEST_TIMESTAMP_isReplaced()
- throws Exception {
- Configuration conf = new Configuration(this.util.getConfiguration());
- RecordWriter<ImmutableBytesWritable, Cell> writer = null;
- TaskAttemptContext context = null;
- Path dir =
- util.getDataTestDir("test_LATEST_TIMESTAMP_isReplaced");
- try {
- Job job = new Job(conf);
- FileOutputFormat.setOutputPath(job, dir);
- context = createTestTaskAttemptContext(job);
- HFileOutputFormat2 hof = new HFileOutputFormat2();
- writer = hof.getRecordWriter(context);
- final byte [] b = Bytes.toBytes("b");
-
- // Test 1. Pass a KV that has a ts of LATEST_TIMESTAMP. It should be
- // changed by the call to write. Check everything in the kv is the same except the ts.
- KeyValue kv = new KeyValue(b, b, b);
- KeyValue original = kv.clone();
- writer.write(new ImmutableBytesWritable(), kv);
- assertFalse(original.equals(kv));
- assertTrue(Bytes.equals(CellUtil.cloneRow(original), CellUtil.cloneRow(kv)));
- assertTrue(Bytes.equals(CellUtil.cloneFamily(original), CellUtil.cloneFamily(kv)));
- assertTrue(Bytes.equals(CellUtil.cloneQualifier(original), CellUtil.cloneQualifier(kv)));
- assertNotSame(original.getTimestamp(), kv.getTimestamp());
- assertNotSame(HConstants.LATEST_TIMESTAMP, kv.getTimestamp());
-
- // Test 2. Now pass a kv that has an explicit ts. It should not be
- // changed by the call to write.
- kv = new KeyValue(b, b, b, kv.getTimestamp() - 1, b);
- original = kv.clone();
- writer.write(new ImmutableBytesWritable(), kv);
- assertTrue(original.equals(kv));
- } finally {
- if (writer != null && context != null) writer.close(context);
- dir.getFileSystem(conf).delete(dir, true);
- }
- }
-
- private TaskAttemptContext createTestTaskAttemptContext(final Job job)
- throws Exception {
- HadoopShims hadoop = CompatibilitySingletonFactory.getInstance(HadoopShims.class);
- TaskAttemptContext context = hadoop.createTestTaskAttemptContext(
- job, "attempt_201402131733_0001_m_000000_0");
- return context;
- }
-
- /*
- * Test that {@link HFileOutputFormat2} creates an HFile with TIMERANGE
- * metadata used by time-restricted scans.
- */
- @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
- public void test_TIMERANGE() throws Exception {
- Configuration conf = new Configuration(this.util.getConfiguration());
- RecordWriter<ImmutableBytesWritable, Cell> writer = null;
- TaskAttemptContext context = null;
- Path dir =
- util.getDataTestDir("test_TIMERANGE_present");
- LOG.info("Timerange dir writing to dir: "+ dir);
- try {
- // build a record writer using HFileOutputFormat2
- Job job = new Job(conf);
- FileOutputFormat.setOutputPath(job, dir);
- context = createTestTaskAttemptContext(job);
- HFileOutputFormat2 hof = new HFileOutputFormat2();
- writer = hof.getRecordWriter(context);
-
- // Pass two key values with explicit timestamps
- final byte [] b = Bytes.toBytes("b");
-
- // value 1 with timestamp 2000
- KeyValue kv = new KeyValue(b, b, b, 2000, b);
- KeyValue original = kv.clone();
- writer.write(new ImmutableBytesWritable(), kv);
- assertEquals(original,kv);
-
- // value 2 with timestamp 1000
- kv = new KeyValue(b, b, b, 1000, b);
- original = kv.clone();
- writer.write(new ImmutableBytesWritable(), kv);
- assertEquals(original, kv);
-
- // verify that the file has the proper FileInfo.
- writer.close(context);
-
- // the generated file lives 1 directory down from the attempt directory
- // and is the only file, e.g.
- // _attempt__0000_r_000000_0/b/1979617994050536795
- FileSystem fs = FileSystem.get(conf);
- Path attemptDirectory = hof.getDefaultWorkFile(context, "").getParent();
- FileStatus[] sub1 = fs.listStatus(attemptDirectory);
- FileStatus[] file = fs.listStatus(sub1[0].getPath());
-
- // open as HFile Reader and pull out TIMERANGE FileInfo.
- HFile.Reader rd =
- HFile.createReader(fs, file[0].getPath(), new CacheConfig(conf), true, conf);
- Map<byte[],byte[]> finfo = rd.loadFileInfo();
- byte[] range = finfo.get("TIMERANGE".getBytes("UTF-8"));
- assertNotNull(range);
-
- // unmarshall and check values.
- TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
- Writables.copyWritable(range, timeRangeTracker);
- LOG.info(timeRangeTracker.getMin() +
- "...." + timeRangeTracker.getMax());
- assertEquals(1000, timeRangeTracker.getMin());
- assertEquals(2000, timeRangeTracker.getMax());
- rd.close();
- } finally {
- if (writer != null && context != null) writer.close(context);
- dir.getFileSystem(conf).delete(dir, true);
- }
- }
-
- /**
- * Run small MR job.
- */
- @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
- public void testWritingPEData() throws Exception {
- Configuration conf = util.getConfiguration();
- Path testDir = util.getDataTestDirOnTestFS("testWritingPEData");
- FileSystem fs = testDir.getFileSystem(conf);
-
- // Lower this value or we OOME in Eclipse.
- conf.setInt("mapreduce.task.io.sort.mb", 20);
- // Write a few files.
- conf.setLong(HConstants.HREGION_MAX_FILESIZE, 64 * 1024);
-
- Job job = new Job(conf, "testWritingPEData");
- setupRandomGeneratorMapper(job, false);
- // This partitioner doesn't work well for numeric keys, but we use it anyway
- // just to demonstrate how to configure it.
- byte[] startKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
- byte[] endKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
-
- Arrays.fill(startKey, (byte)0);
- Arrays.fill(endKey, (byte)0xff);
-
- job.setPartitionerClass(SimpleTotalOrderPartitioner.class);
- // Set start and end rows for partitioner.
- SimpleTotalOrderPartitioner.setStartKey(job.getConfiguration(), startKey);
- SimpleTotalOrderPartitioner.setEndKey(job.getConfiguration(), endKey);
- job.setReducerClass(KeyValueSortReducer.class);
- job.setOutputFormatClass(HFileOutputFormat2.class);
- job.setNumReduceTasks(4);
- job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
- MutationSerialization.class.getName(), ResultSerialization.class.getName(),
- KeyValueSerialization.class.getName());
-
- FileOutputFormat.setOutputPath(job, testDir);
- assertTrue(job.waitForCompletion(false));
- FileStatus [] files = fs.listStatus(testDir);
- assertTrue(files.length > 0);
- }
-
- /**
- * Test that {@link HFileOutputFormat2} RecordWriter writes tags such as ttl into
- * hfile.
- */
- @Test
- public void test_WritingTagData()
- throws Exception {
- Configuration conf = new Configuration(this.util.getConfiguration());
- final String HFILE_FORMAT_VERSION_CONF_KEY = "hfile.format.version";
- conf.setInt(HFILE_FORMAT_VERSION_CONF_KEY, HFile.MIN_FORMAT_VERSION_WITH_TAGS);
- RecordWriter<ImmutableBytesWritable, Cell> writer = null;
- TaskAttemptContext context = null;
- Path dir =
- util.getDataTestDir("WritingTagData");
- try {
- conf.set(HFileOutputFormat2.OUTPUT_TABLE_NAME_CONF_KEY, TABLE_NAMES[0].getNameAsString());
- // turn locality off to eliminate getRegionLocation fail-and-retry time when writing kvs
- conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false);
- Job job = new Job(conf);
- FileOutputFormat.setOutputPath(job, dir);
- context = createTestTaskAttemptContext(job);
- HFileOutputFormat2 hof = new HFileOutputFormat2();
- writer = hof.getRecordWriter(context);
- final byte [] b = Bytes.toBytes("b");
-
- List< Tag > tags = new ArrayList<>();
- tags.add(new ArrayBackedTag(TagType.TTL_TAG_TYPE, Bytes.toBytes(978670)));
- KeyValue kv = new KeyValue(b, b, b, HConstants.LATEST_TIMESTAMP, b, tags);
- writer.write(new ImmutableBytesWritable(), kv);
- writer.close(context);
- writer = null;
- FileSystem fs = dir.getFileSystem(conf);
- RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(dir, true);
- while(iterator.hasNext()) {
- LocatedFileStatus keyFileStatus = iterator.next();
- HFile.Reader reader =
- HFile.createReader(fs, keyFileStatus.getPath(), new CacheConfig(conf), true, conf);
- HFileScanner scanner = reader.getScanner(false, false, false);
- scanner.seekTo();
- Cell cell = scanner.getCell();
- List<Tag> tagsFromCell = TagUtil.asList(cell.getTagsArray(), cell.getTagsOffset(),
- cell.getTagsLength());
- assertTrue(tagsFromCell.size() > 0);
- for (Tag tag : tagsFromCell) {
- assertTrue(tag.getType() == TagType.TTL_TAG_TYPE);
- }
- }
- } finally {
- if (writer != null && context != null) writer.close(context);
- dir.getFileSystem(conf).delete(dir, true);
- }
- }
-
- @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
- public void testJobConfiguration() throws Exception {
- Configuration conf = new Configuration(this.util.getConfiguration());
- conf.set(HConstants.TEMPORARY_FS_DIRECTORY_KEY, util.getDataTestDir("testJobConfiguration")
- .toString());
- Job job = new Job(conf);
- job.setWorkingDirectory(util.getDataTestDir("testJobConfiguration"));
- Table table = Mockito.mock(Table.class);
- RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
- setupMockStartKeys(regionLocator);
- setupMockTableName(regionLocator);
- HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
- assertEquals(job.getNumReduceTasks(), 4);
- }
-
- private byte [][] generateRandomStartKeys(int numKeys) {
- Random random = new Random();
- byte[][] ret = new byte[numKeys][];
- // first region start key is always empty
- ret[0] = HConstants.EMPTY_BYTE_ARRAY;
- for (int i = 1; i < numKeys; i++) {
- ret[i] =
- PerformanceEvaluation.generateData(random, PerformanceEvaluation.DEFAULT_VALUE_LENGTH);
- }
- return ret;
- }
-
- private byte[][] generateRandomSplitKeys(int numKeys) {
- Random random = new Random();
- byte[][] ret = new byte[numKeys][];
- for (int i = 0; i < numKeys; i++) {
- ret[i] =
- PerformanceEvaluation.generateData(random, PerformanceEvaluation.DEFAULT_VALUE_LENGTH);
- }
- return ret;
- }
-
- @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
- public void testMRIncrementalLoad() throws Exception {
- LOG.info("\nStarting test testMRIncrementalLoad\n");
- doIncrementalLoadTest(false, false, false, "testMRIncrementalLoad");
- }
-
- @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
- public void testMRIncrementalLoadWithSplit() throws Exception {
- LOG.info("\nStarting test testMRIncrementalLoadWithSplit\n");
- doIncrementalLoadTest(true, false, false, "testMRIncrementalLoadWithSplit");
- }
-
- /**
- * Test for HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY = true.
- * This test can only check the correctness of the original logic when LOCALITY_SENSITIVE_CONF_KEY
- * is set to true. Because MiniHBaseCluster always runs with a single hostname (and different ports),
- * it is not possible to check region locality by comparing region locations and DataNode hostnames.
- * Once MiniHBaseCluster supports an explicit hostnames parameter (just like MiniDFSCluster does),
- * we will be able to test region locality features more easily.
- */
- @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
- public void testMRIncrementalLoadWithLocality() throws Exception {
- LOG.info("\nStarting test testMRIncrementalLoadWithLocality\n");
- doIncrementalLoadTest(false, true, false, "testMRIncrementalLoadWithLocality1");
- doIncrementalLoadTest(true, true, false, "testMRIncrementalLoadWithLocality2");
- }
-
- //@Ignore("Wahtevs")
- @Test
- public void testMRIncrementalLoadWithPutSortReducer() throws Exception {
- LOG.info("\nStarting test testMRIncrementalLoadWithPutSortReducer\n");
- doIncrementalLoadTest(false, false, true, "testMRIncrementalLoadWithPutSortReducer");
- }
-
- private void doIncrementalLoadTest(boolean shouldChangeRegions, boolean shouldKeepLocality,
- boolean putSortReducer, String tableStr) throws Exception {
- doIncrementalLoadTest(shouldChangeRegions, shouldKeepLocality, putSortReducer,
- Arrays.asList(tableStr));
- }
-
- @Test
- public void testMultiMRIncrementalLoadWithPutSortReducer() throws Exception {
- LOG.info("\nStarting test testMultiMRIncrementalLoadWithPutSortReducer\n");
- doIncrementalLoadTest(false, false, true,
- Arrays.stream(TABLE_NAMES).map(TableName::getNameAsString).collect(Collectors.toList
- ()));
- }
-
- private void doIncrementalLoadTest(boolean shouldChangeRegions, boolean shouldKeepLocality,
- boolean putSortReducer, List<String> tableStr) throws Exception {
- util = new HBaseTestingUtility();
- Configuration conf = util.getConfiguration();
- conf.setBoolean(MultiTableHFileOutputFormat.LOCALITY_SENSITIVE_CONF_KEY, shouldKeepLocality);
- int hostCount = 1;
- int regionNum = 5;
- if (shouldKeepLocality) {
- // We should raise the host count above the HDFS replica count once MiniHBaseCluster supports
- // an explicit hostnames parameter, just like MiniDFSCluster does.
- hostCount = 3;
- regionNum = 20;
- }
-
- String[] hostnames = new String[hostCount];
- for (int i = 0; i < hostCount; ++i) {
- hostnames[i] = "datanode_" + i;
- }
- util.startMiniCluster(1, hostCount, hostnames);
-
- Map<String, Table> allTables = new HashMap<>(tableStr.size());
- List<HFileOutputFormat2.TableInfo> tableInfo = new ArrayList<>(tableStr.size());
- boolean writeMultipleTables = tableStr.size() > 1;
- for (String tableStrSingle : tableStr) {
- byte[][] splitKeys = generateRandomSplitKeys(regionNum - 1);
- TableName tableName = TableName.valueOf(tableStrSingle);
- Table table = util.createTable(tableName, FAMILIES, splitKeys);
-
- RegionLocator r = util.getConnection().getRegionLocator(tableName);
- assertEquals("Should start with empty table", 0, util.countRows(table));
- int numRegions = r.getStartKeys().length;
- assertEquals("Should make " + regionNum + " regions", numRegions, regionNum);
-
- allTables.put(tableStrSingle, table);
- tableInfo.add(new HFileOutputFormat2.TableInfo(table.getTableDescriptor(), r));
- }
- Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
- // Generate the bulk load files
- runIncrementalPELoad(conf, tableInfo, testDir, putSortReducer);
-
- for (Table tableSingle : allTables.values()) {
- // This doesn't write into the table, just makes files
- assertEquals("HFOF should not touch actual table", 0, util.countRows(tableSingle));
- }
- int numTableDirs = 0;
- for (FileStatus tf : testDir.getFileSystem(conf).listStatus(testDir)) {
- Path tablePath = testDir;
-
- if (writeMultipleTables) {
- if (allTables.containsKey(tf.getPath().getName())) {
- ++numTableDirs;
- tablePath = tf.getPath();
- }
- else {
- continue;
- }
- }
-
- // Make sure that a directory was created for every CF
- int dir = 0;
- for (FileStatus f : tablePath.getFileSystem(conf).listStatus(tablePath)) {
- for (byte[] family : FAMILIES) {
- if (Bytes.toString(family).equals(f.getPath().getName())) {
- ++dir;
- }
- }
- }
- assertEquals("Column family not found in FS.", FAMILIES.length, dir);
- }
- if (writeMultipleTables) {
- assertEquals("Dir for all input tables not created", numTableDirs, allTables.size());
- }
-
- Admin admin = util.getConnection().getAdmin();
- try {
- // handle the split case
- if (shouldChangeRegions) {
- Table chosenTable = allTables.values().iterator().next();
- // Choose a semi-random table if multiple tables are available
- LOG.info("Changing regions in table " + chosenTable.getName().getNameAsString());
- admin.disableTable(chosenTable.getName());
- util.waitUntilNoRegionsInTransition();
-
- util.deleteTable(chosenTable.getName());
- byte[][] newSplitKeys = generateRandomSplitKeys(14);
- Table table = util.createTable(chosenTable.getName(), FAMILIES, newSplitKeys);
-
- while (util.getConnection().getRegionLocator(chosenTable.getName())
- .getAllRegionLocations().size() != 15 ||
- !admin.isTableAvailable(table.getName())) {
- Thread.sleep(200);
- LOG.info("Waiting for new region assignment to happen");
- }
- }
-
- // Perform the actual load
- for (HFileOutputFormat2.TableInfo singleTableInfo : tableInfo) {
- Path tableDir = testDir;
- String tableNameStr = singleTableInfo.getHTableDescriptor().getNameAsString();
- LOG.info("Running LoadIncrementalHFiles on table" + tableNameStr);
- if (writeMultipleTables) {
- tableDir = new Path(testDir, tableNameStr);
- }
- Table currentTable = allTables.get(tableNameStr);
- TableName currentTableName = currentTable.getName();
- new LoadIncrementalHFiles(conf).doBulkLoad(tableDir, admin, currentTable, singleTableInfo
- .getRegionLocator());
-
- // Ensure data shows up
- int expectedRows = 0;
- if (putSortReducer) {
- // no rows should be extracted
- assertEquals("LoadIncrementalHFiles should put expected data in table", expectedRows,
- util.countRows(currentTable));
- } else {
- expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
- assertEquals("LoadIncrementalHFiles should put expected data in table", expectedRows,
- util.countRows(currentTable));
- Scan scan = new Scan();
- ResultScanner results = currentTable.getScanner(scan);
- for (Result res : results) {
- assertEquals(FAMILIES.length, res.rawCells().length);
- Cell first = res.rawCells()[0];
- for (Cell kv : res.rawCells()) {
- assertTrue(CellUtil.matchingRow(first, kv));
- assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv)));
- }
- }
- results.close();
- }
- String tableDigestBefore = util.checksumRows(currentTable);
- // Check region locality
- HDFSBlocksDistribution hbd = new HDFSBlocksDistribution();
- for (HRegion region : util.getHBaseCluster().getRegions(currentTableName)) {
- hbd.add(region.getHDFSBlocksDistribution());
- }
- for (String hostname : hostnames) {
- float locality = hbd.getBlockLocalityIndex(hostname);
- LOG.info("locality of [" + hostname + "]: " + locality);
- assertEquals(100, (int) (locality * 100));
- }
-
- // Cause regions to reopen
- admin.disableTable(currentTableName);
- while (!admin.isTableDisabled(currentTableName)) {
- Thread.sleep(200);
- LOG.info("Waiting for table to disable");
- }
- admin.enableTable(currentTableName);
- util.waitTableAvailable(currentTableName);
- assertEquals("Data should remain after reopening of regions",
- tableDigestBefore, util.checksumRows(currentTable));
- }
- } finally {
- for (HFileOutputFormat2.TableInfo tableInfoSingle : tableInfo) {
- tableInfoSingle.getRegionLocator().close();
- }
- for (Entry<String, Table> singleTable : allTables.entrySet() ) {
- singleTable.getValue().close();
- util.deleteTable(singleTable.getValue().getName());
- }
- testDir.getFileSystem(conf).delete(testDir, true);
- util.shutdownMiniCluster();
- }
- }
-
- private void runIncrementalPELoad(Configuration conf, List<HFileOutputFormat2.TableInfo> tableInfo, Path outDir,
- boolean putSortReducer) throws IOException,
- InterruptedException, ClassNotFoundException {
- Job job = new Job(conf, "testLocalMRIncrementalLoad");
- job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));
- job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
- MutationSerialization.class.getName(), ResultSerialization.class.getName(),
- KeyValueSerialization.class.getName());
- setupRandomGeneratorMapper(job, putSortReducer);
- if (tableInfo.size() > 1) {
- MultiTableHFileOutputFormat.configureIncrementalLoad(job, tableInfo);
- int sum = 0;
- for (HFileOutputFormat2.TableInfo tableInfoSingle : tableInfo) {
- sum += tableInfoSingle.getRegionLocator().getAllRegionLocations().size();
- }
- assertEquals(sum, job.getNumReduceTasks());
- }
- else {
- RegionLocator regionLocator = tableInfo.get(0).getRegionLocator();
- HFileOutputFormat2.configureIncrementalLoad(job, tableInfo.get(0).getHTableDescriptor(),
- regionLocator);
- assertEquals(regionLocator.getAllRegionLocations().size(), job.getNumReduceTasks());
- }
-
- FileOutputFormat.setOutputPath(job, outDir);
-
- assertFalse(util.getTestFileSystem().exists(outDir));
-
- assertTrue(job.waitForCompletion(true));
- }
-
- /**
- * Test for {@link HFileOutputFormat2#configureCompression(Configuration, HTableDescriptor)} and
- * {@link HFileOutputFormat2#createFamilyCompressionMap(Configuration)}.
- * Tests that the compression map is correctly serialized into
- * and deserialized from configuration
- *
- * @throws IOException
- */
- @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
- public void testSerializeDeserializeFamilyCompressionMap() throws IOException {
- for (int numCfs = 0; numCfs <= 3; numCfs++) {
- Configuration conf = new Configuration(this.util.getConfiguration());
- Map<String, Compression.Algorithm> familyToCompression =
- getMockColumnFamiliesForCompression(numCfs);
- Table table = Mockito.mock(Table.class);
- setupMockColumnFamiliesForCompression(table, familyToCompression);
- conf.set(HFileOutputFormat2.COMPRESSION_FAMILIES_CONF_KEY,
- HFileOutputFormat2.serializeColumnFamilyAttribute
- (HFileOutputFormat2.compressionDetails,
- Arrays.asList(table.getTableDescriptor())));
-
- // read back family specific compression setting from the configuration
- Map<byte[], Algorithm> retrievedFamilyToCompressionMap = HFileOutputFormat2
- .createFamilyCompressionMap(conf);
-
- // test that we have a value for all column families that matches with the
- // used mock values
- for (Entry<String, Algorithm> entry : familyToCompression.entrySet()) {
- assertEquals("Compression configuration incorrect for column family:"
- + entry.getKey(), entry.getValue(),
- retrievedFamilyToCompressionMap.get(entry.getKey().getBytes("UTF-8")));
- }
- }
- }
-
- private void setupMockColumnFamiliesForCompression(Table table,
- Map<String, Compression.Algorithm> familyToCompression) throws IOException {
- HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAMES[0]);
- for (Entry<String, Compression.Algorithm> entry : familyToCompression.entrySet()) {
- mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
- .setMaxVersions(1)
- .setCompressionType(entry.getValue())
- .setBlockCacheEnabled(false)
- .setTimeToLive(0));
- }
- Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
- }
-
- /**
- * @return a map from column family names to compression algorithms for
- * testing column family compression. Column family names have special characters
- */
- private Map<String, Compression.Algorithm>
- getMockColumnFamiliesForCompression (int numCfs) {
- Map<String, Compression.Algorithm> familyToCompression = new HashMap<>();
- // use column family names having special characters
- if (numCfs-- > 0) {
- familyToCompression.put("Family1!@#!@#&", Compression.Algorithm.LZO);
- }
- if (numCfs-- > 0) {
- familyToCompression.put("Family2=asdads&!AASD", Compression.Algorithm.SNAPPY);
- }
- if (numCfs-- > 0) {
- familyToCompression.put("Family2=asdads&!AASD", Compression.Algorithm.GZ);
- }
- if (numCfs-- > 0) {
- familyToCompression.put("Family3", Compression.Algorithm.NONE);
- }
- return familyToCompression;
- }
-
-
- /**
- * Test for {@link HFileOutputFormat2#configureBloomType(HTableDescriptor, Configuration)} and
- * {@link HFileOutputFormat2#createFamilyBloomTypeMap(Configuration)}.
- * Tests that the bloom type map is correctly serialized into
- * and deserialized from configuration
- *
- * @throws IOException
- */
- @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
- public void testSerializeDeserializeFamilyBloomTypeMap() throws IOException {
- for (int numCfs = 0; numCfs <= 2; numCfs++) {
- Configuration conf = new Configuration(this.util.getConfiguration());
- Map<String, BloomType> familyToBloomType =
- getMockColumnFamiliesForBloomType(numCfs);
- Table table = Mockito.mock(Table.class);
- setupMockColumnFamiliesForBloomType(table,
- familyToBloomType);
- conf.set(HFileOutputFormat2.BLOOM_TYPE_FAMILIES_CONF_KEY,
- HFileOutputFormat2.serializeColumnFamilyAttribute(HFileOutputFormat2.bloomTypeDetails,
- Arrays.asList(table.getTableDescriptor())));
-
- // read back family specific data block encoding settings from the
- // configuration
- Map<byte[], BloomType> retrievedFamilyToBloomTypeMap =
- HFileOutputFormat2
- .createFamilyBloomTypeMap(conf);
-
- // test that we have a value for all column families that matches with the
- // used mock values
- for (Entry<String, BloomType> entry : familyToBloomType.entrySet()) {
- assertEquals("BloomType configuration incorrect for column family:"
- + entry.getKey(), entry.getValue(),
- retrievedFamilyToBloomTypeMap.get(entry.getKey().getBytes("UTF-8")));
- }
- }
- }
-
- private void setupMockColumnFamiliesForBloomType(Table table,
- Map<String, BloomType> familyToDataBlockEncoding) throws IOException {
- HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAMES[0]);
- for (Entry<String, BloomType> entry : familyToDataBlockEncoding.entrySet()) {
- mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
- .setMaxVersions(1)
- .setBloomFilterType(entry.getValue())
- .setBlockCacheEnabled(false)
- .setTimeToLive(0));
- }
- Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
- }
-
- /**
- * @return a map from column family names to bloom filter types for
- * testing column family bloom filter settings. Column family names have special characters
- */
- private Map<String, BloomType>
- getMockColumnFamiliesForBloomType (int numCfs) {
- Map<String, BloomType> familyToBloomType = new HashMap<>();
- // use column family names having special characters
- if (numCfs-- > 0) {
- familyToBloomType.put("Family1!@#!@#&", BloomType.ROW);
- }
- if (numCfs-- > 0) {
- familyToBloomType.put("Family2=asdads&!AASD",
- BloomType.ROWCOL);
- }
- if (numCfs-- > 0) {
- familyToBloomType.put("Family3", BloomType.NONE);
- }
- return familyToBloomType;
- }
-
- /**
- * Test for {@link HFileOutputFormat2#configureBlockSize(HTableDescriptor, Configuration)} and
- * {@link HFileOutputFormat2#createFamilyBlockSizeMap(Configuration)}.
- * Tests that the block size map is correctly serialized into
- * and deserialized from configuration
- *
- * @throws IOException
- */
- @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
- public void testSerializeDeserializeFamilyBlockSizeMap() throws IOException {
- for (int numCfs = 0; numCfs <= 3; numCfs++) {
- Configuration conf = new Configuration(this.util.getConfiguration());
- Map<String, Integer> familyToBlockSize =
- getMockColumnFamiliesForBlockSize(numCfs);
- Table table = Mockito.mock(Table.class);
- setupMockColumnFamiliesForBlockSize(table,
- familyToBlockSize);
- conf.set(HFileOutputFormat2.BLOCK_SIZE_FAMILIES_CONF_KEY,
- HFileOutputFormat2.serializeColumnFamilyAttribute
- (HFileOutputFormat2.blockSizeDetails, Arrays.asList(table
- .getTableDescriptor())));
-
- // read back family specific data block encoding settings from the
- // configuration
- Map<byte[], Integer> retrievedFamilyToBlockSizeMap =
- HFileOutputFormat2
- .createFamilyBlockSizeMap(conf);
-
- // test that we have a value for all column families that matches with the
- // used mock values
- for (Entry<String, Integer> entry : familyToBlockSize.entrySet()
- ) {
- assertEquals("BlockSize configuration incorrect for column family:"
- + entry.getKey(), entry.getValue(),
- retrievedFamilyToBlockSizeMap.get(entry.getKey().getBytes("UTF-8")));
- }
- }
- }
-
- private void setupMockColumnFamiliesForBlockSize(Table table,
- Map<String, Integer> familyToDataBlockEncoding) throws IOException {
- HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAMES[0]);
- for (Entry<String, Integer> entry : familyToDataBlockEncoding.entrySet()) {
- mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
- .setMaxVersions(1)
- .setBlocksize(entry.getValue())
- .setBlockCacheEnabled(false)
- .setTimeToLive(0));
- }
- Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
- }
-
- /**
- * @return a map from column family names to block sizes for
- * testing column family block size settings. Column family names have special characters
- */
- private Map<String, Integer>
- getMockColumnFamiliesForBlockSize (int numCfs) {
- Map<String, Integer> familyToBlockSize = new HashMap<>();
- // use column family names having special characters
- if (numCfs-- > 0) {
- familyToBlockSize.put("Family1!@#!@#&", 1234);
- }
- if (numCfs-- > 0) {
- familyToBlockSize.put("Family2=asdads&!AASD",
- Integer.MAX_VALUE);
- }
- if (numCfs-- > 0) {
- familyToBlockSize.put("Family2=asdads&!AASD",
- Integer.MAX_VALUE);
- }
- if (numCfs-- > 0) {
- familyToBlockSize.put("Family3", 0);
- }
- return familyToBlockSize;
- }
-
- /**
- * Test for {@link HFileOutputFormat2#configureDataBlockEncoding(HTableDescriptor, Configuration)}
- * and {@link HFileOutputFormat2#createFamilyDataBlockEncodingMap(Configuration)}.
- * Tests that the data block encoding map is correctly serialized into
- * and deserialized from configuration
- *
- * @throws IOException
- */
- @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
- public void testSerializeDeserializeFamilyDataBlockEncodingMap() throws IOException {
- for (int numCfs = 0; numCfs <= 3; numCfs++) {
- Configuration conf = new Configuration(this.util.getConfiguration());
- Map<String, DataBlockEncoding> familyToDataBlockEncoding =
- getMockColumnFamiliesForDataBlockEncoding(numCfs);
- Table table = Mockito.mock(Table.class);
- setupMockColumnFamiliesForDataBlockEncoding(table,
- familyToDataBlockEncoding);
- HTableDescriptor tableDescriptor = table.getTableDescriptor();
- conf.set(HFileOutputFormat2.DATABLOCK_ENCODING_FAMILIES_CONF_KEY,
- HFileOutputFormat2.serializeColumnFamilyAttribute
- (HFileOutputFormat2.dataBlockEncodingDetails, Arrays
- .asList(tableDescriptor)));
-
- // read back family specific data block encoding settings from the
- // configuration
- Map<byte[], DataBlockEncoding> retrievedFamilyToDataBlockEncodingMap =
- HFileOutputFormat2
- .createFamilyDataBlockEncodingMap(conf);
-
- // test that we have a value for all column families that matches with the
- // used mock values
- for (Entry<String, DataBlockEncoding> entry : familyToDataBlockEncoding.entrySet()) {
- assertEquals("DataBlockEncoding configuration incorrect for column family:"
- + entry.getKey(), entry.getValue(),
- retrievedFamilyToDataBlockEncodingMap.get(entry.getKey().getBytes("UTF-8")));
- }
- }
- }
-
- private void setupMockColumnFamiliesForDataBlockEncoding(Table table,
- Map<String, DataBlockEncoding> familyToDataBlockEncoding) throws IOException {
- HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAMES[0]);
- for (Entry<String, DataBlockEncoding> entry : familyToDataBlockEncoding.entrySet()) {
- mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
- .setMaxVersions(1)
- .setDataBlockEncoding(entry.getValue())
- .setBlockCacheEnabled(false)
- .setTimeToLive(0));
- }
- Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
- }
-
- /**
- * @return a map from column family names to data block encodings for
- * testing column family data block encoding settings. Column family names have special characters
- */
- private Map<String, DataBlockEncoding>
- getMockColumnFamiliesForDataBlockEncoding (int numCfs) {
- Map<String, DataBlockEncoding> familyToDataBlockEncoding = new HashMap<>();
- // use column family names having special characters
- if (numCfs-- > 0) {
- familyToDataBlockEncoding.put("Family1!@#!@#&", DataBlockEncoding.DIFF);
- }
- if (numCfs-- > 0) {
- familyToDataBlockEncoding.put("Family2=asdads&!AASD",
- DataBlockEncoding.FAST_DIFF);
- }
- if (numCfs-- > 0) {
- familyToDataBlockEncoding.put("Family2=asdads&!AASD",
- DataBlockEncoding.PREFIX);
- }
- if (numCfs-- > 0) {
- familyToDataBlockEncoding.put("Family3", DataBlockEncoding.NONE);
- }
- return familyToDataBlockEncoding;
- }
-
- private void setupMockStartKeys(RegionLocator table) throws IOException {
- byte[][] mockKeys = new byte[][] {
- HConstants.EMPTY_BYTE_ARRAY,
- Bytes.toBytes("aaa"),
- Bytes.toBytes("ggg"),
- Bytes.toBytes("zzz")
- };
- Mockito.doReturn(mockKeys).when(table).getStartKeys();
- }
-
- private void setupMockTableName(RegionLocator table) throws IOException {
- TableName mockTableName = TableName.valueOf("mock_table");
- Mockito.doReturn(mockTableName).when(table).getName();
- }
-
- /**
- * Test that {@link HFileOutputFormat2} RecordWriter uses compression and
- * bloom filter settings from the column family descriptor
- */
- @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
- public void testColumnFamilySettings() throws Exception {
- Configuration conf = new Configuration(this.util.getConfiguration());
- RecordWriter<ImmutableBytesWritable, Cell> writer = null;
- TaskAttemptContext context = null;
- Path dir = util.getDataTestDir("testColumnFamilySettings");
-
- // Setup table descriptor
- Table table = Mockito.mock(Table.class);
- RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
- HTableDescriptor htd = new HTableDescriptor(TABLE_NAMES[0]);
- Mockito.doReturn(htd).when(table).getTableDescriptor();
- for (HColumnDescriptor hcd: HBaseTestingUtility.generateColumnDescriptors()) {
- htd.addFamily(hcd);
- }
-
- // set up the table to return some mock keys
- setupMockStartKeys(regionLocator);
-
- try {
- // partial map red setup to get an operational writer for testing
- // We turn off the sequence file compression, because DefaultCodec
- // pollutes the GZip codec pool with an incompatible compressor.
- conf.set("io.seqfile.compression.type", "NONE");
- conf.set("hbase.fs.tmp.dir", dir.toString());
- // turn locality off to eliminate getRegionLocation fail-and-retry time when writing kvs
- conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false);
-
- Job job = new Job(conf, "testLocalMRIncrementalLoad");
- job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilySettings"));
- setupRandomGeneratorMapper(job, false);
- HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
- FileOutputFormat.setOutputPath(job, dir);
- context = createTestTaskAttemptContext(job);
- HFileOutputFormat2 hof = new HFileOutputFormat2();
- writer = hof.getRecordWriter(context);
-
- // write out random rows
- writeRandomKeyValues(writer, context, htd.getFamiliesKeys(), ROWSPERSPLIT);
- writer.close(context);
-
- // Make sure that a directory was created for every CF
- FileSystem fs = dir.getFileSystem(conf);
-
- // commit so that the filesystem has one directory per column family
- hof.getOutputCommitter(context).commitTask(context);
- hof.getOutputCommitter(context).commitJob(context);
- FileStatus[] families = FSUtils.listStatus(fs, dir, new FSUtils.FamilyDirFilter(fs));
- assertEquals(htd.getFamilies().size(), families.length);
- for (FileStatus f : families) {
- String familyStr = f.getPath().getName();
- HColumnDescriptor hcd = htd.getFamily(Bytes.toBytes(familyStr));
- // verify that the compression on this file matches the configured
- // compression
- Path dataFilePath = fs.listStatus(f.getPath())[0].getPath();
- Reader reader = HFile.createReader(fs, dataFilePath, new CacheConfig(conf), true, conf);
- Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
-
- byte[] bloomFilter = fileInfo.get(StoreFile.BLOOM_FILTER_TYPE_KEY);
- if (bloomFilter == null) bloomFilter = Bytes.toBytes("NONE");
- assertEquals("Incorrect bloom filter used for column family " + familyStr +
- "(reader: " + reader + ")",
- hcd.getBloomFilterType(), BloomType.valueOf(Bytes.toString(bloomFilter)));
- assertEquals("Incorrect compression used for column family " + familyStr +
- "(reader: " + reader + ")", hcd.getCompressionType(), reader.getFileContext().getCompression());
- }
- } finally {
- dir.getFileSystem(conf).delete(dir, true);
- }
- }
-
- /**
- * Write random values to the writer assuming a table created using
- * {@link #FAMILIES} as column family descriptors
- */
- private void writeRandomKeyValues(RecordWriter<ImmutableBytesWritable, Cell> writer,
- TaskAttemptContext context, Set<byte[]> families, int numRows)
- throws IOException, InterruptedException {
- byte keyBytes[] = new byte[Bytes.SIZEOF_INT];
- int valLength = 10;
- byte valBytes[] = new byte[valLength];
-
- int taskId = context.getTaskAttemptID().getTaskID().getId();
- assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";
- final byte [] qualifier = Bytes.toBytes("data");
- Random random = new Random();
- for (int i = 0; i < numRows; i++) {
-
- Bytes.putInt(keyBytes, 0, i);
- random.nextBytes(valBytes);
- ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);
-
- for (byte[] family : families) {
- Cell kv = new KeyValue(keyBytes, family, qualifier, valBytes);
- writer.write(key, kv);
- }
- }
- }
-
- /**
- * This test covers the scenario from HBASE-6901.
- * All files are bulk loaded and excluded from minor compaction.
- * Without the fix for HBASE-6901, an ArrayIndexOutOfBoundsException
- * would be thrown.
- */
- @Ignore ("Flakey: See HBASE-9051") @Test
- public void testExcludeAllFromMinorCompaction() throws Exception {
- Configuration conf = util.getConfiguration();
- conf.setInt("hbase.hstore.compaction.min", 2);
- generateRandomStartKeys(5);
-
- util.startMiniCluster();
- try (Connection conn = ConnectionFactory.createConnection();
- Admin admin = conn.getAdmin();
- Table table = util.createTable(TABLE_NAMES[0], FAMILIES);
- RegionLocator locator = conn.getRegionLocator(TABLE_NAMES[0])) {
- final FileSystem fs = util.getDFSCluster().getFileSystem();
- assertEquals("Should start with empty table", 0, util.countRows(table));
-
- // deep inspection: get the StoreFile dir
- final Path storePath = new Path(
- FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAMES[0]),
- new Path(admin.getTableRegions(TABLE_NAMES[0]).get(0).getEncodedName(),
- Bytes.toString(FAMILIES[0])));
- assertEquals(0, fs.listStatus(storePath).length);
-
- // Generate two bulk load files
- conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
- true);
-
- for (int i = 0; i < 2; i++) {
- Path testDir = util.getDataTestDirOnTestFS("testExcludeAllFromMinorCompaction_" + i);
- runIncrementalPELoad(conf, Arrays.asList(new HFileOutputFormat2.TableInfo(table
- .getTableDescriptor(), conn.getRegionLocator(TABLE_NAMES[0]))), testDir, false);
- // Perform the actual load
- new LoadIncrementalHFiles(conf).doBulkLoad(testDir, admin, table, locator);
- }
-
- // Ensure data shows up
- int expectedRows = 2 * NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
- assertEquals("LoadIncrementalHFiles should put expected data in table",
- expectedRows, util.countRows(table));
-
- // should have a second StoreFile now
- assertEquals(2, fs.listStatus(storePath).length);
-
- // minor compactions shouldn't get rid of the file
- admin.compact(TABLE_NAMES[0]);
- try {
- quickPoll(new Callable<Boolean>() {
- @Override
- public Boolean call() throws Exception {
- List<HRegion> regions = util.getMiniHBaseCluster().getRegions(TABLE_NAMES[0]);
- for (HRegion region : regions) {
- for (Store store : region.getStores()) {
- store.closeAndArchiveCompactedFiles();
- }
- }
- return fs.listStatus(storePath).length == 1;
- }
- }, 5000);
- throw new IOException("SF# = " + fs.listStatus(storePath).length);
- } catch (AssertionError ae) {
- // this is expected behavior
- }
-
- // a major compaction should work though
- admin.majorCompact(TABLE_NAMES[0]);
- quickPoll(new Callable<Boolean>() {
- @Override
- public Boolean call() throws Exception {
- List<HRegion> regions = util.getMiniHBaseCluster().getRegions(TABLE_NAMES[0]);
- for (HRegion region : regions) {
- for (Store store : region.getStores()) {
- store.closeAndArchiveCompactedFiles();
- }
- }
- return fs.listStatus(storePath).length == 1;
- }
- }, 5000);
-
- } finally {
- util.shutdownMiniCluster();
- }
- }
-
- @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
- public void testExcludeMinorCompaction() throws Exception {
- Configuration conf = util.getConfiguration();
- conf.setInt("hbase.hstore.compaction.min", 2);
- generateRandomStartKeys(5);
-
- util.startMiniCluster();
- try (Connection conn = ConnectionFactory.createConnection(conf);
- Admin admin = conn.getAdmin()){
- Path testDir = util.getDataTestDirOnTestFS("testExcludeMinorCompaction");
- final FileSystem fs = util.getDFSCluster().getFileSystem();
- Table table = util.createTable(TABLE_NAMES[0], FAMILIES);
- assertEquals("Should start with empty table", 0, util.countRows(table));
-
- // deep inspection: get the StoreFile dir
- final Path storePath = new Path(
- FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAMES[0]),
- new Path(admin.getTableRegions(TABLE_NAMES[0]).get(0).getEncodedName(),
- Bytes.toString(FAMILIES[0])));
- assertEquals(0, fs.listStatus(storePath).length);
-
- // put some data in it and flush to create a storefile
- Put p = new Put(Bytes.toBytes("test"));
- p.addColumn(FAMILIES[0], Bytes.toBytes("1"), Bytes.toBytes("1"));
- table.put(p);
- admin.flush(TABLE_NAMES[0]);
- assertEquals(1, util.countRows(table));
- quickPoll(new Callable<Boolean>() {
- @Override
- public Boolean call() throws Exception {
- return fs.listStatus(storePath).length == 1;
- }
- }, 5000);
-
- // Generate a bulk load file with more rows
- conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
- true);
-
- RegionLocator regionLocator = conn.getRegionLocator(TABLE_NAMES[0]);
- runIncrementalPELoad(conf, Arrays.asList(new HFileOutputFormat2.TableInfo(table
- .getTableDescriptor(), regionLocator)), testDir, false);
-
- // Perform the actual load
- new LoadIncrementalHFiles(conf).doBulkLoad(testDir, admin, table, regionLocator);
-
- // Ensure data shows up
- int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
- assertEquals("LoadIncrementalHFiles should put expected data in table",
- expectedRows + 1, util.countRows(table));
-
- // should have a second StoreFile now
- assertEquals(2, fs.listStatus(storePath).length);
-
- // minor compactions shouldn't get rid of the file
- admin.compact(TABLE_NAMES[0]);
- try {
- quickPoll(new Callable<Boolean>() {
- @Override
- public Boolean call() throws Exception {
- return fs.listStatus(storePath).length == 1;
- }
- }, 5000);
- throw new IOException("SF# = " + fs.listStatus(storePath).length);
- } catch (AssertionError ae) {
- // this is expected behavior
- }
-
- // a major compaction should work though
- admin.majorCompact(TABLE_NAMES[0]);
- quickPoll(new Callable<Boolean>() {
- @Override
- public Boolean call() throws Exception {
- return fs.listStatus(storePath).length == 1;
- }
- }, 5000);
-
- } finally {
- util.shutdownMiniCluster();
- }
- }
-
- private void quickPoll(Callable<Boolean> c, int waitMs) throws Exception {
- int sleepMs = 10;
- int retries = (int) Math.ceil(((double) waitMs) / sleepMs);
- while (retries-- > 0) {
- if (c.call().booleanValue()) {
- return;
- }
- Thread.sleep(sleepMs);
- }
- fail();
- }
-
- public static void main(String args[]) throws Exception {
- new TestHFileOutputFormat2().manualTest(args);
- }
-
- public void manualTest(String args[]) throws Exception {
- Configuration conf = HBaseConfiguration.create();
- util = new HBaseTestingUtility(conf);
- if ("newtable".equals(args[0])) {
- TableName tname = TableName.valueOf(args[1]);
- byte[][] splitKeys = generateRandomSplitKeys(4);
- Table table = util.createTable(tname, FAMILIES, splitKeys);
- } else if ("incremental".equals(args[0])) {
- TableName tname = TableName.valueOf(args[1]);
- try(Connection c = ConnectionFactory.createConnection(conf);
- Admin admin = c.getAdmin();
- RegionLocator regionLocator = c.getRegionLocator(tname)) {
- Path outDir = new Path("incremental-out");
- runIncrementalPELoad(conf, Arrays.asList(new HFileOutputFormat2.TableInfo(admin
- .getTableDescriptor(tname), regionLocator)), outDir, false);
- }
- } else {
- throw new RuntimeException(
- "usage: TestHFileOutputFormat2 newtable | incremental");
- }
- }
-
- @Test
- public void testBlockStoragePolicy() throws Exception {
- util = new HBaseTestingUtility();
- Configuration conf = util.getConfiguration();
- conf.set(HFileOutputFormat2.STORAGE_POLICY_PROPERTY, "ALL_SSD");
-
- conf.set(HFileOutputFormat2.STORAGE_POLICY_PROPERTY_CF_PREFIX +
- Bytes.toString(HFileOutputFormat2.combineTableNameSuffix(
- TABLE_NAMES[0].getName(), FAMILIES[0])), "ONE_SSD");
- Path cf1Dir = new Path(util.getDataTestDir(), Bytes.toString(FAMILIES[0]));
- Path cf2Dir = new Path(util.getDataTestDir(), Bytes.toString(FAMILIES[1]));
- util.startMiniDFSCluster(3);
- FileSystem fs = util.getDFSCluster().getFileSystem();
- try {
- fs.mkdirs(cf1Dir);
- fs.mkdirs(cf2Dir);
-
- // the original block storage policy would be HOT
- String spA = getStoragePolicyName(fs, cf1Dir);
- String spB = getStoragePolicyName(fs, cf2Dir);
- LOG.debug("Storage policy of cf 0: [" + spA + "].");
- LOG.debug("Storage policy of cf 1: [" + spB + "].");
- assertEquals("HOT", spA);
- assertEquals("HOT", spB);
-
- // alter table cf schema to change storage policies
- HFileOutputFormat2.configureStoragePolicy(conf, fs,
- HFileOutputFormat2.combineTableNameSuffix(TABLE_NAMES[0].getName(), FAMILIES[0]), cf1Dir);
- HFileOutputFormat2.configureStoragePolicy(conf, fs,
- HFileOutputFormat2.combineTableNameSuffix(TABLE_NAMES[0].getName(), FAMILIES[1]), cf2Dir);
- spA = getStoragePolicyName(fs, cf1Dir);
- spB = getStoragePolicyName(fs, cf2Dir);
- LOG.debug("Storage policy of cf 0: [" + spA + "].");
- LOG.debug("Storage policy of cf 1: [" + spB + "].");
- assertNotNull(spA);
- assertEquals("ONE_SSD", spA);
- assertNotNull(spB);
- assertEquals("ALL_SSD", spB);
- } finally {
- fs.delete(cf1Dir, true);
- fs.delete(cf2Dir, true);
- util.shutdownMiniDFSCluster();
- }
- }
-
- private String getStoragePolicyName(FileSystem fs, Path path) {
- try {
- Object blockStoragePolicySpi = ReflectionUtils.invokeMethod(fs, "getStoragePolicy", path);
- return (String) ReflectionUtils.invokeMethod(blockStoragePolicySpi, "getName");
- } catch (Exception e) {
- // May fail when running against an old HDFS version; try the old way
- if (LOG.isTraceEnabled()) {
- LOG.trace("Failed to get policy directly", e);
- }
- String policy = getStoragePolicyNameForOldHDFSVersion(fs, path);
- return policy == null ? "HOT" : policy;// HOT by default
- }
- }
-
- private String getStoragePolicyNameForOldHDFSVersion(FileSystem fs, Path path) {
- try {
- if (fs instanceof DistributedFileSystem) {
- DistributedFileSystem dfs = (DistributedFileSystem) fs;
- HdfsFileStatus status = dfs.getClient().getFileInfo(path.toUri().getPath());
- if (null != status) {
- byte storagePolicyId = status.getStoragePolicy();
- Field idUnspecified = BlockStoragePolicySuite.class.getField("ID_UNSPECIFIED");
- if (storagePolicyId != idUnspecified.getByte(BlockStoragePolicySuite.class)) {
- BlockStoragePolicy[] policies = dfs.getStoragePolicies();
- for (BlockStoragePolicy policy : policies) {
- if (policy.getId() == storagePolicyId) {
- return policy.getName();
- }
- }
- }
- }
- }
- } catch (Throwable e) {
- LOG.warn("failed to get block storage policy of [" + path + "]", e);
- }
-
- return null;
- }
-}
-
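For context on what the moved test exercises, here is a minimal, hypothetical driver sketch
(not part of this commit) of the bulk-load flow TestHFileOutputFormat2 verifies: configure a
job with HFileOutputFormat2.configureIncrementalLoad(), run it, then hand the output directory
to LoadIncrementalHFiles. The class name BulkLoadDriverSketch, the command-line arguments, and
the mapper wiring are illustrative assumptions; the HBase calls mirror those used in the test
above, and configureIncrementalLoad() is what serializes the per-family compression, bloom,
block size and data block encoding settings that the serialize/deserialize tests check.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class BulkLoadDriverSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    TableName tableName = TableName.valueOf(args[0]); // hypothetical: table to load into
    Path hfileDir = new Path(args[1]);                // hypothetical: staging dir for HFiles

    try (Connection conn = ConnectionFactory.createConnection(conf);
         Admin admin = conn.getAdmin();
         Table table = conn.getTable(tableName);
         RegionLocator locator = conn.getRegionLocator(tableName)) {
      Job job = Job.getInstance(conf, "bulk-load-" + tableName);
      job.setJarByClass(BulkLoadDriverSketch.class);
      // The input format and mapper are application specific; the mapper must emit
      // ImmutableBytesWritable row keys with KeyValue (or Put) values, as the
      // Random*GeneratingMapper classes in the test above do.
      HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), locator);
      FileOutputFormat.setOutputPath(job, hfileDir);
      if (!job.waitForCompletion(true)) {
        throw new RuntimeException("HFile-writing job failed");
      }
      // Move the generated HFiles into the table's regions.
      new LoadIncrementalHFiles(conf).doBulkLoad(hfileDir, admin, table, locator);
    }
  }
}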
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHRegionPartitioner.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHRegionPartitioner.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHRegionPartitioner.java
deleted file mode 100644
index 2867f13..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHRegionPartitioner.java
+++ /dev/null
@@ -1,71 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
- * agreements. See the NOTICE file distributed with this work for additional information regarding
- * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with the License. You may
- * obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software distributed under the
- * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
- * either express or implied. See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.testclassification.MediumTests;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
-
-import static org.junit.Assert.assertEquals;
-
-@Category({MapReduceTests.class, MediumTests.class})
-public class TestHRegionPartitioner {
- private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
-
- @Rule
- public TestName name = new TestName();
-
- @BeforeClass
- public static void beforeClass() throws Exception {
- UTIL.startMiniCluster();
- }
-
- @AfterClass
- public static void afterClass() throws Exception {
- UTIL.shutdownMiniCluster();
- }
-
- /**
- * Test HRegionPartitioner
- */
- @Test (timeout=300000)
- public void testHRegionPartitioner() throws Exception {
-
- byte[][] families = { Bytes.toBytes("familyA"), Bytes.toBytes("familyB") };
-
- UTIL.createTable(TableName.valueOf(name.getMethodName()), families, 1,
- Bytes.toBytes("aa"), Bytes.toBytes("cc"), 3);
-
- HRegionPartitioner<Long, Long> partitioner = new HRegionPartitioner<>();
- Configuration configuration = UTIL.getConfiguration();
- configuration.set(TableOutputFormat.OUTPUT_TABLE, name.getMethodName());
- partitioner.setConf(configuration);
- ImmutableBytesWritable writable = new ImmutableBytesWritable(Bytes.toBytes("bb"));
-
- assertEquals(1, partitioner.getPartition(writable, 10L, 3));
- assertEquals(0, partitioner.getPartition(writable, 10L, 1));
- }
-}
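A small, hypothetical usage sketch (not part of this commit) of what the deleted test checks:
HRegionPartitioner routes each map output key to the reduce partition matching the region of
the configured output table, so reducers line up with regions. It assumes a reachable cluster
where the table (here the made-up name "my_table") already exists; against the test's table
split at "aa"/"cc", row "bb" lands in partition 1 with three partitions and partition 0 with one.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HRegionPartitioner;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.hbase.util.Bytes;

public class RegionPartitionerSketch {
  public static void main(String[] args) {
    Configuration conf = HBaseConfiguration.create();
    // Tell the partitioner which table's region boundaries to use.
    conf.set(TableOutputFormat.OUTPUT_TABLE, "my_table"); // hypothetical table name

    HRegionPartitioner<Long, Long> partitioner = new HRegionPartitioner<>();
    partitioner.setConf(conf); // connects to the cluster and caches the table's start keys

    ImmutableBytesWritable row = new ImmutableBytesWritable(Bytes.toBytes("bb"));
    // With at least as many partitions as regions, the result is the index of the region
    // containing the row; with fewer partitions the index is folded down.
    System.out.println("row 'bb' -> partition " + partitioner.getPartition(row, 10L, 3));
  }
}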
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHashTable.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHashTable.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHashTable.java
deleted file mode 100644
index 1f4efcd..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHashTable.java
+++ /dev/null
@@ -1,194 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-
-import java.util.HashMap;
-import java.util.Map;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.MapFile;
-import org.junit.AfterClass;
-import org.junit.Assert;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.ImmutableMap;
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.Maps;
-import org.junit.rules.TestName;
-
-/**
- * Basic test for the HashTable M/R tool
- */
-@Category(LargeTests.class)
-public class TestHashTable {
-
- private static final Log LOG = LogFactory.getLog(TestHashTable.class);
-
- private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
-
- @Rule
- public TestName name = new TestName();
-
- @BeforeClass
- public static void beforeClass() throws Exception {
- TEST_UTIL.startMiniCluster(3);
- }
-
- @AfterClass
- public static void afterClass() throws Exception {
- TEST_UTIL.shutdownMiniCluster();
- }
-
- @Test
- public void testHashTable() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName());
- final byte[] family = Bytes.toBytes("family");
- final byte[] column1 = Bytes.toBytes("c1");
- final byte[] column2 = Bytes.toBytes("c2");
- final byte[] column3 = Bytes.toBytes("c3");
-
- int numRows = 100;
- int numRegions = 10;
- int numHashFiles = 3;
-
- byte[][] splitRows = new byte[numRegions-1][];
- for (int i = 1; i < numRegions; i++) {
- splitRows[i-1] = Bytes.toBytes(numRows * i / numRegions);
- }
-
- long timestamp = 1430764183454L;
- // put rows into the first table
- Table t1 = TEST_UTIL.createTable(tableName, family, splitRows);
- for (int i = 0; i < numRows; i++) {
- Put p = new Put(Bytes.toBytes(i), timestamp);
- p.addColumn(family, column1, column1);
- p.addColumn(family, column2, column2);
- p.addColumn(family, column3, column3);
- t1.put(p);
- }
- t1.close();
-
- HashTable hashTable = new HashTable(TEST_UTIL.getConfiguration());
-
- Path testDir = TEST_UTIL.getDataTestDirOnTestFS(tableName.getNameAsString());
-
- long batchSize = 300;
- int code = hashTable.run(new String[] {
- "--batchsize=" + batchSize,
- "--numhashfiles=" + numHashFiles,
- "--scanbatch=2",
- tableName.getNameAsString(),
- testDir.toString()});
- assertEquals("test job failed", 0, code);
-
- FileSystem fs = TEST_UTIL.getTestFileSystem();
-
- HashTable.TableHash tableHash = HashTable.TableHash.read(fs.getConf(), testDir);
- assertEquals(tableName.getNameAsString(), tableHash.tableName);
- assertEquals(batchSize, tableHash.batchSize);
- assertEquals(numHashFiles, tableHash.numHashFiles);
- assertEquals(numHashFiles - 1, tableHash.partitions.size());
- for (ImmutableBytesWritable bytes : tableHash.partitions) {
- LOG.debug("partition: " + Bytes.toInt(bytes.get()));
- }
-
- ImmutableMap<Integer, ImmutableBytesWritable> expectedHashes
- = ImmutableMap.<Integer, ImmutableBytesWritable>builder()
- .put(-1, new ImmutableBytesWritable(Bytes.fromHex("714cb10a9e3b5569852980edd8c6ca2f")))
- .put(5, new ImmutableBytesWritable(Bytes.fromHex("28d961d9252ce8f8d44a07b38d3e1d96")))
- .put(10, new ImmutableBytesWritable(Bytes.fromHex("f6bbc4a224d8fd929b783a92599eaffa")))
- .put(15, new ImmutableBytesWritable(Bytes.fromHex("522deb5d97f73a414ecc11457be46881")))
- .put(20, new ImmutableBytesWritable(Bytes.fromHex("b026f2611aaa46f7110116d807545352")))
- .put(25, new ImmutableBytesWritable(Bytes.fromHex("39ffc1a3094aa12a2e90ffd9cef2ce93")))
- .put(30, new ImmutableBytesWritable(Bytes.fromHex("f6b4d75727ce9a30ac29e4f08f601666")))
- .put(35, new ImmutableBytesWritable(Bytes.fromHex("422e2d2f1eb79a8f02171a705a42c090")))
- .put(40, new ImmutableBytesWritable(Bytes.fromHex("559ad61c900fffefea0a15abf8a97bc3")))
- .put(45, new ImmutableBytesWritable(Bytes.fromHex("23019084513eca41cee436b2a29611cb")))
- .put(50, new ImmutableBytesWritable(Bytes.fromHex("b40467d222ddb4949b142fe145ee9edc")))
- .put(55, new ImmutableBytesWritable(Bytes.fromHex("372bf89fcd8ca4b7ab3c1add9d07f7e4")))
- .put(60, new ImmutableBytesWritable(Bytes.fromHex("69ae0585e6255de27dce974e332b8f8b")))
- .put(65, new ImmutableBytesWritable(Bytes.fromHex("8029610044297aad0abdbecd485d8e59")))
- .put(70, new ImmutableBytesWritable(Bytes.fromHex("de5f784f7f78987b6e57ecfd81c8646f")))
- .put(75, new ImmutableBytesWritable(Bytes.fromHex("1cd757cc4e1715c8c3b1c24447a1ec56")))
- .put(80, new ImmutableBytesWritable(Bytes.fromHex("f9a53aacfeb6142b08066615e7038095")))
- .put(85, new ImmutableBytesWritable(Bytes.fromHex("89b872b7e639df32d3276b33928c0c91")))
- .put(90, new ImmutableBytesWritable(Bytes.fromHex("45eeac0646d46a474ea0484175faed38")))
- .put(95, new ImmutableBytesWritable(Bytes.fromHex("f57c447e32a08f4bf1abb2892839ac56")))
- .build();
-
- Map<Integer, ImmutableBytesWritable> actualHashes = new HashMap<>();
- Path dataDir = new Path(testDir, HashTable.HASH_DATA_DIR);
- for (int i = 0; i < numHashFiles; i++) {
- Path hashPath = new Path(dataDir, HashTable.TableHash.getDataFileName(i));
-
- MapFile.Reader reader = new MapFile.Reader(hashPath, fs.getConf());
- ImmutableBytesWritable key = new ImmutableBytesWritable();
- ImmutableBytesWritable hash = new ImmutableBytesWritable();
- while(reader.next(key, hash)) {
- String keyString = Bytes.toHex(key.get(), key.getOffset(), key.getLength());
- LOG.debug("Key: " + (keyString.isEmpty() ? "-1" : Integer.parseInt(keyString, 16))
- + " Hash: " + Bytes.toHex(hash.get(), hash.getOffset(), hash.getLength()));
-
- int intKey = -1;
- if (key.getLength() > 0) {
- intKey = Bytes.toInt(key.get(), key.getOffset(), key.getLength());
- }
- if (actualHashes.containsKey(intKey)) {
- Assert.fail("duplicate key in data files: " + intKey);
- }
- actualHashes.put(intKey, new ImmutableBytesWritable(hash.copyBytes()));
- }
- reader.close();
- }
-
- FileStatus[] files = fs.listStatus(testDir);
- for (FileStatus file : files) {
- LOG.debug("Output file: " + file.getPath());
- }
-
- files = fs.listStatus(dataDir);
- for (FileStatus file : files) {
- LOG.debug("Data file: " + file.getPath());
- }
-
- if (!expectedHashes.equals(actualHashes)) {
- LOG.error("Diff: " + Maps.difference(expectedHashes, actualHashes));
- }
- Assert.assertEquals(expectedHashes, actualHashes);
-
- TEST_UTIL.deleteTable(tableName);
- TEST_UTIL.cleanupDataTestDirOnTestFS();
- }
-
-
-}
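
The deleted test drives the HashTable tool programmatically. A minimal sketch of the same invocation outside a test, using only the flags shown above (the source table name and output directory are hypothetical):

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.hbase.HBaseConfiguration;
  import org.apache.hadoop.hbase.mapreduce.HashTable;

  public class HashTableDriver {
    public static void main(String[] args) throws Exception {
      Configuration conf = HBaseConfiguration.create();
      // Same flags the test passes: batch size, number of hash files, scanner batching.
      int exitCode = new HashTable(conf).run(new String[] {
          "--batchsize=300",
          "--numhashfiles=3",
          "--scanbatch=2",
          "mytable",                 // hypothetical source table
          "/tmp/hashes/mytable"      // hypothetical output directory
      });
      System.exit(exitCode);
    }
  }

A later SyncTable run can consume the hash directory; the test instead reads it back with HashTable.TableHash.read() to verify the manifest and partitions.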
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScanBase.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScanBase.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScanBase.java
new file mode 100644
index 0000000..13b6a96
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScanBase.java
@@ -0,0 +1,287 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.NavigableMap;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+
+
+/**
+ * <p>
+ * Tests various scan start and stop row scenarios. This is set in a scan and
+ * tested in a MapReduce job to see if that is handed over and done properly
+ * too.
+ * </p>
+ * <p>
+ * This test is broken into two parts in order to side-step the test timeout
+ * period of 900 seconds, as documented in HBASE-8326.
+ * </p>
+ */
+public abstract class TestTableInputFormatScanBase {
+
+ private static final Log LOG = LogFactory.getLog(TestTableInputFormatScanBase.class);
+ static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+
+ static final TableName TABLE_NAME = TableName.valueOf("scantest");
+ static final byte[][] INPUT_FAMILYS = {Bytes.toBytes("content1"), Bytes.toBytes("content2")};
+ static final String KEY_STARTROW = "startRow";
+ static final String KEY_LASTROW = "stpRow";
+
+ private static Table table = null;
+
+ @BeforeClass
+ public static void setUpBeforeClass() throws Exception {
+ // test intermittently fails under hadoop2 (2.0.2-alpha) if shortcircuit-read (scr) is on.
+ // this turns it off for this test. TODO: Figure out why scr breaks recovery.
+ System.setProperty("hbase.tests.use.shortcircuit.reads", "false");
+
+ // switch TIF to log at DEBUG level
+ TEST_UTIL.enableDebug(TableInputFormat.class);
+ TEST_UTIL.enableDebug(TableInputFormatBase.class);
+ // start mini hbase cluster
+ TEST_UTIL.startMiniCluster(3);
+ // create and fill table
+ table = TEST_UTIL.createMultiRegionTable(TABLE_NAME, INPUT_FAMILYS);
+ TEST_UTIL.loadTable(table, INPUT_FAMILYS, null, false);
+ }
+
+ @AfterClass
+ public static void tearDownAfterClass() throws Exception {
+ TEST_UTIL.shutdownMiniCluster();
+ }
+
+ /**
+ * Pass the key and value to reduce.
+ */
+ public static class ScanMapper
+ extends TableMapper<ImmutableBytesWritable, ImmutableBytesWritable> {
+
+ /**
+ * Pass the key and value to reduce.
+ *
+ * @param key The key, here "aaa", "aab" etc.
+ * @param value The value is the same as the key.
+ * @param context The task context.
+ * @throws IOException When reading the rows fails.
+ */
+ @Override
+ public void map(ImmutableBytesWritable key, Result value,
+ Context context)
+ throws IOException, InterruptedException {
+ if (value.size() != 2) {
+ throw new IOException("There should be two input columns");
+ }
+ Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>>
+ cfMap = value.getMap();
+
+ if (!cfMap.containsKey(INPUT_FAMILYS[0]) || !cfMap.containsKey(INPUT_FAMILYS[1])) {
+ throw new IOException("Wrong input columns. Missing: '" +
+ Bytes.toString(INPUT_FAMILYS[0]) + "' or '" + Bytes.toString(INPUT_FAMILYS[1]) + "'.");
+ }
+
+ String val0 = Bytes.toStringBinary(value.getValue(INPUT_FAMILYS[0], null));
+ String val1 = Bytes.toStringBinary(value.getValue(INPUT_FAMILYS[1], null));
+ LOG.info("map: key -> " + Bytes.toStringBinary(key.get()) +
+ ", value -> (" + val0 + ", " + val1 + ")");
+ context.write(key, key);
+ }
+ }
+
+ /**
+ * Checks the last and first key seen against the scanner boundaries.
+ */
+ public static class ScanReducer
+ extends Reducer<ImmutableBytesWritable, ImmutableBytesWritable,
+ NullWritable, NullWritable> {
+
+ private String first = null;
+ private String last = null;
+
+ protected void reduce(ImmutableBytesWritable key,
+ Iterable<ImmutableBytesWritable> values, Context context)
+ throws IOException ,InterruptedException {
+ int count = 0;
+ for (ImmutableBytesWritable value : values) {
+ String val = Bytes.toStringBinary(value.get());
+ LOG.info("reduce: key[" + count + "] -> " +
+ Bytes.toStringBinary(key.get()) + ", value -> " + val);
+ if (first == null) first = val;
+ last = val;
+ count++;
+ }
+ }
+
+ protected void cleanup(Context context)
+ throws IOException, InterruptedException {
+ Configuration c = context.getConfiguration();
+ String startRow = c.get(KEY_STARTROW);
+ String lastRow = c.get(KEY_LASTROW);
+ LOG.info("cleanup: first -> \"" + first + "\", start row -> \"" + startRow + "\"");
+ LOG.info("cleanup: last -> \"" + last + "\", last row -> \"" + lastRow + "\"");
+ if (startRow != null && startRow.length() > 0) {
+ assertEquals(startRow, first);
+ }
+ if (lastRow != null && lastRow.length() > 0) {
+ assertEquals(lastRow, last);
+ }
+ }
+
+ }
+
+ /**
+ * Tests an MR Scan initialized from properties set in the Configuration.
+ *
+ * @throws IOException
+ * @throws ClassNotFoundException
+ * @throws InterruptedException
+ */
+ protected void testScanFromConfiguration(String start, String stop, String last)
+ throws IOException, InterruptedException, ClassNotFoundException {
+ String jobName = "ScanFromConfig" + (start != null ? start.toUpperCase(Locale.ROOT) : "Empty") +
+ "To" + (stop != null ? stop.toUpperCase(Locale.ROOT) : "Empty");
+ Configuration c = new Configuration(TEST_UTIL.getConfiguration());
+ c.set(TableInputFormat.INPUT_TABLE, TABLE_NAME.getNameAsString());
+ c.set(TableInputFormat.SCAN_COLUMN_FAMILY, Bytes.toString(INPUT_FAMILYS[0]) + ", "
+ + Bytes.toString(INPUT_FAMILYS[1]));
+ c.set(KEY_STARTROW, start != null ? start : "");
+ c.set(KEY_LASTROW, last != null ? last : "");
+
+ if (start != null) {
+ c.set(TableInputFormat.SCAN_ROW_START, start);
+ }
+
+ if (stop != null) {
+ c.set(TableInputFormat.SCAN_ROW_STOP, stop);
+ }
+
+ Job job = new Job(c, jobName);
+ job.setMapperClass(ScanMapper.class);
+ job.setReducerClass(ScanReducer.class);
+ job.setMapOutputKeyClass(ImmutableBytesWritable.class);
+ job.setMapOutputValueClass(ImmutableBytesWritable.class);
+ job.setInputFormatClass(TableInputFormat.class);
+ job.setNumReduceTasks(1);
+ FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
+ TableMapReduceUtil.addDependencyJars(job);
+ assertTrue(job.waitForCompletion(true));
+ }
+
+ /**
+   * Tests an MR scan using specific start and stop rows.
+ *
+ * @throws IOException
+ * @throws ClassNotFoundException
+ * @throws InterruptedException
+ */
+ protected void testScan(String start, String stop, String last)
+ throws IOException, InterruptedException, ClassNotFoundException {
+ String jobName = "Scan" + (start != null ? start.toUpperCase(Locale.ROOT) : "Empty") +
+ "To" + (stop != null ? stop.toUpperCase(Locale.ROOT) : "Empty");
+ LOG.info("Before map/reduce startup - job " + jobName);
+ Configuration c = new Configuration(TEST_UTIL.getConfiguration());
+ Scan scan = new Scan();
+ scan.addFamily(INPUT_FAMILYS[0]);
+ scan.addFamily(INPUT_FAMILYS[1]);
+ if (start != null) {
+ scan.setStartRow(Bytes.toBytes(start));
+ }
+ c.set(KEY_STARTROW, start != null ? start : "");
+ if (stop != null) {
+ scan.setStopRow(Bytes.toBytes(stop));
+ }
+ c.set(KEY_LASTROW, last != null ? last : "");
+ LOG.info("scan before: " + scan);
+ Job job = new Job(c, jobName);
+ TableMapReduceUtil.initTableMapperJob(
+ TABLE_NAME, scan, ScanMapper.class,
+ ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
+ job.setReducerClass(ScanReducer.class);
+ job.setNumReduceTasks(1); // one to get final "first" and "last" key
+ FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
+ LOG.info("Started " + job.getJobName());
+ assertTrue(job.waitForCompletion(true));
+ LOG.info("After map/reduce completion - job " + jobName);
+ }
+
+
+ /**
+   * Tests an MR scan using data skew auto-balance
+ *
+ * @throws IOException
+ * @throws ClassNotFoundException
+ * @throws InterruptedException
+ */
+ public void testNumOfSplits(String ratio, int expectedNumOfSplits) throws IOException,
+ InterruptedException,
+ ClassNotFoundException {
+ String jobName = "TestJobForNumOfSplits";
+ LOG.info("Before map/reduce startup - job " + jobName);
+ Configuration c = new Configuration(TEST_UTIL.getConfiguration());
+ Scan scan = new Scan();
+ scan.addFamily(INPUT_FAMILYS[0]);
+ scan.addFamily(INPUT_FAMILYS[1]);
+ c.set("hbase.mapreduce.input.autobalance", "true");
+ c.set("hbase.mapreduce.input.autobalance.maxskewratio", ratio);
+ c.set(KEY_STARTROW, "");
+ c.set(KEY_LASTROW, "");
+ Job job = new Job(c, jobName);
+ TableMapReduceUtil.initTableMapperJob(TABLE_NAME.getNameAsString(), scan, ScanMapper.class,
+ ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
+ TableInputFormat tif = new TableInputFormat();
+ tif.setConf(job.getConfiguration());
+ Assert.assertEquals(TABLE_NAME, table.getName());
+ List<InputSplit> splits = tif.getSplits(job);
+ Assert.assertEquals(expectedNumOfSplits, splits.size());
+ }
+
+ /**
+ * Tests for the getSplitKey() method in TableInputFormatBase.java
+ */
+ public void testGetSplitKey(byte[] startKey, byte[] endKey, byte[] splitKey, boolean isText) {
+ byte[] result = TableInputFormatBase.getSplitKey(startKey, endKey, isText);
+ Assert.assertArrayEquals(splitKey, result);
+ }
+}
+
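
testScanFromConfiguration() above shows that TableInputFormat can be driven purely from Configuration keys. A minimal sketch of the same setup outside the test harness (the row bounds and job name are hypothetical):

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.hbase.HBaseConfiguration;
  import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
  import org.apache.hadoop.mapreduce.Job;

  public class ScanFromConfig {
    public static void main(String[] args) throws Exception {
      Configuration conf = HBaseConfiguration.create();
      // The keys testScanFromConfiguration() sets: table, families, optional row range.
      conf.set(TableInputFormat.INPUT_TABLE, "scantest");
      conf.set(TableInputFormat.SCAN_COLUMN_FAMILY, "content1, content2");
      conf.set(TableInputFormat.SCAN_ROW_START, "bbb");   // hypothetical bounds
      conf.set(TableInputFormat.SCAN_ROW_STOP, "opp");
      Job job = Job.getInstance(conf, "scan-from-config");
      job.setInputFormatClass(TableInputFormat.class);
      // Mapper/reducer wiring and output path are set exactly as in the test above.
    }
  }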
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduce.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduce.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduce.java
new file mode 100644
index 0000000..d702e0d
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduce.java
@@ -0,0 +1,174 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Map;
+import java.util.NavigableMap;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.TableNotEnabledException;
+import org.apache.hadoop.hbase.TableNotFoundException;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapreduce.Counter;
+import org.apache.hadoop.mapreduce.Counters;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+/**
+ * Test Map/Reduce job over HBase tables. The map/reduce process we're testing
+ * on our tables is simple - take every row in the table, reverse the value of
+ * a particular cell, and write it back to the table.
+ */
+
+@Category({VerySlowMapReduceTests.class, LargeTests.class})
+public class TestTableMapReduce extends TestTableMapReduceBase {
+ private static final Log LOG = LogFactory.getLog(TestTableMapReduce.class);
+
+ @Override
+ protected Log getLog() { return LOG; }
+
+ /**
+   * Pass the given key and processed record to reduce
+ */
+ static class ProcessContentsMapper extends TableMapper<ImmutableBytesWritable, Put> {
+
+ /**
+ * Pass the key, and reversed value to reduce
+ *
+ * @param key
+ * @param value
+ * @param context
+ * @throws IOException
+ */
+ @Override
+ public void map(ImmutableBytesWritable key, Result value,
+ Context context)
+ throws IOException, InterruptedException {
+ if (value.size() != 1) {
+ throw new IOException("There should only be one input column");
+ }
+ Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>>
+ cf = value.getMap();
+ if(!cf.containsKey(INPUT_FAMILY)) {
+ throw new IOException("Wrong input columns. Missing: '" +
+ Bytes.toString(INPUT_FAMILY) + "'.");
+ }
+
+ // Get the original value and reverse it
+ String originalValue = Bytes.toString(value.getValue(INPUT_FAMILY, INPUT_FAMILY));
+ StringBuilder newValue = new StringBuilder(originalValue);
+ newValue.reverse();
+ // Now set the value to be collected
+ Put outval = new Put(key.get());
+ outval.addColumn(OUTPUT_FAMILY, null, Bytes.toBytes(newValue.toString()));
+ context.write(key, outval);
+ }
+ }
+
+ @Override
+ protected void runTestOnTable(Table table) throws IOException {
+ Job job = null;
+ try {
+ LOG.info("Before map/reduce startup");
+ job = new Job(table.getConfiguration(), "process column contents");
+ job.setNumReduceTasks(1);
+ Scan scan = new Scan();
+ scan.addFamily(INPUT_FAMILY);
+ TableMapReduceUtil.initTableMapperJob(
+ table.getName().getNameAsString(), scan,
+ ProcessContentsMapper.class, ImmutableBytesWritable.class,
+ Put.class, job);
+ TableMapReduceUtil.initTableReducerJob(
+ table.getName().getNameAsString(),
+ IdentityTableReducer.class, job);
+ FileOutputFormat.setOutputPath(job, new Path("test"));
+ LOG.info("Started " + table.getName().getNameAsString());
+ assertTrue(job.waitForCompletion(true));
+ LOG.info("After map/reduce completion");
+
+ // verify map-reduce results
+ verify(table.getName());
+
+ verifyJobCountersAreEmitted(job);
+ } catch (InterruptedException e) {
+ throw new IOException(e);
+ } catch (ClassNotFoundException e) {
+ throw new IOException(e);
+ } finally {
+ table.close();
+ if (job != null) {
+ FileUtil.fullyDelete(
+ new File(job.getConfiguration().get("hadoop.tmp.dir")));
+ }
+ }
+ }
+
+ /**
+ * Verify scan counters are emitted from the job
+ * @param job
+ * @throws IOException
+ */
+ private void verifyJobCountersAreEmitted(Job job) throws IOException {
+ Counters counters = job.getCounters();
+ Counter counter
+ = counters.findCounter(TableRecordReaderImpl.HBASE_COUNTER_GROUP_NAME, "RPC_CALLS");
+ assertNotNull("Unable to find Job counter for HBase scan metrics, RPC_CALLS", counter);
+ assertTrue("Counter value for RPC_CALLS should be larger than 0", counter.getValue() > 0);
+ }
+
+ @Test(expected = TableNotEnabledException.class)
+ public void testWritingToDisabledTable() throws IOException {
+
+ try (Admin admin = UTIL.getConnection().getAdmin();
+ Table table = UTIL.getConnection().getTable(TABLE_FOR_NEGATIVE_TESTS)) {
+ admin.disableTable(table.getName());
+ runTestOnTable(table);
+ fail("Should not have reached here, should have thrown an exception");
+ }
+ }
+
+ @Test(expected = TableNotFoundException.class)
+ public void testWritingToNonExistentTable() throws IOException {
+
+ try (Table table = UTIL.getConnection().getTable(TableName.valueOf("table-does-not-exist"))) {
+ runTestOnTable(table);
+ fail("Should not have reached here, should have thrown an exception");
+ }
+ }
+}
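
runTestOnTable() above wires a read-from-table, write-back-to-table job with TableMapReduceUtil. A minimal sketch of the same wiring with a trivial stand-in mapper; the table and family names come from the test and are placeholders for a real deployment:

  import java.io.IOException;

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.hbase.CellUtil;
  import org.apache.hadoop.hbase.HBaseConfiguration;
  import org.apache.hadoop.hbase.client.Put;
  import org.apache.hadoop.hbase.client.Result;
  import org.apache.hadoop.hbase.client.Scan;
  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
  import org.apache.hadoop.hbase.mapreduce.IdentityTableReducer;
  import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
  import org.apache.hadoop.hbase.mapreduce.TableMapper;
  import org.apache.hadoop.hbase.util.Bytes;
  import org.apache.hadoop.mapreduce.Job;
  import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

  public class CopyCellJob {
    /** Copies the first cell of each row into the "text" family, unmodified. */
    static class CopyMapper extends TableMapper<ImmutableBytesWritable, Put> {
      @Override
      public void map(ImmutableBytesWritable key, Result value, Context context)
          throws IOException, InterruptedException {
        Put put = new Put(key.get());
        put.addColumn(Bytes.toBytes("text"), null, CellUtil.cloneValue(value.rawCells()[0]));
        context.write(key, put);
      }
    }

    public static void main(String[] args) throws Exception {
      Configuration conf = HBaseConfiguration.create();
      Job job = Job.getInstance(conf, "copy column contents");
      job.setNumReduceTasks(1);
      Scan scan = new Scan();
      scan.addFamily(Bytes.toBytes("contents"));
      // Read from and write back to the same table, as runTestOnTable() does above.
      TableMapReduceUtil.initTableMapperJob("mrtest", scan, CopyMapper.class,
          ImmutableBytesWritable.class, Put.class, job);
      TableMapReduceUtil.initTableReducerJob("mrtest", IdentityTableReducer.class, job);
      FileOutputFormat.setOutputPath(job, new Path("copy-cell-output"));
      System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
  }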
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduceBase.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduceBase.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduceBase.java
new file mode 100644
index 0000000..27bf063
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduceBase.java
@@ -0,0 +1,233 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.NavigableMap;
+
+import org.apache.commons.logging.Log;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.CategoryBasedTimeout;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestRule;
+
+/**
+ * A base class for a test Map/Reduce job over HBase tables. The map/reduce process we're testing
+ * on our tables is simple - take every row in the table, reverse the value of a particular cell,
+ * and write it back to the table. Implements common components between mapred and mapreduce
+ * implementations.
+ */
+public abstract class TestTableMapReduceBase {
+ @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
+ withTimeout(this.getClass()).withLookingForStuckThread(true).build();
+ protected static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
+ protected static final TableName MULTI_REGION_TABLE_NAME = TableName.valueOf("mrtest");
+ protected static final TableName TABLE_FOR_NEGATIVE_TESTS = TableName.valueOf("testfailuretable");
+ protected static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
+ protected static final byte[] OUTPUT_FAMILY = Bytes.toBytes("text");
+
+ protected static final byte[][] columns = new byte[][] {
+ INPUT_FAMILY,
+ OUTPUT_FAMILY
+ };
+
+ /**
+ * Retrieve my logger instance.
+ */
+ protected abstract Log getLog();
+
+ /**
+ * Handles API-specifics for setting up and executing the job.
+ */
+ protected abstract void runTestOnTable(Table table) throws IOException;
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ UTIL.startMiniCluster();
+ Table table =
+ UTIL.createMultiRegionTable(MULTI_REGION_TABLE_NAME, new byte[][] { INPUT_FAMILY,
+ OUTPUT_FAMILY });
+ UTIL.loadTable(table, INPUT_FAMILY, false);
+ UTIL.createTable(TABLE_FOR_NEGATIVE_TESTS, new byte[][] { INPUT_FAMILY, OUTPUT_FAMILY });
+ }
+
+ @AfterClass
+ public static void afterClass() throws Exception {
+ UTIL.deleteTable(TABLE_FOR_NEGATIVE_TESTS);
+ UTIL.shutdownMiniCluster();
+ }
+
+ /**
+ * Test a map/reduce against a multi-region table
+ * @throws IOException
+ */
+ @Test
+ public void testMultiRegionTable() throws IOException {
+ runTestOnTable(UTIL.getConnection().getTable(MULTI_REGION_TABLE_NAME));
+ }
+
+ @Test
+ public void testCombiner() throws IOException {
+ Configuration conf = new Configuration(UTIL.getConfiguration());
+ // force use of combiner for testing purposes
+ conf.setInt("mapreduce.map.combine.minspills", 1);
+ runTestOnTable(UTIL.getConnection().getTable(MULTI_REGION_TABLE_NAME));
+ }
+
+ /**
+ * Implements mapper logic for use across APIs.
+ */
+ protected static Put map(ImmutableBytesWritable key, Result value) throws IOException {
+ if (value.size() != 1) {
+ throw new IOException("There should only be one input column");
+ }
+ Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>>
+ cf = value.getMap();
+ if(!cf.containsKey(INPUT_FAMILY)) {
+ throw new IOException("Wrong input columns. Missing: '" +
+ Bytes.toString(INPUT_FAMILY) + "'.");
+ }
+
+ // Get the original value and reverse it
+
+ String originalValue = Bytes.toString(value.getValue(INPUT_FAMILY, INPUT_FAMILY));
+ StringBuilder newValue = new StringBuilder(originalValue);
+ newValue.reverse();
+
+ // Now set the value to be collected
+
+ Put outval = new Put(key.get());
+ outval.addColumn(OUTPUT_FAMILY, null, Bytes.toBytes(newValue.toString()));
+ return outval;
+ }
+
+ protected void verify(TableName tableName) throws IOException {
+ Table table = UTIL.getConnection().getTable(tableName);
+ boolean verified = false;
+ long pause = UTIL.getConfiguration().getLong("hbase.client.pause", 5 * 1000);
+ int numRetries = UTIL.getConfiguration().getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 5);
+ for (int i = 0; i < numRetries; i++) {
+ try {
+ getLog().info("Verification attempt #" + i);
+ verifyAttempt(table);
+ verified = true;
+ break;
+ } catch (NullPointerException e) {
+ // If here, a cell was empty. Presume its because updates came in
+ // after the scanner had been opened. Wait a while and retry.
+ getLog().debug("Verification attempt failed: " + e.getMessage());
+ }
+ try {
+ Thread.sleep(pause);
+ } catch (InterruptedException e) {
+ // continue
+ }
+ }
+ assertTrue(verified);
+ }
+
+ /**
+ * Looks at every value of the mapreduce output and verifies that indeed
+ * the values have been reversed.
+ * @param table Table to scan.
+ * @throws IOException
+ * @throws NullPointerException if we failed to find a cell value
+ */
+ private void verifyAttempt(final Table table) throws IOException, NullPointerException {
+ Scan scan = new Scan();
+ TableInputFormat.addColumns(scan, columns);
+ ResultScanner scanner = table.getScanner(scan);
+ try {
+ Iterator<Result> itr = scanner.iterator();
+ assertTrue(itr.hasNext());
+ while(itr.hasNext()) {
+ Result r = itr.next();
+ if (getLog().isDebugEnabled()) {
+ if (r.size() > 2 ) {
+ throw new IOException("Too many results, expected 2 got " +
+ r.size());
+ }
+ }
+ byte[] firstValue = null;
+ byte[] secondValue = null;
+ int count = 0;
+ for(Cell kv : r.listCells()) {
+ if (count == 0) {
+ firstValue = CellUtil.cloneValue(kv);
+ }
+ if (count == 1) {
+ secondValue = CellUtil.cloneValue(kv);
+ }
+ count++;
+ if (count == 2) {
+ break;
+ }
+ }
+
+
+ if (firstValue == null) {
+ throw new NullPointerException(Bytes.toString(r.getRow()) +
+ ": first value is null");
+ }
+ String first = Bytes.toString(firstValue);
+
+ if (secondValue == null) {
+ throw new NullPointerException(Bytes.toString(r.getRow()) +
+ ": second value is null");
+ }
+ byte[] secondReversed = new byte[secondValue.length];
+ for (int i = 0, j = secondValue.length - 1; j >= 0; j--, i++) {
+ secondReversed[i] = secondValue[j];
+ }
+ String second = Bytes.toString(secondReversed);
+
+ if (first.compareTo(second) != 0) {
+ if (getLog().isDebugEnabled()) {
+ getLog().debug("second key is not the reverse of first. row=" +
+ Bytes.toStringBinary(r.getRow()) + ", first value=" + first +
+ ", second value=" + second);
+ }
+ fail();
+ }
+ }
+ } finally {
+ scanner.close();
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduceUtil.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduceUtil.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduceUtil.java
new file mode 100644
index 0000000..506bf4f
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduceUtil.java
@@ -0,0 +1,99 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
+ * agreements. See the NOTICE file distributed with this work for additional information regarding
+ * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with the License. You may
+ * obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+/**
+ * Test different variants of initTableMapperJob method
+ */
+@Category({MapReduceTests.class, SmallTests.class})
+public class TestTableMapReduceUtil {
+
+ /*
+ * initTableSnapshotMapperJob is tested in {@link TestTableSnapshotInputFormat} because
+ * the method depends on an online cluster.
+ */
+
+ @Test
+ public void testInitTableMapperJob1() throws Exception {
+ Configuration configuration = new Configuration();
+ Job job = new Job(configuration, "tableName");
+ // test
+ TableMapReduceUtil.initTableMapperJob("Table", new Scan(), Import.Importer.class, Text.class,
+ Text.class, job, false, WALInputFormat.class);
+ assertEquals(WALInputFormat.class, job.getInputFormatClass());
+ assertEquals(Import.Importer.class, job.getMapperClass());
+ assertEquals(LongWritable.class, job.getOutputKeyClass());
+ assertEquals(Text.class, job.getOutputValueClass());
+ assertNull(job.getCombinerClass());
+ assertEquals("Table", job.getConfiguration().get(TableInputFormat.INPUT_TABLE));
+ }
+
+ @Test
+ public void testInitTableMapperJob2() throws Exception {
+ Configuration configuration = new Configuration();
+ Job job = new Job(configuration, "tableName");
+ TableMapReduceUtil.initTableMapperJob(Bytes.toBytes("Table"), new Scan(),
+ Import.Importer.class, Text.class, Text.class, job, false, WALInputFormat.class);
+ assertEquals(WALInputFormat.class, job.getInputFormatClass());
+ assertEquals(Import.Importer.class, job.getMapperClass());
+ assertEquals(LongWritable.class, job.getOutputKeyClass());
+ assertEquals(Text.class, job.getOutputValueClass());
+ assertNull(job.getCombinerClass());
+ assertEquals("Table", job.getConfiguration().get(TableInputFormat.INPUT_TABLE));
+ }
+
+ @Test
+ public void testInitTableMapperJob3() throws Exception {
+ Configuration configuration = new Configuration();
+ Job job = new Job(configuration, "tableName");
+ TableMapReduceUtil.initTableMapperJob(Bytes.toBytes("Table"), new Scan(),
+ Import.Importer.class, Text.class, Text.class, job);
+ assertEquals(TableInputFormat.class, job.getInputFormatClass());
+ assertEquals(Import.Importer.class, job.getMapperClass());
+ assertEquals(LongWritable.class, job.getOutputKeyClass());
+ assertEquals(Text.class, job.getOutputValueClass());
+ assertNull(job.getCombinerClass());
+ assertEquals("Table", job.getConfiguration().get(TableInputFormat.INPUT_TABLE));
+ }
+
+ @Test
+ public void testInitTableMapperJob4() throws Exception {
+ Configuration configuration = new Configuration();
+ Job job = new Job(configuration, "tableName");
+ TableMapReduceUtil.initTableMapperJob(Bytes.toBytes("Table"), new Scan(),
+ Import.Importer.class, Text.class, Text.class, job, false);
+ assertEquals(TableInputFormat.class, job.getInputFormatClass());
+ assertEquals(Import.Importer.class, job.getMapperClass());
+ assertEquals(LongWritable.class, job.getOutputKeyClass());
+ assertEquals(Text.class, job.getOutputValueClass());
+ assertNull(job.getCombinerClass());
+ assertEquals("Table", job.getConfiguration().get(TableInputFormat.INPUT_TABLE));
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java
new file mode 100644
index 0000000..028df98
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java
@@ -0,0 +1,373 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.CategoryBasedTimeout;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HDFSBlocksDistribution;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.TestTableSnapshotScanner;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat.TableSnapshotRegionSplit;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+import org.junit.rules.TestRule;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
+
+import java.util.Arrays;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
+import org.apache.hadoop.hbase.util.FSUtils;
+
+@Category({VerySlowMapReduceTests.class, LargeTests.class})
+public class TestTableSnapshotInputFormat extends TableSnapshotInputFormatTestBase {
+ private static final Log LOG = LogFactory.getLog(TestTableSnapshotInputFormat.class);
+ @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
+ withTimeout(this.getClass()).withLookingForStuckThread(true).build();
+
+ private static final byte[] bbb = Bytes.toBytes("bbb");
+ private static final byte[] yyy = Bytes.toBytes("yyy");
+
+ @Rule
+ public TestName name = new TestName();
+
+ @Override
+ protected byte[] getStartRow() {
+ return bbb;
+ }
+
+ @Override
+ protected byte[] getEndRow() {
+ return yyy;
+ }
+
+ @After
+ public void tearDown() throws Exception {
+ }
+
+ @Test
+ public void testGetBestLocations() throws IOException {
+ TableSnapshotInputFormatImpl tsif = new TableSnapshotInputFormatImpl();
+ Configuration conf = UTIL.getConfiguration();
+
+ HDFSBlocksDistribution blockDistribution = new HDFSBlocksDistribution();
+ Assert.assertEquals(Lists.newArrayList(),
+ TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution));
+
+ blockDistribution.addHostsAndBlockWeight(new String[] {"h1"}, 1);
+ Assert.assertEquals(Lists.newArrayList("h1"),
+ TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution));
+
+ blockDistribution.addHostsAndBlockWeight(new String[] {"h1"}, 1);
+ Assert.assertEquals(Lists.newArrayList("h1"),
+ TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution));
+
+ blockDistribution.addHostsAndBlockWeight(new String[] {"h2"}, 1);
+ Assert.assertEquals(Lists.newArrayList("h1"),
+ TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution));
+
+ blockDistribution = new HDFSBlocksDistribution();
+ blockDistribution.addHostsAndBlockWeight(new String[] {"h1"}, 10);
+ blockDistribution.addHostsAndBlockWeight(new String[] {"h2"}, 7);
+ blockDistribution.addHostsAndBlockWeight(new String[] {"h3"}, 5);
+ blockDistribution.addHostsAndBlockWeight(new String[] {"h4"}, 1);
+ Assert.assertEquals(Lists.newArrayList("h1"),
+ TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution));
+
+ blockDistribution.addHostsAndBlockWeight(new String[] {"h2"}, 2);
+ Assert.assertEquals(Lists.newArrayList("h1", "h2"),
+ TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution));
+
+ blockDistribution.addHostsAndBlockWeight(new String[] {"h2"}, 3);
+ Assert.assertEquals(Lists.newArrayList("h2", "h1"),
+ TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution));
+
+ blockDistribution.addHostsAndBlockWeight(new String[] {"h3"}, 6);
+ blockDistribution.addHostsAndBlockWeight(new String[] {"h4"}, 9);
+
+ Assert.assertEquals(Lists.newArrayList("h2", "h3", "h4", "h1"),
+ TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution));
+ }
+
+ public static enum TestTableSnapshotCounters {
+ VALIDATION_ERROR
+ }
+
+ public static class TestTableSnapshotMapper
+ extends TableMapper<ImmutableBytesWritable, NullWritable> {
+ @Override
+ protected void map(ImmutableBytesWritable key, Result value,
+ Context context) throws IOException, InterruptedException {
+ // Validate a single row coming from the snapshot, and emit the row key
+ verifyRowFromMap(key, value);
+ context.write(key, NullWritable.get());
+ }
+ }
+
+ public static class TestTableSnapshotReducer
+ extends Reducer<ImmutableBytesWritable, NullWritable, NullWritable, NullWritable> {
+ HBaseTestingUtility.SeenRowTracker rowTracker =
+ new HBaseTestingUtility.SeenRowTracker(bbb, yyy);
+ @Override
+ protected void reduce(ImmutableBytesWritable key, Iterable<NullWritable> values,
+ Context context) throws IOException, InterruptedException {
+ rowTracker.addRow(key.get());
+ }
+
+ @Override
+ protected void cleanup(Context context) throws IOException,
+ InterruptedException {
+ rowTracker.validate();
+ }
+ }
+
+ @Test
+ public void testInitTableSnapshotMapperJobConfig() throws Exception {
+ setupCluster();
+ final TableName tableName = TableName.valueOf(name.getMethodName());
+ String snapshotName = "foo";
+
+ try {
+ createTableAndSnapshot(UTIL, tableName, snapshotName, getStartRow(), getEndRow(), 1);
+ Job job = new Job(UTIL.getConfiguration());
+ Path tmpTableDir = UTIL.getDataTestDirOnTestFS(snapshotName);
+
+ TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
+ new Scan(), TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
+ NullWritable.class, job, false, tmpTableDir);
+
+ // TODO: would be better to examine directly the cache instance that results from this
+ // config. Currently this is not possible because BlockCache initialization is static.
+ Assert.assertEquals(
+ "Snapshot job should be configured for default LruBlockCache.",
+ HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT,
+ job.getConfiguration().getFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, -1), 0.01);
+ Assert.assertEquals(
+ "Snapshot job should not use BucketCache.",
+ 0, job.getConfiguration().getFloat("hbase.bucketcache.size", -1), 0.01);
+ } finally {
+ UTIL.getAdmin().deleteSnapshot(snapshotName);
+ UTIL.deleteTable(tableName);
+ tearDownCluster();
+ }
+ }
+
+ @Override
+ public void testRestoreSnapshotDoesNotCreateBackRefLinksInit(TableName tableName,
+ String snapshotName, Path tmpTableDir) throws Exception {
+ Job job = new Job(UTIL.getConfiguration());
+ TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
+ new Scan(), TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
+ NullWritable.class, job, false, tmpTableDir);
+ }
+
+ @Override
+ public void testWithMockedMapReduce(HBaseTestingUtility util, String snapshotName,
+ int numRegions, int expectedNumSplits) throws Exception {
+ setupCluster();
+ final TableName tableName = TableName.valueOf(name.getMethodName());
+ try {
+ createTableAndSnapshot(
+ util, tableName, snapshotName, getStartRow(), getEndRow(), numRegions);
+
+ Job job = new Job(util.getConfiguration());
+ Path tmpTableDir = util.getDataTestDirOnTestFS(snapshotName);
+ Scan scan = new Scan(getStartRow(), getEndRow()); // limit the scan
+
+ TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
+ scan, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
+ NullWritable.class, job, false, tmpTableDir);
+
+ verifyWithMockedMapReduce(job, numRegions, expectedNumSplits, getStartRow(), getEndRow());
+
+ } finally {
+ util.getAdmin().deleteSnapshot(snapshotName);
+ util.deleteTable(tableName);
+ tearDownCluster();
+ }
+ }
+
+ @Test
+ public void testNoDuplicateResultsWhenSplitting() throws Exception {
+ setupCluster();
+ TableName tableName = TableName.valueOf("testNoDuplicateResultsWhenSplitting");
+ String snapshotName = "testSnapshotBug";
+ try {
+ if (UTIL.getAdmin().tableExists(tableName)) {
+ UTIL.deleteTable(tableName);
+ }
+
+ UTIL.createTable(tableName, FAMILIES);
+ Admin admin = UTIL.getAdmin();
+
+ // put some stuff in the table
+ Table table = UTIL.getConnection().getTable(tableName);
+ UTIL.loadTable(table, FAMILIES);
+
+ // split to 2 regions
+ admin.split(tableName, Bytes.toBytes("eee"));
+ TestTableSnapshotScanner.blockUntilSplitFinished(UTIL, tableName, 2);
+
+ Path rootDir = FSUtils.getRootDir(UTIL.getConfiguration());
+ FileSystem fs = rootDir.getFileSystem(UTIL.getConfiguration());
+
+ SnapshotTestingUtils.createSnapshotAndValidate(admin, tableName, Arrays.asList(FAMILIES),
+ null, snapshotName, rootDir, fs, true);
+
+ // load different values
+ byte[] value = Bytes.toBytes("after_snapshot_value");
+ UTIL.loadTable(table, FAMILIES, value);
+
+ // cause flush to create new files in the region
+ admin.flush(tableName);
+ table.close();
+
+ Job job = new Job(UTIL.getConfiguration());
+ Path tmpTableDir = UTIL.getDataTestDirOnTestFS(snapshotName);
+ // limit the scan
+ Scan scan = new Scan().withStartRow(getStartRow()).withStopRow(getEndRow());
+
+ TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName, scan,
+ TestTableSnapshotMapper.class, ImmutableBytesWritable.class, NullWritable.class, job, false,
+ tmpTableDir);
+
+ verifyWithMockedMapReduce(job, 2, 2, getStartRow(), getEndRow());
+ } finally {
+ UTIL.getAdmin().deleteSnapshot(snapshotName);
+ UTIL.deleteTable(tableName);
+ tearDownCluster();
+ }
+ }
+
+ private void verifyWithMockedMapReduce(Job job, int numRegions, int expectedNumSplits,
+ byte[] startRow, byte[] stopRow)
+ throws IOException, InterruptedException {
+ TableSnapshotInputFormat tsif = new TableSnapshotInputFormat();
+ List<InputSplit> splits = tsif.getSplits(job);
+
+ Assert.assertEquals(expectedNumSplits, splits.size());
+
+ HBaseTestingUtility.SeenRowTracker rowTracker =
+ new HBaseTestingUtility.SeenRowTracker(startRow, stopRow);
+
+ for (int i = 0; i < splits.size(); i++) {
+ // validate input split
+ InputSplit split = splits.get(i);
+ Assert.assertTrue(split instanceof TableSnapshotRegionSplit);
+
+ // validate record reader
+ TaskAttemptContext taskAttemptContext = mock(TaskAttemptContext.class);
+ when(taskAttemptContext.getConfiguration()).thenReturn(job.getConfiguration());
+ RecordReader<ImmutableBytesWritable, Result> rr =
+ tsif.createRecordReader(split, taskAttemptContext);
+ rr.initialize(split, taskAttemptContext);
+
+ // validate we can read all the data back
+ while (rr.nextKeyValue()) {
+ byte[] row = rr.getCurrentKey().get();
+ verifyRowFromMap(rr.getCurrentKey(), rr.getCurrentValue());
+ rowTracker.addRow(row);
+ }
+
+ rr.close();
+ }
+
+ // validate all rows are seen
+ rowTracker.validate();
+ }
+
+ @Override
+ protected void testWithMapReduceImpl(HBaseTestingUtility util, TableName tableName,
+ String snapshotName, Path tableDir, int numRegions, int expectedNumSplits,
+ boolean shutdownCluster) throws Exception {
+ doTestWithMapReduce(util, tableName, snapshotName, getStartRow(), getEndRow(), tableDir,
+ numRegions, expectedNumSplits, shutdownCluster);
+ }
+
+ // this is also called by the IntegrationTestTableSnapshotInputFormat
+ public static void doTestWithMapReduce(HBaseTestingUtility util, TableName tableName,
+ String snapshotName, byte[] startRow, byte[] endRow, Path tableDir, int numRegions,
+ int expectedNumSplits, boolean shutdownCluster) throws Exception {
+
+ LOG.info("testing with MapReduce");
+
+ LOG.info("create the table and snapshot");
+ createTableAndSnapshot(util, tableName, snapshotName, startRow, endRow, numRegions);
+
+ if (shutdownCluster) {
+ LOG.info("shutting down hbase cluster.");
+ util.shutdownMiniHBaseCluster();
+ }
+
+ try {
+ // create the job
+ Job job = new Job(util.getConfiguration());
+ Scan scan = new Scan(startRow, endRow); // limit the scan
+
+ job.setJarByClass(util.getClass());
+ TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
+ TestTableSnapshotInputFormat.class);
+
+ TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
+ scan, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
+ NullWritable.class, job, true, tableDir);
+
+ job.setReducerClass(TestTableSnapshotInputFormat.TestTableSnapshotReducer.class);
+ job.setNumReduceTasks(1);
+ job.setOutputFormatClass(NullOutputFormat.class);
+
+ Assert.assertTrue(job.waitForCompletion(true));
+ } finally {
+ if (!shutdownCluster) {
+ util.getAdmin().deleteSnapshot(snapshotName);
+ util.deleteTable(tableName);
+ }
+ }
+ }
+}
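
doTestWithMapReduce() above is the wiring for scanning a snapshot rather than a live table. A minimal sketch of that setup, reusing the TestTableSnapshotMapper defined in the diff; the snapshot name and restore directory are hypothetical, and the test class is assumed to be on the classpath:

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.hbase.HBaseConfiguration;
  import org.apache.hadoop.hbase.client.Scan;
  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
  import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
  import org.apache.hadoop.hbase.mapreduce.TestTableSnapshotInputFormat;
  import org.apache.hadoop.io.NullWritable;
  import org.apache.hadoop.mapreduce.Job;
  import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;

  public class SnapshotScanJob {
    public static void main(String[] args) throws Exception {
      Configuration conf = HBaseConfiguration.create();
      Job job = Job.getInstance(conf, "scan-snapshot");
      Scan scan = new Scan();  // optionally bounded with withStartRow()/withStopRow()
      // Splits are served from snapshot files restored under the temp dir,
      // so the scan does not go through the region servers.
      TableMapReduceUtil.initTableSnapshotMapperJob(
          "my_snapshot",                                               // hypothetical snapshot
          scan,
          TestTableSnapshotInputFormat.TestTableSnapshotMapper.class,  // mapper from the diff above
          ImmutableBytesWritable.class, NullWritable.class,
          job, true, new Path("/tmp/snapshot-restore"));               // hypothetical restore dir
      job.setOutputFormatClass(NullOutputFormat.class);
      System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
  }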
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSplit.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSplit.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSplit.java
new file mode 100644
index 0000000..4382c9c
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSplit.java
@@ -0,0 +1,129 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.junit.Assert;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+
+import java.util.HashSet;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+@Category({MapReduceTests.class, SmallTests.class})
+public class TestTableSplit {
+ @Rule
+ public TestName name = new TestName();
+
+ @Test
+ public void testHashCode() {
+ TableSplit split1 = new TableSplit(TableName.valueOf(name.getMethodName()),
+ "row-start".getBytes(),
+ "row-end".getBytes(), "location");
+ TableSplit split2 = new TableSplit(TableName.valueOf(name.getMethodName()),
+ "row-start".getBytes(),
+ "row-end".getBytes(), "location");
+ assertEquals (split1, split2);
+ assertTrue (split1.hashCode() == split2.hashCode());
+ HashSet<TableSplit> set = new HashSet<>(2);
+ set.add(split1);
+ set.add(split2);
+ assertTrue(set.size() == 1);
+ }
+
+ /**
+ * length of region should not influence hashcode
+ * */
+ @Test
+ public void testHashCode_length() {
+ TableSplit split1 = new TableSplit(TableName.valueOf(name.getMethodName()),
+ "row-start".getBytes(),
+ "row-end".getBytes(), "location", 1984);
+ TableSplit split2 = new TableSplit(TableName.valueOf(name.getMethodName()),
+ "row-start".getBytes(),
+ "row-end".getBytes(), "location", 1982);
+
+ assertEquals (split1, split2);
+ assertTrue (split1.hashCode() == split2.hashCode());
+ HashSet<TableSplit> set = new HashSet<>(2);
+ set.add(split1);
+ set.add(split2);
+ assertTrue(set.size() == 1);
+ }
+
+ /**
+ * Length of the region needs to be properly serialized.
+ * */
+ @Test
+ public void testLengthIsSerialized() throws Exception {
+ TableSplit split1 = new TableSplit(TableName.valueOf(name.getMethodName()),
+ "row-start".getBytes(),
+ "row-end".getBytes(), "location", 666);
+
+ TableSplit deserialized = new TableSplit(TableName.valueOf(name.getMethodName()),
+ "row-start2".getBytes(),
+ "row-end2".getBytes(), "location1");
+ ReflectionUtils.copy(new Configuration(), split1, deserialized);
+
+ Assert.assertEquals(666, deserialized.getLength());
+ }
+
+ @Test
+ public void testToString() {
+ TableSplit split =
+ new TableSplit(TableName.valueOf(name.getMethodName()), "row-start".getBytes(), "row-end".getBytes(),
+ "location");
+ String str =
+ "HBase table split(table name: " + name.getMethodName() + ", scan: , start row: row-start, "
+ + "end row: row-end, region location: location, "
+ + "encoded region name: )";
+ Assert.assertEquals(str, split.toString());
+
+ split =
+ new TableSplit(TableName.valueOf(name.getMethodName()), null, "row-start".getBytes(),
+ "row-end".getBytes(), "location", "encoded-region-name", 1000L);
+ str =
+ "HBase table split(table name: " + name.getMethodName() + ", scan: , start row: row-start, "
+ + "end row: row-end, region location: location, "
+ + "encoded region name: encoded-region-name)";
+ Assert.assertEquals(str, split.toString());
+
+ split = new TableSplit((TableName) null, null, null, null);
+ str =
+ "HBase table split(table name: null, scan: , start row: null, "
+ + "end row: null, region location: null, "
+ + "encoded region name: )";
+ Assert.assertEquals(str, split.toString());
+
+ split = new TableSplit((TableName) null, null, null, null, null, null, 1000L);
+ str =
+ "HBase table split(table name: null, scan: , start row: null, "
+ + "end row: null, region location: null, "
+ + "encoded region name: null)";
+ Assert.assertEquals(str, split.toString());
+ }
+}
+
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTimeRangeMapRed.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTimeRangeMapRed.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTimeRangeMapRed.java
new file mode 100644
index 0000000..6796c94
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTimeRangeMapRed.java
@@ -0,0 +1,211 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Durability;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.MapWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.NavigableMap;
+import java.util.TreeMap;
+
+@Category({MapReduceTests.class, LargeTests.class})
+public class TestTimeRangeMapRed {
+ private final static Log log = LogFactory.getLog(TestTimeRangeMapRed.class);
+ private static final HBaseTestingUtility UTIL =
+ new HBaseTestingUtility();
+ private Admin admin;
+
+ private static final byte [] KEY = Bytes.toBytes("row1");
+ private static final NavigableMap<Long, Boolean> TIMESTAMP = new TreeMap<>();
+ static {
+ TIMESTAMP.put((long)1245620000, false);
+ TIMESTAMP.put((long)1245620005, true); // include
+ TIMESTAMP.put((long)1245620010, true); // include
+ TIMESTAMP.put((long)1245620055, true); // include
+ TIMESTAMP.put((long)1245620100, true); // include
+ TIMESTAMP.put((long)1245620150, false);
+ TIMESTAMP.put((long)1245620250, false);
+ }
+ static final long MINSTAMP = 1245620005;
+ static final long MAXSTAMP = 1245620100 + 1; // maxStamp itself is excluded, so increment it.
+
+ static final TableName TABLE_NAME = TableName.valueOf("table123");
+ static final byte[] FAMILY_NAME = Bytes.toBytes("text");
+ static final byte[] COLUMN_NAME = Bytes.toBytes("input");
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ UTIL.startMiniCluster();
+ }
+
+ @AfterClass
+ public static void afterClass() throws Exception {
+ UTIL.shutdownMiniCluster();
+ }
+
+ @Before
+ public void before() throws Exception {
+ this.admin = UTIL.getAdmin();
+ }
+
+ private static class ProcessTimeRangeMapper
+ extends TableMapper<ImmutableBytesWritable, MapWritable>
+ implements Configurable {
+
+ private Configuration conf = null;
+ private Table table = null;
+
+ @Override
+ public void map(ImmutableBytesWritable key, Result result,
+ Context context)
+ throws IOException {
+ List<Long> tsList = new ArrayList<>();
+ for (Cell kv : result.listCells()) {
+ tsList.add(kv.getTimestamp());
+ }
+
+ List<Put> puts = new ArrayList<>();
+ for (Long ts : tsList) {
+ Put put = new Put(key.get());
+ put.setDurability(Durability.SKIP_WAL);
+ put.addColumn(FAMILY_NAME, COLUMN_NAME, ts, Bytes.toBytes(true));
+ puts.add(put);
+ }
+ table.put(puts);
+ }
+
+ @Override
+ public Configuration getConf() {
+ return conf;
+ }
+
+ @Override
+ public void setConf(Configuration configuration) {
+ this.conf = configuration;
+ try {
+ Connection connection = ConnectionFactory.createConnection(conf);
+ table = connection.getTable(TABLE_NAME);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+ }
+
+ @Test
+ public void testTimeRangeMapRed()
+ throws IOException, InterruptedException, ClassNotFoundException {
+ final HTableDescriptor desc = new HTableDescriptor(TABLE_NAME);
+ final HColumnDescriptor col = new HColumnDescriptor(FAMILY_NAME);
+ col.setMaxVersions(Integer.MAX_VALUE);
+ desc.addFamily(col);
+ admin.createTable(desc);
+ List<Put> puts = new ArrayList<>();
+ for (Map.Entry<Long, Boolean> entry : TIMESTAMP.entrySet()) {
+ Put put = new Put(KEY);
+ put.setDurability(Durability.SKIP_WAL);
+ put.addColumn(FAMILY_NAME, COLUMN_NAME, entry.getKey(), Bytes.toBytes(false));
+ puts.add(put);
+ }
+ Table table = UTIL.getConnection().getTable(desc.getTableName());
+ table.put(puts);
+ runTestOnTable();
+ verify(table);
+ table.close();
+ }
+
+ private void runTestOnTable()
+ throws IOException, InterruptedException, ClassNotFoundException {
+ Job job = null;
+ try {
+ job = new Job(UTIL.getConfiguration(), "test123");
+ job.setOutputFormatClass(NullOutputFormat.class);
+ job.setNumReduceTasks(0);
+ Scan scan = new Scan();
+ scan.addColumn(FAMILY_NAME, COLUMN_NAME);
+ scan.setTimeRange(MINSTAMP, MAXSTAMP);
+ scan.setMaxVersions();
+ TableMapReduceUtil.initTableMapperJob(TABLE_NAME,
+ scan, ProcessTimeRangeMapper.class, Text.class, Text.class, job);
+ job.waitForCompletion(true);
+ } catch (IOException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ } finally {
+ if (job != null) {
+ FileUtil.fullyDelete(
+ new File(job.getConfiguration().get("hadoop.tmp.dir")));
+ }
+ }
+ }
+
+ private void verify(final Table table) throws IOException {
+ Scan scan = new Scan();
+ scan.addColumn(FAMILY_NAME, COLUMN_NAME);
+ scan.setMaxVersions(1);
+ ResultScanner scanner = table.getScanner(scan);
+ for (Result r: scanner) {
+ for (Cell kv : r.listCells()) {
+ log.debug(Bytes.toString(r.getRow()) + "\t" + Bytes.toString(CellUtil.cloneFamily(kv))
+ + "\t" + Bytes.toString(CellUtil.cloneQualifier(kv))
+ + "\t" + kv.getTimestamp() + "\t" + Bytes.toBoolean(CellUtil.cloneValue(kv)));
+ org.junit.Assert.assertEquals(TIMESTAMP.get(kv.getTimestamp()),
+ Bytes.toBoolean(CellUtil.cloneValue(kv)));
+ }
+ }
+ scanner.close();
+ }
+
+}
+
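
For reference, a minimal sketch, not part of this patch, of the time-range contract the constants above encode: Scan.setTimeRange(min, max) selects cells with min <= timestamp < max, which is why MAXSTAMP is defined as the last included timestamp plus one. The fragment reuses the test's column constants and assumes the surrounding method declares IOException.

    Scan scan = new Scan();
    scan.addColumn(FAMILY_NAME, COLUMN_NAME);
    // selects timestamps 1245620005 .. 1245620100 inclusive; the upper bound itself is excluded
    scan.setTimeRange(MINSTAMP, MAXSTAMP);
    scan.setMaxVersions(); // return every matching version, not just the newest
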
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestWALPlayer.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestWALPlayer.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestWALPlayer.java
new file mode 100644
index 0000000..427c5cc
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestWALPlayer.java
@@ -0,0 +1,231 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.doAnswer;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import java.io.ByteArrayOutputStream;
+import java.io.PrintStream;
+import java.util.ArrayList;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.WALPlayer.WALKeyValueMapper;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.LauncherSecurityManager;
+import org.apache.hadoop.hbase.wal.WAL;
+import org.apache.hadoop.hbase.wal.WALKey;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Mapper.Context;
+import org.apache.hadoop.util.ToolRunner;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.stubbing.Answer;
+
+/**
+ * Basic test for the WALPlayer M/R tool
+ */
+@Category({MapReduceTests.class, LargeTests.class})
+public class TestWALPlayer {
+ private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+ private static MiniHBaseCluster cluster;
+ private static Path rootDir;
+ private static Path walRootDir;
+ private static FileSystem fs;
+ private static FileSystem logFs;
+ private static Configuration conf;
+
+ @Rule
+ public TestName name = new TestName();
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ conf= TEST_UTIL.getConfiguration();
+ rootDir = TEST_UTIL.createRootDir();
+ walRootDir = TEST_UTIL.createWALRootDir();
+ fs = FSUtils.getRootDirFileSystem(conf);
+ logFs = FSUtils.getWALFileSystem(conf);
+ cluster = TEST_UTIL.startMiniCluster();
+ }
+
+ @AfterClass
+ public static void afterClass() throws Exception {
+ TEST_UTIL.shutdownMiniCluster();
+ fs.delete(rootDir, true);
+ logFs.delete(walRootDir, true);
+ }
+
+ /**
+ * Simple end-to-end test
+ * @throws Exception
+ */
+ @Test
+ public void testWALPlayer() throws Exception {
+ final TableName tableName1 = TableName.valueOf(name.getMethodName() + "1");
+ final TableName tableName2 = TableName.valueOf(name.getMethodName() + "2");
+ final byte[] FAMILY = Bytes.toBytes("family");
+ final byte[] COLUMN1 = Bytes.toBytes("c1");
+ final byte[] COLUMN2 = Bytes.toBytes("c2");
+ final byte[] ROW = Bytes.toBytes("row");
+ Table t1 = TEST_UTIL.createTable(tableName1, FAMILY);
+ Table t2 = TEST_UTIL.createTable(tableName2, FAMILY);
+
+ // put a row into the first table
+ Put p = new Put(ROW);
+ p.addColumn(FAMILY, COLUMN1, COLUMN1);
+ p.addColumn(FAMILY, COLUMN2, COLUMN2);
+ t1.put(p);
+ // delete one column
+ Delete d = new Delete(ROW);
+ d.addColumns(FAMILY, COLUMN1);
+ t1.delete(d);
+
+ // replay the WAL, map table 1 to table 2
+ WAL log = cluster.getRegionServer(0).getWAL(null);
+ log.rollWriter();
+ String walInputDir = new Path(cluster.getMaster().getMasterFileSystem()
+ .getWALRootDir(), HConstants.HREGION_LOGDIR_NAME).toString();
+
+ Configuration configuration= TEST_UTIL.getConfiguration();
+ WALPlayer player = new WALPlayer(configuration);
+ String optionName="_test_.name";
+ configuration.set(optionName, "1000");
+ player.setupTime(configuration, optionName);
+ assertEquals(1000,configuration.getLong(optionName,0));
+ assertEquals(0, ToolRunner.run(configuration, player,
+ new String[] {walInputDir, tableName1.getNameAsString(),
+ tableName2.getNameAsString() }));
+
+
+ // verify the WAL was replayed into table 2
+ Get g = new Get(ROW);
+ Result r = t2.get(g);
+ assertEquals(1, r.size());
+ assertTrue(CellUtil.matchingQualifier(r.rawCells()[0], COLUMN2));
+ }
+
+ /**
+ * Test WALKeyValueMapper setup and map
+ */
+ @Test
+ public void testWALKeyValueMapper() throws Exception {
+ testWALKeyValueMapper(WALPlayer.TABLES_KEY);
+ }
+
+ @Test
+ public void testWALKeyValueMapperWithDeprecatedConfig() throws Exception {
+ testWALKeyValueMapper("hlog.input.tables");
+ }
+
+ private void testWALKeyValueMapper(final String tableConfigKey) throws Exception {
+ Configuration configuration = new Configuration();
+ configuration.set(tableConfigKey, "table");
+ WALKeyValueMapper mapper = new WALKeyValueMapper();
+ WALKey key = mock(WALKey.class);
+ when(key.getTablename()).thenReturn(TableName.valueOf("table"));
+ @SuppressWarnings("unchecked")
+ Mapper<WALKey, WALEdit, ImmutableBytesWritable, KeyValue>.Context context = mock(Context.class);
+ when(context.getConfiguration()).thenReturn(configuration);
+
+ WALEdit value = mock(WALEdit.class);
+ ArrayList<Cell> values = new ArrayList<>();
+ KeyValue kv1 = new KeyValue(Bytes.toBytes("row"), Bytes.toBytes("family"), null);
+
+ values.add(kv1);
+ when(value.getCells()).thenReturn(values);
+ mapper.setup(context);
+
+ doAnswer(new Answer<Void>() {
+
+ @Override
+ public Void answer(InvocationOnMock invocation) throws Throwable {
+ ImmutableBytesWritable writer = (ImmutableBytesWritable) invocation.getArguments()[0];
+ KeyValue key = (KeyValue) invocation.getArguments()[1];
+ assertEquals("row", Bytes.toString(writer.get()));
+ assertEquals("row", Bytes.toString(CellUtil.cloneRow(key)));
+ return null;
+ }
+ }).when(context).write(any(ImmutableBytesWritable.class), any(KeyValue.class));
+
+ mapper.map(key, value, context);
+
+ }
+
+ /**
+ * Test main method
+ */
+ @Test
+ public void testMainMethod() throws Exception {
+
+ PrintStream oldPrintStream = System.err;
+ SecurityManager SECURITY_MANAGER = System.getSecurityManager();
+ LauncherSecurityManager newSecurityManager= new LauncherSecurityManager();
+ System.setSecurityManager(newSecurityManager);
+ ByteArrayOutputStream data = new ByteArrayOutputStream();
+ String[] args = {};
+ System.setErr(new PrintStream(data));
+ try {
+ System.setErr(new PrintStream(data));
+ try {
+ WALPlayer.main(args);
+ fail("should be SecurityException");
+ } catch (SecurityException e) {
+ assertEquals(-1, newSecurityManager.getExitCode());
+ assertTrue(data.toString().contains("ERROR: Wrong number of arguments:"));
+ assertTrue(data.toString().contains("Usage: WALPlayer [options] <wal inputdir>" +
+ " <tables> [<tableMappings>]"));
+ assertTrue(data.toString().contains("-Dwal.bulk.output=/path/for/output"));
+ }
+
+ } finally {
+ System.setErr(oldPrintStream);
+ System.setSecurityManager(SECURITY_MANAGER);
+ }
+
+ }
+
+}
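
For reference, a minimal sketch, not part of this patch, of driving WALPlayer programmatically the way testWALPlayer does; the WAL directory and table names below are placeholders. The equivalent command-line form, per the usage text asserted in testMainMethod, is WALPlayer [options] <wal inputdir> <tables> [<tableMappings>], with -Dwal.bulk.output=/path/for/output to produce bulk-load output instead of writing directly to the target table.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.mapreduce.WALPlayer;
    import org.apache.hadoop.util.ToolRunner;

    public class ReplayWalsExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // args: <wal inputdir> <tables> [<tableMappings>] -- replay table1's edits into table2
        int rc = ToolRunner.run(conf, new WALPlayer(conf),
            new String[] { "/hbase/WALs", "table1", "table2" });
        System.exit(rc);
      }
    }
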
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestWALRecordReader.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestWALRecordReader.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestWALRecordReader.java
new file mode 100644
index 0000000..34725b4
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestWALRecordReader.java
@@ -0,0 +1,276 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.util.List;
+import java.util.NavigableMap;
+import java.util.TreeMap;
+import java.util.concurrent.atomic.AtomicLong;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.mapreduce.WALInputFormat.WALKeyRecordReader;
+import org.apache.hadoop.hbase.mapreduce.WALInputFormat.WALRecordReader;
+import org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl;
+import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.wal.WAL;
+import org.apache.hadoop.hbase.wal.WALFactory;
+import org.apache.hadoop.hbase.wal.WALKey;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.MapReduceTestUtil;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+/**
+ * JUnit tests for the WALRecordReader
+ */
+@Category({MapReduceTests.class, MediumTests.class})
+public class TestWALRecordReader {
+ private static final Log LOG = LogFactory.getLog(TestWALRecordReader.class);
+ private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+ private static Configuration conf;
+ private static FileSystem fs;
+ private static Path hbaseDir;
+ private static FileSystem walFs;
+ private static Path walRootDir;
+ // visible for TestHLogRecordReader
+ static final TableName tableName = TableName.valueOf(getName());
+ private static final byte [] rowName = tableName.getName();
+ // visible for TestHLogRecordReader
+ static final HRegionInfo info = new HRegionInfo(tableName,
+ Bytes.toBytes(""), Bytes.toBytes(""), false);
+ private static final byte [] family = Bytes.toBytes("column");
+ private static final byte [] value = Bytes.toBytes("value");
+ private static HTableDescriptor htd;
+ private static Path logDir;
+ protected MultiVersionConcurrencyControl mvcc;
+ protected static NavigableMap<byte[], Integer> scopes = new TreeMap<>(Bytes.BYTES_COMPARATOR);
+
+ private static String getName() {
+ return "TestWALRecordReader";
+ }
+
+ @Before
+ public void setUp() throws Exception {
+ fs.delete(hbaseDir, true);
+ walFs.delete(walRootDir, true);
+ mvcc = new MultiVersionConcurrencyControl();
+ }
+ @BeforeClass
+ public static void setUpBeforeClass() throws Exception {
+ // Make block sizes small.
+ conf = TEST_UTIL.getConfiguration();
+ conf.setInt("dfs.blocksize", 1024 * 1024);
+ conf.setInt("dfs.replication", 1);
+ TEST_UTIL.startMiniDFSCluster(1);
+
+ conf = TEST_UTIL.getConfiguration();
+ fs = TEST_UTIL.getDFSCluster().getFileSystem();
+
+ hbaseDir = TEST_UTIL.createRootDir();
+ walRootDir = TEST_UTIL.createWALRootDir();
+ walFs = FSUtils.getWALFileSystem(conf);
+ logDir = new Path(walRootDir, HConstants.HREGION_LOGDIR_NAME);
+
+ htd = new HTableDescriptor(tableName);
+ htd.addFamily(new HColumnDescriptor(family));
+ }
+
+ @AfterClass
+ public static void tearDownAfterClass() throws Exception {
+ fs.delete(hbaseDir, true);
+ walFs.delete(walRootDir, true);
+ TEST_UTIL.shutdownMiniCluster();
+ }
+
+ /**
+ * Test partial reads from the log based on passed time range
+ * @throws Exception
+ */
+ @Test
+ public void testPartialRead() throws Exception {
+ final WALFactory walfactory = new WALFactory(conf, null, getName());
+ WAL log = walfactory.getWAL(info.getEncodedNameAsBytes(), info.getTable().getNamespace());
+ // This test depends on timestamp being millisecond based and the filename of the WAL also
+ // being millisecond based.
+ long ts = System.currentTimeMillis();
+ WALEdit edit = new WALEdit();
+ edit.add(new KeyValue(rowName, family, Bytes.toBytes("1"), ts, value));
+ log.append(info, getWalKey(ts, scopes), edit, true);
+ edit = new WALEdit();
+ edit.add(new KeyValue(rowName, family, Bytes.toBytes("2"), ts+1, value));
+ log.append(info, getWalKey(ts+1, scopes), edit, true);
+ log.sync();
+ LOG.info("Before 1st WAL roll " + log.toString());
+ log.rollWriter();
+ LOG.info("Past 1st WAL roll " + log.toString());
+
+ Thread.sleep(1);
+ long ts1 = System.currentTimeMillis();
+
+ edit = new WALEdit();
+ edit.add(new KeyValue(rowName, family, Bytes.toBytes("3"), ts1+1, value));
+ log.append(info, getWalKey(ts1+1, scopes), edit, true);
+ edit = new WALEdit();
+ edit.add(new KeyValue(rowName, family, Bytes.toBytes("4"), ts1+2, value));
+ log.append(info, getWalKey(ts1+2, scopes), edit, true);
+ log.sync();
+ log.shutdown();
+ walfactory.shutdown();
+ LOG.info("Closed WAL " + log.toString());
+
+
+ WALInputFormat input = new WALInputFormat();
+ Configuration jobConf = new Configuration(conf);
+ jobConf.set("mapreduce.input.fileinputformat.inputdir", logDir.toString());
+ jobConf.setLong(WALInputFormat.END_TIME_KEY, ts);
+
+ // only 1st file is considered, and only its 1st entry is used
+ List<InputSplit> splits = input.getSplits(MapreduceTestingShim.createJobContext(jobConf));
+
+ assertEquals(1, splits.size());
+ testSplit(splits.get(0), Bytes.toBytes("1"));
+
+ jobConf.setLong(WALInputFormat.START_TIME_KEY, ts+1);
+ jobConf.setLong(WALInputFormat.END_TIME_KEY, ts1+1);
+ splits = input.getSplits(MapreduceTestingShim.createJobContext(jobConf));
+ // both files need to be considered
+ assertEquals(2, splits.size());
+ // only the 2nd entry from the 1st file is used
+ testSplit(splits.get(0), Bytes.toBytes("2"));
+ // only the 1st entry from the 2nd file is used
+ testSplit(splits.get(1), Bytes.toBytes("3"));
+ }
+
+ /**
+ * Test basic functionality
+ * @throws Exception
+ */
+ @Test
+ public void testWALRecordReader() throws Exception {
+ final WALFactory walfactory = new WALFactory(conf, null, getName());
+ WAL log = walfactory.getWAL(info.getEncodedNameAsBytes(), info.getTable().getNamespace());
+ byte [] value = Bytes.toBytes("value");
+ final AtomicLong sequenceId = new AtomicLong(0);
+ WALEdit edit = new WALEdit();
+ edit.add(new KeyValue(rowName, family, Bytes.toBytes("1"),
+ System.currentTimeMillis(), value));
+ long txid = log.append(info, getWalKey(System.currentTimeMillis(), scopes), edit, true);
+ log.sync(txid);
+
+ Thread.sleep(1); // make sure 2nd log gets a later timestamp
+ long secondTs = System.currentTimeMillis();
+ log.rollWriter();
+
+ edit = new WALEdit();
+ edit.add(new KeyValue(rowName, family, Bytes.toBytes("2"),
+ System.currentTimeMillis(), value));
+ txid = log.append(info, getWalKey(System.currentTimeMillis(), scopes), edit, true);
+ log.sync(txid);
+ log.shutdown();
+ walfactory.shutdown();
+ long thirdTs = System.currentTimeMillis();
+
+ // should have 2 log files now
+ WALInputFormat input = new WALInputFormat();
+ Configuration jobConf = new Configuration(conf);
+ jobConf.set("mapreduce.input.fileinputformat.inputdir", logDir.toString());
+
+ // make sure both logs are found
+ List<InputSplit> splits = input.getSplits(MapreduceTestingShim.createJobContext(jobConf));
+ assertEquals(2, splits.size());
+
+ // should return exactly one KV
+ testSplit(splits.get(0), Bytes.toBytes("1"));
+ // same for the 2nd split
+ testSplit(splits.get(1), Bytes.toBytes("2"));
+
+ // now test basic time ranges:
+
+ // set an end time; the 2nd log file can be ignored completely.
+ jobConf.setLong(WALInputFormat.END_TIME_KEY, secondTs-1);
+ splits = input.getSplits(MapreduceTestingShim.createJobContext(jobConf));
+ assertEquals(1, splits.size());
+ testSplit(splits.get(0), Bytes.toBytes("1"));
+
+ // now set a start time
+ jobConf.setLong(WALInputFormat.END_TIME_KEY, Long.MAX_VALUE);
+ jobConf.setLong(WALInputFormat.START_TIME_KEY, thirdTs);
+ splits = input.getSplits(MapreduceTestingShim.createJobContext(jobConf));
+ // both logs need to be considered
+ assertEquals(2, splits.size());
+ // but both readers skip all edits
+ testSplit(splits.get(0));
+ testSplit(splits.get(1));
+ }
+
+ protected WALKey getWalKey(final long time, NavigableMap<byte[], Integer> scopes) {
+ return new WALKey(info.getEncodedNameAsBytes(), tableName, time, mvcc, scopes);
+ }
+
+ protected WALRecordReader getReader() {
+ return new WALKeyRecordReader();
+ }
+
+ /**
+ * Create a new reader from the split, and match the edits against the passed columns.
+ */
+ private void testSplit(InputSplit split, byte[]... columns) throws Exception {
+ final WALRecordReader reader = getReader();
+ reader.initialize(split, MapReduceTestUtil.createDummyMapTaskAttemptContext(conf));
+
+ for (byte[] column : columns) {
+ assertTrue(reader.nextKeyValue());
+ Cell cell = reader.getCurrentValue().getCells().get(0);
+ if (!Bytes.equals(column, 0, column.length, cell.getQualifierArray(),
+ cell.getQualifierOffset(), cell.getQualifierLength())) {
+ assertTrue(
+ "expected ["
+ + Bytes.toString(column)
+ + "], actual ["
+ + Bytes.toString(cell.getQualifierArray(), cell.getQualifierOffset(),
+ cell.getQualifierLength()) + "]", false);
+ }
+ }
+ assertFalse(reader.nextKeyValue());
+ reader.close();
+ }
+
+}
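
For reference, a minimal sketch, not part of this patch, of how a job restricts WALInputFormat to a time window, mirroring the configuration used in the tests above (in these tests both bounds behave inclusively). It is a fragment: conf, logDir, startTs and endTs come from the surrounding setup, and the calling method must declare the checked exceptions.

    Configuration jobConf = new Configuration(conf);
    jobConf.set("mapreduce.input.fileinputformat.inputdir", logDir.toString());
    jobConf.setLong(WALInputFormat.START_TIME_KEY, startTs); // oldest edit to return
    jobConf.setLong(WALInputFormat.END_TIME_KEY, endTs);     // newest edit to return
    List<InputSplit> splits =
        new WALInputFormat().getSplits(MapreduceTestingShim.createJobContext(jobConf));
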
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TsvImporterCustomTestMapper.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TsvImporterCustomTestMapper.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TsvImporterCustomTestMapper.java
new file mode 100644
index 0000000..aea5036
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TsvImporterCustomTestMapper.java
@@ -0,0 +1,80 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Durability;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.KeyValue;
+
+import java.io.IOException;
+
+/**
+ * Dummy mapper used for unit tests to verify that the mapper can be injected.
+ * This approach would be used if a custom transformation needed to be done after
+ * reading the input data before writing it to HFiles.
+ */
+public class TsvImporterCustomTestMapper extends TsvImporterMapper {
+
+ @Override
+ protected void setup(Context context) {
+ doSetup(context);
+ }
+
+ /**
+ * Convert a line of TSV text into an HBase table row after transforming the
+ * values by multiplying them by 3.
+ */
+ @Override
+ public void map(LongWritable offset, Text value, Context context)
+ throws IOException {
+ byte[] family = Bytes.toBytes("FAM");
+ final byte[][] qualifiers = { Bytes.toBytes("A"), Bytes.toBytes("B") };
+
+ // do some basic line parsing
+ byte[] lineBytes = value.getBytes();
+ String[] valueTokens = new String(lineBytes, "UTF-8").split("\u001b");
+
+ // create the rowKey and Put
+ ImmutableBytesWritable rowKey =
+ new ImmutableBytesWritable(Bytes.toBytes(valueTokens[0]));
+ Put put = new Put(rowKey.copyBytes());
+ put.setDurability(Durability.SKIP_WAL);
+
+ //The value should look like this: VALUE1 or VALUE2. Let's multiply
+ //the integer by 3
+ for(int i = 1; i < valueTokens.length; i++) {
+ String prefix = valueTokens[i].substring(0, "VALUE".length());
+ String suffix = valueTokens[i].substring("VALUE".length());
+ String newValue = prefix + Integer.parseInt(suffix) * 3;
+
+ KeyValue kv = new KeyValue(rowKey.copyBytes(), family,
+ qualifiers[i-1], Bytes.toBytes(newValue));
+ put.add(kv);
+ }
+
+ try {
+ context.write(rowKey, put);
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ }
+ }
+}
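
For reference, a minimal sketch, not part of this patch, of how a custom mapper like the one above is typically injected into an ImportTsv run. Both configuration keys are assumptions based on ImportTsv's MAPPER_CONF_KEY and COLUMNS_CONF_KEY ("importtsv.mapper.class" and "importtsv.columns"); verify them against the ImportTsv version in use.

    Configuration conf = HBaseConfiguration.create();
    // assumed key: ImportTsv.MAPPER_CONF_KEY
    conf.set("importtsv.mapper.class", TsvImporterCustomTestMapper.class.getName());
    // assumed key: ImportTsv.COLUMNS_CONF_KEY; matches the FAM:A / FAM:B layout used above
    conf.set("importtsv.columns", "HBASE_ROW_KEY,FAM:A,FAM:B");
    // then launch the ImportTsv tool with this configuration, e.g. via ToolRunner
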
[10/41] hbase git commit: HBASE-18640 Move mapreduce out of
hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java
deleted file mode 100644
index eebb0f3..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java
+++ /dev/null
@@ -1,2626 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase;
-
-import static org.codehaus.jackson.map.SerializationConfig.Feature.SORT_PROPERTIES_ALPHABETICALLY;
-
-import java.io.IOException;
-import java.io.PrintStream;
-import java.lang.reflect.Constructor;
-import java.math.BigDecimal;
-import java.math.MathContext;
-import java.text.DecimalFormat;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Date;
-import java.util.LinkedList;
-import java.util.Locale;
-import java.util.Map;
-import java.util.Queue;
-import java.util.Random;
-import java.util.TreeMap;
-import java.util.NoSuchElementException;
-import java.util.concurrent.Callable;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-
-import org.apache.commons.lang.StringUtils;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Admin;
-import org.apache.hadoop.hbase.client.Append;
-import org.apache.hadoop.hbase.client.AsyncConnection;
-import org.apache.hadoop.hbase.client.AsyncTable;
-import org.apache.hadoop.hbase.client.BufferedMutator;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Consistency;
-import org.apache.hadoop.hbase.client.Delete;
-import org.apache.hadoop.hbase.client.Durability;
-import org.apache.hadoop.hbase.client.Get;
-import org.apache.hadoop.hbase.client.Increment;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.RawAsyncTable;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.RowMutations;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.filter.BinaryComparator;
-import org.apache.hadoop.hbase.filter.CompareFilter;
-import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
-import org.apache.hadoop.hbase.filter.Filter;
-import org.apache.hadoop.hbase.filter.FilterAllFilter;
-import org.apache.hadoop.hbase.filter.FilterList;
-import org.apache.hadoop.hbase.filter.PageFilter;
-import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
-import org.apache.hadoop.hbase.filter.WhileMatchFilter;
-import org.apache.hadoop.hbase.io.compress.Compression;
-import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
-import org.apache.hadoop.hbase.io.hfile.RandomDistribution;
-import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
-import org.apache.hadoop.hbase.regionserver.BloomType;
-import org.apache.hadoop.hbase.regionserver.CompactingMemStore;
-import org.apache.hadoop.hbase.trace.HBaseHTraceConfiguration;
-import org.apache.hadoop.hbase.trace.SpanReceiverHost;
-import org.apache.hadoop.hbase.util.*;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
-import org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-import org.codehaus.jackson.map.ObjectMapper;
-import org.apache.htrace.Sampler;
-import org.apache.htrace.Trace;
-import org.apache.htrace.TraceScope;
-import org.apache.htrace.impl.ProbabilitySampler;
-import org.apache.hadoop.hbase.shaded.com.google.common.base.MoreObjects;
-import org.apache.hadoop.hbase.shaded.com.google.common.util.concurrent.ThreadFactoryBuilder;
-
-import com.codahale.metrics.Histogram;
-import com.codahale.metrics.UniformReservoir;
-
-/**
- * Script used for evaluating HBase performance and scalability. Runs an HBase
- * client that steps through one of a set of hardcoded tests or 'experiments'
- * (e.g. a random reads test, a random writes test, etc.). Pass on the
- * command-line which test to run and how many clients are participating in
- * this experiment. Run {@code PerformanceEvaluation --help} to obtain usage.
- *
- * <p>This class sets up and runs the evaluation programs described in
- * Section 7, <i>Performance Evaluation</i>, of the <a
- * href="http://labs.google.com/papers/bigtable.html">Bigtable</a>
- * paper, pages 8-10.
- *
- * <p>By default, runs as a mapreduce job where each mapper runs a single test
- * client. Can also run as a non-mapreduce, multithreaded application by
- * specifying {@code --nomapred}. Each client does about 1GB of data, unless
- * specified otherwise.
- */
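
(For reference, not part of this patch: with this tool's standard usage, a run such as

    hbase org.apache.hadoop.hbase.PerformanceEvaluation --nomapred randomWrite 4

starts four in-process client threads running the randomWrite command, while the same invocation without --nomapred launches one mapper per client. Options vary by release, so consult --help as the Javadoc above suggests.)
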
-@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
-public class PerformanceEvaluation extends Configured implements Tool {
- static final String RANDOM_SEEK_SCAN = "randomSeekScan";
- static final String RANDOM_READ = "randomRead";
- private static final Log LOG = LogFactory.getLog(PerformanceEvaluation.class.getName());
- private static final ObjectMapper MAPPER = new ObjectMapper();
- static {
- MAPPER.configure(SORT_PROPERTIES_ALPHABETICALLY, true);
- }
-
- public static final String TABLE_NAME = "TestTable";
- public static final byte[] FAMILY_NAME = Bytes.toBytes("info");
- public static final byte [] COLUMN_ZERO = Bytes.toBytes("" + 0);
- public static final byte [] QUALIFIER_NAME = COLUMN_ZERO;
- public static final int DEFAULT_VALUE_LENGTH = 1000;
- public static final int ROW_LENGTH = 26;
-
- private static final int ONE_GB = 1024 * 1024 * 1000;
- private static final int DEFAULT_ROWS_PER_GB = ONE_GB / DEFAULT_VALUE_LENGTH;
- // TODO : should we make this configurable
- private static final int TAG_LENGTH = 256;
- private static final DecimalFormat FMT = new DecimalFormat("0.##");
- private static final MathContext CXT = MathContext.DECIMAL64;
- private static final BigDecimal MS_PER_SEC = BigDecimal.valueOf(1000);
- private static final BigDecimal BYTES_PER_MB = BigDecimal.valueOf(1024 * 1024);
- private static final TestOptions DEFAULT_OPTS = new TestOptions();
-
- private static Map<String, CmdDescriptor> COMMANDS = new TreeMap<>();
- private static final Path PERF_EVAL_DIR = new Path("performance_evaluation");
-
- static {
- addCommandDescriptor(AsyncRandomReadTest.class, "asyncRandomRead",
- "Run async random read test");
- addCommandDescriptor(AsyncRandomWriteTest.class, "asyncRandomWrite",
- "Run async random write test");
- addCommandDescriptor(AsyncSequentialReadTest.class, "asyncSequentialRead",
- "Run async sequential read test");
- addCommandDescriptor(AsyncSequentialWriteTest.class, "asyncSequentialWrite",
- "Run async sequential write test");
- addCommandDescriptor(AsyncScanTest.class, "asyncScan",
- "Run async scan test (read every row)");
- addCommandDescriptor(RandomReadTest.class, RANDOM_READ,
- "Run random read test");
- addCommandDescriptor(RandomSeekScanTest.class, RANDOM_SEEK_SCAN,
- "Run random seek and scan 100 test");
- addCommandDescriptor(RandomScanWithRange10Test.class, "scanRange10",
- "Run random seek scan with both start and stop row (max 10 rows)");
- addCommandDescriptor(RandomScanWithRange100Test.class, "scanRange100",
- "Run random seek scan with both start and stop row (max 100 rows)");
- addCommandDescriptor(RandomScanWithRange1000Test.class, "scanRange1000",
- "Run random seek scan with both start and stop row (max 1000 rows)");
- addCommandDescriptor(RandomScanWithRange10000Test.class, "scanRange10000",
- "Run random seek scan with both start and stop row (max 10000 rows)");
- addCommandDescriptor(RandomWriteTest.class, "randomWrite",
- "Run random write test");
- addCommandDescriptor(SequentialReadTest.class, "sequentialRead",
- "Run sequential read test");
- addCommandDescriptor(SequentialWriteTest.class, "sequentialWrite",
- "Run sequential write test");
- addCommandDescriptor(ScanTest.class, "scan",
- "Run scan test (read every row)");
- addCommandDescriptor(FilteredScanTest.class, "filterScan",
- "Run scan test using a filter to find a specific row based on it's value " +
- "(make sure to use --rows=20)");
- addCommandDescriptor(IncrementTest.class, "increment",
- "Increment on each row; clients overlap on keyspace so some concurrent operations");
- addCommandDescriptor(AppendTest.class, "append",
- "Append on each row; clients overlap on keyspace so some concurrent operations");
- addCommandDescriptor(CheckAndMutateTest.class, "checkAndMutate",
- "CheckAndMutate on each row; clients overlap on keyspace so some concurrent operations");
- addCommandDescriptor(CheckAndPutTest.class, "checkAndPut",
- "CheckAndPut on each row; clients overlap on keyspace so some concurrent operations");
- addCommandDescriptor(CheckAndDeleteTest.class, "checkAndDelete",
- "CheckAndDelete on each row; clients overlap on keyspace so some concurrent operations");
- }
-
- /**
- * Enum for map metrics. Keep it out here rather than inside the Map
- * inner-class so we can find associated properties.
- */
- protected static enum Counter {
- /** elapsed time */
- ELAPSED_TIME,
- /** number of rows */
- ROWS
- }
-
- protected static class RunResult implements Comparable<RunResult> {
- public RunResult(long duration, Histogram hist) {
- this.duration = duration;
- this.hist = hist;
- }
-
- public final long duration;
- public final Histogram hist;
-
- @Override
- public String toString() {
- return Long.toString(duration);
- }
-
- @Override public int compareTo(RunResult o) {
- return Long.compare(this.duration, o.duration);
- }
- }
-
- /**
- * Constructor
- * @param conf Configuration object
- */
- public PerformanceEvaluation(final Configuration conf) {
- super(conf);
- }
-
- protected static void addCommandDescriptor(Class<? extends TestBase> cmdClass,
- String name, String description) {
- CmdDescriptor cmdDescriptor = new CmdDescriptor(cmdClass, name, description);
- COMMANDS.put(name, cmdDescriptor);
- }
-
- /**
- * Implementations can have their status set.
- */
- interface Status {
- /**
- * Sets status
- * @param msg status message
- * @throws IOException
- */
- void setStatus(final String msg) throws IOException;
- }
-
- /**
- * MapReduce job that runs a performance evaluation client in each map task.
- */
- public static class EvaluationMapTask
- extends Mapper<LongWritable, Text, LongWritable, LongWritable> {
-
- /** configuration parameter name that contains the command */
- public final static String CMD_KEY = "EvaluationMapTask.command";
- /** configuration parameter name that contains the PE impl */
- public static final String PE_KEY = "EvaluationMapTask.performanceEvalImpl";
-
- private Class<? extends Test> cmd;
-
- @Override
- protected void setup(Context context) throws IOException, InterruptedException {
- this.cmd = forName(context.getConfiguration().get(CMD_KEY), Test.class);
-
- // this is required so that extensions of PE are instantiated within the
- // map reduce task...
- Class<? extends PerformanceEvaluation> peClass =
- forName(context.getConfiguration().get(PE_KEY), PerformanceEvaluation.class);
- try {
- peClass.getConstructor(Configuration.class).newInstance(context.getConfiguration());
- } catch (Exception e) {
- throw new IllegalStateException("Could not instantiate PE instance", e);
- }
- }
-
- private <Type> Class<? extends Type> forName(String className, Class<Type> type) {
- try {
- return Class.forName(className).asSubclass(type);
- } catch (ClassNotFoundException e) {
- throw new IllegalStateException("Could not find class for name: " + className, e);
- }
- }
-
- @Override
- protected void map(LongWritable key, Text value, final Context context)
- throws IOException, InterruptedException {
-
- Status status = new Status() {
- @Override
- public void setStatus(String msg) {
- context.setStatus(msg);
- }
- };
-
- ObjectMapper mapper = new ObjectMapper();
- TestOptions opts = mapper.readValue(value.toString(), TestOptions.class);
- Configuration conf = HBaseConfiguration.create(context.getConfiguration());
- final Connection con = ConnectionFactory.createConnection(conf);
- AsyncConnection asyncCon = null;
- try {
- asyncCon = ConnectionFactory.createAsyncConnection(conf).get();
- } catch (ExecutionException e) {
- throw new IOException(e);
- }
-
- // Evaluation task
- RunResult result = PerformanceEvaluation.runOneClient(this.cmd, conf, con, asyncCon, opts, status);
- // Collect how much time the thing took. Report as map output and
- // to the ELAPSED_TIME counter.
- context.getCounter(Counter.ELAPSED_TIME).increment(result.duration);
- context.getCounter(Counter.ROWS).increment(opts.perClientRunRows);
- context.write(new LongWritable(opts.startRow), new LongWritable(result.duration));
- context.progress();
- }
- }
-
- /*
- * If the table does not already exist, create it. Also recreate the table when
- * {@code opts.presplitRegions} is specified or when the existing table's
- * region replica count doesn't match {@code opts.replicas}.
- */
- static boolean checkTable(Admin admin, TestOptions opts) throws IOException {
- TableName tableName = TableName.valueOf(opts.tableName);
- boolean needsDelete = false, exists = admin.tableExists(tableName);
- boolean isReadCmd = opts.cmdName.toLowerCase(Locale.ROOT).contains("read")
- || opts.cmdName.toLowerCase(Locale.ROOT).contains("scan");
- if (!exists && isReadCmd) {
- throw new IllegalStateException(
- "Must specify an existing table for read commands. Run a write command first.");
- }
- HTableDescriptor desc =
- exists ? admin.getTableDescriptor(TableName.valueOf(opts.tableName)) : null;
- byte[][] splits = getSplits(opts);
-
- // recreate the table when user has requested presplit or when existing
- // {RegionSplitPolicy,replica count} does not match requested.
- if ((exists && opts.presplitRegions != DEFAULT_OPTS.presplitRegions)
- || (!isReadCmd && desc != null &&
- !StringUtils.equals(desc.getRegionSplitPolicyClassName(), opts.splitPolicy))
- || (!isReadCmd && desc != null && desc.getRegionReplication() != opts.replicas)) {
- needsDelete = true;
- // wait, why did it delete my table?!?
- LOG.debug(MoreObjects.toStringHelper("needsDelete")
- .add("needsDelete", needsDelete)
- .add("isReadCmd", isReadCmd)
- .add("exists", exists)
- .add("desc", desc)
- .add("presplit", opts.presplitRegions)
- .add("splitPolicy", opts.splitPolicy)
- .add("replicas", opts.replicas));
- }
-
- // remove an existing table
- if (needsDelete) {
- if (admin.isTableEnabled(tableName)) {
- admin.disableTable(tableName);
- }
- admin.deleteTable(tableName);
- }
-
- // table creation is necessary
- if (!exists || needsDelete) {
- desc = getTableDescriptor(opts);
- if (splits != null) {
- if (LOG.isDebugEnabled()) {
- for (int i = 0; i < splits.length; i++) {
- LOG.debug(" split " + i + ": " + Bytes.toStringBinary(splits[i]));
- }
- }
- }
- admin.createTable(desc, splits);
- LOG.info("Table " + desc + " created");
- }
- return admin.tableExists(tableName);
- }
-
- /**
- * Create an HTableDescriptor from provided TestOptions.
- */
- protected static HTableDescriptor getTableDescriptor(TestOptions opts) {
- HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(opts.tableName));
- HColumnDescriptor family = new HColumnDescriptor(FAMILY_NAME);
- family.setDataBlockEncoding(opts.blockEncoding);
- family.setCompressionType(opts.compression);
- family.setBloomFilterType(opts.bloomType);
- family.setBlocksize(opts.blockSize);
- if (opts.inMemoryCF) {
- family.setInMemory(true);
- }
- family.setInMemoryCompaction(opts.inMemoryCompaction);
- desc.addFamily(family);
- if (opts.replicas != DEFAULT_OPTS.replicas) {
- desc.setRegionReplication(opts.replicas);
- }
- if (opts.splitPolicy != DEFAULT_OPTS.splitPolicy) {
- desc.setRegionSplitPolicyClassName(opts.splitPolicy);
- }
- return desc;
- }
-
- /**
- * Generates splits based on the total number of rows and the requested number of split regions.
- */
- protected static byte[][] getSplits(TestOptions opts) {
- if (opts.presplitRegions == DEFAULT_OPTS.presplitRegions)
- return null;
-
- int numSplitPoints = opts.presplitRegions - 1;
- byte[][] splits = new byte[numSplitPoints][];
- int jump = opts.totalRows / opts.presplitRegions;
- for (int i = 0; i < numSplitPoints; i++) {
- int rowkey = jump * (1 + i);
- splits[i] = format(rowkey);
- }
- return splits;
- }
-
- /*
- * Run all clients in this vm each to its own thread.
- */
- static RunResult[] doLocalClients(final TestOptions opts, final Configuration conf)
- throws IOException, InterruptedException, ExecutionException {
- final Class<? extends TestBase> cmd = determineCommandClass(opts.cmdName);
- assert cmd != null;
- @SuppressWarnings("unchecked")
- Future<RunResult>[] threads = new Future[opts.numClientThreads];
- RunResult[] results = new RunResult[opts.numClientThreads];
- ExecutorService pool = Executors.newFixedThreadPool(opts.numClientThreads,
- new ThreadFactoryBuilder().setNameFormat("TestClient-%s").build());
- final Connection con = ConnectionFactory.createConnection(conf);
- final AsyncConnection asyncCon = ConnectionFactory.createAsyncConnection(conf).get();
- for (int i = 0; i < threads.length; i++) {
- final int index = i;
- threads[i] = pool.submit(new Callable<RunResult>() {
- @Override
- public RunResult call() throws Exception {
- TestOptions threadOpts = new TestOptions(opts);
- if (threadOpts.startRow == 0) threadOpts.startRow = index * threadOpts.perClientRunRows;
- RunResult run = runOneClient(cmd, conf, con, asyncCon, threadOpts, new Status() {
- @Override
- public void setStatus(final String msg) throws IOException {
- LOG.info(msg);
- }
- });
- LOG.info("Finished " + Thread.currentThread().getName() + " in " + run.duration +
- "ms over " + threadOpts.perClientRunRows + " rows");
- return run;
- }
- });
- }
- pool.shutdown();
-
- for (int i = 0; i < threads.length; i++) {
- try {
- results[i] = threads[i].get();
- } catch (ExecutionException e) {
- throw new IOException(e.getCause());
- }
- }
- final String test = cmd.getSimpleName();
- LOG.info("[" + test + "] Summary of timings (ms): "
- + Arrays.toString(results));
- Arrays.sort(results);
- long total = 0;
- for (RunResult result : results) {
- total += result.duration;
- }
- LOG.info("[" + test + "]"
- + "\tMin: " + results[0] + "ms"
- + "\tMax: " + results[results.length - 1] + "ms"
- + "\tAvg: " + (total / results.length) + "ms");
-
- con.close();
- asyncCon.close();
-
- return results;
- }
-
- /*
- * Run a mapreduce job. Run as many maps as asked-for clients.
- * Before we start up the job, write out an input file with instruction
- * per client regards which row they are to start on.
- * @param cmd Command to run.
- * @throws IOException
- */
- static Job doMapReduce(TestOptions opts, final Configuration conf)
- throws IOException, InterruptedException, ClassNotFoundException {
- final Class<? extends TestBase> cmd = determineCommandClass(opts.cmdName);
- assert cmd != null;
- Path inputDir = writeInputFile(conf, opts);
- conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
- conf.set(EvaluationMapTask.PE_KEY, PerformanceEvaluation.class.getName());
- Job job = Job.getInstance(conf);
- job.setJarByClass(PerformanceEvaluation.class);
- job.setJobName("HBase Performance Evaluation - " + opts.cmdName);
-
- job.setInputFormatClass(NLineInputFormat.class);
- NLineInputFormat.setInputPaths(job, inputDir);
- // this is default, but be explicit about it just in case.
- NLineInputFormat.setNumLinesPerSplit(job, 1);
-
- job.setOutputKeyClass(LongWritable.class);
- job.setOutputValueClass(LongWritable.class);
-
- job.setMapperClass(EvaluationMapTask.class);
- job.setReducerClass(LongSumReducer.class);
-
- job.setNumReduceTasks(1);
-
- job.setOutputFormatClass(TextOutputFormat.class);
- TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));
-
- TableMapReduceUtil.addDependencyJars(job);
- TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
- Histogram.class, // yammer metrics
- ObjectMapper.class); // jackson-mapper-asl
-
- TableMapReduceUtil.initCredentials(job);
-
- job.waitForCompletion(true);
- return job;
- }
-
- /**
- * Each client has one mapper to do the work; a single reduce task then collects the per-client results.
- */
-
- static String JOB_INPUT_FILENAME = "input.txt";
-
- /*
- * Write input file of offsets-per-client for the mapreduce job.
- * @param c Configuration
- * @return Directory that contains file written whose name is JOB_INPUT_FILENAME
- * @throws IOException
- */
- static Path writeInputFile(final Configuration c, final TestOptions opts) throws IOException {
- return writeInputFile(c, opts, new Path("."));
- }
-
- static Path writeInputFile(final Configuration c, final TestOptions opts, final Path basedir)
- throws IOException {
- SimpleDateFormat formatter = new SimpleDateFormat("yyyyMMddHHmmss");
- Path jobdir = new Path(new Path(basedir, PERF_EVAL_DIR), formatter.format(new Date()));
- Path inputDir = new Path(jobdir, "inputs");
-
- FileSystem fs = FileSystem.get(c);
- fs.mkdirs(inputDir);
-
- Path inputFile = new Path(inputDir, JOB_INPUT_FILENAME);
- PrintStream out = new PrintStream(fs.create(inputFile));
- // Make input random.
- Map<Integer, String> m = new TreeMap<>();
- Hash h = MurmurHash.getInstance();
- int perClientRows = (opts.totalRows / opts.numClientThreads);
- try {
- for (int j = 0; j < opts.numClientThreads; j++) {
- TestOptions next = new TestOptions(opts);
- next.startRow = j * perClientRows;
- next.perClientRunRows = perClientRows;
- String s = MAPPER.writeValueAsString(next);
- LOG.info("Client=" + j + ", input=" + s);
- byte[] b = Bytes.toBytes(s);
- int hash = h.hash(new ByteArrayHashKey(b, 0, b.length), -1);
- m.put(hash, s);
- }
- for (Map.Entry<Integer, String> e: m.entrySet()) {
- out.println(e.getValue());
- }
- } finally {
- out.close();
- }
- return inputDir;
- }
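As a worked illustration (numbers hypothetical): with opts.totalRows = 1,048,576 and opts.numClientThreads = 4, each client is assigned perClientRows = 1,048,576 / 4 = 262,144 rows, starting at offsets 0, 262,144, 524,288 and 786,432. Each JSON-serialized TestOptions line written here drives exactly one map task, because the job above uses NLineInputFormat with one line per split.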
-
- /**
- * Describes a command.
- */
- static class CmdDescriptor {
- private Class<? extends TestBase> cmdClass;
- private String name;
- private String description;
-
- CmdDescriptor(Class<? extends TestBase> cmdClass, String name, String description) {
- this.cmdClass = cmdClass;
- this.name = name;
- this.description = description;
- }
-
- public Class<? extends TestBase> getCmdClass() {
- return cmdClass;
- }
-
- public String getName() {
- return name;
- }
-
- public String getDescription() {
- return description;
- }
- }
-
- /**
- * Wraps up options passed to {@link org.apache.hadoop.hbase.PerformanceEvaluation}.
- * This makes tracking all these arguments a little easier.
- * NOTE: When ADDING AN OPTION, you need to add a data member, a getter/setter (so that JSON
- * serialization of this TestOptions class behaves), and you need to copy your new option from
- * 'that' to 'this' in the clone constructor below. Look for 'clone' below; a minimal sketch
- * follows this class.
- */
- static class TestOptions {
- String cmdName = null;
- boolean nomapred = false;
- boolean filterAll = false;
- int startRow = 0;
- float size = 1.0f;
- int perClientRunRows = DEFAULT_ROWS_PER_GB;
- int numClientThreads = 1;
- int totalRows = DEFAULT_ROWS_PER_GB;
- int measureAfter = 0;
- float sampleRate = 1.0f;
- double traceRate = 0.0;
- String tableName = TABLE_NAME;
- boolean flushCommits = true;
- boolean writeToWAL = true;
- boolean autoFlush = false;
- boolean oneCon = false;
- boolean useTags = false;
- int noOfTags = 1;
- boolean reportLatency = false;
- int multiGet = 0;
- int randomSleep = 0;
- boolean inMemoryCF = false;
- int presplitRegions = 0;
- int replicas = HTableDescriptor.DEFAULT_REGION_REPLICATION;
- String splitPolicy = null;
- Compression.Algorithm compression = Compression.Algorithm.NONE;
- BloomType bloomType = BloomType.ROW;
- int blockSize = HConstants.DEFAULT_BLOCKSIZE;
- DataBlockEncoding blockEncoding = DataBlockEncoding.NONE;
- boolean valueRandom = false;
- boolean valueZipf = false;
- int valueSize = DEFAULT_VALUE_LENGTH;
- int period = (this.perClientRunRows / 10) == 0? perClientRunRows: perClientRunRows / 10;
- int cycles = 1;
- int columns = 1;
- int caching = 30;
- boolean addColumns = true;
- MemoryCompactionPolicy inMemoryCompaction =
- MemoryCompactionPolicy.valueOf(
- CompactingMemStore.COMPACTING_MEMSTORE_TYPE_DEFAULT);
- boolean asyncPrefetch = false;
- boolean cacheBlocks = true;
- Scan.ReadType scanReadType = Scan.ReadType.DEFAULT;
-
- public TestOptions() {}
-
- /**
- * Clone constructor.
- * @param that Object to copy from.
- */
- public TestOptions(TestOptions that) {
- this.cmdName = that.cmdName;
- this.cycles = that.cycles;
- this.nomapred = that.nomapred;
- this.startRow = that.startRow;
- this.size = that.size;
- this.perClientRunRows = that.perClientRunRows;
- this.numClientThreads = that.numClientThreads;
- this.totalRows = that.totalRows;
- this.sampleRate = that.sampleRate;
- this.traceRate = that.traceRate;
- this.tableName = that.tableName;
- this.flushCommits = that.flushCommits;
- this.writeToWAL = that.writeToWAL;
- this.autoFlush = that.autoFlush;
- this.oneCon = that.oneCon;
- this.useTags = that.useTags;
- this.noOfTags = that.noOfTags;
- this.reportLatency = that.reportLatency;
- this.multiGet = that.multiGet;
- this.inMemoryCF = that.inMemoryCF;
- this.presplitRegions = that.presplitRegions;
- this.replicas = that.replicas;
- this.splitPolicy = that.splitPolicy;
- this.compression = that.compression;
- this.blockEncoding = that.blockEncoding;
- this.filterAll = that.filterAll;
- this.bloomType = that.bloomType;
- this.blockSize = that.blockSize;
- this.valueRandom = that.valueRandom;
- this.valueZipf = that.valueZipf;
- this.valueSize = that.valueSize;
- this.period = that.period;
- this.randomSleep = that.randomSleep;
- this.measureAfter = that.measureAfter;
- this.addColumns = that.addColumns;
- this.columns = that.columns;
- this.caching = that.caching;
- this.inMemoryCompaction = that.inMemoryCompaction;
- this.asyncPrefetch = that.asyncPrefetch;
- this.cacheBlocks = that.cacheBlocks;
- this.scanReadType = that.scanReadType;
- }
-
- public int getCaching() {
- return this.caching;
- }
-
- public void setCaching(final int caching) {
- this.caching = caching;
- }
-
- public int getColumns() {
- return this.columns;
- }
-
- public void setColumns(final int columns) {
- this.columns = columns;
- }
-
- public int getCycles() {
- return this.cycles;
- }
-
- public void setCycles(final int cycles) {
- this.cycles = cycles;
- }
-
- public boolean isValueZipf() {
- return valueZipf;
- }
-
- public void setValueZipf(boolean valueZipf) {
- this.valueZipf = valueZipf;
- }
-
- public String getCmdName() {
- return cmdName;
- }
-
- public void setCmdName(String cmdName) {
- this.cmdName = cmdName;
- }
-
- public int getRandomSleep() {
- return randomSleep;
- }
-
- public void setRandomSleep(int randomSleep) {
- this.randomSleep = randomSleep;
- }
-
- public int getReplicas() {
- return replicas;
- }
-
- public void setReplicas(int replicas) {
- this.replicas = replicas;
- }
-
- public String getSplitPolicy() {
- return splitPolicy;
- }
-
- public void setSplitPolicy(String splitPolicy) {
- this.splitPolicy = splitPolicy;
- }
-
- public void setNomapred(boolean nomapred) {
- this.nomapred = nomapred;
- }
-
- public void setFilterAll(boolean filterAll) {
- this.filterAll = filterAll;
- }
-
- public void setStartRow(int startRow) {
- this.startRow = startRow;
- }
-
- public void setSize(float size) {
- this.size = size;
- }
-
- public void setPerClientRunRows(int perClientRunRows) {
- this.perClientRunRows = perClientRunRows;
- }
-
- public void setNumClientThreads(int numClientThreads) {
- this.numClientThreads = numClientThreads;
- }
-
- public void setTotalRows(int totalRows) {
- this.totalRows = totalRows;
- }
-
- public void setSampleRate(float sampleRate) {
- this.sampleRate = sampleRate;
- }
-
- public void setTraceRate(double traceRate) {
- this.traceRate = traceRate;
- }
-
- public void setTableName(String tableName) {
- this.tableName = tableName;
- }
-
- public void setFlushCommits(boolean flushCommits) {
- this.flushCommits = flushCommits;
- }
-
- public void setWriteToWAL(boolean writeToWAL) {
- this.writeToWAL = writeToWAL;
- }
-
- public void setAutoFlush(boolean autoFlush) {
- this.autoFlush = autoFlush;
- }
-
- public void setOneCon(boolean oneCon) {
- this.oneCon = oneCon;
- }
-
- public void setUseTags(boolean useTags) {
- this.useTags = useTags;
- }
-
- public void setNoOfTags(int noOfTags) {
- this.noOfTags = noOfTags;
- }
-
- public void setReportLatency(boolean reportLatency) {
- this.reportLatency = reportLatency;
- }
-
- public void setMultiGet(int multiGet) {
- this.multiGet = multiGet;
- }
-
- public void setInMemoryCF(boolean inMemoryCF) {
- this.inMemoryCF = inMemoryCF;
- }
-
- public void setPresplitRegions(int presplitRegions) {
- this.presplitRegions = presplitRegions;
- }
-
- public void setCompression(Compression.Algorithm compression) {
- this.compression = compression;
- }
-
- public void setBloomType(BloomType bloomType) {
- this.bloomType = bloomType;
- }
-
- public void setBlockSize(int blockSize) {
- this.blockSize = blockSize;
- }
-
- public void setBlockEncoding(DataBlockEncoding blockEncoding) {
- this.blockEncoding = blockEncoding;
- }
-
- public void setValueRandom(boolean valueRandom) {
- this.valueRandom = valueRandom;
- }
-
- public void setValueSize(int valueSize) {
- this.valueSize = valueSize;
- }
-
- public void setPeriod(int period) {
- this.period = period;
- }
-
- public boolean isNomapred() {
- return nomapred;
- }
-
- public boolean isFilterAll() {
- return filterAll;
- }
-
- public int getStartRow() {
- return startRow;
- }
-
- public float getSize() {
- return size;
- }
-
- public int getPerClientRunRows() {
- return perClientRunRows;
- }
-
- public int getNumClientThreads() {
- return numClientThreads;
- }
-
- public int getTotalRows() {
- return totalRows;
- }
-
- public float getSampleRate() {
- return sampleRate;
- }
-
- public double getTraceRate() {
- return traceRate;
- }
-
- public String getTableName() {
- return tableName;
- }
-
- public boolean isFlushCommits() {
- return flushCommits;
- }
-
- public boolean isWriteToWAL() {
- return writeToWAL;
- }
-
- public boolean isAutoFlush() {
- return autoFlush;
- }
-
- public boolean isUseTags() {
- return useTags;
- }
-
- public int getNoOfTags() {
- return noOfTags;
- }
-
- public boolean isReportLatency() {
- return reportLatency;
- }
-
- public int getMultiGet() {
- return multiGet;
- }
-
- public boolean isInMemoryCF() {
- return inMemoryCF;
- }
-
- public int getPresplitRegions() {
- return presplitRegions;
- }
-
- public Compression.Algorithm getCompression() {
- return compression;
- }
-
- public DataBlockEncoding getBlockEncoding() {
- return blockEncoding;
- }
-
- public boolean isValueRandom() {
- return valueRandom;
- }
-
- public int getValueSize() {
- return valueSize;
- }
-
- public int getPeriod() {
- return period;
- }
-
- public BloomType getBloomType() {
- return bloomType;
- }
-
- public int getBlockSize() {
- return blockSize;
- }
-
- public boolean isOneCon() {
- return oneCon;
- }
-
- public int getMeasureAfter() {
- return measureAfter;
- }
-
- public void setMeasureAfter(int measureAfter) {
- this.measureAfter = measureAfter;
- }
-
- public boolean getAddColumns() {
- return addColumns;
- }
-
- public void setAddColumns(boolean addColumns) {
- this.addColumns = addColumns;
- }
-
- public void setInMemoryCompaction(MemoryCompactionPolicy inMemoryCompaction) {
- this.inMemoryCompaction = inMemoryCompaction;
- }
-
- public MemoryCompactionPolicy getInMemoryCompaction() {
- return this.inMemoryCompaction;
- }
- }
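As a minimal sketch of the NOTE above (the option name "batchSize" is hypothetical and not part of the original class), adding an option would touch three places inside TestOptions:

    // Hypothetical option "batchSize", for illustration only.
    // 1) data member, with its default
    int batchSize = 1;

    // 2) getter/setter so JSON (de)serialization of TestOptions keeps working
    public int getBatchSize() {
      return batchSize;
    }
    public void setBatchSize(int batchSize) {
      this.batchSize = batchSize;
    }

    // 3) one line in the clone constructor: this.batchSize = that.batchSize;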
-
- /*
- * A test.
- * Subclass to particularize what happens per row.
- */
- static abstract class TestBase {
- // The below makes it so that when Tests are all running in the one
- // JVM, each has a differently seeded Random.
- private static final Random randomSeed = new Random(System.currentTimeMillis());
-
- private static long nextRandomSeed() {
- return randomSeed.nextLong();
- }
- private final int everyN;
-
- protected final Random rand = new Random(nextRandomSeed());
- protected final Configuration conf;
- protected final TestOptions opts;
-
- private final Status status;
- private final Sampler<?> traceSampler;
- private final SpanReceiverHost receiverHost;
-
- private String testName;
- private Histogram latencyHistogram;
- private Histogram valueSizeHistogram;
- private RandomDistribution.Zipf zipf;
-
- /**
- * Note that all subclasses of this class must provide a public constructor
- * that has the exact same list of arguments.
- */
- TestBase(final Configuration conf, final TestOptions options, final Status status) {
- this.conf = conf;
- this.receiverHost = this.conf == null? null: SpanReceiverHost.getInstance(conf);
- this.opts = options;
- this.status = status;
- this.testName = this.getClass().getSimpleName();
- if (options.traceRate >= 1.0) {
- this.traceSampler = Sampler.ALWAYS;
- } else if (options.traceRate > 0.0) {
- conf.setDouble("hbase.sampler.fraction", options.traceRate);
- this.traceSampler = new ProbabilitySampler(new HBaseHTraceConfiguration(conf));
- } else {
- this.traceSampler = Sampler.NEVER;
- }
- everyN = (int) (opts.totalRows / (opts.totalRows * opts.sampleRate));
- if (options.isValueZipf()) {
- this.zipf = new RandomDistribution.Zipf(this.rand, 1, options.getValueSize(), 1.2);
- }
- LOG.info("Sampling 1 every " + everyN + " out of " + opts.perClientRunRows + " total rows.");
- }
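For example (illustrative numbers): with sampleRate = 0.01, totalRows / (totalRows * sampleRate) reduces to roughly 1 / sampleRate, so everyN = 100 and testTimed() below only issues a request for every 100th row index, i.e. about 1% of the assigned rows.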
-
- int getValueLength(final Random r) {
- if (this.opts.isValueRandom()) return Math.abs(r.nextInt() % opts.valueSize);
- else if (this.opts.isValueZipf()) return Math.abs(this.zipf.nextInt());
- else return opts.valueSize;
- }
-
- void updateValueSize(final Result [] rs) throws IOException {
- if (rs == null || !isRandomValueSize()) return;
- for (Result r: rs) updateValueSize(r);
- }
-
- void updateValueSize(final Result r) throws IOException {
- if (r == null || !isRandomValueSize()) return;
- int size = 0;
- for (CellScanner scanner = r.cellScanner(); scanner.advance();) {
- size += scanner.current().getValueLength();
- }
- updateValueSize(size);
- }
-
- void updateValueSize(final int valueSize) {
- if (!isRandomValueSize()) return;
- this.valueSizeHistogram.update(valueSize);
- }
-
- String generateStatus(final int sr, final int i, final int lr) {
- return sr + "/" + i + "/" + lr + ", latency " + getShortLatencyReport() +
- (!isRandomValueSize()? "": ", value size " + getShortValueSizeReport());
- }
-
- boolean isRandomValueSize() {
- return opts.valueRandom;
- }
-
- protected int getReportingPeriod() {
- return opts.period;
- }
-
- /**
- * Populated by testTakedown. Only implemented by RandomReadTest at the moment.
- */
- public Histogram getLatencyHistogram() {
- return latencyHistogram;
- }
-
- void testSetup() throws IOException {
- createConnection();
- onStartup();
- latencyHistogram = YammerHistogramUtils.newHistogram(new UniformReservoir(1024 * 500));
- valueSizeHistogram = YammerHistogramUtils.newHistogram(new UniformReservoir(1024 * 500));
- }
-
- abstract void createConnection() throws IOException;
-
- abstract void onStartup() throws IOException;
-
- void testTakedown() throws IOException {
- onTakedown();
- // Print all stats for this thread continuously.
- // Synchronize on Test.class so different threads don't intermingle the
- // output. We can't use 'this' here because each thread has its own instance of Test class.
- synchronized (Test.class) {
- status.setStatus("Test : " + testName + ", Thread : " + Thread.currentThread().getName());
- status.setStatus("Latency (us) : " + YammerHistogramUtils.getHistogramReport(
- latencyHistogram));
- status.setStatus("Num measures (latency) : " + latencyHistogram.getCount());
- status.setStatus(YammerHistogramUtils.getPrettyHistogramReport(latencyHistogram));
- status.setStatus("ValueSize (bytes) : "
- + YammerHistogramUtils.getHistogramReport(valueSizeHistogram));
- status.setStatus("Num measures (ValueSize): " + valueSizeHistogram.getCount());
- status.setStatus(YammerHistogramUtils.getPrettyHistogramReport(valueSizeHistogram));
- }
- closeConnection();
- receiverHost.closeReceivers();
- }
-
- abstract void onTakedown() throws IOException;
-
- abstract void closeConnection() throws IOException;
-
- /*
- * Run test
- * @return Elapsed time.
- * @throws IOException
- */
- long test() throws IOException, InterruptedException {
- testSetup();
- LOG.info("Timed test starting in thread " + Thread.currentThread().getName());
- final long startTime = System.nanoTime();
- try {
- testTimed();
- } finally {
- testTakedown();
- }
- return (System.nanoTime() - startTime) / 1000000;
- }
-
- int getStartRow() {
- return opts.startRow;
- }
-
- int getLastRow() {
- return getStartRow() + opts.perClientRunRows;
- }
-
- /**
- * Provides an extension point for tests that don't want a per row invocation.
- */
- void testTimed() throws IOException, InterruptedException {
- int startRow = getStartRow();
- int lastRow = getLastRow();
- // Report on completion of 1/10th of total.
- for (int ii = 0; ii < opts.cycles; ii++) {
- if (opts.cycles > 1) LOG.info("Cycle=" + ii + " of " + opts.cycles);
- for (int i = startRow; i < lastRow; i++) {
- if (i % everyN != 0) continue;
- long startTime = System.nanoTime();
- TraceScope scope = Trace.startSpan("test row", traceSampler);
- try {
- testRow(i);
- } finally {
- scope.close();
- }
- if ( (i - startRow) > opts.measureAfter) {
- // If multiget is enabled, say set to 10, testRow() returns immediately first 9 times
- // and sends the actual get request in the 10th iteration. We should only set latency
- // when actual request is sent because otherwise it turns out to be 0.
- if (opts.multiGet == 0 || (i - startRow + 1) % opts.multiGet == 0) {
- latencyHistogram.update((System.nanoTime() - startTime) / 1000);
- }
- if (status != null && i > 0 && (i % getReportingPeriod()) == 0) {
- status.setStatus(generateStatus(startRow, i, lastRow));
- }
- }
- }
- }
- }
-
- /**
- * @return Subset of the histograms' calculation.
- */
- public String getShortLatencyReport() {
- return YammerHistogramUtils.getShortHistogramReport(this.latencyHistogram);
- }
-
- /**
- * @return Subset of the histograms' calculation.
- */
- public String getShortValueSizeReport() {
- return YammerHistogramUtils.getShortHistogramReport(this.valueSizeHistogram);
- }
-
- /*
- * Test for individual row.
- * @param i Row index.
- */
- abstract void testRow(final int i) throws IOException, InterruptedException;
- }
-
- static abstract class Test extends TestBase {
- protected Connection connection;
-
- Test(final Connection con, final TestOptions options, final Status status) {
- super(con == null ? HBaseConfiguration.create() : con.getConfiguration(), options, status);
- this.connection = con;
- }
-
- @Override
- void createConnection() throws IOException {
- if (!opts.isOneCon()) {
- this.connection = ConnectionFactory.createConnection(conf);
- }
- }
-
- @Override
- void closeConnection() throws IOException {
- if (!opts.isOneCon()) {
- this.connection.close();
- }
- }
- }
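A hypothetical minimal subclass (not part of the original file; real tests such as RandomReadTest below follow the same shape) shows the constructor signature that runOneClient() looks up reflectively:

    // Hypothetical skeleton only; issues no requests.
    static class NoOpTest extends Test {
      NoOpTest(Connection con, TestOptions options, Status status) {
        super(con, options, status); // the exact argument list every Test subclass must accept
      }
      @Override void onStartup() throws IOException {}
      @Override void onTakedown() throws IOException {}
      @Override void testRow(final int i) throws IOException {
        // a real test would issue a Get/Put/Scan for row i here
      }
    }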
-
- static abstract class AsyncTest extends TestBase {
- protected AsyncConnection connection;
-
- AsyncTest(final AsyncConnection con, final TestOptions options, final Status status) {
- super(con == null ? HBaseConfiguration.create() : con.getConfiguration(), options, status);
- this.connection = con;
- }
-
- @Override
- void createConnection() {
- if (!opts.isOneCon()) {
- try {
- this.connection = ConnectionFactory.createAsyncConnection(conf).get();
- } catch (InterruptedException | ExecutionException e) {
- LOG.error("Failed to create async connection", e);
- }
- }
- }
-
- @Override
- void closeConnection() throws IOException {
- if (!opts.isOneCon()) {
- this.connection.close();
- }
- }
- }
-
- static abstract class TableTest extends Test {
- protected Table table;
-
- TableTest(Connection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- void onStartup() throws IOException {
- this.table = connection.getTable(TableName.valueOf(opts.tableName));
- }
-
- @Override
- void onTakedown() throws IOException {
- table.close();
- }
- }
-
- static abstract class AsyncTableTest extends AsyncTest {
- protected RawAsyncTable table;
-
- AsyncTableTest(AsyncConnection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- void onStartup() throws IOException {
- this.table = connection.getRawTable(TableName.valueOf(opts.tableName));
- }
-
- @Override
- void onTakedown() throws IOException {
- }
- }
-
- static class AsyncRandomReadTest extends AsyncTableTest {
- private final Consistency consistency;
- private ArrayList<Get> gets;
- private Random rd = new Random();
-
- AsyncRandomReadTest(AsyncConnection con, TestOptions options, Status status) {
- super(con, options, status);
- consistency = options.replicas == DEFAULT_OPTS.replicas ? null : Consistency.TIMELINE;
- if (opts.multiGet > 0) {
- LOG.info("MultiGet enabled. Sending GETs in batches of " + opts.multiGet + ".");
- this.gets = new ArrayList<>(opts.multiGet);
- }
- }
-
- @Override
- void testRow(final int i) throws IOException, InterruptedException {
- if (opts.randomSleep > 0) {
- Thread.sleep(rd.nextInt(opts.randomSleep));
- }
- Get get = new Get(getRandomRow(this.rand, opts.totalRows));
- if (opts.addColumns) {
- get.addColumn(FAMILY_NAME, QUALIFIER_NAME);
- } else {
- get.addFamily(FAMILY_NAME);
- }
- if (opts.filterAll) {
- get.setFilter(new FilterAllFilter());
- }
- get.setConsistency(consistency);
- if (LOG.isTraceEnabled()) LOG.trace(get.toString());
- try {
- if (opts.multiGet > 0) {
- this.gets.add(get);
- if (this.gets.size() == opts.multiGet) {
- Result[] rs =
- this.table.get(this.gets).stream().map(f -> propagate(f::get)).toArray(Result[]::new);
- updateValueSize(rs);
- this.gets.clear();
- }
- } else {
- updateValueSize(this.table.get(get).get());
- }
- } catch (ExecutionException e) {
- throw new IOException(e);
- }
- }
-
- public static RuntimeException runtime(Throwable e) {
- if (e instanceof RuntimeException) {
- return (RuntimeException) e;
- }
- return new RuntimeException(e);
- }
-
- public static <V> V propagate(Callable<V> callable) {
- try {
- return callable.call();
- } catch (Exception e) {
- throw runtime(e);
- }
- }
-
- @Override
- protected int getReportingPeriod() {
- int period = opts.perClientRunRows / 10;
- return period == 0 ? opts.perClientRunRows : period;
- }
-
- @Override
- protected void testTakedown() throws IOException {
- if (this.gets != null && this.gets.size() > 0) {
- this.table.get(gets);
- this.gets.clear();
- }
- super.testTakedown();
- }
- }
-
- static class AsyncRandomWriteTest extends AsyncTableTest {
- AsyncRandomWriteTest(AsyncConnection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- void testRow(final int i) throws IOException, InterruptedException {
- byte[] row = getRandomRow(this.rand, opts.totalRows);
- Put put = new Put(row);
- for (int column = 0; column < opts.columns; column++) {
- byte[] qualifier = column == 0 ? COLUMN_ZERO : Bytes.toBytes("" + column);
- byte[] value = generateData(this.rand, getValueLength(this.rand));
- if (opts.useTags) {
- byte[] tag = generateData(this.rand, TAG_LENGTH);
- Tag[] tags = new Tag[opts.noOfTags];
- for (int n = 0; n < opts.noOfTags; n++) {
- Tag t = new ArrayBackedTag((byte) n, tag);
- tags[n] = t;
- }
- KeyValue kv =
- new KeyValue(row, FAMILY_NAME, qualifier, HConstants.LATEST_TIMESTAMP, value, tags);
- put.add(kv);
- updateValueSize(kv.getValueLength());
- } else {
- put.addColumn(FAMILY_NAME, qualifier, value);
- updateValueSize(value.length);
- }
- }
- put.setDurability(opts.writeToWAL ? Durability.SYNC_WAL : Durability.SKIP_WAL);
- try {
- table.put(put).get();
- } catch (ExecutionException e) {
- throw new IOException(e);
- }
- }
- }
-
- static class AsyncScanTest extends AsyncTableTest {
- private ResultScanner testScanner;
- private AsyncTable asyncTable;
-
- AsyncScanTest(AsyncConnection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- void onStartup() throws IOException {
- this.asyncTable =
- connection.getTable(TableName.valueOf(opts.tableName),
- Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()));
- }
-
- @Override
- void testTakedown() throws IOException {
- if (this.testScanner != null) {
- this.testScanner.close();
- }
- super.testTakedown();
- }
-
- @Override
- void testRow(final int i) throws IOException {
- if (this.testScanner == null) {
- Scan scan =
- new Scan().withStartRow(format(opts.startRow)).setCaching(opts.caching)
- .setCacheBlocks(opts.cacheBlocks).setAsyncPrefetch(opts.asyncPrefetch)
- .setReadType(opts.scanReadType);
- if (opts.addColumns) {
- scan.addColumn(FAMILY_NAME, QUALIFIER_NAME);
- } else {
- scan.addFamily(FAMILY_NAME);
- }
- if (opts.filterAll) {
- scan.setFilter(new FilterAllFilter());
- }
- this.testScanner = asyncTable.getScanner(scan);
- }
- Result r = testScanner.next();
- updateValueSize(r);
- }
- }
-
- static class AsyncSequentialReadTest extends AsyncTableTest {
- AsyncSequentialReadTest(AsyncConnection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- void testRow(final int i) throws IOException, InterruptedException {
- Get get = new Get(format(i));
- if (opts.addColumns) {
- get.addColumn(FAMILY_NAME, QUALIFIER_NAME);
- }
- if (opts.filterAll) {
- get.setFilter(new FilterAllFilter());
- }
- try {
- updateValueSize(table.get(get).get());
- } catch (ExecutionException e) {
- throw new IOException(e);
- }
- }
- }
-
- static class AsyncSequentialWriteTest extends AsyncTableTest {
- AsyncSequentialWriteTest(AsyncConnection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- void testRow(final int i) throws IOException, InterruptedException {
- byte[] row = format(i);
- Put put = new Put(row);
- for (int column = 0; column < opts.columns; column++) {
- byte [] qualifier = column == 0? COLUMN_ZERO: Bytes.toBytes("" + column);
- byte[] value = generateData(this.rand, getValueLength(this.rand));
- if (opts.useTags) {
- byte[] tag = generateData(this.rand, TAG_LENGTH);
- Tag[] tags = new Tag[opts.noOfTags];
- for (int n = 0; n < opts.noOfTags; n++) {
- Tag t = new ArrayBackedTag((byte) n, tag);
- tags[n] = t;
- }
- KeyValue kv = new KeyValue(row, FAMILY_NAME, qualifier, HConstants.LATEST_TIMESTAMP,
- value, tags);
- put.add(kv);
- updateValueSize(kv.getValueLength());
- } else {
- put.addColumn(FAMILY_NAME, qualifier, value);
- updateValueSize(value.length);
- }
- }
- put.setDurability(opts.writeToWAL ? Durability.SYNC_WAL : Durability.SKIP_WAL);
- try {
- table.put(put).get();
- } catch (ExecutionException e) {
- throw new IOException(e);
- }
- }
- }
-
- static abstract class BufferedMutatorTest extends Test {
- protected BufferedMutator mutator;
- protected Table table;
-
- BufferedMutatorTest(Connection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- void onStartup() throws IOException {
- this.mutator = connection.getBufferedMutator(TableName.valueOf(opts.tableName));
- this.table = connection.getTable(TableName.valueOf(opts.tableName));
- }
-
- @Override
- void onTakedown() throws IOException {
- mutator.close();
- table.close();
- }
- }
-
- static class RandomSeekScanTest extends TableTest {
- RandomSeekScanTest(Connection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- void testRow(final int i) throws IOException {
- Scan scan = new Scan().withStartRow(getRandomRow(this.rand, opts.totalRows))
- .setCaching(opts.caching).setCacheBlocks(opts.cacheBlocks)
- .setAsyncPrefetch(opts.asyncPrefetch).setReadType(opts.scanReadType);
- FilterList list = new FilterList();
- if (opts.addColumns) {
- scan.addColumn(FAMILY_NAME, QUALIFIER_NAME);
- } else {
- scan.addFamily(FAMILY_NAME);
- }
- if (opts.filterAll) {
- list.addFilter(new FilterAllFilter());
- }
- list.addFilter(new WhileMatchFilter(new PageFilter(120)));
- scan.setFilter(list);
- ResultScanner s = this.table.getScanner(scan);
- for (Result rr; (rr = s.next()) != null;) {
- updateValueSize(rr);
- }
- s.close();
- }
-
- @Override
- protected int getReportingPeriod() {
- int period = opts.perClientRunRows / 100;
- return period == 0 ? opts.perClientRunRows : period;
- }
-
- }
-
- static abstract class RandomScanWithRangeTest extends TableTest {
- RandomScanWithRangeTest(Connection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- void testRow(final int i) throws IOException {
- Pair<byte[], byte[]> startAndStopRow = getStartAndStopRow();
- Scan scan = new Scan().withStartRow(startAndStopRow.getFirst())
- .withStopRow(startAndStopRow.getSecond()).setCaching(opts.caching)
- .setCacheBlocks(opts.cacheBlocks).setAsyncPrefetch(opts.asyncPrefetch)
- .setReadType(opts.scanReadType);
- if (opts.filterAll) {
- scan.setFilter(new FilterAllFilter());
- }
- if (opts.addColumns) {
- scan.addColumn(FAMILY_NAME, QUALIFIER_NAME);
- } else {
- scan.addFamily(FAMILY_NAME);
- }
- Result r = null;
- int count = 0;
- ResultScanner s = this.table.getScanner(scan);
- for (; (r = s.next()) != null;) {
- updateValueSize(r);
- count++;
- }
- if (i % 100 == 0) {
- LOG.info(String.format("Scan for key range %s - %s returned %s rows",
- Bytes.toString(startAndStopRow.getFirst()),
- Bytes.toString(startAndStopRow.getSecond()), count));
- }
-
- s.close();
- }
-
- protected abstract Pair<byte[],byte[]> getStartAndStopRow();
-
- protected Pair<byte[], byte[]> generateStartAndStopRows(int maxRange) {
- int start = this.rand.nextInt(Integer.MAX_VALUE) % opts.totalRows;
- int stop = start + maxRange;
- return new Pair<>(format(start), format(stop));
- }
-
- @Override
- protected int getReportingPeriod() {
- int period = opts.perClientRunRows / 100;
- return period == 0? opts.perClientRunRows: period;
- }
- }
-
- static class RandomScanWithRange10Test extends RandomScanWithRangeTest {
- RandomScanWithRange10Test(Connection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- protected Pair<byte[], byte[]> getStartAndStopRow() {
- return generateStartAndStopRows(10);
- }
- }
-
- static class RandomScanWithRange100Test extends RandomScanWithRangeTest {
- RandomScanWithRange100Test(Connection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- protected Pair<byte[], byte[]> getStartAndStopRow() {
- return generateStartAndStopRows(100);
- }
- }
-
- static class RandomScanWithRange1000Test extends RandomScanWithRangeTest {
- RandomScanWithRange1000Test(Connection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- protected Pair<byte[], byte[]> getStartAndStopRow() {
- return generateStartAndStopRows(1000);
- }
- }
-
- static class RandomScanWithRange10000Test extends RandomScanWithRangeTest {
- RandomScanWithRange10000Test(Connection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- protected Pair<byte[], byte[]> getStartAndStopRow() {
- return generateStartAndStopRows(10000);
- }
- }
-
- static class RandomReadTest extends TableTest {
- private final Consistency consistency;
- private ArrayList<Get> gets;
- private Random rd = new Random();
-
- RandomReadTest(Connection con, TestOptions options, Status status) {
- super(con, options, status);
- consistency = options.replicas == DEFAULT_OPTS.replicas ? null : Consistency.TIMELINE;
- if (opts.multiGet > 0) {
- LOG.info("MultiGet enabled. Sending GETs in batches of " + opts.multiGet + ".");
- this.gets = new ArrayList<>(opts.multiGet);
- }
- }
-
- @Override
- void testRow(final int i) throws IOException, InterruptedException {
- if (opts.randomSleep > 0) {
- Thread.sleep(rd.nextInt(opts.randomSleep));
- }
- Get get = new Get(getRandomRow(this.rand, opts.totalRows));
- if (opts.addColumns) {
- get.addColumn(FAMILY_NAME, QUALIFIER_NAME);
- } else {
- get.addFamily(FAMILY_NAME);
- }
- if (opts.filterAll) {
- get.setFilter(new FilterAllFilter());
- }
- get.setConsistency(consistency);
- if (LOG.isTraceEnabled()) LOG.trace(get.toString());
- if (opts.multiGet > 0) {
- this.gets.add(get);
- if (this.gets.size() == opts.multiGet) {
- Result [] rs = this.table.get(this.gets);
- updateValueSize(rs);
- this.gets.clear();
- }
- } else {
- updateValueSize(this.table.get(get));
- }
- }
-
- @Override
- protected int getReportingPeriod() {
- int period = opts.perClientRunRows / 10;
- return period == 0 ? opts.perClientRunRows : period;
- }
-
- @Override
- protected void testTakedown() throws IOException {
- if (this.gets != null && this.gets.size() > 0) {
- this.table.get(gets);
- this.gets.clear();
- }
- super.testTakedown();
- }
- }
-
- static class RandomWriteTest extends BufferedMutatorTest {
- RandomWriteTest(Connection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- void testRow(final int i) throws IOException {
- byte[] row = getRandomRow(this.rand, opts.totalRows);
- Put put = new Put(row);
- for (int column = 0; column < opts.columns; column++) {
- byte [] qualifier = column == 0? COLUMN_ZERO: Bytes.toBytes("" + column);
- byte[] value = generateData(this.rand, getValueLength(this.rand));
- if (opts.useTags) {
- byte[] tag = generateData(this.rand, TAG_LENGTH);
- Tag[] tags = new Tag[opts.noOfTags];
- for (int n = 0; n < opts.noOfTags; n++) {
- Tag t = new ArrayBackedTag((byte) n, tag);
- tags[n] = t;
- }
- KeyValue kv = new KeyValue(row, FAMILY_NAME, qualifier, HConstants.LATEST_TIMESTAMP,
- value, tags);
- put.add(kv);
- updateValueSize(kv.getValueLength());
- } else {
- put.addColumn(FAMILY_NAME, qualifier, value);
- updateValueSize(value.length);
- }
- }
- put.setDurability(opts.writeToWAL ? Durability.SYNC_WAL : Durability.SKIP_WAL);
- if (opts.autoFlush) {
- table.put(put);
- } else {
- mutator.mutate(put);
- }
- }
- }
-
- static class ScanTest extends TableTest {
- private ResultScanner testScanner;
-
- ScanTest(Connection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- void testTakedown() throws IOException {
- if (this.testScanner != null) {
- this.testScanner.close();
- }
- super.testTakedown();
- }
-
-
- @Override
- void testRow(final int i) throws IOException {
- if (this.testScanner == null) {
- Scan scan = new Scan().withStartRow(format(opts.startRow)).setCaching(opts.caching)
- .setCacheBlocks(opts.cacheBlocks).setAsyncPrefetch(opts.asyncPrefetch)
- .setReadType(opts.scanReadType);
- if (opts.addColumns) {
- scan.addColumn(FAMILY_NAME, QUALIFIER_NAME);
- } else {
- scan.addFamily(FAMILY_NAME);
- }
- if (opts.filterAll) {
- scan.setFilter(new FilterAllFilter());
- }
- this.testScanner = table.getScanner(scan);
- }
- Result r = testScanner.next();
- updateValueSize(r);
- }
- }
-
- /**
- * Base class for operations that are CAS-like: they read a value and then set it based on what
- * they read. In this category are increment, append, checkAndPut, etc.
- *
- * <p>These operations also want some concurrency going on. Usually when these tests run, they
- * operate in their own part of the key range. In CASTest, we will have them all overlap on the
- * same key space. We do this with our getStartRow and getLastRow overrides.
- */
- static abstract class CASTableTest extends TableTest {
- private final byte [] qualifier;
- CASTableTest(Connection con, TestOptions options, Status status) {
- super(con, options, status);
- qualifier = Bytes.toBytes(this.getClass().getSimpleName());
- }
-
- byte [] getQualifier() {
- return this.qualifier;
- }
-
- @Override
- int getStartRow() {
- return 0;
- }
-
- @Override
- int getLastRow() {
- return opts.perClientRunRows;
- }
- }
-
- static class IncrementTest extends CASTableTest {
- IncrementTest(Connection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- void testRow(final int i) throws IOException {
- Increment increment = new Increment(format(i));
- increment.addColumn(FAMILY_NAME, getQualifier(), 1L);
- updateValueSize(this.table.increment(increment));
- }
- }
-
- static class AppendTest extends CASTableTest {
- AppendTest(Connection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- void testRow(final int i) throws IOException {
- byte [] bytes = format(i);
- Append append = new Append(bytes);
- append.addColumn(FAMILY_NAME, getQualifier(), bytes);
- updateValueSize(this.table.append(append));
- }
- }
-
- static class CheckAndMutateTest extends CASTableTest {
- CheckAndMutateTest(Connection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- void testRow(final int i) throws IOException {
- byte [] bytes = format(i);
- // Put a known value so when we go to check it, it is there.
- Put put = new Put(bytes);
- put.addColumn(FAMILY_NAME, getQualifier(), bytes);
- this.table.put(put);
- RowMutations mutations = new RowMutations(bytes);
- mutations.add(put);
- this.table.checkAndMutate(bytes, FAMILY_NAME, getQualifier(), CompareOp.EQUAL, bytes,
- mutations);
- }
- }
-
- static class CheckAndPutTest extends CASTableTest {
- CheckAndPutTest(Connection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- void testRow(final int i) throws IOException {
- byte [] bytes = format(i);
- // Put a known value so when we go to check it, it is there.
- Put put = new Put(bytes);
- put.addColumn(FAMILY_NAME, getQualifier(), bytes);
- this.table.put(put);
- this.table.checkAndPut(bytes, FAMILY_NAME, getQualifier(), CompareOp.EQUAL, bytes, put);
- }
- }
-
- static class CheckAndDeleteTest extends CASTableTest {
- CheckAndDeleteTest(Connection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- void testRow(final int i) throws IOException {
- byte [] bytes = format(i);
- // Put a known value so when we go to check it, it is there.
- Put put = new Put(bytes);
- put.addColumn(FAMILY_NAME, getQualifier(), bytes);
- this.table.put(put);
- Delete delete = new Delete(put.getRow());
- delete.addColumn(FAMILY_NAME, getQualifier());
- this.table.checkAndDelete(bytes, FAMILY_NAME, getQualifier(), CompareOp.EQUAL, bytes, delete);
- }
- }
-
- static class SequentialReadTest extends TableTest {
- SequentialReadTest(Connection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- void testRow(final int i) throws IOException {
- Get get = new Get(format(i));
- if (opts.addColumns) {
- get.addColumn(FAMILY_NAME, QUALIFIER_NAME);
- }
- if (opts.filterAll) {
- get.setFilter(new FilterAllFilter());
- }
- updateValueSize(table.get(get));
- }
- }
-
- static class SequentialWriteTest extends BufferedMutatorTest {
- SequentialWriteTest(Connection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- void testRow(final int i) throws IOException {
- byte[] row = format(i);
- Put put = new Put(row);
- for (int column = 0; column < opts.columns; column++) {
- byte [] qualifier = column == 0? COLUMN_ZERO: Bytes.toBytes("" + column);
- byte[] value = generateData(this.rand, getValueLength(this.rand));
- if (opts.useTags) {
- byte[] tag = generateData(this.rand, TAG_LENGTH);
- Tag[] tags = new Tag[opts.noOfTags];
- for (int n = 0; n < opts.noOfTags; n++) {
- Tag t = new ArrayBackedTag((byte) n, tag);
- tags[n] = t;
- }
- KeyValue kv = new KeyValue(row, FAMILY_NAME, qualifier, HConstants.LATEST_TIMESTAMP,
- value, tags);
- put.add(kv);
- updateValueSize(kv.getValueLength());
- } else {
- put.addColumn(FAMILY_NAME, qualifier, value);
- updateValueSize(value.length);
- }
- }
- put.setDurability(opts.writeToWAL ? Durability.SYNC_WAL : Durability.SKIP_WAL);
- if (opts.autoFlush) {
- table.put(put);
- } else {
- mutator.mutate(put);
- }
- }
- }
-
- static class FilteredScanTest extends TableTest {
- protected static final Log LOG = LogFactory.getLog(FilteredScanTest.class.getName());
-
- FilteredScanTest(Connection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- void testRow(int i) throws IOException {
- byte[] value = generateData(this.rand, getValueLength(this.rand));
- Scan scan = constructScan(value);
- ResultScanner scanner = null;
- try {
- scanner = this.table.getScanner(scan);
- for (Result r = null; (r = scanner.next()) != null;) {
- updateValueSize(r);
- }
- } finally {
- if (scanner != null) scanner.close();
- }
- }
-
- protected Scan constructScan(byte[] valuePrefix) throws IOException {
- FilterList list = new FilterList();
- Filter filter = new SingleColumnValueFilter(
- FAMILY_NAME, COLUMN_ZERO, CompareFilter.CompareOp.EQUAL,
- new BinaryComparator(valuePrefix)
- );
- list.addFilter(filter);
- if(opts.filterAll) {
- list.addFilter(new FilterAllFilter());
- }
- Scan scan = new Scan().setCaching(opts.caching).setCacheBlocks(opts.cacheBlocks)
- .setAsyncPrefetch(opts.asyncPrefetch).setReadType(opts.scanReadType);
- if (opts.addColumns) {
- scan.addColumn(FAMILY_NAME, QUALIFIER_NAME);
- } else {
- scan.addFamily(FAMILY_NAME);
- }
- scan.setFilter(list);
- return scan;
- }
- }
-
- /**
- * Compute a throughput rate in MB/s.
- * @param rows Number of records consumed.
- * @param timeMs Time taken in milliseconds.
- * @param valueSize Average value length in bytes.
- * @param columns Number of columns written per row.
- * @return String value with label, e.g. '123.76 MB/s'
- */
- private static String calculateMbps(int rows, long timeMs, final int valueSize, int columns) {
- BigDecimal rowSize = BigDecimal.valueOf(ROW_LENGTH +
- ((valueSize + FAMILY_NAME.length + COLUMN_ZERO.length) * columns));
- BigDecimal mbps = BigDecimal.valueOf(rows).multiply(rowSize, CXT)
- .divide(BigDecimal.valueOf(timeMs), CXT).multiply(MS_PER_SEC, CXT)
- .divide(BYTES_PER_MB, CXT);
- return FMT.format(mbps) + " MB/s";
- }
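Illustrative check (sizes hypothetical): at roughly 1 KiB per row (row key plus one column's family, qualifier and value), 1,048,576 rows consumed in 100,000 ms is about 1 GiB over 100 s, which this method would report as roughly 10 MB/s.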
-
- /*
- * Format passed integer.
- * @param number Number to format.
- * @return Zero-prefixed, ROW_LENGTH-byte-wide decimal version of the passed
- * number (the absolute value is used if the number is negative).
- */
- public static byte [] format(final int number) {
- byte [] b = new byte[ROW_LENGTH];
- int d = Math.abs(number);
- for (int i = b.length - 1; i >= 0; i--) {
- b[i] = (byte)((d % 10) + '0');
- d /= 10;
- }
- return b;
- }
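For example, if ROW_LENGTH were 10 (a value chosen only for illustration), format(127) would return the bytes of "0000000127", and format(-127) would return the same, since the absolute value is taken.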
-
- /*
- * This method takes some time and is called inline while uploading data. For
- * example, in the mapfile test, generating the key and value
- * consumes about 30% of CPU time.
- * @return Generated random value to insert into a table cell.
- */
- public static byte[] generateData(final Random r, int length) {
- byte [] b = new byte [length];
- int i;
-
- for(i = 0; i < (length-8); i += 8) {
- b[i] = (byte) (65 + r.nextInt(26));
- b[i+1] = b[i];
- b[i+2] = b[i];
- b[i+3] = b[i];
- b[i+4] = b[i];
- b[i+5] = b[i];
- b[i+6] = b[i];
- b[i+7] = b[i];
- }
-
- byte a = (byte) (65 + r.nextInt(26));
- for(; i < length; i++) {
- b[i] = a;
- }
- return b;
- }
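As an illustration, a call with length = 20 might return the bytes of something like "QQQQQQQQBBBBBBBBKKKK" (the letters vary randomly per call): full blocks of eight identical ASCII letters, with the trailing partial block filled by a single letter.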
-
- static byte [] getRandomRow(final Random random, final int totalRows) {
- return format(generateRandomRow(random, totalRows));
- }
-
- static int generateRandomRow(final Random random, final int totalRows) {
- return random.nextInt(Integer.MAX_VALUE) % totalRows;
- }
-
- static RunResult runOneClient(final Class<? extends TestBase> cmd, Configuration conf,
- Connection con, AsyncConnection asyncCon, TestOptions opts, final Status status)
- throws IOException, InterruptedException {
- status.setStatus("Start " + cmd + " at offset " + opts.startRow + " for "
- + opts.perClientRunRows + " rows");
- long totalElapsedTime;
-
- final TestBase t;
- try {
- if (AsyncTest.class.isAssignableFrom(cmd)) {
- Class<? extends AsyncTest> newCmd = (Class<? extends AsyncTest>) cmd;
- Constructor<? extends AsyncTest> constructor =
- newCmd.getDeclaredConstructor(AsyncConnection.class, TestOptions.class, Status.class);
- t = constructor.newInstance(asyncCon, opts, status);
- } else {
- Class<? extends Test> newCmd = (Class<? extends Test>) cmd;
- Constructor<? extends Test> constructor =
- newCmd.getDeclaredConstructor(Connection.class, TestOptions.class, Status.class);
- t = constructor.newInstance(con, opts, status);
- }
- } catch (NoSuchMethodException e) {
- throw new IllegalArgumentException("Invalid command class: " + cmd.getName()
- + ". It does not provide a constructor as described by "
- + "the javadoc comment. Available constructors are: "
- + Arrays.toString(cmd.getConstructors()));
- } catch (Exception e) {
- throw new IllegalStateException("Failed to construct command class", e);
- }
- totalElapsedTime = t.test();
-
- status.setStatus("Finished " + cmd + " in " + totalElapsedTime +
- "ms at offset " + opts.startRow + " for " + opts.perClientRunRows + " rows" +
- " (" + calculateMbps((int)(opts.perClientRunRows * opts.sampleRate), totalElapsedTime,
- getAverageValueLength(opts), opts.columns) + ")");
-
- return new RunResult(totalElapsedTime, t.getLatencyHistogram());
- }
-
- private static int getAverageValueLength(final TestOptions opts) {
- return opts.valueRandom? opts.valueSize/2: opts.valueSize;
- }
-
- private void runTest(final Class<? extends TestBase> cmd, TestOptions opts) throws IOException,
- InterruptedException, ClassNotFoundException, ExecutionException {
- // Log the configuration we're going to run with. Uses JSON mapper because lazy. It'll do
- // the TestOptions introspection for us and dump the output in a readable format.
- LOG.info(cmd.getSimpleName() + " test run options=" + MAPPER.writeValueAsString(opts));
- Admin admin = null;
- Connection connection = null;
- try {
- connection = ConnectionFactory.createConnection(getConf());
- admin = connection.getAdmin();
- checkTable(admin, opts);
- } finally {
- if (admin != null) admin.close();
- if (connection != null) connection.close();
- }
- if (opts.nomapred) {
- doLocalClients(opts, getConf());
- } else {
- doMapReduce(opts, getConf());
- }
- }
-
- protected void printUsage() {
- printUsage(this.getClass().getName(), null);
- }
-
- protected static void printUsage(final String message) {
- printUsage(PerformanceEvaluation.class.getName(), message);
- }
-
- protected static void printUsageAndExit(final String message, final int exitCode) {
- printUsage(message);
- System.exit(exitCode);
- }
-
- protected static void printUsage(final String className, final String message) {
- if (message != null && message.length() > 0) {
- System.err.println(message);
- }
- System.err.println("Usage: java " + className + " \\");
- System.err.println(" <OPTIONS> [-D<property=value>]* <command> <nclients>");
- System.err.println();
- System.err.println("General Options:");
- System.err.println(" nomapred Run multiple clients using threads " +
- "(rather than use mapreduce)");
- System.err.println(" oneCon all the threads share the same connection. Default: False");
- System.err.println(" sampleRate Execute test on a sample of total " +
- "rows. Only supported by randomRead. Default: 1.0");
- System.err.println(" period Report every 'period' rows: " +
- "Default: opts.perClientRunRows / 10 = " + DEFAULT_OPTS.getPerClientRunRows()/10);
- System.err.println(" cycles How many times to cycle the test. Defaults: 1.");
- System.err.println(" traceRate Enable HTrace spans. Initiate tracing every N rows. " +
- "Default: 0");
- System.err.println(" latency Set to report operation latencies. Default: False");
- System.err.println(" measureAfter Start to measure the latency once 'measureAfter'" +
- " rows have been treated. Default: 0");
- System.err.println(" valueSize Pass value size to use: Default: "
- + DEFAULT_OPTS.getValueSize());
- System.err.println(" valueRandom Set if we should vary value size between 0 and " +
- "'valueSize'; set on read for stats on size: Default: Not set.");
- System.err.println(" blockEncoding Block encoding to use. Value should be one of "
- + Arrays.toString(DataBlockEncoding.values()) + ". Default: NONE");
- System.err.println();
- System.err.println("Table Creation / Write Tests:");
- System.err.println(" table Alternate table name. Default: 'TestTable'");
- System.err.println(" rows Rows each client runs. Default: "
- + DEFAULT_OPTS.getPerClientRunRows()
- + ". In case of randomReads and randomSeekScans this could"
- + " be specified along with --size to specify the number of rows to be scanned within"
- + " the total range specified by the size.");
- System.err.println(
- " size Total size in GiB. Mutually exclusive with --rows for writes and scans"
- + ". But for randomReads and randomSeekScans when you use size with --rows you could"
- + " use size to specify the end range and --rows"
- + " specifies the number of rows within that range. " + "Default: 1.0.");
- System.err.println(" compress Compression type to use (GZ, LZO, ...). Default: 'NONE'");
- System.err.println(" flushCommits Used to determine if the test should flush the table. " +
- "Default: false");
- System.err.println(" valueZipf Set if we should vary value size between 0 and " +
- "'valueSize' in zipf form: Default: Not set.");
- System.err.println(" writeToWAL Set writeToWAL on puts. Default: True");
- System.err.println(" autoFlush Set autoFlush on htable. Default: False");
- System.err.println(" presplit Create presplit table. If a table with same name exists,"
- + " it'll be deleted and recreated (instead of verifying count of its existing regions). "
- + "Recommended for accurate perf analysis (see guide). Default: disabled");
- System.err.println(" usetags Writes tags along with KVs. Use with HFile V3. " +
- "Default: false");
- System.err.println(" numoftags Specify the no of tags that would be needed. " +
- "This works only if usetags is true. Default: " + DEFAULT_OPTS.noOfTags);
- System.err.println(" splitPolicy Specify a custom RegionSplitPolicy for the table.");
- System.err.println(" columns Columns to write per row. Default: 1");
- System.err.println();
- System.err.println("Read Tests:");
- System.err.println(" filterAll Helps to filter out all the rows on the server side"
- + " there by not returning any thing back to the client. Helps to check the server side"
- + " performance. Uses FilterAllFilter internally. ");
- System.err.println(" multiGet Batch gets together into groups of N. Only supported " +
- "by randomRead. Default: disabled");
- System.err.println(" inmemory Tries to keep the HFiles of the CF " +
- "inmemory as far as possible. Not guaranteed that reads are always served " +
- "from memory. Default: false");
- System.err.println(" bloomFilter Bloom filter type, one of "
- + Arrays.toString(BloomType.values()));
- System.err.println(" blockSize Blocksize to use when writing out hfiles. ");
- System.err.println(" inmemoryCompaction Makes the column family to do inmemory flushes/compactions. "
- + "Uses the CompactingMemstore");
- System.err.println(" addColumns Adds columns to scans/gets explicitly. Default: true");
- System.err.println(" replicas Enable region replica testing. Defaults: 1.");
- System.err.println(" randomSleep Do a random sleep before each get between 0 and entered value. Defaults: 0");
- System.err.println(" caching Scan caching to use. Default: 30");
- System.err.println(" asyncPrefetch Enable asyncPrefetch for scan");
- System.err.println(" cacheBlocks Set the cacheBlocks option for scan. Default: true");
- System.err.println(" scanReadType Set the readType option for scan, stream/pread/default. Default: default");
- System.err.println();
- System.err.println(" Note: -D properties will be applied to the conf used. ");
- System.err.println(" For example: ");
- System.err.println(" -Dmapreduce.output.fileoutputformat.compress=true");
- System.err.println(" -Dmapreduce.task.timeout=60000");
- System.err.println();
- System.err.println("Command:");
- for (CmdDescriptor command : COMMANDS.values()) {
- System.err.println(String.format(" %-20s %s", command.getName(), command.getDescription()));
- }
- System.err.println();
- System.err.println("Args:");
- System.err.println(" nclients Integer. Required. Total number of clients "
- + "(and HRegionServers) running. 1 <= value <= 500");
- System.err.println("Examples:");
- System.err.println(" To run a single client doing the default 1M sequentialWrites:");
- System.err.println(" $ hbase " + className + " sequentialWrite 1");
- System.err.println(" To run 10 clients doing increments over ten rows:");
- System.err.println(" $ hbase " + className + " --rows=10 --nomapred increment 10");
- }
-
- /**
- * Parse options passed in via an arguments array. Assumes that array has been split
- * on white-space and placed into a {@code Queue}. Any unknown arguments will remain
- * in the queue at the conclusion of this method call. It's up to the caller to deal
- * with these unrecognized arguments.
- */
- static TestOptions parseOpts(Queue<String> args) {
- TestOptions opts = new TestOptions();
-
- String cmd = null;
- while ((cmd = args.poll()) != null) {
- if (cmd.equals("-h") || cmd.startsWith("--h")) {
- // place item back onto queue so that caller knows parsing was incomplete
- args.add(cmd);
- break;
- }
-
- final String nmr = "--nomapred";
- if (cmd.startsWith(nmr)) {
- opts.nomapred = true;
- continue;
- }
-
- final String rows = "--rows=";
- if (cmd.startsWith(rows)) {
- opts.perClientRunRows = Integer.parseInt(cmd.substring(rows.length()));
- continue;
- }
-
- final String cycles = "--cycles=";
- if (cmd.startsWith(cycles)) {
- opts.cycles = Integer.parseInt(cmd.substring(cycles.length()));
- continue;
- }
-
- final String sampleRate = "--sampleRate=";
- if (cmd.startsWith(sampleRate)) {
- opts.sampleRate = Float.parseFloat(cmd.substring(sampleRate.length()));
- continue;
- }
-
- final String table = "--table=";
- if (cmd.startsWith(table)) {
- opts.tableName = cmd.substring(table.length());
- continue;
- }
-
- final String startRow = "--startRow=";
- if (cmd.startsWith(startRow)) {
- opts.startRow = Integer.parseInt(cmd.substring(startRow.length()));
- continue;
- }
-
- final String compress = "--compress=";
- if (cmd.startsWith(compress)) {
- opts.compression = Compression.Algorithm.valueOf(cmd.substring(compress.length()));
- continue;
- }
-
- final String traceRate = "--traceRate=";
- if (cmd.startsWith(traceRate)) {
- opts.traceRate = Double.parseDouble(cmd.substring(traceRate.length()));
- continue;
- }
-
- final String blockEncoding = "--blockEncoding=";
- if (cmd.startsWith(blockEncoding)) {
- opts.blockEncoding = DataBlockEncoding.valueOf(cmd.substring(blockEncoding.length()));
- continue;
- }
-
- final String flushCommits = "--flushCommits=";
- if (cmd.startsWith(flushCommits)) {
- opts.flushCommits = Boolean.parseBoolean(cmd.substring(flushCommits.length()));
- continue;
- }
-
- final String writeToWAL = "--writeToWAL=";
- if (cmd.startsWith(writeToWAL)) {
- opts.writeToWAL = Boolean.parseBoolean(cmd.substring(writeToWAL.length()));
- continue;
- }
-
- final String presplit = "--presplit=";
- if (cmd.startsWith(presplit)) {
- opts.presplitRegions = Integer.parseInt(cmd.substring(presplit.length()));
- continue;
- }
-
- final String inMemory = "--inmemory=";
- if (cmd.startsWith(inMemory)) {
- opts.inMemoryCF = Boolean.parseBoolean(cmd.substring(inMemory.length()));
- continue;
- }
-
- final String autoFlush = "--autoFlush=";
- if (cmd.startsWith(autoFlush)) {
- opts.autoFlush = Boolean.parseBoolean(cmd.substring(autoFlush.length()));
- continue;
- }
-
- final String onceCon = "--oneCon=";
- if (cmd.startsWith(onceCon)) {
- opts.oneCon = Boolean.parseBoolean(cmd.substring(onceCon.length()));
- continue;
- }
-
- final String latency = "--latency";
- if (cmd.startsWith(latency)) {
- opts.reportLatency = true;
- continue;
- }
-
- final String multiGet = "--multiGet=";
- if (cmd.startsWith(multiGet)) {
- opts.multiGet = Integer.parseInt(cmd.substring(multiGet.length()));
- continue;
- }
-
- final String useTags = "--usetags=";
- if (cmd.startsWith(useTags)) {
- opts.useTags = Boolean.parseBoolean(cmd.substring(useTags.length()));
- continue;
- }
-
- final String noOfTags = "--numoftags=";
- if (cmd.startsWith(noOfTags)) {
- opts.noOfTags = Integer.parseInt(cmd.substring(noOfTags.length()));
- continue;
- }
-
- final String replicas = "--replicas=";
- if (cmd.startsWith(replicas)) {
- opts.replicas = Integer.parseInt(cmd.substring(replicas.length()));
- continue;
- }
-
- final String filterOutAll = "--filterAll";
- if (cmd.startsWith(filterOutAll)) {
- opts.filterAll = true;
- continue;
- }
-
- final String size = "--size=";
- if (cmd.startsWith(size)) {
- opts.size = Float.parseFloat(cmd.substring(size.length()));
- if (opts.size <= 1.0f) throw new IllegalStateException("Size must be > 1; i.e. 1GB");
- continue;
- }
-
- final String splitPolicy = "--splitPolicy=";
- if (cmd.startsWith(splitPolicy)) {
- opts.splitPolicy = cmd.substring(splitPolicy.length());
- continue;
- }
-
- final String randomSleep = "--randomSleep=";
- if (cmd.startsWith(randomSleep)) {
- opts.randomSleep = Integer.parseInt(cmd.substring(randomSleep.length()));
- continue;
- }
-
- final String measureAfter = "--measureAfter=";
- if (cmd.startsWith(measureAfter)) {
- opts.measureAfter = Integer.parseInt(cmd.substring(measureAfter.length()));
- continue;
- }
-
- final String bloomFilter = "--bloomFilter=";
- if (cmd.startsWith(bloomFilter)) {
- opts.bloomType = BloomType.valueOf(cmd.substring(bloomFilter.length()));
- continue;
- }
-
- final String blockSize = "--blockSize=";
- if (cmd.startsWith(blockSize)) {
- opts.blockSize = Integer.parseInt(cmd.substring(blockSize.length()));
- continue;
- }
-
- final String valueSize = "--valueSize=";
- if (cmd.startsWith(valueSize)) {
- opts.valueSize = Integer.parseInt(cmd.substring(valueSize.length()));
- continue;
- }
-
- final String valueRandom = "--valueRandom";
- if (cmd.startsWith(valueRandom)) {
- opts.valueRandom = true;
- if (opts.valueZipf) {
- throw new IllegalStateException("Either valueZipf or valueRandom but not both");
- }
- continue;
- }
-
- final String valueZipf = "--valueZipf";
- if (cmd.startsWith(valueZipf)) {
- opts.valueZipf = true;
- if (opts.valueRandom) {
- throw new IllegalStateException("Either valueZipf or valueRandom but not both");
- }
- continue;
- }
-
- final String period = "--period=";
- if (cmd.startsWith(period)) {
- opts.period = Integer.parseInt(cmd.substring(period.length()));
- continue;
- }
-
- final String addColumns = "--addColumns=";
- if (cmd.startsWith(addColumns)) {
- opts.addColumns = Boolean.parseBoolean(cmd.substring(addColumns.length
<TRUNCATED>
[03/41] hbase git commit: HBASE-18640 Move mapreduce out of
hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScanBase.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScanBase.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScanBase.java
deleted file mode 100644
index 0f49333..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScanBase.java
+++ /dev/null
@@ -1,287 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.NavigableMap;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.junit.AfterClass;
-import org.junit.Assert;
-import org.junit.BeforeClass;
-
-
-/**
- * <p>
- * Tests various scan start and stop row scenarios. The boundaries are set on a
- * Scan and then verified in a MapReduce job to confirm they are handed over and
- * honored properly.
- * </p>
- * <p>
- * This test is broken into two parts in order to side-step the 900-second test
- * timeout period documented in HBASE-8326.
- * </p>
- */
-public abstract class TestTableInputFormatScanBase {
-
- private static final Log LOG = LogFactory.getLog(TestTableInputFormatScanBase.class);
- static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
-
- static final TableName TABLE_NAME = TableName.valueOf("scantest");
- static final byte[][] INPUT_FAMILYS = {Bytes.toBytes("content1"), Bytes.toBytes("content2")};
- static final String KEY_STARTROW = "startRow";
- static final String KEY_LASTROW = "stpRow";
-
- private static Table table = null;
-
- @BeforeClass
- public static void setUpBeforeClass() throws Exception {
- // This test intermittently fails under hadoop2 (2.0.2-alpha) if shortcircuit-read (scr) is on.
- // This turns it off for this test. TODO: Figure out why scr breaks recovery.
- System.setProperty("hbase.tests.use.shortcircuit.reads", "false");
-
- // switch TIF to log at DEBUG level
- TEST_UTIL.enableDebug(TableInputFormat.class);
- TEST_UTIL.enableDebug(TableInputFormatBase.class);
- // start mini hbase cluster
- TEST_UTIL.startMiniCluster(3);
- // create and fill table
- table = TEST_UTIL.createMultiRegionTable(TABLE_NAME, INPUT_FAMILYS);
- TEST_UTIL.loadTable(table, INPUT_FAMILYS, null, false);
- }
-
- @AfterClass
- public static void tearDownAfterClass() throws Exception {
- TEST_UTIL.shutdownMiniCluster();
- }
-
- /**
- * Pass the key and value to reduce.
- */
- public static class ScanMapper
- extends TableMapper<ImmutableBytesWritable, ImmutableBytesWritable> {
-
- /**
- * Pass the key and value to reduce.
- *
- * @param key The key, here "aaa", "aab" etc.
- * @param value The value is the same as the key.
- * @param context The task context.
- * @throws IOException When reading the rows fails.
- */
- @Override
- public void map(ImmutableBytesWritable key, Result value,
- Context context)
- throws IOException, InterruptedException {
- if (value.size() != 2) {
- throw new IOException("There should be two input columns");
- }
- Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>>
- cfMap = value.getMap();
-
- if (!cfMap.containsKey(INPUT_FAMILYS[0]) || !cfMap.containsKey(INPUT_FAMILYS[1])) {
- throw new IOException("Wrong input columns. Missing: '" +
- Bytes.toString(INPUT_FAMILYS[0]) + "' or '" + Bytes.toString(INPUT_FAMILYS[1]) + "'.");
- }
-
- String val0 = Bytes.toStringBinary(value.getValue(INPUT_FAMILYS[0], null));
- String val1 = Bytes.toStringBinary(value.getValue(INPUT_FAMILYS[1], null));
- LOG.info("map: key -> " + Bytes.toStringBinary(key.get()) +
- ", value -> (" + val0 + ", " + val1 + ")");
- context.write(key, key);
- }
- }
-
- /**
- * Checks the last and first key seen against the scanner boundaries.
- */
- public static class ScanReducer
- extends Reducer<ImmutableBytesWritable, ImmutableBytesWritable,
- NullWritable, NullWritable> {
-
- private String first = null;
- private String last = null;
-
- protected void reduce(ImmutableBytesWritable key,
- Iterable<ImmutableBytesWritable> values, Context context)
- throws IOException, InterruptedException {
- int count = 0;
- for (ImmutableBytesWritable value : values) {
- String val = Bytes.toStringBinary(value.get());
- LOG.info("reduce: key[" + count + "] -> " +
- Bytes.toStringBinary(key.get()) + ", value -> " + val);
- if (first == null) first = val;
- last = val;
- count++;
- }
- }
-
- protected void cleanup(Context context)
- throws IOException, InterruptedException {
- Configuration c = context.getConfiguration();
- String startRow = c.get(KEY_STARTROW);
- String lastRow = c.get(KEY_LASTROW);
- LOG.info("cleanup: first -> \"" + first + "\", start row -> \"" + startRow + "\"");
- LOG.info("cleanup: last -> \"" + last + "\", last row -> \"" + lastRow + "\"");
- if (startRow != null && startRow.length() > 0) {
- assertEquals(startRow, first);
- }
- if (lastRow != null && lastRow.length() > 0) {
- assertEquals(lastRow, last);
- }
- }
-
- }
-
- /**
- * Tests an MR Scan initialized from properties set in the Configuration.
- *
- * @throws IOException
- * @throws ClassNotFoundException
- * @throws InterruptedException
- */
- protected void testScanFromConfiguration(String start, String stop, String last)
- throws IOException, InterruptedException, ClassNotFoundException {
- String jobName = "ScanFromConfig" + (start != null ? start.toUpperCase(Locale.ROOT) : "Empty") +
- "To" + (stop != null ? stop.toUpperCase(Locale.ROOT) : "Empty");
- Configuration c = new Configuration(TEST_UTIL.getConfiguration());
- c.set(TableInputFormat.INPUT_TABLE, TABLE_NAME.getNameAsString());
- c.set(TableInputFormat.SCAN_COLUMN_FAMILY, Bytes.toString(INPUT_FAMILYS[0]) + ", "
- + Bytes.toString(INPUT_FAMILYS[1]));
- c.set(KEY_STARTROW, start != null ? start : "");
- c.set(KEY_LASTROW, last != null ? last : "");
-
- if (start != null) {
- c.set(TableInputFormat.SCAN_ROW_START, start);
- }
-
- if (stop != null) {
- c.set(TableInputFormat.SCAN_ROW_STOP, stop);
- }
-
- Job job = new Job(c, jobName);
- job.setMapperClass(ScanMapper.class);
- job.setReducerClass(ScanReducer.class);
- job.setMapOutputKeyClass(ImmutableBytesWritable.class);
- job.setMapOutputValueClass(ImmutableBytesWritable.class);
- job.setInputFormatClass(TableInputFormat.class);
- job.setNumReduceTasks(1);
- FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
- TableMapReduceUtil.addDependencyJars(job);
- assertTrue(job.waitForCompletion(true));
- }
-
- /**
- * Tests an MR scan using specific start and stop rows.
- *
- * @throws IOException
- * @throws ClassNotFoundException
- * @throws InterruptedException
- */
- protected void testScan(String start, String stop, String last)
- throws IOException, InterruptedException, ClassNotFoundException {
- String jobName = "Scan" + (start != null ? start.toUpperCase(Locale.ROOT) : "Empty") +
- "To" + (stop != null ? stop.toUpperCase(Locale.ROOT) : "Empty");
- LOG.info("Before map/reduce startup - job " + jobName);
- Configuration c = new Configuration(TEST_UTIL.getConfiguration());
- Scan scan = new Scan();
- scan.addFamily(INPUT_FAMILYS[0]);
- scan.addFamily(INPUT_FAMILYS[1]);
- if (start != null) {
- scan.setStartRow(Bytes.toBytes(start));
- }
- c.set(KEY_STARTROW, start != null ? start : "");
- if (stop != null) {
- scan.setStopRow(Bytes.toBytes(stop));
- }
- c.set(KEY_LASTROW, last != null ? last : "");
- LOG.info("scan before: " + scan);
- Job job = new Job(c, jobName);
- TableMapReduceUtil.initTableMapperJob(
- TABLE_NAME, scan, ScanMapper.class,
- ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
- job.setReducerClass(ScanReducer.class);
- job.setNumReduceTasks(1); // one to get final "first" and "last" key
- FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
- LOG.info("Started " + job.getJobName());
- assertTrue(job.waitForCompletion(true));
- LOG.info("After map/reduce completion - job " + jobName);
- }
-
-
- /**
- * Tests an MR scan using data skew auto-balance.
- *
- * @throws IOException
- * @throws ClassNotFoundException
- * @throws InterruptedException
- */
- public void testNumOfSplits(String ratio, int expectedNumOfSplits) throws IOException,
- InterruptedException,
- ClassNotFoundException {
- String jobName = "TestJobForNumOfSplits";
- LOG.info("Before map/reduce startup - job " + jobName);
- Configuration c = new Configuration(TEST_UTIL.getConfiguration());
- Scan scan = new Scan();
- scan.addFamily(INPUT_FAMILYS[0]);
- scan.addFamily(INPUT_FAMILYS[1]);
- c.set("hbase.mapreduce.input.autobalance", "true");
- c.set("hbase.mapreduce.input.autobalance.maxskewratio", ratio);
- c.set(KEY_STARTROW, "");
- c.set(KEY_LASTROW, "");
- Job job = new Job(c, jobName);
- TableMapReduceUtil.initTableMapperJob(TABLE_NAME.getNameAsString(), scan, ScanMapper.class,
- ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
- TableInputFormat tif = new TableInputFormat();
- tif.setConf(job.getConfiguration());
- Assert.assertEquals(TABLE_NAME, table.getName());
- List<InputSplit> splits = tif.getSplits(job);
- Assert.assertEquals(expectedNumOfSplits, splits.size());
- }
-
- /**
- * Tests for the getSplitKey() method in TableInputFormatBase.java
- */
- public void testGetSplitKey(byte[] startKey, byte[] endKey, byte[] splitKey, boolean isText) {
- byte[] result = TableInputFormatBase.getSplitKey(startKey, endKey, isText);
- Assert.assertArrayEquals(splitKey, result);
- }
-}
-
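
For orientation, a minimal sketch (not part of this commit) of the two setup paths the removed TestTableInputFormatScanBase exercises: scan boundaries supplied through TableInputFormat configuration keys, and an explicit Scan handed to TableMapReduceUtil.initTableMapperJob. The table name "scantest" and family "content1" mirror the test constants; the row keys, job names, and use of IdentityTableMapper are illustrative assumptions.

// Sketch only: placeholders are the row keys "aaa"/"kkk" and the job names;
// "scantest"/"content1" mirror the test constants above.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.IdentityTableMapper;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Job;

public class ScanBoundarySketch {

  // Path 1: TableInputFormat builds the Scan itself from configuration keys.
  static Job fromConfiguration(Configuration base) throws Exception {
    Configuration c = HBaseConfiguration.create(base);
    c.set(TableInputFormat.INPUT_TABLE, "scantest");
    c.set(TableInputFormat.SCAN_COLUMN_FAMILY, "content1");
    c.set(TableInputFormat.SCAN_ROW_START, "aaa");   // inclusive
    c.set(TableInputFormat.SCAN_ROW_STOP, "kkk");    // exclusive
    Job job = Job.getInstance(c, "ScanFromConfigSketch");
    job.setInputFormatClass(TableInputFormat.class);
    return job;
  }

  // Path 2: an explicit Scan handed to TableMapReduceUtil, as testScan() does.
  static Job fromScan(Configuration base) throws Exception {
    Scan scan = new Scan();
    scan.addFamily(Bytes.toBytes("content1"));
    scan.setStartRow(Bytes.toBytes("aaa"));          // inclusive
    scan.setStopRow(Bytes.toBytes("kkk"));           // exclusive
    Job job = Job.getInstance(HBaseConfiguration.create(base), "ScanFromApiSketch");
    TableMapReduceUtil.initTableMapperJob("scantest", scan, IdentityTableMapper.class,
        ImmutableBytesWritable.class, Result.class, job);
    return job;
  }
}
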
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduce.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduce.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduce.java
deleted file mode 100644
index d702e0d..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduce.java
+++ /dev/null
@@ -1,174 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.Map;
-import java.util.NavigableMap;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.fs.FileUtil;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.TableNotEnabledException;
-import org.apache.hadoop.hbase.TableNotFoundException;
-import org.apache.hadoop.hbase.client.Admin;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapreduce.Counter;
-import org.apache.hadoop.mapreduce.Counters;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-/**
- * Test Map/Reduce job over HBase tables. The map/reduce process we're testing
- * on our tables is simple - take every row in the table, reverse the value of
- * a particular cell, and write it back to the table.
- */
-
-@Category({VerySlowMapReduceTests.class, LargeTests.class})
-public class TestTableMapReduce extends TestTableMapReduceBase {
- private static final Log LOG = LogFactory.getLog(TestTableMapReduce.class);
-
- @Override
- protected Log getLog() { return LOG; }
-
- /**
- * Pass the given key and processed record to reduce.
- */
- static class ProcessContentsMapper extends TableMapper<ImmutableBytesWritable, Put> {
-
- /**
- * Pass the key, and reversed value to reduce
- *
- * @param key
- * @param value
- * @param context
- * @throws IOException
- */
- @Override
- public void map(ImmutableBytesWritable key, Result value,
- Context context)
- throws IOException, InterruptedException {
- if (value.size() != 1) {
- throw new IOException("There should only be one input column");
- }
- Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>>
- cf = value.getMap();
- if(!cf.containsKey(INPUT_FAMILY)) {
- throw new IOException("Wrong input columns. Missing: '" +
- Bytes.toString(INPUT_FAMILY) + "'.");
- }
-
- // Get the original value and reverse it
- String originalValue = Bytes.toString(value.getValue(INPUT_FAMILY, INPUT_FAMILY));
- StringBuilder newValue = new StringBuilder(originalValue);
- newValue.reverse();
- // Now set the value to be collected
- Put outval = new Put(key.get());
- outval.addColumn(OUTPUT_FAMILY, null, Bytes.toBytes(newValue.toString()));
- context.write(key, outval);
- }
- }
-
- @Override
- protected void runTestOnTable(Table table) throws IOException {
- Job job = null;
- try {
- LOG.info("Before map/reduce startup");
- job = new Job(table.getConfiguration(), "process column contents");
- job.setNumReduceTasks(1);
- Scan scan = new Scan();
- scan.addFamily(INPUT_FAMILY);
- TableMapReduceUtil.initTableMapperJob(
- table.getName().getNameAsString(), scan,
- ProcessContentsMapper.class, ImmutableBytesWritable.class,
- Put.class, job);
- TableMapReduceUtil.initTableReducerJob(
- table.getName().getNameAsString(),
- IdentityTableReducer.class, job);
- FileOutputFormat.setOutputPath(job, new Path("test"));
- LOG.info("Started " + table.getName().getNameAsString());
- assertTrue(job.waitForCompletion(true));
- LOG.info("After map/reduce completion");
-
- // verify map-reduce results
- verify(table.getName());
-
- verifyJobCountersAreEmitted(job);
- } catch (InterruptedException e) {
- throw new IOException(e);
- } catch (ClassNotFoundException e) {
- throw new IOException(e);
- } finally {
- table.close();
- if (job != null) {
- FileUtil.fullyDelete(
- new File(job.getConfiguration().get("hadoop.tmp.dir")));
- }
- }
- }
-
- /**
- * Verify scan counters are emitted from the job
- * @param job
- * @throws IOException
- */
- private void verifyJobCountersAreEmitted(Job job) throws IOException {
- Counters counters = job.getCounters();
- Counter counter
- = counters.findCounter(TableRecordReaderImpl.HBASE_COUNTER_GROUP_NAME, "RPC_CALLS");
- assertNotNull("Unable to find Job counter for HBase scan metrics, RPC_CALLS", counter);
- assertTrue("Counter value for RPC_CALLS should be larger than 0", counter.getValue() > 0);
- }
-
- @Test(expected = TableNotEnabledException.class)
- public void testWritingToDisabledTable() throws IOException {
-
- try (Admin admin = UTIL.getConnection().getAdmin();
- Table table = UTIL.getConnection().getTable(TABLE_FOR_NEGATIVE_TESTS)) {
- admin.disableTable(table.getName());
- runTestOnTable(table);
- fail("Should not have reached here, should have thrown an exception");
- }
- }
-
- @Test(expected = TableNotFoundException.class)
- public void testWritingToNonExistentTable() throws IOException {
-
- try (Table table = UTIL.getConnection().getTable(TableName.valueOf("table-does-not-exist"))) {
- runTestOnTable(table);
- fail("Should not have reached here, should have thrown an exception");
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduceBase.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduceBase.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduceBase.java
deleted file mode 100644
index 27bf063..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduceBase.java
+++ /dev/null
@@ -1,233 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import java.io.IOException;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.NavigableMap;
-
-import org.apache.commons.logging.Log;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.CategoryBasedTimeout;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TestRule;
-
-/**
- * A base class for a test Map/Reduce job over HBase tables. The map/reduce process we're testing
- * on our tables is simple - take every row in the table, reverse the value of a particular cell,
- * and write it back to the table. Implements common components between mapred and mapreduce
- * implementations.
- */
-public abstract class TestTableMapReduceBase {
- @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
- withTimeout(this.getClass()).withLookingForStuckThread(true).build();
- protected static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
- protected static final TableName MULTI_REGION_TABLE_NAME = TableName.valueOf("mrtest");
- protected static final TableName TABLE_FOR_NEGATIVE_TESTS = TableName.valueOf("testfailuretable");
- protected static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
- protected static final byte[] OUTPUT_FAMILY = Bytes.toBytes("text");
-
- protected static final byte[][] columns = new byte[][] {
- INPUT_FAMILY,
- OUTPUT_FAMILY
- };
-
- /**
- * Retrieve my logger instance.
- */
- protected abstract Log getLog();
-
- /**
- * Handles API-specifics for setting up and executing the job.
- */
- protected abstract void runTestOnTable(Table table) throws IOException;
-
- @BeforeClass
- public static void beforeClass() throws Exception {
- UTIL.startMiniCluster();
- Table table =
- UTIL.createMultiRegionTable(MULTI_REGION_TABLE_NAME, new byte[][] { INPUT_FAMILY,
- OUTPUT_FAMILY });
- UTIL.loadTable(table, INPUT_FAMILY, false);
- UTIL.createTable(TABLE_FOR_NEGATIVE_TESTS, new byte[][] { INPUT_FAMILY, OUTPUT_FAMILY });
- }
-
- @AfterClass
- public static void afterClass() throws Exception {
- UTIL.deleteTable(TABLE_FOR_NEGATIVE_TESTS);
- UTIL.shutdownMiniCluster();
- }
-
- /**
- * Test a map/reduce against a multi-region table
- * @throws IOException
- */
- @Test
- public void testMultiRegionTable() throws IOException {
- runTestOnTable(UTIL.getConnection().getTable(MULTI_REGION_TABLE_NAME));
- }
-
- @Test
- public void testCombiner() throws IOException {
- Configuration conf = new Configuration(UTIL.getConfiguration());
- // force use of combiner for testing purposes
- conf.setInt("mapreduce.map.combine.minspills", 1);
- runTestOnTable(UTIL.getConnection().getTable(MULTI_REGION_TABLE_NAME));
- }
-
- /**
- * Implements mapper logic for use across APIs.
- */
- protected static Put map(ImmutableBytesWritable key, Result value) throws IOException {
- if (value.size() != 1) {
- throw new IOException("There should only be one input column");
- }
- Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>>
- cf = value.getMap();
- if(!cf.containsKey(INPUT_FAMILY)) {
- throw new IOException("Wrong input columns. Missing: '" +
- Bytes.toString(INPUT_FAMILY) + "'.");
- }
-
- // Get the original value and reverse it
-
- String originalValue = Bytes.toString(value.getValue(INPUT_FAMILY, INPUT_FAMILY));
- StringBuilder newValue = new StringBuilder(originalValue);
- newValue.reverse();
-
- // Now set the value to be collected
-
- Put outval = new Put(key.get());
- outval.addColumn(OUTPUT_FAMILY, null, Bytes.toBytes(newValue.toString()));
- return outval;
- }
-
- protected void verify(TableName tableName) throws IOException {
- Table table = UTIL.getConnection().getTable(tableName);
- boolean verified = false;
- long pause = UTIL.getConfiguration().getLong("hbase.client.pause", 5 * 1000);
- int numRetries = UTIL.getConfiguration().getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 5);
- for (int i = 0; i < numRetries; i++) {
- try {
- getLog().info("Verification attempt #" + i);
- verifyAttempt(table);
- verified = true;
- break;
- } catch (NullPointerException e) {
- // If here, a cell was empty. Presume it's because updates came in
- // after the scanner had been opened. Wait a while and retry.
- getLog().debug("Verification attempt failed: " + e.getMessage());
- }
- try {
- Thread.sleep(pause);
- } catch (InterruptedException e) {
- // continue
- }
- }
- assertTrue(verified);
- }
-
- /**
- * Looks at every value of the mapreduce output and verifies that indeed
- * the values have been reversed.
- * @param table Table to scan.
- * @throws IOException
- * @throws NullPointerException if we failed to find a cell value
- */
- private void verifyAttempt(final Table table) throws IOException, NullPointerException {
- Scan scan = new Scan();
- TableInputFormat.addColumns(scan, columns);
- ResultScanner scanner = table.getScanner(scan);
- try {
- Iterator<Result> itr = scanner.iterator();
- assertTrue(itr.hasNext());
- while(itr.hasNext()) {
- Result r = itr.next();
- if (getLog().isDebugEnabled()) {
- if (r.size() > 2 ) {
- throw new IOException("Too many results, expected 2 got " +
- r.size());
- }
- }
- byte[] firstValue = null;
- byte[] secondValue = null;
- int count = 0;
- for(Cell kv : r.listCells()) {
- if (count == 0) {
- firstValue = CellUtil.cloneValue(kv);
- }
- if (count == 1) {
- secondValue = CellUtil.cloneValue(kv);
- }
- count++;
- if (count == 2) {
- break;
- }
- }
-
-
- if (firstValue == null) {
- throw new NullPointerException(Bytes.toString(r.getRow()) +
- ": first value is null");
- }
- String first = Bytes.toString(firstValue);
-
- if (secondValue == null) {
- throw new NullPointerException(Bytes.toString(r.getRow()) +
- ": second value is null");
- }
- byte[] secondReversed = new byte[secondValue.length];
- for (int i = 0, j = secondValue.length - 1; j >= 0; j--, i++) {
- secondReversed[i] = secondValue[j];
- }
- String second = Bytes.toString(secondReversed);
-
- if (first.compareTo(second) != 0) {
- if (getLog().isDebugEnabled()) {
- getLog().debug("second key is not the reverse of first. row=" +
- Bytes.toStringBinary(r.getRow()) + ", first value=" + first +
- ", second value=" + second);
- }
- fail();
- }
- }
- } finally {
- scanner.close();
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduceUtil.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduceUtil.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduceUtil.java
deleted file mode 100644
index 303a144..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduceUtil.java
+++ /dev/null
@@ -1,99 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
- * agreements. See the NOTICE file distributed with this work for additional information regarding
- * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with the License. You may
- * obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software distributed under the
- * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
- * either express or implied. See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.testclassification.SmallTests;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-/**
- * Test different variants of initTableMapperJob method
- */
-@Category({MapReduceTests.class, SmallTests.class})
-public class TestTableMapReduceUtil {
-
- /*
- * initTableSnapshotMapperJob is tested in {@link TestTableSnapshotInputFormat} because
- * the method depends on an online cluster.
- */
-
- @Test
- public void testInitTableMapperJob1() throws Exception {
- Configuration configuration = new Configuration();
- Job job = new Job(configuration, "tableName");
- // test
- TableMapReduceUtil.initTableMapperJob("Table", new Scan(), Import.Importer.class, Text.class,
- Text.class, job, false, WALInputFormat.class);
- assertEquals(WALInputFormat.class, job.getInputFormatClass());
- assertEquals(Import.Importer.class, job.getMapperClass());
- assertEquals(LongWritable.class, job.getOutputKeyClass());
- assertEquals(Text.class, job.getOutputValueClass());
- assertNull(job.getCombinerClass());
- assertEquals("Table", job.getConfiguration().get(TableInputFormat.INPUT_TABLE));
- }
-
- @Test
- public void testInitTableMapperJob2() throws Exception {
- Configuration configuration = new Configuration();
- Job job = new Job(configuration, "tableName");
- TableMapReduceUtil.initTableMapperJob(Bytes.toBytes("Table"), new Scan(),
- Import.Importer.class, Text.class, Text.class, job, false, WALInputFormat.class);
- assertEquals(WALInputFormat.class, job.getInputFormatClass());
- assertEquals(Import.Importer.class, job.getMapperClass());
- assertEquals(LongWritable.class, job.getOutputKeyClass());
- assertEquals(Text.class, job.getOutputValueClass());
- assertNull(job.getCombinerClass());
- assertEquals("Table", job.getConfiguration().get(TableInputFormat.INPUT_TABLE));
- }
-
- @Test
- public void testInitTableMapperJob3() throws Exception {
- Configuration configuration = new Configuration();
- Job job = new Job(configuration, "tableName");
- TableMapReduceUtil.initTableMapperJob(Bytes.toBytes("Table"), new Scan(),
- Import.Importer.class, Text.class, Text.class, job);
- assertEquals(TableInputFormat.class, job.getInputFormatClass());
- assertEquals(Import.Importer.class, job.getMapperClass());
- assertEquals(LongWritable.class, job.getOutputKeyClass());
- assertEquals(Text.class, job.getOutputValueClass());
- assertNull(job.getCombinerClass());
- assertEquals("Table", job.getConfiguration().get(TableInputFormat.INPUT_TABLE));
- }
-
- @Test
- public void testInitTableMapperJob4() throws Exception {
- Configuration configuration = new Configuration();
- Job job = new Job(configuration, "tableName");
- TableMapReduceUtil.initTableMapperJob(Bytes.toBytes("Table"), new Scan(),
- Import.Importer.class, Text.class, Text.class, job, false);
- assertEquals(TableInputFormat.class, job.getInputFormatClass());
- assertEquals(Import.Importer.class, job.getMapperClass());
- assertEquals(LongWritable.class, job.getOutputKeyClass());
- assertEquals(Text.class, job.getOutputValueClass());
- assertNull(job.getCombinerClass());
- assertEquals("Table", job.getConfiguration().get(TableInputFormat.INPUT_TABLE));
- }
-}
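
As a companion to the removed test, a sketch (not part of this commit) contrasting the initTableMapperJob overloads it verifies: the short overload leaves the job on the default TableInputFormat, while the overload taking an explicit input format class switches it, here to WALInputFormat. As in the test, the pairing is only meant to show the resulting job configuration, not to be executed; "SomeTable" and the job names are placeholders.

// Sketch only: mirrors the configuration checks in the deleted TestTableMapReduceUtil.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.Import;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.WALInputFormat;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

public class InitTableMapperJobSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();

    // Short overload: input defaults to TableInputFormat, reading the named table.
    Job scanJob = Job.getInstance(conf, "scan-backed");
    TableMapReduceUtil.initTableMapperJob("SomeTable", new Scan(),
        Import.Importer.class, Text.class, Text.class, scanJob);
    System.out.println(scanJob.getInputFormatClass());   // TableInputFormat

    // Long overload: an explicit input format class is set on the job instead.
    Job walJob = Job.getInstance(conf, "wal-backed");
    TableMapReduceUtil.initTableMapperJob("SomeTable", new Scan(),
        Import.Importer.class, Text.class, Text.class, walJob,
        false /* addDependencyJars */, WALInputFormat.class);
    System.out.println(walJob.getInputFormatClass());    // WALInputFormat
  }
}
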
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java
deleted file mode 100644
index 5e63082..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java
+++ /dev/null
@@ -1,384 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.when;
-
-import java.io.IOException;
-import java.util.List;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.CategoryBasedTimeout;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.HDFSBlocksDistribution;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat.TableSnapshotRegionSplit;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
-import org.junit.After;
-import org.junit.Assert;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
-import org.junit.rules.TestRule;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
-
-import java.util.Arrays;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.hbase.client.Admin;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
-import org.apache.hadoop.hbase.util.FSUtils;
-
-@Category({VerySlowMapReduceTests.class, LargeTests.class})
-public class TestTableSnapshotInputFormat extends TableSnapshotInputFormatTestBase {
- private static final Log LOG = LogFactory.getLog(TestTableSnapshotInputFormat.class);
- @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
- withTimeout(this.getClass()).withLookingForStuckThread(true).build();
-
- private static final byte[] bbb = Bytes.toBytes("bbb");
- private static final byte[] yyy = Bytes.toBytes("yyy");
-
- @Rule
- public TestName name = new TestName();
-
- @Override
- protected byte[] getStartRow() {
- return bbb;
- }
-
- @Override
- protected byte[] getEndRow() {
- return yyy;
- }
-
- @After
- public void tearDown() throws Exception {
- }
-
- @Test
- public void testGetBestLocations() throws IOException {
- TableSnapshotInputFormatImpl tsif = new TableSnapshotInputFormatImpl();
- Configuration conf = UTIL.getConfiguration();
-
- HDFSBlocksDistribution blockDistribution = new HDFSBlocksDistribution();
- Assert.assertEquals(Lists.newArrayList(),
- TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution));
-
- blockDistribution.addHostsAndBlockWeight(new String[] {"h1"}, 1);
- Assert.assertEquals(Lists.newArrayList("h1"),
- TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution));
-
- blockDistribution.addHostsAndBlockWeight(new String[] {"h1"}, 1);
- Assert.assertEquals(Lists.newArrayList("h1"),
- TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution));
-
- blockDistribution.addHostsAndBlockWeight(new String[] {"h2"}, 1);
- Assert.assertEquals(Lists.newArrayList("h1"),
- TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution));
-
- blockDistribution = new HDFSBlocksDistribution();
- blockDistribution.addHostsAndBlockWeight(new String[] {"h1"}, 10);
- blockDistribution.addHostsAndBlockWeight(new String[] {"h2"}, 7);
- blockDistribution.addHostsAndBlockWeight(new String[] {"h3"}, 5);
- blockDistribution.addHostsAndBlockWeight(new String[] {"h4"}, 1);
- Assert.assertEquals(Lists.newArrayList("h1"),
- TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution));
-
- blockDistribution.addHostsAndBlockWeight(new String[] {"h2"}, 2);
- Assert.assertEquals(Lists.newArrayList("h1", "h2"),
- TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution));
-
- blockDistribution.addHostsAndBlockWeight(new String[] {"h2"}, 3);
- Assert.assertEquals(Lists.newArrayList("h2", "h1"),
- TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution));
-
- blockDistribution.addHostsAndBlockWeight(new String[] {"h3"}, 6);
- blockDistribution.addHostsAndBlockWeight(new String[] {"h4"}, 9);
-
- Assert.assertEquals(Lists.newArrayList("h2", "h3", "h4", "h1"),
- TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution));
- }
-
- public static enum TestTableSnapshotCounters {
- VALIDATION_ERROR
- }
-
- public static class TestTableSnapshotMapper
- extends TableMapper<ImmutableBytesWritable, NullWritable> {
- @Override
- protected void map(ImmutableBytesWritable key, Result value,
- Context context) throws IOException, InterruptedException {
- // Validate a single row coming from the snapshot, and emit the row key
- verifyRowFromMap(key, value);
- context.write(key, NullWritable.get());
- }
- }
-
- public static class TestTableSnapshotReducer
- extends Reducer<ImmutableBytesWritable, NullWritable, NullWritable, NullWritable> {
- HBaseTestingUtility.SeenRowTracker rowTracker =
- new HBaseTestingUtility.SeenRowTracker(bbb, yyy);
- @Override
- protected void reduce(ImmutableBytesWritable key, Iterable<NullWritable> values,
- Context context) throws IOException, InterruptedException {
- rowTracker.addRow(key.get());
- }
-
- @Override
- protected void cleanup(Context context) throws IOException,
- InterruptedException {
- rowTracker.validate();
- }
- }
-
- @Test
- public void testInitTableSnapshotMapperJobConfig() throws Exception {
- setupCluster();
- final TableName tableName = TableName.valueOf(name.getMethodName());
- String snapshotName = "foo";
-
- try {
- createTableAndSnapshot(UTIL, tableName, snapshotName, getStartRow(), getEndRow(), 1);
- Job job = new Job(UTIL.getConfiguration());
- Path tmpTableDir = UTIL.getDataTestDirOnTestFS(snapshotName);
-
- TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
- new Scan(), TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
- NullWritable.class, job, false, tmpTableDir);
-
- // TODO: would be better to examine directly the cache instance that results from this
- // config. Currently this is not possible because BlockCache initialization is static.
- Assert.assertEquals(
- "Snapshot job should be configured for default LruBlockCache.",
- HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT,
- job.getConfiguration().getFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, -1), 0.01);
- Assert.assertEquals(
- "Snapshot job should not use BucketCache.",
- 0, job.getConfiguration().getFloat("hbase.bucketcache.size", -1), 0.01);
- } finally {
- UTIL.getAdmin().deleteSnapshot(snapshotName);
- UTIL.deleteTable(tableName);
- tearDownCluster();
- }
- }
-
- @Override
- public void testRestoreSnapshotDoesNotCreateBackRefLinksInit(TableName tableName,
- String snapshotName, Path tmpTableDir) throws Exception {
- Job job = new Job(UTIL.getConfiguration());
- TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
- new Scan(), TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
- NullWritable.class, job, false, tmpTableDir);
- }
-
- @Override
- public void testWithMockedMapReduce(HBaseTestingUtility util, String snapshotName,
- int numRegions, int expectedNumSplits) throws Exception {
- setupCluster();
- final TableName tableName = TableName.valueOf(name.getMethodName());
- try {
- createTableAndSnapshot(
- util, tableName, snapshotName, getStartRow(), getEndRow(), numRegions);
-
- Job job = new Job(util.getConfiguration());
- Path tmpTableDir = util.getDataTestDirOnTestFS(snapshotName);
- Scan scan = new Scan(getStartRow(), getEndRow()); // limit the scan
-
- TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
- scan, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
- NullWritable.class, job, false, tmpTableDir);
-
- verifyWithMockedMapReduce(job, numRegions, expectedNumSplits, getStartRow(), getEndRow());
-
- } finally {
- util.getAdmin().deleteSnapshot(snapshotName);
- util.deleteTable(tableName);
- tearDownCluster();
- }
- }
-
- public static void blockUntilSplitFinished(HBaseTestingUtility util, TableName tableName,
- int expectedRegionSize) throws Exception {
- for (int i = 0; i < 100; i++) {
- List<HRegionInfo> hRegionInfoList = util.getAdmin().getTableRegions(tableName);
- if (hRegionInfoList.size() >= expectedRegionSize) {
- break;
- }
- Thread.sleep(1000);
- }
- }
-
- @Test
- public void testNoDuplicateResultsWhenSplitting() throws Exception {
- setupCluster();
- TableName tableName = TableName.valueOf("testNoDuplicateResultsWhenSplitting");
- String snapshotName = "testSnapshotBug";
- try {
- if (UTIL.getAdmin().tableExists(tableName)) {
- UTIL.deleteTable(tableName);
- }
-
- UTIL.createTable(tableName, FAMILIES);
- Admin admin = UTIL.getAdmin();
-
- // put some stuff in the table
- Table table = UTIL.getConnection().getTable(tableName);
- UTIL.loadTable(table, FAMILIES);
-
- // split to 2 regions
- admin.split(tableName, Bytes.toBytes("eee"));
- blockUntilSplitFinished(UTIL, tableName, 2);
-
- Path rootDir = FSUtils.getRootDir(UTIL.getConfiguration());
- FileSystem fs = rootDir.getFileSystem(UTIL.getConfiguration());
-
- SnapshotTestingUtils.createSnapshotAndValidate(admin, tableName, Arrays.asList(FAMILIES),
- null, snapshotName, rootDir, fs, true);
-
- // load different values
- byte[] value = Bytes.toBytes("after_snapshot_value");
- UTIL.loadTable(table, FAMILIES, value);
-
- // cause flush to create new files in the region
- admin.flush(tableName);
- table.close();
-
- Job job = new Job(UTIL.getConfiguration());
- Path tmpTableDir = UTIL.getDataTestDirOnTestFS(snapshotName);
- // limit the scan
- Scan scan = new Scan().withStartRow(getStartRow()).withStopRow(getEndRow());
-
- TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName, scan,
- TestTableSnapshotMapper.class, ImmutableBytesWritable.class, NullWritable.class, job, false,
- tmpTableDir);
-
- verifyWithMockedMapReduce(job, 2, 2, getStartRow(), getEndRow());
- } finally {
- UTIL.getAdmin().deleteSnapshot(snapshotName);
- UTIL.deleteTable(tableName);
- tearDownCluster();
- }
- }
-
- private void verifyWithMockedMapReduce(Job job, int numRegions, int expectedNumSplits,
- byte[] startRow, byte[] stopRow)
- throws IOException, InterruptedException {
- TableSnapshotInputFormat tsif = new TableSnapshotInputFormat();
- List<InputSplit> splits = tsif.getSplits(job);
-
- Assert.assertEquals(expectedNumSplits, splits.size());
-
- HBaseTestingUtility.SeenRowTracker rowTracker =
- new HBaseTestingUtility.SeenRowTracker(startRow, stopRow);
-
- for (int i = 0; i < splits.size(); i++) {
- // validate input split
- InputSplit split = splits.get(i);
- Assert.assertTrue(split instanceof TableSnapshotRegionSplit);
-
- // validate record reader
- TaskAttemptContext taskAttemptContext = mock(TaskAttemptContext.class);
- when(taskAttemptContext.getConfiguration()).thenReturn(job.getConfiguration());
- RecordReader<ImmutableBytesWritable, Result> rr =
- tsif.createRecordReader(split, taskAttemptContext);
- rr.initialize(split, taskAttemptContext);
-
- // validate we can read all the data back
- while (rr.nextKeyValue()) {
- byte[] row = rr.getCurrentKey().get();
- verifyRowFromMap(rr.getCurrentKey(), rr.getCurrentValue());
- rowTracker.addRow(row);
- }
-
- rr.close();
- }
-
- // validate all rows are seen
- rowTracker.validate();
- }
-
- @Override
- protected void testWithMapReduceImpl(HBaseTestingUtility util, TableName tableName,
- String snapshotName, Path tableDir, int numRegions, int expectedNumSplits,
- boolean shutdownCluster) throws Exception {
- doTestWithMapReduce(util, tableName, snapshotName, getStartRow(), getEndRow(), tableDir,
- numRegions, expectedNumSplits, shutdownCluster);
- }
-
- // this is also called by the IntegrationTestTableSnapshotInputFormat
- public static void doTestWithMapReduce(HBaseTestingUtility util, TableName tableName,
- String snapshotName, byte[] startRow, byte[] endRow, Path tableDir, int numRegions,
- int expectedNumSplits, boolean shutdownCluster) throws Exception {
-
- LOG.info("testing with MapReduce");
-
- LOG.info("create the table and snapshot");
- createTableAndSnapshot(util, tableName, snapshotName, startRow, endRow, numRegions);
-
- if (shutdownCluster) {
- LOG.info("shutting down hbase cluster.");
- util.shutdownMiniHBaseCluster();
- }
-
- try {
- // create the job
- Job job = new Job(util.getConfiguration());
- Scan scan = new Scan(startRow, endRow); // limit the scan
-
- job.setJarByClass(util.getClass());
- TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
- TestTableSnapshotInputFormat.class);
-
- TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
- scan, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
- NullWritable.class, job, true, tableDir);
-
- job.setReducerClass(TestTableSnapshotInputFormat.TestTableSnapshotReducer.class);
- job.setNumReduceTasks(1);
- job.setOutputFormatClass(NullOutputFormat.class);
-
- Assert.assertTrue(job.waitForCompletion(true));
- } finally {
- if (!shutdownCluster) {
- util.getAdmin().deleteSnapshot(snapshotName);
- util.deleteTable(tableName);
- }
- }
- }
-}
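
A brief sketch (not part of this commit) of the snapshot-backed job setup the removed test repeats in several places: initTableSnapshotMapperJob restores the named snapshot under a temporary directory and builds splits from those files rather than from the live region servers. The snapshot name, restore path, row range, and output format choice below are illustrative assumptions.

// Sketch only: the mapper class is assumed to be a TableMapper such as the
// TestTableSnapshotMapper shown above.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;

public class SnapshotScanJobSketch {
  static Job build(Configuration conf,
      Class<? extends TableMapper<ImmutableBytesWritable, NullWritable>> mapper)
      throws Exception {
    Job job = Job.getInstance(HBaseConfiguration.create(conf), "snapshot-scan");
    // Splits come from restored snapshot files under tmpDir, not from region servers,
    // so the scan range only limits which snapshot regions become input splits.
    Scan scan = new Scan(Bytes.toBytes("bbb"), Bytes.toBytes("yyy"));
    Path tmpDir = new Path("/tmp/snapshot-restore");   // placeholder restore dir
    TableMapReduceUtil.initTableSnapshotMapperJob("someSnapshot", scan, mapper,
        ImmutableBytesWritable.class, NullWritable.class, job,
        true /* addDependencyJars */, tmpDir);
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setNumReduceTasks(0);
    return job;
  }
}
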
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSplit.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSplit.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSplit.java
deleted file mode 100644
index 4382c9c..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSplit.java
+++ /dev/null
@@ -1,129 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.testclassification.SmallTests;
-import org.apache.hadoop.util.ReflectionUtils;
-import org.junit.Assert;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
-
-import java.util.HashSet;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-@Category({MapReduceTests.class, SmallTests.class})
-public class TestTableSplit {
- @Rule
- public TestName name = new TestName();
-
- @Test
- public void testHashCode() {
- TableSplit split1 = new TableSplit(TableName.valueOf(name.getMethodName()),
- "row-start".getBytes(),
- "row-end".getBytes(), "location");
- TableSplit split2 = new TableSplit(TableName.valueOf(name.getMethodName()),
- "row-start".getBytes(),
- "row-end".getBytes(), "location");
- assertEquals (split1, split2);
- assertTrue (split1.hashCode() == split2.hashCode());
- HashSet<TableSplit> set = new HashSet<>(2);
- set.add(split1);
- set.add(split2);
- assertTrue(set.size() == 1);
- }
-
- /**
- * Length of region should not influence hashcode.
- * */
- @Test
- public void testHashCode_length() {
- TableSplit split1 = new TableSplit(TableName.valueOf(name.getMethodName()),
- "row-start".getBytes(),
- "row-end".getBytes(), "location", 1984);
- TableSplit split2 = new TableSplit(TableName.valueOf(name.getMethodName()),
- "row-start".getBytes(),
- "row-end".getBytes(), "location", 1982);
-
- assertEquals (split1, split2);
- assertTrue (split1.hashCode() == split2.hashCode());
- HashSet<TableSplit> set = new HashSet<>(2);
- set.add(split1);
- set.add(split2);
- assertTrue(set.size() == 1);
- }
-
- /**
- * Length of region needs to be properly serialized.
- * */
- @Test
- public void testLengthIsSerialized() throws Exception {
- TableSplit split1 = new TableSplit(TableName.valueOf(name.getMethodName()),
- "row-start".getBytes(),
- "row-end".getBytes(), "location", 666);
-
- TableSplit deserialized = new TableSplit(TableName.valueOf(name.getMethodName()),
- "row-start2".getBytes(),
- "row-end2".getBytes(), "location1");
- ReflectionUtils.copy(new Configuration(), split1, deserialized);
-
- Assert.assertEquals(666, deserialized.getLength());
- }
-
- @Test
- public void testToString() {
- TableSplit split =
- new TableSplit(TableName.valueOf(name.getMethodName()), "row-start".getBytes(), "row-end".getBytes(),
- "location");
- String str =
- "HBase table split(table name: " + name.getMethodName() + ", scan: , start row: row-start, "
- + "end row: row-end, region location: location, "
- + "encoded region name: )";
- Assert.assertEquals(str, split.toString());
-
- split =
- new TableSplit(TableName.valueOf(name.getMethodName()), null, "row-start".getBytes(),
- "row-end".getBytes(), "location", "encoded-region-name", 1000L);
- str =
- "HBase table split(table name: " + name.getMethodName() + ", scan: , start row: row-start, "
- + "end row: row-end, region location: location, "
- + "encoded region name: encoded-region-name)";
- Assert.assertEquals(str, split.toString());
-
- split = new TableSplit((TableName) null, null, null, null);
- str =
- "HBase table split(table name: null, scan: , start row: null, "
- + "end row: null, region location: null, "
- + "encoded region name: )";
- Assert.assertEquals(str, split.toString());
-
- split = new TableSplit((TableName) null, null, null, null, null, null, 1000L);
- str =
- "HBase table split(table name: null, scan: , start row: null, "
- + "end row: null, region location: null, "
- + "encoded region name: null)";
- Assert.assertEquals(str, split.toString());
- }
-}
-
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTimeRangeMapRed.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTimeRangeMapRed.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTimeRangeMapRed.java
deleted file mode 100644
index 6796c94..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTimeRangeMapRed.java
+++ /dev/null
@@ -1,211 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileUtil;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.HColumnDescriptor;
-import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Admin;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Durability;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.MapWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
-import org.junit.AfterClass;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.NavigableMap;
-import java.util.TreeMap;
-
-@Category({MapReduceTests.class, LargeTests.class})
-public class TestTimeRangeMapRed {
- private final static Log log = LogFactory.getLog(TestTimeRangeMapRed.class);
- private static final HBaseTestingUtility UTIL =
- new HBaseTestingUtility();
- private Admin admin;
-
- private static final byte [] KEY = Bytes.toBytes("row1");
- private static final NavigableMap<Long, Boolean> TIMESTAMP = new TreeMap<>();
- static {
- TIMESTAMP.put((long)1245620000, false);
- TIMESTAMP.put((long)1245620005, true); // include
- TIMESTAMP.put((long)1245620010, true); // include
- TIMESTAMP.put((long)1245620055, true); // include
- TIMESTAMP.put((long)1245620100, true); // include
- TIMESTAMP.put((long)1245620150, false);
- TIMESTAMP.put((long)1245620250, false);
- }
- static final long MINSTAMP = 1245620005;
- static final long MAXSTAMP = 1245620100 + 1; // maxStamp itself is excluded, so increment it.
-
- static final TableName TABLE_NAME = TableName.valueOf("table123");
- static final byte[] FAMILY_NAME = Bytes.toBytes("text");
- static final byte[] COLUMN_NAME = Bytes.toBytes("input");
-
- @BeforeClass
- public static void beforeClass() throws Exception {
- UTIL.startMiniCluster();
- }
-
- @AfterClass
- public static void afterClass() throws Exception {
- UTIL.shutdownMiniCluster();
- }
-
- @Before
- public void before() throws Exception {
- this.admin = UTIL.getAdmin();
- }
-
- private static class ProcessTimeRangeMapper
- extends TableMapper<ImmutableBytesWritable, MapWritable>
- implements Configurable {
-
- private Configuration conf = null;
- private Table table = null;
-
- @Override
- public void map(ImmutableBytesWritable key, Result result,
- Context context)
- throws IOException {
- List<Long> tsList = new ArrayList<>();
- for (Cell kv : result.listCells()) {
- tsList.add(kv.getTimestamp());
- }
-
- List<Put> puts = new ArrayList<>();
- for (Long ts : tsList) {
- Put put = new Put(key.get());
- put.setDurability(Durability.SKIP_WAL);
- put.addColumn(FAMILY_NAME, COLUMN_NAME, ts, Bytes.toBytes(true));
- puts.add(put);
- }
- table.put(puts);
- }
-
- @Override
- public Configuration getConf() {
- return conf;
- }
-
- @Override
- public void setConf(Configuration configuration) {
- this.conf = configuration;
- try {
- Connection connection = ConnectionFactory.createConnection(conf);
- table = connection.getTable(TABLE_NAME);
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- }
-
- @Test
- public void testTimeRangeMapRed()
- throws IOException, InterruptedException, ClassNotFoundException {
- final HTableDescriptor desc = new HTableDescriptor(TABLE_NAME);
- final HColumnDescriptor col = new HColumnDescriptor(FAMILY_NAME);
- col.setMaxVersions(Integer.MAX_VALUE);
- desc.addFamily(col);
- admin.createTable(desc);
- List<Put> puts = new ArrayList<>();
- for (Map.Entry<Long, Boolean> entry : TIMESTAMP.entrySet()) {
- Put put = new Put(KEY);
- put.setDurability(Durability.SKIP_WAL);
- put.addColumn(FAMILY_NAME, COLUMN_NAME, entry.getKey(), Bytes.toBytes(false));
- puts.add(put);
- }
- Table table = UTIL.getConnection().getTable(desc.getTableName());
- table.put(puts);
- runTestOnTable();
- verify(table);
- table.close();
- }
-
- private void runTestOnTable()
- throws IOException, InterruptedException, ClassNotFoundException {
- Job job = null;
- try {
- job = Job.getInstance(UTIL.getConfiguration(), "test123");
- job.setOutputFormatClass(NullOutputFormat.class);
- job.setNumReduceTasks(0);
- Scan scan = new Scan();
- scan.addColumn(FAMILY_NAME, COLUMN_NAME);
- scan.setTimeRange(MINSTAMP, MAXSTAMP);
- scan.setMaxVersions();
- TableMapReduceUtil.initTableMapperJob(TABLE_NAME,
- scan, ProcessTimeRangeMapper.class, Text.class, Text.class, job);
- job.waitForCompletion(true);
- } catch (IOException e) {
- // surface the failure; the finally block below still cleans up the temp dir
- log.error("time-range MR job failed", e);
- } finally {
- if (job != null) {
- FileUtil.fullyDelete(
- new File(job.getConfiguration().get("hadoop.tmp.dir")));
- }
- }
- }
-
- private void verify(final Table table) throws IOException {
- Scan scan = new Scan();
- scan.addColumn(FAMILY_NAME, COLUMN_NAME);
- scan.setMaxVersions(1);
- ResultScanner scanner = table.getScanner(scan);
- for (Result r: scanner) {
- for (Cell kv : r.listCells()) {
- log.debug(Bytes.toString(r.getRow()) + "\t" + Bytes.toString(CellUtil.cloneFamily(kv))
- + "\t" + Bytes.toString(CellUtil.cloneQualifier(kv))
- + "\t" + kv.getTimestamp() + "\t" + Bytes.toBoolean(CellUtil.cloneValue(kv)));
- org.junit.Assert.assertEquals(TIMESTAMP.get(kv.getTimestamp()),
- Bytes.toBoolean(CellUtil.cloneValue(kv)));
- }
- }
- scanner.close();
- }
-
-}
-
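An aside on the time-range arithmetic in this test: Scan.setTimeRange(min, max) is inclusive of min and exclusive of max, which is why MAXSTAMP is defined as the last wanted timestamp plus one. A minimal sketch, reusing this test's constants:

    Scan scan = new Scan();
    scan.addColumn(FAMILY_NAME, COLUMN_NAME);
    scan.setTimeRange(MINSTAMP, MAXSTAMP); // selects timestamps in [1245620005, 1245620101)
    scan.setMaxVersions();                 // keep every version inside that range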
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestWALPlayer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestWALPlayer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestWALPlayer.java
deleted file mode 100644
index 427c5cc..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestWALPlayer.java
+++ /dev/null
@@ -1,231 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-import static org.mockito.Matchers.any;
-import static org.mockito.Mockito.doAnswer;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.when;
-
-import java.io.ByteArrayOutputStream;
-import java.io.PrintStream;
-import java.util.ArrayList;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.MiniHBaseCluster;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Delete;
-import org.apache.hadoop.hbase.client.Get;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.mapreduce.WALPlayer.WALKeyValueMapper;
-import org.apache.hadoop.hbase.util.FSUtils;
-import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.LauncherSecurityManager;
-import org.apache.hadoop.hbase.wal.WAL;
-import org.apache.hadoop.hbase.wal.WALKey;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.Mapper.Context;
-import org.apache.hadoop.util.ToolRunner;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
-import org.mockito.invocation.InvocationOnMock;
-import org.mockito.stubbing.Answer;
-
-/**
- * Basic test for the WALPlayer M/R tool
- */
-@Category({MapReduceTests.class, LargeTests.class})
-public class TestWALPlayer {
- private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
- private static MiniHBaseCluster cluster;
- private static Path rootDir;
- private static Path walRootDir;
- private static FileSystem fs;
- private static FileSystem logFs;
- private static Configuration conf;
-
- @Rule
- public TestName name = new TestName();
-
- @BeforeClass
- public static void beforeClass() throws Exception {
- conf= TEST_UTIL.getConfiguration();
- rootDir = TEST_UTIL.createRootDir();
- walRootDir = TEST_UTIL.createWALRootDir();
- fs = FSUtils.getRootDirFileSystem(conf);
- logFs = FSUtils.getWALFileSystem(conf);
- cluster = TEST_UTIL.startMiniCluster();
- }
-
- @AfterClass
- public static void afterClass() throws Exception {
- TEST_UTIL.shutdownMiniCluster();
- fs.delete(rootDir, true);
- logFs.delete(walRootDir, true);
- }
-
- /**
- * Simple end-to-end test
- * @throws Exception
- */
- @Test
- public void testWALPlayer() throws Exception {
- final TableName tableName1 = TableName.valueOf(name.getMethodName() + "1");
- final TableName tableName2 = TableName.valueOf(name.getMethodName() + "2");
- final byte[] FAMILY = Bytes.toBytes("family");
- final byte[] COLUMN1 = Bytes.toBytes("c1");
- final byte[] COLUMN2 = Bytes.toBytes("c2");
- final byte[] ROW = Bytes.toBytes("row");
- Table t1 = TEST_UTIL.createTable(tableName1, FAMILY);
- Table t2 = TEST_UTIL.createTable(tableName2, FAMILY);
-
- // put a row into the first table
- Put p = new Put(ROW);
- p.addColumn(FAMILY, COLUMN1, COLUMN1);
- p.addColumn(FAMILY, COLUMN2, COLUMN2);
- t1.put(p);
- // delete one column
- Delete d = new Delete(ROW);
- d.addColumns(FAMILY, COLUMN1);
- t1.delete(d);
-
- // replay the WAL, map table 1 to table 2
- WAL log = cluster.getRegionServer(0).getWAL(null);
- log.rollWriter();
- String walInputDir = new Path(cluster.getMaster().getMasterFileSystem()
- .getWALRootDir(), HConstants.HREGION_LOGDIR_NAME).toString();
-
- Configuration configuration= TEST_UTIL.getConfiguration();
- WALPlayer player = new WALPlayer(configuration);
- String optionName="_test_.name";
- configuration.set(optionName, "1000");
- player.setupTime(configuration, optionName);
- assertEquals(1000,configuration.getLong(optionName,0));
- assertEquals(0, ToolRunner.run(configuration, player,
- new String[] {walInputDir, tableName1.getNameAsString(),
- tableName2.getNameAsString() }));
-
-
- // verify the WAL was replayed into table 2
- Get g = new Get(ROW);
- Result r = t2.get(g);
- assertEquals(1, r.size());
- assertTrue(CellUtil.matchingQualifier(r.rawCells()[0], COLUMN2));
- }
-
- /**
- * Test WALKeyValueMapper setup and map
- */
- @Test
- public void testWALKeyValueMapper() throws Exception {
- testWALKeyValueMapper(WALPlayer.TABLES_KEY);
- }
-
- @Test
- public void testWALKeyValueMapperWithDeprecatedConfig() throws Exception {
- testWALKeyValueMapper("hlog.input.tables");
- }
-
- private void testWALKeyValueMapper(final String tableConfigKey) throws Exception {
- Configuration configuration = new Configuration();
- configuration.set(tableConfigKey, "table");
- WALKeyValueMapper mapper = new WALKeyValueMapper();
- WALKey key = mock(WALKey.class);
- when(key.getTablename()).thenReturn(TableName.valueOf("table"));
- @SuppressWarnings("unchecked")
- Mapper<WALKey, WALEdit, ImmutableBytesWritable, KeyValue>.Context context = mock(Context.class);
- when(context.getConfiguration()).thenReturn(configuration);
-
- WALEdit value = mock(WALEdit.class);
- ArrayList<Cell> values = new ArrayList<>();
- KeyValue kv1 = new KeyValue(Bytes.toBytes("row"), Bytes.toBytes("family"), null);
-
- values.add(kv1);
- when(value.getCells()).thenReturn(values);
- mapper.setup(context);
-
- doAnswer(new Answer<Void>() {
-
- @Override
- public Void answer(InvocationOnMock invocation) throws Throwable {
- ImmutableBytesWritable writer = (ImmutableBytesWritable) invocation.getArguments()[0];
- KeyValue key = (KeyValue) invocation.getArguments()[1];
- assertEquals("row", Bytes.toString(writer.get()));
- assertEquals("row", Bytes.toString(CellUtil.cloneRow(key)));
- return null;
- }
- }).when(context).write(any(ImmutableBytesWritable.class), any(KeyValue.class));
-
- mapper.map(key, value, context);
-
- }
-
- /**
- * Test main method
- */
- @Test
- public void testMainMethod() throws Exception {
-
- PrintStream oldPrintStream = System.err;
- SecurityManager SECURITY_MANAGER = System.getSecurityManager();
- LauncherSecurityManager newSecurityManager= new LauncherSecurityManager();
- System.setSecurityManager(newSecurityManager);
- ByteArrayOutputStream data = new ByteArrayOutputStream();
- String[] args = {};
- System.setErr(new PrintStream(data));
- try {
- try {
- WALPlayer.main(args);
- fail("should be SecurityException");
- } catch (SecurityException e) {
- assertEquals(-1, newSecurityManager.getExitCode());
- assertTrue(data.toString().contains("ERROR: Wrong number of arguments:"));
- assertTrue(data.toString().contains("Usage: WALPlayer [options] <wal inputdir>" +
- " <tables> [<tableMappings>]"));
- assertTrue(data.toString().contains("-Dwal.bulk.output=/path/for/output"));
- }
-
- } finally {
- System.setErr(oldPrintStream);
- System.setSecurityManager(SECURITY_MANAGER);
- }
-
- }
-
-}
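For context, a hedged sketch of how WALPlayer would be driven outside this test, mirroring the ToolRunner call in testWALPlayer(); the WAL directory and table names below are placeholders:

    Configuration conf = HBaseConfiguration.create();
    int exitCode = ToolRunner.run(conf, new WALPlayer(conf),
        new String[] { "/hbase/WALs", "sourceTable", "targetTable" });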
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestWALRecordReader.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestWALRecordReader.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestWALRecordReader.java
deleted file mode 100644
index 34725b4..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestWALRecordReader.java
+++ /dev/null
@@ -1,276 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-
-import java.util.List;
-import java.util.NavigableMap;
-import java.util.TreeMap;
-import java.util.concurrent.atomic.AtomicLong;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.HColumnDescriptor;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.mapreduce.WALInputFormat.WALKeyRecordReader;
-import org.apache.hadoop.hbase.mapreduce.WALInputFormat.WALRecordReader;
-import org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl;
-import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
-import org.apache.hadoop.hbase.util.FSUtils;
-import org.apache.hadoop.hbase.wal.WAL;
-import org.apache.hadoop.hbase.wal.WALFactory;
-import org.apache.hadoop.hbase.wal.WALKey;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.testclassification.MediumTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.MapReduceTestUtil;
-import org.junit.AfterClass;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-/**
- * JUnit tests for the WALRecordReader
- */
-@Category({MapReduceTests.class, MediumTests.class})
-public class TestWALRecordReader {
- private static final Log LOG = LogFactory.getLog(TestWALRecordReader.class);
- private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
- private static Configuration conf;
- private static FileSystem fs;
- private static Path hbaseDir;
- private static FileSystem walFs;
- private static Path walRootDir;
- // visible for TestHLogRecordReader
- static final TableName tableName = TableName.valueOf(getName());
- private static final byte [] rowName = tableName.getName();
- // visible for TestHLogRecordReader
- static final HRegionInfo info = new HRegionInfo(tableName,
- Bytes.toBytes(""), Bytes.toBytes(""), false);
- private static final byte [] family = Bytes.toBytes("column");
- private static final byte [] value = Bytes.toBytes("value");
- private static HTableDescriptor htd;
- private static Path logDir;
- protected MultiVersionConcurrencyControl mvcc;
- protected static NavigableMap<byte[], Integer> scopes = new TreeMap<>(Bytes.BYTES_COMPARATOR);
-
- private static String getName() {
- return "TestWALRecordReader";
- }
-
- @Before
- public void setUp() throws Exception {
- fs.delete(hbaseDir, true);
- walFs.delete(walRootDir, true);
- mvcc = new MultiVersionConcurrencyControl();
- }
- @BeforeClass
- public static void setUpBeforeClass() throws Exception {
- // Make block sizes small.
- conf = TEST_UTIL.getConfiguration();
- conf.setInt("dfs.blocksize", 1024 * 1024);
- conf.setInt("dfs.replication", 1);
- TEST_UTIL.startMiniDFSCluster(1);
-
- fs = TEST_UTIL.getDFSCluster().getFileSystem();
-
- hbaseDir = TEST_UTIL.createRootDir();
- walRootDir = TEST_UTIL.createWALRootDir();
- walFs = FSUtils.getWALFileSystem(conf);
- logDir = new Path(walRootDir, HConstants.HREGION_LOGDIR_NAME);
-
- htd = new HTableDescriptor(tableName);
- htd.addFamily(new HColumnDescriptor(family));
- }
-
- @AfterClass
- public static void tearDownAfterClass() throws Exception {
- fs.delete(hbaseDir, true);
- walFs.delete(walRootDir, true);
- TEST_UTIL.shutdownMiniCluster();
- }
-
- /**
- * Test partial reads from the log based on passed time range
- * @throws Exception
- */
- @Test
- public void testPartialRead() throws Exception {
- final WALFactory walfactory = new WALFactory(conf, null, getName());
- WAL log = walfactory.getWAL(info.getEncodedNameAsBytes(), info.getTable().getNamespace());
- // This test depends on timestamp being millisecond based and the filename of the WAL also
- // being millisecond based.
- long ts = System.currentTimeMillis();
- WALEdit edit = new WALEdit();
- edit.add(new KeyValue(rowName, family, Bytes.toBytes("1"), ts, value));
- log.append(info, getWalKey(ts, scopes), edit, true);
- edit = new WALEdit();
- edit.add(new KeyValue(rowName, family, Bytes.toBytes("2"), ts+1, value));
- log.append(info, getWalKey(ts+1, scopes), edit, true);
- log.sync();
- LOG.info("Before 1st WAL roll " + log.toString());
- log.rollWriter();
- LOG.info("Past 1st WAL roll " + log.toString());
-
- Thread.sleep(1);
- long ts1 = System.currentTimeMillis();
-
- edit = new WALEdit();
- edit.add(new KeyValue(rowName, family, Bytes.toBytes("3"), ts1+1, value));
- log.append(info, getWalKey(ts1+1, scopes), edit, true);
- edit = new WALEdit();
- edit.add(new KeyValue(rowName, family, Bytes.toBytes("4"), ts1+2, value));
- log.append(info, getWalKey(ts1+2, scopes), edit, true);
- log.sync();
- log.shutdown();
- walfactory.shutdown();
- LOG.info("Closed WAL " + log.toString());
-
-
- WALInputFormat input = new WALInputFormat();
- Configuration jobConf = new Configuration(conf);
- jobConf.set("mapreduce.input.fileinputformat.inputdir", logDir.toString());
- jobConf.setLong(WALInputFormat.END_TIME_KEY, ts);
-
- // only 1st file is considered, and only its 1st entry is used
- List<InputSplit> splits = input.getSplits(MapreduceTestingShim.createJobContext(jobConf));
-
- assertEquals(1, splits.size());
- testSplit(splits.get(0), Bytes.toBytes("1"));
-
- jobConf.setLong(WALInputFormat.START_TIME_KEY, ts+1);
- jobConf.setLong(WALInputFormat.END_TIME_KEY, ts1+1);
- splits = input.getSplits(MapreduceTestingShim.createJobContext(jobConf));
- // both files need to be considered
- assertEquals(2, splits.size());
- // only the 2nd entry from the 1st file is used
- testSplit(splits.get(0), Bytes.toBytes("2"));
- // only the 1st entry from the 2nd file is used
- testSplit(splits.get(1), Bytes.toBytes("3"));
- }
-
- /**
- * Test basic functionality
- * @throws Exception
- */
- @Test
- public void testWALRecordReader() throws Exception {
- final WALFactory walfactory = new WALFactory(conf, null, getName());
- WAL log = walfactory.getWAL(info.getEncodedNameAsBytes(), info.getTable().getNamespace());
- byte [] value = Bytes.toBytes("value");
- final AtomicLong sequenceId = new AtomicLong(0);
- WALEdit edit = new WALEdit();
- edit.add(new KeyValue(rowName, family, Bytes.toBytes("1"),
- System.currentTimeMillis(), value));
- long txid = log.append(info, getWalKey(System.currentTimeMillis(), scopes), edit, true);
- log.sync(txid);
-
- Thread.sleep(1); // make sure 2nd log gets a later timestamp
- long secondTs = System.currentTimeMillis();
- log.rollWriter();
-
- edit = new WALEdit();
- edit.add(new KeyValue(rowName, family, Bytes.toBytes("2"),
- System.currentTimeMillis(), value));
- txid = log.append(info, getWalKey(System.currentTimeMillis(), scopes), edit, true);
- log.sync(txid);
- log.shutdown();
- walfactory.shutdown();
- long thirdTs = System.currentTimeMillis();
-
- // should have 2 log files now
- WALInputFormat input = new WALInputFormat();
- Configuration jobConf = new Configuration(conf);
- jobConf.set("mapreduce.input.fileinputformat.inputdir", logDir.toString());
-
- // make sure both logs are found
- List<InputSplit> splits = input.getSplits(MapreduceTestingShim.createJobContext(jobConf));
- assertEquals(2, splits.size());
-
- // should return exactly one KV
- testSplit(splits.get(0), Bytes.toBytes("1"));
- // same for the 2nd split
- testSplit(splits.get(1), Bytes.toBytes("2"));
-
- // now test basic time ranges:
-
- // set an endtime, the 2nd log file can be ignored completely.
- jobConf.setLong(WALInputFormat.END_TIME_KEY, secondTs-1);
- splits = input.getSplits(MapreduceTestingShim.createJobContext(jobConf));
- assertEquals(1, splits.size());
- testSplit(splits.get(0), Bytes.toBytes("1"));
-
- // now set a start time
- jobConf.setLong(WALInputFormat.END_TIME_KEY, Long.MAX_VALUE);
- jobConf.setLong(WALInputFormat.START_TIME_KEY, thirdTs);
- splits = input.getSplits(MapreduceTestingShim.createJobContext(jobConf));
- // both logs need to be considered
- assertEquals(2, splits.size());
- // but both readers skip all edits
- testSplit(splits.get(0));
- testSplit(splits.get(1));
- }
-
- protected WALKey getWalKey(final long time, NavigableMap<byte[], Integer> scopes) {
- return new WALKey(info.getEncodedNameAsBytes(), tableName, time, mvcc, scopes);
- }
-
- protected WALRecordReader getReader() {
- return new WALKeyRecordReader();
- }
-
- /**
- * Create a new reader from the split, and match the edits against the passed columns.
- */
- private void testSplit(InputSplit split, byte[]... columns) throws Exception {
- final WALRecordReader reader = getReader();
- reader.initialize(split, MapReduceTestUtil.createDummyMapTaskAttemptContext(conf));
-
- for (byte[] column : columns) {
- assertTrue(reader.nextKeyValue());
- Cell cell = reader.getCurrentValue().getCells().get(0);
- if (!Bytes.equals(column, 0, column.length, cell.getQualifierArray(),
- cell.getQualifierOffset(), cell.getQualifierLength())) {
- assertTrue(
- "expected ["
- + Bytes.toString(column)
- + "], actual ["
- + Bytes.toString(cell.getQualifierArray(), cell.getQualifierOffset(),
- cell.getQualifierLength()) + "]", false);
- }
- }
- assertFalse(reader.nextKeyValue());
- reader.close();
- }
-
-}
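For reference, a minimal sketch of the job configuration these split tests rely on; walDir, startTs and endTs are placeholders, and the helper names come from this test:

    Configuration jobConf = new Configuration(conf);
    jobConf.set("mapreduce.input.fileinputformat.inputdir", walDir.toString());
    jobConf.setLong(WALInputFormat.START_TIME_KEY, startTs); // edits written before startTs are skipped
    jobConf.setLong(WALInputFormat.END_TIME_KEY, endTs);     // edits and files after endTs are excluded
    List<InputSplit> splits = new WALInputFormat()
        .getSplits(MapreduceTestingShim.createJobContext(jobConf));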
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TsvImporterCustomTestMapper.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TsvImporterCustomTestMapper.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TsvImporterCustomTestMapper.java
deleted file mode 100644
index aea5036..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TsvImporterCustomTestMapper.java
+++ /dev/null
@@ -1,80 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Durability;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.KeyValue;
-
-import java.io.IOException;
-
-/**
- * Dummy mapper used for unit tests to verify that the mapper can be injected.
- * This approach would be used if a custom transformation needed to be done after
- * reading the input data before writing it to HFiles.
- */
-public class TsvImporterCustomTestMapper extends TsvImporterMapper {
-
- @Override
- protected void setup(Context context) {
- doSetup(context);
- }
-
- /**
- * Convert a line of TSV text into an HBase table row after transforming the
- * values by multiplying them by 3.
- */
- @Override
- public void map(LongWritable offset, Text value, Context context)
- throws IOException {
- byte[] family = Bytes.toBytes("FAM");
- final byte[][] qualifiers = { Bytes.toBytes("A"), Bytes.toBytes("B") };
-
- // do some basic line parsing
- byte[] lineBytes = value.getBytes();
- String[] valueTokens = new String(lineBytes, "UTF-8").split("\u001b");
-
- // create the rowKey and Put
- ImmutableBytesWritable rowKey =
- new ImmutableBytesWritable(Bytes.toBytes(valueTokens[0]));
- Put put = new Put(rowKey.copyBytes());
- put.setDurability(Durability.SKIP_WAL);
-
- //The value should look like this: VALUE1 or VALUE2. Let's multiply
- //the integer by 3
- for(int i = 1; i < valueTokens.length; i++) {
- String prefix = valueTokens[i].substring(0, "VALUE".length());
- String suffix = valueTokens[i].substring("VALUE".length());
- String newValue = prefix + Integer.parseInt(suffix) * 3;
-
- KeyValue kv = new KeyValue(rowKey.copyBytes(), family,
- qualifiers[i-1], Bytes.toBytes(newValue));
- put.add(kv);
- }
-
- try {
- context.write(rowKey, put);
- } catch (InterruptedException e) {
- e.printStackTrace();
- }
- }
-}
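A hedged sketch of how such a mapper would be injected into an ImportTsv run; the "importtsv.mapper.class" and "importtsv.columns" property names, and the argument order, are assumptions to be checked against the ImportTsv version in use:

    // hypothetical table name and input path; the property names are assumed ImportTsv keys
    Configuration conf = HBaseConfiguration.create();
    conf.set("importtsv.mapper.class", TsvImporterCustomTestMapper.class.getName());
    conf.set("importtsv.columns", "HBASE_ROW_KEY,FAM:A,FAM:B");
    int exitCode = ToolRunner.run(conf, new ImportTsv(), new String[] { "myTable", "/input/data.tsv" });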
[15/41] hbase git commit: HBASE-18640 Move mapreduce out of hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/SyncTable.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/SyncTable.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/SyncTable.java
deleted file mode 100644
index c72a0c3..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/SyncTable.java
+++ /dev/null
@@ -1,786 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-import java.util.Iterator;
-import java.util.Collections;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellComparator;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Delete;
-import org.apache.hadoop.hbase.client.Mutation;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapreduce.Counters;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
-import org.apache.hadoop.util.GenericOptionsParser;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.base.Throwables;
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.Iterators;
-
-public class SyncTable extends Configured implements Tool {
-
- private static final Log LOG = LogFactory.getLog(SyncTable.class);
-
- static final String SOURCE_HASH_DIR_CONF_KEY = "sync.table.source.hash.dir";
- static final String SOURCE_TABLE_CONF_KEY = "sync.table.source.table.name";
- static final String TARGET_TABLE_CONF_KEY = "sync.table.target.table.name";
- static final String SOURCE_ZK_CLUSTER_CONF_KEY = "sync.table.source.zk.cluster";
- static final String TARGET_ZK_CLUSTER_CONF_KEY = "sync.table.target.zk.cluster";
- static final String DRY_RUN_CONF_KEY="sync.table.dry.run";
-
- Path sourceHashDir;
- String sourceTableName;
- String targetTableName;
-
- String sourceZkCluster;
- String targetZkCluster;
- boolean dryRun;
-
- Counters counters;
-
- public SyncTable(Configuration conf) {
- super(conf);
- }
-
- public Job createSubmittableJob(String[] args) throws IOException {
- FileSystem fs = sourceHashDir.getFileSystem(getConf());
- if (!fs.exists(sourceHashDir)) {
- throw new IOException("Source hash dir not found: " + sourceHashDir);
- }
-
- HashTable.TableHash tableHash = HashTable.TableHash.read(getConf(), sourceHashDir);
- LOG.info("Read source hash manifest: " + tableHash);
- LOG.info("Read " + tableHash.partitions.size() + " partition keys");
- if (!tableHash.tableName.equals(sourceTableName)) {
- LOG.warn("Table name mismatch - manifest indicates hash was taken from: "
- + tableHash.tableName + " but job is reading from: " + sourceTableName);
- }
- if (tableHash.numHashFiles != tableHash.partitions.size() + 1) {
- throw new RuntimeException("Hash data appears corrupt. The number of hash files created"
- + " should be 1 more than the number of partition keys. However, the manifest file"
- + " says numHashFiles=" + tableHash.numHashFiles + " but the number of partition keys"
- + " found in the partitions file is " + tableHash.partitions.size());
- }
-
- Path dataDir = new Path(sourceHashDir, HashTable.HASH_DATA_DIR);
- int dataSubdirCount = 0;
- for (FileStatus file : fs.listStatus(dataDir)) {
- if (file.getPath().getName().startsWith(HashTable.OUTPUT_DATA_FILE_PREFIX)) {
- dataSubdirCount++;
- }
- }
-
- if (dataSubdirCount != tableHash.numHashFiles) {
- throw new RuntimeException("Hash data appears corrupt. The number of hash files created"
- + " should be 1 more than the number of partition keys. However, the number of data dirs"
- + " found is " + dataSubdirCount + " but the number of partition keys"
- + " found in the partitions file is " + tableHash.partitions.size());
- }
-
- Job job = Job.getInstance(getConf(),getConf().get("mapreduce.job.name",
- "syncTable_" + sourceTableName + "-" + targetTableName));
- Configuration jobConf = job.getConfiguration();
- job.setJarByClass(HashTable.class);
- jobConf.set(SOURCE_HASH_DIR_CONF_KEY, sourceHashDir.toString());
- jobConf.set(SOURCE_TABLE_CONF_KEY, sourceTableName);
- jobConf.set(TARGET_TABLE_CONF_KEY, targetTableName);
- if (sourceZkCluster != null) {
- jobConf.set(SOURCE_ZK_CLUSTER_CONF_KEY, sourceZkCluster);
- }
- if (targetZkCluster != null) {
- jobConf.set(TARGET_ZK_CLUSTER_CONF_KEY, targetZkCluster);
- }
- jobConf.setBoolean(DRY_RUN_CONF_KEY, dryRun);
-
- TableMapReduceUtil.initTableMapperJob(targetTableName, tableHash.initScan(),
- SyncMapper.class, null, null, job);
-
- job.setNumReduceTasks(0);
-
- if (dryRun) {
- job.setOutputFormatClass(NullOutputFormat.class);
- } else {
- // No reducers. Just write straight to table. Call initTableReducerJob
- // because it sets up the TableOutputFormat.
- TableMapReduceUtil.initTableReducerJob(targetTableName, null, job, null,
- targetZkCluster, null, null);
-
- // would be nice to add an option for bulk load instead
- }
-
- // Obtain an authentication token, for the specified cluster, on behalf of the current user
- if (sourceZkCluster != null) {
- Configuration peerConf =
- HBaseConfiguration.createClusterConf(job.getConfiguration(), sourceZkCluster);
- TableMapReduceUtil.initCredentialsForCluster(job, peerConf);
- }
- return job;
- }
-
- public static class SyncMapper extends TableMapper<ImmutableBytesWritable, Mutation> {
- Path sourceHashDir;
-
- Connection sourceConnection;
- Connection targetConnection;
- Table sourceTable;
- Table targetTable;
- boolean dryRun;
-
- HashTable.TableHash sourceTableHash;
- HashTable.TableHash.Reader sourceHashReader;
- ImmutableBytesWritable currentSourceHash;
- ImmutableBytesWritable nextSourceKey;
- HashTable.ResultHasher targetHasher;
-
- Throwable mapperException;
-
- public static enum Counter {BATCHES, HASHES_MATCHED, HASHES_NOT_MATCHED, SOURCEMISSINGROWS,
- SOURCEMISSINGCELLS, TARGETMISSINGROWS, TARGETMISSINGCELLS, ROWSWITHDIFFS, DIFFERENTCELLVALUES,
- MATCHINGROWS, MATCHINGCELLS, EMPTY_BATCHES, RANGESMATCHED, RANGESNOTMATCHED};
-
- @Override
- protected void setup(Context context) throws IOException {
-
- Configuration conf = context.getConfiguration();
- sourceHashDir = new Path(conf.get(SOURCE_HASH_DIR_CONF_KEY));
- sourceConnection = openConnection(conf, SOURCE_ZK_CLUSTER_CONF_KEY, null);
- targetConnection = openConnection(conf, TARGET_ZK_CLUSTER_CONF_KEY,
- TableOutputFormat.OUTPUT_CONF_PREFIX);
- sourceTable = openTable(sourceConnection, conf, SOURCE_TABLE_CONF_KEY);
- targetTable = openTable(targetConnection, conf, TARGET_TABLE_CONF_KEY);
- dryRun = conf.getBoolean(DRY_RUN_CONF_KEY, false);
-
- sourceTableHash = HashTable.TableHash.read(conf, sourceHashDir);
- LOG.info("Read source hash manifest: " + sourceTableHash);
- LOG.info("Read " + sourceTableHash.partitions.size() + " partition keys");
-
- TableSplit split = (TableSplit) context.getInputSplit();
- ImmutableBytesWritable splitStartKey = new ImmutableBytesWritable(split.getStartRow());
-
- sourceHashReader = sourceTableHash.newReader(conf, splitStartKey);
- findNextKeyHashPair();
-
- // create a hasher, but don't start it right away
- // instead, find the first hash batch at or after the start row
- // and skip any rows that come before. they will be caught by the previous task
- targetHasher = new HashTable.ResultHasher();
- }
-
- private static Connection openConnection(Configuration conf, String zkClusterConfKey,
- String configPrefix)
- throws IOException {
- String zkCluster = conf.get(zkClusterConfKey);
- Configuration clusterConf = HBaseConfiguration.createClusterConf(conf,
- zkCluster, configPrefix);
- return ConnectionFactory.createConnection(clusterConf);
- }
-
- private static Table openTable(Connection connection, Configuration conf,
- String tableNameConfKey) throws IOException {
- return connection.getTable(TableName.valueOf(conf.get(tableNameConfKey)));
- }
-
- /**
- * Attempt to read the next source key/hash pair.
- * If there are no more, set nextSourceKey to null
- */
- private void findNextKeyHashPair() throws IOException {
- boolean hasNext = sourceHashReader.next();
- if (hasNext) {
- nextSourceKey = sourceHashReader.getCurrentKey();
- } else {
- // no more keys - last hash goes to the end
- nextSourceKey = null;
- }
- }
-
- @Override
- protected void map(ImmutableBytesWritable key, Result value, Context context)
- throws IOException, InterruptedException {
- try {
- // first, finish any hash batches that end before the scanned row
- while (nextSourceKey != null && key.compareTo(nextSourceKey) >= 0) {
- moveToNextBatch(context);
- }
-
- // next, add the scanned row (as long as we've reached the first batch)
- if (targetHasher.isBatchStarted()) {
- targetHasher.hashResult(value);
- }
- } catch (Throwable t) {
- mapperException = t;
- Throwables.propagateIfInstanceOf(t, IOException.class);
- Throwables.propagateIfInstanceOf(t, InterruptedException.class);
- Throwables.propagate(t);
- }
- }
-
- /**
- * If there is an open hash batch, complete it and sync if there are diffs.
- * Start a new batch, and seek to read the
- */
- private void moveToNextBatch(Context context) throws IOException, InterruptedException {
- if (targetHasher.isBatchStarted()) {
- finishBatchAndCompareHashes(context);
- }
- targetHasher.startBatch(nextSourceKey);
- currentSourceHash = sourceHashReader.getCurrentHash();
-
- findNextKeyHashPair();
- }
-
- /**
- * Finish the currently open hash batch.
- * Compare the target hash to the given source hash.
- * If they do not match, then sync the covered key range.
- */
- private void finishBatchAndCompareHashes(Context context)
- throws IOException, InterruptedException {
- targetHasher.finishBatch();
- context.getCounter(Counter.BATCHES).increment(1);
- if (targetHasher.getBatchSize() == 0) {
- context.getCounter(Counter.EMPTY_BATCHES).increment(1);
- }
- ImmutableBytesWritable targetHash = targetHasher.getBatchHash();
- if (targetHash.equals(currentSourceHash)) {
- context.getCounter(Counter.HASHES_MATCHED).increment(1);
- } else {
- context.getCounter(Counter.HASHES_NOT_MATCHED).increment(1);
-
- ImmutableBytesWritable stopRow = nextSourceKey == null
- ? new ImmutableBytesWritable(sourceTableHash.stopRow)
- : nextSourceKey;
-
- if (LOG.isDebugEnabled()) {
- LOG.debug("Hash mismatch. Key range: " + toHex(targetHasher.getBatchStartKey())
- + " to " + toHex(stopRow)
- + " sourceHash: " + toHex(currentSourceHash)
- + " targetHash: " + toHex(targetHash));
- }
-
- syncRange(context, targetHasher.getBatchStartKey(), stopRow);
- }
- }
- private static String toHex(ImmutableBytesWritable bytes) {
- return Bytes.toHex(bytes.get(), bytes.getOffset(), bytes.getLength());
- }
-
- private static final CellScanner EMPTY_CELL_SCANNER
- = new CellScanner(Collections.<Result>emptyIterator());
-
- /**
- * Rescan the given range directly from the source and target tables.
- * Count and log differences, and if this is not a dry run, output Puts and Deletes
- * to make the target table match the source table for this range
- */
- private void syncRange(Context context, ImmutableBytesWritable startRow,
- ImmutableBytesWritable stopRow) throws IOException, InterruptedException {
- Scan scan = sourceTableHash.initScan();
- scan.setStartRow(startRow.copyBytes());
- scan.setStopRow(stopRow.copyBytes());
-
- ResultScanner sourceScanner = sourceTable.getScanner(scan);
- CellScanner sourceCells = new CellScanner(sourceScanner.iterator());
-
- ResultScanner targetScanner = targetTable.getScanner(new Scan(scan));
- CellScanner targetCells = new CellScanner(targetScanner.iterator());
-
- boolean rangeMatched = true;
- byte[] nextSourceRow = sourceCells.nextRow();
- byte[] nextTargetRow = targetCells.nextRow();
- while(nextSourceRow != null || nextTargetRow != null) {
- boolean rowMatched;
- int rowComparison = compareRowKeys(nextSourceRow, nextTargetRow);
- if (rowComparison < 0) {
- if (LOG.isInfoEnabled()) {
- LOG.info("Target missing row: " + Bytes.toHex(nextSourceRow));
- }
- context.getCounter(Counter.TARGETMISSINGROWS).increment(1);
-
- rowMatched = syncRowCells(context, nextSourceRow, sourceCells, EMPTY_CELL_SCANNER);
- nextSourceRow = sourceCells.nextRow(); // advance only source to next row
- } else if (rowComparison > 0) {
- if (LOG.isInfoEnabled()) {
- LOG.info("Source missing row: " + Bytes.toHex(nextTargetRow));
- }
- context.getCounter(Counter.SOURCEMISSINGROWS).increment(1);
-
- rowMatched = syncRowCells(context, nextTargetRow, EMPTY_CELL_SCANNER, targetCells);
- nextTargetRow = targetCells.nextRow(); // advance only target to next row
- } else {
- // current row is the same on both sides, compare cell by cell
- rowMatched = syncRowCells(context, nextSourceRow, sourceCells, targetCells);
- nextSourceRow = sourceCells.nextRow();
- nextTargetRow = targetCells.nextRow();
- }
-
- if (!rowMatched) {
- rangeMatched = false;
- }
- }
-
- sourceScanner.close();
- targetScanner.close();
-
- context.getCounter(rangeMatched ? Counter.RANGESMATCHED : Counter.RANGESNOTMATCHED)
- .increment(1);
- }
-
- private static class CellScanner {
- private final Iterator<Result> results;
-
- private byte[] currentRow;
- private Result currentRowResult;
- private int nextCellInRow;
-
- private Result nextRowResult;
-
- public CellScanner(Iterator<Result> results) {
- this.results = results;
- }
-
- /**
- * Advance to the next row and return its row key.
- * Returns null iff there are no more rows.
- */
- public byte[] nextRow() {
- if (nextRowResult == null) {
- // no cached row - check scanner for more
- while (results.hasNext()) {
- nextRowResult = results.next();
- Cell nextCell = nextRowResult.rawCells()[0];
- if (currentRow == null
- || !Bytes.equals(currentRow, 0, currentRow.length, nextCell.getRowArray(),
- nextCell.getRowOffset(), nextCell.getRowLength())) {
- // found next row
- break;
- } else {
- // found another result from current row, keep scanning
- nextRowResult = null;
- }
- }
-
- if (nextRowResult == null) {
- // end of data, no more rows
- currentRowResult = null;
- currentRow = null;
- return null;
- }
- }
-
- // advance to cached result for next row
- currentRowResult = nextRowResult;
- nextCellInRow = 0;
- currentRow = currentRowResult.getRow();
- nextRowResult = null;
- return currentRow;
- }
-
- /**
- * Returns the next Cell in the current row or null iff none remain.
- */
- public Cell nextCellInRow() {
- if (currentRowResult == null) {
- // nothing left in current row
- return null;
- }
-
- Cell nextCell = currentRowResult.rawCells()[nextCellInRow];
- nextCellInRow++;
- if (nextCellInRow == currentRowResult.size()) {
- if (results.hasNext()) {
- Result result = results.next();
- Cell cell = result.rawCells()[0];
- if (Bytes.equals(currentRow, 0, currentRow.length, cell.getRowArray(),
- cell.getRowOffset(), cell.getRowLength())) {
- // result is part of current row
- currentRowResult = result;
- nextCellInRow = 0;
- } else {
- // result is part of next row, cache it
- nextRowResult = result;
- // current row is complete
- currentRowResult = null;
- }
- } else {
- // end of data
- currentRowResult = null;
- }
- }
- return nextCell;
- }
- }
-
- /**
- * Compare the cells for the given row from the source and target tables.
- * Count and log any differences.
- * If not a dry run, output a Put and/or Delete needed to sync the target table
- * to match the source table.
- */
- private boolean syncRowCells(Context context, byte[] rowKey, CellScanner sourceCells,
- CellScanner targetCells) throws IOException, InterruptedException {
- Put put = null;
- Delete delete = null;
- long matchingCells = 0;
- boolean matchingRow = true;
- Cell sourceCell = sourceCells.nextCellInRow();
- Cell targetCell = targetCells.nextCellInRow();
- while (sourceCell != null || targetCell != null) {
-
- int cellKeyComparison = compareCellKeysWithinRow(sourceCell, targetCell);
- if (cellKeyComparison < 0) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("Target missing cell: " + sourceCell);
- }
- context.getCounter(Counter.TARGETMISSINGCELLS).increment(1);
- matchingRow = false;
-
- if (!dryRun) {
- if (put == null) {
- put = new Put(rowKey);
- }
- put.add(sourceCell);
- }
-
- sourceCell = sourceCells.nextCellInRow();
- } else if (cellKeyComparison > 0) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("Source missing cell: " + targetCell);
- }
- context.getCounter(Counter.SOURCEMISSINGCELLS).increment(1);
- matchingRow = false;
-
- if (!dryRun) {
- if (delete == null) {
- delete = new Delete(rowKey);
- }
- // add a tombstone to exactly match the target cell that is missing on the source
- delete.addColumn(CellUtil.cloneFamily(targetCell),
- CellUtil.cloneQualifier(targetCell), targetCell.getTimestamp());
- }
-
- targetCell = targetCells.nextCellInRow();
- } else {
- // the cell keys are equal, now check values
- if (CellUtil.matchingValue(sourceCell, targetCell)) {
- matchingCells++;
- } else {
- if (LOG.isDebugEnabled()) {
- LOG.debug("Different values: ");
- LOG.debug(" source cell: " + sourceCell
- + " value: " + Bytes.toHex(sourceCell.getValueArray(),
- sourceCell.getValueOffset(), sourceCell.getValueLength()));
- LOG.debug(" target cell: " + targetCell
- + " value: " + Bytes.toHex(targetCell.getValueArray(),
- targetCell.getValueOffset(), targetCell.getValueLength()));
- }
- context.getCounter(Counter.DIFFERENTCELLVALUES).increment(1);
- matchingRow = false;
-
- if (!dryRun) {
- // overwrite target cell
- if (put == null) {
- put = new Put(rowKey);
- }
- put.add(sourceCell);
- }
- }
- sourceCell = sourceCells.nextCellInRow();
- targetCell = targetCells.nextCellInRow();
- }
-
- if (!dryRun && sourceTableHash.scanBatch > 0) {
- if (put != null && put.size() >= sourceTableHash.scanBatch) {
- context.write(new ImmutableBytesWritable(rowKey), put);
- put = null;
- }
- if (delete != null && delete.size() >= sourceTableHash.scanBatch) {
- context.write(new ImmutableBytesWritable(rowKey), delete);
- delete = null;
- }
- }
- }
-
- if (!dryRun) {
- if (put != null) {
- context.write(new ImmutableBytesWritable(rowKey), put);
- }
- if (delete != null) {
- context.write(new ImmutableBytesWritable(rowKey), delete);
- }
- }
-
- if (matchingCells > 0) {
- context.getCounter(Counter.MATCHINGCELLS).increment(matchingCells);
- }
- if (matchingRow) {
- context.getCounter(Counter.MATCHINGROWS).increment(1);
- return true;
- } else {
- context.getCounter(Counter.ROWSWITHDIFFS).increment(1);
- return false;
- }
- }
-
- /**
- * Compare row keys of the given Result objects.
- * Nulls are after non-nulls
- */
- private static int compareRowKeys(byte[] r1, byte[] r2) {
- if (r1 == null) {
- return 1; // source missing row
- } else if (r2 == null) {
- return -1; // target missing row
- } else {
- // SyncTable is only ever run against non-META tables, so a raw byte comparison here
- // matches what CellComparator does for row keys; the MetaCellComparator path never applies.
- return Bytes.compareTo(r1, 0, r1.length, r2, 0, r2.length);
- }
- }
-
- /**
- * Compare families, qualifiers, and timestamps of the given Cells.
- * They are assumed to be of the same row.
- * Nulls are after non-nulls.
- */
- private static int compareCellKeysWithinRow(Cell c1, Cell c2) {
- if (c1 == null) {
- return 1; // source missing cell
- }
- if (c2 == null) {
- return -1; // target missing cell
- }
-
- int result = CellComparator.compareFamilies(c1, c2);
- if (result != 0) {
- return result;
- }
-
- result = CellComparator.compareQualifiers(c1, c2);
- if (result != 0) {
- return result;
- }
-
- // note timestamp comparison is inverted - more recent cells first
- return CellComparator.compareTimestamps(c1, c2);
- }
-
- @Override
- protected void cleanup(Context context)
- throws IOException, InterruptedException {
- if (mapperException == null) {
- try {
- finishRemainingHashRanges(context);
- } catch (Throwable t) {
- mapperException = t;
- }
- }
-
- try {
- sourceTable.close();
- targetTable.close();
- sourceConnection.close();
- targetConnection.close();
- } catch (Throwable t) {
- if (mapperException == null) {
- mapperException = t;
- } else {
- LOG.error("Suppressing exception from closing tables", t);
- }
- }
-
- // propagate first exception
- if (mapperException != null) {
- Throwables.propagateIfInstanceOf(mapperException, IOException.class);
- Throwables.propagateIfInstanceOf(mapperException, InterruptedException.class);
- Throwables.propagate(mapperException);
- }
- }
-
- private void finishRemainingHashRanges(Context context) throws IOException,
- InterruptedException {
- TableSplit split = (TableSplit) context.getInputSplit();
- byte[] splitEndRow = split.getEndRow();
- boolean reachedEndOfTable = HashTable.isTableEndRow(splitEndRow);
-
- // if there are more hash batches that begin before the end of this split move to them
- while (nextSourceKey != null
- && (nextSourceKey.compareTo(splitEndRow) < 0 || reachedEndOfTable)) {
- moveToNextBatch(context);
- }
-
- if (targetHasher.isBatchStarted()) {
- // need to complete the final open hash batch
-
- if ((nextSourceKey != null && nextSourceKey.compareTo(splitEndRow) > 0)
- || (nextSourceKey == null && !Bytes.equals(splitEndRow, sourceTableHash.stopRow))) {
- // the open hash range continues past the end of this region
- // add a scan to complete the current hash range
- Scan scan = sourceTableHash.initScan();
- scan.setStartRow(splitEndRow);
- if (nextSourceKey == null) {
- scan.setStopRow(sourceTableHash.stopRow);
- } else {
- scan.setStopRow(nextSourceKey.copyBytes());
- }
-
- ResultScanner targetScanner = null;
- try {
- targetScanner = targetTable.getScanner(scan);
- for (Result row : targetScanner) {
- targetHasher.hashResult(row);
- }
- } finally {
- if (targetScanner != null) {
- targetScanner.close();
- }
- }
- } // else current batch ends exactly at split end row
-
- finishBatchAndCompareHashes(context);
- }
- }
- }
-
- private static final int NUM_ARGS = 3;
- private static void printUsage(final String errorMsg) {
- if (errorMsg != null && errorMsg.length() > 0) {
- System.err.println("ERROR: " + errorMsg);
- System.err.println();
- }
- System.err.println("Usage: SyncTable [options] <sourcehashdir> <sourcetable> <targettable>");
- System.err.println();
- System.err.println("Options:");
-
- System.err.println(" sourcezkcluster ZK cluster key of the source table");
- System.err.println(" (defaults to cluster in classpath's config)");
- System.err.println(" targetzkcluster ZK cluster key of the target table");
- System.err.println(" (defaults to cluster in classpath's config)");
- System.err.println(" dryrun if true, output counters but no writes");
- System.err.println(" (defaults to false)");
- System.err.println();
- System.err.println("Args:");
- System.err.println(" sourcehashdir path to HashTable output dir for source table");
- System.err.println(" (see org.apache.hadoop.hbase.mapreduce.HashTable)");
- System.err.println(" sourcetable Name of the source table to sync from");
- System.err.println(" targettable Name of the target table to sync to");
- System.err.println();
- System.err.println("Examples:");
- System.err.println(" For a dry run SyncTable of tableA from a remote source cluster");
- System.err.println(" to a local target cluster:");
- System.err.println(" $ hbase " +
- "org.apache.hadoop.hbase.mapreduce.SyncTable --dryrun=true"
- + " --sourcezkcluster=zk1.example.com,zk2.example.com,zk3.example.com:2181:/hbase"
- + " hdfs://nn:9000/hashes/tableA tableA tableA");
- }
-
- private boolean doCommandLine(final String[] args) {
- if (args.length < NUM_ARGS) {
- printUsage(null);
- return false;
- }
- try {
- sourceHashDir = new Path(args[args.length - 3]);
- sourceTableName = args[args.length - 2];
- targetTableName = args[args.length - 1];
-
- for (int i = 0; i < args.length - NUM_ARGS; i++) {
- String cmd = args[i];
- if (cmd.equals("-h") || cmd.startsWith("--h")) {
- printUsage(null);
- return false;
- }
-
- final String sourceZkClusterKey = "--sourcezkcluster=";
- if (cmd.startsWith(sourceZkClusterKey)) {
- sourceZkCluster = cmd.substring(sourceZkClusterKey.length());
- continue;
- }
-
- final String targetZkClusterKey = "--targetzkcluster=";
- if (cmd.startsWith(targetZkClusterKey)) {
- targetZkCluster = cmd.substring(targetZkClusterKey.length());
- continue;
- }
-
- final String dryRunKey = "--dryrun=";
- if (cmd.startsWith(dryRunKey)) {
- dryRun = Boolean.parseBoolean(cmd.substring(dryRunKey.length()));
- continue;
- }
-
- printUsage("Invalid argument '" + cmd + "'");
- return false;
- }
-
-
- } catch (Exception e) {
- e.printStackTrace();
- printUsage("Can't start because " + e.getMessage());
- return false;
- }
- return true;
- }
-
- /**
- * Main entry point.
- */
- public static void main(String[] args) throws Exception {
- int ret = ToolRunner.run(new SyncTable(HBaseConfiguration.create()), args);
- System.exit(ret);
- }
-
- @Override
- public int run(String[] args) throws Exception {
- String[] otherArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs();
- if (!doCommandLine(otherArgs)) {
- return 1;
- }
-
- Job job = createSubmittableJob(otherArgs);
- if (!job.waitForCompletion(true)) {
- LOG.info("Map-reduce job failed!");
- return 1;
- }
- counters = job.getCounters();
- return 0;
- }
-
-}
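Taken together with HashTable, the usage text above implies a two-step flow: HashTable writes the hash manifest for the source table, then SyncTable compares it against the target and, unless --dryrun=true, writes the repairs. A hedged sketch with placeholder paths and table names, driving both tools through ToolRunner (constructor and argument order per each tool's own usage text):

    Configuration conf = HBaseConfiguration.create();
    // step 1: hash the source table into an output directory
    ToolRunner.run(conf, new HashTable(conf),
        new String[] { "tableA", "hdfs://nn:9000/hashes/tableA" });
    // step 2: compare/sync the target table against that manifest
    ToolRunner.run(conf, new SyncTable(conf),
        new String[] { "--dryrun=true", "hdfs://nn:9000/hashes/tableA", "tableA", "tableA" });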
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormat.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormat.java
deleted file mode 100644
index 63868da..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormat.java
+++ /dev/null
@@ -1,294 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-import java.util.Collections;
-import java.util.List;
-import java.util.Locale;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.RegionLocator;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.hbase.util.Pair;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.util.StringUtils;
-
-/**
- * Convert HBase tabular data into a format that is consumable by Map/Reduce.
- */
-@InterfaceAudience.Public
-public class TableInputFormat extends TableInputFormatBase
-implements Configurable {
-
- @SuppressWarnings("hiding")
- private static final Log LOG = LogFactory.getLog(TableInputFormat.class);
-
- /** Job parameter that specifies the input table. */
- public static final String INPUT_TABLE = "hbase.mapreduce.inputtable";
- /**
- * If specified, use start keys of this table to split.
- * This is useful when you are preparing data for bulkload.
- */
- private static final String SPLIT_TABLE = "hbase.mapreduce.splittable";
- /** Base-64 encoded scanner. All other SCAN_ confs are ignored if this is specified.
- * See {@link TableMapReduceUtil#convertScanToString(Scan)} for more details.
- */
- public static final String SCAN = "hbase.mapreduce.scan";
- /** Scan start row */
- public static final String SCAN_ROW_START = "hbase.mapreduce.scan.row.start";
- /** Scan stop row */
- public static final String SCAN_ROW_STOP = "hbase.mapreduce.scan.row.stop";
- /** Column Family to Scan */
- public static final String SCAN_COLUMN_FAMILY = "hbase.mapreduce.scan.column.family";
- /** Space delimited list of columns and column families to scan. */
- public static final String SCAN_COLUMNS = "hbase.mapreduce.scan.columns";
- /** The timestamp used to filter columns with a specific timestamp. */
- public static final String SCAN_TIMESTAMP = "hbase.mapreduce.scan.timestamp";
- /** The starting timestamp used to filter columns with a specific range of versions. */
- public static final String SCAN_TIMERANGE_START = "hbase.mapreduce.scan.timerange.start";
- /** The ending timestamp used to filter columns with a specific range of versions. */
- public static final String SCAN_TIMERANGE_END = "hbase.mapreduce.scan.timerange.end";
- /** The maximum number of version to return. */
- public static final String SCAN_MAXVERSIONS = "hbase.mapreduce.scan.maxversions";
- /** Set to false to disable server-side caching of blocks for this scan. */
- public static final String SCAN_CACHEBLOCKS = "hbase.mapreduce.scan.cacheblocks";
- /** The number of rows for caching that will be passed to scanners. */
- public static final String SCAN_CACHEDROWS = "hbase.mapreduce.scan.cachedrows";
- /** Set the maximum number of values to return for each call to next(). */
- public static final String SCAN_BATCHSIZE = "hbase.mapreduce.scan.batchsize";
- /** Specify if we have to shuffle the map tasks. */
- public static final String SHUFFLE_MAPS = "hbase.mapreduce.inputtable.shufflemaps";
-
- /** The configuration. */
- private Configuration conf = null;
-
- /**
- * Returns the current configuration.
- *
- * @return The current configuration.
- * @see org.apache.hadoop.conf.Configurable#getConf()
- */
- @Override
- public Configuration getConf() {
- return conf;
- }
-
- /**
- * Sets the configuration. This is used to set the details for the table to
- * be scanned.
- *
- * @param configuration The configuration to set.
- * @see org.apache.hadoop.conf.Configurable#setConf(
- * org.apache.hadoop.conf.Configuration)
- */
- @Override
- @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="REC_CATCH_EXCEPTION",
- justification="Intentional")
- public void setConf(Configuration configuration) {
- this.conf = configuration;
-
- Scan scan = null;
-
- if (conf.get(SCAN) != null) {
- try {
- scan = TableMapReduceUtil.convertStringToScan(conf.get(SCAN));
- } catch (IOException e) {
- LOG.error("An error occurred.", e);
- }
- } else {
- try {
- scan = createScanFromConfiguration(conf);
- } catch (Exception e) {
- LOG.error(StringUtils.stringifyException(e));
- }
- }
-
- setScan(scan);
- }
-
- /**
- * Sets up a {@link Scan} instance, applying settings from the configuration property
- * constants defined in {@code TableInputFormat}. This allows specifying things such as:
- * <ul>
- * <li>start and stop rows</li>
- * <li>column qualifiers or families</li>
- * <li>timestamps or timerange</li>
- * <li>scanner caching and batch size</li>
- * </ul>
- */
- public static Scan createScanFromConfiguration(Configuration conf) throws IOException {
- Scan scan = new Scan();
-
- if (conf.get(SCAN_ROW_START) != null) {
- scan.setStartRow(Bytes.toBytesBinary(conf.get(SCAN_ROW_START)));
- }
-
- if (conf.get(SCAN_ROW_STOP) != null) {
- scan.setStopRow(Bytes.toBytesBinary(conf.get(SCAN_ROW_STOP)));
- }
-
- if (conf.get(SCAN_COLUMNS) != null) {
- addColumns(scan, conf.get(SCAN_COLUMNS));
- }
-
- for (String columnFamily : conf.getTrimmedStrings(SCAN_COLUMN_FAMILY)) {
- scan.addFamily(Bytes.toBytes(columnFamily));
- }
-
- if (conf.get(SCAN_TIMESTAMP) != null) {
- scan.setTimeStamp(Long.parseLong(conf.get(SCAN_TIMESTAMP)));
- }
-
- if (conf.get(SCAN_TIMERANGE_START) != null && conf.get(SCAN_TIMERANGE_END) != null) {
- scan.setTimeRange(
- Long.parseLong(conf.get(SCAN_TIMERANGE_START)),
- Long.parseLong(conf.get(SCAN_TIMERANGE_END)));
- }
-
- if (conf.get(SCAN_MAXVERSIONS) != null) {
- scan.setMaxVersions(Integer.parseInt(conf.get(SCAN_MAXVERSIONS)));
- }
-
- if (conf.get(SCAN_CACHEDROWS) != null) {
- scan.setCaching(Integer.parseInt(conf.get(SCAN_CACHEDROWS)));
- }
-
- if (conf.get(SCAN_BATCHSIZE) != null) {
- scan.setBatch(Integer.parseInt(conf.get(SCAN_BATCHSIZE)));
- }
-
- // false by default, full table scans generate too much BC churn
- scan.setCacheBlocks((conf.getBoolean(SCAN_CACHEBLOCKS, false)));
-
- return scan;
- }
-
- @Override
- protected void initialize(JobContext context) throws IOException {
- // Do we have to worry about mis-matches between the Configuration from setConf and the one
- // in this context?
- TableName tableName = TableName.valueOf(conf.get(INPUT_TABLE));
- try {
- initializeTable(ConnectionFactory.createConnection(new Configuration(conf)), tableName);
- } catch (Exception e) {
- LOG.error(StringUtils.stringifyException(e));
- }
- }
-
- /**
- * Parses a combined family and qualifier and adds either both or just the
- * family in case there is no qualifier. This assumes the older colon
- * divided notation, e.g. "family:qualifier".
- *
- * @param scan The Scan to update.
- * @param familyAndQualifier family and qualifier
- * @throws IllegalArgumentException When familyAndQualifier is invalid.
- */
- private static void addColumn(Scan scan, byte[] familyAndQualifier) {
- byte [][] fq = KeyValue.parseColumn(familyAndQualifier);
- if (fq.length == 1) {
- scan.addFamily(fq[0]);
- } else if (fq.length == 2) {
- scan.addColumn(fq[0], fq[1]);
- } else {
- throw new IllegalArgumentException("Invalid familyAndQualifier provided.");
- }
- }
-
- /**
- * Adds an array of columns specified using old format, family:qualifier.
- * <p>
- * Overrides previous calls to {@link Scan#addColumn(byte[], byte[])} for any families in the
- * input.
- *
- * @param scan The Scan to update.
- * @param columns array of columns, formatted as <code>family:qualifier</code>
- * @see Scan#addColumn(byte[], byte[])
- */
- public static void addColumns(Scan scan, byte [][] columns) {
- for (byte[] column : columns) {
- addColumn(scan, column);
- }
- }
-
- /**
- * Calculates the splits that will serve as input for the map tasks. The
- * number of splits matches the number of regions in a table. Splits are shuffled if
- * required.
- * @param context The current job context.
- * @return The list of input splits.
- * @throws IOException When creating the list of splits fails.
- * @see org.apache.hadoop.mapreduce.InputFormat#getSplits(
- * org.apache.hadoop.mapreduce.JobContext)
- */
- @Override
- public List<InputSplit> getSplits(JobContext context) throws IOException {
- List<InputSplit> splits = super.getSplits(context);
- if ((conf.get(SHUFFLE_MAPS) != null) && "true".equals(conf.get(SHUFFLE_MAPS).toLowerCase(Locale.ROOT))) {
- Collections.shuffle(splits);
- }
- return splits;
- }
-
- /**
- * Convenience method to parse a string representation of an array of column specifiers.
- *
- * @param scan The Scan to update.
- * @param columns The columns to parse.
- */
- private static void addColumns(Scan scan, String columns) {
- String[] cols = columns.split(" ");
- for (String col : cols) {
- addColumn(scan, Bytes.toBytes(col));
- }
- }
-
- @Override
- protected Pair<byte[][], byte[][]> getStartEndKeys() throws IOException {
- if (conf.get(SPLIT_TABLE) != null) {
- TableName splitTableName = TableName.valueOf(conf.get(SPLIT_TABLE));
- try (Connection conn = ConnectionFactory.createConnection(getConf())) {
- try (RegionLocator rl = conn.getRegionLocator(splitTableName)) {
- return rl.getStartEndKeys();
- }
- }
- }
-
- return super.getStartEndKeys();
- }
-
- /**
- * Sets split table in map-reduce job.
- */
- public static void configureSplitTable(Job job, TableName tableName) {
- job.getConfiguration().set(SPLIT_TABLE, tableName.getNameAsString());
- }
-}
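For orientation, here is a minimal sketch of how the INPUT_TABLE and SCAN_* properties defined above are typically wired into a job. The table name "exampleTable", the column family "cf", and the property values are illustrative assumptions, not taken from this commit:

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.hbase.HBaseConfiguration;
  import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
  import org.apache.hadoop.mapreduce.Job;

  public class ExampleScanJobSetup {
    public static void main(String[] args) throws Exception {
      Configuration conf = HBaseConfiguration.create();
      conf.set(TableInputFormat.INPUT_TABLE, "exampleTable");     // table to read (illustrative name)
      conf.set(TableInputFormat.SCAN_COLUMN_FAMILY, "cf");        // limit the scan to one family
      conf.set(TableInputFormat.SCAN_CACHEDROWS, "500");          // scanner caching
      conf.setBoolean(TableInputFormat.SHUFFLE_MAPS, true);       // optionally shuffle the splits
      Job job = Job.getInstance(conf, "scan-exampleTable");
      job.setInputFormatClass(TableInputFormat.class);
      // Mapper, reducer and output configuration are omitted from this sketch.
    }
  }

When the hbase.mapreduce.scan property is not set, setConf above falls back to createScanFromConfiguration to build the Scan from these individual properties.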
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java
deleted file mode 100644
index ce1928e6..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java
+++ /dev/null
@@ -1,653 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.Closeable;
-import java.io.IOException;
-import java.net.InetAddress;
-import java.net.InetSocketAddress;
-import java.net.UnknownHostException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.HRegionLocation;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Admin;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.RegionLocator;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Addressing;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.Pair;
-import org.apache.hadoop.hbase.util.RegionSizeCalculator;
-import org.apache.hadoop.hbase.util.Strings;
-import org.apache.hadoop.mapreduce.InputFormat;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.net.DNS;
-import org.apache.hadoop.util.StringUtils;
-
-/**
- * A base for {@link TableInputFormat}s. Receives a {@link Connection}, a {@link TableName},
- * a {@link Scan} instance that defines the input columns, etc. Subclasses may use
- * other TableRecordReader implementations.
- *
- * Subclasses MUST ensure initializeTable(Connection, TableName) is called for an instance to
- * function properly. Each of the entry points to this class used by the MapReduce framework,
- * {@link #createRecordReader(InputSplit, TaskAttemptContext)} and {@link #getSplits(JobContext)},
- * will call {@link #initialize(JobContext)} as a convenient centralized location to handle
- * retrieving the necessary configuration information. If your subclass overrides either of these
- * methods, either call the parent version or call initialize yourself.
- *
- * <p>
- * An example of a subclass:
- * <pre>
- * class ExampleTIF extends TableInputFormatBase {
- *
- * {@literal @}Override
- * protected void initialize(JobContext context) throws IOException {
- * // We are responsible for the lifecycle of this connection until we hand it over in
- * // initializeTable.
- * Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create(
- * job.getConfiguration()));
- * TableName tableName = TableName.valueOf("exampleTable");
- * // mandatory. once passed here, TableInputFormatBase will handle closing the connection.
- * initializeTable(connection, tableName);
- * byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
- * Bytes.toBytes("columnB") };
- * // optional, by default we'll get everything for the table.
- * Scan scan = new Scan();
- * for (byte[] family : inputColumns) {
- * scan.addFamily(family);
- * }
- * Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*"));
- * scan.setFilter(exampleFilter);
- * setScan(scan);
- * }
- * }
- * </pre>
- */
-@InterfaceAudience.Public
-public abstract class TableInputFormatBase
-extends InputFormat<ImmutableBytesWritable, Result> {
-
- /** Specify whether auto-balance is enabled for input in M/R jobs. */
- public static final String MAPREDUCE_INPUT_AUTOBALANCE = "hbase.mapreduce.input.autobalance";
- /** The maximum tolerable data skew ratio for M/R jobs; used together with the
- * hbase.mapreduce.input.autobalance property. */
- public static final String INPUT_AUTOBALANCE_MAXSKEWRATIO = "hbase.mapreduce.input.autobalance" +
- ".maxskewratio";
- /** Specify whether the row keys in the table are text (ASCII between 32 and 126);
- * default is true. False means the table uses binary row keys. */
- public static final String TABLE_ROW_TEXTKEY = "hbase.table.row.textkey";
-
- private static final Log LOG = LogFactory.getLog(TableInputFormatBase.class);
-
- private static final String NOT_INITIALIZED = "The input format instance has not been properly " +
- "initialized. Ensure you call initializeTable either in your constructor or initialize " +
- "method";
- private static final String INITIALIZATION_ERROR = "Cannot create a record reader because of a" +
- " previous error. Please look at the previous logs lines from" +
- " the task's full log for more details.";
-
- /** Holds the details for the internal scanner.
- *
- * @see Scan */
- private Scan scan = null;
- /** The {@link Admin}. */
- private Admin admin;
- /** The {@link Table} to scan. */
- private Table table;
- /** The {@link RegionLocator} of the table. */
- private RegionLocator regionLocator;
- /** The reader scanning the table, can be a custom one. */
- private TableRecordReader tableRecordReader = null;
- /** The underlying {@link Connection} of the table. */
- private Connection connection;
-
-
- /** The reverse DNS lookup cache mapping: IPAddress => HostName */
- private HashMap<InetAddress, String> reverseDNSCacheMap = new HashMap<>();
-
- /**
- * Builds a {@link TableRecordReader}. If no {@link TableRecordReader} was provided, uses
- * the default.
- *
- * @param split The split to work with.
- * @param context The current context.
- * @return The newly created record reader.
- * @throws IOException When creating the reader fails.
- * @see org.apache.hadoop.mapreduce.InputFormat#createRecordReader(
- * org.apache.hadoop.mapreduce.InputSplit,
- * org.apache.hadoop.mapreduce.TaskAttemptContext)
- */
- @Override
- public RecordReader<ImmutableBytesWritable, Result> createRecordReader(
- InputSplit split, TaskAttemptContext context)
- throws IOException {
- // Just in case a subclass is relying on JobConfigurable magic.
- if (table == null) {
- initialize(context);
- }
- // null check in case our child overrides getTable to not throw.
- try {
- if (getTable() == null) {
- // initialize() must not have been implemented in the subclass.
- throw new IOException(INITIALIZATION_ERROR);
- }
- } catch (IllegalStateException exception) {
- throw new IOException(INITIALIZATION_ERROR, exception);
- }
- TableSplit tSplit = (TableSplit) split;
- LOG.info("Input split length: " + StringUtils.humanReadableInt(tSplit.getLength()) + " bytes.");
- final TableRecordReader trr =
- this.tableRecordReader != null ? this.tableRecordReader : new TableRecordReader();
- Scan sc = new Scan(this.scan);
- sc.setStartRow(tSplit.getStartRow());
- sc.setStopRow(tSplit.getEndRow());
- trr.setScan(sc);
- trr.setTable(getTable());
- return new RecordReader<ImmutableBytesWritable, Result>() {
-
- @Override
- public void close() throws IOException {
- trr.close();
- closeTable();
- }
-
- @Override
- public ImmutableBytesWritable getCurrentKey() throws IOException, InterruptedException {
- return trr.getCurrentKey();
- }
-
- @Override
- public Result getCurrentValue() throws IOException, InterruptedException {
- return trr.getCurrentValue();
- }
-
- @Override
- public float getProgress() throws IOException, InterruptedException {
- return trr.getProgress();
- }
-
- @Override
- public void initialize(InputSplit inputsplit, TaskAttemptContext context) throws IOException,
- InterruptedException {
- trr.initialize(inputsplit, context);
- }
-
- @Override
- public boolean nextKeyValue() throws IOException, InterruptedException {
- return trr.nextKeyValue();
- }
- };
- }
-
- protected Pair<byte[][],byte[][]> getStartEndKeys() throws IOException {
- return getRegionLocator().getStartEndKeys();
- }
-
- /**
- * Calculates the splits that will serve as input for the map tasks. The
- * number of splits matches the number of regions in a table.
- *
- * @param context The current job context.
- * @return The list of input splits.
- * @throws IOException When creating the list of splits fails.
- * @see org.apache.hadoop.mapreduce.InputFormat#getSplits(
- * org.apache.hadoop.mapreduce.JobContext)
- */
- @Override
- public List<InputSplit> getSplits(JobContext context) throws IOException {
- boolean closeOnFinish = false;
-
- // Just in case a subclass is relying on JobConfigurable magic.
- if (table == null) {
- initialize(context);
- closeOnFinish = true;
- }
-
- // null check in case our child overrides getTable to not throw.
- try {
- if (getTable() == null) {
- // initialize() must not have been implemented in the subclass.
- throw new IOException(INITIALIZATION_ERROR);
- }
- } catch (IllegalStateException exception) {
- throw new IOException(INITIALIZATION_ERROR, exception);
- }
-
- try {
- RegionSizeCalculator sizeCalculator =
- new RegionSizeCalculator(getRegionLocator(), getAdmin());
-
- TableName tableName = getTable().getName();
-
- Pair<byte[][], byte[][]> keys = getStartEndKeys();
- if (keys == null || keys.getFirst() == null ||
- keys.getFirst().length == 0) {
- HRegionLocation regLoc =
- getRegionLocator().getRegionLocation(HConstants.EMPTY_BYTE_ARRAY, false);
- if (null == regLoc) {
- throw new IOException("Expecting at least one region.");
- }
- List<InputSplit> splits = new ArrayList<>(1);
- long regionSize = sizeCalculator.getRegionSize(regLoc.getRegionInfo().getRegionName());
- TableSplit split = new TableSplit(tableName, scan,
- HConstants.EMPTY_BYTE_ARRAY, HConstants.EMPTY_BYTE_ARRAY, regLoc
- .getHostnamePort().split(Addressing.HOSTNAME_PORT_SEPARATOR)[0], regionSize);
- splits.add(split);
- return splits;
- }
- List<InputSplit> splits = new ArrayList<>(keys.getFirst().length);
- for (int i = 0; i < keys.getFirst().length; i++) {
- if (!includeRegionInSplit(keys.getFirst()[i], keys.getSecond()[i])) {
- continue;
- }
-
- byte[] startRow = scan.getStartRow();
- byte[] stopRow = scan.getStopRow();
- // determine if the given start and stop keys fall into the region
- if ((startRow.length == 0 || keys.getSecond()[i].length == 0 ||
- Bytes.compareTo(startRow, keys.getSecond()[i]) < 0) &&
- (stopRow.length == 0 ||
- Bytes.compareTo(stopRow, keys.getFirst()[i]) > 0)) {
- byte[] splitStart = startRow.length == 0 ||
- Bytes.compareTo(keys.getFirst()[i], startRow) >= 0 ?
- keys.getFirst()[i] : startRow;
- byte[] splitStop = (stopRow.length == 0 ||
- Bytes.compareTo(keys.getSecond()[i], stopRow) <= 0) &&
- keys.getSecond()[i].length > 0 ?
- keys.getSecond()[i] : stopRow;
-
- HRegionLocation location = getRegionLocator().getRegionLocation(keys.getFirst()[i], false);
- // The below InetSocketAddress creation does a name resolution.
- InetSocketAddress isa = new InetSocketAddress(location.getHostname(), location.getPort());
- if (isa.isUnresolved()) {
- LOG.warn("Failed resolve " + isa);
- }
- InetAddress regionAddress = isa.getAddress();
- String regionLocation;
- regionLocation = reverseDNS(regionAddress);
-
- byte[] regionName = location.getRegionInfo().getRegionName();
- String encodedRegionName = location.getRegionInfo().getEncodedName();
- long regionSize = sizeCalculator.getRegionSize(regionName);
- TableSplit split = new TableSplit(tableName, scan,
- splitStart, splitStop, regionLocation, encodedRegionName, regionSize);
- splits.add(split);
- if (LOG.isDebugEnabled()) {
- LOG.debug("getSplits: split -> " + i + " -> " + split);
- }
- }
- }
- //The default value of "hbase.mapreduce.input.autobalance" is false, which means not enabled.
- boolean enableAutoBalance = context.getConfiguration()
- .getBoolean(MAPREDUCE_INPUT_AUTOBALANCE, false);
- if (enableAutoBalance) {
- long totalRegionSize=0;
- for (int i = 0; i < splits.size(); i++){
- TableSplit ts = (TableSplit)splits.get(i);
- totalRegionSize += ts.getLength();
- }
- long averageRegionSize = totalRegionSize / splits.size();
- // the averageRegionSize must be positive.
- if (averageRegionSize <= 0) {
- LOG.warn("The averageRegionSize is not positive: "+ averageRegionSize + ", " +
- "set it to 1.");
- averageRegionSize = 1;
- }
- return calculateRebalancedSplits(splits, context, averageRegionSize);
- } else {
- return splits;
- }
- } finally {
- if (closeOnFinish) {
- closeTable();
- }
- }
- }
-
- String reverseDNS(InetAddress ipAddress) throws UnknownHostException {
- String hostName = this.reverseDNSCacheMap.get(ipAddress);
- if (hostName == null) {
- String ipAddressString = null;
- try {
- ipAddressString = DNS.reverseDns(ipAddress, null);
- } catch (Exception e) {
- // We can use InetAddress in case the jndi failed to pull up the reverse DNS entry from the
- // name service. Also, in case of ipv6, we need to use the InetAddress since resolving
- // reverse DNS using jndi doesn't work well with ipv6 addresses.
- ipAddressString = InetAddress.getByName(ipAddress.getHostAddress()).getHostName();
- }
- if (ipAddressString == null) throw new UnknownHostException("No host found for " + ipAddress);
- hostName = Strings.domainNamePointerToHostName(ipAddressString);
- this.reverseDNSCacheMap.put(ipAddress, hostName);
- }
- return hostName;
- }
-
- /**
- * Calculates the MapReduce input splits for the map tasks. The number of input splits depends
- * on the average region size and the data skew ratio set by the user in the configuration.
- *
- * @param list The list of input splits before balance.
- * @param context The current job context.
- * @param average The average size of all regions .
- * @return The list of input splits.
- * @throws IOException When creating the list of splits fails.
- * @see org.apache.hadoop.mapreduce.InputFormat#getSplits(
- * org.apache.hadoop.mapreduce.JobContext)
- */
- private List<InputSplit> calculateRebalancedSplits(List<InputSplit> list, JobContext context,
- long average) throws IOException {
- List<InputSplit> resultList = new ArrayList<>();
- Configuration conf = context.getConfiguration();
- //The default data skew ratio is 3
- long dataSkewRatio = conf.getLong(INPUT_AUTOBALANCE_MAXSKEWRATIO, 3);
- //It determines which mode to use: text key mode or binary key mode. The default is text mode.
- boolean isTextKey = context.getConfiguration().getBoolean(TABLE_ROW_TEXTKEY, true);
- long dataSkewThreshold = dataSkewRatio * average;
- int count = 0;
- while (count < list.size()) {
- TableSplit ts = (TableSplit)list.get(count);
- TableName tableName = ts.getTable();
- String regionLocation = ts.getRegionLocation();
- String encodedRegionName = ts.getEncodedRegionName();
- long regionSize = ts.getLength();
- if (regionSize >= dataSkewThreshold) {
- // if the current region size is larger than the data skew threshold,
- // split the region into two MapReduce input splits.
- byte[] splitKey = getSplitKey(ts.getStartRow(), ts.getEndRow(), isTextKey);
- if (Arrays.equals(ts.getEndRow(), splitKey)) {
- // Not splitting since the end key is the same as the split key
- resultList.add(ts);
- } else {
- //Set the size of child TableSplit as 1/2 of the region size. The exact size of the
- // MapReduce input splits is not far off.
- TableSplit t1 = new TableSplit(tableName, scan, ts.getStartRow(), splitKey,
- regionLocation, regionSize / 2);
- TableSplit t2 = new TableSplit(tableName, scan, splitKey, ts.getEndRow(), regionLocation,
- regionSize - regionSize / 2);
- resultList.add(t1);
- resultList.add(t2);
- }
- count++;
- } else if (regionSize >= average) {
- // if the region size between average size and data skew threshold size,
- // make this region as one MapReduce input split.
- resultList.add(ts);
- count++;
- } else {
- // if the total size of several small contiguous regions is less than the average region size,
- // combine them into one MapReduce input split.
- long totalSize = regionSize;
- byte[] splitStartKey = ts.getStartRow();
- byte[] splitEndKey = ts.getEndRow();
- count++;
- for (; count < list.size(); count++) {
- TableSplit nextRegion = (TableSplit)list.get(count);
- long nextRegionSize = nextRegion.getLength();
- if (totalSize + nextRegionSize <= dataSkewThreshold) {
- totalSize = totalSize + nextRegionSize;
- splitEndKey = nextRegion.getEndRow();
- } else {
- break;
- }
- }
- TableSplit t = new TableSplit(tableName, scan, splitStartKey, splitEndKey,
- regionLocation, encodedRegionName, totalSize);
- resultList.add(t);
- }
- }
- return resultList;
- }
-
- /**
- * Selects a split point in the region. The selection of the split point is based on a uniform
- * distribution assumption for the keys in the region.
- * Here are some examples:
- *
- * <table>
- * <tr>
- * <th>start key</th>
- * <th>end key</th>
- * <th>is text</th>
- * <th>split point</th>
- * </tr>
- * <tr>
- * <td>'a', 'a', 'a', 'b', 'c', 'd', 'e', 'f', 'g'</td>
- * <td>'a', 'a', 'a', 'f', 'f', 'f'</td>
- * <td>true</td>
- * <td>'a', 'a', 'a', 'd', 'd', -78, 50, -77, 51</td>
- * </tr>
- * <tr>
- * <td>'1', '1', '1', '0', '0', '0'</td>
- * <td>'1', '1', '2', '5', '7', '9', '0'</td>
- * <td>true</td>
- * <td>'1', '1', '1', -78, -77, -76, -104</td>
- * </tr>
- * <tr>
- * <td>'1', '1', '1', '0'</td>
- * <td>'1', '1', '2', '0'</td>
- * <td>true</td>
- * <td>'1', '1', '1', -80</td>
- * </tr>
- * <tr>
- * <td>13, -19, 126, 127</td>
- * <td>13, -19, 127, 0</td>
- * <td>false</td>
- * <td>13, -19, 126, -65</td>
- * </tr>
- * </table>
- *
- * Set this function as "public static", make it easier for test.
- *
- * @param start Start key of the region
- * @param end End key of the region
- * @param isText It determines to use text key mode or binary key mode
- * @return The split point in the region.
- */
- @InterfaceAudience.Private
- public static byte[] getSplitKey(byte[] start, byte[] end, boolean isText) {
- byte upperLimitByte;
- byte lowerLimitByte;
- //Use text mode or binary mode.
- if (isText) {
- //The range of text char set in ASCII is [32,126], the lower limit is space and the upper
- // limit is '~'.
- upperLimitByte = '~';
- lowerLimitByte = ' ';
- } else {
- upperLimitByte = -1;
- lowerLimitByte = 0;
- }
- // For special case
- // Example 1 : startkey=null, endkey="hhhqqqwww", splitKey="h"
- // Example 2 (text key mode): startKey="ffffaaa", endKey=null, splitkey="f~~~~~~"
- if (start.length == 0 && end.length == 0){
- return new byte[]{(byte) ((lowerLimitByte + upperLimitByte) / 2)};
- }
- if (start.length == 0 && end.length != 0){
- return new byte[]{ end[0] };
- }
- if (start.length != 0 && end.length == 0){
- byte[] result =new byte[start.length];
- result[0]=start[0];
- for (int k = 1; k < start.length; k++){
- result[k] = upperLimitByte;
- }
- return result;
- }
- return Bytes.split(start, end, false, 1)[1];
- }
-
- /**
- * Test if the given region is to be included in the InputSplit while splitting
- * the regions of a table.
- * <p>
- * This optimization is effective when there is a specific reason to exclude an entire region from the M-R job
- * (and hence not contribute an InputSplit for it), given the start and end keys of that region. <br>
- * It is useful when we need to remember the last-processed top record and continuously revisit the
- * [last, current) interval for M-R processing. In addition to reducing the number of InputSplits, it also
- * reduces the load on the region server, due to the ordering of the keys.
- * <br>
- * <br>
- * Note: It is possible that <code>endKey.length() == 0</code> for the last (most recent) region.
- * <br>
- * Override this method if you want to exclude regions from the M-R job altogether. By default, no region is
- * excluded (i.e. all regions are included).
- *
- *
- * @param startKey Start key of the region
- * @param endKey End key of the region
- * @return true, if this region needs to be included as part of the input (default).
- *
- */
- protected boolean includeRegionInSplit(final byte[] startKey, final byte [] endKey) {
- return true;
- }
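As the javadoc above notes, excluding regions is done by overriding this hook in a subclass. A minimal sketch follows; the subclass name and the cut-off row key are illustrative assumptions:

  import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
  import org.apache.hadoop.hbase.util.Bytes;

  public class BoundedTableInputFormat extends TableInputFormat {
    @Override
    protected boolean includeRegionInSplit(byte[] startKey, byte[] endKey) {
      // Keep only regions whose start key falls below an illustrative upper bound.
      return Bytes.compareTo(startKey, Bytes.toBytes("row-99999")) < 0;
    }
  }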
-
- /**
- * Allows subclasses to get the {@link RegionLocator}.
- */
- protected RegionLocator getRegionLocator() {
- if (regionLocator == null) {
- throw new IllegalStateException(NOT_INITIALIZED);
- }
- return regionLocator;
- }
-
- /**
- * Allows subclasses to get the {@link Table}.
- */
- protected Table getTable() {
- if (table == null) {
- throw new IllegalStateException(NOT_INITIALIZED);
- }
- return table;
- }
-
- /**
- * Allows subclasses to get the {@link Admin}.
- */
- protected Admin getAdmin() {
- if (admin == null) {
- throw new IllegalStateException(NOT_INITIALIZED);
- }
- return admin;
- }
-
- /**
- * Allows subclasses to initialize the table information.
- *
- * @param connection The Connection to the HBase cluster. MUST be unmanaged. We will close.
- * @param tableName The {@link TableName} of the table to process.
- * @throws IOException
- */
- protected void initializeTable(Connection connection, TableName tableName) throws IOException {
- if (this.table != null || this.connection != null) {
- LOG.warn("initializeTable called multiple times. Overwriting connection and table " +
- "reference; TableInputFormatBase will not close these old references when done.");
- }
- this.table = connection.getTable(tableName);
- this.regionLocator = connection.getRegionLocator(tableName);
- this.admin = connection.getAdmin();
- this.connection = connection;
- }
-
- /**
- * Gets the scan defining the actual details like columns etc.
- *
- * @return The internal scan instance.
- */
- public Scan getScan() {
- if (this.scan == null) this.scan = new Scan();
- return scan;
- }
-
- /**
- * Sets the scan defining the actual details like columns etc.
- *
- * @param scan The scan to set.
- */
- public void setScan(Scan scan) {
- this.scan = scan;
- }
-
- /**
- * Allows subclasses to set the {@link TableRecordReader}.
- *
- * @param tableRecordReader A different {@link TableRecordReader}
- * implementation.
- */
- protected void setTableRecordReader(TableRecordReader tableRecordReader) {
- this.tableRecordReader = tableRecordReader;
- }
-
- /**
- * Handle subclass specific set up.
- * Each of the entry points used by the MapReduce framework,
- * {@link #createRecordReader(InputSplit, TaskAttemptContext)} and {@link #getSplits(JobContext)},
- * will call {@link #initialize(JobContext)} as a convenient centralized location to handle
- * retrieving the necessary configuration information and calling
- * {@link #initializeTable(Connection, TableName)}.
- *
- * Subclasses should implement their initialize call such that it is safe to call multiple times.
- * The current TableInputFormatBase implementation relies on a non-null table reference to decide
- * if an initialize call is needed, but this behavior may change in the future. In particular,
- * it is critical that initializeTable not be called multiple times since this will leak
- * Connection instances.
- *
- */
- protected void initialize(JobContext context) throws IOException {
- }
-
- /**
- * Close the Table and related objects that were initialized via
- * {@link #initializeTable(Connection, TableName)}.
- *
- * @throws IOException
- */
- protected void closeTable() throws IOException {
- close(admin, table, regionLocator, connection);
- admin = null;
- table = null;
- regionLocator = null;
- connection = null;
- }
-
- private void close(Closeable... closables) throws IOException {
- for (Closeable c : closables) {
- if(c != null) { c.close(); }
- }
- }
-
-}
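Since getSplitKey above is public static, the split-point selection can be exercised directly. A small sketch using the first example from its javadoc table (class name is an illustrative placeholder):

  import org.apache.hadoop.hbase.mapreduce.TableInputFormatBase;
  import org.apache.hadoop.hbase.util.Bytes;

  public class SplitKeyDemo {
    public static void main(String[] args) {
      byte[] start = Bytes.toBytes("aaabcdefg");
      byte[] end = Bytes.toBytes("aaafff");
      // Text-key mode: picks a point roughly midway, assuming uniformly distributed keys.
      byte[] splitPoint = TableInputFormatBase.getSplitKey(start, end, true);
      System.out.println(Bytes.toStringBinary(splitPoint));
    }
  }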
[18/41] hbase git commit: HBASE-18640 Move mapreduce out of hbase-server into separate module. Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat2.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat2.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat2.java
deleted file mode 100644
index 7fea254..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat2.java
+++ /dev/null
@@ -1,902 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-import java.io.UnsupportedEncodingException;
-import java.net.InetSocketAddress;
-import java.net.URLDecoder;
-import java.net.URLEncoder;
-import java.nio.charset.StandardCharsets;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.TreeMap;
-import java.util.TreeSet;
-import java.util.UUID;
-import java.util.function.Function;
-import java.util.stream.Collectors;
-
-import org.apache.commons.lang.StringUtils;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellComparator;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.TableDescriptor;
-import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
-import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.RegionLocator;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.fs.HFileSystem;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.HRegionLocation;
-import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.io.compress.Compression;
-import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
-import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
-import org.apache.hadoop.hbase.io.hfile.CacheConfig;
-import org.apache.hadoop.hbase.io.hfile.HFile;
-import org.apache.hadoop.hbase.io.hfile.HFileContext;
-import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
-import org.apache.hadoop.hbase.io.hfile.HFileWriterImpl;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.KeyValueUtil;
-import org.apache.hadoop.hbase.regionserver.BloomType;
-import org.apache.hadoop.hbase.regionserver.HStore;
-import org.apache.hadoop.hbase.regionserver.StoreFile;
-import org.apache.hadoop.hbase.regionserver.StoreFileWriter;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
-import org.apache.hadoop.hbase.util.FSUtils;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.OutputFormat;
-import org.apache.hadoop.mapreduce.RecordWriter;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.annotations.VisibleForTesting;
-
-/**
- * Writes HFiles. Passed Cells must arrive in order.
- * Writes current time as the sequence id for the file. Sets the major compacted
- * attribute on created {@link HFile}s. Calling write(null, null) will forcibly roll
- * all HFiles being written.
- * <p>
- * Using this class as part of a MapReduce job is best done
- * using {@link #configureIncrementalLoad(Job, TableDescriptor, RegionLocator)}.
- */
-@InterfaceAudience.Public
-public class HFileOutputFormat2
- extends FileOutputFormat<ImmutableBytesWritable, Cell> {
- private static final Log LOG = LogFactory.getLog(HFileOutputFormat2.class);
- static class TableInfo {
- private TableDescriptor tableDesctiptor;
- private RegionLocator regionLocator;
-
- public TableInfo(TableDescriptor tableDesctiptor, RegionLocator regionLocator) {
- this.tableDesctiptor = tableDesctiptor;
- this.regionLocator = regionLocator;
- }
-
- /**
- * Modifications to the returned HTD do not affect the inner TD.
- * @return A clone of the inner table descriptor
- * @deprecated use {@link #getTableDescriptor}
- */
- @Deprecated
- public HTableDescriptor getHTableDescriptor() {
- return new HTableDescriptor(tableDesctiptor);
- }
-
- public TableDescriptor getTableDescriptor() {
- return tableDesctiptor;
- }
-
- public RegionLocator getRegionLocator() {
- return regionLocator;
- }
- }
-
- protected static final byte[] tableSeparator = ";".getBytes(StandardCharsets.UTF_8);
-
- protected static byte[] combineTableNameSuffix(byte[] tableName,
- byte[] suffix ) {
- return Bytes.add(tableName, tableSeparator, suffix);
- }
-
- // The following constants are private since these are used by
- // HFileOutputFormat2 to internally transfer data between job setup and
- // reducer run using conf.
- // These should not be changed by the client.
- static final String COMPRESSION_FAMILIES_CONF_KEY =
- "hbase.hfileoutputformat.families.compression";
- static final String BLOOM_TYPE_FAMILIES_CONF_KEY =
- "hbase.hfileoutputformat.families.bloomtype";
- static final String BLOCK_SIZE_FAMILIES_CONF_KEY =
- "hbase.mapreduce.hfileoutputformat.blocksize";
- static final String DATABLOCK_ENCODING_FAMILIES_CONF_KEY =
- "hbase.mapreduce.hfileoutputformat.families.datablock.encoding";
-
- // This constant is public since the client can modify this when setting
- // up their conf object and thus refer to this symbol.
- // It is present for backwards compatibility reasons. Use it only to
- // override the auto-detection of datablock encoding.
- public static final String DATABLOCK_ENCODING_OVERRIDE_CONF_KEY =
- "hbase.mapreduce.hfileoutputformat.datablock.encoding";
-
- /**
- * Keep locality while generating HFiles for bulkload. See HBASE-12596
- */
- public static final String LOCALITY_SENSITIVE_CONF_KEY =
- "hbase.bulkload.locality.sensitive.enabled";
- private static final boolean DEFAULT_LOCALITY_SENSITIVE = true;
- static final String OUTPUT_TABLE_NAME_CONF_KEY =
- "hbase.mapreduce.hfileoutputformat.table.name";
- static final String MULTI_TABLE_HFILEOUTPUTFORMAT_CONF_KEY =
- "hbase.mapreduce.use.multi.table.hfileoutputformat";
-
- public static final String STORAGE_POLICY_PROPERTY = "hbase.hstore.storagepolicy";
- public static final String STORAGE_POLICY_PROPERTY_CF_PREFIX = STORAGE_POLICY_PROPERTY + ".";
-
- @Override
- public RecordWriter<ImmutableBytesWritable, Cell> getRecordWriter(
- final TaskAttemptContext context) throws IOException, InterruptedException {
- return createRecordWriter(context);
- }
-
- protected static byte[] getTableNameSuffixedWithFamily(byte[] tableName, byte[] family) {
- return combineTableNameSuffix(tableName, family);
- }
-
- static <V extends Cell> RecordWriter<ImmutableBytesWritable, V>
- createRecordWriter(final TaskAttemptContext context)
- throws IOException {
-
- // Get the path of the temporary output file
- final Path outputPath = FileOutputFormat.getOutputPath(context);
- final Path outputDir = new FileOutputCommitter(outputPath, context).getWorkPath();
- final Configuration conf = context.getConfiguration();
- final boolean writeMultipleTables = conf.getBoolean(MULTI_TABLE_HFILEOUTPUTFORMAT_CONF_KEY, false) ;
- final String writeTableNames = conf.get(OUTPUT_TABLE_NAME_CONF_KEY);
- if (writeTableNames==null || writeTableNames.isEmpty()) {
- throw new IllegalArgumentException("Configuration parameter " + OUTPUT_TABLE_NAME_CONF_KEY
- + " cannot be empty");
- }
- final FileSystem fs = outputDir.getFileSystem(conf);
- // These configs. are from hbase-*.xml
- final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE,
- HConstants.DEFAULT_MAX_FILE_SIZE);
- // Invented config. Add to hbase-*.xml if other than default compression.
- final String defaultCompressionStr = conf.get("hfile.compression",
- Compression.Algorithm.NONE.getName());
- final Algorithm defaultCompression = HFileWriterImpl
- .compressionByName(defaultCompressionStr);
- final boolean compactionExclude = conf.getBoolean(
- "hbase.mapreduce.hfileoutputformat.compaction.exclude", false);
-
- final Set<String> allTableNames = Arrays.stream(writeTableNames.split(
- Bytes.toString(tableSeparator))).collect(Collectors.toSet());
-
- // create a map from column family to the compression algorithm
- final Map<byte[], Algorithm> compressionMap = createFamilyCompressionMap(conf);
- final Map<byte[], BloomType> bloomTypeMap = createFamilyBloomTypeMap(conf);
- final Map<byte[], Integer> blockSizeMap = createFamilyBlockSizeMap(conf);
-
- String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_OVERRIDE_CONF_KEY);
- final Map<byte[], DataBlockEncoding> datablockEncodingMap
- = createFamilyDataBlockEncodingMap(conf);
- final DataBlockEncoding overriddenEncoding;
- if (dataBlockEncodingStr != null) {
- overriddenEncoding = DataBlockEncoding.valueOf(dataBlockEncodingStr);
- } else {
- overriddenEncoding = null;
- }
-
- return new RecordWriter<ImmutableBytesWritable, V>() {
- // Map of families to writers and how much has been output on the writer.
- private final Map<byte[], WriterLength> writers =
- new TreeMap<>(Bytes.BYTES_COMPARATOR);
- private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
- private final byte[] now = Bytes.toBytes(EnvironmentEdgeManager.currentTime());
- private boolean rollRequested = false;
-
- @Override
- public void write(ImmutableBytesWritable row, V cell)
- throws IOException {
- KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
-
- // null input == user explicitly wants to flush
- if (row == null && kv == null) {
- rollWriters();
- return;
- }
-
- byte[] rowKey = CellUtil.cloneRow(kv);
- long length = kv.getLength();
- byte[] family = CellUtil.cloneFamily(kv);
- byte[] tableNameBytes = null;
- if (writeMultipleTables) {
- tableNameBytes = MultiTableHFileOutputFormat.getTableName(row.get());
- if (!allTableNames.contains(Bytes.toString(tableNameBytes))) {
- throw new IllegalArgumentException("TableName '" + Bytes.toString(tableNameBytes) +
- "' not" + " expected");
- }
- } else {
- tableNameBytes = writeTableNames.getBytes(StandardCharsets.UTF_8);
- }
- byte[] tableAndFamily = getTableNameSuffixedWithFamily(tableNameBytes, family);
- WriterLength wl = this.writers.get(tableAndFamily);
-
- // If this is a new column family, verify that the directory exists
- if (wl == null) {
- Path writerPath = null;
- if (writeMultipleTables) {
- writerPath = new Path(outputDir, new Path(Bytes.toString(tableNameBytes), Bytes
- .toString(family)));
- }
- else {
- writerPath = new Path(outputDir, Bytes.toString(family));
- }
- fs.mkdirs(writerPath);
- configureStoragePolicy(conf, fs, tableAndFamily, writerPath);
- }
-
- // If any of the HFiles for the column families has reached
- // maxsize, we need to roll all the writers
- if (wl != null && wl.written + length >= maxsize) {
- this.rollRequested = true;
- }
-
- // This can only happen once a row is finished though
- if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
- rollWriters();
- }
-
- // create a new WAL writer, if necessary
- if (wl == null || wl.writer == null) {
- if (conf.getBoolean(LOCALITY_SENSITIVE_CONF_KEY, DEFAULT_LOCALITY_SENSITIVE)) {
- HRegionLocation loc = null;
-
- String tableName = Bytes.toString(tableNameBytes);
- if (tableName != null) {
- try (Connection connection = ConnectionFactory.createConnection(conf);
- RegionLocator locator =
- connection.getRegionLocator(TableName.valueOf(tableName))) {
- loc = locator.getRegionLocation(rowKey);
- } catch (Throwable e) {
- LOG.warn("There's something wrong when locating rowkey: " +
- Bytes.toString(rowKey) + " for tablename: " + tableName, e);
- loc = null;
- } }
-
- if (null == loc) {
- if (LOG.isTraceEnabled()) {
- LOG.trace("failed to get region location, so use default writer for rowkey: " +
- Bytes.toString(rowKey));
- }
- wl = getNewWriter(tableNameBytes, family, conf, null);
- } else {
- if (LOG.isDebugEnabled()) {
- LOG.debug("first rowkey: [" + Bytes.toString(rowKey) + "]");
- }
- InetSocketAddress initialIsa =
- new InetSocketAddress(loc.getHostname(), loc.getPort());
- if (initialIsa.isUnresolved()) {
- if (LOG.isTraceEnabled()) {
- LOG.trace("failed to resolve bind address: " + loc.getHostname() + ":"
- + loc.getPort() + ", so use default writer");
- }
- wl = getNewWriter(tableNameBytes, family, conf, null);
- } else {
- if (LOG.isDebugEnabled()) {
- LOG.debug("use favored nodes writer: " + initialIsa.getHostString());
- }
- wl = getNewWriter(tableNameBytes, family, conf, new InetSocketAddress[] { initialIsa
- });
- }
- }
- } else {
- wl = getNewWriter(tableNameBytes, family, conf, null);
- }
- }
-
- // we now have the proper WAL writer. full steam ahead
- kv.updateLatestStamp(this.now);
- wl.writer.append(kv);
- wl.written += length;
-
- // Copy the row so we can detect row transitions.
- this.previousRow = rowKey;
- }
-
- private void rollWriters() throws IOException {
- for (WriterLength wl : this.writers.values()) {
- if (wl.writer != null) {
- LOG.info(
- "Writer=" + wl.writer.getPath() + ((wl.written == 0)? "": ", wrote=" + wl.written));
- close(wl.writer);
- }
- wl.writer = null;
- wl.written = 0;
- }
- this.rollRequested = false;
- }
-
- /*
- * Create a new StoreFile.Writer.
- * @param family
- * @return A WriterLength, containing a new StoreFile.Writer.
- * @throws IOException
- */
- @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="BX_UNBOXING_IMMEDIATELY_REBOXED",
- justification="Not important")
- private WriterLength getNewWriter(byte[] tableName, byte[] family, Configuration
- conf, InetSocketAddress[] favoredNodes) throws IOException {
- byte[] tableAndFamily = getTableNameSuffixedWithFamily(tableName, family);
- Path familydir = new Path(outputDir, Bytes.toString(family));
- if (writeMultipleTables) {
- familydir = new Path(outputDir,
- new Path(Bytes.toString(tableName), Bytes.toString(family)));
- }
- WriterLength wl = new WriterLength();
- Algorithm compression = compressionMap.get(tableAndFamily);
- compression = compression == null ? defaultCompression : compression;
- BloomType bloomType = bloomTypeMap.get(tableAndFamily);
- bloomType = bloomType == null ? BloomType.NONE : bloomType;
- Integer blockSize = blockSizeMap.get(tableAndFamily);
- blockSize = blockSize == null ? HConstants.DEFAULT_BLOCKSIZE : blockSize;
- DataBlockEncoding encoding = overriddenEncoding;
- encoding = encoding == null ? datablockEncodingMap.get(tableAndFamily) : encoding;
- encoding = encoding == null ? DataBlockEncoding.NONE : encoding;
- Configuration tempConf = new Configuration(conf);
- tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
- HFileContextBuilder contextBuilder = new HFileContextBuilder()
- .withCompression(compression)
- .withChecksumType(HStore.getChecksumType(conf))
- .withBytesPerCheckSum(HStore.getBytesPerChecksum(conf))
- .withBlockSize(blockSize);
-
- if (HFile.getFormatVersion(conf) >= HFile.MIN_FORMAT_VERSION_WITH_TAGS) {
- contextBuilder.withIncludesTags(true);
- }
-
- contextBuilder.withDataBlockEncoding(encoding);
- HFileContext hFileContext = contextBuilder.build();
- if (null == favoredNodes) {
- wl.writer =
- new StoreFileWriter.Builder(conf, new CacheConfig(tempConf), fs)
- .withOutputDir(familydir).withBloomType(bloomType)
- .withComparator(CellComparator.COMPARATOR).withFileContext(hFileContext).build();
- } else {
- wl.writer =
- new StoreFileWriter.Builder(conf, new CacheConfig(tempConf), new HFileSystem(fs))
- .withOutputDir(familydir).withBloomType(bloomType)
- .withComparator(CellComparator.COMPARATOR).withFileContext(hFileContext)
- .withFavoredNodes(favoredNodes).build();
- }
-
- this.writers.put(tableAndFamily, wl);
- return wl;
- }
-
- private void close(final StoreFileWriter w) throws IOException {
- if (w != null) {
- w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY,
- Bytes.toBytes(System.currentTimeMillis()));
- w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
- Bytes.toBytes(context.getTaskAttemptID().toString()));
- w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY,
- Bytes.toBytes(true));
- w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY,
- Bytes.toBytes(compactionExclude));
- w.appendTrackedTimestampsToMetadata();
- w.close();
- }
- }
-
- @Override
- public void close(TaskAttemptContext c)
- throws IOException, InterruptedException {
- for (WriterLength wl: this.writers.values()) {
- close(wl.writer);
- }
- }
- };
- }
-
- /**
- * Configure block storage policy for CF after the directory is created.
- */
- static void configureStoragePolicy(final Configuration conf, final FileSystem fs,
- byte[] tableAndFamily, Path cfPath) {
- if (null == conf || null == fs || null == tableAndFamily || null == cfPath) {
- return;
- }
-
- String policy =
- conf.get(STORAGE_POLICY_PROPERTY_CF_PREFIX + Bytes.toString(tableAndFamily),
- conf.get(STORAGE_POLICY_PROPERTY));
- FSUtils.setStoragePolicy(fs, cfPath, policy);
- }
-
- /*
- * Data structure to hold a Writer and amount of data written on it.
- */
- static class WriterLength {
- long written = 0;
- StoreFileWriter writer = null;
- }
-
- /**
- * Return the start keys of all of the regions in this table,
- * as a list of ImmutableBytesWritable.
- */
- private static List<ImmutableBytesWritable> getRegionStartKeys(List<RegionLocator> regionLocators,
- boolean writeMultipleTables)
- throws IOException {
-
- ArrayList<ImmutableBytesWritable> ret = new ArrayList<>();
- for(RegionLocator regionLocator : regionLocators)
- {
- TableName tableName = regionLocator.getName();
- LOG.info("Looking up current regions for table " + tableName);
- byte[][] byteKeys = regionLocator.getStartKeys();
- for (byte[] byteKey : byteKeys) {
- byte[] fullKey = byteKey; //HFileOutputFormat2 use case
- if (writeMultipleTables)
- {
- //MultiTableHFileOutputFormat use case
- fullKey = combineTableNameSuffix(tableName.getName(), byteKey);
- }
- if (LOG.isDebugEnabled()) {
- LOG.debug("SplitPoint startkey for table [" + tableName + "]: [" + Bytes.toStringBinary
- (fullKey) + "]");
- }
- ret.add(new ImmutableBytesWritable(fullKey));
- }
- }
- return ret;
- }
-
- /**
- * Write out a {@link SequenceFile} that can be read by
- * {@link TotalOrderPartitioner} that contains the split points in startKeys.
- */
- @SuppressWarnings("deprecation")
- private static void writePartitions(Configuration conf, Path partitionsPath,
- List<ImmutableBytesWritable> startKeys, boolean writeMultipleTables) throws IOException {
- LOG.info("Writing partition information to " + partitionsPath);
- if (startKeys.isEmpty()) {
- throw new IllegalArgumentException("No regions passed");
- }
-
- // We're generating a list of split points, and we don't ever
- // have keys < the first region (which has an empty start key)
- // so we need to remove it. Otherwise we would end up with an
- // empty reducer with index 0
- TreeSet<ImmutableBytesWritable> sorted = new TreeSet<>(startKeys);
- ImmutableBytesWritable first = sorted.first();
- if (writeMultipleTables) {
- first = new ImmutableBytesWritable(MultiTableHFileOutputFormat.getSuffix(sorted.first
- ().get()));
- }
- if (!first.equals(HConstants.EMPTY_BYTE_ARRAY)) {
- throw new IllegalArgumentException(
- "First region of table should have empty start key. Instead has: "
- + Bytes.toStringBinary(first.get()));
- }
- sorted.remove(sorted.first());
-
- // Write the actual file
- FileSystem fs = partitionsPath.getFileSystem(conf);
- SequenceFile.Writer writer = SequenceFile.createWriter(
- fs, conf, partitionsPath, ImmutableBytesWritable.class,
- NullWritable.class);
-
- try {
- for (ImmutableBytesWritable startKey : sorted) {
- writer.append(startKey, NullWritable.get());
- }
- } finally {
- writer.close();
- }
- }
-
- /**
- * Configure a MapReduce Job to perform an incremental load into the given
- * table. This
- * <ul>
- * <li>Inspects the table to configure a total order partitioner</li>
- * <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
- * <li>Sets the number of reduce tasks to match the current number of regions</li>
- * <li>Sets the output key/value class to match HFileOutputFormat2's requirements</li>
- * <li>Sets the reducer up to perform the appropriate sorting (either KeyValueSortReducer or
- * PutSortReducer)</li>
- * </ul>
- * The user should be sure to set the map output value class to either KeyValue or Put before
- * running this function.
- */
- public static void configureIncrementalLoad(Job job, Table table, RegionLocator regionLocator)
- throws IOException {
- configureIncrementalLoad(job, table.getDescriptor(), regionLocator);
- }
-
- /**
- * Configure a MapReduce Job to perform an incremental load into the given
- * table. This
- * <ul>
- * <li>Inspects the table to configure a total order partitioner</li>
- * <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
- * <li>Sets the number of reduce tasks to match the current number of regions</li>
- * <li>Sets the output key/value class to match HFileOutputFormat2's requirements</li>
- * <li>Sets the reducer up to perform the appropriate sorting (either KeyValueSortReducer or
- * PutSortReducer)</li>
- * </ul>
- * The user should be sure to set the map output value class to either KeyValue or Put before
- * running this function.
- */
- public static void configureIncrementalLoad(Job job, TableDescriptor tableDescriptor,
- RegionLocator regionLocator) throws IOException {
- ArrayList<TableInfo> singleTableInfo = new ArrayList<>();
- singleTableInfo.add(new TableInfo(tableDescriptor, regionLocator));
- configureIncrementalLoad(job, singleTableInfo, HFileOutputFormat2.class);
- }
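A minimal driver sketch showing how configureIncrementalLoad above is intended to be used for a single-table bulk load. The driver and mapper classes, the table name "exampleTable", the input line format, and the HDFS paths are illustrative assumptions, not part of this code:

  import java.io.IOException;
  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.hbase.HBaseConfiguration;
  import org.apache.hadoop.hbase.TableName;
  import org.apache.hadoop.hbase.client.Connection;
  import org.apache.hadoop.hbase.client.ConnectionFactory;
  import org.apache.hadoop.hbase.client.Put;
  import org.apache.hadoop.hbase.client.RegionLocator;
  import org.apache.hadoop.hbase.client.Table;
  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
  import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
  import org.apache.hadoop.hbase.util.Bytes;
  import org.apache.hadoop.io.LongWritable;
  import org.apache.hadoop.io.Text;
  import org.apache.hadoop.mapreduce.Job;
  import org.apache.hadoop.mapreduce.Mapper;
  import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
  import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

  public class ExampleBulkLoadDriver {

    // Illustrative mapper: parses "row,family,qualifier,value" text lines into Puts.
    public static class ExamplePutMapper
        extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
      @Override
      protected void map(LongWritable key, Text line, Context context)
          throws IOException, InterruptedException {
        String[] f = line.toString().split(",");
        Put put = new Put(Bytes.toBytes(f[0]));
        put.addColumn(Bytes.toBytes(f[1]), Bytes.toBytes(f[2]), Bytes.toBytes(f[3]));
        context.write(new ImmutableBytesWritable(put.getRow()), put);
      }
    }

    public static void main(String[] args) throws Exception {
      Configuration conf = HBaseConfiguration.create();
      Job job = Job.getInstance(conf, "prepare-bulkload");
      job.setJarByClass(ExampleBulkLoadDriver.class);
      job.setMapperClass(ExamplePutMapper.class);
      job.setMapOutputKeyClass(ImmutableBytesWritable.class);
      job.setMapOutputValueClass(Put.class);                // PutSortReducer is then selected automatically
      FileInputFormat.addInputPath(job, new Path("/example/input"));
      FileOutputFormat.setOutputPath(job, new Path("/example/hfile-output"));
      TableName tn = TableName.valueOf("exampleTable");
      try (Connection conn = ConnectionFactory.createConnection(conf);
           Table table = conn.getTable(tn);
           RegionLocator locator = conn.getRegionLocator(tn)) {
        HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(), locator);
        job.waitForCompletion(true);
      }
    }
  }

Loading the generated HFiles into the live table is a separate step, for example with the LoadIncrementalHFiles tool.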
-
- static void configureIncrementalLoad(Job job, List<TableInfo> multiTableInfo, Class<? extends OutputFormat<?, ?>> cls) throws IOException {
- Configuration conf = job.getConfiguration();
- job.setOutputKeyClass(ImmutableBytesWritable.class);
- job.setOutputValueClass(KeyValue.class);
- job.setOutputFormatClass(cls);
-
- if (multiTableInfo.stream().distinct().count() != multiTableInfo.size()) {
- throw new IllegalArgumentException("Duplicate entries found in TableInfo argument");
- }
- boolean writeMultipleTables = false;
- if (MultiTableHFileOutputFormat.class.equals(cls)) {
- writeMultipleTables = true;
- conf.setBoolean(MULTI_TABLE_HFILEOUTPUTFORMAT_CONF_KEY, true);
- }
- // Based on the configured map output class, set the correct reducer to properly
- // sort the incoming values.
- // TODO it would be nice to pick one or the other of these formats.
- if (KeyValue.class.equals(job.getMapOutputValueClass())) {
- job.setReducerClass(KeyValueSortReducer.class);
- } else if (Put.class.equals(job.getMapOutputValueClass())) {
- job.setReducerClass(PutSortReducer.class);
- } else if (Text.class.equals(job.getMapOutputValueClass())) {
- job.setReducerClass(TextSortReducer.class);
- } else {
- LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
- }
-
- conf.setStrings("io.serializations", conf.get("io.serializations"),
- MutationSerialization.class.getName(), ResultSerialization.class.getName(),
- KeyValueSerialization.class.getName());
-
- if (conf.getBoolean(LOCALITY_SENSITIVE_CONF_KEY, DEFAULT_LOCALITY_SENSITIVE)) {
- LOG.info("bulkload locality sensitive enabled");
- }
-
- /* Now get the region start keys for every table required */
- List<String> allTableNames = new ArrayList<>(multiTableInfo.size());
- List<RegionLocator> regionLocators = new ArrayList<>( multiTableInfo.size());
- List<TableDescriptor> tableDescriptors = new ArrayList<>( multiTableInfo.size());
-
- for( TableInfo tableInfo : multiTableInfo )
- {
- regionLocators.add(tableInfo.getRegionLocator());
- allTableNames.add(tableInfo.getRegionLocator().getName().getNameAsString());
- tableDescriptors.add(tableInfo.getTableDescriptor());
- }
-    // Record table names so the writer can be created with favored nodes, and so per-table
-    // column family attributes (compression, block size, etc.) can be decoded
- conf.set(OUTPUT_TABLE_NAME_CONF_KEY, StringUtils.join(allTableNames, Bytes
- .toString(tableSeparator)));
- List<ImmutableBytesWritable> startKeys = getRegionStartKeys(regionLocators, writeMultipleTables);
- // Use table's region boundaries for TOP split points.
- LOG.info("Configuring " + startKeys.size() + " reduce partitions " +
- "to match current region count for all tables");
- job.setNumReduceTasks(startKeys.size());
-
- configurePartitioner(job, startKeys, writeMultipleTables);
- // Set compression algorithms based on column families
-
- conf.set(COMPRESSION_FAMILIES_CONF_KEY, serializeColumnFamilyAttribute(compressionDetails,
- tableDescriptors));
- conf.set(BLOCK_SIZE_FAMILIES_CONF_KEY, serializeColumnFamilyAttribute(blockSizeDetails,
- tableDescriptors));
- conf.set(BLOOM_TYPE_FAMILIES_CONF_KEY, serializeColumnFamilyAttribute(bloomTypeDetails,
- tableDescriptors));
- conf.set(DATABLOCK_ENCODING_FAMILIES_CONF_KEY,
- serializeColumnFamilyAttribute(dataBlockEncodingDetails, tableDescriptors));
-
- TableMapReduceUtil.addDependencyJars(job);
- TableMapReduceUtil.initCredentials(job);
- LOG.info("Incremental output configured for tables: " + StringUtils.join(allTableNames, ","));
- }
-
- public static void configureIncrementalLoadMap(Job job, TableDescriptor tableDescriptor) throws
- IOException {
- Configuration conf = job.getConfiguration();
-
- job.setOutputKeyClass(ImmutableBytesWritable.class);
- job.setOutputValueClass(KeyValue.class);
- job.setOutputFormatClass(HFileOutputFormat2.class);
-
- ArrayList<TableDescriptor> singleTableDescriptor = new ArrayList<>(1);
- singleTableDescriptor.add(tableDescriptor);
-
- conf.set(OUTPUT_TABLE_NAME_CONF_KEY, tableDescriptor.getTableName().getNameAsString());
- // Set compression algorithms based on column families
- conf.set(COMPRESSION_FAMILIES_CONF_KEY,
- serializeColumnFamilyAttribute(compressionDetails, singleTableDescriptor));
- conf.set(BLOCK_SIZE_FAMILIES_CONF_KEY,
- serializeColumnFamilyAttribute(blockSizeDetails, singleTableDescriptor));
- conf.set(BLOOM_TYPE_FAMILIES_CONF_KEY,
- serializeColumnFamilyAttribute(bloomTypeDetails, singleTableDescriptor));
- conf.set(DATABLOCK_ENCODING_FAMILIES_CONF_KEY,
- serializeColumnFamilyAttribute(dataBlockEncodingDetails, singleTableDescriptor));
-
- TableMapReduceUtil.addDependencyJars(job);
- TableMapReduceUtil.initCredentials(job);
- LOG.info("Incremental table " + tableDescriptor.getTableName() + " output configured.");
- }
-
- /**
- * Runs inside the task to deserialize column family to compression algorithm
- * map from the configuration.
- *
- * @param conf to read the serialized values from
- * @return a map from column family to the configured compression algorithm
- */
- @VisibleForTesting
- static Map<byte[], Algorithm> createFamilyCompressionMap(Configuration
- conf) {
- Map<byte[], String> stringMap = createFamilyConfValueMap(conf,
- COMPRESSION_FAMILIES_CONF_KEY);
- Map<byte[], Algorithm> compressionMap = new TreeMap<>(Bytes.BYTES_COMPARATOR);
- for (Map.Entry<byte[], String> e : stringMap.entrySet()) {
- Algorithm algorithm = HFileWriterImpl.compressionByName(e.getValue());
- compressionMap.put(e.getKey(), algorithm);
- }
- return compressionMap;
- }
-
- /**
- * Runs inside the task to deserialize column family to bloom filter type
- * map from the configuration.
- *
- * @param conf to read the serialized values from
- * @return a map from column family to the configured bloom filter type
- */
- @VisibleForTesting
- static Map<byte[], BloomType> createFamilyBloomTypeMap(Configuration conf) {
- Map<byte[], String> stringMap = createFamilyConfValueMap(conf,
- BLOOM_TYPE_FAMILIES_CONF_KEY);
- Map<byte[], BloomType> bloomTypeMap = new TreeMap<>(Bytes.BYTES_COMPARATOR);
- for (Map.Entry<byte[], String> e : stringMap.entrySet()) {
- BloomType bloomType = BloomType.valueOf(e.getValue());
- bloomTypeMap.put(e.getKey(), bloomType);
- }
- return bloomTypeMap;
- }
-
- /**
- * Runs inside the task to deserialize column family to block size
- * map from the configuration.
- *
- * @param conf to read the serialized values from
- * @return a map from column family to the configured block size
- */
- @VisibleForTesting
- static Map<byte[], Integer> createFamilyBlockSizeMap(Configuration conf) {
- Map<byte[], String> stringMap = createFamilyConfValueMap(conf,
- BLOCK_SIZE_FAMILIES_CONF_KEY);
- Map<byte[], Integer> blockSizeMap = new TreeMap<>(Bytes.BYTES_COMPARATOR);
- for (Map.Entry<byte[], String> e : stringMap.entrySet()) {
- Integer blockSize = Integer.parseInt(e.getValue());
- blockSizeMap.put(e.getKey(), blockSize);
- }
- return blockSizeMap;
- }
-
- /**
- * Runs inside the task to deserialize column family to data block encoding
- * type map from the configuration.
- *
- * @param conf to read the serialized values from
- * @return a map from column family to the configured data block encoding
- * (DataBlockEncoding) for the family
- */
- @VisibleForTesting
- static Map<byte[], DataBlockEncoding> createFamilyDataBlockEncodingMap(
- Configuration conf) {
- Map<byte[], String> stringMap = createFamilyConfValueMap(conf,
- DATABLOCK_ENCODING_FAMILIES_CONF_KEY);
- Map<byte[], DataBlockEncoding> encoderMap = new TreeMap<>(Bytes.BYTES_COMPARATOR);
- for (Map.Entry<byte[], String> e : stringMap.entrySet()) {
- encoderMap.put(e.getKey(), DataBlockEncoding.valueOf((e.getValue())));
- }
- return encoderMap;
- }
-
-
- /**
- * Run inside the task to deserialize column family to given conf value map.
- *
- * @param conf to read the serialized values from
- * @param confName conf key to read from the configuration
- * @return a map of column family to the given configuration value
- */
- private static Map<byte[], String> createFamilyConfValueMap(
- Configuration conf, String confName) {
- Map<byte[], String> confValMap = new TreeMap<>(Bytes.BYTES_COMPARATOR);
- String confVal = conf.get(confName, "");
- for (String familyConf : confVal.split("&")) {
- String[] familySplit = familyConf.split("=");
- if (familySplit.length != 2) {
- continue;
- }
- try {
- confValMap.put(URLDecoder.decode(familySplit[0], "UTF-8").getBytes(StandardCharsets.UTF_8),
- URLDecoder.decode(familySplit[1], "UTF-8"));
- } catch (UnsupportedEncodingException e) {
- // will not happen with UTF-8 encoding
- throw new AssertionError(e);
- }
- }
- return confValMap;
- }
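-
To illustrate the wire format parsed above, here is a small self-contained restatement of the same split/URL-decode logic with a made-up value (the real keys written by serializeColumnFamilyAttribute below additionally carry the table name prefix):

    import java.net.URLDecoder;
    import java.util.Map;
    import java.util.TreeMap;

    public class FamilyConfValueDemo {
      public static void main(String[] args) throws Exception {
        // Hypothetical serialized value in the "key=value&key=value" form used above;
        // both sides of each pair are URL-encoded when written.
        String confVal = "f1=GZ&f2=NONE";
        Map<String, String> parsed = new TreeMap<>();
        for (String familyConf : confVal.split("&")) {
          String[] kv = familyConf.split("=");
          if (kv.length != 2) {
            continue; // ignore malformed entries, as the method above does
          }
          parsed.put(URLDecoder.decode(kv[0], "UTF-8"), URLDecoder.decode(kv[1], "UTF-8"));
        }
        System.out.println(parsed); // prints {f1=GZ, f2=NONE}
      }
    }
-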
-
- /**
- * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
- * <code>splitPoints</code>. Cleans up the partitions file after the job exits.
- */
- static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints, boolean
- writeMultipleTables)
- throws IOException {
- Configuration conf = job.getConfiguration();
- // create the partitions file
- FileSystem fs = FileSystem.get(conf);
- String hbaseTmpFsDir =
- conf.get(HConstants.TEMPORARY_FS_DIRECTORY_KEY,
- HConstants.DEFAULT_TEMPORARY_HDFS_DIRECTORY);
- Path partitionsPath = new Path(hbaseTmpFsDir, "partitions_" + UUID.randomUUID());
- fs.makeQualified(partitionsPath);
- writePartitions(conf, partitionsPath, splitPoints, writeMultipleTables);
- fs.deleteOnExit(partitionsPath);
-
- // configure job to use it
- job.setPartitionerClass(TotalOrderPartitioner.class);
- TotalOrderPartitioner.setPartitionFile(conf, partitionsPath);
- }
-
- @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "RCN_REDUNDANT_NULLCHECK_OF_NONNULL_VALUE")
- @VisibleForTesting
- static String serializeColumnFamilyAttribute(Function<ColumnFamilyDescriptor, String> fn, List<TableDescriptor> allTables)
- throws UnsupportedEncodingException {
- StringBuilder attributeValue = new StringBuilder();
- int i = 0;
- for (TableDescriptor tableDescriptor : allTables) {
- if (tableDescriptor == null) {
- // could happen with mock table instance
- // CODEREVIEW: Can I set an empty string in conf if mock table instance?
- return "";
- }
- for (ColumnFamilyDescriptor familyDescriptor : tableDescriptor.getColumnFamilies()) {
- if (i++ > 0) {
- attributeValue.append('&');
- }
- attributeValue.append(URLEncoder.encode(
- Bytes.toString(combineTableNameSuffix(tableDescriptor.getTableName().getName(), familyDescriptor.getName())),
- "UTF-8"));
- attributeValue.append('=');
- attributeValue.append(URLEncoder.encode(fn.apply(familyDescriptor), "UTF-8"));
- }
- }
-    // Note: no trailing ampersand to strip; separators are only added between entries
- return attributeValue.toString();
- }
-
- /**
- * Maps a column family descriptor to the name of its configured compression
- * algorithm. Used to serialize the per-family compression map into the
- * configuration while setting up the MR job for incremental load.
- */
- @VisibleForTesting
- static Function<ColumnFamilyDescriptor, String> compressionDetails = familyDescriptor ->
- familyDescriptor.getCompressionType().getName();
-
- /**
- * Maps a column family descriptor to its configured block size. Used to
- * serialize the per-family block size map into the configuration while
- * setting up the MR job for incremental load.
- */
- @VisibleForTesting
- static Function<ColumnFamilyDescriptor, String> blockSizeDetails = familyDescriptor -> String
- .valueOf(familyDescriptor.getBlocksize());
-
- /**
- * Maps a column family descriptor to its configured bloom filter type,
- * falling back to the default when unset. Used to serialize the per-family
- * bloom type map into the configuration while setting up the MR job for
- * incremental load.
- */
- @VisibleForTesting
- static Function<ColumnFamilyDescriptor, String> bloomTypeDetails = familyDescriptor -> {
- String bloomType = familyDescriptor.getBloomFilterType().toString();
- if (bloomType == null) {
- bloomType = ColumnFamilyDescriptorBuilder.DEFAULT_BLOOMFILTER.name();
- }
- return bloomType;
- };
-
- /**
- * Maps a column family descriptor to its configured data block encoding
- * (NONE when unset). Used to serialize the per-family encoding map into the
- * configuration while setting up the MR job for incremental load.
- */
- @VisibleForTesting
- static Function<ColumnFamilyDescriptor, String> dataBlockEncodingDetails = familyDescriptor -> {
- DataBlockEncoding encoding = familyDescriptor.getDataBlockEncoding();
- if (encoding == null) {
- encoding = DataBlockEncoding.NONE;
- }
- return encoding.toString();
- };
-
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/HRegionPartitioner.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/HRegionPartitioner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/HRegionPartitioner.java
deleted file mode 100644
index 3475a48..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/HRegionPartitioner.java
+++ /dev/null
@@ -1,140 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.RegionLocator;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.mapred.TableOutputFormat;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapreduce.Partitioner;
-
-/**
- * This is used to partition the output keys into groups of keys.
- * Keys are grouped according to the regions that currently exist
- * so that each reducer fills a single region and the load is distributed.
- *
- * <p>This class is not suitable as a partitioner for creating hfiles
- * for incremental bulk loads as region spread will likely change between time of
- * hfile creation and load time. See {@link LoadIncrementalHFiles}
- * and <a href="http://hbase.apache.org/book.html#arch.bulk.load">Bulk Load</a>.
- *
- * @param <KEY> The type of the key.
- * @param <VALUE> The type of the value.
- */
-@InterfaceAudience.Public
-public class HRegionPartitioner<KEY, VALUE>
-extends Partitioner<ImmutableBytesWritable, VALUE>
-implements Configurable {
-
- private static final Log LOG = LogFactory.getLog(HRegionPartitioner.class);
- private Configuration conf = null;
- // Connection and locator are not cleaned up; they just die when partitioner is done.
- private Connection connection;
- private RegionLocator locator;
- private byte[][] startKeys;
-
- /**
- * Gets the partition number for a given key (hence record) given the total
- * number of partitions i.e. number of reduce-tasks for the job.
- *
- * <p>Typically a hash function on all or a subset of the key.</p>
- *
- * @param key The key to be partitioned.
- * @param value The entry value.
- * @param numPartitions The total number of partitions.
- * @return The partition number for the <code>key</code>.
- * @see org.apache.hadoop.mapreduce.Partitioner#getPartition(
- * java.lang.Object, java.lang.Object, int)
- */
- @Override
- public int getPartition(ImmutableBytesWritable key,
- VALUE value, int numPartitions) {
- byte[] region = null;
- // Only one region return 0
- if (this.startKeys.length == 1){
- return 0;
- }
- try {
- // Not sure if this is cached after a split so we could have problems
- // here if a region splits while mapping
- region = this.locator.getRegionLocation(key.get()).getRegionInfo().getStartKey();
- } catch (IOException e) {
- LOG.error(e);
- }
- for (int i = 0; i < this.startKeys.length; i++){
- if (Bytes.compareTo(region, this.startKeys[i]) == 0 ){
- if (i >= numPartitions-1){
- // cover the case where we have fewer reducers than regions.
- return (Integer.toString(i).hashCode()
- & Integer.MAX_VALUE) % numPartitions;
- }
- return i;
- }
- }
- // if above fails to find start key that match we need to return something
- return 0;
- }
-
- /**
- * Returns the current configuration.
- *
- * @return The current configuration.
- * @see org.apache.hadoop.conf.Configurable#getConf()
- */
- @Override
- public Configuration getConf() {
- return conf;
- }
-
- /**
- * Sets the configuration. This is used to determine the start keys for the
- * given table.
- *
- * @param configuration The configuration to set.
- * @see org.apache.hadoop.conf.Configurable#setConf(
- * org.apache.hadoop.conf.Configuration)
- */
- @Override
- public void setConf(Configuration configuration) {
- this.conf = HBaseConfiguration.create(configuration);
- try {
- this.connection = ConnectionFactory.createConnection(HBaseConfiguration.create(conf));
- TableName tableName = TableName.valueOf(conf.get(TableOutputFormat.OUTPUT_TABLE));
- this.locator = this.connection.getRegionLocator(tableName);
- } catch (IOException e) {
- LOG.error(e);
- }
- try {
- this.startKeys = this.locator.getStartKeys();
- } catch (IOException e) {
- LOG.error(e);
- }
- }
-}
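A minimal sketch (hypothetical table name, mapper omitted) showing one way to plug this partitioner into a reduce-side job via TableMapReduceUtil:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.mapreduce.HRegionPartitioner;
    import org.apache.hadoop.hbase.mapreduce.IdentityTableReducer;
    import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
    import org.apache.hadoop.mapreduce.Job;

    public class RegionAlignedWriteJob {
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        Job job = Job.getInstance(conf, "write-by-region");
        // Mapper setup omitted; it must emit row keys (ImmutableBytesWritable) and
        // Put/Delete values for the reducer below.
        // Passing HRegionPartitioner groups keys by the existing regions of the
        // hypothetical "usertable", so each reducer writes into a single region.
        TableMapReduceUtil.initTableReducerJob(
            "usertable", IdentityTableReducer.class, job, HRegionPartitioner.class);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
      }
    }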
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/HashTable.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/HashTable.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/HashTable.java
deleted file mode 100644
index dfac471..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/HashTable.java
+++ /dev/null
@@ -1,747 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.io.OutputStreamWriter;
-import java.security.MessageDigest;
-import java.security.NoSuchAlgorithmException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-import java.util.Properties;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.Pair;
-import org.apache.hadoop.io.MapFile;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat;
-import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;
-import org.apache.hadoop.util.GenericOptionsParser;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.base.Charsets;
-import org.apache.hadoop.hbase.shaded.com.google.common.base.Throwables;
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.Ordering;
-
-public class HashTable extends Configured implements Tool {
-
- private static final Log LOG = LogFactory.getLog(HashTable.class);
-
- private static final int DEFAULT_BATCH_SIZE = 8000;
-
- private final static String HASH_BATCH_SIZE_CONF_KEY = "hash.batch.size";
- final static String PARTITIONS_FILE_NAME = "partitions";
- final static String MANIFEST_FILE_NAME = "manifest";
- final static String HASH_DATA_DIR = "hashes";
- final static String OUTPUT_DATA_FILE_PREFIX = "part-r-";
- private final static String TMP_MANIFEST_FILE_NAME = "manifest.tmp";
-
- TableHash tableHash = new TableHash();
- Path destPath;
-
- public HashTable(Configuration conf) {
- super(conf);
- }
-
- public static class TableHash {
-
- Path hashDir;
-
- String tableName;
- String families = null;
- long batchSize = DEFAULT_BATCH_SIZE;
- int numHashFiles = 0;
- byte[] startRow = HConstants.EMPTY_START_ROW;
- byte[] stopRow = HConstants.EMPTY_END_ROW;
- int scanBatch = 0;
- int versions = -1;
- long startTime = 0;
- long endTime = 0;
-
- List<ImmutableBytesWritable> partitions;
-
- public static TableHash read(Configuration conf, Path hashDir) throws IOException {
- TableHash tableHash = new TableHash();
- FileSystem fs = hashDir.getFileSystem(conf);
- tableHash.hashDir = hashDir;
- tableHash.readPropertiesFile(fs, new Path(hashDir, MANIFEST_FILE_NAME));
- tableHash.readPartitionFile(fs, conf, new Path(hashDir, PARTITIONS_FILE_NAME));
- return tableHash;
- }
-
- void writePropertiesFile(FileSystem fs, Path path) throws IOException {
- Properties p = new Properties();
- p.setProperty("table", tableName);
- if (families != null) {
- p.setProperty("columnFamilies", families);
- }
- p.setProperty("targetBatchSize", Long.toString(batchSize));
- p.setProperty("numHashFiles", Integer.toString(numHashFiles));
- if (!isTableStartRow(startRow)) {
- p.setProperty("startRowHex", Bytes.toHex(startRow));
- }
- if (!isTableEndRow(stopRow)) {
- p.setProperty("stopRowHex", Bytes.toHex(stopRow));
- }
- if (scanBatch > 0) {
- p.setProperty("scanBatch", Integer.toString(scanBatch));
- }
- if (versions >= 0) {
- p.setProperty("versions", Integer.toString(versions));
- }
- if (startTime != 0) {
- p.setProperty("startTimestamp", Long.toString(startTime));
- }
- if (endTime != 0) {
- p.setProperty("endTimestamp", Long.toString(endTime));
- }
-
- try (OutputStreamWriter osw = new OutputStreamWriter(fs.create(path), Charsets.UTF_8)) {
- p.store(osw, null);
- }
- }
-
- void readPropertiesFile(FileSystem fs, Path path) throws IOException {
- Properties p = new Properties();
- try (FSDataInputStream in = fs.open(path)) {
- try (InputStreamReader isr = new InputStreamReader(in, Charsets.UTF_8)) {
- p.load(isr);
- }
- }
- tableName = p.getProperty("table");
- families = p.getProperty("columnFamilies");
- batchSize = Long.parseLong(p.getProperty("targetBatchSize"));
- numHashFiles = Integer.parseInt(p.getProperty("numHashFiles"));
-
- String startRowHex = p.getProperty("startRowHex");
- if (startRowHex != null) {
- startRow = Bytes.fromHex(startRowHex);
- }
- String stopRowHex = p.getProperty("stopRowHex");
- if (stopRowHex != null) {
- stopRow = Bytes.fromHex(stopRowHex);
- }
-
- String scanBatchString = p.getProperty("scanBatch");
- if (scanBatchString != null) {
- scanBatch = Integer.parseInt(scanBatchString);
- }
-
- String versionString = p.getProperty("versions");
- if (versionString != null) {
- versions = Integer.parseInt(versionString);
- }
-
- String startTimeString = p.getProperty("startTimestamp");
- if (startTimeString != null) {
- startTime = Long.parseLong(startTimeString);
- }
-
- String endTimeString = p.getProperty("endTimestamp");
- if (endTimeString != null) {
- endTime = Long.parseLong(endTimeString);
- }
- }
-
- Scan initScan() throws IOException {
- Scan scan = new Scan();
- scan.setCacheBlocks(false);
- if (startTime != 0 || endTime != 0) {
- scan.setTimeRange(startTime, endTime == 0 ? HConstants.LATEST_TIMESTAMP : endTime);
- }
- if (scanBatch > 0) {
- scan.setBatch(scanBatch);
- }
- if (versions >= 0) {
- scan.setMaxVersions(versions);
- }
- if (!isTableStartRow(startRow)) {
- scan.setStartRow(startRow);
- }
- if (!isTableEndRow(stopRow)) {
- scan.setStopRow(stopRow);
- }
- if(families != null) {
- for(String fam : families.split(",")) {
- scan.addFamily(Bytes.toBytes(fam));
- }
- }
- return scan;
- }
-
- /**
-     * Choose partitions between row ranges to hash to a single output file.
-     * Selects region boundaries that fall within the scan range and groups them
-     * into the desired number of partitions.
- */
- void selectPartitions(Pair<byte[][], byte[][]> regionStartEndKeys) {
- List<byte[]> startKeys = new ArrayList<>();
- for (int i = 0; i < regionStartEndKeys.getFirst().length; i++) {
- byte[] regionStartKey = regionStartEndKeys.getFirst()[i];
- byte[] regionEndKey = regionStartEndKeys.getSecond()[i];
-
- // if the scan begins after this region ends, or ends before this region starts,
- // then drop this region; in other words:
- // IF (scan begins before the end of this region
- // AND scan ends after the start of this region)
- // THEN include this region
- if ((isTableStartRow(startRow) || isTableEndRow(regionEndKey)
- || Bytes.compareTo(startRow, regionEndKey) < 0)
- && (isTableEndRow(stopRow) || isTableStartRow(regionStartKey)
- || Bytes.compareTo(stopRow, regionStartKey) > 0)) {
- startKeys.add(regionStartKey);
- }
- }
-
- int numRegions = startKeys.size();
- if (numHashFiles == 0) {
- numHashFiles = numRegions / 100;
- }
- if (numHashFiles == 0) {
- numHashFiles = 1;
- }
- if (numHashFiles > numRegions) {
- // can't partition within regions
- numHashFiles = numRegions;
- }
-
- // choose a subset of start keys to group regions into ranges
- partitions = new ArrayList<>(numHashFiles - 1);
- // skip the first start key as it is not a partition between ranges.
- for (long i = 1; i < numHashFiles; i++) {
- int splitIndex = (int) (numRegions * i / numHashFiles);
- partitions.add(new ImmutableBytesWritable(startKeys.get(splitIndex)));
- }
- }
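-
For instance (an illustrative calculation, not from the patch): if 250 region start keys fall inside the scan range and numhashfiles is left at its default of 0, numHashFiles becomes 250 / 100 = 2, and a single partition key is chosen at start-key index (int)(250 * 1 / 2) = 125, so the table is hashed into two output files split at that region boundary.
-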
-
- void writePartitionFile(Configuration conf, Path path) throws IOException {
- FileSystem fs = path.getFileSystem(conf);
- @SuppressWarnings("deprecation")
- SequenceFile.Writer writer = SequenceFile.createWriter(
- fs, conf, path, ImmutableBytesWritable.class, NullWritable.class);
-
- for (int i = 0; i < partitions.size(); i++) {
- writer.append(partitions.get(i), NullWritable.get());
- }
- writer.close();
- }
-
- private void readPartitionFile(FileSystem fs, Configuration conf, Path path)
- throws IOException {
- @SuppressWarnings("deprecation")
- SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
- ImmutableBytesWritable key = new ImmutableBytesWritable();
- partitions = new ArrayList<>();
- while (reader.next(key)) {
- partitions.add(new ImmutableBytesWritable(key.copyBytes()));
- }
- reader.close();
-
- if (!Ordering.natural().isOrdered(partitions)) {
- throw new IOException("Partitions are not ordered!");
- }
- }
-
- @Override
- public String toString() {
- StringBuilder sb = new StringBuilder();
- sb.append("tableName=").append(tableName);
- if (families != null) {
- sb.append(", families=").append(families);
- }
- sb.append(", batchSize=").append(batchSize);
- sb.append(", numHashFiles=").append(numHashFiles);
- if (!isTableStartRow(startRow)) {
- sb.append(", startRowHex=").append(Bytes.toHex(startRow));
- }
- if (!isTableEndRow(stopRow)) {
- sb.append(", stopRowHex=").append(Bytes.toHex(stopRow));
- }
- if (scanBatch >= 0) {
- sb.append(", scanBatch=").append(scanBatch);
- }
- if (versions >= 0) {
- sb.append(", versions=").append(versions);
- }
- if (startTime != 0) {
- sb.append("startTime=").append(startTime);
- }
- if (endTime != 0) {
- sb.append("endTime=").append(endTime);
- }
- return sb.toString();
- }
-
- static String getDataFileName(int hashFileIndex) {
- return String.format(HashTable.OUTPUT_DATA_FILE_PREFIX + "%05d", hashFileIndex);
- }
-
- /**
- * Open a TableHash.Reader starting at the first hash at or after the given key.
- * @throws IOException
- */
- public Reader newReader(Configuration conf, ImmutableBytesWritable startKey)
- throws IOException {
- return new Reader(conf, startKey);
- }
-
- public class Reader implements java.io.Closeable {
- private final Configuration conf;
-
- private int hashFileIndex;
- private MapFile.Reader mapFileReader;
-
- private boolean cachedNext;
- private ImmutableBytesWritable key;
- private ImmutableBytesWritable hash;
-
- Reader(Configuration conf, ImmutableBytesWritable startKey) throws IOException {
- this.conf = conf;
- int partitionIndex = Collections.binarySearch(partitions, startKey);
- if (partitionIndex >= 0) {
- // if the key is equal to a partition, then go to the file after that partition
- hashFileIndex = partitionIndex+1;
- } else {
- // if the key is between partitions, then go to the file between those partitions
- hashFileIndex = -1-partitionIndex;
- }
- openHashFile();
-
- // MapFiles don't make it easy to seek() so that the subsequent next() returns
- // the desired key/value pair. So we cache it for the first call of next().
- hash = new ImmutableBytesWritable();
- key = (ImmutableBytesWritable) mapFileReader.getClosest(startKey, hash);
- if (key == null) {
- cachedNext = false;
- hash = null;
- } else {
- cachedNext = true;
- }
- }
-
- /**
- * Read the next key/hash pair.
- * Returns true if such a pair exists and false when at the end of the data.
- */
- public boolean next() throws IOException {
- if (cachedNext) {
- cachedNext = false;
- return true;
- }
- key = new ImmutableBytesWritable();
- hash = new ImmutableBytesWritable();
- while (true) {
- boolean hasNext = mapFileReader.next(key, hash);
- if (hasNext) {
- return true;
- }
- hashFileIndex++;
- if (hashFileIndex < TableHash.this.numHashFiles) {
- mapFileReader.close();
- openHashFile();
- } else {
- key = null;
- hash = null;
- return false;
- }
- }
- }
-
- /**
- * Get the current key
- * @return the current key or null if there is no current key
- */
- public ImmutableBytesWritable getCurrentKey() {
- return key;
- }
-
- /**
- * Get the current hash
- * @return the current hash or null if there is no current hash
- */
- public ImmutableBytesWritable getCurrentHash() {
- return hash;
- }
-
- private void openHashFile() throws IOException {
- if (mapFileReader != null) {
- mapFileReader.close();
- }
- Path dataDir = new Path(TableHash.this.hashDir, HASH_DATA_DIR);
- Path dataFile = new Path(dataDir, getDataFileName(hashFileIndex));
- mapFileReader = new MapFile.Reader(dataFile, conf);
- }
-
- @Override
- public void close() throws IOException {
- mapFileReader.close();
- }
- }
- }
-
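For reference, a small sketch of reading the emitted key/hash pairs back through the Reader above (the hash directory is a hypothetical placeholder; SyncTable is the usual consumer of this data):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.HConstants;
    import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
    import org.apache.hadoop.hbase.mapreduce.HashTable;
    import org.apache.hadoop.hbase.util.Bytes;

    public class DumpHashes {
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // Hypothetical output directory of a previous HashTable run.
        HashTable.TableHash tableHash =
            HashTable.TableHash.read(conf, new Path("/hashes/testTable"));
        // Start from the beginning of the table and walk every (start key, hash) pair.
        try (HashTable.TableHash.Reader reader =
            tableHash.newReader(conf, new ImmutableBytesWritable(HConstants.EMPTY_START_ROW))) {
          while (reader.next()) {
            System.out.println(Bytes.toStringBinary(reader.getCurrentKey().copyBytes())
                + " -> " + Bytes.toStringBinary(reader.getCurrentHash().copyBytes()));
          }
        }
      }
    }
-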
- static boolean isTableStartRow(byte[] row) {
- return Bytes.equals(HConstants.EMPTY_START_ROW, row);
- }
-
- static boolean isTableEndRow(byte[] row) {
- return Bytes.equals(HConstants.EMPTY_END_ROW, row);
- }
-
- public Job createSubmittableJob(String[] args) throws IOException {
- Path partitionsPath = new Path(destPath, PARTITIONS_FILE_NAME);
- generatePartitions(partitionsPath);
-
- Job job = Job.getInstance(getConf(),
- getConf().get("mapreduce.job.name", "hashTable_" + tableHash.tableName));
- Configuration jobConf = job.getConfiguration();
- jobConf.setLong(HASH_BATCH_SIZE_CONF_KEY, tableHash.batchSize);
- job.setJarByClass(HashTable.class);
-
- TableMapReduceUtil.initTableMapperJob(tableHash.tableName, tableHash.initScan(),
- HashMapper.class, ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
-
- // use a TotalOrderPartitioner and reducers to group region output into hash files
- job.setPartitionerClass(TotalOrderPartitioner.class);
- TotalOrderPartitioner.setPartitionFile(jobConf, partitionsPath);
- job.setReducerClass(Reducer.class); // identity reducer
- job.setNumReduceTasks(tableHash.numHashFiles);
- job.setOutputKeyClass(ImmutableBytesWritable.class);
- job.setOutputValueClass(ImmutableBytesWritable.class);
- job.setOutputFormatClass(MapFileOutputFormat.class);
- FileOutputFormat.setOutputPath(job, new Path(destPath, HASH_DATA_DIR));
-
- return job;
- }
-
- private void generatePartitions(Path partitionsPath) throws IOException {
- Connection connection = ConnectionFactory.createConnection(getConf());
- Pair<byte[][], byte[][]> regionKeys
- = connection.getRegionLocator(TableName.valueOf(tableHash.tableName)).getStartEndKeys();
- connection.close();
-
- tableHash.selectPartitions(regionKeys);
- LOG.info("Writing " + tableHash.partitions.size() + " partition keys to " + partitionsPath);
-
- tableHash.writePartitionFile(getConf(), partitionsPath);
- }
-
- static class ResultHasher {
- private MessageDigest digest;
-
- private boolean batchStarted = false;
- private ImmutableBytesWritable batchStartKey;
- private ImmutableBytesWritable batchHash;
- private long batchSize = 0;
-
-
- public ResultHasher() {
- try {
- digest = MessageDigest.getInstance("MD5");
- } catch (NoSuchAlgorithmException e) {
- Throwables.propagate(e);
- }
- }
-
- public void startBatch(ImmutableBytesWritable row) {
- if (batchStarted) {
- throw new RuntimeException("Cannot start new batch without finishing existing one.");
- }
- batchStarted = true;
- batchSize = 0;
- batchStartKey = row;
- batchHash = null;
- }
-
- public void hashResult(Result result) {
- if (!batchStarted) {
- throw new RuntimeException("Cannot add to batch that has not been started.");
- }
- for (Cell cell : result.rawCells()) {
- int rowLength = cell.getRowLength();
- int familyLength = cell.getFamilyLength();
- int qualifierLength = cell.getQualifierLength();
- int valueLength = cell.getValueLength();
- digest.update(cell.getRowArray(), cell.getRowOffset(), rowLength);
- digest.update(cell.getFamilyArray(), cell.getFamilyOffset(), familyLength);
- digest.update(cell.getQualifierArray(), cell.getQualifierOffset(), qualifierLength);
- long ts = cell.getTimestamp();
- for (int i = 8; i > 0; i--) {
- digest.update((byte) ts);
- ts >>>= 8;
- }
- digest.update(cell.getValueArray(), cell.getValueOffset(), valueLength);
-
- batchSize += rowLength + familyLength + qualifierLength + 8 + valueLength;
- }
- }
-
- public void finishBatch() {
- if (!batchStarted) {
- throw new RuntimeException("Cannot finish batch that has not started.");
- }
- batchStarted = false;
- batchHash = new ImmutableBytesWritable(digest.digest());
- }
-
- public boolean isBatchStarted() {
- return batchStarted;
- }
-
- public ImmutableBytesWritable getBatchStartKey() {
- return batchStartKey;
- }
-
- public ImmutableBytesWritable getBatchHash() {
- return batchHash;
- }
-
- public long getBatchSize() {
- return batchSize;
- }
- }
-
- public static class HashMapper
- extends TableMapper<ImmutableBytesWritable, ImmutableBytesWritable> {
-
- private ResultHasher hasher;
- private long targetBatchSize;
-
- private ImmutableBytesWritable currentRow;
-
- @Override
- protected void setup(Context context) throws IOException, InterruptedException {
- targetBatchSize = context.getConfiguration()
- .getLong(HASH_BATCH_SIZE_CONF_KEY, DEFAULT_BATCH_SIZE);
- hasher = new ResultHasher();
-
- TableSplit split = (TableSplit) context.getInputSplit();
- hasher.startBatch(new ImmutableBytesWritable(split.getStartRow()));
- }
-
- @Override
- protected void map(ImmutableBytesWritable key, Result value, Context context)
- throws IOException, InterruptedException {
-
- if (currentRow == null || !currentRow.equals(key)) {
- currentRow = new ImmutableBytesWritable(key); // not immutable
-
- if (hasher.getBatchSize() >= targetBatchSize) {
- hasher.finishBatch();
- context.write(hasher.getBatchStartKey(), hasher.getBatchHash());
- hasher.startBatch(currentRow);
- }
- }
-
- hasher.hashResult(value);
- }
-
- @Override
- protected void cleanup(Context context) throws IOException, InterruptedException {
- hasher.finishBatch();
- context.write(hasher.getBatchStartKey(), hasher.getBatchHash());
- }
- }
-
- private void writeTempManifestFile() throws IOException {
- Path tempManifestPath = new Path(destPath, TMP_MANIFEST_FILE_NAME);
- FileSystem fs = tempManifestPath.getFileSystem(getConf());
- tableHash.writePropertiesFile(fs, tempManifestPath);
- }
-
- private void completeManifest() throws IOException {
- Path tempManifestPath = new Path(destPath, TMP_MANIFEST_FILE_NAME);
- Path manifestPath = new Path(destPath, MANIFEST_FILE_NAME);
- FileSystem fs = tempManifestPath.getFileSystem(getConf());
- fs.rename(tempManifestPath, manifestPath);
- }
-
- private static final int NUM_ARGS = 2;
- private static void printUsage(final String errorMsg) {
- if (errorMsg != null && errorMsg.length() > 0) {
- System.err.println("ERROR: " + errorMsg);
- System.err.println();
- }
- System.err.println("Usage: HashTable [options] <tablename> <outputpath>");
- System.err.println();
- System.err.println("Options:");
- System.err.println(" batchsize the target amount of bytes to hash in each batch");
- System.err.println(" rows are added to the batch until this size is reached");
- System.err.println(" (defaults to " + DEFAULT_BATCH_SIZE + " bytes)");
- System.err.println(" numhashfiles the number of hash files to create");
- System.err.println(" if set to fewer than number of regions then");
- System.err.println(" the job will create this number of reducers");
- System.err.println(" (defaults to 1/100 of regions -- at least 1)");
- System.err.println(" startrow the start row");
- System.err.println(" stoprow the stop row");
- System.err.println(" starttime beginning of the time range (unixtime in millis)");
- System.err.println(" without endtime means from starttime to forever");
- System.err.println(" endtime end of the time range. Ignored if no starttime specified.");
- System.err.println(" scanbatch scanner batch size to support intra row scans");
- System.err.println(" versions number of cell versions to include");
- System.err.println(" families comma-separated list of families to include");
- System.err.println();
- System.err.println("Args:");
- System.err.println(" tablename Name of the table to hash");
- System.err.println(" outputpath Filesystem path to put the output data");
- System.err.println();
- System.err.println("Examples:");
- System.err.println(" To hash 'TestTable' in 32kB batches for a 1 hour window into 50 files:");
- System.err.println(" $ hbase " +
- "org.apache.hadoop.hbase.mapreduce.HashTable --batchsize=32000 --numhashfiles=50"
- + " --starttime=1265875194289 --endtime=1265878794289 --families=cf2,cf3"
- + " TestTable /hashes/testTable");
- }
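-
Given the constants above, the output directory of a run like the example (hypothetical paths, assuming --numhashfiles=50) ends up looking roughly like:

    /hashes/testTable/
      manifest           properties written by writePropertiesFile (table, families,
                         batch size, key/time range, numHashFiles)
      partitions         SequenceFile of the chosen partition start keys
      hashes/
        part-r-00000/    one MapFile (data + index) per hash file
        ...
        part-r-00049/
-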
-
- private boolean doCommandLine(final String[] args) {
- if (args.length < NUM_ARGS) {
- printUsage(null);
- return false;
- }
- try {
-
- tableHash.tableName = args[args.length-2];
- destPath = new Path(args[args.length-1]);
-
- for (int i = 0; i < args.length - NUM_ARGS; i++) {
- String cmd = args[i];
- if (cmd.equals("-h") || cmd.startsWith("--h")) {
- printUsage(null);
- return false;
- }
-
- final String batchSizeArgKey = "--batchsize=";
- if (cmd.startsWith(batchSizeArgKey)) {
- tableHash.batchSize = Long.parseLong(cmd.substring(batchSizeArgKey.length()));
- continue;
- }
-
- final String numHashFilesArgKey = "--numhashfiles=";
- if (cmd.startsWith(numHashFilesArgKey)) {
- tableHash.numHashFiles = Integer.parseInt(cmd.substring(numHashFilesArgKey.length()));
- continue;
- }
-
- final String startRowArgKey = "--startrow=";
- if (cmd.startsWith(startRowArgKey)) {
- tableHash.startRow = Bytes.fromHex(cmd.substring(startRowArgKey.length()));
- continue;
- }
-
- final String stopRowArgKey = "--stoprow=";
- if (cmd.startsWith(stopRowArgKey)) {
- tableHash.stopRow = Bytes.fromHex(cmd.substring(stopRowArgKey.length()));
- continue;
- }
-
- final String startTimeArgKey = "--starttime=";
- if (cmd.startsWith(startTimeArgKey)) {
- tableHash.startTime = Long.parseLong(cmd.substring(startTimeArgKey.length()));
- continue;
- }
-
- final String endTimeArgKey = "--endtime=";
- if (cmd.startsWith(endTimeArgKey)) {
- tableHash.endTime = Long.parseLong(cmd.substring(endTimeArgKey.length()));
- continue;
- }
-
- final String scanBatchArgKey = "--scanbatch=";
- if (cmd.startsWith(scanBatchArgKey)) {
- tableHash.scanBatch = Integer.parseInt(cmd.substring(scanBatchArgKey.length()));
- continue;
- }
-
- final String versionsArgKey = "--versions=";
- if (cmd.startsWith(versionsArgKey)) {
- tableHash.versions = Integer.parseInt(cmd.substring(versionsArgKey.length()));
- continue;
- }
-
- final String familiesArgKey = "--families=";
- if (cmd.startsWith(familiesArgKey)) {
- tableHash.families = cmd.substring(familiesArgKey.length());
- continue;
- }
-
- printUsage("Invalid argument '" + cmd + "'");
- return false;
- }
- if ((tableHash.startTime != 0 || tableHash.endTime != 0)
- && (tableHash.startTime >= tableHash.endTime)) {
- printUsage("Invalid time range filter: starttime="
- + tableHash.startTime + " >= endtime=" + tableHash.endTime);
- return false;
- }
-
- } catch (Exception e) {
- e.printStackTrace();
- printUsage("Can't start because " + e.getMessage());
- return false;
- }
- return true;
- }
-
- /**
- * Main entry point.
- */
- public static void main(String[] args) throws Exception {
- int ret = ToolRunner.run(new HashTable(HBaseConfiguration.create()), args);
- System.exit(ret);
- }
-
- @Override
- public int run(String[] args) throws Exception {
- String[] otherArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs();
- if (!doCommandLine(otherArgs)) {
- return 1;
- }
-
- Job job = createSubmittableJob(otherArgs);
- writeTempManifestFile();
- if (!job.waitForCompletion(true)) {
- LOG.info("Map-reduce job failed!");
- return 1;
- }
- completeManifest();
- return 0;
- }
-
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/IdentityTableMapper.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/IdentityTableMapper.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/IdentityTableMapper.java
deleted file mode 100644
index 7103ef8..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/IdentityTableMapper.java
+++ /dev/null
@@ -1,67 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.mapreduce.Job;
-
-/**
- * Pass the given key and record as-is to the reduce phase.
- */
-@InterfaceAudience.Public
-public class IdentityTableMapper
-extends TableMapper<ImmutableBytesWritable, Result> {
-
- /**
- * Use this before submitting a TableMap job. It will appropriately set up
- * the job.
- *
- * @param table The table name.
- * @param scan The scan with the columns to scan.
- * @param mapper The mapper class.
- * @param job The job configuration.
- * @throws IOException When setting up the job fails.
- */
- @SuppressWarnings("rawtypes")
- public static void initJob(String table, Scan scan,
- Class<? extends TableMapper> mapper, Job job) throws IOException {
- TableMapReduceUtil.initTableMapperJob(table, scan, mapper,
- ImmutableBytesWritable.class, Result.class, job);
- }
-
- /**
- * Pass the key, value to reduce.
- *
- * @param key The current key.
- * @param value The current value.
- * @param context The current context.
- * @throws IOException When writing the record fails.
- * @throws InterruptedException When the job is aborted.
- */
- public void map(ImmutableBytesWritable key, Result value, Context context)
- throws IOException, InterruptedException {
- context.write(key, value);
- }
-
-}
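A minimal map-only sketch (hypothetical table name; output is discarded) showing initJob in use; a real job would normally add a reducer or a concrete output format instead:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.Scan;
    import org.apache.hadoop.hbase.mapreduce.IdentityTableMapper;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;

    public class ScanPassThroughJob {
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        Job job = Job.getInstance(conf, "identity-scan");
        job.setJarByClass(ScanPassThroughJob.class);
        Scan scan = new Scan();
        scan.setCacheBlocks(false); // typical for MapReduce scans
        // Wires TableInputFormat, the scan and the identity mapper onto the job.
        IdentityTableMapper.initJob("sourcetable", scan, IdentityTableMapper.class, job);
        job.setNumReduceTasks(0);                         // map-only in this sketch
        job.setOutputFormatClass(NullOutputFormat.class); // discard the pass-through output
        System.exit(job.waitForCompletion(true) ? 0 : 1);
      }
    }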
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/IdentityTableReducer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/IdentityTableReducer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/IdentityTableReducer.java
deleted file mode 100644
index 5289f46..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/IdentityTableReducer.java
+++ /dev/null
@@ -1,79 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Mutation;
-import org.apache.hadoop.io.Writable;
-
-/**
- * Convenience class that simply writes all values (which must be
- * {@link org.apache.hadoop.hbase.client.Put Put} or
- * {@link org.apache.hadoop.hbase.client.Delete Delete} instances)
- * passed to it out to the configured HBase table. This works in combination
- * with {@link TableOutputFormat} which actually does the writing to HBase.<p>
- *
- * Keys are passed along but ignored in TableOutputFormat. However, they can
- * be used to control how your values will be divided up amongst the specified
- * number of reducers. <p>
- *
- * You can also use the {@link TableMapReduceUtil} class to set up the two
- * classes in one step:
- * <blockquote><code>
- * TableMapReduceUtil.initTableReducerJob("table", IdentityTableReducer.class, job);
- * </code></blockquote>
- * This will also set the proper {@link TableOutputFormat} which is given the
- * <code>table</code> parameter. The
- * {@link org.apache.hadoop.hbase.client.Put Put} or
- * {@link org.apache.hadoop.hbase.client.Delete Delete} define the
- * row and columns implicitly.
- */
-@InterfaceAudience.Public
-public class IdentityTableReducer
-extends TableReducer<Writable, Mutation, Writable> {
-
- @SuppressWarnings("unused")
- private static final Log LOG = LogFactory.getLog(IdentityTableReducer.class);
-
- /**
- * Writes each given record, consisting of the row key and the given values,
- * to the configured {@link org.apache.hadoop.mapreduce.OutputFormat}.
- * It emits the row key and each {@link org.apache.hadoop.hbase.client.Put Put}
- * or {@link org.apache.hadoop.hbase.client.Delete Delete} as separate pairs.
- *
- * @param key The current row key.
- * @param values The {@link org.apache.hadoop.hbase.client.Put Put} or
- * {@link org.apache.hadoop.hbase.client.Delete Delete} list for the given
- * row.
- * @param context The context of the reduce.
- * @throws IOException When writing the record fails.
- * @throws InterruptedException When the job gets interrupted.
- */
- @Override
- public void reduce(Writable key, Iterable<Mutation> values, Context context)
- throws IOException, InterruptedException {
- for(Mutation putOrDelete : values) {
- context.write(key, putOrDelete);
- }
- }
-}
[41/41] hbase git commit: HBASE-18640 Move mapreduce out of
hbase-server into separate module.
Posted by ap...@apache.org.
HBASE-18640 Move mapreduce out of hbase-server into separate module.
- Moves out o.a.h.h.{mapred, mapreduce} to new hbase-mapreduce module which depends
on hbase-server because of classes like *Snapshot{Input,Output}Format.java, WALs, replication, etc
- hbase-backup depends on it for WALPlayer and MR job stuff
- A bunch of tools needed to be pulled into hbase-mapreduce because of their dependencies on MR.
These are: CompactionTool, LoadTestTool, PerformanceEvaluation, ExportSnapshot
This is a better place for them than hbase-server, but the ideal place would be a separate hbase-tools module.
- There were some tests in hbase-server which were digging into these tools for static util functions or
confs. Moved these to a better, easily shared place; for example, security-related stuff went to HBaseKerberosUtils.
- Note that hbase-mapreduce has secondPartExecution tests. On my machine they took like 20 min, so maybe
more on apache jenkins. That's basically an equal reduction in the runtime of the hbase-server tests, which is a
big win!
Change-Id: Ieeb7235014717ca83ee5cb13b2a27fddfa6838e8
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/664b6be0
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/664b6be0
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/664b6be0
Branch: refs/heads/master
Commit: 664b6be0ef65218328847ea501fa88cb877e6759
Parents: 8d33949
Author: Apekshit Sharma <ap...@apache.org>
Authored: Sun Aug 20 14:34:16 2017 -0700
Committer: Apekshit Sharma <ap...@apache.org>
Committed: Fri Aug 25 18:38:48 2017 -0700
----------------------------------------------------------------------
hbase-assembly/pom.xml | 4 +
.../src/main/assembly/hadoop-two-compat.xml | 1 +
hbase-assembly/src/main/assembly/src.xml | 1 +
hbase-backup/pom.xml | 10 +
hbase-examples/pom.xml | 4 +
hbase-it/pom.xml | 16 +
.../hadoop/hbase/IntegrationTestIngest.java | 5 +-
.../IntegrationTestIngestStripeCompactions.java | 4 +-
.../hbase/IntegrationTestIngestWithMOB.java | 5 +-
.../hbase/IntegrationTestRegionReplicaPerf.java | 3 +-
.../mapreduce/IntegrationTestImportTsv.java | 1 -
.../test/IntegrationTestLoadAndVerify.java | 2 +-
hbase-mapreduce/pom.xml | 316 +++
.../org/apache/hadoop/hbase/mapred/Driver.java | 52 +
.../hadoop/hbase/mapred/GroupingTableMap.java | 157 ++
.../hadoop/hbase/mapred/HRegionPartitioner.java | 95 +
.../hadoop/hbase/mapred/IdentityTableMap.java | 76 +
.../hbase/mapred/IdentityTableReduce.java | 61 +
.../mapred/MultiTableSnapshotInputFormat.java | 128 +
.../apache/hadoop/hbase/mapred/RowCounter.java | 121 +
.../hadoop/hbase/mapred/TableInputFormat.java | 90 +
.../hbase/mapred/TableInputFormatBase.java | 313 +++
.../apache/hadoop/hbase/mapred/TableMap.java | 38 +
.../hadoop/hbase/mapred/TableMapReduceUtil.java | 376 +++
.../hadoop/hbase/mapred/TableOutputFormat.java | 134 +
.../hadoop/hbase/mapred/TableRecordReader.java | 139 +
.../hbase/mapred/TableRecordReaderImpl.java | 259 ++
.../apache/hadoop/hbase/mapred/TableReduce.java | 38 +
.../hbase/mapred/TableSnapshotInputFormat.java | 166 ++
.../apache/hadoop/hbase/mapred/TableSplit.java | 154 +
.../hadoop/hbase/mapred/package-info.java | 26 +
.../hadoop/hbase/mapreduce/CellCounter.java | 333 +++
.../hadoop/hbase/mapreduce/CellCreator.java | 134 +
.../hadoop/hbase/mapreduce/CopyTable.java | 386 +++
.../DefaultVisibilityExpressionResolver.java | 144 +
.../apache/hadoop/hbase/mapreduce/Driver.java | 64 +
.../apache/hadoop/hbase/mapreduce/Export.java | 197 ++
.../hbase/mapreduce/GroupingTableMapper.java | 177 ++
.../hbase/mapreduce/HFileInputFormat.java | 174 ++
.../hbase/mapreduce/HFileOutputFormat2.java | 902 ++++++
.../hbase/mapreduce/HRegionPartitioner.java | 140 +
.../hadoop/hbase/mapreduce/HashTable.java | 747 +++++
.../hbase/mapreduce/IdentityTableMapper.java | 67 +
.../hbase/mapreduce/IdentityTableReducer.java | 79 +
.../apache/hadoop/hbase/mapreduce/Import.java | 780 ++++++
.../hadoop/hbase/mapreduce/ImportTsv.java | 793 ++++++
.../hadoop/hbase/mapreduce/JarFinder.java | 186 ++
.../hbase/mapreduce/KeyValueSerialization.java | 88 +
.../hbase/mapreduce/KeyValueSortReducer.java | 57 +
.../mapreduce/MultiTableHFileOutputFormat.java | 122 +
.../hbase/mapreduce/MultiTableInputFormat.java | 104 +
.../mapreduce/MultiTableInputFormatBase.java | 296 ++
.../hbase/mapreduce/MultiTableOutputFormat.java | 176 ++
.../MultiTableSnapshotInputFormat.java | 106 +
.../MultiTableSnapshotInputFormatImpl.java | 252 ++
.../mapreduce/MultithreadedTableMapper.java | 301 ++
.../hbase/mapreduce/MutationSerialization.java | 98 +
.../hadoop/hbase/mapreduce/PutCombiner.java | 98 +
.../hadoop/hbase/mapreduce/PutSortReducer.java | 147 +
.../hbase/mapreduce/RegionSizeCalculator.java | 127 +
.../hbase/mapreduce/ResultSerialization.java | 158 ++
.../hadoop/hbase/mapreduce/RowCounter.java | 265 ++
.../mapreduce/SimpleTotalOrderPartitioner.java | 143 +
.../hadoop/hbase/mapreduce/SyncTable.java | 786 ++++++
.../hbase/mapreduce/TableInputFormat.java | 294 ++
.../hbase/mapreduce/TableInputFormatBase.java | 652 +++++
.../hbase/mapreduce/TableMapReduceUtil.java | 1027 +++++++
.../hadoop/hbase/mapreduce/TableMapper.java | 38 +
.../hbase/mapreduce/TableOutputCommitter.java | 67 +
.../hbase/mapreduce/TableOutputFormat.java | 239 ++
.../hbase/mapreduce/TableRecordReader.java | 147 +
.../hbase/mapreduce/TableRecordReaderImpl.java | 315 +++
.../hadoop/hbase/mapreduce/TableReducer.java | 45 +
.../mapreduce/TableSnapshotInputFormat.java | 209 ++
.../mapreduce/TableSnapshotInputFormatImpl.java | 410 +++
.../hadoop/hbase/mapreduce/TableSplit.java | 395 +++
.../hadoop/hbase/mapreduce/TextSortReducer.java | 213 ++
.../hbase/mapreduce/TsvImporterMapper.java | 232 ++
.../hbase/mapreduce/TsvImporterTextMapper.java | 128 +
.../mapreduce/VisibilityExpressionResolver.java | 45 +
.../hadoop/hbase/mapreduce/WALInputFormat.java | 344 +++
.../hadoop/hbase/mapreduce/WALPlayer.java | 384 +++
.../hadoop/hbase/mapreduce/package-info.java | 26 +
.../replication/VerifyReplication.java | 700 +++++
.../hbase/regionserver/CompactionTool.java | 470 ++++
.../hadoop/hbase/snapshot/ExportSnapshot.java | 1111 ++++++++
.../util/MapreduceDependencyClasspathTool.java | 73 +
.../hadoop/hbase/PerformanceEvaluation.java | 2627 ++++++++++++++++++
.../hadoop/hbase/ScanPerformanceEvaluation.java | 406 +++
.../hadoop/hbase/TestPerformanceEvaluation.java | 218 ++
.../apache/hadoop/hbase/mapred/TestDriver.java | 41 +
.../hbase/mapred/TestGroupingTableMap.java | 181 ++
.../hbase/mapred/TestIdentityTableMap.java | 64 +
.../TestMultiTableSnapshotInputFormat.java | 135 +
.../hadoop/hbase/mapred/TestRowCounter.java | 163 ++
.../hadoop/hbase/mapred/TestSplitTable.java | 116 +
.../hbase/mapred/TestTableInputFormat.java | 460 +++
.../hadoop/hbase/mapred/TestTableMapReduce.java | 103 +
.../hbase/mapred/TestTableMapReduceUtil.java | 272 ++
.../TestTableOutputFormatConnectionExhaust.java | 104 +
.../mapred/TestTableSnapshotInputFormat.java | 271 ++
...opSecurityEnabledUserProviderForTesting.java | 41 +
.../MultiTableInputFormatTestBase.java | 277 ++
.../hadoop/hbase/mapreduce/NMapInputFormat.java | 134 +
.../TableSnapshotInputFormatTestBase.java | 231 ++
.../hadoop/hbase/mapreduce/TestCellCounter.java | 376 +++
.../hadoop/hbase/mapreduce/TestCopyTable.java | 262 ++
.../mapreduce/TestGroupingTableMapper.java | 68 +
.../hbase/mapreduce/TestHFileOutputFormat2.java | 1496 ++++++++++
.../hbase/mapreduce/TestHRegionPartitioner.java | 71 +
.../hadoop/hbase/mapreduce/TestHashTable.java | 194 ++
.../hbase/mapreduce/TestImportExport.java | 726 +++++
.../TestImportTSVWithOperationAttributes.java | 266 ++
.../hbase/mapreduce/TestImportTSVWithTTLs.java | 175 ++
.../TestImportTSVWithVisibilityLabels.java | 495 ++++
.../hadoop/hbase/mapreduce/TestImportTsv.java | 571 ++++
.../hbase/mapreduce/TestImportTsvParser.java | 314 +++
.../hadoop/hbase/mapreduce/TestJarFinder.java | 132 +
.../TestLoadIncrementalHFilesSplitRecovery.java | 669 +++++
.../mapreduce/TestMultiTableInputFormat.java | 49 +
.../TestMultiTableSnapshotInputFormat.java | 92 +
.../TestMultiTableSnapshotInputFormatImpl.java | 186 ++
.../mapreduce/TestMultithreadedTableMapper.java | 264 ++
.../mapreduce/TestRegionSizeCalculator.java | 160 ++
.../hadoop/hbase/mapreduce/TestRowCounter.java | 400 +++
.../TestSecureLoadIncrementalHFiles.java | 70 +
...ecureLoadIncrementalHFilesSplitRecovery.java | 69 +
.../TestSimpleTotalOrderPartitioner.java | 81 +
.../hadoop/hbase/mapreduce/TestSyncTable.java | 339 +++
.../hbase/mapreduce/TestTableInputFormat.java | 481 ++++
.../mapreduce/TestTableInputFormatBase.java | 53 +
.../mapreduce/TestTableInputFormatScan1.java | 200 ++
.../mapreduce/TestTableInputFormatScan2.java | 118 +
.../mapreduce/TestTableInputFormatScanBase.java | 287 ++
.../hbase/mapreduce/TestTableMapReduce.java | 174 ++
.../hbase/mapreduce/TestTableMapReduceBase.java | 233 ++
.../hbase/mapreduce/TestTableMapReduceUtil.java | 99 +
.../mapreduce/TestTableSnapshotInputFormat.java | 373 +++
.../hadoop/hbase/mapreduce/TestTableSplit.java | 129 +
.../hbase/mapreduce/TestTimeRangeMapRed.java | 211 ++
.../hadoop/hbase/mapreduce/TestWALPlayer.java | 231 ++
.../hbase/mapreduce/TestWALRecordReader.java | 276 ++
.../mapreduce/TsvImporterCustomTestMapper.java | 80 +
.../TsvImporterCustomTestMapperForOprAttr.java | 57 +
.../replication/TestReplicationSmallTests.java | 1059 +++++++
.../hbase/snapshot/TestExportSnapshot.java | 381 +++
.../snapshot/TestExportSnapshotHelpers.java | 91 +
.../snapshot/TestExportSnapshotNoCluster.java | 112 +
.../hbase/snapshot/TestMobExportSnapshot.java | 65 +
.../snapshot/TestMobSecureExportSnapshot.java | 59 +
.../snapshot/TestSecureExportSnapshot.java | 64 +
.../apache/hadoop/hbase/util/LoadTestTool.java | 915 ++++++
.../src/test/resources/hbase-site.xml | 161 ++
.../src/test/resources/hbase-site2.xml | 146 +
.../src/test/resources/hdfs-site.xml | 32 +
.../src/test/resources/log4j.properties | 68 +
.../src/test/resources/mapred-queues.xml | 75 +
.../src/test/resources/mapred-site.xml | 34 +
.../PerformanceEvaluation_Counter.properties | 28 +
.../hbase/mapreduce/exportedTableIn94Format | Bin 0 -> 374 bytes
hbase-rest/pom.xml | 10 +
.../hbase/rest/PerformanceEvaluation.java | 6 +-
.../hbase/client/TableSnapshotScanner.java | 4 +-
.../org/apache/hadoop/hbase/mapred/Driver.java | 52 -
.../hadoop/hbase/mapred/GroupingTableMap.java | 157 --
.../hadoop/hbase/mapred/HRegionPartitioner.java | 96 -
.../hadoop/hbase/mapred/IdentityTableMap.java | 76 -
.../hbase/mapred/IdentityTableReduce.java | 61 -
.../mapred/MultiTableSnapshotInputFormat.java | 128 -
.../apache/hadoop/hbase/mapred/RowCounter.java | 121 -
.../hadoop/hbase/mapred/TableInputFormat.java | 90 -
.../hbase/mapred/TableInputFormatBase.java | 313 ---
.../apache/hadoop/hbase/mapred/TableMap.java | 38 -
.../hadoop/hbase/mapred/TableMapReduceUtil.java | 376 ---
.../hadoop/hbase/mapred/TableOutputFormat.java | 134 -
.../hadoop/hbase/mapred/TableRecordReader.java | 139 -
.../hbase/mapred/TableRecordReaderImpl.java | 259 --
.../apache/hadoop/hbase/mapred/TableReduce.java | 38 -
.../hbase/mapred/TableSnapshotInputFormat.java | 166 --
.../apache/hadoop/hbase/mapred/TableSplit.java | 154 -
.../hadoop/hbase/mapred/package-info.java | 26 -
.../hadoop/hbase/mapreduce/CellCounter.java | 333 ---
.../hadoop/hbase/mapreduce/CellCreator.java | 134 -
.../hadoop/hbase/mapreduce/CopyTable.java | 386 ---
.../DefaultVisibilityExpressionResolver.java | 144 -
.../apache/hadoop/hbase/mapreduce/Driver.java | 64 -
.../apache/hadoop/hbase/mapreduce/Export.java | 197 --
.../hbase/mapreduce/GroupingTableMapper.java | 177 --
.../hbase/mapreduce/HFileInputFormat.java | 174 --
.../hbase/mapreduce/HFileOutputFormat2.java | 902 ------
.../hbase/mapreduce/HRegionPartitioner.java | 140 -
.../hadoop/hbase/mapreduce/HashTable.java | 747 -----
.../hbase/mapreduce/IdentityTableMapper.java | 67 -
.../hbase/mapreduce/IdentityTableReducer.java | 79 -
.../apache/hadoop/hbase/mapreduce/Import.java | 780 ------
.../hadoop/hbase/mapreduce/ImportTsv.java | 793 ------
.../hadoop/hbase/mapreduce/JarFinder.java | 186 --
.../hbase/mapreduce/KeyValueSerialization.java | 88 -
.../hbase/mapreduce/KeyValueSortReducer.java | 56 -
.../mapreduce/MultiTableHFileOutputFormat.java | 122 -
.../hbase/mapreduce/MultiTableInputFormat.java | 104 -
.../mapreduce/MultiTableInputFormatBase.java | 297 --
.../hbase/mapreduce/MultiTableOutputFormat.java | 176 --
.../MultiTableSnapshotInputFormat.java | 106 -
.../MultiTableSnapshotInputFormatImpl.java | 252 --
.../mapreduce/MultithreadedTableMapper.java | 301 --
.../hbase/mapreduce/MutationSerialization.java | 98 -
.../hadoop/hbase/mapreduce/PutCombiner.java | 98 -
.../hadoop/hbase/mapreduce/PutSortReducer.java | 147 -
.../hbase/mapreduce/ResultSerialization.java | 158 --
.../hadoop/hbase/mapreduce/RowCounter.java | 265 --
.../mapreduce/SimpleTotalOrderPartitioner.java | 143 -
.../hadoop/hbase/mapreduce/SyncTable.java | 786 ------
.../hbase/mapreduce/TableInputFormat.java | 294 --
.../hbase/mapreduce/TableInputFormatBase.java | 653 -----
.../hbase/mapreduce/TableMapReduceUtil.java | 1027 -------
.../hadoop/hbase/mapreduce/TableMapper.java | 38 -
.../hbase/mapreduce/TableOutputCommitter.java | 67 -
.../hbase/mapreduce/TableOutputFormat.java | 239 --
.../hbase/mapreduce/TableRecordReader.java | 147 -
.../hbase/mapreduce/TableRecordReaderImpl.java | 315 ---
.../hadoop/hbase/mapreduce/TableReducer.java | 45 -
.../mapreduce/TableSnapshotInputFormat.java | 210 --
.../mapreduce/TableSnapshotInputFormatImpl.java | 412 ---
.../hadoop/hbase/mapreduce/TableSplit.java | 395 ---
.../hadoop/hbase/mapreduce/TextSortReducer.java | 213 --
.../hbase/mapreduce/TsvImporterMapper.java | 232 --
.../hbase/mapreduce/TsvImporterTextMapper.java | 128 -
.../mapreduce/VisibilityExpressionResolver.java | 45 -
.../hadoop/hbase/mapreduce/WALInputFormat.java | 344 ---
.../hadoop/hbase/mapreduce/WALPlayer.java | 384 ---
.../hadoop/hbase/mapreduce/package-info.java | 26 -
.../replication/VerifyReplication.java | 700 -----
.../hbase/regionserver/CompactionTool.java | 470 ----
.../hadoop/hbase/snapshot/ExportSnapshot.java | 1111 --------
.../util/MapreduceDependencyClasspathTool.java | 73 -
.../hadoop/hbase/util/RegionSizeCalculator.java | 146 -
.../hadoop/hbase/PerformanceEvaluation.java | 2626 -----------------
.../hadoop/hbase/ScanPerformanceEvaluation.java | 406 ---
.../hadoop/hbase/TestPerformanceEvaluation.java | 218 --
.../hbase/client/TestTableSnapshotScanner.java | 18 +-
.../apache/hadoop/hbase/mapred/TestDriver.java | 41 -
.../hbase/mapred/TestGroupingTableMap.java | 181 --
.../hbase/mapred/TestIdentityTableMap.java | 64 -
.../TestMultiTableSnapshotInputFormat.java | 135 -
.../hadoop/hbase/mapred/TestRowCounter.java | 163 --
.../hadoop/hbase/mapred/TestSplitTable.java | 116 -
.../hbase/mapred/TestTableInputFormat.java | 461 ---
.../hadoop/hbase/mapred/TestTableMapReduce.java | 103 -
.../hbase/mapred/TestTableMapReduceUtil.java | 272 --
.../TestTableOutputFormatConnectionExhaust.java | 104 -
.../mapred/TestTableSnapshotInputFormat.java | 271 --
...opSecurityEnabledUserProviderForTesting.java | 41 -
.../MultiTableInputFormatTestBase.java | 277 --
.../hadoop/hbase/mapreduce/NMapInputFormat.java | 134 -
.../TableSnapshotInputFormatTestBase.java | 231 --
.../hadoop/hbase/mapreduce/TestCellCounter.java | 376 ---
.../hadoop/hbase/mapreduce/TestCopyTable.java | 262 --
.../mapreduce/TestGroupingTableMapper.java | 68 -
.../hbase/mapreduce/TestHFileOutputFormat2.java | 1495 ----------
.../hbase/mapreduce/TestHRegionPartitioner.java | 71 -
.../hadoop/hbase/mapreduce/TestHashTable.java | 194 --
.../hbase/mapreduce/TestImportExport.java | 727 -----
.../TestImportTSVWithOperationAttributes.java | 266 --
.../hbase/mapreduce/TestImportTSVWithTTLs.java | 175 --
.../TestImportTSVWithVisibilityLabels.java | 495 ----
.../hadoop/hbase/mapreduce/TestImportTsv.java | 571 ----
.../hbase/mapreduce/TestImportTsvParser.java | 314 ---
.../hadoop/hbase/mapreduce/TestJarFinder.java | 132 -
.../mapreduce/TestLoadIncrementalHFiles.java | 1 -
.../TestLoadIncrementalHFilesSplitRecovery.java | 669 -----
.../mapreduce/TestMultiTableInputFormat.java | 49 -
.../TestMultiTableSnapshotInputFormat.java | 92 -
.../TestMultiTableSnapshotInputFormatImpl.java | 186 --
.../mapreduce/TestMultithreadedTableMapper.java | 264 --
.../hadoop/hbase/mapreduce/TestRowCounter.java | 400 ---
.../TestSecureLoadIncrementalHFiles.java | 70 -
...ecureLoadIncrementalHFilesSplitRecovery.java | 69 -
.../TestSimpleTotalOrderPartitioner.java | 81 -
.../hadoop/hbase/mapreduce/TestSyncTable.java | 339 ---
.../hbase/mapreduce/TestTableInputFormat.java | 481 ----
.../mapreduce/TestTableInputFormatBase.java | 53 -
.../mapreduce/TestTableInputFormatScan1.java | 200 --
.../mapreduce/TestTableInputFormatScan2.java | 118 -
.../mapreduce/TestTableInputFormatScanBase.java | 287 --
.../hbase/mapreduce/TestTableMapReduce.java | 174 --
.../hbase/mapreduce/TestTableMapReduceBase.java | 233 --
.../hbase/mapreduce/TestTableMapReduceUtil.java | 99 -
.../mapreduce/TestTableSnapshotInputFormat.java | 384 ---
.../hadoop/hbase/mapreduce/TestTableSplit.java | 129 -
.../hbase/mapreduce/TestTimeRangeMapRed.java | 211 --
.../hadoop/hbase/mapreduce/TestWALPlayer.java | 231 --
.../hbase/mapreduce/TestWALRecordReader.java | 276 --
.../mapreduce/TsvImporterCustomTestMapper.java | 80 -
.../TsvImporterCustomTestMapperForOprAttr.java | 58 -
.../hbase/namespace/TestNamespaceAuditor.java | 8 +-
.../regionserver/TestHRegionFileSystem.java | 7 +-
.../replication/TestReplicationSmallTests.java | 1059 -------
.../hbase/security/HBaseKerberosUtils.java | 26 +-
.../hbase/snapshot/TestExportSnapshot.java | 381 ---
.../snapshot/TestExportSnapshotHelpers.java | 91 -
.../snapshot/TestExportSnapshotNoCluster.java | 112 -
.../hbase/snapshot/TestMobExportSnapshot.java | 65 -
.../snapshot/TestMobSecureExportSnapshot.java | 59 -
.../snapshot/TestSecureExportSnapshot.java | 64 -
.../apache/hadoop/hbase/util/HFileTestUtil.java | 14 +
.../util/LoadTestDataGeneratorWithTags.java | 3 +-
.../apache/hadoop/hbase/util/LoadTestTool.java | 968 -------
.../hadoop/hbase/util/MultiThreadedAction.java | 2 +-
.../hbase/util/MultiThreadedReaderWithACL.java | 3 +-
.../hbase/util/MultiThreadedUpdaterWithACL.java | 3 +-
.../hadoop/hbase/util/RestartMetaTest.java | 8 +-
.../hbase/util/TestRegionSizeCalculator.java | 159 --
.../hbase/util/test/LoadTestDataGenerator.java | 23 +
.../PerformanceEvaluation_Counter.properties | 28 -
.../hbase/mapreduce/exportedTableIn94Format | Bin 374 -> 0 bytes
hbase-spark/pom.xml | 4 +
.../hbase/spark/TestJavaHBaseContext.java | 1 -
pom.xml | 13 +
src/main/asciidoc/_chapters/ops_mgt.adoc | 32 +-
320 files changed, 38781 insertions(+), 37899 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-assembly/pom.xml
----------------------------------------------------------------------
diff --git a/hbase-assembly/pom.xml b/hbase-assembly/pom.xml
index c9488ca..18b63b5 100644
--- a/hbase-assembly/pom.xml
+++ b/hbase-assembly/pom.xml
@@ -195,6 +195,10 @@
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
</dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-mapreduce</artifactId>
+ </dependency>
<!-- To dump tools in hbase-procedure into cached_classpath.txt. -->
<dependency>
<groupId>org.apache.hbase</groupId>
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-assembly/src/main/assembly/hadoop-two-compat.xml
----------------------------------------------------------------------
diff --git a/hbase-assembly/src/main/assembly/hadoop-two-compat.xml b/hbase-assembly/src/main/assembly/hadoop-two-compat.xml
index 1592a3b..a66237b 100644
--- a/hbase-assembly/src/main/assembly/hadoop-two-compat.xml
+++ b/hbase-assembly/src/main/assembly/hadoop-two-compat.xml
@@ -50,6 +50,7 @@
<include>org.apache.hbase:hbase-thrift</include>
<include>org.apache.hbase:hbase-external-blockcache</include>
<include>org.apache.hbase:hbase-backup</include>
+ <include>org.apache.hbase:hbase-mapreduce</include>
</includes>
<!-- Binaries for the dependencies also go in the hbase-jars directory -->
<binaries>
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-assembly/src/main/assembly/src.xml
----------------------------------------------------------------------
diff --git a/hbase-assembly/src/main/assembly/src.xml b/hbase-assembly/src/main/assembly/src.xml
index b00f05f..a0b700c 100644
--- a/hbase-assembly/src/main/assembly/src.xml
+++ b/hbase-assembly/src/main/assembly/src.xml
@@ -62,6 +62,7 @@
<include>org.apache.hbase:hbase-testing-util</include>
<include>org.apache.hbase:hbase-thrift</include>
<include>org.apache.hbase:hbase-backup</include>
+ <include>org.apache.hbase:hbase-mapreduce</include>
</includes>
<!-- Include all the sources in the top directory -->
<sources>
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-backup/pom.xml
----------------------------------------------------------------------
diff --git a/hbase-backup/pom.xml b/hbase-backup/pom.xml
index 7c7d8b5..9b3aac6 100644
--- a/hbase-backup/pom.xml
+++ b/hbase-backup/pom.xml
@@ -109,6 +109,16 @@
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-mapreduce</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-mapreduce</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
<artifactId>hbase-common</artifactId>
</dependency>
<dependency>
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-examples/pom.xml
----------------------------------------------------------------------
diff --git a/hbase-examples/pom.xml b/hbase-examples/pom.xml
index 7a6a51a..422b28e 100644
--- a/hbase-examples/pom.xml
+++ b/hbase-examples/pom.xml
@@ -146,6 +146,10 @@
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-mapreduce</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
<artifactId>hbase-endpoint</artifactId>
</dependency>
<dependency>
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-it/pom.xml
----------------------------------------------------------------------
diff --git a/hbase-it/pom.xml b/hbase-it/pom.xml
index b9b12a4..5dc4d4f 100644
--- a/hbase-it/pom.xml
+++ b/hbase-it/pom.xml
@@ -200,6 +200,22 @@
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-mapreduce</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-mapreduce</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-mapreduce</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
<artifactId>hbase-rsgroup</artifactId>
</dependency>
<dependency>
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngest.java
----------------------------------------------------------------------
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngest.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngest.java
index 76be4e8a..9bc3131 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngest.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngest.java
@@ -28,6 +28,7 @@ import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.testclassification.IntegrationTests;
import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.HFileTestUtil;
import org.apache.hadoop.hbase.util.LoadTestTool;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.util.StringUtils;
@@ -70,7 +71,7 @@ public class IntegrationTestIngest extends IntegrationTestBase {
protected String[] LOAD_TEST_TOOL_INIT_ARGS = {
LoadTestTool.OPT_COLUMN_FAMILIES,
LoadTestTool.OPT_COMPRESSION,
- LoadTestTool.OPT_DATA_BLOCK_ENCODING,
+ HFileTestUtil.OPT_DATA_BLOCK_ENCODING,
LoadTestTool.OPT_INMEMORY,
LoadTestTool.OPT_ENCRYPTION,
LoadTestTool.OPT_NUM_REGIONS_PER_SERVER,
@@ -138,7 +139,7 @@ public class IntegrationTestIngest extends IntegrationTestBase {
String familiesString = getConf().get(
String.format("%s.%s", clazz, LoadTestTool.OPT_COLUMN_FAMILIES));
if (familiesString == null) {
- for (byte[] family : LoadTestTool.DEFAULT_COLUMN_FAMILIES) {
+ for (byte[] family : HFileTestUtil.DEFAULT_COLUMN_FAMILIES) {
families.add(Bytes.toString(family));
}
} else {
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngestStripeCompactions.java
----------------------------------------------------------------------
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngestStripeCompactions.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngestStripeCompactions.java
index d64fbb0..fc79abb 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngestStripeCompactions.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngestStripeCompactions.java
@@ -25,7 +25,7 @@ import org.apache.hadoop.hbase.regionserver.HStore;
import org.apache.hadoop.hbase.regionserver.StoreEngine;
import org.apache.hadoop.hbase.regionserver.StripeStoreEngine;
import org.apache.hadoop.hbase.testclassification.IntegrationTests;
-import org.apache.hadoop.hbase.util.LoadTestTool;
+import org.apache.hadoop.hbase.util.HFileTestUtil;
import org.apache.hadoop.util.ToolRunner;
import org.junit.experimental.categories.Category;
@@ -41,7 +41,7 @@ public class IntegrationTestIngestStripeCompactions extends IntegrationTestInges
HTableDescriptor htd = new HTableDescriptor(getTablename());
htd.setConfiguration(StoreEngine.STORE_ENGINE_CLASS_KEY, StripeStoreEngine.class.getName());
htd.setConfiguration(HStore.BLOCKING_STOREFILES_KEY, "100");
- HColumnDescriptor hcd = new HColumnDescriptor(LoadTestTool.DEFAULT_COLUMN_FAMILY);
+ HColumnDescriptor hcd = new HColumnDescriptor(HFileTestUtil.DEFAULT_COLUMN_FAMILY);
HBaseTestingUtility.createPreSplitLoadTestTable(util.getConfiguration(), htd, hcd);
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngestWithMOB.java
----------------------------------------------------------------------
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngestWithMOB.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngestWithMOB.java
index 5bbb12b..010e4b9 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngestWithMOB.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngestWithMOB.java
@@ -31,6 +31,7 @@ import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.testclassification.IntegrationTests;
import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.HFileTestUtil;
import org.apache.hadoop.hbase.util.LoadTestDataGeneratorWithMOB;
import org.apache.hadoop.hbase.util.LoadTestTool;
import org.apache.hadoop.util.ToolRunner;
@@ -44,7 +45,7 @@ import org.junit.experimental.categories.Category;
public class IntegrationTestIngestWithMOB extends IntegrationTestIngest {
private static final char COLON = ':';
- private byte[] mobColumnFamily = LoadTestTool.DEFAULT_COLUMN_FAMILY;
+ private byte[] mobColumnFamily = HFileTestUtil.DEFAULT_COLUMN_FAMILY;
public static final String THRESHOLD = "threshold";
public static final String MIN_MOB_DATA_SIZE = "minMobDataSize";
public static final String MAX_MOB_DATA_SIZE = "maxMobDataSize";
@@ -56,7 +57,7 @@ public class IntegrationTestIngestWithMOB extends IntegrationTestIngest {
//similar to LOAD_TEST_TOOL_INIT_ARGS except OPT_IN_MEMORY is removed
protected String[] LOAD_TEST_TOOL_MOB_INIT_ARGS = {
LoadTestTool.OPT_COMPRESSION,
- LoadTestTool.OPT_DATA_BLOCK_ENCODING,
+ HFileTestUtil.OPT_DATA_BLOCK_ENCODING,
LoadTestTool.OPT_ENCRYPTION,
LoadTestTool.OPT_NUM_REGIONS_PER_SERVER,
LoadTestTool.OPT_REGION_REPLICATION,
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestRegionReplicaPerf.java
----------------------------------------------------------------------
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestRegionReplicaPerf.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestRegionReplicaPerf.java
index d649bdb..3135bd0 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestRegionReplicaPerf.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestRegionReplicaPerf.java
@@ -72,6 +72,7 @@ public class IntegrationTestRegionReplicaPerf extends IntegrationTestBase {
private static final String PRIMARY_TIMEOUT_DEFAULT = "" + 10 * 1000; // 10 ms
private static final String NUM_RS_KEY = "numRs";
private static final String NUM_RS_DEFAULT = "" + 3;
+ public static final String FAMILY_NAME = "info";
/** Extract a descriptive statistic from a {@link com.codahale.metrics.Histogram}. */
private enum Stat {
@@ -236,7 +237,7 @@ public class IntegrationTestRegionReplicaPerf extends IntegrationTestBase {
@Override
protected Set<String> getColumnFamilies() {
- return Sets.newHashSet(Bytes.toString(PerformanceEvaluation.FAMILY_NAME));
+ return Sets.newHashSet(FAMILY_NAME);
}
/** Compute the mean of the given {@code stat} from a timing results. */
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-it/src/test/java/org/apache/hadoop/hbase/mapreduce/IntegrationTestImportTsv.java
----------------------------------------------------------------------
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/mapreduce/IntegrationTestImportTsv.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/mapreduce/IntegrationTestImportTsv.java
index 9d04bf9..fb7acf4 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/mapreduce/IntegrationTestImportTsv.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/mapreduce/IntegrationTestImportTsv.java
@@ -29,7 +29,6 @@ import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
-import java.util.UUID;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestLoadAndVerify.java
----------------------------------------------------------------------
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestLoadAndVerify.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestLoadAndVerify.java
index f042521..b9d16a1 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestLoadAndVerify.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestLoadAndVerify.java
@@ -44,6 +44,7 @@ import org.apache.hadoop.hbase.IntegrationTestBase;
import org.apache.hadoop.hbase.IntegrationTestingUtility;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.testclassification.IntegrationTests;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
@@ -55,7 +56,6 @@ import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.ScannerCallable;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.NMapInputFormat;
-import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl;
import org.apache.hadoop.hbase.util.AbstractHBaseTool;
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/pom.xml
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/pom.xml b/hbase-mapreduce/pom.xml
new file mode 100644
index 0000000..f75c9f9
--- /dev/null
+++ b/hbase-mapreduce/pom.xml
@@ -0,0 +1,316 @@
+<?xml version="1.0"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <!--
+ /**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ -->
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <artifactId>hbase-build-configuration</artifactId>
+ <groupId>org.apache.hbase</groupId>
+ <version>3.0.0-SNAPSHOT</version>
+ <relativePath>../hbase-build-configuration</relativePath>
+ </parent>
+ <artifactId>hbase-mapreduce</artifactId>
+ <name>Apache HBase - MapReduce</name>
+ <description>
+ This module contains implementations of InputFormat, OutputFormat, Mapper, Reducer, etc., which
+ are needed for running MapReduce jobs on tables, WALs, HFiles and other HBase-specific constructs.
+ It also contains a number of tools: RowCounter, ImportTsv, Import, Export, CompactionTool,
+ ExportSnapshot, WALPlayer, etc.
+ </description>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-site-plugin</artifactId>
+ <configuration>
+ <skip>true</skip>
+ </configuration>
+ </plugin>
+ <plugin>
+ <!--Make it so assembly:single does nothing in here-->
+ <artifactId>maven-assembly-plugin</artifactId>
+ <configuration>
+ <skipAssembly>true</skipAssembly>
+ </configuration>
+ </plugin>
+ <!-- Testing plugins -->
+ <plugin>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <configuration>
+ <properties>
+ <property>
+ <name>listener</name>
+ <value>org.apache.hadoop.hbase.ServerResourceCheckerJUnitListener</value>
+ </property>
+ </properties>
+ <systemPropertyVariables>
+ <org.apache.hadoop.hbase.shaded.io.netty.packagePrefix>org.apache.hadoop.hbase.shaded.</org.apache.hadoop.hbase.shaded.io.netty.packagePrefix>
+ </systemPropertyVariables>
+ </configuration>
+ </plugin>
+ <!-- Make a jar and put the sources in the jar -->
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-source-plugin</artifactId>
+ </plugin>
+ </plugins>
+ <pluginManagement>
+ <plugins>
+ <!--This plugin's configuration is used to store Eclipse m2e settings
+ only. It has no influence on the Maven build itself.-->
+ <plugin>
+ <groupId>org.eclipse.m2e</groupId>
+ <artifactId>lifecycle-mapping</artifactId>
+ <version>1.0.0</version>
+ <configuration>
+ <lifecycleMappingMetadata>
+ <pluginExecutions>
+ <pluginExecution>
+ <pluginExecutionFilter>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <versionRange>[3.2,)</versionRange>
+ <goals>
+ <goal>compile</goal>
+ </goals>
+ </pluginExecutionFilter>
+ <action>
+ <ignore></ignore>
+ </action>
+ </pluginExecution>
+ </pluginExecutions>
+ </lifecycleMappingMetadata>
+ </configuration>
+ </plugin>
+ </plugins>
+ </pluginManagement>
+ </build>
+
+ <dependencies>
+ <!-- Intra-project dependencies -->
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-annotations</artifactId>
+ <exclusions>
+ <exclusion>
+ <groupId>jdk.tools</groupId>
+ <artifactId>jdk.tools</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-annotations</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-common</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-common</artifactId>
+ <type>test-jar</type>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-client</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-hadoop-compat</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-hadoop2-compat</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-server</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-server</artifactId>
+ <type>test-jar</type>
+ </dependency>
+ <!-- General dependencies -->
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-hdfs</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.mockito</groupId>
+ <artifactId>mockito-all</artifactId>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+ <profiles>
+ <!-- Skip the tests in this module -->
+ <profile>
+ <id>skipMapReduceTests</id>
+ <activation>
+ <property>
+ <name>skipMapReduceTests</name>
+ </property>
+ </activation>
+ <properties>
+ <surefire.skipFirstPart>true</surefire.skipFirstPart>
+ <surefire.skipSecondPart>true</surefire.skipSecondPart>
+ </properties>
+ </profile>
+ <!-- profile against Hadoop 2.x: This is the default. -->
+ <profile>
+ <id>hadoop-2.0</id>
+ <activation>
+ <property>
+ <!--Below formatting for dev-support/generate-hadoopX-poms.sh-->
+ <!--h2--><name>!hadoop.profile</name>
+ </property>
+ </activation>
+ <dependencies>
+ <dependency>
+ <groupId>com.github.stephenc.findbugs</groupId>
+ <artifactId>findbugs-annotations</artifactId>
+ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ <exclusions>
+ <exclusion>
+ <groupId>net.java.dev.jets3t</groupId>
+ <artifactId>jets3t</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>javax.servlet.jsp</groupId>
+ <artifactId>jsp-api</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.mortbay.jetty</groupId>
+ <artifactId>jetty</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.sun.jersey</groupId>
+ <artifactId>jersey-server</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.sun.jersey</groupId>
+ <artifactId>jersey-core</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.sun.jersey</groupId>
+ <artifactId>jersey-json</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>javax.servlet</groupId>
+ <artifactId>servlet-api</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>tomcat</groupId>
+ <artifactId>jasper-compiler</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>tomcat</groupId>
+ <artifactId>jasper-runtime</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.google.code.findbugs</groupId>
+ <artifactId>jsr305</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-client</artifactId>
+ <exclusions>
+ <exclusion>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-minicluster</artifactId>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+
+ </profile>
+
+ <!--
+ profile for building against Hadoop 3.0.x. Activate using:
+ mvn -Dhadoop.profile=3.0
+ -->
+ <profile>
+ <id>hadoop-3.0</id>
+ <activation>
+ <property>
+ <name>hadoop.profile</name>
+ <value>3.0</value>
+ </property>
+ </activation>
+ <properties>
+ <hadoop.version>${hadoop-three.version}</hadoop.version>
+ </properties>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-minicluster</artifactId>
+ </dependency>
+ </dependencies>
+ </profile>
+ </profiles>
+</project>
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/Driver.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/Driver.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/Driver.java
new file mode 100644
index 0000000..618c14a
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/Driver.java
@@ -0,0 +1,52 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import org.apache.hadoop.hbase.HBaseInterfaceAudience;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.classification.InterfaceStability;
+import org.apache.hadoop.util.ProgramDriver;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.annotations.VisibleForTesting;
+
+/**
+ * Driver for HBase mapreduce jobs. Select which job to run by passing its
+ * registered name as the first argument to this main method.
+ */
+@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
+@InterfaceStability.Stable
+public class Driver {
+
+ private static ProgramDriver pgd = new ProgramDriver();
+
+ @VisibleForTesting
+ static void setProgramDriver(ProgramDriver pgd0) {
+ pgd = pgd0;
+ }
+
+ /**
+ * @param args
+ * @throws Throwable
+ */
+ public static void main(String[] args) throws Throwable {
+ pgd.addClass(RowCounter.NAME, RowCounter.class, "Count rows in HBase table");
+ ProgramDriver.class.getMethod("driver", new Class[] { String[].class })
+ .invoke(pgd, new Object[] { args });
+ }
+}
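
As a usage illustration for the Driver above: a minimal sketch of invoking the registered
rowcounter program; the output directory, table name and column are hypothetical placeholders,
not values taken from this change.

    // Hypothetical invocation of the mapred Driver; arguments follow RowCounter's usage:
    //   rowcounter <outputdir> <tablename> <column1> [<column2>...]
    public class DriverUsageSketch {
      public static void main(String[] args) throws Throwable {
        org.apache.hadoop.hbase.mapred.Driver.main(
            new String[] { "rowcounter", "/tmp/rowcounter-out", "mytable", "cf:col1" });
      }
    }
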
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/GroupingTableMap.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/GroupingTableMap.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/GroupingTableMap.java
new file mode 100644
index 0000000..a534224
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/GroupingTableMap.java
@@ -0,0 +1,157 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+
+
+/**
+ * Extract grouping columns from input record
+ */
+@InterfaceAudience.Public
+public class GroupingTableMap
+extends MapReduceBase
+implements TableMap<ImmutableBytesWritable,Result> {
+
+ /**
+ * JobConf parameter to specify the columns used to produce the key passed to
+ * collect from the map phase
+ */
+ public static final String GROUP_COLUMNS =
+ "hbase.mapred.groupingtablemap.columns";
+
+ protected byte [][] columns;
+
+ /**
+ * Use this before submitting a TableMap job. It will appropriately set up the
+ * JobConf.
+ *
+ * @param table table to be processed
+ * @param columns space separated list of columns to fetch
+ * @param groupColumns space separated list of columns used to form the key
+ * used in collect
+ * @param mapper map class
+ * @param job job configuration object
+ */
+ @SuppressWarnings("unchecked")
+ public static void initJob(String table, String columns, String groupColumns,
+ Class<? extends TableMap> mapper, JobConf job) {
+
+ TableMapReduceUtil.initTableMapJob(table, columns, mapper,
+ ImmutableBytesWritable.class, Result.class, job);
+ job.set(GROUP_COLUMNS, groupColumns);
+ }
+
+ @Override
+ public void configure(JobConf job) {
+ super.configure(job);
+ String[] cols = job.get(GROUP_COLUMNS, "").split(" ");
+ columns = new byte[cols.length][];
+ for(int i = 0; i < cols.length; i++) {
+ columns[i] = Bytes.toBytes(cols[i]);
+ }
+ }
+
+ /**
+ * Extract the grouping columns from value to construct a new key.
+ *
+ * Pass the new key and value to reduce.
+ * If any of the grouping columns are not found in the value, the record is skipped.
+ * @param key
+ * @param value
+ * @param output
+ * @param reporter
+ * @throws IOException
+ */
+ public void map(ImmutableBytesWritable key, Result value,
+ OutputCollector<ImmutableBytesWritable,Result> output,
+ Reporter reporter) throws IOException {
+
+ byte[][] keyVals = extractKeyValues(value);
+ if(keyVals != null) {
+ ImmutableBytesWritable tKey = createGroupKey(keyVals);
+ output.collect(tKey, value);
+ }
+ }
+
+ /**
+ * Extract column values from the current record. This method returns
+ * null if any of the columns are not found.
+ *
+ * Override this method if you want to deal with nulls differently.
+ *
+ * @param r
+ * @return array of byte values
+ */
+ protected byte[][] extractKeyValues(Result r) {
+ byte[][] keyVals = null;
+ ArrayList<byte[]> foundList = new ArrayList<>();
+ int numCols = columns.length;
+ if (numCols > 0) {
+ for (Cell value: r.listCells()) {
+ byte [] column = KeyValue.makeColumn(CellUtil.cloneFamily(value),
+ CellUtil.cloneQualifier(value));
+ for (int i = 0; i < numCols; i++) {
+ if (Bytes.equals(column, columns[i])) {
+ foundList.add(CellUtil.cloneValue(value));
+ break;
+ }
+ }
+ }
+ if(foundList.size() == numCols) {
+ keyVals = foundList.toArray(new byte[numCols][]);
+ }
+ }
+ return keyVals;
+ }
+
+ /**
+ * Create a key by concatenating multiple column values.
+ * Override this function in order to produce different types of keys.
+ *
+ * @param vals
+ * @return key generated by concatenating multiple column values
+ */
+ protected ImmutableBytesWritable createGroupKey(byte[][] vals) {
+ if(vals == null) {
+ return null;
+ }
+ StringBuilder sb = new StringBuilder();
+ for(int i = 0; i < vals.length; i++) {
+ if(i > 0) {
+ sb.append(" ");
+ }
+ sb.append(Bytes.toString(vals[i]));
+ }
+ return new ImmutableBytesWritable(Bytes.toBytesBinary(sb.toString()));
+ }
+}
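
A minimal sketch of wiring the mapper above into a JobConf via its initJob helper, assuming a
hypothetical table and column names; both column lists are space separated, and here the fetched
columns double as the grouping columns.

    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.mapred.GroupingTableMap;
    import org.apache.hadoop.mapred.JobConf;

    public class GroupingTableMapUsageSketch {
      public static void main(String[] args) {
        // "mytable", "cf:a" and "cf:b" are hypothetical placeholders.
        JobConf job = new JobConf(HBaseConfiguration.create(), GroupingTableMapUsageSketch.class);
        // Fetch cf:a and cf:b, and concatenate their values to form the key passed to reduce.
        GroupingTableMap.initJob("mytable", "cf:a cf:b", "cf:a cf:b", GroupingTableMap.class, job);
      }
    }
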
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/HRegionPartitioner.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/HRegionPartitioner.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/HRegionPartitioner.java
new file mode 100644
index 0000000..4f5323a
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/HRegionPartitioner.java
@@ -0,0 +1,95 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.Partitioner;
+
+/**
+ * This is used to partition the output keys into groups of keys.
+ * Keys are grouped according to the regions that currently exist
+ * so that each reducer fills a single region and the load is evenly distributed.
+ *
+ * @param <K2>
+ * @param <V2>
+ */
+@InterfaceAudience.Public
+public class HRegionPartitioner<K2,V2>
+implements Partitioner<ImmutableBytesWritable, V2> {
+ private static final Log LOG = LogFactory.getLog(HRegionPartitioner.class);
+ // Connection and locator are not cleaned up; they just die when partitioner is done.
+ private Connection connection;
+ private RegionLocator locator;
+ private byte[][] startKeys;
+
+ public void configure(JobConf job) {
+ try {
+ this.connection = ConnectionFactory.createConnection(HBaseConfiguration.create(job));
+ TableName tableName = TableName.valueOf(job.get(TableOutputFormat.OUTPUT_TABLE));
+ this.locator = this.connection.getRegionLocator(tableName);
+ } catch (IOException e) {
+ LOG.error(e);
+ }
+
+ try {
+ this.startKeys = this.locator.getStartKeys();
+ } catch (IOException e) {
+ LOG.error(e);
+ }
+ }
+
+ public int getPartition(ImmutableBytesWritable key, V2 value, int numPartitions) {
+ byte[] region = null;
+ // Only one region; every key goes to partition 0.
+ if (this.startKeys.length == 1){
+ return 0;
+ }
+ try {
+ // Not sure if this is cached after a split so we could have problems
+ // here if a region splits while mapping
+ region = locator.getRegionLocation(key.get()).getRegionInfo().getStartKey();
+ } catch (IOException e) {
+ LOG.error(e);
+ }
+ for (int i = 0; i < this.startKeys.length; i++){
+ if (Bytes.compareTo(region, this.startKeys[i]) == 0 ){
+ if (i >= numPartitions-1){
+ // cover the case where we have fewer reducers than regions.
+ return (Integer.toString(i).hashCode()
+ & Integer.MAX_VALUE) % numPartitions;
+ }
+ return i;
+ }
+ }
+ // if the above fails to find a matching start key, we still need to return something
+ return 0;
+ }
+}
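
A minimal sketch of attaching the partitioner above to a JobConf-based job that reduces into a
table; the table name is a hypothetical placeholder. The partitioner reads the target table from
the TableOutputFormat.OUTPUT_TABLE property set on the job.

    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.mapred.HRegionPartitioner;
    import org.apache.hadoop.hbase.mapred.TableOutputFormat;
    import org.apache.hadoop.mapred.JobConf;

    public class HRegionPartitionerUsageSketch {
      public static void main(String[] args) {
        JobConf job = new JobConf(HBaseConfiguration.create(), HRegionPartitionerUsageSketch.class);
        // "mytable" is a hypothetical placeholder; the partitioner looks the region start keys up here.
        job.set(TableOutputFormat.OUTPUT_TABLE, "mytable");
        // Route each output key to the reducer that owns the corresponding region.
        job.setPartitionerClass(HRegionPartitioner.class);
      }
    }
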
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/IdentityTableMap.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/IdentityTableMap.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/IdentityTableMap.java
new file mode 100644
index 0000000..dfacff9
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/IdentityTableMap.java
@@ -0,0 +1,76 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import java.io.IOException;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+
+/**
+ * Pass the given key and record as-is to reduce
+ */
+@InterfaceAudience.Public
+public class IdentityTableMap
+extends MapReduceBase
+implements TableMap<ImmutableBytesWritable, Result> {
+
+ /** constructor */
+ public IdentityTableMap() {
+ super();
+ }
+
+ /**
+ * Use this before submitting a TableMap job. It will
+ * appropriately set up the JobConf.
+ *
+ * @param table table name
+ * @param columns columns to scan
+ * @param mapper mapper class
+ * @param job job configuration
+ */
+ @SuppressWarnings("unchecked")
+ public static void initJob(String table, String columns,
+ Class<? extends TableMap> mapper, JobConf job) {
+ TableMapReduceUtil.initTableMapJob(table, columns, mapper,
+ ImmutableBytesWritable.class,
+ Result.class, job);
+ }
+
+ /**
+ * Pass the key, value to reduce
+ * @param key
+ * @param value
+ * @param output
+ * @param reporter
+ * @throws IOException
+ */
+ public void map(ImmutableBytesWritable key, Result value,
+ OutputCollector<ImmutableBytesWritable,Result> output,
+ Reporter reporter) throws IOException {
+
+ // pass the key and value through unchanged
+ output.collect(key, value);
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/IdentityTableReduce.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/IdentityTableReduce.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/IdentityTableReduce.java
new file mode 100644
index 0000000..9c2e604
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/IdentityTableReduce.java
@@ -0,0 +1,61 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+
+/**
+ * Write to table each key, record pair
+ */
+@InterfaceAudience.Public
+public class IdentityTableReduce
+extends MapReduceBase
+implements TableReduce<ImmutableBytesWritable, Put> {
+ @SuppressWarnings("unused")
+ private static final Log LOG =
+ LogFactory.getLog(IdentityTableReduce.class.getName());
+
+ /**
+ * No aggregation, output pairs of (key, record)
+ * @param key
+ * @param values
+ * @param output
+ * @param reporter
+ * @throws IOException
+ */
+ public void reduce(ImmutableBytesWritable key, Iterator<Put> values,
+ OutputCollector<ImmutableBytesWritable, Put> output,
+ Reporter reporter)
+ throws IOException {
+
+ while(values.hasNext()) {
+ output.collect(key, values.next());
+ }
+ }
+}
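
A minimal sketch of using the reducer above to write map output straight to a table via
TableMapReduceUtil.initTableReduceJob; the target table is a hypothetical placeholder, and a
mapper that emits (ImmutableBytesWritable, Put) pairs is assumed to be configured elsewhere on
the same JobConf.

    import java.io.IOException;

    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.mapred.IdentityTableReduce;
    import org.apache.hadoop.hbase.mapred.TableMapReduceUtil;
    import org.apache.hadoop.mapred.JobConf;

    public class IdentityTableReduceUsageSketch {
      public static void main(String[] args) throws IOException {
        JobConf job = new JobConf(HBaseConfiguration.create(), IdentityTableReduceUsageSketch.class);
        // Writes each (row key, Put) pair emitted by the map phase into "targettable";
        // the table name is a hypothetical placeholder.
        TableMapReduceUtil.initTableReduceJob("targettable", IdentityTableReduce.class, job);
      }
    }
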
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/MultiTableSnapshotInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/MultiTableSnapshotInputFormat.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/MultiTableSnapshotInputFormat.java
new file mode 100644
index 0000000..81dbb15
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/MultiTableSnapshotInputFormat.java
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mapred;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.MultiTableSnapshotInputFormatImpl;
+import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormatImpl;
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * MultiTableSnapshotInputFormat generalizes {@link org.apache.hadoop.hbase.mapred
+ * .TableSnapshotInputFormat}
+ * allowing a MapReduce job to run over one or more table snapshots, with one or more scans
+ * configured for each.
+ * Internally, the input format delegates to {@link org.apache.hadoop.hbase.mapreduce
+ * .TableSnapshotInputFormat}
+ * and thus has the same performance advantages; see {@link org.apache.hadoop.hbase.mapreduce
+ * .TableSnapshotInputFormat} for
+ * more details.
+ * Usage is similar to TableSnapshotInputFormat, with the following exception:
+ * initMultiTableSnapshotMapperJob takes in a map
+ * from snapshot name to a collection of scans. For each snapshot in the map, each corresponding
+ * scan will be applied;
+ * the overall dataset for the job is defined by the concatenation of the regions and tables
+ * included in each snapshot/scan
+ * pair.
+ * {@link TableMapReduceUtil#initMultiTableSnapshotMapperJob(Map,
+ * Class, Class, Class, JobConf, boolean, Path)}
+ * can be used to configure the job.
+ * <pre>{@code
+ * JobConf job = new JobConf(conf);
+ * Map<String, Collection<Scan>> snapshotScans = ImmutableMap.of(
+ * "snapshot1", ImmutableList.of(new Scan(Bytes.toBytes("a"), Bytes.toBytes("b"))),
+ * "snapshot2", ImmutableList.of(new Scan(Bytes.toBytes("1"), Bytes.toBytes("2")))
+ * );
+ * Path restoreDir = new Path("/tmp/snapshot_restore_dir");
+ * TableMapReduceUtil.initMultiTableSnapshotMapperJob(
+ * snapshotScans, MyTableMapper.class, MyMapKeyOutput.class,
+ * MyMapOutputValueWritable.class, job, true, restoreDir);
+ * }
+ * </pre>
+ * Internally, this input format restores each snapshot into a subdirectory of the given tmp
+ * directory. Input splits and
+ * record readers are created as described in {@link org.apache.hadoop.hbase.mapreduce
+ * .TableSnapshotInputFormat}
+ * (one per region).
+ * See {@link org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat} for more notes on
+ * permissioning; the
+ * same caveats apply here.
+ *
+ * @see org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat
+ * @see org.apache.hadoop.hbase.client.TableSnapshotScanner
+ */
+@InterfaceAudience.Public
+public class MultiTableSnapshotInputFormat extends TableSnapshotInputFormat
+ implements InputFormat<ImmutableBytesWritable, Result> {
+
+ private final MultiTableSnapshotInputFormatImpl delegate;
+
+ public MultiTableSnapshotInputFormat() {
+ this.delegate = new MultiTableSnapshotInputFormatImpl();
+ }
+
+ @Override
+ public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
+ List<TableSnapshotInputFormatImpl.InputSplit> splits = delegate.getSplits(job);
+ InputSplit[] results = new InputSplit[splits.size()];
+ for (int i = 0; i < splits.size(); i++) {
+ results[i] = new TableSnapshotRegionSplit(splits.get(i));
+ }
+ return results;
+ }
+
+ @Override
+ public RecordReader<ImmutableBytesWritable, Result> getRecordReader(InputSplit split, JobConf job,
+ Reporter reporter) throws IOException {
+ return new TableSnapshotRecordReader((TableSnapshotRegionSplit) split, job);
+ }
+
+ /**
+ * Configure conf to read from snapshotScans, with snapshots restored to a subdirectory of
+ * restoreDir.
+ * Sets: {@link org.apache.hadoop.hbase.mapreduce
+ * .MultiTableSnapshotInputFormatImpl#RESTORE_DIRS_KEY},
+ * {@link org.apache.hadoop.hbase.mapreduce
+ * .MultiTableSnapshotInputFormatImpl#SNAPSHOT_TO_SCANS_KEY}
+ *
+ * @param conf
+ * @param snapshotScans
+ * @param restoreDir
+ * @throws IOException
+ */
+ public static void setInput(Configuration conf, Map<String, Collection<Scan>> snapshotScans,
+ Path restoreDir) throws IOException {
+ new MultiTableSnapshotInputFormatImpl().setInput(conf, snapshotScans, restoreDir);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/RowCounter.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/RowCounter.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/RowCounter.java
new file mode 100644
index 0000000..43560fd
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/RowCounter.java
@@ -0,0 +1,121 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import java.io.IOException;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * A job with a map phase to count rows.
+ * The map increments a counter for every input row that has columns with content;
+ * the job runs with zero reduce tasks.
+ */
+@InterfaceAudience.Public
+public class RowCounter extends Configured implements Tool {
+ // Name of this 'program'
+ static final String NAME = "rowcounter";
+
+ /**
+ * Mapper that runs the count.
+ */
+ static class RowCounterMapper
+ implements TableMap<ImmutableBytesWritable, Result> {
+ private static enum Counters {ROWS}
+
+ public void map(ImmutableBytesWritable row, Result values,
+ OutputCollector<ImmutableBytesWritable, Result> output,
+ Reporter reporter)
+ throws IOException {
+ // Count every row containing data, whether it's in qualifiers or values
+ reporter.incrCounter(Counters.ROWS, 1);
+ }
+
+ public void configure(JobConf jc) {
+ // Nothing to do.
+ }
+
+ public void close() throws IOException {
+ // Nothing to do.
+ }
+ }
+
+ /**
+ * @param args the command line arguments: output directory, table name, then columns
+ * @return the JobConf configured to run the row count
+ * @throws IOException when setting up the job fails
+ */
+ public JobConf createSubmittableJob(String[] args) throws IOException {
+ JobConf c = new JobConf(getConf(), getClass());
+ c.setJobName(NAME);
+ // Columns are space delimited
+ StringBuilder sb = new StringBuilder();
+ final int columnoffset = 2;
+ for (int i = columnoffset; i < args.length; i++) {
+ if (i > columnoffset) {
+ sb.append(" ");
+ }
+ sb.append(args[i]);
+ }
+ // Second argument is the table name.
+ TableMapReduceUtil.initTableMapJob(args[1], sb.toString(),
+ RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, c);
+ c.setNumReduceTasks(0);
+ // First arg is the output directory.
+ FileOutputFormat.setOutputPath(c, new Path(args[0]));
+ return c;
+ }
+
+ static int printUsage() {
+ System.out.println(NAME +
+ " <outputdir> <tablename> <column1> [<column2>...]");
+ return -1;
+ }
+
+ public int run(final String[] args) throws Exception {
+ // Make sure there are at least 3 parameters
+ if (args.length < 3) {
+ System.err.println("ERROR: Wrong number of parameters: " + args.length);
+ return printUsage();
+ }
+ JobClient.runJob(createSubmittableJob(args));
+ return 0;
+ }
+
+ /**
+ * Main entry point.
+ * @param args the command line arguments, passed through to {@link #run(String[])}
+ * @throws Exception when running the job fails
+ */
+ public static void main(String[] args) throws Exception {
+ int errCode = ToolRunner.run(HBaseConfiguration.create(), new RowCounter(), args);
+ System.exit(errCode);
+ }
+}
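
For reference, the tool above can also be driven from code via ToolRunner; the output directory,
table name and column below are hypothetical placeholders, and the argument order matches
printUsage() (output dir, table, then one or more columns).

    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.mapred.RowCounter;
    import org.apache.hadoop.util.ToolRunner;

    public class RowCounterExample {
      public static void main(String[] args) throws Exception {
        // args: <outputdir> <tablename> <column1> [<column2>...]
        int rc = ToolRunner.run(HBaseConfiguration.create(), new RowCounter(),
            new String[] { "/tmp/rowcounter-out", "TestTable", "f:q1" });
        System.exit(rc);
      }
    }
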
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableInputFormat.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableInputFormat.java
new file mode 100644
index 0000000..208849a
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableInputFormat.java
@@ -0,0 +1,90 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.JobConfigurable;
+import org.apache.hadoop.util.StringUtils;
+
+/**
+ * Convert HBase tabular data into a format that is consumable by Map/Reduce.
+ */
+@InterfaceAudience.Public
+public class TableInputFormat extends TableInputFormatBase implements
+ JobConfigurable {
+ private static final Log LOG = LogFactory.getLog(TableInputFormat.class);
+
+ /**
+ * Space-delimited list of columns to scan.
+ */
+ public static final String COLUMN_LIST = "hbase.mapred.tablecolumns";
+
+ public void configure(JobConf job) {
+ try {
+ initialize(job);
+ } catch (Exception e) {
+ LOG.error(StringUtils.stringifyException(e));
+ }
+ }
+
+ @Override
+ protected void initialize(JobConf job) throws IOException {
+ Path[] tableNames = FileInputFormat.getInputPaths(job);
+ String colArg = job.get(COLUMN_LIST);
+ String[] colNames = colArg.split(" ");
+ byte [][] m_cols = new byte[colNames.length][];
+ for (int i = 0; i < m_cols.length; i++) {
+ m_cols[i] = Bytes.toBytes(colNames[i]);
+ }
+ setInputColumns(m_cols);
+ Connection connection = ConnectionFactory.createConnection(job);
+ initializeTable(connection, TableName.valueOf(tableNames[0].getName()));
+ }
+
+ public void validateInput(JobConf job) throws IOException {
+ // expecting exactly one path
+ Path [] tableNames = FileInputFormat.getInputPaths(job);
+ if (tableNames == null || tableNames.length > 1) {
+ throw new IOException("expecting one table name");
+ }
+
+ // connected to table?
+ if (getTable() == null) {
+ throw new IOException("could not connect to table '" +
+ tableNames[0].getName() + "'");
+ }
+
+ // expecting at least one column
+ String colArg = job.get(COLUMN_LIST);
+ if (colArg == null || colArg.length() == 0) {
+ throw new IOException("expecting at least one column");
+ }
+ }
+}
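
A minimal sketch of configuring this input format by hand, roughly what the mapred
TableMapReduceUtil.initTableMapJob helper does for you; the table and column names are
hypothetical, and IdentityTableMap is used only as a stand-in mapper.

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.Result;
    import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
    import org.apache.hadoop.hbase.mapred.IdentityTableMap;
    import org.apache.hadoop.hbase.mapred.TableInputFormat;
    import org.apache.hadoop.mapred.FileInputFormat;
    import org.apache.hadoop.mapred.JobConf;

    public class TableInputFormatConfigExample {
      static JobConf configure() {
        JobConf job = new JobConf(HBaseConfiguration.create());
        job.setInputFormat(TableInputFormat.class);
        // The single "input path" names the table; initialize() uses its last path component.
        FileInputFormat.setInputPaths(job, new Path("TestTable"));  // hypothetical table
        // Space-delimited column list, read from COLUMN_LIST by initialize().
        job.set(TableInputFormat.COLUMN_LIST, "f:q1 f:q2");         // hypothetical columns
        job.setMapperClass(IdentityTableMap.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(Result.class);
        return job;
      }
    }
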
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/replication/VerifyReplication.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/replication/VerifyReplication.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/replication/VerifyReplication.java
new file mode 100644
index 0000000..acf6ff8
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/replication/VerifyReplication.java
@@ -0,0 +1,700 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce.replication;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Abortable;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.client.TableSnapshotScanner;
+import org.apache.hadoop.hbase.filter.Filter;
+import org.apache.hadoop.hbase.filter.FilterList;
+import org.apache.hadoop.hbase.filter.PrefixFilter;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
+import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
+import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat;
+import org.apache.hadoop.hbase.mapreduce.TableMapper;
+import org.apache.hadoop.hbase.mapreduce.TableSplit;
+import org.apache.hadoop.hbase.replication.ReplicationException;
+import org.apache.hadoop.hbase.replication.ReplicationFactory;
+import org.apache.hadoop.hbase.replication.ReplicationPeerConfig;
+import org.apache.hadoop.hbase.replication.ReplicationPeerZKImpl;
+import org.apache.hadoop.hbase.replication.ReplicationPeers;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.hbase.util.Threads;
+import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.MRJobConfig;
+import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.annotations.VisibleForTesting;
+
+/**
+ * This map-only job compares the data from a local table with a remote one.
+ * Every cell is compared and must have exactly the same keys (even timestamp)
+ * as well as the same value. It is possible to restrict the job by time range and
+ * families. The peer id that's provided must match the one given when the
+ * replication stream was set up.
+ * <p>
+ * Two counters are provided, Verifier.Counters.GOODROWS and BADROWS. The reason
+ * why a row is different is shown in the map's log.
+ */
+public class VerifyReplication extends Configured implements Tool {
+
+ private static final Log LOG =
+ LogFactory.getLog(VerifyReplication.class);
+
+ public final static String NAME = "verifyrep";
+ private final static String PEER_CONFIG_PREFIX = NAME + ".peer.";
+ long startTime = 0;
+ long endTime = Long.MAX_VALUE;
+ int batch = -1;
+ int versions = -1;
+ String tableName = null;
+ String families = null;
+ String delimiter = "";
+ String peerId = null;
+ String rowPrefixes = null;
+ int sleepMsBeforeReCompare = 0;
+ boolean verbose = false;
+ boolean includeDeletedCells = false;
+ //Source table snapshot name
+ String sourceSnapshotName = null;
+ //Temp location in source cluster to restore source snapshot
+ String sourceSnapshotTmpDir = null;
+ //Peer table snapshot name
+ String peerSnapshotName = null;
+ //Temp location in peer cluster to restore peer snapshot
+ String peerSnapshotTmpDir = null;
+ //Peer cluster Hadoop FS address
+ String peerFSAddress = null;
+ //Peer cluster HBase root dir location
+ String peerHBaseRootAddress = null;
+
+
+ private final static String JOB_NAME_CONF_KEY = "mapreduce.job.name";
+
+ /**
+ * Map-only comparator for 2 tables
+ */
+ public static class Verifier
+ extends TableMapper<ImmutableBytesWritable, Put> {
+
+
+
+ public static enum Counters {
+ GOODROWS, BADROWS, ONLY_IN_SOURCE_TABLE_ROWS, ONLY_IN_PEER_TABLE_ROWS, CONTENT_DIFFERENT_ROWS}
+
+ private Connection sourceConnection;
+ private Table sourceTable;
+ private Connection replicatedConnection;
+ private Table replicatedTable;
+ private ResultScanner replicatedScanner;
+ private Result currentCompareRowInPeerTable;
+ private int sleepMsBeforeReCompare;
+ private String delimiter = "";
+ private boolean verbose = false;
+ private int batch = -1;
+
+ /**
+ * Map method that compares every scanned row with the equivalent from
+ * a distant cluster.
+ * @param row The current table row key.
+ * @param value The columns.
+ * @param context The current context.
+ * @throws IOException When something is broken with the data.
+ */
+ @Override
+ public void map(ImmutableBytesWritable row, final Result value,
+ Context context)
+ throws IOException {
+ if (replicatedScanner == null) {
+ Configuration conf = context.getConfiguration();
+ sleepMsBeforeReCompare = conf.getInt(NAME +".sleepMsBeforeReCompare", 0);
+ delimiter = conf.get(NAME + ".delimiter", "");
+ verbose = conf.getBoolean(NAME +".verbose", false);
+ batch = conf.getInt(NAME + ".batch", -1);
+ final Scan scan = new Scan();
+ if (batch > 0) {
+ scan.setBatch(batch);
+ }
+ scan.setCacheBlocks(false);
+ scan.setCaching(conf.getInt(TableInputFormat.SCAN_CACHEDROWS, 1));
+ long startTime = conf.getLong(NAME + ".startTime", 0);
+ long endTime = conf.getLong(NAME + ".endTime", Long.MAX_VALUE);
+ String families = conf.get(NAME + ".families", null);
+ if(families != null) {
+ String[] fams = families.split(",");
+ for(String fam : fams) {
+ scan.addFamily(Bytes.toBytes(fam));
+ }
+ }
+ boolean includeDeletedCells = conf.getBoolean(NAME + ".includeDeletedCells", false);
+ scan.setRaw(includeDeletedCells);
+ String rowPrefixes = conf.get(NAME + ".rowPrefixes", null);
+ setRowPrefixFilter(scan, rowPrefixes);
+ scan.setTimeRange(startTime, endTime);
+ int versions = conf.getInt(NAME+".versions", -1);
+ LOG.info("Setting number of version inside map as: " + versions);
+ if (versions >= 0) {
+ scan.setMaxVersions(versions);
+ }
+ TableName tableName = TableName.valueOf(conf.get(NAME + ".tableName"));
+ sourceConnection = ConnectionFactory.createConnection(conf);
+ sourceTable = sourceConnection.getTable(tableName);
+
+ final InputSplit tableSplit = context.getInputSplit();
+
+ String zkClusterKey = conf.get(NAME + ".peerQuorumAddress");
+ Configuration peerConf = HBaseConfiguration.createClusterConf(conf,
+ zkClusterKey, PEER_CONFIG_PREFIX);
+
+ replicatedConnection = ConnectionFactory.createConnection(peerConf);
+ replicatedTable = replicatedConnection.getTable(tableName);
+ scan.setStartRow(value.getRow());
+
+ byte[] endRow = null;
+ if (tableSplit instanceof TableSnapshotInputFormat.TableSnapshotRegionSplit) {
+ endRow = ((TableSnapshotInputFormat.TableSnapshotRegionSplit) tableSplit).getRegionInfo()
+ .getEndKey();
+ } else {
+ endRow = ((TableSplit) tableSplit).getEndRow();
+ }
+
+ scan.setStopRow(endRow);
+
+ String peerSnapshotName = conf.get(NAME + ".peerSnapshotName", null);
+ if (peerSnapshotName != null) {
+ String peerSnapshotTmpDir = conf.get(NAME + ".peerSnapshotTmpDir", null);
+ String peerFSAddress = conf.get(NAME + ".peerFSAddress", null);
+ String peerHBaseRootAddress = conf.get(NAME + ".peerHBaseRootAddress", null);
+ FileSystem.setDefaultUri(peerConf, peerFSAddress);
+ FSUtils.setRootDir(peerConf, new Path(peerHBaseRootAddress));
+ LOG.info("Using peer snapshot:" + peerSnapshotName + " with temp dir:"
+ + peerSnapshotTmpDir + " peer root uri:" + FSUtils.getRootDir(peerConf)
+ + " peerFSAddress:" + peerFSAddress);
+
+ replicatedScanner = new TableSnapshotScanner(peerConf,
+ new Path(peerFSAddress, peerSnapshotTmpDir), peerSnapshotName, scan);
+ } else {
+ replicatedScanner = replicatedTable.getScanner(scan);
+ }
+ currentCompareRowInPeerTable = replicatedScanner.next();
+ }
+ while (true) {
+ if (currentCompareRowInPeerTable == null) {
+ // reach the region end of peer table, row only in source table
+ logFailRowAndIncreaseCounter(context, Counters.ONLY_IN_SOURCE_TABLE_ROWS, value);
+ break;
+ }
+ int rowCmpRet = Bytes.compareTo(value.getRow(), currentCompareRowInPeerTable.getRow());
+ if (rowCmpRet == 0) {
+ // rowkey is same, need to compare the content of the row
+ try {
+ Result.compareResults(value, currentCompareRowInPeerTable);
+ context.getCounter(Counters.GOODROWS).increment(1);
+ if (verbose) {
+ LOG.info("Good row key: " + delimiter
+ + Bytes.toStringBinary(value.getRow()) + delimiter);
+ }
+ } catch (Exception e) {
+ logFailRowAndIncreaseCounter(context, Counters.CONTENT_DIFFERENT_ROWS, value);
+ }
+ currentCompareRowInPeerTable = replicatedScanner.next();
+ break;
+ } else if (rowCmpRet < 0) {
+ // row only exists in source table
+ logFailRowAndIncreaseCounter(context, Counters.ONLY_IN_SOURCE_TABLE_ROWS, value);
+ break;
+ } else {
+ // row only exists in peer table
+ logFailRowAndIncreaseCounter(context, Counters.ONLY_IN_PEER_TABLE_ROWS,
+ currentCompareRowInPeerTable);
+ currentCompareRowInPeerTable = replicatedScanner.next();
+ }
+ }
+ }
+
+ private void logFailRowAndIncreaseCounter(Context context, Counters counter, Result row) {
+ if (sleepMsBeforeReCompare > 0) {
+ Threads.sleep(sleepMsBeforeReCompare);
+ try {
+ Result sourceResult = sourceTable.get(new Get(row.getRow()));
+ Result replicatedResult = replicatedTable.get(new Get(row.getRow()));
+ Result.compareResults(sourceResult, replicatedResult);
+ if (!sourceResult.isEmpty()) {
+ context.getCounter(Counters.GOODROWS).increment(1);
+ if (verbose) {
+ LOG.info("Good row key (with recompare): " + delimiter + Bytes.toStringBinary(row.getRow())
+ + delimiter);
+ }
+ }
+ return;
+ } catch (Exception e) {
+ LOG.error("recompare fail after sleep, rowkey=" + delimiter +
+ Bytes.toStringBinary(row.getRow()) + delimiter);
+ }
+ }
+ context.getCounter(counter).increment(1);
+ context.getCounter(Counters.BADROWS).increment(1);
+ LOG.error(counter.toString() + ", rowkey=" + delimiter + Bytes.toStringBinary(row.getRow()) +
+ delimiter);
+ }
+
+ @Override
+ protected void cleanup(Context context) {
+ if (replicatedScanner != null) {
+ try {
+ while (currentCompareRowInPeerTable != null) {
+ logFailRowAndIncreaseCounter(context, Counters.ONLY_IN_PEER_TABLE_ROWS,
+ currentCompareRowInPeerTable);
+ currentCompareRowInPeerTable = replicatedScanner.next();
+ }
+ } catch (Exception e) {
+ LOG.error("fail to scan peer table in cleanup", e);
+ } finally {
+ replicatedScanner.close();
+ replicatedScanner = null;
+ }
+ }
+
+ if (sourceTable != null) {
+ try {
+ sourceTable.close();
+ } catch (IOException e) {
+ LOG.error("fail to close source table in cleanup", e);
+ }
+ }
+ if(sourceConnection != null){
+ try {
+ sourceConnection.close();
+ } catch (Exception e) {
+ LOG.error("fail to close source connection in cleanup", e);
+ }
+ }
+
+ if(replicatedTable != null){
+ try{
+ replicatedTable.close();
+ } catch (Exception e) {
+ LOG.error("fail to close replicated table in cleanup", e);
+ }
+ }
+ if(replicatedConnection != null){
+ try {
+ replicatedConnection.close();
+ } catch (Exception e) {
+ LOG.error("fail to close replicated connection in cleanup", e);
+ }
+ }
+ }
+ }
+
+ private static Pair<ReplicationPeerConfig, Configuration> getPeerQuorumConfig(
+ final Configuration conf, String peerId) throws IOException {
+ ZooKeeperWatcher localZKW = null;
+ ReplicationPeerZKImpl peer = null;
+ try {
+ localZKW = new ZooKeeperWatcher(conf, "VerifyReplication",
+ new Abortable() {
+ @Override public void abort(String why, Throwable e) {}
+ @Override public boolean isAborted() {return false;}
+ });
+
+ ReplicationPeers rp = ReplicationFactory.getReplicationPeers(localZKW, conf, localZKW);
+ rp.init();
+
+ Pair<ReplicationPeerConfig, Configuration> pair = rp.getPeerConf(peerId);
+ if (pair == null) {
+ throw new IOException("Couldn't get peer conf!");
+ }
+
+ return pair;
+ } catch (ReplicationException e) {
+ throw new IOException(
+ "An error occurred while trying to connect to the remove peer cluster", e);
+ } finally {
+ if (peer != null) {
+ peer.close();
+ }
+ if (localZKW != null) {
+ localZKW.close();
+ }
+ }
+ }
+
+ /**
+ * Sets up the actual job.
+ *
+ * @param conf The current configuration.
+ * @param args The command line parameters.
+ * @return The newly created job.
+ * @throws java.io.IOException When setting up the job fails.
+ */
+ public Job createSubmittableJob(Configuration conf, String[] args)
+ throws IOException {
+ if (!doCommandLine(args)) {
+ return null;
+ }
+ conf.set(NAME+".peerId", peerId);
+ conf.set(NAME+".tableName", tableName);
+ conf.setLong(NAME+".startTime", startTime);
+ conf.setLong(NAME+".endTime", endTime);
+ conf.setInt(NAME +".sleepMsBeforeReCompare", sleepMsBeforeReCompare);
+ conf.set(NAME + ".delimiter", delimiter);
+ conf.setInt(NAME + ".batch", batch);
+ conf.setBoolean(NAME +".verbose", verbose);
+ conf.setBoolean(NAME +".includeDeletedCells", includeDeletedCells);
+ if (families != null) {
+ conf.set(NAME+".families", families);
+ }
+ if (rowPrefixes != null){
+ conf.set(NAME+".rowPrefixes", rowPrefixes);
+ }
+
+ Pair<ReplicationPeerConfig, Configuration> peerConfigPair = getPeerQuorumConfig(conf, peerId);
+ ReplicationPeerConfig peerConfig = peerConfigPair.getFirst();
+ String peerQuorumAddress = peerConfig.getClusterKey();
+ LOG.info("Peer Quorum Address: " + peerQuorumAddress + ", Peer Configuration: " +
+ peerConfig.getConfiguration());
+ conf.set(NAME + ".peerQuorumAddress", peerQuorumAddress);
+ HBaseConfiguration.setWithPrefix(conf, PEER_CONFIG_PREFIX,
+ peerConfig.getConfiguration().entrySet());
+
+ conf.setInt(NAME + ".versions", versions);
+ LOG.info("Number of version: " + versions);
+
+ //Set Snapshot specific parameters
+ if (peerSnapshotName != null) {
+ conf.set(NAME + ".peerSnapshotName", peerSnapshotName);
+ conf.set(NAME + ".peerSnapshotTmpDir", peerSnapshotTmpDir);
+ conf.set(NAME + ".peerFSAddress", peerFSAddress);
+ conf.set(NAME + ".peerHBaseRootAddress", peerHBaseRootAddress);
+
+ // This is to create HDFS delegation token for peer cluster in case of secured
+ conf.setStrings(MRJobConfig.JOB_NAMENODES, peerFSAddress);
+ }
+
+ Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
+ job.setJarByClass(VerifyReplication.class);
+
+ Scan scan = new Scan();
+ scan.setTimeRange(startTime, endTime);
+ scan.setRaw(includeDeletedCells);
+ scan.setCacheBlocks(false);
+ if (batch > 0) {
+ scan.setBatch(batch);
+ }
+ if (versions >= 0) {
+ scan.setMaxVersions(versions);
+ LOG.info("Number of versions set to " + versions);
+ }
+ if(families != null) {
+ String[] fams = families.split(",");
+ for(String fam : fams) {
+ scan.addFamily(Bytes.toBytes(fam));
+ }
+ }
+
+ setRowPrefixFilter(scan, rowPrefixes);
+
+ if (sourceSnapshotName != null) {
+ Path snapshotTempPath = new Path(sourceSnapshotTmpDir);
+ LOG.info(
+ "Using source snapshot-" + sourceSnapshotName + " with temp dir:" + sourceSnapshotTmpDir);
+ TableMapReduceUtil.initTableSnapshotMapperJob(sourceSnapshotName, scan, Verifier.class, null,
+ null, job, true, snapshotTempPath);
+ } else {
+ TableMapReduceUtil.initTableMapperJob(tableName, scan, Verifier.class, null, null, job);
+ }
+ Configuration peerClusterConf = peerConfigPair.getSecond();
+ // Obtain the auth token from peer cluster
+ TableMapReduceUtil.initCredentialsForCluster(job, peerClusterConf);
+
+ job.setOutputFormatClass(NullOutputFormat.class);
+ job.setNumReduceTasks(0);
+ return job;
+ }
+
+ private static void setRowPrefixFilter(Scan scan, String rowPrefixes) {
+ if (rowPrefixes != null && !rowPrefixes.isEmpty()) {
+ String[] rowPrefixArray = rowPrefixes.split(",");
+ Arrays.sort(rowPrefixArray);
+ FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ONE);
+ for (String prefix : rowPrefixArray) {
+ Filter filter = new PrefixFilter(Bytes.toBytes(prefix));
+ filterList.addFilter(filter);
+ }
+ scan.setFilter(filterList);
+ byte[] startPrefixRow = Bytes.toBytes(rowPrefixArray[0]);
+ byte[] lastPrefixRow = Bytes.toBytes(rowPrefixArray[rowPrefixArray.length -1]);
+ setStartAndStopRows(scan, startPrefixRow, lastPrefixRow);
+ }
+ }
+
+ private static void setStartAndStopRows(Scan scan, byte[] startPrefixRow, byte[] lastPrefixRow) {
+ scan.setStartRow(startPrefixRow);
+ byte[] stopRow = Bytes.add(Bytes.head(lastPrefixRow, lastPrefixRow.length - 1),
+ new byte[]{(byte) (lastPrefixRow[lastPrefixRow.length - 1] + 1)});
+ scan.setStopRow(stopRow);
+ }
+
+ @VisibleForTesting
+ public boolean doCommandLine(final String[] args) {
+ if (args.length < 2) {
+ printUsage(null);
+ return false;
+ }
+ try {
+ for (int i = 0; i < args.length; i++) {
+ String cmd = args[i];
+ if (cmd.equals("-h") || cmd.startsWith("--h")) {
+ printUsage(null);
+ return false;
+ }
+
+ final String startTimeArgKey = "--starttime=";
+ if (cmd.startsWith(startTimeArgKey)) {
+ startTime = Long.parseLong(cmd.substring(startTimeArgKey.length()));
+ continue;
+ }
+
+ final String endTimeArgKey = "--endtime=";
+ if (cmd.startsWith(endTimeArgKey)) {
+ endTime = Long.parseLong(cmd.substring(endTimeArgKey.length()));
+ continue;
+ }
+
+ final String includeDeletedCellsArgKey = "--raw";
+ if (cmd.equals(includeDeletedCellsArgKey)) {
+ includeDeletedCells = true;
+ continue;
+ }
+
+ final String versionsArgKey = "--versions=";
+ if (cmd.startsWith(versionsArgKey)) {
+ versions = Integer.parseInt(cmd.substring(versionsArgKey.length()));
+ continue;
+ }
+
+ final String batchArgKey = "--batch=";
+ if (cmd.startsWith(batchArgKey)) {
+ batch = Integer.parseInt(cmd.substring(batchArgKey.length()));
+ continue;
+ }
+
+ final String familiesArgKey = "--families=";
+ if (cmd.startsWith(familiesArgKey)) {
+ families = cmd.substring(familiesArgKey.length());
+ continue;
+ }
+
+ final String rowPrefixesKey = "--row-prefixes=";
+ if (cmd.startsWith(rowPrefixesKey)){
+ rowPrefixes = cmd.substring(rowPrefixesKey.length());
+ continue;
+ }
+
+ final String delimiterArgKey = "--delimiter=";
+ if (cmd.startsWith(delimiterArgKey)) {
+ delimiter = cmd.substring(delimiterArgKey.length());
+ continue;
+ }
+
+ final String sleepToReCompareKey = "--recomparesleep=";
+ if (cmd.startsWith(sleepToReCompareKey)) {
+ sleepMsBeforeReCompare = Integer.parseInt(cmd.substring(sleepToReCompareKey.length()));
+ continue;
+ }
+ final String verboseKey = "--verbose";
+ if (cmd.startsWith(verboseKey)) {
+ verbose = true;
+ continue;
+ }
+
+ final String sourceSnapshotNameArgKey = "--sourceSnapshotName=";
+ if (cmd.startsWith(sourceSnapshotNameArgKey)) {
+ sourceSnapshotName = cmd.substring(sourceSnapshotNameArgKey.length());
+ continue;
+ }
+
+ final String sourceSnapshotTmpDirArgKey = "--sourceSnapshotTmpDir=";
+ if (cmd.startsWith(sourceSnapshotTmpDirArgKey)) {
+ sourceSnapshotTmpDir = cmd.substring(sourceSnapshotTmpDirArgKey.length());
+ continue;
+ }
+
+ final String peerSnapshotNameArgKey = "--peerSnapshotName=";
+ if (cmd.startsWith(peerSnapshotNameArgKey)) {
+ peerSnapshotName = cmd.substring(peerSnapshotNameArgKey.length());
+ continue;
+ }
+
+ final String peerSnapshotTmpDirArgKey = "--peerSnapshotTmpDir=";
+ if (cmd.startsWith(peerSnapshotTmpDirArgKey)) {
+ peerSnapshotTmpDir = cmd.substring(peerSnapshotTmpDirArgKey.length());
+ continue;
+ }
+
+ final String peerFSAddressArgKey = "--peerFSAddress=";
+ if (cmd.startsWith(peerFSAddressArgKey)) {
+ peerFSAddress = cmd.substring(peerFSAddressArgKey.length());
+ continue;
+ }
+
+ final String peerHBaseRootAddressArgKey = "--peerHBaseRootAddress=";
+ if (cmd.startsWith(peerHBaseRootAddressArgKey)) {
+ peerHBaseRootAddress = cmd.substring(peerHBaseRootAddressArgKey.length());
+ continue;
+ }
+
+ if (cmd.startsWith("--")) {
+ printUsage("Invalid argument '" + cmd + "'");
+ return false;
+ }
+
+ if (i == args.length-2) {
+ peerId = cmd;
+ }
+
+ if (i == args.length-1) {
+ tableName = cmd;
+ }
+ }
+
+ if ((sourceSnapshotName != null && sourceSnapshotTmpDir == null)
+ || (sourceSnapshotName == null && sourceSnapshotTmpDir != null)) {
+ printUsage("Source snapshot name and snapshot temp location should be provided"
+ + " to use snapshots in source cluster");
+ return false;
+ }
+
+ if (peerSnapshotName != null || peerSnapshotTmpDir != null || peerFSAddress != null
+ || peerHBaseRootAddress != null) {
+ if (peerSnapshotName == null || peerSnapshotTmpDir == null || peerFSAddress == null
+ || peerHBaseRootAddress == null) {
+ printUsage(
+ "Peer snapshot name, peer snapshot temp location, Peer HBase root address and "
+ + "peer FSAddress should be provided to use snapshots in peer cluster");
+ return false;
+ }
+ }
+
+ // This is to avoid making recompare calls to source/peer tables when snapshots are used
+ if ((sourceSnapshotName != null || peerSnapshotName != null) && sleepMsBeforeReCompare > 0) {
+ printUsage(
+ "Using sleepMsBeforeReCompare along with snapshots is not allowed as snapshots are immutable");
+ return false;
+ }
+
+ } catch (Exception e) {
+ e.printStackTrace();
+ printUsage("Can't start because " + e.getMessage());
+ return false;
+ }
+ return true;
+ }
+
+ /*
+ * @param errorMsg Error message. Can be null.
+ */
+ private static void printUsage(final String errorMsg) {
+ if (errorMsg != null && errorMsg.length() > 0) {
+ System.err.println("ERROR: " + errorMsg);
+ }
+ System.err.println("Usage: verifyrep [--starttime=X]" +
+ " [--endtime=Y] [--families=A] [--row-prefixes=B] [--delimiter=] [--recomparesleep=] " +
+ "[--batch=] [--verbose] [--sourceSnapshotName=P] [--sourceSnapshotTmpDir=Q] [--peerSnapshotName=R] "
+ + "[--peerSnapshotTmpDir=S] [--peerFSAddress=T] [--peerHBaseRootAddress=U] <peerid> <tablename>");
+ System.err.println();
+ System.err.println("Options:");
+ System.err.println(" starttime beginning of the time range");
+ System.err.println(" without endtime means from starttime to forever");
+ System.err.println(" endtime end of the time range");
+ System.err.println(" versions number of cell versions to verify");
+ System.err.println(" batch batch count for scan, " +
+ "note that result row counts will no longer be actual number of rows when you use this option");
+ System.err.println(" raw includes raw scan if given in options");
+ System.err.println(" families comma-separated list of families to copy");
+ System.err.println(" row-prefixes comma-separated list of row key prefixes to filter on ");
+ System.err.println(" delimiter the delimiter used in display around rowkey");
+ System.err.println(" recomparesleep milliseconds to sleep before recompare row, " +
+ "default value is 0 which disables the recompare.");
+ System.err.println(" verbose logs row keys of good rows");
+ System.err.println(" sourceSnapshotName Source Snapshot Name");
+ System.err.println(" sourceSnapshotTmpDir Tmp location to restore source table snapshot");
+ System.err.println(" peerSnapshotName Peer Snapshot Name");
+ System.err.println(" peerSnapshotTmpDir Tmp location to restore peer table snapshot");
+ System.err.println(" peerFSAddress Peer cluster Hadoop FS address");
+ System.err.println(" peerHBaseRootAddress Peer cluster HBase root location");
+ System.err.println();
+ System.err.println("Args:");
+ System.err.println(" peerid Id of the peer used for verification, must match the one given for replication");
+ System.err.println(" tablename Name of the table to verify");
+ System.err.println();
+ System.err.println("Examples:");
+ System.err.println(" To verify the data replicated from TestTable for a 1 hour window with peer #5 ");
+ System.err.println(" $ hbase " +
+ "org.apache.hadoop.hbase.mapreduce.replication.VerifyReplication" +
+ " --starttime=1265875194289 --endtime=1265878794289 5 TestTable ");
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+ Configuration conf = this.getConf();
+ Job job = createSubmittableJob(conf, args);
+ if (job != null) {
+ return job.waitForCompletion(true) ? 0 : 1;
+ }
+ return 1;
+ }
+
+ /**
+ * Main entry point.
+ *
+ * @param args The command line parameters.
+ * @throws Exception When running the job fails.
+ */
+ public static void main(String[] args) throws Exception {
+ int res = ToolRunner.run(HBaseConfiguration.create(), new VerifyReplication(), args);
+ System.exit(res);
+ }
+}
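
For reference, a sketch of launching the tool above via ToolRunner, mirroring the command-line
example printed by printUsage(); the time range, peer id 5 and table 'TestTable' come from that
example, and the family name 'f' is a hypothetical placeholder.

    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.mapreduce.replication.VerifyReplication;
    import org.apache.hadoop.util.ToolRunner;

    public class VerifyReplicationExample {
      public static void main(String[] args) throws Exception {
        // Compare a one-hour window of 'TestTable' against replication peer 5.
        int rc = ToolRunner.run(HBaseConfiguration.create(), new VerifyReplication(),
            new String[] { "--starttime=1265875194289", "--endtime=1265878794289",
                "--families=f", "5", "TestTable" });
        System.exit(rc);
      }
    }
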
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/regionserver/CompactionTool.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/regionserver/CompactionTool.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/regionserver/CompactionTool.java
new file mode 100644
index 0000000..eb9a5f7
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/regionserver/CompactionTool.java
@@ -0,0 +1,470 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.regionserver;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HBaseInterfaceAudience;
+import org.apache.hadoop.hbase.HDFSBlocksDistribution;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.mapreduce.JobUtil;
+import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
+import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
+import org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+import org.apache.hadoop.hbase.util.FSTableDescriptors;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.lib.input.FileSplit;
+import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
+import org.apache.hadoop.util.LineReader;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * The CompactionTool allows you to run a compaction on a:
+ * <ul>
+ * <li>table folder (all regions and families will be compacted)
+ * <li>region folder (all families in the region will be compacted)
+ * <li>family folder (the store files will be compacted)
+ * </ul>
+ */
+@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
+public class CompactionTool extends Configured implements Tool {
+ private static final Log LOG = LogFactory.getLog(CompactionTool.class);
+
+ private final static String CONF_TMP_DIR = "hbase.tmp.dir";
+ private final static String CONF_COMPACT_ONCE = "hbase.compactiontool.compact.once";
+ private final static String CONF_COMPACT_MAJOR = "hbase.compactiontool.compact.major";
+ private final static String CONF_DELETE_COMPACTED = "hbase.compactiontool.delete";
+ private final static String CONF_COMPLETE_COMPACTION = "hbase.hstore.compaction.complete";
+
+ /**
+ * Class responsible for executing the compaction on the specified path.
+ * The path can be a table, region or family directory.
+ */
+ private static class CompactionWorker {
+ private final boolean keepCompactedFiles;
+ private final boolean deleteCompacted;
+ private final Configuration conf;
+ private final FileSystem fs;
+ private final Path tmpDir;
+
+ public CompactionWorker(final FileSystem fs, final Configuration conf) {
+ this.conf = conf;
+ this.keepCompactedFiles = !conf.getBoolean(CONF_COMPLETE_COMPACTION, true);
+ this.deleteCompacted = conf.getBoolean(CONF_DELETE_COMPACTED, false);
+ this.tmpDir = new Path(conf.get(CONF_TMP_DIR));
+ this.fs = fs;
+ }
+
+ /**
+ * Execute the compaction on the specified path.
+ *
+ * @param path Directory path on which to run compaction.
+ * @param compactOnce Execute just a single step of compaction.
+ * @param major Request major compaction.
+ */
+ public void compact(final Path path, final boolean compactOnce, final boolean major) throws IOException {
+ if (isFamilyDir(fs, path)) {
+ Path regionDir = path.getParent();
+ Path tableDir = regionDir.getParent();
+ TableDescriptor htd = FSTableDescriptors.getTableDescriptorFromFs(fs, tableDir);
+ HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
+ compactStoreFiles(tableDir, htd, hri,
+ path.getName(), compactOnce, major);
+ } else if (isRegionDir(fs, path)) {
+ Path tableDir = path.getParent();
+ TableDescriptor htd = FSTableDescriptors.getTableDescriptorFromFs(fs, tableDir);
+ compactRegion(tableDir, htd, path, compactOnce, major);
+ } else if (isTableDir(fs, path)) {
+ compactTable(path, compactOnce, major);
+ } else {
+ throw new IOException(
+ "Specified path is not a table, region or family directory. path=" + path);
+ }
+ }
+
+ private void compactTable(final Path tableDir, final boolean compactOnce, final boolean major)
+ throws IOException {
+ TableDescriptor htd = FSTableDescriptors.getTableDescriptorFromFs(fs, tableDir);
+ for (Path regionDir: FSUtils.getRegionDirs(fs, tableDir)) {
+ compactRegion(tableDir, htd, regionDir, compactOnce, major);
+ }
+ }
+
+ private void compactRegion(final Path tableDir, final TableDescriptor htd,
+ final Path regionDir, final boolean compactOnce, final boolean major)
+ throws IOException {
+ HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
+ for (Path familyDir: FSUtils.getFamilyDirs(fs, regionDir)) {
+ compactStoreFiles(tableDir, htd, hri, familyDir.getName(), compactOnce, major);
+ }
+ }
+
+ /**
+ * Execute the actual compaction job.
+ * If the compact once flag is not specified, execute the compaction until
+ * no more compactions are needed. Uses the Configuration settings provided.
+ */
+ private void compactStoreFiles(final Path tableDir, final TableDescriptor htd,
+ final HRegionInfo hri, final String familyName, final boolean compactOnce,
+ final boolean major) throws IOException {
+ HStore store = getStore(conf, fs, tableDir, htd, hri, familyName, tmpDir);
+ LOG.info("Compact table=" + htd.getTableName() +
+ " region=" + hri.getRegionNameAsString() +
+ " family=" + familyName);
+ if (major) {
+ store.triggerMajorCompaction();
+ }
+ do {
+ CompactionContext compaction = store.requestCompaction(Store.PRIORITY_USER, null);
+ if (compaction == null) break;
+ List<StoreFile> storeFiles =
+ store.compact(compaction, NoLimitThroughputController.INSTANCE);
+ if (storeFiles != null && !storeFiles.isEmpty()) {
+ if (keepCompactedFiles && deleteCompacted) {
+ for (StoreFile storeFile: storeFiles) {
+ fs.delete(storeFile.getPath(), false);
+ }
+ }
+ }
+ } while (store.needsCompaction() && !compactOnce);
+ }
+
+ /**
+ * Create a "mock" HStore that uses the tmpDir specified by the user and
+ * the store dir to compact as source.
+ */
+ private static HStore getStore(final Configuration conf, final FileSystem fs,
+ final Path tableDir, final TableDescriptor htd, final HRegionInfo hri,
+ final String familyName, final Path tempDir) throws IOException {
+ HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, hri) {
+ @Override
+ public Path getTempDir() {
+ return tempDir;
+ }
+ };
+ HRegion region = new HRegion(regionFs, null, conf, htd, null);
+ return new HStore(region, htd.getColumnFamily(Bytes.toBytes(familyName)), conf);
+ }
+ }
+
+ private static boolean isRegionDir(final FileSystem fs, final Path path) throws IOException {
+ Path regionInfo = new Path(path, HRegionFileSystem.REGION_INFO_FILE);
+ return fs.exists(regionInfo);
+ }
+
+ private static boolean isTableDir(final FileSystem fs, final Path path) throws IOException {
+ return FSTableDescriptors.getTableInfoPath(fs, path) != null;
+ }
+
+ private static boolean isFamilyDir(final FileSystem fs, final Path path) throws IOException {
+ return isRegionDir(fs, path.getParent());
+ }
+
+ private static class CompactionMapper
+ extends Mapper<LongWritable, Text, NullWritable, NullWritable> {
+ private CompactionWorker compactor = null;
+ private boolean compactOnce = false;
+ private boolean major = false;
+
+ @Override
+ public void setup(Context context) {
+ Configuration conf = context.getConfiguration();
+ compactOnce = conf.getBoolean(CONF_COMPACT_ONCE, false);
+ major = conf.getBoolean(CONF_COMPACT_MAJOR, false);
+
+ try {
+ FileSystem fs = FileSystem.get(conf);
+ this.compactor = new CompactionWorker(fs, conf);
+ } catch (IOException e) {
+ throw new RuntimeException("Could not get the input FileSystem", e);
+ }
+ }
+
+ @Override
+ public void map(LongWritable key, Text value, Context context)
+ throws InterruptedException, IOException {
+ Path path = new Path(value.toString());
+ this.compactor.compact(path, compactOnce, major);
+ }
+ }
+
+ /**
+ * Input format that uses store files block location as input split locality.
+ */
+ private static class CompactionInputFormat extends TextInputFormat {
+ @Override
+ protected boolean isSplitable(JobContext context, Path file) {
+ return true;
+ }
+
+ /**
+ * Returns a split for each store files directory using the block location
+ * of each file as locality reference.
+ */
+ @Override
+ public List<InputSplit> getSplits(JobContext job) throws IOException {
+ List<InputSplit> splits = new ArrayList<>();
+ List<FileStatus> files = listStatus(job);
+
+ Text key = new Text();
+ for (FileStatus file: files) {
+ Path path = file.getPath();
+ FileSystem fs = path.getFileSystem(job.getConfiguration());
+ LineReader reader = new LineReader(fs.open(path));
+ long pos = 0;
+ int n;
+ try {
+ while ((n = reader.readLine(key)) > 0) {
+ String[] hosts = getStoreDirHosts(fs, path);
+ splits.add(new FileSplit(path, pos, n, hosts));
+ pos += n;
+ }
+ } finally {
+ reader.close();
+ }
+ }
+
+ return splits;
+ }
+
+ /**
+ * return the top hosts of the store files, used by the Split
+ */
+ private static String[] getStoreDirHosts(final FileSystem fs, final Path path)
+ throws IOException {
+ FileStatus[] files = FSUtils.listStatus(fs, path);
+ if (files == null) {
+ return new String[] {};
+ }
+
+ HDFSBlocksDistribution hdfsBlocksDistribution = new HDFSBlocksDistribution();
+ for (FileStatus hfileStatus: files) {
+ HDFSBlocksDistribution storeFileBlocksDistribution =
+ FSUtils.computeHDFSBlocksDistribution(fs, hfileStatus, 0, hfileStatus.getLen());
+ hdfsBlocksDistribution.add(storeFileBlocksDistribution);
+ }
+
+ List<String> hosts = hdfsBlocksDistribution.getTopHosts();
+ return hosts.toArray(new String[hosts.size()]);
+ }
+
+ /**
+ * Create the input file for the given directories to compact.
+ * The file is a text file with each line corresponding to a
+ * store files directory to compact.
+ */
+ public static void createInputFile(final FileSystem fs, final Path path,
+ final Set<Path> toCompactDirs) throws IOException {
+ // Extract the list of store dirs
+ List<Path> storeDirs = new LinkedList<>();
+ for (Path compactDir: toCompactDirs) {
+ if (isFamilyDir(fs, compactDir)) {
+ storeDirs.add(compactDir);
+ } else if (isRegionDir(fs, compactDir)) {
+ for (Path familyDir: FSUtils.getFamilyDirs(fs, compactDir)) {
+ storeDirs.add(familyDir);
+ }
+ } else if (isTableDir(fs, compactDir)) {
+ // Lookup regions
+ for (Path regionDir: FSUtils.getRegionDirs(fs, compactDir)) {
+ for (Path familyDir: FSUtils.getFamilyDirs(fs, regionDir)) {
+ storeDirs.add(familyDir);
+ }
+ }
+ } else {
+ throw new IOException(
+ "Specified path is not a table, region or family directory. path=" + compactDir);
+ }
+ }
+
+ // Write Input File
+ FSDataOutputStream stream = fs.create(path);
+ LOG.info("Create input file=" + path + " with " + storeDirs.size() + " dirs to compact.");
+ try {
+ final byte[] newLine = Bytes.toBytes("\n");
+ for (Path storeDir: storeDirs) {
+ stream.write(Bytes.toBytes(storeDir.toString()));
+ stream.write(newLine);
+ }
+ } finally {
+ stream.close();
+ }
+ }
+ }
+
+ /**
+ * Execute compaction, using a Map-Reduce job.
+ */
+ private int doMapReduce(final FileSystem fs, final Set<Path> toCompactDirs,
+ final boolean compactOnce, final boolean major) throws Exception {
+ Configuration conf = getConf();
+ conf.setBoolean(CONF_COMPACT_ONCE, compactOnce);
+ conf.setBoolean(CONF_COMPACT_MAJOR, major);
+
+ Job job = new Job(conf);
+ job.setJobName("CompactionTool");
+ job.setJarByClass(CompactionTool.class);
+ job.setMapperClass(CompactionMapper.class);
+ job.setInputFormatClass(CompactionInputFormat.class);
+ job.setOutputFormatClass(NullOutputFormat.class);
+ job.setMapSpeculativeExecution(false);
+ job.setNumReduceTasks(0);
+
+ // add dependencies (including HBase ones)
+ TableMapReduceUtil.addDependencyJars(job);
+
+ Path stagingDir = JobUtil.getStagingDir(conf);
+ try {
+ // Create input file with the store dirs
+ Path inputPath = new Path(stagingDir, "compact-"+ EnvironmentEdgeManager.currentTime());
+ CompactionInputFormat.createInputFile(fs, inputPath, toCompactDirs);
+ CompactionInputFormat.addInputPath(job, inputPath);
+
+ // Initialize credential for secure cluster
+ TableMapReduceUtil.initCredentials(job);
+
+ // Start the MR Job and wait
+ return job.waitForCompletion(true) ? 0 : 1;
+ } finally {
+ fs.delete(stagingDir, true);
+ }
+ }
+
+ /**
+ * Execute compaction, from this client, one path at the time.
+ */
+ private int doClient(final FileSystem fs, final Set<Path> toCompactDirs,
+ final boolean compactOnce, final boolean major) throws IOException {
+ CompactionWorker worker = new CompactionWorker(fs, getConf());
+ for (Path path: toCompactDirs) {
+ worker.compact(path, compactOnce, major);
+ }
+ return 0;
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+ Set<Path> toCompactDirs = new HashSet<>();
+ boolean compactOnce = false;
+ boolean major = false;
+ boolean mapred = false;
+
+ Configuration conf = getConf();
+ FileSystem fs = FileSystem.get(conf);
+
+ try {
+ for (int i = 0; i < args.length; ++i) {
+ String opt = args[i];
+ if (opt.equals("-compactOnce")) {
+ compactOnce = true;
+ } else if (opt.equals("-major")) {
+ major = true;
+ } else if (opt.equals("-mapred")) {
+ mapred = true;
+ } else if (!opt.startsWith("-")) {
+ Path path = new Path(opt);
+ FileStatus status = fs.getFileStatus(path);
+ if (!status.isDirectory()) {
+ printUsage("Specified path is not a directory. path=" + path);
+ return 1;
+ }
+ toCompactDirs.add(path);
+ } else {
+ printUsage();
+ }
+ }
+ } catch (Exception e) {
+ printUsage(e.getMessage());
+ return 1;
+ }
+
+ if (toCompactDirs.isEmpty()) {
+ printUsage("No directories to compact specified.");
+ return 1;
+ }
+
+ // Execute compaction!
+ if (mapred) {
+ return doMapReduce(fs, toCompactDirs, compactOnce, major);
+ } else {
+ return doClient(fs, toCompactDirs, compactOnce, major);
+ }
+ }
+
+ private void printUsage() {
+ printUsage(null);
+ }
+
+ private void printUsage(final String message) {
+ if (message != null && message.length() > 0) {
+ System.err.println(message);
+ }
+ System.err.println("Usage: java " + this.getClass().getName() + " \\");
+ System.err.println(" [-compactOnce] [-major] [-mapred] [-D<property=value>]* files...");
+ System.err.println();
+ System.err.println("Options:");
+ System.err.println(" mapred Use MapReduce to run compaction.");
+ System.err.println(" compactOnce Execute just one compaction step. (default: while needed)");
+ System.err.println(" major Trigger major compaction.");
+ System.err.println();
+ System.err.println("Note: -D properties will be applied to the conf used. ");
+ System.err.println("For example: ");
+ System.err.println(" To preserve input files, pass -D"+CONF_COMPLETE_COMPACTION+"=false");
+ System.err.println(" To stop delete of compacted file, pass -D"+CONF_DELETE_COMPACTED+"=false");
+ System.err.println(" To set tmp dir, pass -D"+CONF_TMP_DIR+"=ALTERNATE_DIR");
+ System.err.println();
+ System.err.println("Examples:");
+ System.err.println(" To compact the full 'TestTable' using MapReduce:");
+ System.err.println(" $ hbase " + this.getClass().getName() + " -mapred hdfs:///hbase/data/default/TestTable");
+ System.err.println();
+ System.err.println(" To compact column family 'x' of the table 'TestTable' region 'abc':");
+ System.err.println(" $ hbase " + this.getClass().getName() + " hdfs:///hbase/data/default/TestTable/abc/x");
+ }
+
+ public static void main(String[] args) throws Exception {
+ System.exit(ToolRunner.run(HBaseConfiguration.create(), new CompactionTool(), args));
+ }
+}
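
Similarly, a small sketch of invoking CompactionTool from code; the HDFS path mirrors the example
in printUsage() and, like the rest of this sketch, is a hypothetical placeholder.

    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.regionserver.CompactionTool;
    import org.apache.hadoop.util.ToolRunner;

    public class CompactionToolExample {
      public static void main(String[] args) throws Exception {
        // Major-compact all regions and families of 'TestTable' via a MapReduce job.
        int rc = ToolRunner.run(HBaseConfiguration.create(), new CompactionTool(),
            new String[] { "-mapred", "-major", "hdfs:///hbase/data/default/TestTable" });
        System.exit(rc);
      }
    }
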
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatImpl.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatImpl.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatImpl.java
new file mode 100644
index 0000000..403051f
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatImpl.java
@@ -0,0 +1,410 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HDFSBlocksDistribution;
+import org.apache.hadoop.hbase.HDFSBlocksDistribution.HostAndWeight;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.ClientSideRegionScanner;
+import org.apache.hadoop.hbase.client.IsolationLevel;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MapReduceProtos.TableSnapshotRegionSplit;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
+import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper;
+import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
+import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.io.Writable;
+
+import java.io.ByteArrayOutputStream;
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.UUID;
+
+/**
+ * Hadoop MR API-agnostic implementation for mapreduce over table snapshots.
+ */
+@InterfaceAudience.Private
+public class TableSnapshotInputFormatImpl {
+ // TODO: Snapshots files are owned in fs by the hbase user. There is no
+ // easy way to delegate access.
+
+ public static final Log LOG = LogFactory.getLog(TableSnapshotInputFormatImpl.class);
+
+ private static final String SNAPSHOT_NAME_KEY = "hbase.TableSnapshotInputFormat.snapshot.name";
+ // key for specifying the root dir of the restored snapshot
+ protected static final String RESTORE_DIR_KEY = "hbase.TableSnapshotInputFormat.restore.dir";
+
+ /** See {@link #getBestLocations(Configuration, HDFSBlocksDistribution)} */
+ private static final String LOCALITY_CUTOFF_MULTIPLIER =
+ "hbase.tablesnapshotinputformat.locality.cutoff.multiplier";
+ private static final float DEFAULT_LOCALITY_CUTOFF_MULTIPLIER = 0.8f;
+
+ /**
+ * Implementation class for InputSplit logic common between mapred and mapreduce.
+ */
+ public static class InputSplit implements Writable {
+
+ private TableDescriptor htd;
+ private HRegionInfo regionInfo;
+ private String[] locations;
+ private String scan;
+ private String restoreDir;
+
+ // constructor for mapreduce framework / Writable
+ public InputSplit() {}
+
+ public InputSplit(TableDescriptor htd, HRegionInfo regionInfo, List<String> locations,
+ Scan scan, Path restoreDir) {
+ this.htd = htd;
+ this.regionInfo = regionInfo;
+ if (locations == null || locations.isEmpty()) {
+ this.locations = new String[0];
+ } else {
+ this.locations = locations.toArray(new String[locations.size()]);
+ }
+ try {
+ this.scan = scan != null ? TableMapReduceUtil.convertScanToString(scan) : "";
+ } catch (IOException e) {
+ LOG.warn("Failed to convert Scan to String", e);
+ }
+
+ this.restoreDir = restoreDir.toString();
+ }
+
+ public TableDescriptor getHtd() {
+ return htd;
+ }
+
+ public String getScan() {
+ return scan;
+ }
+
+ public String getRestoreDir() {
+ return restoreDir;
+ }
+
+ public long getLength() {
+ //TODO: We can obtain the file sizes of the snapshot here.
+ return 0;
+ }
+
+ public String[] getLocations() {
+ return locations;
+ }
+
+ public TableDescriptor getTableDescriptor() {
+ return htd;
+ }
+
+ public HRegionInfo getRegionInfo() {
+ return regionInfo;
+ }
+
+ // TODO: We should have ProtobufSerialization in Hadoop, and directly use PB objects instead of
+ // doing this wrapping with Writables.
+ @Override
+ public void write(DataOutput out) throws IOException {
+ TableSnapshotRegionSplit.Builder builder = TableSnapshotRegionSplit.newBuilder()
+ .setTable(ProtobufUtil.toTableSchema(htd))
+ .setRegion(HRegionInfo.convert(regionInfo));
+
+ for (String location : locations) {
+ builder.addLocations(location);
+ }
+
+ TableSnapshotRegionSplit split = builder.build();
+
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ split.writeTo(baos);
+ baos.close();
+ byte[] buf = baos.toByteArray();
+ out.writeInt(buf.length);
+ out.write(buf);
+
+ Bytes.writeByteArray(out, Bytes.toBytes(scan));
+ Bytes.writeByteArray(out, Bytes.toBytes(restoreDir));
+
+ }
+
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ int len = in.readInt();
+ byte[] buf = new byte[len];
+ in.readFully(buf);
+ TableSnapshotRegionSplit split = TableSnapshotRegionSplit.PARSER.parseFrom(buf);
+ this.htd = ProtobufUtil.toTableDescriptor(split.getTable());
+ this.regionInfo = HRegionInfo.convert(split.getRegion());
+ List<String> locationsList = split.getLocationsList();
+ this.locations = locationsList.toArray(new String[locationsList.size()]);
+
+ this.scan = Bytes.toString(Bytes.readByteArray(in));
+ this.restoreDir = Bytes.toString(Bytes.readByteArray(in));
+ }
+ }
+
+ /**
+ * Implementation class for RecordReader logic common between mapred and mapreduce.
+ */
+ public static class RecordReader {
+ private InputSplit split;
+ private Scan scan;
+ private Result result = null;
+ private ImmutableBytesWritable row = null;
+ private ClientSideRegionScanner scanner;
+
+ public ClientSideRegionScanner getScanner() {
+ return scanner;
+ }
+
+ public void initialize(InputSplit split, Configuration conf) throws IOException {
+ this.scan = TableMapReduceUtil.convertStringToScan(split.getScan());
+ this.split = split;
+ TableDescriptor htd = split.htd;
+ HRegionInfo hri = this.split.getRegionInfo();
+ FileSystem fs = FSUtils.getCurrentFileSystem(conf);
+
+ // region is immutable, this should be fine,
+ // otherwise we have to set the thread read point
+ scan.setIsolationLevel(IsolationLevel.READ_UNCOMMITTED);
+ // disable caching of data blocks
+ scan.setCacheBlocks(false);
+
+ scanner =
+ new ClientSideRegionScanner(conf, fs, new Path(split.restoreDir), htd, hri, scan, null);
+ }
+
+ public boolean nextKeyValue() throws IOException {
+ result = scanner.next();
+ if (result == null) {
+ //we are done
+ return false;
+ }
+
+ if (this.row == null) {
+ this.row = new ImmutableBytesWritable();
+ }
+ this.row.set(result.getRow());
+ return true;
+ }
+
+ public ImmutableBytesWritable getCurrentKey() {
+ return row;
+ }
+
+ public Result getCurrentValue() {
+ return result;
+ }
+
+ public long getPos() {
+ return 0;
+ }
+
+ public float getProgress() {
+ return 0; // TODO: use total bytes to estimate
+ }
+
+ public void close() {
+ if (this.scanner != null) {
+ this.scanner.close();
+ }
+ }
+ }
+
+ public static List<InputSplit> getSplits(Configuration conf) throws IOException {
+ String snapshotName = getSnapshotName(conf);
+
+ Path rootDir = FSUtils.getRootDir(conf);
+ FileSystem fs = rootDir.getFileSystem(conf);
+
+ SnapshotManifest manifest = getSnapshotManifest(conf, snapshotName, rootDir, fs);
+
+ List<HRegionInfo> regionInfos = getRegionInfosFromManifest(manifest);
+
+ // TODO: mapred does not support scan as input API. Work around for now.
+ Scan scan = extractScanFromConf(conf);
+ // the temp dir where the snapshot is restored
+ Path restoreDir = new Path(conf.get(RESTORE_DIR_KEY));
+
+ return getSplits(scan, manifest, regionInfos, restoreDir, conf);
+ }
+
+ public static List<HRegionInfo> getRegionInfosFromManifest(SnapshotManifest manifest) {
+ List<SnapshotRegionManifest> regionManifests = manifest.getRegionManifests();
+ if (regionManifests == null) {
+ throw new IllegalArgumentException("Snapshot seems empty");
+ }
+
+ List<HRegionInfo> regionInfos = Lists.newArrayListWithCapacity(regionManifests.size());
+
+ for (SnapshotRegionManifest regionManifest : regionManifests) {
+ HRegionInfo hri = HRegionInfo.convert(regionManifest.getRegionInfo());
+ if (hri.isOffline() && (hri.isSplit() || hri.isSplitParent())) {
+ continue;
+ }
+ regionInfos.add(hri);
+ }
+ return regionInfos;
+ }
+
+ public static SnapshotManifest getSnapshotManifest(Configuration conf, String snapshotName,
+ Path rootDir, FileSystem fs) throws IOException {
+ Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir);
+ SnapshotDescription snapshotDesc = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
+ return SnapshotManifest.open(conf, fs, snapshotDir, snapshotDesc);
+ }
+
+ public static Scan extractScanFromConf(Configuration conf) throws IOException {
+ Scan scan = null;
+ if (conf.get(TableInputFormat.SCAN) != null) {
+ scan = TableMapReduceUtil.convertStringToScan(conf.get(TableInputFormat.SCAN));
+ } else if (conf.get(org.apache.hadoop.hbase.mapred.TableInputFormat.COLUMN_LIST) != null) {
+ String[] columns =
+ conf.get(org.apache.hadoop.hbase.mapred.TableInputFormat.COLUMN_LIST).split(" ");
+ scan = new Scan();
+ for (String col : columns) {
+ scan.addFamily(Bytes.toBytes(col));
+ }
+ } else {
+ throw new IllegalArgumentException("Unable to create scan");
+ }
+ return scan;
+ }
+
+ public static List<InputSplit> getSplits(Scan scan, SnapshotManifest manifest,
+ List<HRegionInfo> regionManifests, Path restoreDir, Configuration conf) throws IOException {
+ // load table descriptor
+ TableDescriptor htd = manifest.getTableDescriptor();
+
+ Path tableDir = FSUtils.getTableDir(restoreDir, htd.getTableName());
+
+ List<InputSplit> splits = new ArrayList<>();
+ for (HRegionInfo hri : regionManifests) {
+ // load region descriptor
+
+ if (CellUtil.overlappingKeys(scan.getStartRow(), scan.getStopRow(), hri.getStartKey(),
+ hri.getEndKey())) {
+ // compute HDFS locations from snapshot files (which will get the locations for
+ // referred hfiles)
+ List<String> hosts = getBestLocations(conf,
+ HRegion.computeHDFSBlocksDistribution(conf, htd, hri, tableDir));
+
+ int len = Math.min(3, hosts.size());
+ hosts = hosts.subList(0, len);
+ splits.add(new InputSplit(htd, hri, hosts, scan, restoreDir));
+ }
+ }
+
+ return splits;
+
+ }
+
+ /**
+ * Computes the locations to be passed from the InputSplit. MR/YARN schedulers do not take
+ * weights into account, so every location passed from the input split is treated as equal. We
+ * do not want to blindly pass all the locations, since we are creating one split per region, and
+ * the region's blocks are distributed throughout the cluster unless favored node assignment
+ * is used. In the expected stable case, only one location will hold most of the blocks locally;
+ * with favored node assignment, three nodes will hold highly local blocks. We therefore apply a
+ * simple heuristic and pass along every host whose block locality is at least 80%
+ * (hbase.tablesnapshotinputformat.locality.cutoff.multiplier) of that of the top
+ * host with the best locality.
+ */
+ public static List<String> getBestLocations(
+ Configuration conf, HDFSBlocksDistribution blockDistribution) {
+ List<String> locations = new ArrayList<>(3);
+
+ HostAndWeight[] hostAndWeights = blockDistribution.getTopHostsWithWeights();
+
+ if (hostAndWeights.length == 0) {
+ return locations;
+ }
+
+ HostAndWeight topHost = hostAndWeights[0];
+ locations.add(topHost.getHost());
+
+ // Heuristic: filter all hosts which have at least cutoffMultiplier % of block locality
+ double cutoffMultiplier
+ = conf.getFloat(LOCALITY_CUTOFF_MULTIPLIER, DEFAULT_LOCALITY_CUTOFF_MULTIPLIER);
+
+ double filterWeight = topHost.getWeight() * cutoffMultiplier;
+
+ for (int i = 1; i < hostAndWeights.length; i++) {
+ if (hostAndWeights[i].getWeight() >= filterWeight) {
+ locations.add(hostAndWeights[i].getHost());
+ } else {
+ break;
+ }
+ }
+
+ return locations;
+ }
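+
+ // Worked example with illustrative numbers (not taken from any real cluster): given top-host
+ // weights host1=100, host2=85, host3=60 and the default cutoff multiplier 0.8, the filter
+ // weight is 100 * 0.8 = 80, so host1 and host2 are returned while host3 (60 < 80) and
+ // everything after it is dropped.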
+
+ private static String getSnapshotName(Configuration conf) {
+ String snapshotName = conf.get(SNAPSHOT_NAME_KEY);
+ if (snapshotName == null) {
+ throw new IllegalArgumentException("Snapshot name must be provided");
+ }
+ return snapshotName;
+ }
+
+ /**
+ * Configures the job to use TableSnapshotInputFormat to read from a snapshot.
+ * @param conf the job configuration
+ * @param snapshotName the name of the snapshot to read from
+ * @param restoreDir a temporary directory to restore the snapshot into. Current user should
+ * have write permissions to this directory, and this should not be a subdirectory of rootdir.
+ * After the job is finished, restoreDir can be deleted.
+ * @throws IOException if an error occurs
+ */
+ public static void setInput(Configuration conf, String snapshotName, Path restoreDir)
+ throws IOException {
+ conf.set(SNAPSHOT_NAME_KEY, snapshotName);
+
+ Path rootDir = FSUtils.getRootDir(conf);
+ FileSystem fs = rootDir.getFileSystem(conf);
+
+ restoreDir = new Path(restoreDir, UUID.randomUUID().toString());
+
+ // TODO: restore from record readers to parallelize.
+ RestoreSnapshotHelper.copySnapshotForScanner(conf, fs, rootDir, restoreDir, snapshotName);
+
+ conf.set(RESTORE_DIR_KEY, restoreDir.toString());
+ }
+}
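
To make the flow above concrete, here is a minimal, hypothetical driver sketch (not part of this patch). It assumes a client configuration pointing at a live cluster, a snapshot named "demo_snapshot", and a scratch restore directory; the empty Scan is serialized into the configuration because getSplits() resolves it through extractScanFromConf().

import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormatImpl;

public class SnapshotSplitsSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // getSplits() needs a Scan in the configuration; serialize an empty one.
    conf.set(TableInputFormat.SCAN, TableMapReduceUtil.convertScanToString(new Scan()));
    // Snapshot name and restore directory are placeholders for illustration only.
    TableSnapshotInputFormatImpl.setInput(conf, "demo_snapshot", new Path("/tmp/restore-demo"));
    List<TableSnapshotInputFormatImpl.InputSplit> splits =
        TableSnapshotInputFormatImpl.getSplits(conf);
    System.out.println("Computed " + splits.size() + " splits");
  }
}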
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSplit.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSplit.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSplit.java
new file mode 100644
index 0000000..13c7c67
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSplit.java
@@ -0,0 +1,395 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableUtils;
+import org.apache.hadoop.mapreduce.InputSplit;
+
+/**
+ * A table split corresponds to a key range (low, high) and an optional scanner.
+ * All references to row below refer to the key of the row.
+ */
+@InterfaceAudience.Public
+public class TableSplit extends InputSplit
+implements Writable, Comparable<TableSplit> {
+ /** @deprecated The LOG variable will be made private in HBase 3.0. */
+ @Deprecated
+ public static final Log LOG = LogFactory.getLog(TableSplit.class);
+
+ // should be < 0 (@see #readFields(DataInput))
+ // version 1 supports Scan data member
+ enum Version {
+ UNVERSIONED(0),
+ // Initial number we put on TableSplit when we introduced versioning.
+ INITIAL(-1),
+ // Added an encoded region name field for easier identification of split -> region
+ WITH_ENCODED_REGION_NAME(-2);
+
+ final int code;
+ static final Version[] byCode;
+ static {
+ byCode = Version.values();
+ for (int i = 0; i < byCode.length; i++) {
+ if (byCode[i].code != -1 * i) {
+ throw new AssertionError("Values in this enum should be descending by one");
+ }
+ }
+ }
+
+ Version(int code) {
+ this.code = code;
+ }
+
+ boolean atLeast(Version other) {
+ return code <= other.code;
+ }
+
+ static Version fromCode(int code) {
+ return byCode[code * -1];
+ }
+ }
+
+ private static final Version VERSION = Version.WITH_ENCODED_REGION_NAME;
+ private TableName tableName;
+ private byte [] startRow;
+ private byte [] endRow;
+ private String regionLocation;
+ private String encodedRegionName = "";
+ private String scan = ""; // stores the serialized form of the Scan
+ private long length; // Contains estimation of region size in bytes
+
+ /** Default constructor. */
+ public TableSplit() {
+ this((TableName)null, null, HConstants.EMPTY_BYTE_ARRAY,
+ HConstants.EMPTY_BYTE_ARRAY, "");
+ }
+
+ /**
+ * Creates a new instance while assigning all variables.
+ * The length of the region is set to 0 and the encoded name of the region is left blank.
+ *
+ * @param tableName The name of the current table.
+ * @param scan The scan associated with this split.
+ * @param startRow The start row of the split.
+ * @param endRow The end row of the split.
+ * @param location The location of the region.
+ */
+ public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow,
+ final String location) {
+ this(tableName, scan, startRow, endRow, location, 0L);
+ }
+
+ /**
+ * Creates a new instance while assigning all variables.
+ * The encoded name of the region is left blank.
+ *
+ * @param tableName The name of the current table.
+ * @param scan The scan associated with this split.
+ * @param startRow The start row of the split.
+ * @param endRow The end row of the split.
+ * @param location The location of the region.
+ * @param length Size of the region in bytes.
+ */
+ public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow,
+ final String location, long length) {
+ this(tableName, scan, startRow, endRow, location, "", length);
+ }
+
+ /**
+ * Creates a new instance while assigning all variables.
+ *
+ * @param tableName The name of the current table.
+ * @param scan The scan associated with this split.
+ * @param startRow The start row of the split.
+ * @param endRow The end row of the split.
+ * @param location The location of the region.
+ * @param encodedRegionName The encoded name of the region.
+ * @param length Size of the region in bytes.
+ */
+ public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow,
+ final String location, final String encodedRegionName, long length) {
+ this.tableName = tableName;
+ try {
+ this.scan =
+ (null == scan) ? "" : TableMapReduceUtil.convertScanToString(scan);
+ } catch (IOException e) {
+ LOG.warn("Failed to convert Scan to String", e);
+ }
+ this.startRow = startRow;
+ this.endRow = endRow;
+ this.regionLocation = location;
+ this.encodedRegionName = encodedRegionName;
+ this.length = length;
+ }
+
+ /**
+ * Creates a new instance without a scanner.
+ * The length of the region is set to 0.
+ *
+ * @param tableName The name of the current table.
+ * @param startRow The start row of the split.
+ * @param endRow The end row of the split.
+ * @param location The location of the region.
+ */
+ public TableSplit(TableName tableName, byte[] startRow, byte[] endRow,
+ final String location) {
+ this(tableName, null, startRow, endRow, location);
+ }
+
+ /**
+ * Creates a new instance without a scanner.
+ *
+ * @param tableName The name of the current table.
+ * @param startRow The start row of the split.
+ * @param endRow The end row of the split.
+ * @param location The location of the region.
+ * @param length Size of region in bytes
+ */
+ public TableSplit(TableName tableName, byte[] startRow, byte[] endRow,
+ final String location, long length) {
+ this(tableName, null, startRow, endRow, location, length);
+ }
+
+ /**
+ * Returns a Scan object from the stored string representation.
+ *
+ * @return Returns a Scan object based on the stored scanner.
+ * @throws IOException
+ */
+ public Scan getScan() throws IOException {
+ return TableMapReduceUtil.convertStringToScan(this.scan);
+ }
+
+ /**
+ * Returns the table name converted to a byte array.
+ * @see #getTable()
+ * @return The table name.
+ */
+ public byte [] getTableName() {
+ return tableName.getName();
+ }
+
+ /**
+ * Returns the table name.
+ *
+ * @return The table name.
+ */
+ public TableName getTable() {
+ // Ideally this accessor would be called getTableName, but that name is already taken by the
+ // byte[] variant above, so this one is called getTable instead.
+ return tableName;
+ }
+
+ /**
+ * Returns the start row.
+ *
+ * @return The start row.
+ */
+ public byte [] getStartRow() {
+ return startRow;
+ }
+
+ /**
+ * Returns the end row.
+ *
+ * @return The end row.
+ */
+ public byte [] getEndRow() {
+ return endRow;
+ }
+
+ /**
+ * Returns the region location.
+ *
+ * @return The region's location.
+ */
+ public String getRegionLocation() {
+ return regionLocation;
+ }
+
+ /**
+ * Returns the region's location as an array.
+ *
+ * @return The array containing the region location.
+ * @see org.apache.hadoop.mapreduce.InputSplit#getLocations()
+ */
+ @Override
+ public String[] getLocations() {
+ return new String[] {regionLocation};
+ }
+
+ /**
+ * Returns the region's encoded name.
+ *
+ * @return The region's encoded name.
+ */
+ public String getEncodedRegionName() {
+ return encodedRegionName;
+ }
+
+ /**
+ * Returns the length of the split.
+ *
+ * @return The length of the split.
+ * @see org.apache.hadoop.mapreduce.InputSplit#getLength()
+ */
+ @Override
+ public long getLength() {
+ return length;
+ }
+
+ /**
+ * Reads the values of each field.
+ *
+ * @param in The input to read from.
+ * @throws IOException When reading the input fails.
+ */
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ Version version = Version.UNVERSIONED;
+ // TableSplit was not versioned in the beginning.
+ // In order to introduce it now, we make use of the fact
+ // that tableName was written with Bytes.writeByteArray,
+ // which encodes the array length as a vint which is >= 0.
+ // Hence if the vint is >= 0 we have an old version and the vint
+ // encodes the length of tableName.
+ // If < 0 we just read the version and the next vint is the length.
+ // @see Bytes#readByteArray(DataInput)
+ int len = WritableUtils.readVInt(in);
+ if (len < 0) {
+ // what we just read was the version
+ version = Version.fromCode(len);
+ len = WritableUtils.readVInt(in);
+ }
+ byte[] tableNameBytes = new byte[len];
+ in.readFully(tableNameBytes);
+ tableName = TableName.valueOf(tableNameBytes);
+ startRow = Bytes.readByteArray(in);
+ endRow = Bytes.readByteArray(in);
+ regionLocation = Bytes.toString(Bytes.readByteArray(in));
+ if (version.atLeast(Version.INITIAL)) {
+ scan = Bytes.toString(Bytes.readByteArray(in));
+ }
+ length = WritableUtils.readVLong(in);
+ if (version.atLeast(Version.WITH_ENCODED_REGION_NAME)) {
+ encodedRegionName = Bytes.toString(Bytes.readByteArray(in));
+ }
+ }
+
+ /**
+ * Writes the field values to the output.
+ *
+ * @param out The output to write to.
+ * @throws IOException When writing the values to the output fails.
+ */
+ @Override
+ public void write(DataOutput out) throws IOException {
+ WritableUtils.writeVInt(out, VERSION.code);
+ Bytes.writeByteArray(out, tableName.getName());
+ Bytes.writeByteArray(out, startRow);
+ Bytes.writeByteArray(out, endRow);
+ Bytes.writeByteArray(out, Bytes.toBytes(regionLocation));
+ Bytes.writeByteArray(out, Bytes.toBytes(scan));
+ WritableUtils.writeVLong(out, length);
+ Bytes.writeByteArray(out, Bytes.toBytes(encodedRegionName));
+ }
+
+ /**
+ * Returns the details about this instance as a string.
+ *
+ * @return The values of this instance as a string.
+ * @see java.lang.Object#toString()
+ */
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("HBase table split(");
+ sb.append("table name: ").append(tableName);
+ // null scan input is represented by ""
+ String printScan = "";
+ if (!scan.equals("")) {
+ try {
+ // get the real scan here in toString, not the Base64 string
+ printScan = TableMapReduceUtil.convertStringToScan(scan).toString();
+ }
+ catch (IOException e) {
+ printScan = "";
+ }
+ }
+ sb.append(", scan: ").append(printScan);
+ sb.append(", start row: ").append(Bytes.toStringBinary(startRow));
+ sb.append(", end row: ").append(Bytes.toStringBinary(endRow));
+ sb.append(", region location: ").append(regionLocation);
+ sb.append(", encoded region name: ").append(encodedRegionName);
+ sb.append(")");
+ return sb.toString();
+ }
+
+ /**
+ * Compares this split against the given one.
+ *
+ * @param split The split to compare to.
+ * @return The result of the comparison.
+ * @see java.lang.Comparable#compareTo(java.lang.Object)
+ */
+ @Override
+ public int compareTo(TableSplit split) {
+ // If the table names of the two splits are the same, compare start rows;
+ // otherwise compare the table names.
+ int tableNameComparison =
+ getTable().compareTo(split.getTable());
+ return tableNameComparison != 0 ? tableNameComparison : Bytes.compareTo(
+ getStartRow(), split.getStartRow());
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (o == null || !(o instanceof TableSplit)) {
+ return false;
+ }
+ return tableName.equals(((TableSplit)o).tableName) &&
+ Bytes.equals(startRow, ((TableSplit)o).startRow) &&
+ Bytes.equals(endRow, ((TableSplit)o).endRow) &&
+ regionLocation.equals(((TableSplit)o).regionLocation);
+ }
+
+ @Override
+ public int hashCode() {
+ int result = tableName != null ? tableName.hashCode() : 0;
+ result = 31 * result + (scan != null ? scan.hashCode() : 0);
+ result = 31 * result + (startRow != null ? Arrays.hashCode(startRow) : 0);
+ result = 31 * result + (endRow != null ? Arrays.hashCode(endRow) : 0);
+ result = 31 * result + (regionLocation != null ? regionLocation.hashCode() : 0);
+ result = 31 * result + (encodedRegionName != null ? encodedRegionName.hashCode() : 0);
+ return result;
+ }
+}
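
As a hedged illustration of the Writable round trip implemented by write()/readFields() above, the sketch below serializes a TableSplit to a byte array and reads it back; the table name, row range, location and length are made-up values.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.mapreduce.TableSplit;
import org.apache.hadoop.hbase.util.Bytes;

public class TableSplitRoundTripSketch {
  public static void main(String[] args) throws Exception {
    // All of these values are illustrative.
    TableSplit original = new TableSplit(TableName.valueOf("demo_table"),
        Bytes.toBytes("row-a"), Bytes.toBytes("row-z"), "rs1.example.com", 1024L);

    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    original.write(new DataOutputStream(bytes));

    TableSplit copy = new TableSplit();
    copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));

    // equals() compares table name, start/end row and region location.
    System.out.println("Round trip equal: " + original.equals(copy));
  }
}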
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TextSortReducer.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TextSortReducer.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TextSortReducer.java
new file mode 100644
index 0000000..30cd461
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TextSortReducer.java
@@ -0,0 +1,213 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+import java.util.TreeSet;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.ArrayBackedTag;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellComparator;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.KeyValueUtil;
+import org.apache.hadoop.hbase.Tag;
+import org.apache.hadoop.hbase.TagType;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.security.visibility.InvalidLabelException;
+import org.apache.hadoop.hbase.util.Base64;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Counter;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.util.StringUtils;
+
+/**
+ * Emits sorted KeyValues. Parses the passed text, creates KeyValues, and sorts them before emitting.
+ * @see HFileOutputFormat2
+ * @see KeyValueSortReducer
+ * @see PutSortReducer
+ */
+@InterfaceAudience.Public
+public class TextSortReducer extends
+ Reducer<ImmutableBytesWritable, Text, ImmutableBytesWritable, KeyValue> {
+
+ /** Timestamp for all inserted rows */
+ private long ts;
+
+ /** Column separator */
+ private String separator;
+
+ /** Should skip bad lines */
+ private boolean skipBadLines;
+
+ private Counter badLineCount;
+
+ private ImportTsv.TsvParser parser;
+
+ /** Cell visibility expr **/
+ private String cellVisibilityExpr;
+
+ /** Cell TTL */
+ private long ttl;
+
+ private CellCreator kvCreator;
+
+ public long getTs() {
+ return ts;
+ }
+
+ public boolean getSkipBadLines() {
+ return skipBadLines;
+ }
+
+ public Counter getBadLineCount() {
+ return badLineCount;
+ }
+
+ public void incrementBadLineCount(int count) {
+ this.badLineCount.increment(count);
+ }
+
+ /**
+ * Handles initializing this class with objects specific to it (i.e., the parser).
+ * Common initialization that might be leveraged by a subclass is done in
+ * <code>doSetup</code>. Hence a subclass may choose to override this method
+ * and call <code>doSetup</code> as well before handling its own custom params.
+ *
+ * @param context
+ */
+ @Override
+ protected void setup(Context context) {
+ Configuration conf = context.getConfiguration();
+ doSetup(context, conf);
+
+ parser = new ImportTsv.TsvParser(conf.get(ImportTsv.COLUMNS_CONF_KEY), separator);
+ if (parser.getRowKeyColumnIndex() == -1) {
+ throw new RuntimeException("No row key column specified");
+ }
+ this.kvCreator = new CellCreator(conf);
+ }
+
+ /**
+ * Handles common parameter initialization that a subclass might want to leverage.
+ * @param context
+ * @param conf
+ */
+ protected void doSetup(Context context, Configuration conf) {
+ // If a custom separator has been used,
+ // decode it back from Base64 encoding.
+ separator = conf.get(ImportTsv.SEPARATOR_CONF_KEY);
+ if (separator == null) {
+ separator = ImportTsv.DEFAULT_SEPARATOR;
+ } else {
+ separator = new String(Base64.decode(separator));
+ }
+
+ // Should never get 0 as we are setting this to a valid value in job configuration.
+ ts = conf.getLong(ImportTsv.TIMESTAMP_CONF_KEY, 0);
+
+ skipBadLines = context.getConfiguration().getBoolean(ImportTsv.SKIP_LINES_CONF_KEY, true);
+ badLineCount = context.getCounter("ImportTsv", "Bad Lines");
+ }
+
+ @Override
+ protected void reduce(
+ ImmutableBytesWritable rowKey,
+ java.lang.Iterable<Text> lines,
+ Reducer<ImmutableBytesWritable, Text,
+ ImmutableBytesWritable, KeyValue>.Context context)
+ throws java.io.IOException, InterruptedException
+ {
+ // although reduce() is called per-row, handle pathological case
+ long threshold = context.getConfiguration().getLong(
+ "reducer.row.threshold", 1L * (1<<30));
+ Iterator<Text> iter = lines.iterator();
+ while (iter.hasNext()) {
+ Set<KeyValue> kvs = new TreeSet<>(CellComparator.COMPARATOR);
+ long curSize = 0;
+ // stop at the end or the RAM threshold
+ while (iter.hasNext() && curSize < threshold) {
+ Text line = iter.next();
+ byte[] lineBytes = line.getBytes();
+ try {
+ ImportTsv.TsvParser.ParsedLine parsed = parser.parse(lineBytes, line.getLength());
+ // Retrieve timestamp if exists
+ ts = parsed.getTimestamp(ts);
+ cellVisibilityExpr = parsed.getCellVisibility();
+ ttl = parsed.getCellTTL();
+
+ // create tags for the parsed line
+ List<Tag> tags = new ArrayList<>();
+ if (cellVisibilityExpr != null) {
+ tags.addAll(kvCreator.getVisibilityExpressionResolver().createVisibilityExpTags(
+ cellVisibilityExpr));
+ }
+ // Add TTL directly to the KV so we can vary them when packing more than one KV
+ // into puts
+ if (ttl > 0) {
+ tags.add(new ArrayBackedTag(TagType.TTL_TAG_TYPE, Bytes.toBytes(ttl)));
+ }
+ for (int i = 0; i < parsed.getColumnCount(); i++) {
+ if (i == parser.getRowKeyColumnIndex() || i == parser.getTimestampKeyColumnIndex()
+ || i == parser.getAttributesKeyColumnIndex() || i == parser.getCellVisibilityColumnIndex()
+ || i == parser.getCellTTLColumnIndex()) {
+ continue;
+ }
+ // Creating the KV which needs to be directly written to HFiles. Using the Facade
+ // KVCreator for creation of kvs.
+ Cell cell = this.kvCreator.create(lineBytes, parsed.getRowKeyOffset(),
+ parsed.getRowKeyLength(), parser.getFamily(i), 0, parser.getFamily(i).length,
+ parser.getQualifier(i), 0, parser.getQualifier(i).length, ts, lineBytes,
+ parsed.getColumnOffset(i), parsed.getColumnLength(i), tags);
+ KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
+ kvs.add(kv);
+ curSize += kv.heapSize();
+ }
+ } catch (ImportTsv.TsvParser.BadTsvLineException | IllegalArgumentException
+ | InvalidLabelException badLine) {
+ if (skipBadLines) {
+ System.err.println("Bad line." + badLine.getMessage());
+ incrementBadLineCount(1);
+ continue;
+ }
+ throw new IOException(badLine);
+ }
+ }
+ context.setStatus("Read " + kvs.size() + " entries of " + kvs.getClass()
+ + "(" + StringUtils.humanReadableInt(curSize) + ")");
+ int index = 0;
+ for (KeyValue kv : kvs) {
+ context.write(rowKey, kv);
+ if (++index > 0 && index % 100 == 0)
+ context.setStatus("Wrote " + index + " key values.");
+ }
+
+ // if we have more entries to process
+ if (iter.hasNext()) {
+ // force flush because we cannot guarantee intra-row sorted order
+ context.write(null, null);
+ }
+ }
+ }
+}
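
The reducer above is normally wired together by ImportTsv when bulk output and the text mapper are used. The sketch below is an assumption-laden outline, not the tool's actual driver; the column spec and job name are illustrative, and the HFileOutputFormat2/table wiring that ImportTsv performs is omitted.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.ImportTsv;
import org.apache.hadoop.hbase.mapreduce.TextSortReducer;
import org.apache.hadoop.hbase.mapreduce.TsvImporterTextMapper;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class TextSortReducerWiringSketch {
  public static Job buildJob() throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // Illustrative column spec: first column is the row key, the rest map to family d.
    conf.set(ImportTsv.COLUMNS_CONF_KEY, "HBASE_ROW_KEY,d:c1,d:c2");

    Job job = Job.getInstance(conf, "text-sort-reducer-sketch");
    job.setJarByClass(TextSortReducer.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(TsvImporterTextMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setReducerClass(TextSortReducer.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    // Input paths and HFileOutputFormat2 configuration are intentionally left out.
    return job;
  }
}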
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TsvImporterMapper.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TsvImporterMapper.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TsvImporterMapper.java
new file mode 100644
index 0000000..3c507b3
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TsvImporterMapper.java
@@ -0,0 +1,232 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.ArrayBackedTag;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.Tag;
+import org.apache.hadoop.hbase.TagType;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser.BadTsvLineException;
+import org.apache.hadoop.hbase.security.visibility.CellVisibility;
+import org.apache.hadoop.hbase.security.visibility.InvalidLabelException;
+import org.apache.hadoop.hbase.util.Base64;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Counter;
+import org.apache.hadoop.mapreduce.Mapper;
+
+/**
+ * Converts each line of TSV text into a {@link Put} for import into an HBase table.
+ */
+@InterfaceAudience.Public
+public class TsvImporterMapper
+extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put>
+{
+
+ /** Timestamp for all inserted rows */
+ protected long ts;
+
+ /** Column separator */
+ private String separator;
+
+ /** Should skip bad lines */
+ private boolean skipBadLines;
+ /** Should skip empty columns*/
+ private boolean skipEmptyColumns;
+ private Counter badLineCount;
+ private boolean logBadLines;
+
+ protected ImportTsv.TsvParser parser;
+
+ protected Configuration conf;
+
+ protected String cellVisibilityExpr;
+
+ protected long ttl;
+
+ protected CellCreator kvCreator;
+
+ private String hfileOutPath;
+
+ /** List of cell tags */
+ private List<Tag> tags;
+
+ public long getTs() {
+ return ts;
+ }
+
+ public boolean getSkipBadLines() {
+ return skipBadLines;
+ }
+
+ public Counter getBadLineCount() {
+ return badLineCount;
+ }
+
+ public void incrementBadLineCount(int count) {
+ this.badLineCount.increment(count);
+ }
+
+ /**
+ * Handles initializing this class with objects specific to it (i.e., the parser).
+ * Common initialization that might be leveraged by a subclass is done in
+ * <code>doSetup</code>. Hence a subclass may choose to override this method
+ * and call <code>doSetup</code> as well before handling its own custom params.
+ *
+ * @param context
+ */
+ @Override
+ protected void setup(Context context) {
+ doSetup(context);
+
+ conf = context.getConfiguration();
+ parser = new ImportTsv.TsvParser(conf.get(ImportTsv.COLUMNS_CONF_KEY),
+ separator);
+ if (parser.getRowKeyColumnIndex() == -1) {
+ throw new RuntimeException("No row key column specified");
+ }
+ this.kvCreator = new CellCreator(conf);
+ tags = new ArrayList<>();
+ }
+
+ /**
+ * Handles common parameter initialization that a subclass might want to leverage.
+ * @param context
+ */
+ protected void doSetup(Context context) {
+ Configuration conf = context.getConfiguration();
+
+ // If a custom separator has been used,
+ // decode it back from Base64 encoding.
+ separator = conf.get(ImportTsv.SEPARATOR_CONF_KEY);
+ if (separator == null) {
+ separator = ImportTsv.DEFAULT_SEPARATOR;
+ } else {
+ separator = new String(Base64.decode(separator));
+ }
+ // Should never get 0 as we are setting this to a valid value in job
+ // configuration.
+ ts = conf.getLong(ImportTsv.TIMESTAMP_CONF_KEY, 0);
+
+ skipEmptyColumns = context.getConfiguration().getBoolean(
+ ImportTsv.SKIP_EMPTY_COLUMNS, false);
+ skipBadLines = context.getConfiguration().getBoolean(
+ ImportTsv.SKIP_LINES_CONF_KEY, true);
+ badLineCount = context.getCounter("ImportTsv", "Bad Lines");
+ logBadLines = context.getConfiguration().getBoolean(ImportTsv.LOG_BAD_LINES_CONF_KEY, false);
+ hfileOutPath = conf.get(ImportTsv.BULK_OUTPUT_CONF_KEY);
+ }
+
+ /**
+ * Convert a line of TSV text into an HBase table row.
+ */
+ @Override
+ public void map(LongWritable offset, Text value,
+ Context context)
+ throws IOException {
+ byte[] lineBytes = value.getBytes();
+
+ try {
+ ImportTsv.TsvParser.ParsedLine parsed = parser.parse(
+ lineBytes, value.getLength());
+ ImmutableBytesWritable rowKey =
+ new ImmutableBytesWritable(lineBytes,
+ parsed.getRowKeyOffset(),
+ parsed.getRowKeyLength());
+ // Retrieve timestamp if exists
+ ts = parsed.getTimestamp(ts);
+ cellVisibilityExpr = parsed.getCellVisibility();
+ ttl = parsed.getCellTTL();
+
+ // create tags for the parsed line
+ if (hfileOutPath != null) {
+ tags.clear();
+ if (cellVisibilityExpr != null) {
+ tags.addAll(kvCreator.getVisibilityExpressionResolver().createVisibilityExpTags(
+ cellVisibilityExpr));
+ }
+ // Add TTL directly to the KV so we can vary them when packing more than one KV
+ // into puts
+ if (ttl > 0) {
+ tags.add(new ArrayBackedTag(TagType.TTL_TAG_TYPE, Bytes.toBytes(ttl)));
+ }
+ }
+ Put put = new Put(rowKey.copyBytes());
+ for (int i = 0; i < parsed.getColumnCount(); i++) {
+ if (i == parser.getRowKeyColumnIndex() || i == parser.getTimestampKeyColumnIndex()
+ || i == parser.getAttributesKeyColumnIndex() || i == parser.getCellVisibilityColumnIndex()
+ || i == parser.getCellTTLColumnIndex() || (skipEmptyColumns
+ && parsed.getColumnLength(i) == 0)) {
+ continue;
+ }
+ populatePut(lineBytes, parsed, put, i);
+ }
+ context.write(rowKey, put);
+ } catch (ImportTsv.TsvParser.BadTsvLineException | IllegalArgumentException
+ | InvalidLabelException badLine) {
+ if (logBadLines) {
+ System.err.println(value);
+ }
+ System.err.println("Bad line at offset: " + offset.get() + ":\n" + badLine.getMessage());
+ if (skipBadLines) {
+ incrementBadLineCount(1);
+ return;
+ }
+ throw new IOException(badLine);
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ // Restore the interrupt status, as TsvImporterTextMapper does.
+ Thread.currentThread().interrupt();
+ }
+ }
+
+ protected void populatePut(byte[] lineBytes, ImportTsv.TsvParser.ParsedLine parsed, Put put,
+ int i) throws BadTsvLineException, IOException {
+ Cell cell = null;
+ if (hfileOutPath == null) {
+ cell = new KeyValue(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength(),
+ parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0,
+ parser.getQualifier(i).length, ts, KeyValue.Type.Put, lineBytes,
+ parsed.getColumnOffset(i), parsed.getColumnLength(i));
+ if (cellVisibilityExpr != null) {
+ // We won't be validating the expression here. The Visibility CP will do
+ // the validation
+ put.setCellVisibility(new CellVisibility(cellVisibilityExpr));
+ }
+ if (ttl > 0) {
+ put.setTTL(ttl);
+ }
+ } else {
+ // Creating the KV which needs to be directly written to HFiles. Using the Facade
+ // KVCreator for creation of kvs.
+ cell = this.kvCreator.create(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength(),
+ parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0,
+ parser.getQualifier(i).length, ts, lineBytes, parsed.getColumnOffset(i),
+ parsed.getColumnLength(i), tags);
+ }
+ put.add(cell);
+ }
+}
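
As the setup()/doSetup() Javadoc above suggests, subclasses can layer their own parameters on top of the common initialization. A minimal hypothetical subclass sketch follows; the configuration key and field are invented for illustration.

import org.apache.hadoop.hbase.mapreduce.TsvImporterMapper;

public class CustomTsvImporterMapper extends TsvImporterMapper {
  // Hypothetical extra knob read on top of the common parameters.
  private boolean dryRun;

  @Override
  protected void setup(Context context) {
    // Runs doSetup() and builds the TSV parser exactly as the parent does.
    super.setup(context);
    dryRun = context.getConfiguration().getBoolean("custom.tsv.dry.run", false);
  }
}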
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TsvImporterTextMapper.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TsvImporterTextMapper.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TsvImporterTextMapper.java
new file mode 100644
index 0000000..a3b095c
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TsvImporterTextMapper.java
@@ -0,0 +1,128 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Base64;
+import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Counter;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
+
+import java.io.IOException;
+
+/**
+ * Emits each line of TSV text keyed by its parsed row key, as intermediate map output.
+ */
+@InterfaceAudience.Public
+public class TsvImporterTextMapper
+extends Mapper<LongWritable, Text, ImmutableBytesWritable, Text>
+{
+
+ /** Column separator */
+ private String separator;
+
+ /** Should skip bad lines */
+ private boolean skipBadLines;
+ private Counter badLineCount;
+ private boolean logBadLines;
+
+ private ImportTsv.TsvParser parser;
+
+ public boolean getSkipBadLines() {
+ return skipBadLines;
+ }
+
+ public Counter getBadLineCount() {
+ return badLineCount;
+ }
+
+ public void incrementBadLineCount(int count) {
+ this.badLineCount.increment(count);
+ }
+
+ /**
+ * Handles initializing this class with objects specific to it (i.e., the parser).
+ * Common initialization that might be leveraged by a subclass is done in
+ * <code>doSetup</code>. Hence a subclass may choose to override this method
+ * and call <code>doSetup</code> as well before handling its own custom params.
+ *
+ * @param context
+ */
+ @Override
+ protected void setup(Context context) {
+ doSetup(context);
+
+ Configuration conf = context.getConfiguration();
+
+ parser = new ImportTsv.TsvParser(conf.get(ImportTsv.COLUMNS_CONF_KEY), separator);
+ if (parser.getRowKeyColumnIndex() == -1) {
+ throw new RuntimeException("No row key column specified");
+ }
+ }
+
+ /**
+ * Handles common parameter initialization that a subclass might want to leverage.
+ * @param context
+ */
+ protected void doSetup(Context context) {
+ Configuration conf = context.getConfiguration();
+
+ // If a custom separator has been used,
+ // decode it back from Base64 encoding.
+ separator = conf.get(ImportTsv.SEPARATOR_CONF_KEY);
+ if (separator == null) {
+ separator = ImportTsv.DEFAULT_SEPARATOR;
+ } else {
+ separator = new String(Base64.decode(separator));
+ }
+
+ skipBadLines = context.getConfiguration().getBoolean(ImportTsv.SKIP_LINES_CONF_KEY, true);
+ logBadLines = context.getConfiguration().getBoolean(ImportTsv.LOG_BAD_LINES_CONF_KEY, false);
+ badLineCount = context.getCounter("ImportTsv", "Bad Lines");
+ }
+
+ /**
+ * Convert a line of TSV text into an HBase table row.
+ */
+ @Override
+ public void map(LongWritable offset, Text value, Context context) throws IOException {
+ try {
+ Pair<Integer,Integer> rowKeyOffsets = parser.parseRowKey(value.getBytes(), value.getLength());
+ ImmutableBytesWritable rowKey = new ImmutableBytesWritable(
+ value.getBytes(), rowKeyOffsets.getFirst(), rowKeyOffsets.getSecond());
+ context.write(rowKey, value);
+ } catch (ImportTsv.TsvParser.BadTsvLineException|IllegalArgumentException badLine) {
+ if (logBadLines) {
+ System.err.println(value);
+ }
+ System.err.println("Bad line at offset: " + offset.get() + ":\n" + badLine.getMessage());
+ if (skipBadLines) {
+ incrementBadLineCount(1);
+ return;
+ }
+ throw new IOException(badLine);
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ Thread.currentThread().interrupt();
+ }
+ }
+}
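
This mapper is chosen instead of TsvImporterMapper through ImportTsv's importtsv.mapper.class option. A small hypothetical configuration sketch follows; the column spec is illustrative and the keys match the documented ImportTsv options.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.TsvImporterTextMapper;

public class SelectTextMapperSketch {
  public static Configuration configure() {
    Configuration conf = HBaseConfiguration.create();
    // Select the text mapper and declare the TSV column layout.
    conf.set("importtsv.mapper.class", TsvImporterTextMapper.class.getName());
    conf.set("importtsv.columns", "HBASE_ROW_KEY,d:c1");
    return conf;
  }
}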
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/VisibilityExpressionResolver.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/VisibilityExpressionResolver.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/VisibilityExpressionResolver.java
new file mode 100644
index 0000000..a83a88f
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/VisibilityExpressionResolver.java
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.hbase.Tag;
+
+/**
+ * Interface to convert visibility expressions into Tags for storing along with Cells in HFiles.
+ */
+@InterfaceAudience.Public
+public interface VisibilityExpressionResolver extends Configurable {
+
+ /**
+ * Gives the implementation a chance to initialize itself.
+ */
+ void init();
+
+ /**
+ * Convert visibility expression into tags to be serialized.
+ * @param visExpression the label expression
+ * @return The list of tags corresponding to the visibility expression. These tags will be stored
+ * along with the Cells.
+ */
+ List<Tag> createVisibilityExpTags(String visExpression) throws IOException;
+}
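
To make the contract concrete, here is a hypothetical no-op implementation sketch of the interface above; a real resolver would typically translate label names into tags rather than returning an empty list.

import java.io.IOException;
import java.util.Collections;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.mapreduce.VisibilityExpressionResolver;

public class NoOpVisibilityExpressionResolver implements VisibilityExpressionResolver {
  private Configuration conf;

  @Override
  public void setConf(Configuration conf) {
    this.conf = conf;
  }

  @Override
  public Configuration getConf() {
    return conf;
  }

  @Override
  public void init() {
    // Nothing to initialize in this sketch.
  }

  @Override
  public List<Tag> createVisibilityExpTags(String visExpression) throws IOException {
    // Ignore the expression entirely and attach no visibility tags.
    return Collections.emptyList();
  }
}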
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/WALInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/WALInputFormat.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/WALInputFormat.java
new file mode 100644
index 0000000..8b4e967
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/WALInputFormat.java
@@ -0,0 +1,344 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.EOFException;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
+import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
+import org.apache.hadoop.hbase.wal.WAL;
+import org.apache.hadoop.hbase.wal.WAL.Entry;
+import org.apache.hadoop.hbase.wal.WAL.Reader;
+import org.apache.hadoop.hbase.wal.WALKey;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.util.StringUtils;
+
+/**
+ * Simple {@link InputFormat} for {@link org.apache.hadoop.hbase.wal.WAL} files.
+ */
+@InterfaceAudience.Public
+public class WALInputFormat extends InputFormat<WALKey, WALEdit> {
+ private static final Log LOG = LogFactory.getLog(WALInputFormat.class);
+
+ public static final String START_TIME_KEY = "wal.start.time";
+ public static final String END_TIME_KEY = "wal.end.time";
+
+ /**
+ * {@link InputSplit} for {@link WAL} files. Each split represents
+ * exactly one log file.
+ */
+ static class WALSplit extends InputSplit implements Writable {
+ private String logFileName;
+ private long fileSize;
+ private long startTime;
+ private long endTime;
+
+ /** for serialization */
+ public WALSplit() {}
+
+ /**
+ * Represents a WALSplit, i.e. a single WAL file.
+ * Start and end time are managed by the split, so that WAL files can be
+ * filtered before WALEdits are passed to the mapper(s).
+ * @param logFileName
+ * @param fileSize
+ * @param startTime
+ * @param endTime
+ */
+ public WALSplit(String logFileName, long fileSize, long startTime, long endTime) {
+ this.logFileName = logFileName;
+ this.fileSize = fileSize;
+ this.startTime = startTime;
+ this.endTime = endTime;
+ }
+
+ @Override
+ public long getLength() throws IOException, InterruptedException {
+ return fileSize;
+ }
+
+ @Override
+ public String[] getLocations() throws IOException, InterruptedException {
+ // TODO: Find the data node with the most blocks for this WAL?
+ return new String[] {};
+ }
+
+ public String getLogFileName() {
+ return logFileName;
+ }
+
+ public long getStartTime() {
+ return startTime;
+ }
+
+ public long getEndTime() {
+ return endTime;
+ }
+
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ logFileName = in.readUTF();
+ fileSize = in.readLong();
+ startTime = in.readLong();
+ endTime = in.readLong();
+ }
+
+ @Override
+ public void write(DataOutput out) throws IOException {
+ out.writeUTF(logFileName);
+ out.writeLong(fileSize);
+ out.writeLong(startTime);
+ out.writeLong(endTime);
+ }
+
+ @Override
+ public String toString() {
+ return logFileName + " (" + startTime + ":" + endTime + ") length:" + fileSize;
+ }
+ }
+
+ /**
+ * {@link RecordReader} for a {@link WAL} file.
+ * Implementation shared with deprecated HLogInputFormat.
+ */
+ static abstract class WALRecordReader<K extends WALKey> extends RecordReader<K, WALEdit> {
+ private Reader reader = null;
+ // visible until we can remove the deprecated HLogInputFormat
+ Entry currentEntry = new Entry();
+ private long startTime;
+ private long endTime;
+ private Configuration conf;
+ private Path logFile;
+ private long currentPos;
+
+ @Override
+ public void initialize(InputSplit split, TaskAttemptContext context)
+ throws IOException, InterruptedException {
+ WALSplit hsplit = (WALSplit)split;
+ logFile = new Path(hsplit.getLogFileName());
+ conf = context.getConfiguration();
+ LOG.info("Opening reader for "+split);
+ openReader(logFile);
+ this.startTime = hsplit.getStartTime();
+ this.endTime = hsplit.getEndTime();
+ }
+
+ private void openReader(Path path) throws IOException {
+ closeReader();
+ reader = AbstractFSWALProvider.openReader(path, conf);
+ seek();
+ setCurrentPath(path);
+ }
+
+ private void setCurrentPath(Path path) {
+ this.logFile = path;
+ }
+
+ private void closeReader() throws IOException {
+ if (reader != null) {
+ reader.close();
+ reader = null;
+ }
+ }
+
+ private void seek() throws IOException {
+ if (currentPos != 0) {
+ reader.seek(currentPos);
+ }
+ }
+
+ @Override
+ public boolean nextKeyValue() throws IOException, InterruptedException {
+ if (reader == null) return false;
+ this.currentPos = reader.getPosition();
+ Entry temp;
+ long i = -1;
+ try {
+ do {
+ // skip older entries
+ try {
+ temp = reader.next(currentEntry);
+ i++;
+ } catch (EOFException x) {
+ LOG.warn("Corrupted entry detected. Ignoring the rest of the file."
+ + " (This is normal when a RegionServer crashed.)");
+ return false;
+ }
+ } while (temp != null && temp.getKey().getWriteTime() < startTime);
+
+ if (temp == null) {
+ if (i > 0) LOG.info("Skipped " + i + " entries.");
+ LOG.info("Reached end of file.");
+ return false;
+ } else if (i > 0) {
+ LOG.info("Skipped " + i + " entries, until ts: " + temp.getKey().getWriteTime() + ".");
+ }
+ boolean res = temp.getKey().getWriteTime() <= endTime;
+ if (!res) {
+ LOG.info("Reached ts: " + temp.getKey().getWriteTime()
+ + " ignoring the rest of the file.");
+ }
+ return res;
+ } catch (IOException e) {
+ Path archivedLog = AbstractFSWALProvider.getArchivedLogPath(logFile, conf);
+ if (logFile != archivedLog) {
+ openReader(archivedLog);
+ // Retry recursively against the archived log location.
+ return nextKeyValue();
+ } else {
+ throw e;
+ }
+ }
+ }
+
+ @Override
+ public WALEdit getCurrentValue() throws IOException, InterruptedException {
+ return currentEntry.getEdit();
+ }
+
+ @Override
+ public float getProgress() throws IOException, InterruptedException {
+ // N/A depends on total number of entries, which is unknown
+ return 0;
+ }
+
+ @Override
+ public void close() throws IOException {
+ LOG.info("Closing reader");
+ if (reader != null) this.reader.close();
+ }
+ }
+
+ /**
+ * Handler for the non-deprecated WALKey version. Fold into WALRecordReader once we no longer
+ * need to support HLogInputFormat.
+ */
+ static class WALKeyRecordReader extends WALRecordReader<WALKey> {
+ @Override
+ public WALKey getCurrentKey() throws IOException, InterruptedException {
+ return currentEntry.getKey();
+ }
+ }
+
+ @Override
+ public List<InputSplit> getSplits(JobContext context) throws IOException,
+ InterruptedException {
+ return getSplits(context, START_TIME_KEY, END_TIME_KEY);
+ }
+
+ /**
+ * Implementation shared with the deprecated HLogInputFormat.
+ */
+ List<InputSplit> getSplits(final JobContext context, final String startKey, final String endKey)
+ throws IOException, InterruptedException {
+ Configuration conf = context.getConfiguration();
+ boolean ignoreMissing = conf.getBoolean(WALPlayer.IGNORE_MISSING_FILES, false);
+ Path[] inputPaths = getInputPaths(conf);
+ long startTime = conf.getLong(startKey, Long.MIN_VALUE);
+ long endTime = conf.getLong(endKey, Long.MAX_VALUE);
+
+ List<FileStatus> allFiles = new ArrayList<FileStatus>();
+ for(Path inputPath: inputPaths){
+ FileSystem fs = inputPath.getFileSystem(conf);
+ try {
+ List<FileStatus> files = getFiles(fs, inputPath, startTime, endTime);
+ allFiles.addAll(files);
+ } catch (FileNotFoundException e) {
+ if (ignoreMissing) {
+ LOG.warn("File "+ inputPath +" is missing. Skipping it.");
+ continue;
+ }
+ throw e;
+ }
+ }
+ List<InputSplit> splits = new ArrayList<InputSplit>(allFiles.size());
+ for (FileStatus file : allFiles) {
+ splits.add(new WALSplit(file.getPath().toString(), file.getLen(), startTime, endTime));
+ }
+ return splits;
+ }
+
+ private Path[] getInputPaths(Configuration conf) {
+ String inpDirs = conf.get(FileInputFormat.INPUT_DIR);
+ return StringUtils.stringToPath(
+ inpDirs.split(conf.get(WALPlayer.INPUT_FILES_SEPARATOR_KEY, ",")));
+ }
+
+ private List<FileStatus> getFiles(FileSystem fs, Path dir, long startTime, long endTime)
+ throws IOException {
+ List<FileStatus> result = new ArrayList<>();
+ LOG.debug("Scanning " + dir.toString() + " for WAL files");
+
+ RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(dir);
+ if (!iter.hasNext()) return Collections.emptyList();
+ while (iter.hasNext()) {
+ LocatedFileStatus file = iter.next();
+ if (file.isDirectory()) {
+ // recurse into sub directories
+ result.addAll(getFiles(fs, file.getPath(), startTime, endTime));
+ } else {
+ String name = file.getPath().toString();
+ int idx = name.lastIndexOf('.');
+ if (idx > 0) {
+ try {
+ long fileStartTime = Long.parseLong(name.substring(idx+1));
+ if (fileStartTime <= endTime) {
+ LOG.info("Found: " + file);
+ result.add(file);
+ }
+ } catch (NumberFormatException x) {
+ idx = 0;
+ }
+ }
+ if (idx == 0) {
+ LOG.warn("File " + name + " does not appear to be an WAL file. Skipping...");
+ }
+ }
+ }
+ return result;
+ }
+
+ @Override
+ public RecordReader<WALKey, WALEdit> createRecordReader(InputSplit split,
+ TaskAttemptContext context) throws IOException, InterruptedException {
+ return new WALKeyRecordReader();
+ }
+}
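
A minimal, hypothetical driver sketch for the input format above; the WAL directory, job name and counting mapper are assumptions for illustration, and the start/end times bound which files and entries are read.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.WALInputFormat;
import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
import org.apache.hadoop.hbase.wal.WALKey;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;

public class WalEditCounterSketch {
  /** Counts WAL edits; purely illustrative. */
  public static class EditCounterMapper extends Mapper<WALKey, WALEdit, NullWritable, NullWritable> {
    @Override
    protected void map(WALKey key, WALEdit value, Context context) {
      context.getCounter("wal", "edits").increment(1);
    }
  }

  public static Job buildJob(long startMillis, long endMillis) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    conf.setLong(WALInputFormat.START_TIME_KEY, startMillis);
    conf.setLong(WALInputFormat.END_TIME_KEY, endMillis);

    Job job = Job.getInstance(conf, "wal-edit-counter-sketch");
    job.setJarByClass(WalEditCounterSketch.class);
    job.setInputFormatClass(WALInputFormat.class);
    // Placeholder path; point this at an archived-WAL directory in practice.
    FileInputFormat.addInputPath(job, new Path("/hbase/oldWALs"));
    job.setMapperClass(EditCounterMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    return job;
  }
}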
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/WALPlayer.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/WALPlayer.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/WALPlayer.java
new file mode 100644
index 0000000..b1e655c
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/WALPlayer.java
@@ -0,0 +1,384 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Map;
+import java.util.TreeMap;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.KeyValueUtil;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Mutation;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.regionserver.wal.WALCellCodec;
+import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.wal.WALKey;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * A tool to replay WAL files as an M/R job.
+ * The WAL can be replayed for a set of tables or all tables,
+ * and a time range can be provided (in milliseconds).
+ * The WAL is filtered to the passed set of tables and the output
+ * can optionally be mapped to another set of tables.
+ *
+ * WAL replay can also generate HFiles for later bulk importing;
+ * in that case the WAL is replayed for a single table only.
+ */
+@InterfaceAudience.Public
+public class WALPlayer extends Configured implements Tool {
+ private static final Log LOG = LogFactory.getLog(WALPlayer.class);
+ final static String NAME = "WALPlayer";
+ public final static String BULK_OUTPUT_CONF_KEY = "wal.bulk.output";
+ public final static String TABLES_KEY = "wal.input.tables";
+ public final static String TABLE_MAP_KEY = "wal.input.tablesmap";
+ public final static String INPUT_FILES_SEPARATOR_KEY = "wal.input.separator";
+ public final static String IGNORE_MISSING_FILES = "wal.input.ignore.missing.files";
+
+
+ // This relies on Hadoop Configuration to handle warning about deprecated configs and
+ // to set the correct non-deprecated configs when an old one shows up.
+ static {
+ Configuration.addDeprecation("hlog.bulk.output", BULK_OUTPUT_CONF_KEY);
+ Configuration.addDeprecation("hlog.input.tables", TABLES_KEY);
+ Configuration.addDeprecation("hlog.input.tablesmap", TABLE_MAP_KEY);
+ }
+
+ private final static String JOB_NAME_CONF_KEY = "mapreduce.job.name";
+
+ public WALPlayer(){
+ }
+
+ protected WALPlayer(final Configuration c) {
+ super(c);
+ }
+
+ /**
+ * A mapper that just writes out KeyValues.
+ * This one can be used together with {@link KeyValueSortReducer}
+ */
+ static class WALKeyValueMapper
+ extends Mapper<WALKey, WALEdit, ImmutableBytesWritable, KeyValue> {
+ private byte[] table;
+
+ @Override
+ public void map(WALKey key, WALEdit value,
+ Context context)
+ throws IOException {
+ try {
+ // skip all other tables
+ if (Bytes.equals(table, key.getTablename().getName())) {
+ for (Cell cell : value.getCells()) {
+ KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
+ if (WALEdit.isMetaEditFamily(kv)) {
+ continue;
+ }
+ context.write(new ImmutableBytesWritable(CellUtil.cloneRow(kv)), kv);
+ }
+ }
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ }
+ }
+
+ @Override
+ public void setup(Context context) throws IOException {
+ // only a single table is supported when HFiles are generated with HFileOutputFormat
+ String[] tables = context.getConfiguration().getStrings(TABLES_KEY);
+ if (tables == null || tables.length != 1) {
+ // this can only happen when WALKeyValueMapper is used directly by a class other than WALPlayer
+ throw new IOException("Exactly one table must be specified for bulk HFile case.");
+ }
+ table = Bytes.toBytes(tables[0]);
+
+ }
+
+ }
+
+ /**
+ * A mapper that writes out {@link Mutation} to be directly applied to
+ * a running HBase instance.
+ */
+ protected static class WALMapper
+ extends Mapper<WALKey, WALEdit, ImmutableBytesWritable, Mutation> {
+ private Map<TableName, TableName> tables = new TreeMap<>();
+
+ @Override
+ public void map(WALKey key, WALEdit value, Context context)
+ throws IOException {
+ try {
+ if (tables.isEmpty() || tables.containsKey(key.getTablename())) {
+ TableName targetTable = tables.isEmpty() ?
+ key.getTablename() :
+ tables.get(key.getTablename());
+ ImmutableBytesWritable tableOut = new ImmutableBytesWritable(targetTable.getName());
+ Put put = null;
+ Delete del = null;
+ Cell lastCell = null;
+ for (Cell cell : value.getCells()) {
+ // filtering WAL meta entries
+ if (WALEdit.isMetaEditFamily(cell)) {
+ continue;
+ }
+
+ // Allow a subclass to filter out this cell.
+ if (filter(context, cell)) {
+ // A WALEdit may contain multiple operations (HBASE-3584) and/or
+ // multiple rows (HBASE-5229).
+ // Aggregate as much as possible into a single Put/Delete
+ // operation before writing to the context.
+ if (lastCell == null || lastCell.getTypeByte() != cell.getTypeByte()
+ || !CellUtil.matchingRow(lastCell, cell)) {
+ // row or type changed, write out aggregate KVs.
+ if (put != null) {
+ context.write(tableOut, put);
+ }
+ if (del != null) {
+ context.write(tableOut, del);
+ }
+ if (CellUtil.isDelete(cell)) {
+ del = new Delete(CellUtil.cloneRow(cell));
+ } else {
+ put = new Put(CellUtil.cloneRow(cell));
+ }
+ }
+ if (CellUtil.isDelete(cell)) {
+ del.add(cell);
+ } else {
+ put.add(cell);
+ }
+ }
+ lastCell = cell;
+ }
+ // write residual KVs
+ if (put != null) {
+ context.write(tableOut, put);
+ }
+ if (del != null) {
+ context.write(tableOut, del);
+ }
+ }
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ }
+ }
+
+ protected boolean filter(Context context, final Cell cell) {
+ return true;
+ }
+
+ @Override
+ protected void
+ cleanup(Mapper<WALKey, WALEdit, ImmutableBytesWritable, Mutation>.Context context)
+ throws IOException, InterruptedException {
+ super.cleanup(context);
+ }
+
+ @Override
+ public void setup(Context context) throws IOException {
+ String[] tableMap = context.getConfiguration().getStrings(TABLE_MAP_KEY);
+ String[] tablesToUse = context.getConfiguration().getStrings(TABLES_KEY);
+ if (tableMap == null) {
+ tableMap = tablesToUse;
+ }
+ if (tablesToUse == null) {
+ // Then user wants all tables.
+ } else if (tablesToUse.length != tableMap.length) {
+ // this can only happen when WALMapper is used directly by a class other than WALPlayer
+ throw new IOException("Incorrect table mapping specified .");
+ }
+ int i = 0;
+ if (tablesToUse != null) {
+ for (String table : tablesToUse) {
+ tables.put(TableName.valueOf(table),
+ TableName.valueOf(tableMap[i++]));
+ }
+ }
+ }
+ }
+
+ void setupTime(Configuration conf, String option) throws IOException {
+ String val = conf.get(option);
+ if (null == val) {
+ return;
+ }
+ long ms;
+ try {
+ // first try to parse in user friendly form
+ ms = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SS").parse(val).getTime();
+ } catch (ParseException pe) {
+ try {
+ // then see if just a number of ms's was specified
+ ms = Long.parseLong(val);
+ } catch (NumberFormatException nfe) {
+ throw new IOException(option
+ + " must be specified either in the form 2001-02-20T16:35:06.99 "
+ + "or as number of milliseconds");
+ }
+ }
+ conf.setLong(option, ms);
+ }
+
+ /**
+ * Sets up the actual job.
+ *
+ * @param args The command line parameters.
+ * @return The newly created job.
+ * @throws IOException When setting up the job fails.
+ */
+ public Job createSubmittableJob(String[] args) throws IOException {
+ Configuration conf = getConf();
+ setupTime(conf, WALInputFormat.START_TIME_KEY);
+ setupTime(conf, WALInputFormat.END_TIME_KEY);
+ String inputDirs = args[0];
+ String[] tables = args[1].split(",");
+ String[] tableMap;
+ if (args.length > 2) {
+ tableMap = args[2].split(",");
+ if (tableMap.length != tables.length) {
+ throw new IOException("The same number of tables and mapping must be provided.");
+ }
+ } else {
+ // if no mapping is specified, map each table to itself
+ tableMap = tables;
+ }
+ conf.setStrings(TABLES_KEY, tables);
+ conf.setStrings(TABLE_MAP_KEY, tableMap);
+ conf.set(FileInputFormat.INPUT_DIR, inputDirs);
+ Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + System.currentTimeMillis()));
+ job.setJarByClass(WALPlayer.class);
+
+ job.setInputFormatClass(WALInputFormat.class);
+ job.setMapOutputKeyClass(ImmutableBytesWritable.class);
+
+ String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
+ if (hfileOutPath != null) {
+ LOG.debug("add incremental job :" + hfileOutPath + " from " + inputDirs);
+
+ // the bulk HFile case
+ if (tables.length != 1) {
+ throw new IOException("Exactly one table must be specified for the bulk export option");
+ }
+ TableName tableName = TableName.valueOf(tables[0]);
+ job.setMapperClass(WALKeyValueMapper.class);
+ job.setReducerClass(KeyValueSortReducer.class);
+ Path outputDir = new Path(hfileOutPath);
+ FileOutputFormat.setOutputPath(job, outputDir);
+ job.setMapOutputValueClass(KeyValue.class);
+ try (Connection conn = ConnectionFactory.createConnection(conf);
+ Table table = conn.getTable(tableName);
+ RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
+ HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(), regionLocator);
+ }
+ TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
+ org.apache.hadoop.hbase.shaded.com.google.common.base.Preconditions.class);
+ } else {
+ // output to live cluster
+ job.setMapperClass(WALMapper.class);
+ job.setOutputFormatClass(MultiTableOutputFormat.class);
+ TableMapReduceUtil.addDependencyJars(job);
+ TableMapReduceUtil.initCredentials(job);
+ // No reducers.
+ job.setNumReduceTasks(0);
+ }
+ String codecCls = WALCellCodec.getWALCellCodecClass(conf);
+ try {
+ TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), Class.forName(codecCls));
+ } catch (Exception e) {
+ throw new IOException("Cannot determine wal codec class " + codecCls, e);
+ }
+ return job;
+ }
+
+
+ /**
+ * Print usage
+ * @param errorMsg Error message. Can be null.
+ */
+ private void usage(final String errorMsg) {
+ if (errorMsg != null && errorMsg.length() > 0) {
+ System.err.println("ERROR: " + errorMsg);
+ }
+ System.err.println("Usage: " + NAME + " [options] <wal inputdir> <tables> [<tableMappings>]");
+ System.err.println("Read all WAL entries for <tables>.");
+ System.err.println("If no tables (\"\") are specific, all tables are imported.");
+ System.err.println("(Careful, even hbase:meta entries will be imported"+
+ " in that case.)");
+ System.err.println("Otherwise <tables> is a comma separated list of tables.\n");
+ System.err.println("The WAL entries can be mapped to new set of tables via <tableMapping>.");
+ System.err.println("<tableMapping> is a command separated list of targettables.");
+ System.err.println("If specified, each table in <tables> must have a mapping.\n");
+ System.err.println("By default " + NAME + " will load data directly into HBase.");
+ System.err.println("To generate HFiles for a bulk data load instead, pass the option:");
+ System.err.println(" -D" + BULK_OUTPUT_CONF_KEY + "=/path/for/output");
+ System.err.println(" (Only one table can be specified, and no mapping is allowed!)");
+ System.err.println("Other options: (specify time range to WAL edit to consider)");
+ System.err.println(" -D" + WALInputFormat.START_TIME_KEY + "=[date|ms]");
+ System.err.println(" -D" + WALInputFormat.END_TIME_KEY + "=[date|ms]");
+ System.err.println(" -D " + JOB_NAME_CONF_KEY
+ + "=jobName - use the specified mapreduce job name for the wal player");
+ System.err.println("For performance also consider the following options:\n"
+ + " -Dmapreduce.map.speculative=false\n"
+ + " -Dmapreduce.reduce.speculative=false");
+ }
+
+ /**
+ * Main entry point.
+ *
+ * @param args The command line parameters.
+ * @throws Exception When running the job fails.
+ */
+ public static void main(String[] args) throws Exception {
+ int ret = ToolRunner.run(new WALPlayer(HBaseConfiguration.create()), args);
+ System.exit(ret);
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+ if (args.length < 2) {
+ usage("Wrong number of arguments: " + args.length);
+ System.exit(-1);
+ }
+ Job job = createSubmittableJob(args);
+ return job.waitForCompletion(true) ? 0 : 1;
+ }
+}
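
The main() above simply hands off to ToolRunner, so the tool can also be driven programmatically. A minimal hedged sketch follows; it assumes the WALInputFormat time-range constants referenced in usage() are publicly accessible, and the WAL directory and table names are placeholders rather than values from this commit:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.WALInputFormat;
import org.apache.hadoop.hbase.mapreduce.WALPlayer;
import org.apache.hadoop.util.ToolRunner;

public class WALPlayerDriverSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // Optional time range; setupTime() accepts either a 2001-02-20T16:35:06.99 style
    // date or a plain number of milliseconds.
    conf.set(WALInputFormat.START_TIME_KEY, "2017-08-01T00:00:00.00");
    conf.set(WALInputFormat.END_TIME_KEY, "2017-08-02T00:00:00.00");
    // Optional: write HFiles for a later bulk load instead of live writes
    // (only one table may be given in that mode, as createSubmittableJob() enforces):
    // conf.set(WALPlayer.BULK_OUTPUT_CONF_KEY, "/tmp/walplayer-out");

    // Arguments follow usage(): <wal inputdir> <tables> [<tableMappings>].
    // "/hbase/oldWALs", "sourceTable" and "targetTable" are placeholders.
    int exitCode = ToolRunner.run(conf, new WALPlayer(),
        new String[] { "/hbase/oldWALs", "sourceTable", "targetTable" });
    System.exit(exitCode);
  }
}
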
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/package-info.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/package-info.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/package-info.java
new file mode 100644
index 0000000..b1f15ba
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/package-info.java
@@ -0,0 +1,26 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+Provides HBase <a href="http://wiki.apache.org/hadoop/HadoopMapReduce">MapReduce</a>
+Input/OutputFormats, a table indexing MapReduce job, and utility methods.
+
+<p>See <a href="http://hbase.apache.org/book.html#mapreduce">HBase and MapReduce</a>
+in the HBase Reference Guide for documentation on running MapReduce jobs over HBase.
+*/
+package org.apache.hadoop.hbase.mapreduce;
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java
deleted file mode 100644
index efcf91e..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java
+++ /dev/null
@@ -1,571 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.UUID;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.TableNotFoundException;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.io.hfile.CacheConfig;
-import org.apache.hadoop.hbase.io.hfile.HFile;
-import org.apache.hadoop.hbase.io.hfile.HFileScanner;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.Utils.OutputFileUtils.OutputFilesFilter;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-import org.junit.AfterClass;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.ExpectedException;
-
-@Category({VerySlowMapReduceTests.class, LargeTests.class})
-public class TestImportTsv implements Configurable {
-
- private static final Log LOG = LogFactory.getLog(TestImportTsv.class);
- protected static final String NAME = TestImportTsv.class.getSimpleName();
- protected static HBaseTestingUtility util = new HBaseTestingUtility();
-
- // Delete the tmp directory after running doMROnTableTest. Boolean. Default is true.
- protected static final String DELETE_AFTER_LOAD_CONF = NAME + ".deleteAfterLoad";
-
- /**
- * Force use of combiner in doMROnTableTest. Boolean. Default is true.
- */
- protected static final String FORCE_COMBINER_CONF = NAME + ".forceCombiner";
-
- private final String FAMILY = "FAM";
- private TableName tn;
- private Map<String, String> args;
-
- @Rule
- public ExpectedException exception = ExpectedException.none();
-
- public Configuration getConf() {
- return util.getConfiguration();
- }
-
- public void setConf(Configuration conf) {
- throw new IllegalArgumentException("setConf not supported");
- }
-
- @BeforeClass
- public static void provisionCluster() throws Exception {
- util.startMiniCluster();
- }
-
- @AfterClass
- public static void releaseCluster() throws Exception {
- util.shutdownMiniCluster();
- }
-
- @Before
- public void setup() throws Exception {
- tn = TableName.valueOf("test-" + UUID.randomUUID());
- args = new HashMap<>();
- // Prepare the arguments required for the test.
- args.put(ImportTsv.COLUMNS_CONF_KEY, "HBASE_ROW_KEY,FAM:A,FAM:B");
- args.put(ImportTsv.SEPARATOR_CONF_KEY, "\u001b");
- }
-
- @Test
- public void testMROnTable() throws Exception {
- util.createTable(tn, FAMILY);
- doMROnTableTest(null, 1);
- util.deleteTable(tn);
- }
-
- @Test
- public void testMROnTableWithTimestamp() throws Exception {
- util.createTable(tn, FAMILY);
- args.put(ImportTsv.COLUMNS_CONF_KEY, "HBASE_ROW_KEY,HBASE_TS_KEY,FAM:A,FAM:B");
- args.put(ImportTsv.SEPARATOR_CONF_KEY, ",");
- String data = "KEY,1234,VALUE1,VALUE2\n";
-
- doMROnTableTest(data, 1);
- util.deleteTable(tn);
- }
-
- @Test
- public void testMROnTableWithCustomMapper()
- throws Exception {
- util.createTable(tn, FAMILY);
- args.put(ImportTsv.MAPPER_CONF_KEY,
- "org.apache.hadoop.hbase.mapreduce.TsvImporterCustomTestMapper");
-
- doMROnTableTest(null, 3);
- util.deleteTable(tn);
- }
-
- @Test
- public void testBulkOutputWithoutAnExistingTable() throws Exception {
- // Prepare the arguments required for the test.
- Path hfiles = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");
- args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, hfiles.toString());
-
- doMROnTableTest(null, 3);
- util.deleteTable(tn);
- }
-
- @Test
- public void testBulkOutputWithAnExistingTable() throws Exception {
- util.createTable(tn, FAMILY);
-
- // Prepare the arguments required for the test.
- Path hfiles = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");
- args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, hfiles.toString());
-
- doMROnTableTest(null, 3);
- util.deleteTable(tn);
- }
-
- @Test
- public void testBulkOutputWithAnExistingTableNoStrictTrue() throws Exception {
- util.createTable(tn, FAMILY);
-
- // Prepare the arguments required for the test.
- Path hfiles = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");
- args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, hfiles.toString());
- args.put(ImportTsv.NO_STRICT_COL_FAMILY, "true");
- doMROnTableTest(null, 3);
- util.deleteTable(tn);
- }
-
- @Test
- public void testJobConfigurationsWithTsvImporterTextMapper() throws Exception {
- Path bulkOutputPath = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()),"hfiles");
- String INPUT_FILE = "InputFile1.csv";
- // Prepare the arguments required for the test.
- String[] args =
- new String[] {
- "-D" + ImportTsv.MAPPER_CONF_KEY
- + "=org.apache.hadoop.hbase.mapreduce.TsvImporterTextMapper",
- "-D" + ImportTsv.COLUMNS_CONF_KEY
- + "=HBASE_ROW_KEY,FAM:A,FAM:B",
- "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=,",
- "-D" + ImportTsv.BULK_OUTPUT_CONF_KEY + "=" + bulkOutputPath.toString(),
- tn.getNameAsString(),
- INPUT_FILE
- };
- assertEquals("running test job configuration failed.", 0, ToolRunner.run(
- new Configuration(util.getConfiguration()),
- new ImportTsv() {
- @Override
- public int run(String[] args) throws Exception {
- Job job = createSubmittableJob(getConf(), args);
- assertTrue(job.getMapperClass().equals(TsvImporterTextMapper.class));
- assertTrue(job.getReducerClass().equals(TextSortReducer.class));
- assertTrue(job.getMapOutputValueClass().equals(Text.class));
- return 0;
- }
- }, args));
- // Delete table created by createSubmittableJob.
- util.deleteTable(tn);
- }
-
- @Test
- public void testBulkOutputWithTsvImporterTextMapper() throws Exception {
- Path bulkOutputPath = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()),"hfiles");
- args.put(ImportTsv.MAPPER_CONF_KEY, "org.apache.hadoop.hbase.mapreduce.TsvImporterTextMapper");
- args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, bulkOutputPath.toString());
- String data = "KEY\u001bVALUE4\u001bVALUE8\n";
- doMROnTableTest(data, 4);
- util.deleteTable(tn);
- }
-
- @Test
- public void testWithoutAnExistingTableAndCreateTableSetToNo() throws Exception {
- String[] args = new String[] { tn.getNameAsString(), "/inputFile" };
-
- Configuration conf = new Configuration(util.getConfiguration());
- conf.set(ImportTsv.COLUMNS_CONF_KEY, "HBASE_ROW_KEY,FAM:A");
- conf.set(ImportTsv.BULK_OUTPUT_CONF_KEY, "/output");
- conf.set(ImportTsv.CREATE_TABLE_CONF_KEY, "no");
- exception.expect(TableNotFoundException.class);
- assertEquals("running test job configuration failed.", 0,
- ToolRunner.run(new Configuration(util.getConfiguration()), new ImportTsv() {
- @Override public int run(String[] args) throws Exception {
- createSubmittableJob(getConf(), args);
- return 0;
- }
- }, args));
- }
-
- @Test
- public void testMRWithoutAnExistingTable() throws Exception {
- String[] args =
- new String[] { tn.getNameAsString(), "/inputFile" };
-
- exception.expect(TableNotFoundException.class);
- assertEquals("running test job configuration failed.", 0, ToolRunner.run(
- new Configuration(util.getConfiguration()),
- new ImportTsv() {
- @Override
- public int run(String[] args) throws Exception {
- createSubmittableJob(getConf(), args);
- return 0;
- }
- }, args));
- }
-
- @Test
- public void testJobConfigurationsWithDryMode() throws Exception {
- Path bulkOutputPath = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()),"hfiles");
- String INPUT_FILE = "InputFile1.csv";
- // Prepare the arguments required for the test.
- String[] argsArray = new String[] {
- "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B",
- "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=,",
- "-D" + ImportTsv.BULK_OUTPUT_CONF_KEY + "=" + bulkOutputPath.toString(),
- "-D" + ImportTsv.DRY_RUN_CONF_KEY + "=true",
- tn.getNameAsString(),
- INPUT_FILE };
- assertEquals("running test job configuration failed.", 0, ToolRunner.run(
- new Configuration(util.getConfiguration()),
- new ImportTsv() {
- @Override
- public int run(String[] args) throws Exception {
- Job job = createSubmittableJob(getConf(), args);
- assertTrue(job.getOutputFormatClass().equals(NullOutputFormat.class));
- return 0;
- }
- }, argsArray));
- // Delete table created by createSubmittableJob.
- util.deleteTable(tn);
- }
-
- @Test
- public void testDryModeWithoutBulkOutputAndTableExists() throws Exception {
- util.createTable(tn, FAMILY);
- args.put(ImportTsv.DRY_RUN_CONF_KEY, "true");
- doMROnTableTest(null, 1);
- // Dry mode should not delete an existing table. If it's not present,
- // this will throw TableNotFoundException.
- util.deleteTable(tn);
- }
-
- /**
- * If table is not present in non-bulk mode, dry run should fail just like
- * normal mode.
- */
- @Test
- public void testDryModeWithoutBulkOutputAndTableDoesNotExists() throws Exception {
- args.put(ImportTsv.DRY_RUN_CONF_KEY, "true");
- exception.expect(TableNotFoundException.class);
- doMROnTableTest(null, 1);
- }
-
- @Test public void testDryModeWithBulkOutputAndTableExists() throws Exception {
- util.createTable(tn, FAMILY);
- // Prepare the arguments required for the test.
- Path hfiles = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");
- args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, hfiles.toString());
- args.put(ImportTsv.DRY_RUN_CONF_KEY, "true");
- doMROnTableTest(null, 1);
- // Dry mode should not delete an existing table. If it's not present,
- // this will throw TableNotFoundException.
- util.deleteTable(tn);
- }
-
- /**
- * If table is not present in bulk mode and create.table is not set to yes,
- * import should fail with TableNotFoundException.
- */
- @Test
- public void testDryModeWithBulkOutputAndTableDoesNotExistsCreateTableSetToNo() throws
- Exception {
- // Prepare the arguments required for the test.
- Path hfiles = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");
- args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, hfiles.toString());
- args.put(ImportTsv.DRY_RUN_CONF_KEY, "true");
- args.put(ImportTsv.CREATE_TABLE_CONF_KEY, "no");
- exception.expect(TableNotFoundException.class);
- doMROnTableTest(null, 1);
- }
-
- @Test
- public void testDryModeWithBulkModeAndTableDoesNotExistsCreateTableSetToYes() throws Exception {
- // Prepare the arguments required for the test.
- Path hfiles = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");
- args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, hfiles.toString());
- args.put(ImportTsv.DRY_RUN_CONF_KEY, "true");
- args.put(ImportTsv.CREATE_TABLE_CONF_KEY, "yes");
- doMROnTableTest(null, 1);
- // Verify temporary table was deleted.
- exception.expect(TableNotFoundException.class);
- util.deleteTable(tn);
- }
-
- /**
- * If there are invalid data rows as inputs, then only those rows should be ignored.
- */
- @Test
- public void testTsvImporterTextMapperWithInvalidData() throws Exception {
- Path bulkOutputPath = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");
- args.put(ImportTsv.MAPPER_CONF_KEY, "org.apache.hadoop.hbase.mapreduce.TsvImporterTextMapper");
- args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, bulkOutputPath.toString());
- args.put(ImportTsv.COLUMNS_CONF_KEY, "HBASE_ROW_KEY,HBASE_TS_KEY,FAM:A,FAM:B");
- args.put(ImportTsv.SEPARATOR_CONF_KEY, ",");
- // 3 Rows of data as input. 2 Rows are valid and 1 row is invalid as it doesn't have TS
- String data = "KEY,1234,VALUE1,VALUE2\nKEY\nKEY,1235,VALUE1,VALUE2\n";
- doMROnTableTest(util, tn, FAMILY, data, args, 1, 4);
- util.deleteTable(tn);
- }
-
- @Test
- public void testSkipEmptyColumns() throws Exception {
- Path bulkOutputPath = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");
- args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, bulkOutputPath.toString());
- args.put(ImportTsv.COLUMNS_CONF_KEY, "HBASE_ROW_KEY,HBASE_TS_KEY,FAM:A,FAM:B");
- args.put(ImportTsv.SEPARATOR_CONF_KEY, ",");
- args.put(ImportTsv.SKIP_EMPTY_COLUMNS, "true");
- // 2 Rows of data as input. Both rows are valid and only 3 columns are no-empty among 4
- String data = "KEY,1234,VALUE1,VALUE2\nKEY,1235,,VALUE2\n";
- doMROnTableTest(util, tn, FAMILY, data, args, 1, 3);
- util.deleteTable(tn);
- }
-
- private Tool doMROnTableTest(String data, int valueMultiplier) throws Exception {
- return doMROnTableTest(util, tn, FAMILY, data, args, valueMultiplier,-1);
- }
-
- protected static Tool doMROnTableTest(HBaseTestingUtility util, TableName table,
- String family, String data, Map<String, String> args) throws Exception {
- return doMROnTableTest(util, table, family, data, args, 1,-1);
- }
-
- /**
- * Run an ImportTsv job and perform basic validation on the results.
- * Returns the ImportTsv <code>Tool</code> instance so that other tests can
- * inspect it for further validation as necessary. This method is static to
- * insure non-reliance on instance's util/conf facilities.
- * @param args Any arguments to pass BEFORE inputFile path is appended.
- * @return The Tool instance used to run the test.
- */
- protected static Tool doMROnTableTest(HBaseTestingUtility util, TableName table,
- String family, String data, Map<String, String> args, int valueMultiplier,int expectedKVCount)
- throws Exception {
- Configuration conf = new Configuration(util.getConfiguration());
-
- // populate input file
- FileSystem fs = FileSystem.get(conf);
- Path inputPath = fs.makeQualified(
- new Path(util.getDataTestDirOnTestFS(table.getNameAsString()), "input.dat"));
- FSDataOutputStream op = fs.create(inputPath, true);
- if (data == null) {
- data = "KEY\u001bVALUE1\u001bVALUE2\n";
- }
- op.write(Bytes.toBytes(data));
- op.close();
- LOG.debug(String.format("Wrote test data to file: %s", inputPath));
-
- if (conf.getBoolean(FORCE_COMBINER_CONF, true)) {
- LOG.debug("Forcing combiner.");
- conf.setInt("mapreduce.map.combine.minspills", 1);
- }
-
- // Build args array.
- String[] argsArray = new String[args.size() + 2];
- Iterator it = args.entrySet().iterator();
- int i = 0;
- while (it.hasNext()) {
- Map.Entry pair = (Map.Entry) it.next();
- argsArray[i] = "-D" + pair.getKey() + "=" + pair.getValue();
- i++;
- }
- argsArray[i] = table.getNameAsString();
- argsArray[i + 1] = inputPath.toString();
-
- // run the import
- Tool tool = new ImportTsv();
- LOG.debug("Running ImportTsv with arguments: " + argsArray);
- assertEquals(0, ToolRunner.run(conf, tool, argsArray));
-
- // Perform basic validation. If the input args did not include
- // ImportTsv.BULK_OUTPUT_CONF_KEY then validate data in the table.
- // Otherwise, validate presence of hfiles.
- boolean isDryRun = args.containsKey(ImportTsv.DRY_RUN_CONF_KEY) &&
- "true".equalsIgnoreCase(args.get(ImportTsv.DRY_RUN_CONF_KEY));
- if (args.containsKey(ImportTsv.BULK_OUTPUT_CONF_KEY)) {
- if (isDryRun) {
- assertFalse(String.format("Dry run mode, %s should not have been created.",
- ImportTsv.BULK_OUTPUT_CONF_KEY),
- fs.exists(new Path(ImportTsv.BULK_OUTPUT_CONF_KEY)));
- } else {
- validateHFiles(fs, args.get(ImportTsv.BULK_OUTPUT_CONF_KEY), family,expectedKVCount);
- }
- } else {
- validateTable(conf, table, family, valueMultiplier, isDryRun);
- }
-
- if (conf.getBoolean(DELETE_AFTER_LOAD_CONF, true)) {
- LOG.debug("Deleting test subdirectory");
- util.cleanupDataTestDirOnTestFS(table.getNameAsString());
- }
- return tool;
- }
-
- /**
- * Confirm ImportTsv via data in online table.
- */
- private static void validateTable(Configuration conf, TableName tableName,
- String family, int valueMultiplier, boolean isDryRun) throws IOException {
-
- LOG.debug("Validating table.");
- Connection connection = ConnectionFactory.createConnection(conf);
- Table table = connection.getTable(tableName);
- boolean verified = false;
- long pause = conf.getLong("hbase.client.pause", 5 * 1000);
- int numRetries = conf.getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 5);
- for (int i = 0; i < numRetries; i++) {
- try {
- Scan scan = new Scan();
- // Scan entire family.
- scan.addFamily(Bytes.toBytes(family));
- ResultScanner resScanner = table.getScanner(scan);
- int numRows = 0;
- for (Result res : resScanner) {
- numRows++;
- assertEquals(2, res.size());
- List<Cell> kvs = res.listCells();
- assertTrue(CellUtil.matchingRow(kvs.get(0), Bytes.toBytes("KEY")));
- assertTrue(CellUtil.matchingRow(kvs.get(1), Bytes.toBytes("KEY")));
- assertTrue(CellUtil.matchingValue(kvs.get(0), Bytes.toBytes("VALUE" + valueMultiplier)));
- assertTrue(CellUtil.matchingValue(kvs.get(1), Bytes.toBytes("VALUE" + 2 * valueMultiplier)));
- // Only one result set is expected, so let it loop.
- }
- if (isDryRun) {
- assertEquals(0, numRows);
- } else {
- assertEquals(1, numRows);
- }
- verified = true;
- break;
- } catch (NullPointerException e) {
- // If here, a cell was empty. Presume its because updates came in
- // after the scanner had been opened. Wait a while and retry.
- }
- try {
- Thread.sleep(pause);
- } catch (InterruptedException e) {
- // continue
- }
- }
- table.close();
- connection.close();
- assertTrue(verified);
- }
-
- /**
- * Confirm ImportTsv via HFiles on fs.
- */
- private static void validateHFiles(FileSystem fs, String outputPath, String family,
- int expectedKVCount) throws IOException {
- // validate number and content of output columns
- LOG.debug("Validating HFiles.");
- Set<String> configFamilies = new HashSet<>();
- configFamilies.add(family);
- Set<String> foundFamilies = new HashSet<>();
- int actualKVCount = 0;
- for (FileStatus cfStatus : fs.listStatus(new Path(outputPath), new OutputFilesFilter())) {
- String[] elements = cfStatus.getPath().toString().split(Path.SEPARATOR);
- String cf = elements[elements.length - 1];
- foundFamilies.add(cf);
- assertTrue(
- String.format(
- "HFile output contains a column family (%s) not present in input families (%s)",
- cf, configFamilies),
- configFamilies.contains(cf));
- for (FileStatus hfile : fs.listStatus(cfStatus.getPath())) {
- assertTrue(
- String.format("HFile %s appears to contain no data.", hfile.getPath()),
- hfile.getLen() > 0);
- // count the number of KVs from all the hfiles
- if (expectedKVCount > -1) {
- actualKVCount += getKVCountFromHfile(fs, hfile.getPath());
- }
- }
- }
- assertTrue(String.format("HFile output does not contain the input family '%s'.", family),
- foundFamilies.contains(family));
- if (expectedKVCount > -1) {
- assertTrue(String.format(
- "KV count in ouput hfile=<%d> doesn't match with expected KV count=<%d>", actualKVCount,
- expectedKVCount), actualKVCount == expectedKVCount);
- }
- }
-
- /**
- * Method returns the total KVs in given hfile
- * @param fs File System
- * @param p HFile path
- * @return KV count in the given hfile
- * @throws IOException
- */
- private static int getKVCountFromHfile(FileSystem fs, Path p) throws IOException {
- Configuration conf = util.getConfiguration();
- HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf);
- reader.loadFileInfo();
- HFileScanner scanner = reader.getScanner(false, false);
- scanner.seekTo();
- int count = 0;
- do {
- count++;
- } while (scanner.next());
- reader.close();
- return count;
- }
-}
-
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsvParser.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsvParser.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsvParser.java
deleted file mode 100644
index 3c38102..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsvParser.java
+++ /dev/null
@@ -1,314 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import java.util.ArrayList;
-
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.testclassification.SmallTests;
-import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser;
-import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser.BadTsvLineException;
-import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser.ParsedLine;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.Pair;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.base.Joiner;
-import org.apache.hadoop.hbase.shaded.com.google.common.base.Splitter;
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.Iterables;
-
-/**
- * Tests for {@link TsvParser}.
- */
-@Category({MapReduceTests.class, SmallTests.class})
-public class TestImportTsvParser {
-
- private void assertBytesEquals(byte[] a, byte[] b) {
- assertEquals(Bytes.toStringBinary(a), Bytes.toStringBinary(b));
- }
-
- private void checkParsing(ParsedLine parsed, Iterable<String> expected) {
- ArrayList<String> parsedCols = new ArrayList<>();
- for (int i = 0; i < parsed.getColumnCount(); i++) {
- parsedCols.add(Bytes.toString(parsed.getLineBytes(), parsed.getColumnOffset(i),
- parsed.getColumnLength(i)));
- }
- if (!Iterables.elementsEqual(parsedCols, expected)) {
- fail("Expected: " + Joiner.on(",").join(expected) + "\n" + "Got:"
- + Joiner.on(",").join(parsedCols));
- }
- }
-
- @Test
- public void testTsvParserSpecParsing() {
- TsvParser parser;
-
- parser = new TsvParser("HBASE_ROW_KEY", "\t");
- assertNull(parser.getFamily(0));
- assertNull(parser.getQualifier(0));
- assertEquals(0, parser.getRowKeyColumnIndex());
- assertFalse(parser.hasTimestamp());
-
- parser = new TsvParser("HBASE_ROW_KEY,col1:scol1", "\t");
- assertNull(parser.getFamily(0));
- assertNull(parser.getQualifier(0));
- assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1));
- assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1));
- assertEquals(0, parser.getRowKeyColumnIndex());
- assertFalse(parser.hasTimestamp());
-
- parser = new TsvParser("HBASE_ROW_KEY,col1:scol1,col1:scol2", "\t");
- assertNull(parser.getFamily(0));
- assertNull(parser.getQualifier(0));
- assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1));
- assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1));
- assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(2));
- assertBytesEquals(Bytes.toBytes("scol2"), parser.getQualifier(2));
- assertEquals(0, parser.getRowKeyColumnIndex());
- assertFalse(parser.hasTimestamp());
-
- parser = new TsvParser("HBASE_ROW_KEY,col1:scol1,HBASE_TS_KEY,col1:scol2", "\t");
- assertNull(parser.getFamily(0));
- assertNull(parser.getQualifier(0));
- assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1));
- assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1));
- assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(3));
- assertBytesEquals(Bytes.toBytes("scol2"), parser.getQualifier(3));
- assertEquals(0, parser.getRowKeyColumnIndex());
- assertTrue(parser.hasTimestamp());
- assertEquals(2, parser.getTimestampKeyColumnIndex());
-
- parser = new TsvParser("HBASE_ROW_KEY,col1:scol1,HBASE_TS_KEY,col1:scol2,HBASE_ATTRIBUTES_KEY",
- "\t");
- assertNull(parser.getFamily(0));
- assertNull(parser.getQualifier(0));
- assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1));
- assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1));
- assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(3));
- assertBytesEquals(Bytes.toBytes("scol2"), parser.getQualifier(3));
- assertEquals(0, parser.getRowKeyColumnIndex());
- assertTrue(parser.hasTimestamp());
- assertEquals(2, parser.getTimestampKeyColumnIndex());
- assertEquals(4, parser.getAttributesKeyColumnIndex());
-
- parser = new TsvParser("HBASE_ATTRIBUTES_KEY,col1:scol1,HBASE_TS_KEY,col1:scol2,HBASE_ROW_KEY",
- "\t");
- assertNull(parser.getFamily(0));
- assertNull(parser.getQualifier(0));
- assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1));
- assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1));
- assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(3));
- assertBytesEquals(Bytes.toBytes("scol2"), parser.getQualifier(3));
- assertEquals(4, parser.getRowKeyColumnIndex());
- assertTrue(parser.hasTimestamp());
- assertEquals(2, parser.getTimestampKeyColumnIndex());
- assertEquals(0, parser.getAttributesKeyColumnIndex());
- }
-
- @Test
- public void testTsvParser() throws BadTsvLineException {
- TsvParser parser = new TsvParser("col_a,col_b:qual,HBASE_ROW_KEY,col_d", "\t");
- assertBytesEquals(Bytes.toBytes("col_a"), parser.getFamily(0));
- assertBytesEquals(HConstants.EMPTY_BYTE_ARRAY, parser.getQualifier(0));
- assertBytesEquals(Bytes.toBytes("col_b"), parser.getFamily(1));
- assertBytesEquals(Bytes.toBytes("qual"), parser.getQualifier(1));
- assertNull(parser.getFamily(2));
- assertNull(parser.getQualifier(2));
- assertEquals(2, parser.getRowKeyColumnIndex());
-
- assertEquals(TsvParser.DEFAULT_TIMESTAMP_COLUMN_INDEX, parser.getTimestampKeyColumnIndex());
-
- byte[] line = Bytes.toBytes("val_a\tval_b\tval_c\tval_d");
- ParsedLine parsed = parser.parse(line, line.length);
- checkParsing(parsed, Splitter.on("\t").split(Bytes.toString(line)));
- }
-
- @Test
- public void testTsvParserWithTimestamp() throws BadTsvLineException {
- TsvParser parser = new TsvParser("HBASE_ROW_KEY,HBASE_TS_KEY,col_a,", "\t");
- assertNull(parser.getFamily(0));
- assertNull(parser.getQualifier(0));
- assertNull(parser.getFamily(1));
- assertNull(parser.getQualifier(1));
- assertBytesEquals(Bytes.toBytes("col_a"), parser.getFamily(2));
- assertBytesEquals(HConstants.EMPTY_BYTE_ARRAY, parser.getQualifier(2));
- assertEquals(0, parser.getRowKeyColumnIndex());
- assertEquals(1, parser.getTimestampKeyColumnIndex());
-
- byte[] line = Bytes.toBytes("rowkey\t1234\tval_a");
- ParsedLine parsed = parser.parse(line, line.length);
- assertEquals(1234l, parsed.getTimestamp(-1));
- checkParsing(parsed, Splitter.on("\t").split(Bytes.toString(line)));
- }
-
- /**
- * Test cases that throw BadTsvLineException
- */
- @Test(expected = BadTsvLineException.class)
- public void testTsvParserBadTsvLineExcessiveColumns() throws BadTsvLineException {
- TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a", "\t");
- byte[] line = Bytes.toBytes("val_a\tval_b\tval_c");
- parser.parse(line, line.length);
- }
-
- @Test(expected = BadTsvLineException.class)
- public void testTsvParserBadTsvLineZeroColumn() throws BadTsvLineException {
- TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a", "\t");
- byte[] line = Bytes.toBytes("");
- parser.parse(line, line.length);
- }
-
- @Test(expected = BadTsvLineException.class)
- public void testTsvParserBadTsvLineOnlyKey() throws BadTsvLineException {
- TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a", "\t");
- byte[] line = Bytes.toBytes("key_only");
- parser.parse(line, line.length);
- }
-
- @Test(expected = BadTsvLineException.class)
- public void testTsvParserBadTsvLineNoRowKey() throws BadTsvLineException {
- TsvParser parser = new TsvParser("col_a,HBASE_ROW_KEY", "\t");
- byte[] line = Bytes.toBytes("only_cola_data_and_no_row_key");
- parser.parse(line, line.length);
- }
-
- @Test(expected = BadTsvLineException.class)
- public void testTsvParserInvalidTimestamp() throws BadTsvLineException {
- TsvParser parser = new TsvParser("HBASE_ROW_KEY,HBASE_TS_KEY,col_a,", "\t");
- assertEquals(1, parser.getTimestampKeyColumnIndex());
- byte[] line = Bytes.toBytes("rowkey\ttimestamp\tval_a");
- ParsedLine parsed = parser.parse(line, line.length);
- assertEquals(-1, parsed.getTimestamp(-1));
- checkParsing(parsed, Splitter.on("\t").split(Bytes.toString(line)));
- }
-
- @Test(expected = BadTsvLineException.class)
- public void testTsvParserNoTimestampValue() throws BadTsvLineException {
- TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a,HBASE_TS_KEY", "\t");
- assertEquals(2, parser.getTimestampKeyColumnIndex());
- byte[] line = Bytes.toBytes("rowkey\tval_a");
- parser.parse(line, line.length);
- }
-
- @Test
- public void testTsvParserParseRowKey() throws BadTsvLineException {
- TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a,HBASE_TS_KEY", "\t");
- assertEquals(0, parser.getRowKeyColumnIndex());
- byte[] line = Bytes.toBytes("rowkey\tval_a\t1234");
- Pair<Integer, Integer> rowKeyOffsets = parser.parseRowKey(line, line.length);
- assertEquals(0, rowKeyOffsets.getFirst().intValue());
- assertEquals(6, rowKeyOffsets.getSecond().intValue());
- try {
- line = Bytes.toBytes("\t\tval_a\t1234");
- parser.parseRowKey(line, line.length);
- fail("Should get BadTsvLineException on empty rowkey.");
- } catch (BadTsvLineException b) {
-
- }
- parser = new TsvParser("col_a,HBASE_ROW_KEY,HBASE_TS_KEY", "\t");
- assertEquals(1, parser.getRowKeyColumnIndex());
- line = Bytes.toBytes("val_a\trowkey\t1234");
- rowKeyOffsets = parser.parseRowKey(line, line.length);
- assertEquals(6, rowKeyOffsets.getFirst().intValue());
- assertEquals(6, rowKeyOffsets.getSecond().intValue());
- try {
- line = Bytes.toBytes("val_a");
- rowKeyOffsets = parser.parseRowKey(line, line.length);
- fail("Should get BadTsvLineException when number of columns less than rowkey position.");
- } catch (BadTsvLineException b) {
-
- }
- parser = new TsvParser("col_a,HBASE_TS_KEY,HBASE_ROW_KEY", "\t");
- assertEquals(2, parser.getRowKeyColumnIndex());
- line = Bytes.toBytes("val_a\t1234\trowkey");
- rowKeyOffsets = parser.parseRowKey(line, line.length);
- assertEquals(11, rowKeyOffsets.getFirst().intValue());
- assertEquals(6, rowKeyOffsets.getSecond().intValue());
- }
-
- @Test
- public void testTsvParseAttributesKey() throws BadTsvLineException {
- TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a,HBASE_TS_KEY,HBASE_ATTRIBUTES_KEY", "\t");
- assertEquals(0, parser.getRowKeyColumnIndex());
- byte[] line = Bytes.toBytes("rowkey\tval_a\t1234\tkey=>value");
- ParsedLine parse = parser.parse(line, line.length);
- assertEquals(18, parse.getAttributeKeyOffset());
- assertEquals(3, parser.getAttributesKeyColumnIndex());
- String attributes[] = parse.getIndividualAttributes();
- assertEquals(attributes[0], "key=>value");
- try {
- line = Bytes.toBytes("rowkey\tval_a\t1234");
- parser.parse(line, line.length);
- fail("Should get BadTsvLineException on empty rowkey.");
- } catch (BadTsvLineException b) {
-
- }
- parser = new TsvParser("HBASE_ATTRIBUTES_KEY,col_a,HBASE_ROW_KEY,HBASE_TS_KEY", "\t");
- assertEquals(2, parser.getRowKeyColumnIndex());
- line = Bytes.toBytes("key=>value\tval_a\trowkey\t1234");
- parse = parser.parse(line, line.length);
- assertEquals(0, parse.getAttributeKeyOffset());
- assertEquals(0, parser.getAttributesKeyColumnIndex());
- attributes = parse.getIndividualAttributes();
- assertEquals(attributes[0], "key=>value");
- try {
- line = Bytes.toBytes("val_a");
- ParsedLine parse2 = parser.parse(line, line.length);
- fail("Should get BadTsvLineException when number of columns less than rowkey position.");
- } catch (BadTsvLineException b) {
-
- }
- parser = new TsvParser("col_a,HBASE_ATTRIBUTES_KEY,HBASE_TS_KEY,HBASE_ROW_KEY", "\t");
- assertEquals(3, parser.getRowKeyColumnIndex());
- line = Bytes.toBytes("val_a\tkey0=>value0,key1=>value1,key2=>value2\t1234\trowkey");
- parse = parser.parse(line, line.length);
- assertEquals(1, parser.getAttributesKeyColumnIndex());
- assertEquals(6, parse.getAttributeKeyOffset());
- String[] attr = parse.getIndividualAttributes();
- int i = 0;
- for(String str : attr) {
- assertEquals(("key"+i+"=>"+"value"+i), str );
- i++;
- }
- }
-
- @Test
- public void testTsvParserWithCellVisibilityCol() throws BadTsvLineException {
- TsvParser parser = new TsvParser(
- "HBASE_ROW_KEY,col_a,HBASE_TS_KEY,HBASE_ATTRIBUTES_KEY,HBASE_CELL_VISIBILITY", "\t");
- assertEquals(0, parser.getRowKeyColumnIndex());
- assertEquals(4, parser.getCellVisibilityColumnIndex());
- byte[] line = Bytes.toBytes("rowkey\tval_a\t1234\tkey=>value\tPRIVATE&SECRET");
- ParsedLine parse = parser.parse(line, line.length);
- assertEquals(18, parse.getAttributeKeyOffset());
- assertEquals(3, parser.getAttributesKeyColumnIndex());
- String attributes[] = parse.getIndividualAttributes();
- assertEquals(attributes[0], "key=>value");
- assertEquals(29, parse.getCellVisibilityColumnOffset());
- }
-
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestJarFinder.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestJarFinder.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestJarFinder.java
deleted file mode 100644
index 8187b73..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestJarFinder.java
+++ /dev/null
@@ -1,132 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.testclassification.SmallTests;
-import org.junit.Assert;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.io.Writer;
-import java.text.MessageFormat;
-import java.util.Properties;
-import java.util.jar.JarInputStream;
-import java.util.jar.JarOutputStream;
-import java.util.jar.Manifest;
-
-/**
- * This file was forked from hadoop/common/branches/branch-2@1350012.
- */
-@Category(SmallTests.class)
-public class TestJarFinder {
-
- @Test
- public void testJar() throws Exception {
-
- //picking a class that is for sure in a JAR in the classpath
- String jar = JarFinder.getJar(LogFactory.class);
- Assert.assertTrue(new File(jar).exists());
- }
-
- private static void delete(File file) throws IOException {
- if (file.getAbsolutePath().length() < 5) {
- throw new IllegalArgumentException(
- MessageFormat.format("Path [{0}] is too short, not deleting",
- file.getAbsolutePath()));
- }
- if (file.exists()) {
- if (file.isDirectory()) {
- File[] children = file.listFiles();
- if (children != null) {
- for (File child : children) {
- delete(child);
- }
- }
- }
- if (!file.delete()) {
- throw new RuntimeException(
- MessageFormat.format("Could not delete path [{0}]",
- file.getAbsolutePath()));
- }
- }
- }
-
- @Test
- public void testExpandedClasspath() throws Exception {
- //picking a class that is for sure in a directory in the classpath
- //in this case the JAR is created on the fly
- String jar = JarFinder.getJar(TestJarFinder.class);
- Assert.assertTrue(new File(jar).exists());
- }
-
- @Test
- public void testExistingManifest() throws Exception {
- File dir = new File(System.getProperty("test.build.dir", "target/test-dir"),
- TestJarFinder.class.getName() + "-testExistingManifest");
- delete(dir);
- dir.mkdirs();
-
- File metaInfDir = new File(dir, "META-INF");
- metaInfDir.mkdirs();
- File manifestFile = new File(metaInfDir, "MANIFEST.MF");
- Manifest manifest = new Manifest();
- OutputStream os = new FileOutputStream(manifestFile);
- manifest.write(os);
- os.close();
-
- File propsFile = new File(dir, "props.properties");
- Writer writer = new FileWriter(propsFile);
- new Properties().store(writer, "");
- writer.close();
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- JarOutputStream zos = new JarOutputStream(baos);
- JarFinder.jarDir(dir, "", zos);
- JarInputStream jis =
- new JarInputStream(new ByteArrayInputStream(baos.toByteArray()));
- Assert.assertNotNull(jis.getManifest());
- jis.close();
- }
-
- @Test
- public void testNoManifest() throws Exception {
- File dir = new File(System.getProperty("test.build.dir", "target/test-dir"),
- TestJarFinder.class.getName() + "-testNoManifest");
- delete(dir);
- dir.mkdirs();
- File propsFile = new File(dir, "props.properties");
- Writer writer = new FileWriter(propsFile);
- new Properties().store(writer, "");
- writer.close();
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- JarOutputStream zos = new JarOutputStream(baos);
- JarFinder.jarDir(dir, "", zos);
- JarInputStream jis =
- new JarInputStream(new ByteArrayInputStream(baos.toByteArray()));
- Assert.assertNotNull(jis.getManifest());
- jis.close();
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFiles.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFiles.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFiles.java
index b6ad2c9..b5b7a0c 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFiles.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFiles.java
@@ -59,7 +59,6 @@ import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.HFileTestUtil;
import org.junit.AfterClass;
import org.junit.BeforeClass;
-import org.junit.Ignore;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFilesSplitRecovery.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFilesSplitRecovery.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFilesSplitRecovery.java
deleted file mode 100644
index 529a448..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFilesSplitRecovery.java
+++ /dev/null
@@ -1,669 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.Collection;
-import java.util.Deque;
-import java.util.List;
-import java.util.Map;
-import java.util.NavigableMap;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.atomic.AtomicInteger;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.HColumnDescriptor;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.HRegionLocation;
-import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.MetaTableAccessor;
-import org.apache.hadoop.hbase.ServerName;
-import org.apache.hadoop.hbase.TableExistsException;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Admin;
-import org.apache.hadoop.hbase.client.ClientServiceCallable;
-import org.apache.hadoop.hbase.client.ClusterConnection;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.RegionLocator;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
-import org.apache.hadoop.hbase.ipc.RpcControllerFactory;
-import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.BulkLoadHFileRequest;
-import org.apache.hadoop.hbase.regionserver.HRegionServer;
-import org.apache.hadoop.hbase.regionserver.TestHRegionServerBulkLoad;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.FSUtils;
-import org.apache.hadoop.hbase.util.Pair;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
-import org.mockito.Mockito;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.Multimap;
-
-import org.apache.hadoop.hbase.shaded.com.google.protobuf.RpcController;
-import org.apache.hadoop.hbase.shaded.com.google.protobuf.ServiceException;
-
-/**
- * Test cases for the atomic load error handling of the bulk load functionality.
- */
-@Category({MapReduceTests.class, LargeTests.class})
-public class TestLoadIncrementalHFilesSplitRecovery {
- private static final Log LOG = LogFactory.getLog(TestHRegionServerBulkLoad.class);
-
- static HBaseTestingUtility util;
- //used by secure subclass
- static boolean useSecure = false;
-
- final static int NUM_CFS = 10;
- final static byte[] QUAL = Bytes.toBytes("qual");
- final static int ROWCOUNT = 100;
-
- private final static byte[][] families = new byte[NUM_CFS][];
-
- @Rule
- public TestName name = new TestName();
-
- static {
- for (int i = 0; i < NUM_CFS; i++) {
- families[i] = Bytes.toBytes(family(i));
- }
- }
-
- static byte[] rowkey(int i) {
- return Bytes.toBytes(String.format("row_%08d", i));
- }
-
- static String family(int i) {
- return String.format("family_%04d", i);
- }
-
- static byte[] value(int i) {
- return Bytes.toBytes(String.format("%010d", i));
- }
-
- public static void buildHFiles(FileSystem fs, Path dir, int value)
- throws IOException {
- byte[] val = value(value);
- for (int i = 0; i < NUM_CFS; i++) {
- Path testIn = new Path(dir, family(i));
-
- TestHRegionServerBulkLoad.createHFile(fs, new Path(testIn, "hfile_" + i),
- Bytes.toBytes(family(i)), QUAL, val, ROWCOUNT);
- }
- }
-
- /**
- * Creates a table with given table name and specified number of column
- * families if the table does not already exist.
- */
- private void setupTable(final Connection connection, TableName table, int cfs)
- throws IOException {
- try {
- LOG.info("Creating table " + table);
- HTableDescriptor htd = new HTableDescriptor(table);
- for (int i = 0; i < cfs; i++) {
- htd.addFamily(new HColumnDescriptor(family(i)));
- }
- try (Admin admin = connection.getAdmin()) {
- admin.createTable(htd);
- }
- } catch (TableExistsException tee) {
- LOG.info("Table " + table + " already exists");
- }
- }
-
- /**
-  * Creates a table with the given table name, the specified number of column
-  * families, and the given split keys, if the table does not already exist.
-  * @param table the table to create
-  * @param cfs the number of column families
-  * @param SPLIT_KEYS the keys to pre-split the table with
-  */
- private void setupTableWithSplitkeys(TableName table, int cfs, byte[][] SPLIT_KEYS)
- throws IOException {
- try {
- LOG.info("Creating table " + table);
- HTableDescriptor htd = new HTableDescriptor(table);
- for (int i = 0; i < cfs; i++) {
- htd.addFamily(new HColumnDescriptor(family(i)));
- }
-
- util.createTable(htd, SPLIT_KEYS);
- } catch (TableExistsException tee) {
- LOG.info("Table " + table + " already exists");
- }
- }
-
- private Path buildBulkFiles(TableName table, int value) throws Exception {
- Path dir = util.getDataTestDirOnTestFS(table.getNameAsString());
- Path bulk1 = new Path(dir, table.getNameAsString() + value);
- FileSystem fs = util.getTestFileSystem();
- buildHFiles(fs, bulk1, value);
- return bulk1;
- }
-
- /**
- * Populate table with known values.
- */
- private void populateTable(final Connection connection, TableName table, int value)
- throws Exception {
- // create HFiles for different column families
- LoadIncrementalHFiles lih = new LoadIncrementalHFiles(util.getConfiguration());
- Path bulk1 = buildBulkFiles(table, value);
- try (Table t = connection.getTable(table);
- RegionLocator locator = connection.getRegionLocator(table);
- Admin admin = connection.getAdmin()) {
- lih.doBulkLoad(bulk1, admin, t, locator);
- }
- }
-
- /**
- * Split the known table in half. (this is hard coded for this test suite)
- */
- private void forceSplit(TableName table) {
- try {
-      // We would need to call the region server directly to make this synchronous, but that API isn't visible here.
- HRegionServer hrs = util.getRSForFirstRegionInTable(table);
-
- for (HRegionInfo hri :
- ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices())) {
- if (hri.getTable().equals(table)) {
- util.getAdmin().splitRegionAsync(hri.getRegionName(), rowkey(ROWCOUNT / 2));
- //ProtobufUtil.split(null, hrs.getRSRpcServices(), hri, rowkey(ROWCOUNT / 2));
- }
- }
-
- // verify that split completed.
- int regions;
- do {
- regions = 0;
- for (HRegionInfo hri :
- ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices())) {
- if (hri.getTable().equals(table)) {
- regions++;
- }
- }
- if (regions != 2) {
- LOG.info("Taking some time to complete split...");
- Thread.sleep(250);
- }
- } while (regions != 2);
- } catch (IOException e) {
- e.printStackTrace();
- } catch (InterruptedException e) {
- e.printStackTrace();
- }
- }
-
- @BeforeClass
- public static void setupCluster() throws Exception {
- util = new HBaseTestingUtility();
- util.getConfiguration().set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY, "");
- util.startMiniCluster(1);
- }
-
- @AfterClass
- public static void teardownCluster() throws Exception {
- util.shutdownMiniCluster();
- }
-
- /**
- * Checks that all columns have the expected value and that there is the
- * expected number of rows.
- * @throws IOException
- */
- void assertExpectedTable(TableName table, int count, int value) throws IOException {
- HTableDescriptor [] htds = util.getAdmin().listTables(table.getNameAsString());
- assertEquals(htds.length, 1);
- Table t = null;
- try {
- t = util.getConnection().getTable(table);
- Scan s = new Scan();
- ResultScanner sr = t.getScanner(s);
- int i = 0;
- for (Result r : sr) {
- i++;
- for (NavigableMap<byte[], byte[]> nm : r.getNoVersionMap().values()) {
- for (byte[] val : nm.values()) {
- assertTrue(Bytes.equals(val, value(value)));
- }
- }
- }
- assertEquals(count, i);
- } catch (IOException e) {
- fail("Failed due to exception");
- } finally {
- if (t != null) t.close();
- }
- }
-
- /**
-  * Test that an exception thrown from the RS side results in an
-  * exception on the LIHFile client.
- */
- @Test(expected=IOException.class, timeout=120000)
- public void testBulkLoadPhaseFailure() throws Exception {
- final TableName table = TableName.valueOf(name.getMethodName());
- final AtomicInteger attmptedCalls = new AtomicInteger();
- final AtomicInteger failedCalls = new AtomicInteger();
- util.getConfiguration().setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 2);
- try (Connection connection = ConnectionFactory.createConnection(util
- .getConfiguration())) {
- setupTable(connection, table, 10);
- LoadIncrementalHFiles lih = new LoadIncrementalHFiles(
- util.getConfiguration()) {
- @Override
- protected List<LoadQueueItem> tryAtomicRegionLoad(
- ClientServiceCallable<byte[]> serviceCallable, TableName tableName, final byte[] first,
- Collection<LoadQueueItem> lqis) throws IOException {
- int i = attmptedCalls.incrementAndGet();
- if (i == 1) {
- Connection errConn;
- try {
- errConn = getMockedConnection(util.getConfiguration());
- serviceCallable = this.buildClientServiceCallable(errConn, table, first, lqis, true);
- } catch (Exception e) {
- LOG.fatal("mocking cruft, should never happen", e);
- throw new RuntimeException("mocking cruft, should never happen");
- }
- failedCalls.incrementAndGet();
- return super.tryAtomicRegionLoad(serviceCallable, tableName, first, lqis);
- }
-
- return super.tryAtomicRegionLoad(serviceCallable, tableName, first, lqis);
- }
- };
- try {
- // create HFiles for different column families
- Path dir = buildBulkFiles(table, 1);
- try (Table t = connection.getTable(table);
- RegionLocator locator = connection.getRegionLocator(table);
- Admin admin = connection.getAdmin()) {
- lih.doBulkLoad(dir, admin, t, locator);
- }
- } finally {
- util.getConfiguration().setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER,
- HConstants.DEFAULT_HBASE_CLIENT_RETRIES_NUMBER);
- }
- fail("doBulkLoad should have thrown an exception");
- }
- }
-
- /**
-  * Test that an exception thrown from the RS side results in the
-  * expected number of retries, set by {@link HConstants#HBASE_CLIENT_RETRIES_NUMBER},
-  * when {@link LoadIncrementalHFiles#RETRY_ON_IO_EXCEPTION} is set.
- */
- @Test
- public void testRetryOnIOException() throws Exception {
- final TableName table = TableName.valueOf(name.getMethodName());
- final AtomicInteger calls = new AtomicInteger(1);
- final Connection conn = ConnectionFactory.createConnection(util
- .getConfiguration());
- util.getConfiguration().setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 2);
- util.getConfiguration().setBoolean(
- LoadIncrementalHFiles.RETRY_ON_IO_EXCEPTION, true);
- final LoadIncrementalHFiles lih = new LoadIncrementalHFiles(
- util.getConfiguration()) {
- @Override
- protected List<LoadQueueItem> tryAtomicRegionLoad(
- ClientServiceCallable<byte[]> serverCallable, TableName tableName,
- final byte[] first, Collection<LoadQueueItem> lqis)
- throws IOException {
- if (calls.getAndIncrement() < util.getConfiguration().getInt(
- HConstants.HBASE_CLIENT_RETRIES_NUMBER,
- HConstants.DEFAULT_HBASE_CLIENT_RETRIES_NUMBER) - 1) {
- ClientServiceCallable<byte[]> newServerCallable = new ClientServiceCallable<byte[]>(
- conn, tableName, first, new RpcControllerFactory(
- util.getConfiguration()).newController(), HConstants.PRIORITY_UNSET) {
- @Override
- public byte[] rpcCall() throws Exception {
- throw new IOException("Error calling something on RegionServer");
- }
- };
- return super.tryAtomicRegionLoad(newServerCallable, tableName, first, lqis);
- } else {
- return super.tryAtomicRegionLoad(serverCallable, tableName, first, lqis);
- }
- }
- };
- setupTable(conn, table, 10);
- Path dir = buildBulkFiles(table, 1);
- lih.doBulkLoad(dir, conn.getAdmin(), conn.getTable(table),
- conn.getRegionLocator(table));
- util.getConfiguration().setBoolean(
- LoadIncrementalHFiles.RETRY_ON_IO_EXCEPTION, false);
-
- }
-
- @SuppressWarnings("deprecation")
- private ClusterConnection getMockedConnection(final Configuration conf)
- throws IOException, org.apache.hadoop.hbase.shaded.com.google.protobuf.ServiceException {
- ClusterConnection c = Mockito.mock(ClusterConnection.class);
- Mockito.when(c.getConfiguration()).thenReturn(conf);
- Mockito.doNothing().when(c).close();
- // Make it so we return a particular location when asked.
- final HRegionLocation loc = new HRegionLocation(HRegionInfo.FIRST_META_REGIONINFO,
- ServerName.valueOf("example.org", 1234, 0));
- Mockito.when(c.getRegionLocation((TableName) Mockito.any(),
- (byte[]) Mockito.any(), Mockito.anyBoolean())).
- thenReturn(loc);
- Mockito.when(c.locateRegion((TableName) Mockito.any(), (byte[]) Mockito.any())).
- thenReturn(loc);
- ClientProtos.ClientService.BlockingInterface hri =
- Mockito.mock(ClientProtos.ClientService.BlockingInterface.class);
- Mockito.when(hri.bulkLoadHFile((RpcController)Mockito.any(), (BulkLoadHFileRequest)Mockito.any())).
- thenThrow(new ServiceException(new IOException("injecting bulk load error")));
- Mockito.when(c.getClient(Mockito.any(ServerName.class))).
- thenReturn(hri);
- return c;
- }
-
- /**
- * This test exercises the path where there is a split after initial
- * validation but before the atomic bulk load call. We cannot use presplitting
- * to test this path, so we actually inject a split just before the atomic
- * region load.
- */
- @Test (timeout=120000)
- public void testSplitWhileBulkLoadPhase() throws Exception {
- final TableName table = TableName.valueOf(name.getMethodName());
- try (Connection connection = ConnectionFactory.createConnection(util.getConfiguration())) {
- setupTable(connection, table, 10);
- populateTable(connection, table,1);
- assertExpectedTable(table, ROWCOUNT, 1);
-
-      // Now let's cause trouble. This will occur after checks and cause bulk
-      // files to fail when we attempt to atomically import them. This is recoverable.
- final AtomicInteger attemptedCalls = new AtomicInteger();
- LoadIncrementalHFiles lih2 = new LoadIncrementalHFiles(util.getConfiguration()) {
- @Override
- protected void bulkLoadPhase(final Table htable, final Connection conn,
- ExecutorService pool, Deque<LoadQueueItem> queue,
- final Multimap<ByteBuffer, LoadQueueItem> regionGroups, boolean copyFile,
- Map<LoadQueueItem, ByteBuffer> item2RegionMap)
- throws IOException {
- int i = attemptedCalls.incrementAndGet();
- if (i == 1) {
- // On first attempt force a split.
- forceSplit(table);
- }
- super.bulkLoadPhase(htable, conn, pool, queue, regionGroups, copyFile, item2RegionMap);
- }
- };
-
- // create HFiles for different column families
- try (Table t = connection.getTable(table);
- RegionLocator locator = connection.getRegionLocator(table);
- Admin admin = connection.getAdmin()) {
- Path bulk = buildBulkFiles(table, 2);
- lih2.doBulkLoad(bulk, admin, t, locator);
- }
-
- // check that data was loaded
-      // The three expected attempts are: 1) failure because the region needs to split,
-      // 2) load of the split top, 3) load of the split bottom.
- assertEquals(attemptedCalls.get(), 3);
- assertExpectedTable(table, ROWCOUNT, 2);
- }
- }
-
- /**
- * This test splits a table and attempts to bulk load. The bulk import files
- * should be split before atomically importing.
- */
- @Test (timeout=120000)
- public void testGroupOrSplitPresplit() throws Exception {
- final TableName table = TableName.valueOf(name.getMethodName());
- try (Connection connection = ConnectionFactory.createConnection(util.getConfiguration())) {
- setupTable(connection, table, 10);
- populateTable(connection, table, 1);
- assertExpectedTable(connection, table, ROWCOUNT, 1);
- forceSplit(table);
-
- final AtomicInteger countedLqis= new AtomicInteger();
- LoadIncrementalHFiles lih = new LoadIncrementalHFiles(
- util.getConfiguration()) {
- @Override
- protected Pair<List<LoadQueueItem>, String> groupOrSplit(
- Multimap<ByteBuffer, LoadQueueItem> regionGroups,
- final LoadQueueItem item, final Table htable,
- final Pair<byte[][], byte[][]> startEndKeys) throws IOException {
- Pair<List<LoadQueueItem>, String> lqis = super.groupOrSplit(regionGroups, item, htable,
- startEndKeys);
- if (lqis != null && lqis.getFirst() != null) {
- countedLqis.addAndGet(lqis.getFirst().size());
- }
- return lqis;
- }
- };
-
- // create HFiles for different column families
- Path bulk = buildBulkFiles(table, 2);
- try (Table t = connection.getTable(table);
- RegionLocator locator = connection.getRegionLocator(table);
- Admin admin = connection.getAdmin()) {
- lih.doBulkLoad(bulk, admin, t, locator);
- }
- assertExpectedTable(connection, table, ROWCOUNT, 2);
- assertEquals(20, countedLqis.get());
- }
- }
-
- /**
- * This test creates a table with many small regions. The bulk load files
-  * must be split multiple times before all of them can be loaded successfully.
- */
- @Test (timeout=120000)
- public void testSplitTmpFileCleanUp() throws Exception {
- final TableName table = TableName.valueOf(name.getMethodName());
- byte[][] SPLIT_KEYS = new byte[][] { Bytes.toBytes("row_00000010"),
- Bytes.toBytes("row_00000020"), Bytes.toBytes("row_00000030"),
- Bytes.toBytes("row_00000040"), Bytes.toBytes("row_00000050")};
- try (Connection connection = ConnectionFactory.createConnection(util.getConfiguration())) {
- setupTableWithSplitkeys(table, 10, SPLIT_KEYS);
-
- LoadIncrementalHFiles lih = new LoadIncrementalHFiles(util.getConfiguration());
-
- // create HFiles
- Path bulk = buildBulkFiles(table, 2);
- try (Table t = connection.getTable(table);
- RegionLocator locator = connection.getRegionLocator(table);
- Admin admin = connection.getAdmin()) {
- lih.doBulkLoad(bulk, admin, t, locator);
- }
- // family path
- Path tmpPath = new Path(bulk, family(0));
- // TMP_DIR under family path
- tmpPath = new Path(tmpPath, LoadIncrementalHFiles.TMP_DIR);
- FileSystem fs = bulk.getFileSystem(util.getConfiguration());
-      // HFiles have been split, so the TMP_DIR exists
- assertTrue(fs.exists(tmpPath));
- // TMP_DIR should have been cleaned-up
- assertNull(LoadIncrementalHFiles.TMP_DIR + " should be empty.",
- FSUtils.listStatus(fs, tmpPath));
- assertExpectedTable(connection, table, ROWCOUNT, 2);
- }
- }
-
- /**
-  * This simulates a remote exception which should cause LIHF to exit with an
- * exception.
- */
- @Test(expected = IOException.class, timeout=120000)
- public void testGroupOrSplitFailure() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName());
- try (Connection connection = ConnectionFactory.createConnection(util.getConfiguration())) {
- setupTable(connection, tableName, 10);
-
- LoadIncrementalHFiles lih = new LoadIncrementalHFiles(
- util.getConfiguration()) {
- int i = 0;
-
- @Override
- protected Pair<List<LoadQueueItem>, String> groupOrSplit(
- Multimap<ByteBuffer, LoadQueueItem> regionGroups,
- final LoadQueueItem item, final Table table,
- final Pair<byte[][], byte[][]> startEndKeys) throws IOException {
- i++;
-
- if (i == 5) {
- throw new IOException("failure");
- }
- return super.groupOrSplit(regionGroups, item, table, startEndKeys);
- }
- };
-
- // create HFiles for different column families
- Path dir = buildBulkFiles(tableName,1);
- try (Table t = connection.getTable(tableName);
- RegionLocator locator = connection.getRegionLocator(tableName);
- Admin admin = connection.getAdmin()) {
- lih.doBulkLoad(dir, admin, t, locator);
- }
- }
-
- fail("doBulkLoad should have thrown an exception");
- }
-
- @Test (timeout=120000)
- public void testGroupOrSplitWhenRegionHoleExistsInMeta() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName());
- byte[][] SPLIT_KEYS = new byte[][] { Bytes.toBytes("row_00000100") };
-    // Share the connection. We were failing to find the table with our new reverse scan because it
-    // looks for the first region, not any region -- that is how it works now. The test below removes
-    // the first region; the old behavior relied on the Connection cache holding the first region.
- Connection connection = ConnectionFactory.createConnection(util.getConfiguration());
- Table table = connection.getTable(tableName);
-
- setupTableWithSplitkeys(tableName, 10, SPLIT_KEYS);
- Path dir = buildBulkFiles(tableName, 2);
-
- final AtomicInteger countedLqis = new AtomicInteger();
- LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration()) {
-
- @Override
- protected Pair<List<LoadQueueItem>, String> groupOrSplit(
- Multimap<ByteBuffer, LoadQueueItem> regionGroups,
- final LoadQueueItem item, final Table htable,
- final Pair<byte[][], byte[][]> startEndKeys) throws IOException {
- Pair<List<LoadQueueItem>, String> lqis = super.groupOrSplit(regionGroups, item, htable,
- startEndKeys);
- if (lqis != null && lqis.getFirst() != null) {
- countedLqis.addAndGet(lqis.getFirst().size());
- }
- return lqis;
- }
- };
-
- // do bulkload when there is no region hole in hbase:meta.
- try (Table t = connection.getTable(tableName);
- RegionLocator locator = connection.getRegionLocator(tableName);
- Admin admin = connection.getAdmin()) {
- loader.doBulkLoad(dir, admin, t, locator);
- } catch (Exception e) {
-      LOG.error("exception=", e);
- }
- // check if all the data are loaded into the table.
- this.assertExpectedTable(tableName, ROWCOUNT, 2);
-
- dir = buildBulkFiles(tableName, 3);
-
- // Mess it up by leaving a hole in the hbase:meta
- List<HRegionInfo> regionInfos = MetaTableAccessor.getTableRegions(connection, tableName);
- for (HRegionInfo regionInfo : regionInfos) {
- if (Bytes.equals(regionInfo.getStartKey(), HConstants.EMPTY_BYTE_ARRAY)) {
- MetaTableAccessor.deleteRegion(connection, regionInfo);
- break;
- }
- }
-
- try (Table t = connection.getTable(tableName);
- RegionLocator locator = connection.getRegionLocator(tableName);
- Admin admin = connection.getAdmin()) {
- loader.doBulkLoad(dir, admin, t, locator);
- } catch (Exception e) {
- LOG.error("exception=", e);
- assertTrue("IOException expected", e instanceof IOException);
- }
-
- table.close();
-
- // Make sure at least the one region that still exists can be found.
- regionInfos = MetaTableAccessor.getTableRegions(connection, tableName);
- assertTrue(regionInfos.size() >= 1);
-
- this.assertExpectedTable(connection, tableName, ROWCOUNT, 2);
- connection.close();
- }
-
- /**
- * Checks that all columns have the expected value and that there is the
- * expected number of rows.
- * @throws IOException
- */
- void assertExpectedTable(final Connection connection, TableName table, int count, int value)
- throws IOException {
- HTableDescriptor [] htds = util.getAdmin().listTables(table.getNameAsString());
- assertEquals(htds.length, 1);
- Table t = null;
- try {
- t = connection.getTable(table);
- Scan s = new Scan();
- ResultScanner sr = t.getScanner(s);
- int i = 0;
- for (Result r : sr) {
- i++;
- for (NavigableMap<byte[], byte[]> nm : r.getNoVersionMap().values()) {
- for (byte[] val : nm.values()) {
- assertTrue(Bytes.equals(val, value(value)));
- }
- }
- }
- assertEquals(count, i);
- } catch (IOException e) {
- fail("Failed due to exception");
- } finally {
- if (t != null) t.close();
- }
- }
-}
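
All of the failure-injecting subclasses in the test above wrap the same baseline client call. A minimal sketch of that call, using the doBulkLoad(Path, Admin, Table, RegionLocator) overload the tests invoke; the table name and HFile directory are illustrative, not taken from the patch.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;   // same package as the test above

public class BulkLoadSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    TableName tableName = TableName.valueOf("exampleTable");    // illustrative name
    Path hfileDir = new Path("/tmp/bulk/exampleTable");         // illustrative; layout is <dir>/<family>/<hfile>
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
    try (Connection connection = ConnectionFactory.createConnection(conf);
         Table table = connection.getTable(tableName);
         RegionLocator locator = connection.getRegionLocator(tableName);
         Admin admin = connection.getAdmin()) {
      // Groups the HFiles by region (splitting any that straddle a region boundary),
      // then loads each group atomically; mid-load splits are retried, as the tests above verify.
      loader.doBulkLoad(hfileDir, admin, table, locator);
    }
  }
}
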
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableInputFormat.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableInputFormat.java
deleted file mode 100644
index 0c5207b..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableInputFormat.java
+++ /dev/null
@@ -1,49 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
-import org.apache.hadoop.mapreduce.Job;
-import org.junit.BeforeClass;
-import org.junit.experimental.categories.Category;
-
-import java.io.IOException;
-import java.util.List;
-
-/**
- * Tests various scan start and stop row scenarios. This is set in a scan and
- * tested in a MapReduce job to see if that is handed over and done properly
- * too.
- */
-@Category({VerySlowMapReduceTests.class, LargeTests.class})
-public class TestMultiTableInputFormat extends MultiTableInputFormatTestBase {
-
- @BeforeClass
- public static void setupLogging() {
- TEST_UTIL.enableDebug(MultiTableInputFormat.class);
- }
-
- @Override
- protected void initJob(List<Scan> scans, Job job) throws IOException {
- TableMapReduceUtil.initTableMapperJob(scans, ScanMapper.class,
- ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
- }
-}
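
The initJob override above hands a list of Scans to TableMapReduceUtil.initTableMapperJob; each Scan is expected to identify its source table through the Scan.SCAN_ATTRIBUTES_TABLE_NAME attribute (the same attribute the snapshot variant further down reads back). A short sketch of building such a list outside the test harness; it is a fragment (imports and the enclosing method omitted), the table names are hypothetical, and ScanMapper refers to the mapper defined in MultiTableInputFormatTestBase.

// Sketch: one Scan per table, each tagged with its table name, fed to a single MR job.
List<Scan> scans = new ArrayList<>();
for (String tableName : new String[] { "tableA", "tableB" }) {   // hypothetical tables
  Scan scan = new Scan();
  scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(tableName));
  scans.add(scan);
}
Job job = Job.getInstance(HBaseConfiguration.create(), "multi-table-scan");
TableMapReduceUtil.initTableMapperJob(scans, ScanMapper.class,
    ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
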
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableSnapshotInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableSnapshotInputFormat.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableSnapshotInputFormat.java
deleted file mode 100644
index 32f511b..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableSnapshotInputFormat.java
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.base.Function;
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.ImmutableList;
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.Multimaps;
-import edu.umd.cs.findbugs.annotations.Nullable;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.FSUtils;
-import org.apache.hadoop.mapreduce.Job;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.experimental.categories.Category;
-
-import java.io.IOException;
-import java.util.Collection;
-import java.util.List;
-import java.util.Map;
-
-@Category({ VerySlowMapReduceTests.class, LargeTests.class })
-public class TestMultiTableSnapshotInputFormat extends MultiTableInputFormatTestBase {
-
- protected Path restoreDir;
-
- @BeforeClass
- public static void setUpSnapshots() throws Exception {
-
- TEST_UTIL.enableDebug(MultiTableSnapshotInputFormat.class);
- TEST_UTIL.enableDebug(MultiTableSnapshotInputFormatImpl.class);
-
- // take a snapshot of every table we have.
- for (String tableName : TABLES) {
- SnapshotTestingUtils
- .createSnapshotAndValidate(TEST_UTIL.getAdmin(), TableName.valueOf(tableName),
- ImmutableList.of(MultiTableInputFormatTestBase.INPUT_FAMILY), null,
- snapshotNameForTable(tableName), FSUtils.getRootDir(TEST_UTIL.getConfiguration()),
- TEST_UTIL.getTestFileSystem(), true);
- }
- }
-
- @Before
- public void setUp() throws Exception {
- this.restoreDir = TEST_UTIL.getRandomDir();
- }
-
- @Override
- protected void initJob(List<Scan> scans, Job job) throws IOException {
- TableMapReduceUtil
- .initMultiTableSnapshotMapperJob(getSnapshotScanMapping(scans), ScanMapper.class,
- ImmutableBytesWritable.class, ImmutableBytesWritable.class, job, true, restoreDir);
- }
-
- protected Map<String, Collection<Scan>> getSnapshotScanMapping(final List<Scan> scans) {
- return Multimaps.index(scans, new Function<Scan, String>() {
- @Nullable
- @Override
- public String apply(Scan input) {
- return snapshotNameForTable(
- Bytes.toStringBinary(input.getAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME)));
- }
- }).asMap();
- }
-
- public static String snapshotNameForTable(String tableName) {
- return tableName + "_snapshot";
- }
-
-}
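
getSnapshotScanMapping above groups the scans with Guava's Multimaps.index and an anonymous Function; with java.util.stream the same grouping can be written more compactly. A sketch only (fragment, imports omitted), assuming the same snapshotNameForTable helper and that every Scan carries the table-name attribute:

// Stream-based equivalent of the Multimaps.index call above (sketch).
Map<String, List<Scan>> grouped = scans.stream()
    .collect(Collectors.groupingBy(scan -> snapshotNameForTable(
        Bytes.toStringBinary(scan.getAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME)))));

The Guava form in the patch exposes a Map<String, Collection<Scan>> via asMap(), so the two differ only in the declared value type.
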
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableSnapshotInputFormatImpl.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableSnapshotInputFormatImpl.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableSnapshotInputFormatImpl.java
deleted file mode 100644
index 1c33848..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableSnapshotInputFormatImpl.java
+++ /dev/null
@@ -1,186 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.ImmutableList;
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.ImmutableMap;
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.Maps;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.testclassification.SmallTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.FSUtils;
-import org.junit.Before;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.mockito.Mockito;
-
-import java.io.IOException;
-import java.util.Collection;
-import java.util.List;
-import java.util.Map;
-import java.util.Objects;
-
-import static org.junit.Assert.assertEquals;
-import static org.mockito.Matchers.any;
-import static org.mockito.Matchers.eq;
-import static org.mockito.Mockito.doNothing;
-import static org.mockito.Mockito.verify;
-
-@Category({ SmallTests.class })
-public class TestMultiTableSnapshotInputFormatImpl {
-
- private MultiTableSnapshotInputFormatImpl subject;
- private Map<String, Collection<Scan>> snapshotScans;
- private Path restoreDir;
- private Configuration conf;
- private Path rootDir;
-
- @Before
- public void setUp() throws Exception {
- this.subject = Mockito.spy(new MultiTableSnapshotInputFormatImpl());
-
- // mock out restoreSnapshot
-    // TODO: this is kind of meh; it'd be much nicer to just inject the RestoreSnapshotHelper
-    // dependency into the input format. However, we need a new RestoreSnapshotHelper per
-    // snapshot in the current design, and it *also* feels weird to introduce a
-    // RestoreSnapshotHelperFactory and inject that, which would probably be the more "pure"
-    // way of doing things. This is the lesser of two evils, perhaps?
- doNothing().when(this.subject).
- restoreSnapshot(any(Configuration.class), any(String.class), any(Path.class),
- any(Path.class), any(FileSystem.class));
-
- this.conf = new Configuration();
- this.rootDir = new Path("file:///test-root-dir");
- FSUtils.setRootDir(conf, rootDir);
- this.snapshotScans = ImmutableMap.<String, Collection<Scan>>of("snapshot1",
- ImmutableList.of(new Scan(Bytes.toBytes("1"), Bytes.toBytes("2"))), "snapshot2",
- ImmutableList.of(new Scan(Bytes.toBytes("3"), Bytes.toBytes("4")),
- new Scan(Bytes.toBytes("5"), Bytes.toBytes("6"))));
-
- this.restoreDir = new Path(FSUtils.getRootDir(conf), "restore-dir");
-
- }
-
- public void callSetInput() throws IOException {
- subject.setInput(this.conf, snapshotScans, restoreDir);
- }
-
- public Map<String, Collection<ScanWithEquals>> toScanWithEquals(
- Map<String, Collection<Scan>> snapshotScans) throws IOException {
- Map<String, Collection<ScanWithEquals>> rtn = Maps.newHashMap();
-
- for (Map.Entry<String, Collection<Scan>> entry : snapshotScans.entrySet()) {
- List<ScanWithEquals> scans = Lists.newArrayList();
-
- for (Scan scan : entry.getValue()) {
- scans.add(new ScanWithEquals(scan));
- }
- rtn.put(entry.getKey(), scans);
- }
-
- return rtn;
- }
-
- public static class ScanWithEquals {
-
- private final String startRow;
- private final String stopRow;
-
- /**
- * Creates a new instance of this class while copying all values.
- *
- * @param scan The scan instance to copy from.
- * @throws java.io.IOException When copying the values fails.
- */
- public ScanWithEquals(Scan scan) throws IOException {
- this.startRow = Bytes.toStringBinary(scan.getStartRow());
- this.stopRow = Bytes.toStringBinary(scan.getStopRow());
- }
-
- @Override
- public boolean equals(Object obj) {
- if (!(obj instanceof ScanWithEquals)) {
- return false;
- }
- ScanWithEquals otherScan = (ScanWithEquals) obj;
- return Objects.equals(this.startRow, otherScan.startRow) && Objects
- .equals(this.stopRow, otherScan.stopRow);
- }
-
- @Override
- public String toString() {
- return org.apache.hadoop.hbase.shaded.com.google.common.base.MoreObjects.
- toStringHelper(this).add("startRow", startRow)
- .add("stopRow", stopRow).toString();
- }
- }
-
- @Test
- public void testSetInputSetsSnapshotToScans() throws Exception {
-
- callSetInput();
-
- Map<String, Collection<Scan>> actual = subject.getSnapshotsToScans(conf);
-
- // convert to scans we can use .equals on
- Map<String, Collection<ScanWithEquals>> actualWithEquals = toScanWithEquals(actual);
- Map<String, Collection<ScanWithEquals>> expectedWithEquals = toScanWithEquals(snapshotScans);
-
- assertEquals(expectedWithEquals, actualWithEquals);
- }
-
- @Test
- public void testSetInputPushesRestoreDirectories() throws Exception {
- callSetInput();
-
- Map<String, Path> restoreDirs = subject.getSnapshotDirs(conf);
-
- assertEquals(this.snapshotScans.keySet(), restoreDirs.keySet());
- }
-
- @Test
- public void testSetInputCreatesRestoreDirectoriesUnderRootRestoreDir() throws Exception {
- callSetInput();
-
- Map<String, Path> restoreDirs = subject.getSnapshotDirs(conf);
-
- for (Path snapshotDir : restoreDirs.values()) {
- assertEquals("Expected " + snapshotDir + " to be a child of " + restoreDir, restoreDir,
- snapshotDir.getParent());
- }
- }
-
- @Test
- public void testSetInputRestoresSnapshots() throws Exception {
- callSetInput();
-
- Map<String, Path> snapshotDirs = subject.getSnapshotDirs(conf);
-
- for (Map.Entry<String, Path> entry : snapshotDirs.entrySet()) {
- verify(this.subject).restoreSnapshot(eq(this.conf), eq(entry.getKey()), eq(this.rootDir),
- eq(entry.getValue()), any(FileSystem.class));
- }
- }
-}
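
A small note on the ScanWithEquals helper above: it overrides equals() but not hashCode(). That is sufficient for the collection-equality assertions in this test, but a matching hashCode() would be needed if instances were ever used as keys in hash-based collections. A one-line sketch over the same startRow/stopRow fields:

@Override
public int hashCode() {
  // Kept consistent with equals(), which compares only startRow and stopRow.
  return java.util.Objects.hash(startRow, stopRow);
}
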
[27/41] hbase git commit: HBASE-18640 Move mapreduce out of hbase-server into separate module. Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat2.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat2.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat2.java
new file mode 100644
index 0000000..c6a8761
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat2.java
@@ -0,0 +1,1496 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNotSame;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.IOException;
+import java.lang.reflect.Field;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Random;
+import java.util.Set;
+import java.util.concurrent.Callable;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.hbase.ArrayBackedTag;
+import org.apache.hadoop.hbase.CategoryBasedTimeout;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.CompatibilitySingletonFactory;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HDFSBlocksDistribution;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.HadoopShims;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.PerformanceEvaluation;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.Tag;
+import org.apache.hadoop.hbase.TagType;
+import org.apache.hadoop.hbase.TagUtil;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.io.compress.Compression;
+import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
+import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
+import org.apache.hadoop.hbase.io.hfile.CacheConfig;
+import org.apache.hadoop.hbase.io.hfile.HFile;
+import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
+import org.apache.hadoop.hbase.io.hfile.HFileScanner;
+import org.apache.hadoop.hbase.regionserver.BloomType;
+import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.regionserver.Store;
+import org.apache.hadoop.hbase.regionserver.StoreFile;
+import org.apache.hadoop.hbase.regionserver.TestHRegionFileSystem;
+import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.util.ReflectionUtils;
+import org.apache.hadoop.hbase.util.Writables;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
+import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy;
+import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockStoragePolicySuite;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.junit.Ignore;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestRule;
+import org.mockito.Mockito;
+
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+/**
+ * Simple test for {@link HFileOutputFormat2}.
+ * Sets up and runs a mapreduce job that writes hfile output.
+ * Creates a few inner classes to implement splits and an inputformat that
+ * emits keys and values like those of {@link PerformanceEvaluation}.
+ */
+@Category({VerySlowMapReduceTests.class, LargeTests.class})
+public class TestHFileOutputFormat2 {
+ @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
+ withTimeout(this.getClass()).withLookingForStuckThread(true).build();
+ private final static int ROWSPERSPLIT = 1024;
+
+ public static final byte[] FAMILY_NAME = TestHRegionFileSystem.FAMILY_NAME;
+ private static final byte[][] FAMILIES = {
+ Bytes.add(FAMILY_NAME, Bytes.toBytes("-A")), Bytes.add(FAMILY_NAME, Bytes.toBytes("-B"))};
+ private static final TableName[] TABLE_NAMES = Stream.of("TestTable", "TestTable2",
+ "TestTable3").map(TableName::valueOf).toArray(TableName[]::new);
+
+ private HBaseTestingUtility util = new HBaseTestingUtility();
+
+ private static final Log LOG = LogFactory.getLog(TestHFileOutputFormat2.class);
+
+ /**
+ * Simple mapper that makes KeyValue output.
+ */
+ static class RandomKVGeneratingMapper
+ extends Mapper<NullWritable, NullWritable,
+ ImmutableBytesWritable, Cell> {
+
+ private int keyLength;
+ private static final int KEYLEN_DEFAULT=10;
+ private static final String KEYLEN_CONF="randomkv.key.length";
+
+ private int valLength;
+ private static final int VALLEN_DEFAULT=10;
+ private static final String VALLEN_CONF="randomkv.val.length";
+ private static final byte [] QUALIFIER = Bytes.toBytes("data");
+ private boolean multiTableMapper = false;
+ private TableName[] tables = null;
+
+
+ @Override
+ protected void setup(Context context) throws IOException,
+ InterruptedException {
+ super.setup(context);
+
+ Configuration conf = context.getConfiguration();
+ keyLength = conf.getInt(KEYLEN_CONF, KEYLEN_DEFAULT);
+ valLength = conf.getInt(VALLEN_CONF, VALLEN_DEFAULT);
+ multiTableMapper = conf.getBoolean(HFileOutputFormat2.MULTI_TABLE_HFILEOUTPUTFORMAT_CONF_KEY,
+ false);
+ if (multiTableMapper) {
+ tables = TABLE_NAMES;
+ } else {
+ tables = new TableName[]{TABLE_NAMES[0]};
+ }
+ }
+
+ @Override
+ protected void map(
+ NullWritable n1, NullWritable n2,
+ Mapper<NullWritable, NullWritable,
+ ImmutableBytesWritable,Cell>.Context context)
+ throws java.io.IOException ,InterruptedException
+ {
+
+ byte keyBytes[] = new byte[keyLength];
+ byte valBytes[] = new byte[valLength];
+
+ int taskId = context.getTaskAttemptID().getTaskID().getId();
+      assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";
+ Random random = new Random();
+ byte[] key;
+ for (int j = 0; j < tables.length; ++j) {
+ for (int i = 0; i < ROWSPERSPLIT; i++) {
+ random.nextBytes(keyBytes);
+ // Ensure that unique tasks generate unique keys
+ keyBytes[keyLength - 1] = (byte) (taskId & 0xFF);
+ random.nextBytes(valBytes);
+ key = keyBytes;
+ if (multiTableMapper) {
+ key = MultiTableHFileOutputFormat.createCompositeKey(tables[j].getName(), keyBytes);
+ }
+
+ for (byte[] family : TestHFileOutputFormat2.FAMILIES) {
+ Cell kv = new KeyValue(keyBytes, family, QUALIFIER, valBytes);
+ context.write(new ImmutableBytesWritable(key), kv);
+ }
+ }
+ }
+ }
+ }
+
+ /**
+ * Simple mapper that makes Put output.
+ */
+ static class RandomPutGeneratingMapper
+ extends Mapper<NullWritable, NullWritable,
+ ImmutableBytesWritable, Put> {
+
+ private int keyLength;
+ private static final int KEYLEN_DEFAULT = 10;
+ private static final String KEYLEN_CONF = "randomkv.key.length";
+
+ private int valLength;
+ private static final int VALLEN_DEFAULT = 10;
+ private static final String VALLEN_CONF = "randomkv.val.length";
+ private static final byte[] QUALIFIER = Bytes.toBytes("data");
+ private boolean multiTableMapper = false;
+ private TableName[] tables = null;
+
+ @Override
+ protected void setup(Context context) throws IOException,
+ InterruptedException {
+ super.setup(context);
+
+ Configuration conf = context.getConfiguration();
+ keyLength = conf.getInt(KEYLEN_CONF, KEYLEN_DEFAULT);
+ valLength = conf.getInt(VALLEN_CONF, VALLEN_DEFAULT);
+ multiTableMapper = conf.getBoolean(HFileOutputFormat2.MULTI_TABLE_HFILEOUTPUTFORMAT_CONF_KEY,
+ false);
+ if (multiTableMapper) {
+ tables = TABLE_NAMES;
+ } else {
+ tables = new TableName[]{TABLE_NAMES[0]};
+ }
+ }
+
+ @Override
+ protected void map(
+ NullWritable n1, NullWritable n2,
+ Mapper<NullWritable, NullWritable,
+ ImmutableBytesWritable, Put>.Context context)
+ throws java.io.IOException, InterruptedException {
+
+ byte keyBytes[] = new byte[keyLength];
+ byte valBytes[] = new byte[valLength];
+
+ int taskId = context.getTaskAttemptID().getTaskID().getId();
+      assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";
+
+ Random random = new Random();
+ byte[] key;
+ for (int j = 0; j < tables.length; ++j) {
+ for (int i = 0; i < ROWSPERSPLIT; i++) {
+ random.nextBytes(keyBytes);
+ // Ensure that unique tasks generate unique keys
+ keyBytes[keyLength - 1] = (byte) (taskId & 0xFF);
+ random.nextBytes(valBytes);
+ key = keyBytes;
+ if (multiTableMapper) {
+ key = MultiTableHFileOutputFormat.createCompositeKey(tables[j].getName(), keyBytes);
+ }
+
+ for (byte[] family : TestHFileOutputFormat2.FAMILIES) {
+ Put p = new Put(keyBytes);
+ p.addColumn(family, QUALIFIER, valBytes);
+ // set TTL to very low so that the scan does not return any value
+          p.setTTL(1L);
+ context.write(new ImmutableBytesWritable(key), p);
+ }
+ }
+ }
+ }
+ }
+
+ private void setupRandomGeneratorMapper(Job job, boolean putSortReducer) {
+ if (putSortReducer) {
+ job.setInputFormatClass(NMapInputFormat.class);
+ job.setMapperClass(RandomPutGeneratingMapper.class);
+ job.setMapOutputKeyClass(ImmutableBytesWritable.class);
+ job.setMapOutputValueClass(Put.class);
+ } else {
+ job.setInputFormatClass(NMapInputFormat.class);
+ job.setMapperClass(RandomKVGeneratingMapper.class);
+ job.setMapOutputKeyClass(ImmutableBytesWritable.class);
+ job.setMapOutputValueClass(KeyValue.class);
+ }
+ }
+
+ /**
+ * Test that {@link HFileOutputFormat2} RecordWriter amends timestamps if
+ * passed a keyvalue whose timestamp is {@link HConstants#LATEST_TIMESTAMP}.
+ * @see <a href="https://issues.apache.org/jira/browse/HBASE-2615">HBASE-2615</a>
+ */
+ @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
+ public void test_LATEST_TIMESTAMP_isReplaced()
+ throws Exception {
+ Configuration conf = new Configuration(this.util.getConfiguration());
+ RecordWriter<ImmutableBytesWritable, Cell> writer = null;
+ TaskAttemptContext context = null;
+ Path dir =
+ util.getDataTestDir("test_LATEST_TIMESTAMP_isReplaced");
+ try {
+ Job job = new Job(conf);
+ FileOutputFormat.setOutputPath(job, dir);
+ context = createTestTaskAttemptContext(job);
+ HFileOutputFormat2 hof = new HFileOutputFormat2();
+ writer = hof.getRecordWriter(context);
+ final byte [] b = Bytes.toBytes("b");
+
+      // Test 1. Pass a KV that has a ts of LATEST_TIMESTAMP. It should be changed by the
+      // call to write. Check that everything in the kv is the same except the ts.
+ KeyValue kv = new KeyValue(b, b, b);
+ KeyValue original = kv.clone();
+ writer.write(new ImmutableBytesWritable(), kv);
+ assertFalse(original.equals(kv));
+ assertTrue(Bytes.equals(CellUtil.cloneRow(original), CellUtil.cloneRow(kv)));
+ assertTrue(Bytes.equals(CellUtil.cloneFamily(original), CellUtil.cloneFamily(kv)));
+ assertTrue(Bytes.equals(CellUtil.cloneQualifier(original), CellUtil.cloneQualifier(kv)));
+ assertNotSame(original.getTimestamp(), kv.getTimestamp());
+ assertNotSame(HConstants.LATEST_TIMESTAMP, kv.getTimestamp());
+
+      // Test 2. Now test passing a kv that has an explicit ts. It should not be
+      // changed by the call to write.
+ kv = new KeyValue(b, b, b, kv.getTimestamp() - 1, b);
+ original = kv.clone();
+ writer.write(new ImmutableBytesWritable(), kv);
+ assertTrue(original.equals(kv));
+ } finally {
+ if (writer != null && context != null) writer.close(context);
+ dir.getFileSystem(conf).delete(dir, true);
+ }
+ }
+
+ private TaskAttemptContext createTestTaskAttemptContext(final Job job)
+ throws Exception {
+ HadoopShims hadoop = CompatibilitySingletonFactory.getInstance(HadoopShims.class);
+ TaskAttemptContext context = hadoop.createTestTaskAttemptContext(
+ job, "attempt_201402131733_0001_m_000000_0");
+ return context;
+ }
+
+ /*
+ * Test that {@link HFileOutputFormat2} creates an HFile with TIMERANGE
+ * metadata used by time-restricted scans.
+ */
+ @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
+ public void test_TIMERANGE() throws Exception {
+ Configuration conf = new Configuration(this.util.getConfiguration());
+ RecordWriter<ImmutableBytesWritable, Cell> writer = null;
+ TaskAttemptContext context = null;
+ Path dir =
+ util.getDataTestDir("test_TIMERANGE_present");
+ LOG.info("Timerange dir writing to dir: "+ dir);
+ try {
+ // build a record writer using HFileOutputFormat2
+ Job job = new Job(conf);
+ FileOutputFormat.setOutputPath(job, dir);
+ context = createTestTaskAttemptContext(job);
+ HFileOutputFormat2 hof = new HFileOutputFormat2();
+ writer = hof.getRecordWriter(context);
+
+      // Pass two key values with explicit timestamps
+ final byte [] b = Bytes.toBytes("b");
+
+ // value 1 with timestamp 2000
+ KeyValue kv = new KeyValue(b, b, b, 2000, b);
+ KeyValue original = kv.clone();
+ writer.write(new ImmutableBytesWritable(), kv);
+ assertEquals(original,kv);
+
+ // value 2 with timestamp 1000
+ kv = new KeyValue(b, b, b, 1000, b);
+ original = kv.clone();
+ writer.write(new ImmutableBytesWritable(), kv);
+ assertEquals(original, kv);
+
+ // verify that the file has the proper FileInfo.
+ writer.close(context);
+
+ // the generated file lives 1 directory down from the attempt directory
+ // and is the only file, e.g.
+ // _attempt__0000_r_000000_0/b/1979617994050536795
+ FileSystem fs = FileSystem.get(conf);
+ Path attemptDirectory = hof.getDefaultWorkFile(context, "").getParent();
+ FileStatus[] sub1 = fs.listStatus(attemptDirectory);
+ FileStatus[] file = fs.listStatus(sub1[0].getPath());
+
+ // open as HFile Reader and pull out TIMERANGE FileInfo.
+ HFile.Reader rd =
+ HFile.createReader(fs, file[0].getPath(), new CacheConfig(conf), true, conf);
+ Map<byte[],byte[]> finfo = rd.loadFileInfo();
+ byte[] range = finfo.get("TIMERANGE".getBytes("UTF-8"));
+ assertNotNull(range);
+
+ // unmarshall and check values.
+ TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
+ Writables.copyWritable(range, timeRangeTracker);
+ LOG.info(timeRangeTracker.getMin() +
+ "...." + timeRangeTracker.getMax());
+ assertEquals(1000, timeRangeTracker.getMin());
+ assertEquals(2000, timeRangeTracker.getMax());
+ rd.close();
+ } finally {
+ if (writer != null && context != null) writer.close(context);
+ dir.getFileSystem(conf).delete(dir, true);
+ }
+ }
+
+ /**
+ * Run small MR job.
+ */
+ @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
+ public void testWritingPEData() throws Exception {
+ Configuration conf = util.getConfiguration();
+ Path testDir = util.getDataTestDirOnTestFS("testWritingPEData");
+ FileSystem fs = testDir.getFileSystem(conf);
+
+ // Set down this value or we OOME in eclipse.
+ conf.setInt("mapreduce.task.io.sort.mb", 20);
+ // Write a few files.
+ conf.setLong(HConstants.HREGION_MAX_FILESIZE, 64 * 1024);
+
+ Job job = new Job(conf, "testWritingPEData");
+ setupRandomGeneratorMapper(job, false);
+    // This partitioner doesn't work well for number keys, but we use it anyway
+    // just to demonstrate how to configure it.
+ byte[] startKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
+ byte[] endKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
+
+ Arrays.fill(startKey, (byte)0);
+ Arrays.fill(endKey, (byte)0xff);
+
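+ // SimpleTotalOrderPartitioner splits the [startKey, endKey] range into evenly sized key ranges, one per reducer.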
+ job.setPartitionerClass(SimpleTotalOrderPartitioner.class);
+ // Set start and end rows for partitioner.
+ SimpleTotalOrderPartitioner.setStartKey(job.getConfiguration(), startKey);
+ SimpleTotalOrderPartitioner.setEndKey(job.getConfiguration(), endKey);
+ job.setReducerClass(KeyValueSortReducer.class);
+ job.setOutputFormatClass(HFileOutputFormat2.class);
+ job.setNumReduceTasks(4);
+ job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
+ MutationSerialization.class.getName(), ResultSerialization.class.getName(),
+ KeyValueSerialization.class.getName());
+
+ FileOutputFormat.setOutputPath(job, testDir);
+ assertTrue(job.waitForCompletion(false));
+ FileStatus [] files = fs.listStatus(testDir);
+ assertTrue(files.length > 0);
+ }
+
+ /**
+ * Test that {@link HFileOutputFormat2} RecordWriter writes tags such as TTL into
+ * the HFile.
+ */
+ @Test
+ public void test_WritingTagData()
+ throws Exception {
+ Configuration conf = new Configuration(this.util.getConfiguration());
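+ // Cell tags are only persisted for HFile format v3 and later, so force the minimum tag-capable format version.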
+ final String HFILE_FORMAT_VERSION_CONF_KEY = "hfile.format.version";
+ conf.setInt(HFILE_FORMAT_VERSION_CONF_KEY, HFile.MIN_FORMAT_VERSION_WITH_TAGS);
+ RecordWriter<ImmutableBytesWritable, Cell> writer = null;
+ TaskAttemptContext context = null;
+ Path dir =
+ util.getDataTestDir("WritingTagData");
+ try {
+ conf.set(HFileOutputFormat2.OUTPUT_TABLE_NAME_CONF_KEY, TABLE_NAMES[0].getNameAsString());
+ // turn locality off to eliminate getRegionLocation fail-and-retry time when writing kvs
+ conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false);
+ Job job = new Job(conf);
+ FileOutputFormat.setOutputPath(job, dir);
+ context = createTestTaskAttemptContext(job);
+ HFileOutputFormat2 hof = new HFileOutputFormat2();
+ writer = hof.getRecordWriter(context);
+ final byte [] b = Bytes.toBytes("b");
+
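+ // Attach a TTL tag to the cell; the checks below verify that the tag survives the round trip into the HFile.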
+ List<Tag> tags = new ArrayList<>();
+ tags.add(new ArrayBackedTag(TagType.TTL_TAG_TYPE, Bytes.toBytes(978670)));
+ KeyValue kv = new KeyValue(b, b, b, HConstants.LATEST_TIMESTAMP, b, tags);
+ writer.write(new ImmutableBytesWritable(), kv);
+ writer.close(context);
+ writer = null;
+ FileSystem fs = dir.getFileSystem(conf);
+ RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(dir, true);
+ while(iterator.hasNext()) {
+ LocatedFileStatus keyFileStatus = iterator.next();
+ HFile.Reader reader =
+ HFile.createReader(fs, keyFileStatus.getPath(), new CacheConfig(conf), true, conf);
+ HFileScanner scanner = reader.getScanner(false, false, false);
+ scanner.seekTo();
+ Cell cell = scanner.getCell();
+ List<Tag> tagsFromCell = TagUtil.asList(cell.getTagsArray(), cell.getTagsOffset(),
+ cell.getTagsLength());
+ assertTrue(tagsFromCell.size() > 0);
+ for (Tag tag : tagsFromCell) {
+ assertTrue(tag.getType() == TagType.TTL_TAG_TYPE);
+ }
+ }
+ } finally {
+ if (writer != null && context != null) writer.close(context);
+ dir.getFileSystem(conf).delete(dir, true);
+ }
+ }
+
+ @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
+ public void testJobConfiguration() throws Exception {
+ Configuration conf = new Configuration(this.util.getConfiguration());
+ conf.set(HConstants.TEMPORARY_FS_DIRECTORY_KEY, util.getDataTestDir("testJobConfiguration")
+ .toString());
+ Job job = new Job(conf);
+ job.setWorkingDirectory(util.getDataTestDir("testJobConfiguration"));
+ Table table = Mockito.mock(Table.class);
+ RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
+ setupMockStartKeys(regionLocator);
+ setupMockTableName(regionLocator);
+ HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
+ assertEquals(4, job.getNumReduceTasks());
+ }
+
+ private byte [][] generateRandomStartKeys(int numKeys) {
+ Random random = new Random();
+ byte[][] ret = new byte[numKeys][];
+ // first region start key is always empty
+ ret[0] = HConstants.EMPTY_BYTE_ARRAY;
+ for (int i = 1; i < numKeys; i++) {
+ ret[i] =
+ PerformanceEvaluation.generateData(random, PerformanceEvaluation.DEFAULT_VALUE_LENGTH);
+ }
+ return ret;
+ }
+
+ private byte[][] generateRandomSplitKeys(int numKeys) {
+ Random random = new Random();
+ byte[][] ret = new byte[numKeys][];
+ for (int i = 0; i < numKeys; i++) {
+ ret[i] =
+ PerformanceEvaluation.generateData(random, PerformanceEvaluation.DEFAULT_VALUE_LENGTH);
+ }
+ return ret;
+ }
+
+ @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
+ public void testMRIncrementalLoad() throws Exception {
+ LOG.info("\nStarting test testMRIncrementalLoad\n");
+ doIncrementalLoadTest(false, false, false, "testMRIncrementalLoad");
+ }
+
+ @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
+ public void testMRIncrementalLoadWithSplit() throws Exception {
+ LOG.info("\nStarting test testMRIncrementalLoadWithSplit\n");
+ doIncrementalLoadTest(true, false, false, "testMRIncrementalLoadWithSplit");
+ }
+
+ /**
+ * Test for HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY = true.
+ * This test can only check the correctness of the original logic when LOCALITY_SENSITIVE_CONF_KEY
+ * is set to true. Because MiniHBaseCluster always runs with a single hostname (and different ports),
+ * it is not possible to check region locality by comparing region locations with DataNode hostnames.
+ * Once MiniHBaseCluster supports an explicit hostnames parameter (just as MiniDFSCluster does),
+ * region locality features can be tested more easily.
+ */
+ @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
+ public void testMRIncrementalLoadWithLocality() throws Exception {
+ LOG.info("\nStarting test testMRIncrementalLoadWithLocality\n");
+ doIncrementalLoadTest(false, true, false, "testMRIncrementalLoadWithLocality1");
+ doIncrementalLoadTest(true, true, false, "testMRIncrementalLoadWithLocality2");
+ }
+
+ //@Ignore("Wahtevs")
+ @Test
+ public void testMRIncrementalLoadWithPutSortReducer() throws Exception {
+ LOG.info("\nStarting test testMRIncrementalLoadWithPutSortReducer\n");
+ doIncrementalLoadTest(false, false, true, "testMRIncrementalLoadWithPutSortReducer");
+ }
+
+ private void doIncrementalLoadTest(boolean shouldChangeRegions, boolean shouldKeepLocality,
+ boolean putSortReducer, String tableStr) throws Exception {
+ doIncrementalLoadTest(shouldChangeRegions, shouldKeepLocality, putSortReducer,
+ Arrays.asList(tableStr));
+ }
+
+ @Test
+ public void testMultiMRIncrementalLoadWithPutSortReducer() throws Exception {
+ LOG.info("\nStarting test testMultiMRIncrementalLoadWithPutSortReducer\n");
+ doIncrementalLoadTest(false, false, true,
+ Arrays.stream(TABLE_NAMES).map(TableName::getNameAsString).collect(Collectors.toList()));
+ }
+
+ private void doIncrementalLoadTest(boolean shouldChangeRegions, boolean shouldKeepLocality,
+ boolean putSortReducer, List<String> tableStr) throws Exception {
+ util = new HBaseTestingUtility();
+ Configuration conf = util.getConfiguration();
+ conf.setBoolean(MultiTableHFileOutputFormat.LOCALITY_SENSITIVE_CONF_KEY, shouldKeepLocality);
+ int hostCount = 1;
+ int regionNum = 5;
+ if (shouldKeepLocality) {
+ // Raise the host count above the HDFS replica count once MiniHBaseCluster supports
+ // an explicit hostnames parameter, just as MiniDFSCluster does.
+ hostCount = 3;
+ regionNum = 20;
+ }
+
+ String[] hostnames = new String[hostCount];
+ for (int i = 0; i < hostCount; ++i) {
+ hostnames[i] = "datanode_" + i;
+ }
+ util.startMiniCluster(1, hostCount, hostnames);
+
+ Map<String, Table> allTables = new HashMap<>(tableStr.size());
+ List<HFileOutputFormat2.TableInfo> tableInfo = new ArrayList<>(tableStr.size());
+ boolean writeMultipleTables = tableStr.size() > 1;
+ for (String tableStrSingle : tableStr) {
+ byte[][] splitKeys = generateRandomSplitKeys(regionNum - 1);
+ TableName tableName = TableName.valueOf(tableStrSingle);
+ Table table = util.createTable(tableName, FAMILIES, splitKeys);
+
+ RegionLocator r = util.getConnection().getRegionLocator(tableName);
+ assertEquals("Should start with empty table", 0, util.countRows(table));
+ int numRegions = r.getStartKeys().length;
+ assertEquals("Should make " + regionNum + " regions", numRegions, regionNum);
+
+ allTables.put(tableStrSingle, table);
+ tableInfo.add(new HFileOutputFormat2.TableInfo(table.getTableDescriptor(), r));
+ }
+ Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
+ // Generate the bulk load files
+ runIncrementalPELoad(conf, tableInfo, testDir, putSortReducer);
+
+ for (Table tableSingle : allTables.values()) {
+ // This doesn't write into the table, just makes files
+ assertEquals("HFOF should not touch actual table", 0, util.countRows(tableSingle));
+ }
+ int numTableDirs = 0;
+ for (FileStatus tf : testDir.getFileSystem(conf).listStatus(testDir)) {
+ Path tablePath = testDir;
+
+ if (writeMultipleTables) {
+ if (allTables.containsKey(tf.getPath().getName())) {
+ ++numTableDirs;
+ tablePath = tf.getPath();
+ }
+ else {
+ continue;
+ }
+ }
+
+ // Make sure that a directory was created for every CF
+ int dir = 0;
+ for (FileStatus f : tablePath.getFileSystem(conf).listStatus(tablePath)) {
+ for (byte[] family : FAMILIES) {
+ if (Bytes.toString(family).equals(f.getPath().getName())) {
+ ++dir;
+ }
+ }
+ }
+ assertEquals("Column family not found in FS.", FAMILIES.length, dir);
+ }
+ if (writeMultipleTables) {
+ assertEquals("Dir for all input tables not created", numTableDirs, allTables.size());
+ }
+
+ Admin admin = util.getConnection().getAdmin();
+ try {
+ // handle the split case
+ if (shouldChangeRegions) {
+ // Choose a semi-random table if multiple tables are available
+ Table chosenTable = allTables.values().iterator().next();
+ LOG.info("Changing regions in table " + chosenTable.getName().getNameAsString());
+ admin.disableTable(chosenTable.getName());
+ util.waitUntilNoRegionsInTransition();
+
+ util.deleteTable(chosenTable.getName());
+ byte[][] newSplitKeys = generateRandomSplitKeys(14);
+ Table table = util.createTable(chosenTable.getName(), FAMILIES, newSplitKeys);
+
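+ // 14 split keys yield 15 regions; wait until they are all assigned and the recreated table is available.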
+ while (util.getConnection().getRegionLocator(chosenTable.getName())
+ .getAllRegionLocations().size() != 15 ||
+ !admin.isTableAvailable(table.getName())) {
+ Thread.sleep(200);
+ LOG.info("Waiting for new region assignment to happen");
+ }
+ }
+
+ // Perform the actual load
+ for (HFileOutputFormat2.TableInfo singleTableInfo : tableInfo) {
+ Path tableDir = testDir;
+ String tableNameStr = singleTableInfo.getHTableDescriptor().getNameAsString();
+ LOG.info("Running LoadIncrementalHFiles on table" + tableNameStr);
+ if (writeMultipleTables) {
+ tableDir = new Path(testDir, tableNameStr);
+ }
+ Table currentTable = allTables.get(tableNameStr);
+ TableName currentTableName = currentTable.getName();
+ new LoadIncrementalHFiles(conf).doBulkLoad(tableDir, admin, currentTable, singleTableInfo
+ .getRegionLocator());
+
+ // Ensure data shows up
+ int expectedRows = 0;
+ if (putSortReducer) {
+ // no rows should be extracted
+ assertEquals("LoadIncrementalHFiles should put expected data in table", expectedRows,
+ util.countRows(currentTable));
+ } else {
+ expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
+ assertEquals("LoadIncrementalHFiles should put expected data in table", expectedRows,
+ util.countRows(currentTable));
+ Scan scan = new Scan();
+ ResultScanner results = currentTable.getScanner(scan);
+ for (Result res : results) {
+ assertEquals(FAMILIES.length, res.rawCells().length);
+ Cell first = res.rawCells()[0];
+ for (Cell kv : res.rawCells()) {
+ assertTrue(CellUtil.matchingRow(first, kv));
+ assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv)));
+ }
+ }
+ results.close();
+ }
+ String tableDigestBefore = util.checksumRows(currentTable);
+ // Check region locality
+ HDFSBlocksDistribution hbd = new HDFSBlocksDistribution();
+ for (HRegion region : util.getHBaseCluster().getRegions(currentTableName)) {
+ hbd.add(region.getHDFSBlocksDistribution());
+ }
+ for (String hostname : hostnames) {
+ float locality = hbd.getBlockLocalityIndex(hostname);
+ LOG.info("locality of [" + hostname + "]: " + locality);
+ assertEquals(100, (int) (locality * 100));
+ }
+
+ // Cause regions to reopen
+ admin.disableTable(currentTableName);
+ while (!admin.isTableDisabled(currentTableName)) {
+ Thread.sleep(200);
+ LOG.info("Waiting for table to disable");
+ }
+ admin.enableTable(currentTableName);
+ util.waitTableAvailable(currentTableName);
+ assertEquals("Data should remain after reopening of regions",
+ tableDigestBefore, util.checksumRows(currentTable));
+ }
+ } finally {
+ for (HFileOutputFormat2.TableInfo tableInfoSingle : tableInfo) {
+ tableInfoSingle.getRegionLocator().close();
+ }
+ for (Entry<String, Table> singleTable : allTables.entrySet() ) {
+ singleTable.getValue().close();
+ util.deleteTable(singleTable.getValue().getName());
+ }
+ testDir.getFileSystem(conf).delete(testDir, true);
+ util.shutdownMiniCluster();
+ }
+ }
+
+ private void runIncrementalPELoad(Configuration conf, List<HFileOutputFormat2.TableInfo> tableInfo, Path outDir,
+ boolean putSortReducer) throws IOException,
+ InterruptedException, ClassNotFoundException {
+ Job job = new Job(conf, "testLocalMRIncrementalLoad");
+ job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));
+ job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
+ MutationSerialization.class.getName(), ResultSerialization.class.getName(),
+ KeyValueSerialization.class.getName());
+ setupRandomGeneratorMapper(job, putSortReducer);
+ if (tableInfo.size() > 1) {
+ MultiTableHFileOutputFormat.configureIncrementalLoad(job, tableInfo);
+ int sum = 0;
+ for (HFileOutputFormat2.TableInfo tableInfoSingle : tableInfo) {
+ sum += tableInfoSingle.getRegionLocator().getAllRegionLocations().size();
+ }
+ assertEquals(sum, job.getNumReduceTasks());
+ }
+ else {
+ RegionLocator regionLocator = tableInfo.get(0).getRegionLocator();
+ HFileOutputFormat2.configureIncrementalLoad(job, tableInfo.get(0).getHTableDescriptor(),
+ regionLocator);
+ assertEquals(regionLocator.getAllRegionLocations().size(), job.getNumReduceTasks());
+ }
+
+ FileOutputFormat.setOutputPath(job, outDir);
+
+ assertFalse(util.getTestFileSystem().exists(outDir));
+
+ assertTrue(job.waitForCompletion(true));
+ }
+
+ /**
+ * Test for {@link HFileOutputFormat2#configureCompression(Configuration, HTableDescriptor)} and
+ * {@link HFileOutputFormat2#createFamilyCompressionMap(Configuration)}.
+ * Tests that the compression map is correctly serialized into
+ * and deserialized from configuration
+ *
+ * @throws IOException
+ */
+ @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
+ public void testSerializeDeserializeFamilyCompressionMap() throws IOException {
+ for (int numCfs = 0; numCfs <= 3; numCfs++) {
+ Configuration conf = new Configuration(this.util.getConfiguration());
+ Map<String, Compression.Algorithm> familyToCompression =
+ getMockColumnFamiliesForCompression(numCfs);
+ Table table = Mockito.mock(Table.class);
+ setupMockColumnFamiliesForCompression(table, familyToCompression);
+ conf.set(HFileOutputFormat2.COMPRESSION_FAMILIES_CONF_KEY,
+ HFileOutputFormat2.serializeColumnFamilyAttribute
+ (HFileOutputFormat2.compressionDetails,
+ Arrays.asList(table.getTableDescriptor())));
+
+ // read back family specific compression setting from the configuration
+ Map<byte[], Algorithm> retrievedFamilyToCompressionMap = HFileOutputFormat2
+ .createFamilyCompressionMap(conf);
+
+ // test that we have a value for all column families that matches with the
+ // used mock values
+ for (Entry<String, Algorithm> entry : familyToCompression.entrySet()) {
+ assertEquals("Compression configuration incorrect for column family:"
+ + entry.getKey(), entry.getValue(),
+ retrievedFamilyToCompressionMap.get(entry.getKey().getBytes("UTF-8")));
+ }
+ }
+ }
+
+ private void setupMockColumnFamiliesForCompression(Table table,
+ Map<String, Compression.Algorithm> familyToCompression) throws IOException {
+ HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAMES[0]);
+ for (Entry<String, Compression.Algorithm> entry : familyToCompression.entrySet()) {
+ mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
+ .setMaxVersions(1)
+ .setCompressionType(entry.getValue())
+ .setBlockCacheEnabled(false)
+ .setTimeToLive(0));
+ }
+ Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
+ }
+
+ /**
+ * @return a map from column family names to compression algorithms for
+ * testing column family compression. Column family names have special characters
+ */
+ private Map<String, Compression.Algorithm>
+ getMockColumnFamiliesForCompression (int numCfs) {
+ Map<String, Compression.Algorithm> familyToCompression = new HashMap<>();
+ // use column family names having special characters
+ if (numCfs-- > 0) {
+ familyToCompression.put("Family1!@#!@#&", Compression.Algorithm.LZO);
+ }
+ if (numCfs-- > 0) {
+ familyToCompression.put("Family2=asdads&!AASD", Compression.Algorithm.SNAPPY);
+ }
+ if (numCfs-- > 0) {
+ familyToCompression.put("Family2=asdads&!AASD", Compression.Algorithm.GZ);
+ }
+ if (numCfs-- > 0) {
+ familyToCompression.put("Family3", Compression.Algorithm.NONE);
+ }
+ return familyToCompression;
+ }
+
+
+ /**
+ * Test for {@link HFileOutputFormat2#configureBloomType(HTableDescriptor, Configuration)} and
+ * {@link HFileOutputFormat2#createFamilyBloomTypeMap(Configuration)}.
+ * Tests that the bloom type map is correctly serialized into
+ * and deserialized from configuration
+ *
+ * @throws IOException
+ */
+ @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
+ public void testSerializeDeserializeFamilyBloomTypeMap() throws IOException {
+ for (int numCfs = 0; numCfs <= 2; numCfs++) {
+ Configuration conf = new Configuration(this.util.getConfiguration());
+ Map<String, BloomType> familyToBloomType =
+ getMockColumnFamiliesForBloomType(numCfs);
+ Table table = Mockito.mock(Table.class);
+ setupMockColumnFamiliesForBloomType(table,
+ familyToBloomType);
+ conf.set(HFileOutputFormat2.BLOOM_TYPE_FAMILIES_CONF_KEY,
+ HFileOutputFormat2.serializeColumnFamilyAttribute(HFileOutputFormat2.bloomTypeDetails,
+ Arrays.asList(table.getTableDescriptor())));
+
+ // read back family specific data block encoding settings from the
+ // configuration
+ Map<byte[], BloomType> retrievedFamilyToBloomTypeMap =
+ HFileOutputFormat2
+ .createFamilyBloomTypeMap(conf);
+
+ // test that we have a value for all column families that matches with the
+ // used mock values
+ for (Entry<String, BloomType> entry : familyToBloomType.entrySet()) {
+ assertEquals("BloomType configuration incorrect for column family:"
+ + entry.getKey(), entry.getValue(),
+ retrievedFamilyToBloomTypeMap.get(entry.getKey().getBytes("UTF-8")));
+ }
+ }
+ }
+
+ private void setupMockColumnFamiliesForBloomType(Table table,
+ Map<String, BloomType> familyToBloomType) throws IOException {
+ HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAMES[0]);
+ for (Entry<String, BloomType> entry : familyToBloomType.entrySet()) {
+ mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
+ .setMaxVersions(1)
+ .setBloomFilterType(entry.getValue())
+ .setBlockCacheEnabled(false)
+ .setTimeToLive(0));
+ }
+ Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
+ }
+
+ /**
+ * @return a map from column family names to bloom filter types for
+ * testing column family bloom filter settings. Column family names have special characters
+ */
+ private Map<String, BloomType>
+ getMockColumnFamiliesForBloomType (int numCfs) {
+ Map<String, BloomType> familyToBloomType = new HashMap<>();
+ // use column family names having special characters
+ if (numCfs-- > 0) {
+ familyToBloomType.put("Family1!@#!@#&", BloomType.ROW);
+ }
+ if (numCfs-- > 0) {
+ familyToBloomType.put("Family2=asdads&!AASD",
+ BloomType.ROWCOL);
+ }
+ if (numCfs-- > 0) {
+ familyToBloomType.put("Family3", BloomType.NONE);
+ }
+ return familyToBloomType;
+ }
+
+ /**
+ * Test for {@link HFileOutputFormat2#configureBlockSize(HTableDescriptor, Configuration)} and
+ * {@link HFileOutputFormat2#createFamilyBlockSizeMap(Configuration)}.
+ * Tests that the block size map is correctly serialized into
+ * and deserialized from configuration
+ *
+ * @throws IOException
+ */
+ @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
+ public void testSerializeDeserializeFamilyBlockSizeMap() throws IOException {
+ for (int numCfs = 0; numCfs <= 3; numCfs++) {
+ Configuration conf = new Configuration(this.util.getConfiguration());
+ Map<String, Integer> familyToBlockSize =
+ getMockColumnFamiliesForBlockSize(numCfs);
+ Table table = Mockito.mock(Table.class);
+ setupMockColumnFamiliesForBlockSize(table,
+ familyToBlockSize);
+ conf.set(HFileOutputFormat2.BLOCK_SIZE_FAMILIES_CONF_KEY,
+ HFileOutputFormat2.serializeColumnFamilyAttribute
+ (HFileOutputFormat2.blockSizeDetails, Arrays.asList(table
+ .getTableDescriptor())));
+
+ // read back family specific data block encoding settings from the
+ // configuration
+ Map<byte[], Integer> retrievedFamilyToBlockSizeMap =
+ HFileOutputFormat2
+ .createFamilyBlockSizeMap(conf);
+
+ // test that we have a value for all column families that matches with the
+ // used mock values
+ for (Entry<String, Integer> entry : familyToBlockSize.entrySet()) {
+ assertEquals("BlockSize configuration incorrect for column family:"
+ + entry.getKey(), entry.getValue(),
+ retrievedFamilyToBlockSizeMap.get(entry.getKey().getBytes("UTF-8")));
+ }
+ }
+ }
+
+ private void setupMockColumnFamiliesForBlockSize(Table table,
+ Map<String, Integer> familyToBlockSize) throws IOException {
+ HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAMES[0]);
+ for (Entry<String, Integer> entry : familyToBlockSize.entrySet()) {
+ mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
+ .setMaxVersions(1)
+ .setBlocksize(entry.getValue())
+ .setBlockCacheEnabled(false)
+ .setTimeToLive(0));
+ }
+ Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
+ }
+
+ /**
+ * @return a map from column family names to block sizes for
+ * testing column family block size settings. Column family names have special characters
+ */
+ private Map<String, Integer>
+ getMockColumnFamiliesForBlockSize (int numCfs) {
+ Map<String, Integer> familyToBlockSize = new HashMap<>();
+ // use column family names having special characters
+ if (numCfs-- > 0) {
+ familyToBlockSize.put("Family1!@#!@#&", 1234);
+ }
+ if (numCfs-- > 0) {
+ familyToBlockSize.put("Family2=asdads&!AASD",
+ Integer.MAX_VALUE);
+ }
+ if (numCfs-- > 0) {
+ familyToBlockSize.put("Family2=asdads&!AASD",
+ Integer.MAX_VALUE);
+ }
+ if (numCfs-- > 0) {
+ familyToBlockSize.put("Family3", 0);
+ }
+ return familyToBlockSize;
+ }
+
+ /**
+ * Test for {@link HFileOutputFormat2#configureDataBlockEncoding(HTableDescriptor, Configuration)}
+ * and {@link HFileOutputFormat2#createFamilyDataBlockEncodingMap(Configuration)}.
+ * Tests that the data block encoding map is correctly serialized into
+ * and deserialized from configuration
+ *
+ * @throws IOException
+ */
+ @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
+ public void testSerializeDeserializeFamilyDataBlockEncodingMap() throws IOException {
+ for (int numCfs = 0; numCfs <= 3; numCfs++) {
+ Configuration conf = new Configuration(this.util.getConfiguration());
+ Map<String, DataBlockEncoding> familyToDataBlockEncoding =
+ getMockColumnFamiliesForDataBlockEncoding(numCfs);
+ Table table = Mockito.mock(Table.class);
+ setupMockColumnFamiliesForDataBlockEncoding(table,
+ familyToDataBlockEncoding);
+ HTableDescriptor tableDescriptor = table.getTableDescriptor();
+ conf.set(HFileOutputFormat2.DATABLOCK_ENCODING_FAMILIES_CONF_KEY,
+ HFileOutputFormat2.serializeColumnFamilyAttribute
+ (HFileOutputFormat2.dataBlockEncodingDetails, Arrays
+ .asList(tableDescriptor)));
+
+ // read back family specific data block encoding settings from the
+ // configuration
+ Map<byte[], DataBlockEncoding> retrievedFamilyToDataBlockEncodingMap =
+ HFileOutputFormat2
+ .createFamilyDataBlockEncodingMap(conf);
+
+ // test that we have a value for all column families that matches with the
+ // used mock values
+ for (Entry<String, DataBlockEncoding> entry : familyToDataBlockEncoding.entrySet()) {
+ assertEquals("DataBlockEncoding configuration incorrect for column family:"
+ + entry.getKey(), entry.getValue(),
+ retrievedFamilyToDataBlockEncodingMap.get(entry.getKey().getBytes("UTF-8")));
+ }
+ }
+ }
+
+ private void setupMockColumnFamiliesForDataBlockEncoding(Table table,
+ Map<String, DataBlockEncoding> familyToDataBlockEncoding) throws IOException {
+ HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAMES[0]);
+ for (Entry<String, DataBlockEncoding> entry : familyToDataBlockEncoding.entrySet()) {
+ mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
+ .setMaxVersions(1)
+ .setDataBlockEncoding(entry.getValue())
+ .setBlockCacheEnabled(false)
+ .setTimeToLive(0));
+ }
+ Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
+ }
+
+ /**
+ * @return a map from column family names to data block encodings for
+ * testing column family data block encoding settings. Column family names have special characters
+ */
+ private Map<String, DataBlockEncoding>
+ getMockColumnFamiliesForDataBlockEncoding (int numCfs) {
+ Map<String, DataBlockEncoding> familyToDataBlockEncoding = new HashMap<>();
+ // use column family names having special characters
+ if (numCfs-- > 0) {
+ familyToDataBlockEncoding.put("Family1!@#!@#&", DataBlockEncoding.DIFF);
+ }
+ if (numCfs-- > 0) {
+ familyToDataBlockEncoding.put("Family2=asdads&!AASD",
+ DataBlockEncoding.FAST_DIFF);
+ }
+ if (numCfs-- > 0) {
+ familyToDataBlockEncoding.put("Family2=asdads&!AASD",
+ DataBlockEncoding.PREFIX);
+ }
+ if (numCfs-- > 0) {
+ familyToDataBlockEncoding.put("Family3", DataBlockEncoding.NONE);
+ }
+ return familyToDataBlockEncoding;
+ }
+
+ private void setupMockStartKeys(RegionLocator table) throws IOException {
+ byte[][] mockKeys = new byte[][] {
+ HConstants.EMPTY_BYTE_ARRAY,
+ Bytes.toBytes("aaa"),
+ Bytes.toBytes("ggg"),
+ Bytes.toBytes("zzz")
+ };
+ Mockito.doReturn(mockKeys).when(table).getStartKeys();
+ }
+
+ private void setupMockTableName(RegionLocator table) throws IOException {
+ TableName mockTableName = TableName.valueOf("mock_table");
+ Mockito.doReturn(mockTableName).when(table).getName();
+ }
+
+ /**
+ * Test that {@link HFileOutputFormat2} RecordWriter uses compression and
+ * bloom filter settings from the column family descriptor
+ */
+ @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
+ public void testColumnFamilySettings() throws Exception {
+ Configuration conf = new Configuration(this.util.getConfiguration());
+ RecordWriter<ImmutableBytesWritable, Cell> writer = null;
+ TaskAttemptContext context = null;
+ Path dir = util.getDataTestDir("testColumnFamilySettings");
+
+ // Setup table descriptor
+ Table table = Mockito.mock(Table.class);
+ RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
+ HTableDescriptor htd = new HTableDescriptor(TABLE_NAMES[0]);
+ Mockito.doReturn(htd).when(table).getTableDescriptor();
+ for (HColumnDescriptor hcd: HBaseTestingUtility.generateColumnDescriptors()) {
+ htd.addFamily(hcd);
+ }
+
+ // set up the table to return some mock keys
+ setupMockStartKeys(regionLocator);
+
+ try {
+ // partial map red setup to get an operational writer for testing
+ // We turn off the sequence file compression, because DefaultCodec
+ // pollutes the GZip codec pool with an incompatible compressor.
+ conf.set("io.seqfile.compression.type", "NONE");
+ conf.set("hbase.fs.tmp.dir", dir.toString());
+ // turn locality off to eliminate getRegionLocation fail-and-retry time when writing kvs
+ conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false);
+
+ Job job = new Job(conf, "testLocalMRIncrementalLoad");
+ job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilySettings"));
+ setupRandomGeneratorMapper(job, false);
+ HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
+ FileOutputFormat.setOutputPath(job, dir);
+ context = createTestTaskAttemptContext(job);
+ HFileOutputFormat2 hof = new HFileOutputFormat2();
+ writer = hof.getRecordWriter(context);
+
+ // write out random rows
+ writeRandomKeyValues(writer, context, htd.getFamiliesKeys(), ROWSPERSPLIT);
+ writer.close(context);
+
+ // Make sure that a directory was created for every CF
+ FileSystem fs = dir.getFileSystem(conf);
+
+ // commit so that the filesystem has one directory per column family
+ hof.getOutputCommitter(context).commitTask(context);
+ hof.getOutputCommitter(context).commitJob(context);
+ FileStatus[] families = FSUtils.listStatus(fs, dir, new FSUtils.FamilyDirFilter(fs));
+ assertEquals(htd.getFamilies().size(), families.length);
+ for (FileStatus f : families) {
+ String familyStr = f.getPath().getName();
+ HColumnDescriptor hcd = htd.getFamily(Bytes.toBytes(familyStr));
+ // verify that the compression on this file matches the configured
+ // compression
+ Path dataFilePath = fs.listStatus(f.getPath())[0].getPath();
+ Reader reader = HFile.createReader(fs, dataFilePath, new CacheConfig(conf), true, conf);
+ Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
+
+ byte[] bloomFilter = fileInfo.get(StoreFile.BLOOM_FILTER_TYPE_KEY);
+ if (bloomFilter == null) bloomFilter = Bytes.toBytes("NONE");
+ assertEquals("Incorrect bloom filter used for column family " + familyStr +
+ "(reader: " + reader + ")",
+ hcd.getBloomFilterType(), BloomType.valueOf(Bytes.toString(bloomFilter)));
+ assertEquals("Incorrect compression used for column family " + familyStr +
+ "(reader: " + reader + ")", hcd.getCompressionType(), reader.getFileContext().getCompression());
+ }
+ } finally {
+ dir.getFileSystem(conf).delete(dir, true);
+ }
+ }
+
+ /**
+ * Write random values to the writer assuming a table created using
+ * {@link #FAMILIES} as column family descriptors
+ */
+ private void writeRandomKeyValues(RecordWriter<ImmutableBytesWritable, Cell> writer,
+ TaskAttemptContext context, Set<byte[]> families, int numRows)
+ throws IOException, InterruptedException {
+ byte keyBytes[] = new byte[Bytes.SIZEOF_INT];
+ int valLength = 10;
+ byte valBytes[] = new byte[valLength];
+
+ int taskId = context.getTaskAttemptID().getTaskID().getId();
+ assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";
+ final byte [] qualifier = Bytes.toBytes("data");
+ Random random = new Random();
+ for (int i = 0; i < numRows; i++) {
+
+ Bytes.putInt(keyBytes, 0, i);
+ random.nextBytes(valBytes);
+ ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);
+
+ for (byte[] family : families) {
+ Cell kv = new KeyValue(keyBytes, family, qualifier, valBytes);
+ writer.write(key, kv);
+ }
+ }
+ }
+
+ /**
+ * This test covers the scenario from HBASE-6901: all files are bulk loaded
+ * and excluded from minor compaction. Without the fix for HBASE-6901,
+ * an ArrayIndexOutOfBoundsException would be thrown.
+ */
+ @Ignore ("Flakey: See HBASE-9051") @Test
+ public void testExcludeAllFromMinorCompaction() throws Exception {
+ Configuration conf = util.getConfiguration();
+ conf.setInt("hbase.hstore.compaction.min", 2);
+ generateRandomStartKeys(5);
+
+ util.startMiniCluster();
+ try (Connection conn = ConnectionFactory.createConnection();
+ Admin admin = conn.getAdmin();
+ Table table = util.createTable(TABLE_NAMES[0], FAMILIES);
+ RegionLocator locator = conn.getRegionLocator(TABLE_NAMES[0])) {
+ final FileSystem fs = util.getDFSCluster().getFileSystem();
+ assertEquals("Should start with empty table", 0, util.countRows(table));
+
+ // deep inspection: get the StoreFile dir
+ final Path storePath = new Path(
+ FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAMES[0]),
+ new Path(admin.getTableRegions(TABLE_NAMES[0]).get(0).getEncodedName(),
+ Bytes.toString(FAMILIES[0])));
+ assertEquals(0, fs.listStatus(storePath).length);
+
+ // Generate two bulk load files
+ conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
+ true);
+
+ for (int i = 0; i < 2; i++) {
+ Path testDir = util.getDataTestDirOnTestFS("testExcludeAllFromMinorCompaction_" + i);
+ runIncrementalPELoad(conf, Arrays.asList(new HFileOutputFormat2.TableInfo(table
+ .getTableDescriptor(), conn.getRegionLocator(TABLE_NAMES[0]))), testDir, false);
+ // Perform the actual load
+ new LoadIncrementalHFiles(conf).doBulkLoad(testDir, admin, table, locator);
+ }
+
+ // Ensure data shows up
+ int expectedRows = 2 * NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
+ assertEquals("LoadIncrementalHFiles should put expected data in table",
+ expectedRows, util.countRows(table));
+
+ // should have a second StoreFile now
+ assertEquals(2, fs.listStatus(storePath).length);
+
+ // minor compactions shouldn't get rid of the file
+ admin.compact(TABLE_NAMES[0]);
+ try {
+ quickPoll(new Callable<Boolean>() {
+ @Override
+ public Boolean call() throws Exception {
+ List<HRegion> regions = util.getMiniHBaseCluster().getRegions(TABLE_NAMES[0]);
+ for (HRegion region : regions) {
+ for (Store store : region.getStores()) {
+ store.closeAndArchiveCompactedFiles();
+ }
+ }
+ return fs.listStatus(storePath).length == 1;
+ }
+ }, 5000);
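+ // Reaching this line means the minor compaction collapsed the store files down to one,
+ // which should not happen; the expected path is that quickPoll() times out and fail()
+ // throws the AssertionError caught below.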
+ throw new IOException("SF# = " + fs.listStatus(storePath).length);
+ } catch (AssertionError ae) {
+ // this is expected behavior
+ }
+
+ // a major compaction should work though
+ admin.majorCompact(TABLE_NAMES[0]);
+ quickPoll(new Callable<Boolean>() {
+ @Override
+ public Boolean call() throws Exception {
+ List<HRegion> regions = util.getMiniHBaseCluster().getRegions(TABLE_NAMES[0]);
+ for (HRegion region : regions) {
+ for (Store store : region.getStores()) {
+ store.closeAndArchiveCompactedFiles();
+ }
+ }
+ return fs.listStatus(storePath).length == 1;
+ }
+ }, 5000);
+
+ } finally {
+ util.shutdownMiniCluster();
+ }
+ }
+
+ @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
+ public void testExcludeMinorCompaction() throws Exception {
+ Configuration conf = util.getConfiguration();
+ conf.setInt("hbase.hstore.compaction.min", 2);
+ generateRandomStartKeys(5);
+
+ util.startMiniCluster();
+ try (Connection conn = ConnectionFactory.createConnection(conf);
+ Admin admin = conn.getAdmin()){
+ Path testDir = util.getDataTestDirOnTestFS("testExcludeMinorCompaction");
+ final FileSystem fs = util.getDFSCluster().getFileSystem();
+ Table table = util.createTable(TABLE_NAMES[0], FAMILIES);
+ assertEquals("Should start with empty table", 0, util.countRows(table));
+
+ // deep inspection: get the StoreFile dir
+ final Path storePath = new Path(
+ FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAMES[0]),
+ new Path(admin.getTableRegions(TABLE_NAMES[0]).get(0).getEncodedName(),
+ Bytes.toString(FAMILIES[0])));
+ assertEquals(0, fs.listStatus(storePath).length);
+
+ // put some data in it and flush to create a storefile
+ Put p = new Put(Bytes.toBytes("test"));
+ p.addColumn(FAMILIES[0], Bytes.toBytes("1"), Bytes.toBytes("1"));
+ table.put(p);
+ admin.flush(TABLE_NAMES[0]);
+ assertEquals(1, util.countRows(table));
+ quickPoll(new Callable<Boolean>() {
+ @Override
+ public Boolean call() throws Exception {
+ return fs.listStatus(storePath).length == 1;
+ }
+ }, 5000);
+
+ // Generate a bulk load file with more rows
+ conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
+ true);
+
+ RegionLocator regionLocator = conn.getRegionLocator(TABLE_NAMES[0]);
+ runIncrementalPELoad(conf, Arrays.asList(new HFileOutputFormat2.TableInfo(table
+ .getTableDescriptor(), regionLocator)), testDir, false);
+
+ // Perform the actual load
+ new LoadIncrementalHFiles(conf).doBulkLoad(testDir, admin, table, regionLocator);
+
+ // Ensure data shows up
+ int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
+ assertEquals("LoadIncrementalHFiles should put expected data in table",
+ expectedRows + 1, util.countRows(table));
+
+ // should have a second StoreFile now
+ assertEquals(2, fs.listStatus(storePath).length);
+
+ // minor compactions shouldn't get rid of the file
+ admin.compact(TABLE_NAMES[0]);
+ try {
+ quickPoll(new Callable<Boolean>() {
+ @Override
+ public Boolean call() throws Exception {
+ return fs.listStatus(storePath).length == 1;
+ }
+ }, 5000);
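+ // As above, the expected path is that quickPoll() times out and throws the AssertionError caught below.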
+ throw new IOException("SF# = " + fs.listStatus(storePath).length);
+ } catch (AssertionError ae) {
+ // this is expected behavior
+ }
+
+ // a major compaction should work though
+ admin.majorCompact(TABLE_NAMES[0]);
+ quickPoll(new Callable<Boolean>() {
+ @Override
+ public Boolean call() throws Exception {
+ return fs.listStatus(storePath).length == 1;
+ }
+ }, 5000);
+
+ } finally {
+ util.shutdownMiniCluster();
+ }
+ }
+
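+ /**
+ * Polls the callable every 10 ms until it returns true or waitMs elapses; fails the test on timeout.
+ */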
+ private void quickPoll(Callable<Boolean> c, int waitMs) throws Exception {
+ int sleepMs = 10;
+ int retries = (int) Math.ceil(((double) waitMs) / sleepMs);
+ while (retries-- > 0) {
+ if (c.call().booleanValue()) {
+ return;
+ }
+ Thread.sleep(sleepMs);
+ }
+ fail();
+ }
+
+ public static void main(String args[]) throws Exception {
+ new TestHFileOutputFormat2().manualTest(args);
+ }
+
+ public void manualTest(String args[]) throws Exception {
+ Configuration conf = HBaseConfiguration.create();
+ util = new HBaseTestingUtility(conf);
+ if ("newtable".equals(args[0])) {
+ TableName tname = TableName.valueOf(args[1]);
+ byte[][] splitKeys = generateRandomSplitKeys(4);
+ Table table = util.createTable(tname, FAMILIES, splitKeys);
+ } else if ("incremental".equals(args[0])) {
+ TableName tname = TableName.valueOf(args[1]);
+ try(Connection c = ConnectionFactory.createConnection(conf);
+ Admin admin = c.getAdmin();
+ RegionLocator regionLocator = c.getRegionLocator(tname)) {
+ Path outDir = new Path("incremental-out");
+ runIncrementalPELoad(conf, Arrays.asList(new HFileOutputFormat2.TableInfo(admin
+ .getTableDescriptor(tname), regionLocator)), outDir, false);
+ }
+ } else {
+ throw new RuntimeException(
+ "usage: TestHFileOutputFormat2 newtable | incremental");
+ }
+ }
+
+ @Test
+ public void testBlockStoragePolicy() throws Exception {
+ util = new HBaseTestingUtility();
+ Configuration conf = util.getConfiguration();
+ conf.set(HFileOutputFormat2.STORAGE_POLICY_PROPERTY, "ALL_SSD");
+
+ conf.set(HFileOutputFormat2.STORAGE_POLICY_PROPERTY_CF_PREFIX +
+ Bytes.toString(HFileOutputFormat2.combineTableNameSuffix(
+ TABLE_NAMES[0].getName(), FAMILIES[0])), "ONE_SSD");
+ Path cf1Dir = new Path(util.getDataTestDir(), Bytes.toString(FAMILIES[0]));
+ Path cf2Dir = new Path(util.getDataTestDir(), Bytes.toString(FAMILIES[1]));
+ util.startMiniDFSCluster(3);
+ FileSystem fs = util.getDFSCluster().getFileSystem();
+ try {
+ fs.mkdirs(cf1Dir);
+ fs.mkdirs(cf2Dir);
+
+ // the original block storage policy would be HOT
+ String spA = getStoragePolicyName(fs, cf1Dir);
+ String spB = getStoragePolicyName(fs, cf2Dir);
+ LOG.debug("Storage policy of cf 0: [" + spA + "].");
+ LOG.debug("Storage policy of cf 1: [" + spB + "].");
+ assertEquals("HOT", spA);
+ assertEquals("HOT", spB);
+
+ // alter table cf schema to change storage policies
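+ // FAMILIES[0] has a family-specific ONE_SSD override; FAMILIES[1] falls back to the table-wide ALL_SSD default.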
+ HFileOutputFormat2.configureStoragePolicy(conf, fs,
+ HFileOutputFormat2.combineTableNameSuffix(TABLE_NAMES[0].getName(), FAMILIES[0]), cf1Dir);
+ HFileOutputFormat2.configureStoragePolicy(conf, fs,
+ HFileOutputFormat2.combineTableNameSuffix(TABLE_NAMES[0].getName(), FAMILIES[1]), cf2Dir);
+ spA = getStoragePolicyName(fs, cf1Dir);
+ spB = getStoragePolicyName(fs, cf2Dir);
+ LOG.debug("Storage policy of cf 0: [" + spA + "].");
+ LOG.debug("Storage policy of cf 1: [" + spB + "].");
+ assertNotNull(spA);
+ assertEquals("ONE_SSD", spA);
+ assertNotNull(spB);
+ assertEquals("ALL_SSD", spB);
+ } finally {
+ fs.delete(cf1Dir, true);
+ fs.delete(cf2Dir, true);
+ util.shutdownMiniDFSCluster();
+ }
+ }
+
+ private String getStoragePolicyName(FileSystem fs, Path path) {
+ try {
+ Object blockStoragePolicySpi = ReflectionUtils.invokeMethod(fs, "getStoragePolicy", path);
+ return (String) ReflectionUtils.invokeMethod(blockStoragePolicySpi, "getName");
+ } catch (Exception e) {
+ // This may fail on older HDFS versions; fall back to the old way.
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Failed to get policy directly", e);
+ }
+ String policy = getStoragePolicyNameForOldHDFSVersion(fs, path);
+ return policy == null ? "HOT" : policy; // HOT by default
+ }
+ }
+
+ private String getStoragePolicyNameForOldHDFSVersion(FileSystem fs, Path path) {
+ try {
+ if (fs instanceof DistributedFileSystem) {
+ DistributedFileSystem dfs = (DistributedFileSystem) fs;
+ HdfsFileStatus status = dfs.getClient().getFileInfo(path.toUri().getPath());
+ if (null != status) {
+ byte storagePolicyId = status.getStoragePolicy();
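+ // ID_UNSPECIFIED is looked up reflectively, presumably so this code works across HDFS
+ // versions that may or may not expose the constant.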
+ Field idUnspecified = BlockStoragePolicySuite.class.getField("ID_UNSPECIFIED");
+ if (storagePolicyId != idUnspecified.getByte(BlockStoragePolicySuite.class)) {
+ BlockStoragePolicy[] policies = dfs.getStoragePolicies();
+ for (BlockStoragePolicy policy : policies) {
+ if (policy.getId() == storagePolicyId) {
+ return policy.getName();
+ }
+ }
+ }
+ }
+ }
+ } catch (Throwable e) {
+ LOG.warn("failed to get block storage policy of [" + path + "]", e);
+ }
+
+ return null;
+ }
+}
+
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHRegionPartitioner.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHRegionPartitioner.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHRegionPartitioner.java
new file mode 100644
index 0000000..c0debb4
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHRegionPartitioner.java
@@ -0,0 +1,71 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
+ * agreements. See the NOTICE file distributed with this work for additional information regarding
+ * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with the License. You may
+ * obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+
+import static org.junit.Assert.assertEquals;
+
+@Category({MapReduceTests.class, MediumTests.class})
+public class TestHRegionPartitioner {
+ private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
+
+ @Rule
+ public TestName name = new TestName();
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ UTIL.startMiniCluster();
+ }
+
+ @AfterClass
+ public static void afterClass() throws Exception {
+ UTIL.shutdownMiniCluster();
+ }
+
+ /**
+ * Test HRegionPartitioner
+ */
+ @Test (timeout=300000)
+ public void testHRegionPartitioner() throws Exception {
+
+ byte[][] families = { Bytes.toBytes("familyA"), Bytes.toBytes("familyB") };
+
+ UTIL.createTable(TableName.valueOf(name.getMethodName()), families, 1,
+ Bytes.toBytes("aa"), Bytes.toBytes("cc"), 3);
+
+ HRegionPartitioner<Long, Long> partitioner = new HRegionPartitioner<>();
+ Configuration configuration = UTIL.getConfiguration();
+ configuration.set(TableOutputFormat.OUTPUT_TABLE, name.getMethodName());
+ partitioner.setConf(configuration);
+ ImmutableBytesWritable writable = new ImmutableBytesWritable(Bytes.toBytes("bb"));
+
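+ // The table is split at "aa" and "cc" into three regions, so row "bb" lives in the middle one:
+ // with three reducers it maps to partition 1, and with a single reducer everything maps to partition 0.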
+ assertEquals(1, partitioner.getPartition(writable, 10L, 3));
+ assertEquals(0, partitioner.getPartition(writable, 10L, 1));
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHashTable.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHashTable.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHashTable.java
new file mode 100644
index 0000000..87e7852
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHashTable.java
@@ -0,0 +1,194 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.MapFile;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.ImmutableMap;
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.Maps;
+import org.junit.rules.TestName;
+
+/**
+ * Basic test for the HashTable M/R tool
+ */
+@Category(LargeTests.class)
+public class TestHashTable {
+
+ private static final Log LOG = LogFactory.getLog(TestHashTable.class);
+
+ private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+
+ @Rule
+ public TestName name = new TestName();
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ TEST_UTIL.startMiniCluster(3);
+ }
+
+ @AfterClass
+ public static void afterClass() throws Exception {
+ TEST_UTIL.shutdownMiniCluster();
+ }
+
+ @Test
+ public void testHashTable() throws Exception {
+ final TableName tableName = TableName.valueOf(name.getMethodName());
+ final byte[] family = Bytes.toBytes("family");
+ final byte[] column1 = Bytes.toBytes("c1");
+ final byte[] column2 = Bytes.toBytes("c2");
+ final byte[] column3 = Bytes.toBytes("c3");
+
+ int numRows = 100;
+ int numRegions = 10;
+ int numHashFiles = 3;
+
+ byte[][] splitRows = new byte[numRegions-1][];
+ for (int i = 1; i < numRegions; i++) {
+ splitRows[i-1] = Bytes.toBytes(numRows * i / numRegions);
+ }
+
+ long timestamp = 1430764183454L;
+ // put rows into the first table
+ Table t1 = TEST_UTIL.createTable(tableName, family, splitRows);
+ for (int i = 0; i < numRows; i++) {
+ Put p = new Put(Bytes.toBytes(i), timestamp);
+ p.addColumn(family, column1, column1);
+ p.addColumn(family, column2, column2);
+ p.addColumn(family, column3, column3);
+ t1.put(p);
+ }
+ t1.close();
+
+ HashTable hashTable = new HashTable(TEST_UTIL.getConfiguration());
+
+ Path testDir = TEST_UTIL.getDataTestDirOnTestFS(tableName.getNameAsString());
+
+ long batchSize = 300;
+ int code = hashTable.run(new String[] {
+ "--batchsize=" + batchSize,
+ "--numhashfiles=" + numHashFiles,
+ "--scanbatch=2",
+ tableName.getNameAsString(),
+ testDir.toString()});
+ assertEquals("test job failed", 0, code);
+
+ FileSystem fs = TEST_UTIL.getTestFileSystem();
+
+ HashTable.TableHash tableHash = HashTable.TableHash.read(fs.getConf(), testDir);
+ assertEquals(tableName.getNameAsString(), tableHash.tableName);
+ assertEquals(batchSize, tableHash.batchSize);
+ assertEquals(numHashFiles, tableHash.numHashFiles);
+ assertEquals(numHashFiles - 1, tableHash.partitions.size());
+ for (ImmutableBytesWritable bytes : tableHash.partitions) {
+ LOG.debug("partition: " + Bytes.toInt(bytes.get()));
+ }
+
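+ // Expected hash for each batch start key; these values are stable because the row data
+ // and the write timestamp are fixed above.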
+ ImmutableMap<Integer, ImmutableBytesWritable> expectedHashes
+ = ImmutableMap.<Integer, ImmutableBytesWritable>builder()
+ .put(-1, new ImmutableBytesWritable(Bytes.fromHex("714cb10a9e3b5569852980edd8c6ca2f")))
+ .put(5, new ImmutableBytesWritable(Bytes.fromHex("28d961d9252ce8f8d44a07b38d3e1d96")))
+ .put(10, new ImmutableBytesWritable(Bytes.fromHex("f6bbc4a224d8fd929b783a92599eaffa")))
+ .put(15, new ImmutableBytesWritable(Bytes.fromHex("522deb5d97f73a414ecc11457be46881")))
+ .put(20, new ImmutableBytesWritable(Bytes.fromHex("b026f2611aaa46f7110116d807545352")))
+ .put(25, new ImmutableBytesWritable(Bytes.fromHex("39ffc1a3094aa12a2e90ffd9cef2ce93")))
+ .put(30, new ImmutableBytesWritable(Bytes.fromHex("f6b4d75727ce9a30ac29e4f08f601666")))
+ .put(35, new ImmutableBytesWritable(Bytes.fromHex("422e2d2f1eb79a8f02171a705a42c090")))
+ .put(40, new ImmutableBytesWritable(Bytes.fromHex("559ad61c900fffefea0a15abf8a97bc3")))
+ .put(45, new ImmutableBytesWritable(Bytes.fromHex("23019084513eca41cee436b2a29611cb")))
+ .put(50, new ImmutableBytesWritable(Bytes.fromHex("b40467d222ddb4949b142fe145ee9edc")))
+ .put(55, new ImmutableBytesWritable(Bytes.fromHex("372bf89fcd8ca4b7ab3c1add9d07f7e4")))
+ .put(60, new ImmutableBytesWritable(Bytes.fromHex("69ae0585e6255de27dce974e332b8f8b")))
+ .put(65, new ImmutableBytesWritable(Bytes.fromHex("8029610044297aad0abdbecd485d8e59")))
+ .put(70, new ImmutableBytesWritable(Bytes.fromHex("de5f784f7f78987b6e57ecfd81c8646f")))
+ .put(75, new ImmutableBytesWritable(Bytes.fromHex("1cd757cc4e1715c8c3b1c24447a1ec56")))
+ .put(80, new ImmutableBytesWritable(Bytes.fromHex("f9a53aacfeb6142b08066615e7038095")))
+ .put(85, new ImmutableBytesWritable(Bytes.fromHex("89b872b7e639df32d3276b33928c0c91")))
+ .put(90, new ImmutableBytesWritable(Bytes.fromHex("45eeac0646d46a474ea0484175faed38")))
+ .put(95, new ImmutableBytesWritable(Bytes.fromHex("f57c447e32a08f4bf1abb2892839ac56")))
+ .build();
+
+ Map<Integer, ImmutableBytesWritable> actualHashes = new HashMap<>();
+ Path dataDir = new Path(testDir, HashTable.HASH_DATA_DIR);
+ for (int i = 0; i < numHashFiles; i++) {
+ Path hashPath = new Path(dataDir, HashTable.TableHash.getDataFileName(i));
+
+ MapFile.Reader reader = new MapFile.Reader(hashPath, fs.getConf());
+ ImmutableBytesWritable key = new ImmutableBytesWritable();
+ ImmutableBytesWritable hash = new ImmutableBytesWritable();
+ while(reader.next(key, hash)) {
+ String keyString = Bytes.toHex(key.get(), key.getOffset(), key.getLength());
+ LOG.debug("Key: " + (keyString.isEmpty() ? "-1" : Integer.parseInt(keyString, 16))
+ + " Hash: " + Bytes.toHex(hash.get(), hash.getOffset(), hash.getLength()));
+
+ int intKey = -1;
+ if (key.getLength() > 0) {
+ intKey = Bytes.toInt(key.get(), key.getOffset(), key.getLength());
+ }
+ if (actualHashes.containsKey(intKey)) {
+ Assert.fail("duplicate key in data files: " + intKey);
+ }
+ actualHashes.put(intKey, new ImmutableBytesWritable(hash.copyBytes()));
+ }
+ reader.close();
+ }
+
+ FileStatus[] files = fs.listStatus(testDir);
+ for (FileStatus file : files) {
+ LOG.debug("Output file: " + file.getPath());
+ }
+
+ files = fs.listStatus(dataDir);
+ for (FileStatus file : files) {
+ LOG.debug("Data file: " + file.getPath());
+ }
+
+ if (!expectedHashes.equals(actualHashes)) {
+ LOG.error("Diff: " + Maps.difference(expectedHashes, actualHashes));
+ }
+ Assert.assertEquals(expectedHashes, actualHashes);
+
+ TEST_UTIL.deleteTable(tableName);
+ TEST_UTIL.cleanupDataTestDirOnTestFS();
+ }
+
+
+}