Posted to commits@hbase.apache.org by ap...@apache.org on 2017/08/26 01:39:01 UTC
[01/41] hbase git commit: HBASE-18640 Move mapreduce out of hbase-server into separate module.
Repository: hbase
Updated Branches:
refs/heads/master 8d33949b8 -> 664b6be0e
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/util/LoadTestTool.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/LoadTestTool.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/LoadTestTool.java
deleted file mode 100644
index ad832e3..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/LoadTestTool.java
+++ /dev/null
@@ -1,968 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with this
- * work for additional information regarding copyright ownership. The ASF
- * licenses this file to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.hadoop.hbase.util;
-
-import java.io.IOException;
-import java.io.InterruptedIOException;
-import java.lang.reflect.Constructor;
-import java.net.InetAddress;
-import java.security.SecureRandom;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Locale;
-import java.util.Properties;
-import java.util.Random;
-import java.util.concurrent.atomic.AtomicReference;
-
-import javax.crypto.spec.SecretKeySpec;
-
-import org.apache.commons.cli.CommandLine;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.HBaseInterfaceAudience;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.HColumnDescriptor;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Admin;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Durability;
-import org.apache.hadoop.hbase.client.TableDescriptor;
-import org.apache.hadoop.hbase.io.compress.Compression;
-import org.apache.hadoop.hbase.io.crypto.Cipher;
-import org.apache.hadoop.hbase.io.crypto.Encryption;
-import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
-import org.apache.hadoop.hbase.regionserver.BloomType;
-import org.apache.hadoop.hbase.security.EncryptionUtil;
-import org.apache.hadoop.hbase.security.User;
-import org.apache.hadoop.hbase.security.access.AccessControlClient;
-import org.apache.hadoop.hbase.security.access.Permission;
-import org.apache.hadoop.hbase.util.test.LoadTestDataGenerator;
-import org.apache.hadoop.hbase.util.test.LoadTestDataGeneratorWithACL;
-import org.apache.hadoop.security.SecurityUtil;
-import org.apache.hadoop.security.UserGroupInformation;
-import org.apache.hadoop.util.ToolRunner;
-
-/**
- * A command-line utility that reads, writes, and verifies data. Unlike
- * {@link org.apache.hadoop.hbase.PerformanceEvaluation}, this tool validates the data written,
- * and supports simultaneously writing and reading the same set of keys.
- */
-@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
-public class LoadTestTool extends AbstractHBaseTool {
-
- private static final Log LOG = LogFactory.getLog(LoadTestTool.class);
- private static final String COLON = ":";
-
- /** Table name for the test */
- private TableName tableName;
-
- /** Column families for the test */
- private byte[][] families;
-
- /** Table name to use if not overridden on the command line */
- protected static final String DEFAULT_TABLE_NAME = "cluster_test";
-
- /** Column family used by the test */
- public static byte[] DEFAULT_COLUMN_FAMILY = Bytes.toBytes("test_cf");
-
- /** Column families used by the test */
- public static final byte[][] DEFAULT_COLUMN_FAMILIES = { DEFAULT_COLUMN_FAMILY };
-
- /** The default data size if not specified */
- protected static final int DEFAULT_DATA_SIZE = 64;
-
- /** The number of reader/writer threads if not specified */
- protected static final int DEFAULT_NUM_THREADS = 20;
-
- /** Usage string for the load option */
- protected static final String OPT_USAGE_LOAD =
- "<avg_cols_per_key>:<avg_data_size>" +
- "[:<#threads=" + DEFAULT_NUM_THREADS + ">]";
-
- /** Usage string for the read option */
- protected static final String OPT_USAGE_READ =
- "<verify_percent>[:<#threads=" + DEFAULT_NUM_THREADS + ">]";
-
- /** Usage string for the update option */
- protected static final String OPT_USAGE_UPDATE =
- "<update_percent>[:<#threads=" + DEFAULT_NUM_THREADS
- + ">][:<#whether to ignore nonce collisions=0>]";
-
- protected static final String OPT_USAGE_BLOOM = "Bloom filter type, one of " +
- Arrays.toString(BloomType.values());
-
- protected static final String OPT_USAGE_COMPRESSION = "Compression type, " +
- "one of " + Arrays.toString(Compression.Algorithm.values());
-
- public static final String OPT_DATA_BLOCK_ENCODING_USAGE =
- "Encoding algorithm (e.g. prefix "
- + "compression) to use for data blocks in the test column family, "
- + "one of " + Arrays.toString(DataBlockEncoding.values()) + ".";
-
- public static final String OPT_BLOOM = "bloom";
- public static final String OPT_COMPRESSION = "compression";
- public static final String OPT_DEFERRED_LOG_FLUSH = "deferredlogflush";
- public static final String OPT_DEFERRED_LOG_FLUSH_USAGE = "Enable deferred log flush.";
-
- public static final String OPT_DATA_BLOCK_ENCODING =
- HColumnDescriptor.DATA_BLOCK_ENCODING.toLowerCase(Locale.ROOT);
-
- public static final String OPT_INMEMORY = "in_memory";
- public static final String OPT_USAGE_IN_MEMORY = "Tries to keep the HFiles of the CF " +
- "in memory as much as possible. It is not guaranteed that reads are always served from memory";
-
- public static final String OPT_GENERATOR = "generator";
- public static final String OPT_GENERATOR_USAGE = "The class which generates load for the tool."
- + " Any arguments for this class can be passed, colon-separated, after the class name";
-
- public static final String OPT_WRITER = "writer";
- public static final String OPT_WRITER_USAGE = "The class for executing the write requests";
-
- public static final String OPT_UPDATER = "updater";
- public static final String OPT_UPDATER_USAGE = "The class for executing the update requests";
-
- public static final String OPT_READER = "reader";
- public static final String OPT_READER_USAGE = "The class for executing the read requests";
-
- protected static final String OPT_KEY_WINDOW = "key_window";
- protected static final String OPT_WRITE = "write";
- protected static final String OPT_MAX_READ_ERRORS = "max_read_errors";
- public static final String OPT_MULTIPUT = "multiput";
- public static final String OPT_MULTIGET = "multiget_batchsize";
- protected static final String OPT_NUM_KEYS = "num_keys";
- protected static final String OPT_READ = "read";
- protected static final String OPT_START_KEY = "start_key";
- public static final String OPT_TABLE_NAME = "tn";
- public static final String OPT_COLUMN_FAMILIES = "families";
- protected static final String OPT_ZK_QUORUM = "zk";
- protected static final String OPT_ZK_PARENT_NODE = "zk_root";
- protected static final String OPT_SKIP_INIT = "skip_init";
- protected static final String OPT_INIT_ONLY = "init_only";
- protected static final String NUM_TABLES = "num_tables";
- protected static final String OPT_REGIONS_PER_SERVER = "regions_per_server";
- protected static final String OPT_BATCHUPDATE = "batchupdate";
- protected static final String OPT_UPDATE = "update";
-
- public static final String OPT_ENCRYPTION = "encryption";
- protected static final String OPT_ENCRYPTION_USAGE =
- "Enables transparent encryption on the test table, one of " +
- Arrays.toString(Encryption.getSupportedCiphers());
-
- public static final String OPT_NUM_REGIONS_PER_SERVER = "num_regions_per_server";
- protected static final String OPT_NUM_REGIONS_PER_SERVER_USAGE
- = "Desired number of regions per region server. Defaults to 5.";
- public static int DEFAULT_NUM_REGIONS_PER_SERVER = 5;
-
- public static final String OPT_REGION_REPLICATION = "region_replication";
- protected static final String OPT_REGION_REPLICATION_USAGE =
- "Desired number of replicas per region";
-
- public static final String OPT_REGION_REPLICA_ID = "region_replica_id";
- protected static final String OPT_REGION_REPLICA_ID_USAGE =
- "Region replica id to do the reads from";
-
- public static final String OPT_MOB_THRESHOLD = "mob_threshold";
- protected static final String OPT_MOB_THRESHOLD_USAGE =
- "Desired cell size to exceed in bytes that will use the MOB write path";
-
- protected static final long DEFAULT_START_KEY = 0;
-
- /** This will be removed as we factor out the dependency on command line */
- protected CommandLine cmd;
-
- protected MultiThreadedWriter writerThreads = null;
- protected MultiThreadedReader readerThreads = null;
- protected MultiThreadedUpdater updaterThreads = null;
-
- protected long startKey, endKey;
-
- protected boolean isWrite, isRead, isUpdate;
- protected boolean deferredLogFlush;
-
- // Column family options
- protected DataBlockEncoding dataBlockEncodingAlgo;
- protected Compression.Algorithm compressAlgo;
- protected BloomType bloomType;
- private boolean inMemoryCF;
-
- private User userOwner;
- // Writer options
- protected int numWriterThreads = DEFAULT_NUM_THREADS;
- protected int minColsPerKey, maxColsPerKey;
- protected int minColDataSize = DEFAULT_DATA_SIZE, maxColDataSize = DEFAULT_DATA_SIZE;
- protected boolean isMultiPut;
-
- // Updater options
- protected int numUpdaterThreads = DEFAULT_NUM_THREADS;
- protected int updatePercent;
- protected boolean ignoreConflicts = false;
- protected boolean isBatchUpdate;
-
- // Reader options
- private int numReaderThreads = DEFAULT_NUM_THREADS;
- private int keyWindow = MultiThreadedReader.DEFAULT_KEY_WINDOW;
- private int multiGetBatchSize = MultiThreadedReader.DEFAULT_BATCH_SIZE;
- private int maxReadErrors = MultiThreadedReader.DEFAULT_MAX_ERRORS;
- private int verifyPercent;
-
- private int numTables = 1;
-
- private String superUser;
-
- private String userNames;
- //This file is used to read authentication information in secure clusters.
- private String authnFileName;
-
- private int numRegionsPerServer = DEFAULT_NUM_REGIONS_PER_SERVER;
- private int regionReplication = -1; // not set
- private int regionReplicaId = -1; // not set
-
- private int mobThreshold = -1; // not set
-
- // TODO: refactor LoadTestToolImpl somewhere to make the usage from tests less bad,
- // console tool itself should only be used from console.
- protected boolean isSkipInit = false;
- protected boolean isInitOnly = false;
-
- protected Cipher cipher = null;
-
- protected String[] splitColonSeparated(String option,
- int minNumCols, int maxNumCols) {
- String optVal = cmd.getOptionValue(option);
- String[] cols = optVal.split(COLON);
- if (cols.length < minNumCols || cols.length > maxNumCols) {
- throw new IllegalArgumentException("Expected at least "
- + minNumCols + " columns but no more than " + maxNumCols +
- " in the colon-separated value '" + optVal + "' of the " +
- "-" + option + " option");
- }
- return cols;
- }
-
- protected int getNumThreads(String numThreadsStr) {
- return parseInt(numThreadsStr, 1, Short.MAX_VALUE);
- }
-
- public byte[][] getColumnFamilies() {
- return families;
- }
-
- /**
- * Apply column family options such as Bloom filters, compression, and data
- * block encoding.
- */
- protected void applyColumnFamilyOptions(TableName tableName,
- byte[][] columnFamilies) throws IOException {
- try (Connection conn = ConnectionFactory.createConnection(conf);
- Admin admin = conn.getAdmin()) {
- TableDescriptor tableDesc = admin.getTableDescriptor(tableName);
- LOG.info("Disabling table " + tableName);
- admin.disableTable(tableName);
- for (byte[] cf : columnFamilies) {
- HColumnDescriptor columnDesc = (HColumnDescriptor) tableDesc.getColumnFamily(cf);
- boolean isNewCf = columnDesc == null;
- if (isNewCf) {
- columnDesc = new HColumnDescriptor(cf);
- }
- if (bloomType != null) {
- columnDesc.setBloomFilterType(bloomType);
- }
- if (compressAlgo != null) {
- columnDesc.setCompressionType(compressAlgo);
- }
- if (dataBlockEncodingAlgo != null) {
- columnDesc.setDataBlockEncoding(dataBlockEncodingAlgo);
- }
- if (inMemoryCF) {
- columnDesc.setInMemory(inMemoryCF);
- }
- if (cipher != null) {
- byte[] keyBytes = new byte[cipher.getKeyLength()];
- new SecureRandom().nextBytes(keyBytes);
- columnDesc.setEncryptionType(cipher.getName());
- columnDesc.setEncryptionKey(
- EncryptionUtil.wrapKey(conf,
- User.getCurrent().getShortName(),
- new SecretKeySpec(keyBytes,
- cipher.getName())));
- }
- if (mobThreshold >= 0) {
- columnDesc.setMobEnabled(true);
- columnDesc.setMobThreshold(mobThreshold);
- }
-
- if (isNewCf) {
- admin.addColumnFamily(tableName, columnDesc);
- } else {
- admin.modifyColumnFamily(tableName, columnDesc);
- }
- }
- LOG.info("Enabling table " + tableName);
- admin.enableTable(tableName);
- }
- }
-
- @Override
- protected void addOptions() {
- addOptWithArg(OPT_ZK_QUORUM, "ZK quorum as comma-separated host names " +
- "without port numbers");
- addOptWithArg(OPT_ZK_PARENT_NODE, "name of parent znode in zookeeper");
- addOptWithArg(OPT_TABLE_NAME, "The name of the table to read or write");
- addOptWithArg(OPT_COLUMN_FAMILIES, "The name of the column families to use separated by comma");
- addOptWithArg(OPT_WRITE, OPT_USAGE_LOAD);
- addOptWithArg(OPT_READ, OPT_USAGE_READ);
- addOptWithArg(OPT_UPDATE, OPT_USAGE_UPDATE);
- addOptNoArg(OPT_INIT_ONLY, "Initialize the test table only, don't do any loading");
- addOptWithArg(OPT_BLOOM, OPT_USAGE_BLOOM);
- addOptWithArg(OPT_COMPRESSION, OPT_USAGE_COMPRESSION);
- addOptWithArg(OPT_DATA_BLOCK_ENCODING, OPT_DATA_BLOCK_ENCODING_USAGE);
- addOptWithArg(OPT_MAX_READ_ERRORS, "The maximum number of read errors " +
- "to tolerate before terminating all reader threads. The default is " +
- MultiThreadedReader.DEFAULT_MAX_ERRORS + ".");
- addOptWithArg(OPT_MULTIGET, "Whether to use multi-gets as opposed to " +
- "separate gets for every column in a row");
- addOptWithArg(OPT_KEY_WINDOW, "The 'key window' to maintain between " +
- "reads and writes for concurrent write/read workload. The default " +
- "is " + MultiThreadedReader.DEFAULT_KEY_WINDOW + ".");
-
- addOptNoArg(OPT_MULTIPUT, "Whether to use multi-puts as opposed to " +
- "separate puts for every column in a row");
- addOptNoArg(OPT_BATCHUPDATE, "Whether to use batch as opposed to " +
- "separate updates for every column in a row");
- addOptNoArg(OPT_INMEMORY, OPT_USAGE_IN_MEMORY);
- addOptWithArg(OPT_GENERATOR, OPT_GENERATOR_USAGE);
- addOptWithArg(OPT_WRITER, OPT_WRITER_USAGE);
- addOptWithArg(OPT_UPDATER, OPT_UPDATER_USAGE);
- addOptWithArg(OPT_READER, OPT_READER_USAGE);
-
- addOptWithArg(OPT_NUM_KEYS, "The number of keys to read/write");
- addOptWithArg(OPT_START_KEY, "The first key to read/write " +
- "(a 0-based index). The default value is " +
- DEFAULT_START_KEY + ".");
- addOptNoArg(OPT_SKIP_INIT, "Skip the initialization; assume test table "
- + "already exists");
-
- addOptWithArg(NUM_TABLES,
- "A positive integer number. When a number n is speicfied, load test "
- + "tool will load n table parallely. -tn parameter value becomes "
- + "table name prefix. Each table name is in format <tn>_1...<tn>_n");
-
- addOptWithArg(OPT_REGIONS_PER_SERVER,
- "A positive integer number. When a number n is specified, load test "
- + "tool will create the test table with n regions per server");
-
- addOptWithArg(OPT_ENCRYPTION, OPT_ENCRYPTION_USAGE);
- addOptNoArg(OPT_DEFERRED_LOG_FLUSH, OPT_DEFERRED_LOG_FLUSH_USAGE);
- addOptWithArg(OPT_NUM_REGIONS_PER_SERVER, OPT_NUM_REGIONS_PER_SERVER_USAGE);
- addOptWithArg(OPT_REGION_REPLICATION, OPT_REGION_REPLICATION_USAGE);
- addOptWithArg(OPT_REGION_REPLICA_ID, OPT_REGION_REPLICA_ID_USAGE);
- addOptWithArg(OPT_MOB_THRESHOLD, OPT_MOB_THRESHOLD_USAGE);
- }
-
- @Override
- protected void processOptions(CommandLine cmd) {
- this.cmd = cmd;
-
- tableName = TableName.valueOf(cmd.getOptionValue(OPT_TABLE_NAME,
- DEFAULT_TABLE_NAME));
-
- if (cmd.hasOption(OPT_COLUMN_FAMILIES)) {
- String[] list = cmd.getOptionValue(OPT_COLUMN_FAMILIES).split(",");
- families = new byte[list.length][];
- for (int i = 0; i < list.length; i++) {
- families[i] = Bytes.toBytes(list[i]);
- }
- } else {
- families = DEFAULT_COLUMN_FAMILIES;
- }
-
- isWrite = cmd.hasOption(OPT_WRITE);
- isRead = cmd.hasOption(OPT_READ);
- isUpdate = cmd.hasOption(OPT_UPDATE);
- isInitOnly = cmd.hasOption(OPT_INIT_ONLY);
- deferredLogFlush = cmd.hasOption(OPT_DEFERRED_LOG_FLUSH);
-
- if (!isWrite && !isRead && !isUpdate && !isInitOnly) {
- throw new IllegalArgumentException("Either -" + OPT_WRITE + " or " +
- "-" + OPT_UPDATE + " or -" + OPT_READ + " has to be specified");
- }
-
- if (isInitOnly && (isRead || isWrite || isUpdate)) {
- throw new IllegalArgumentException(OPT_INIT_ONLY + " cannot be specified with"
- + " either -" + OPT_WRITE + " or -" + OPT_UPDATE + " or -" + OPT_READ);
- }
-
- if (!isInitOnly) {
- if (!cmd.hasOption(OPT_NUM_KEYS)) {
- throw new IllegalArgumentException(OPT_NUM_KEYS + " must be specified in "
- + "read or write mode");
- }
- startKey = parseLong(cmd.getOptionValue(OPT_START_KEY,
- String.valueOf(DEFAULT_START_KEY)), 0, Long.MAX_VALUE);
- long numKeys = parseLong(cmd.getOptionValue(OPT_NUM_KEYS), 1,
- Long.MAX_VALUE - startKey);
- endKey = startKey + numKeys;
- isSkipInit = cmd.hasOption(OPT_SKIP_INIT);
- System.out.println("Key range: [" + startKey + ".." + (endKey - 1) + "]");
- }
-
- parseColumnFamilyOptions(cmd);
-
- if (isWrite) {
- String[] writeOpts = splitColonSeparated(OPT_WRITE, 2, 3);
-
- int colIndex = 0;
- minColsPerKey = 1;
- maxColsPerKey = 2 * Integer.parseInt(writeOpts[colIndex++]);
- int avgColDataSize =
- parseInt(writeOpts[colIndex++], 1, Integer.MAX_VALUE);
- minColDataSize = avgColDataSize / 2;
- maxColDataSize = avgColDataSize * 3 / 2;
-
- if (colIndex < writeOpts.length) {
- numWriterThreads = getNumThreads(writeOpts[colIndex++]);
- }
-
- isMultiPut = cmd.hasOption(OPT_MULTIPUT);
-
- mobThreshold = -1;
- if (cmd.hasOption(OPT_MOB_THRESHOLD)) {
- mobThreshold = Integer.parseInt(cmd.getOptionValue(OPT_MOB_THRESHOLD));
- }
-
- System.out.println("Multi-puts: " + isMultiPut);
- System.out.println("Columns per key: " + minColsPerKey + ".."
- + maxColsPerKey);
- System.out.println("Data size per column: " + minColDataSize + ".."
- + maxColDataSize);
- }
-
- if (isUpdate) {
- String[] mutateOpts = splitColonSeparated(OPT_UPDATE, 1, 3);
- int colIndex = 0;
- updatePercent = parseInt(mutateOpts[colIndex++], 0, 100);
- if (colIndex < mutateOpts.length) {
- numUpdaterThreads = getNumThreads(mutateOpts[colIndex++]);
- }
- if (colIndex < mutateOpts.length) {
- ignoreConflicts = parseInt(mutateOpts[colIndex++], 0, 1) == 1;
- }
-
- isBatchUpdate = cmd.hasOption(OPT_BATCHUPDATE);
-
- System.out.println("Batch updates: " + isBatchUpdate);
- System.out.println("Percent of keys to update: " + updatePercent);
- System.out.println("Updater threads: " + numUpdaterThreads);
- System.out.println("Ignore nonce conflicts: " + ignoreConflicts);
- }
-
- if (isRead) {
- String[] readOpts = splitColonSeparated(OPT_READ, 1, 2);
- int colIndex = 0;
- verifyPercent = parseInt(readOpts[colIndex++], 0, 100);
- if (colIndex < readOpts.length) {
- numReaderThreads = getNumThreads(readOpts[colIndex++]);
- }
-
- if (cmd.hasOption(OPT_MAX_READ_ERRORS)) {
- maxReadErrors = parseInt(cmd.getOptionValue(OPT_MAX_READ_ERRORS),
- 0, Integer.MAX_VALUE);
- }
-
- if (cmd.hasOption(OPT_KEY_WINDOW)) {
- keyWindow = parseInt(cmd.getOptionValue(OPT_KEY_WINDOW),
- 0, Integer.MAX_VALUE);
- }
-
- if (cmd.hasOption(OPT_MULTIGET)) {
- multiGetBatchSize = parseInt(cmd.getOptionValue(OPT_MULTIGET),
- 0, Integer.MAX_VALUE);
- }
-
- System.out.println("Multi-gets (value of 1 means no multigets): " + multiGetBatchSize);
- System.out.println("Percent of keys to verify: " + verifyPercent);
- System.out.println("Reader threads: " + numReaderThreads);
- }
-
- numTables = 1;
- if (cmd.hasOption(NUM_TABLES)) {
- numTables = parseInt(cmd.getOptionValue(NUM_TABLES), 1, Short.MAX_VALUE);
- }
-
- numRegionsPerServer = DEFAULT_NUM_REGIONS_PER_SERVER;
- if (cmd.hasOption(OPT_NUM_REGIONS_PER_SERVER)) {
- numRegionsPerServer = Integer.parseInt(cmd.getOptionValue(OPT_NUM_REGIONS_PER_SERVER));
- }
-
- regionReplication = 1;
- if (cmd.hasOption(OPT_REGION_REPLICATION)) {
- regionReplication = Integer.parseInt(cmd.getOptionValue(OPT_REGION_REPLICATION));
- }
-
- regionReplicaId = -1;
- if (cmd.hasOption(OPT_REGION_REPLICA_ID)) {
- regionReplicaId = Integer.parseInt(cmd.getOptionValue(OPT_REGION_REPLICA_ID));
- }
- }
-
- private void parseColumnFamilyOptions(CommandLine cmd) {
- String dataBlockEncodingStr = cmd.getOptionValue(OPT_DATA_BLOCK_ENCODING);
- dataBlockEncodingAlgo = dataBlockEncodingStr == null ? null :
- DataBlockEncoding.valueOf(dataBlockEncodingStr);
-
- String compressStr = cmd.getOptionValue(OPT_COMPRESSION);
- compressAlgo = compressStr == null ? Compression.Algorithm.NONE :
- Compression.Algorithm.valueOf(compressStr);
-
- String bloomStr = cmd.getOptionValue(OPT_BLOOM);
- bloomType = bloomStr == null ? BloomType.ROW :
- BloomType.valueOf(bloomStr);
-
- inMemoryCF = cmd.hasOption(OPT_INMEMORY);
- if (cmd.hasOption(OPT_ENCRYPTION)) {
- cipher = Encryption.getCipher(conf, cmd.getOptionValue(OPT_ENCRYPTION));
- }
-
- }
-
- public void initTestTable() throws IOException {
- Durability durability = Durability.USE_DEFAULT;
- if (deferredLogFlush) {
- durability = Durability.ASYNC_WAL;
- }
-
- HBaseTestingUtility.createPreSplitLoadTestTable(conf, tableName,
- getColumnFamilies(), compressAlgo, dataBlockEncodingAlgo, numRegionsPerServer,
- regionReplication, durability);
- applyColumnFamilyOptions(tableName, getColumnFamilies());
- }
-
- @Override
- protected int doWork() throws IOException {
- if (numTables > 1) {
- return parallelLoadTables();
- } else {
- return loadTable();
- }
- }
-
- protected int loadTable() throws IOException {
- if (cmd.hasOption(OPT_ZK_QUORUM)) {
- conf.set(HConstants.ZOOKEEPER_QUORUM, cmd.getOptionValue(OPT_ZK_QUORUM));
- }
- if (cmd.hasOption(OPT_ZK_PARENT_NODE)) {
- conf.set(HConstants.ZOOKEEPER_ZNODE_PARENT, cmd.getOptionValue(OPT_ZK_PARENT_NODE));
- }
-
- if (isInitOnly) {
- LOG.info("Initializing only; no reads or writes");
- initTestTable();
- return 0;
- }
-
- if (!isSkipInit) {
- initTestTable();
- }
- LoadTestDataGenerator dataGen = null;
- if (cmd.hasOption(OPT_GENERATOR)) {
- String[] clazzAndArgs = cmd.getOptionValue(OPT_GENERATOR).split(COLON);
- dataGen = getLoadGeneratorInstance(clazzAndArgs[0]);
- String[] args;
- if (dataGen instanceof LoadTestDataGeneratorWithACL) {
- LOG.info("Using LoadTestDataGeneratorWithACL");
- if (User.isHBaseSecurityEnabled(conf)) {
- LOG.info("Security is enabled");
- authnFileName = clazzAndArgs[1];
- superUser = clazzAndArgs[2];
- userNames = clazzAndArgs[3];
- args = Arrays.copyOfRange(clazzAndArgs, 2, clazzAndArgs.length);
- Properties authConfig = new Properties();
- authConfig.load(this.getClass().getClassLoader().getResourceAsStream(authnFileName));
- try {
- addAuthInfoToConf(authConfig, conf, superUser, userNames);
- } catch (IOException exp) {
- LOG.error(exp);
- return EXIT_FAILURE;
- }
- userOwner = User.create(loginAndReturnUGI(conf, superUser));
- } else {
- superUser = clazzAndArgs[1];
- userNames = clazzAndArgs[2];
- args = Arrays.copyOfRange(clazzAndArgs, 1, clazzAndArgs.length);
- userOwner = User.createUserForTesting(conf, superUser, new String[0]);
- }
- } else {
- args = clazzAndArgs.length == 1 ? new String[0] : Arrays.copyOfRange(clazzAndArgs, 1,
- clazzAndArgs.length);
- }
- dataGen.initialize(args);
- } else {
- // Default DataGenerator is MultiThreadedAction.DefaultDataGenerator
- dataGen = new MultiThreadedAction.DefaultDataGenerator(minColDataSize, maxColDataSize,
- minColsPerKey, maxColsPerKey, families);
- }
-
- if (userOwner != null) {
- LOG.info("Granting permissions for user " + userOwner.getShortName());
- Permission.Action[] actions = {
- Permission.Action.ADMIN, Permission.Action.CREATE,
- Permission.Action.READ, Permission.Action.WRITE };
- try {
- AccessControlClient.grant(ConnectionFactory.createConnection(conf),
- tableName, userOwner.getShortName(), null, null, actions);
- } catch (Throwable e) {
- LOG.fatal("Error in granting permission for the user " + userOwner.getShortName(), e);
- return EXIT_FAILURE;
- }
- }
-
- if (userNames != null) {
- // This will be comma separated list of expressions.
- String users[] = userNames.split(",");
- User user = null;
- for (String userStr : users) {
- if (User.isHBaseSecurityEnabled(conf)) {
- user = User.create(loginAndReturnUGI(conf, userStr));
- } else {
- user = User.createUserForTesting(conf, userStr, new String[0]);
- }
- }
- }
-
- if (isWrite) {
- if (userOwner != null) {
- writerThreads = new MultiThreadedWriterWithACL(dataGen, conf, tableName, userOwner);
- } else {
- String writerClass = null;
- if (cmd.hasOption(OPT_WRITER)) {
- writerClass = cmd.getOptionValue(OPT_WRITER);
- } else {
- writerClass = MultiThreadedWriter.class.getCanonicalName();
- }
-
- writerThreads = getMultiThreadedWriterInstance(writerClass, dataGen);
- }
- writerThreads.setMultiPut(isMultiPut);
- }
-
- if (isUpdate) {
- if (userOwner != null) {
- updaterThreads = new MultiThreadedUpdaterWithACL(dataGen, conf, tableName, updatePercent,
- userOwner, userNames);
- } else {
- String updaterClass = null;
- if (cmd.hasOption(OPT_UPDATER)) {
- updaterClass = cmd.getOptionValue(OPT_UPDATER);
- } else {
- updaterClass = MultiThreadedUpdater.class.getCanonicalName();
- }
- updaterThreads = getMultiThreadedUpdaterInstance(updaterClass, dataGen);
- }
- updaterThreads.setBatchUpdate(isBatchUpdate);
- updaterThreads.setIgnoreNonceConflicts(ignoreConflicts);
- }
-
- if (isRead) {
- if (userOwner != null) {
- readerThreads = new MultiThreadedReaderWithACL(dataGen, conf, tableName, verifyPercent,
- userNames);
- } else {
- String readerClass = null;
- if (cmd.hasOption(OPT_READER)) {
- readerClass = cmd.getOptionValue(OPT_READER);
- } else {
- readerClass = MultiThreadedReader.class.getCanonicalName();
- }
- readerThreads = getMultiThreadedReaderInstance(readerClass, dataGen);
- }
- readerThreads.setMaxErrors(maxReadErrors);
- readerThreads.setKeyWindow(keyWindow);
- readerThreads.setMultiGetBatchSize(multiGetBatchSize);
- readerThreads.setRegionReplicaId(regionReplicaId);
- }
-
- if (isUpdate && isWrite) {
- LOG.info("Concurrent write/update workload: making updaters aware of the " +
- "write point");
- updaterThreads.linkToWriter(writerThreads);
- }
-
- if (isRead && (isUpdate || isWrite)) {
- LOG.info("Concurrent write/read workload: making readers aware of the " +
- "write point");
- readerThreads.linkToWriter(isUpdate ? updaterThreads : writerThreads);
- }
-
- if (isWrite) {
- System.out.println("Starting to write data...");
- writerThreads.start(startKey, endKey, numWriterThreads);
- }
-
- if (isUpdate) {
- LOG.info("Starting to mutate data...");
- System.out.println("Starting to mutate data...");
- // TODO : currently append and increment operations not tested with tags
- // Will update this after it is done
- updaterThreads.start(startKey, endKey, numUpdaterThreads);
- }
-
- if (isRead) {
- System.out.println("Starting to read data...");
- readerThreads.start(startKey, endKey, numReaderThreads);
- }
-
- if (isWrite) {
- writerThreads.waitForFinish();
- }
-
- if (isUpdate) {
- updaterThreads.waitForFinish();
- }
-
- if (isRead) {
- readerThreads.waitForFinish();
- }
-
- boolean success = true;
- if (isWrite) {
- success = success && writerThreads.getNumWriteFailures() == 0;
- }
- if (isUpdate) {
- success = success && updaterThreads.getNumWriteFailures() == 0;
- }
- if (isRead) {
- success = success && readerThreads.getNumReadErrors() == 0
- && readerThreads.getNumReadFailures() == 0;
- }
- return success ? EXIT_SUCCESS : EXIT_FAILURE;
- }
-
- private LoadTestDataGenerator getLoadGeneratorInstance(String clazzName) throws IOException {
- try {
- Class<?> clazz = Class.forName(clazzName);
- Constructor<?> constructor = clazz.getConstructor(int.class, int.class, int.class, int.class,
- byte[][].class);
- return (LoadTestDataGenerator) constructor.newInstance(minColDataSize, maxColDataSize,
- minColsPerKey, maxColsPerKey, families);
- } catch (Exception e) {
- throw new IOException(e);
- }
- }
-
- private MultiThreadedWriter getMultiThreadedWriterInstance(String clazzName
- , LoadTestDataGenerator dataGen) throws IOException {
- try {
- Class<?> clazz = Class.forName(clazzName);
- Constructor<?> constructor = clazz.getConstructor(
- LoadTestDataGenerator.class, Configuration.class, TableName.class);
- return (MultiThreadedWriter) constructor.newInstance(dataGen, conf, tableName);
- } catch (Exception e) {
- throw new IOException(e);
- }
- }
-
- private MultiThreadedUpdater getMultiThreadedUpdaterInstance(String clazzName
- , LoadTestDataGenerator dataGen) throws IOException {
- try {
- Class<?> clazz = Class.forName(clazzName);
- Constructor<?> constructor = clazz.getConstructor(
- LoadTestDataGenerator.class, Configuration.class, TableName.class, double.class);
- return (MultiThreadedUpdater) constructor.newInstance(
- dataGen, conf, tableName, updatePercent);
- } catch (Exception e) {
- throw new IOException(e);
- }
- }
-
- private MultiThreadedReader getMultiThreadedReaderInstance(String clazzName
- , LoadTestDataGenerator dataGen) throws IOException {
- try {
- Class<?> clazz = Class.forName(clazzName);
- Constructor<?> constructor = clazz.getConstructor(
- LoadTestDataGenerator.class, Configuration.class, TableName.class, double.class);
- return (MultiThreadedReader) constructor.newInstance(dataGen, conf, tableName, verifyPercent);
- } catch (Exception e) {
- throw new IOException(e);
- }
- }
-
- public static byte[] generateData(final Random r, int length) {
- byte [] b = new byte [length];
- int i = 0;
-
- for(i = 0; i < (length-8); i += 8) {
- b[i] = (byte) (65 + r.nextInt(26));
- b[i+1] = b[i];
- b[i+2] = b[i];
- b[i+3] = b[i];
- b[i+4] = b[i];
- b[i+5] = b[i];
- b[i+6] = b[i];
- b[i+7] = b[i];
- }
-
- byte a = (byte) (65 + r.nextInt(26));
- for(; i < length; i++) {
- b[i] = a;
- }
- return b;
- }
- public static void main(String[] args) {
- new LoadTestTool().doStaticMain(args);
- }
-
- /**
- * When NUM_TABLES is specified, the function starts multiple worker threads
- * which individually start a LoadTestTool instance to load a table. Each
- * table name is in the format <tn>_<index>. For example, with "-tn test -num_tables 2",
- * the table names will be "test_1" and "test_2".
- *
- * @throws IOException
- */
- private int parallelLoadTables()
- throws IOException {
- // create new command args
- String tableName = cmd.getOptionValue(OPT_TABLE_NAME, DEFAULT_TABLE_NAME);
- String[] newArgs = null;
- if (!cmd.hasOption(LoadTestTool.OPT_TABLE_NAME)) {
- newArgs = new String[cmdLineArgs.length + 2];
- newArgs[0] = "-" + LoadTestTool.OPT_TABLE_NAME;
- newArgs[1] = LoadTestTool.DEFAULT_TABLE_NAME;
- System.arraycopy(cmdLineArgs, 0, newArgs, 2, cmdLineArgs.length);
- } else {
- newArgs = cmdLineArgs;
- }
-
- int tableNameValueIndex = -1;
- for (int j = 0; j < newArgs.length; j++) {
- if (newArgs[j].endsWith(OPT_TABLE_NAME)) {
- tableNameValueIndex = j + 1;
- } else if (newArgs[j].endsWith(NUM_TABLES)) {
- // change NUM_TABLES to 1 so that each worker loads one table
- newArgs[j + 1] = "1";
- }
- }
-
- // starting to load multiple tables
- List<WorkerThread> workers = new ArrayList<>();
- for (int i = 0; i < numTables; i++) {
- String[] workerArgs = newArgs.clone();
- workerArgs[tableNameValueIndex] = tableName + "_" + (i+1);
- WorkerThread worker = new WorkerThread(i, workerArgs);
- workers.add(worker);
- LOG.info(worker + " starting");
- worker.start();
- }
-
- // wait for all workers to finish
- LOG.info("Waiting for worker threads to finish");
- for (WorkerThread t : workers) {
- try {
- t.join();
- } catch (InterruptedException ie) {
- IOException iie = new InterruptedIOException();
- iie.initCause(ie);
- throw iie;
- }
- checkForErrors();
- }
-
- return EXIT_SUCCESS;
- }
-
- // If an exception is thrown by one of worker threads, it will be
- // stored here.
- protected AtomicReference<Throwable> thrown = new AtomicReference<>();
-
- private void workerThreadError(Throwable t) {
- thrown.compareAndSet(null, t);
- }
-
- /**
- * Check for errors in the writer threads. If any is found, rethrow it.
- */
- private void checkForErrors() throws IOException {
- Throwable thrown = this.thrown.get();
- if (thrown == null) return;
- if (thrown instanceof IOException) {
- throw (IOException) thrown;
- } else {
- throw new RuntimeException(thrown);
- }
- }
-
- class WorkerThread extends Thread {
- private String[] workerArgs;
-
- WorkerThread(int i, String[] args) {
- super("WorkerThread-" + i);
- workerArgs = args;
- }
-
- @Override
- public void run() {
- try {
- int ret = ToolRunner.run(HBaseConfiguration.create(), new LoadTestTool(), workerArgs);
- if (ret != 0) {
- throw new RuntimeException("LoadTestTool exit with non-zero return code.");
- }
- } catch (Exception ex) {
- LOG.error("Error in worker thread", ex);
- workerThreadError(ex);
- }
- }
- }
-
- private void addAuthInfoToConf(Properties authConfig, Configuration conf, String owner,
- String userList) throws IOException {
- List<String> users = new ArrayList(Arrays.asList(userList.split(",")));
- users.add(owner);
- for (String user : users) {
- String keyTabFileConfKey = "hbase." + user + ".keytab.file";
- String principalConfKey = "hbase." + user + ".kerberos.principal";
- if (!authConfig.containsKey(keyTabFileConfKey) || !authConfig.containsKey(principalConfKey)) {
- throw new IOException("Authentication configs missing for user : " + user);
- }
- }
- for (String key : authConfig.stringPropertyNames()) {
- conf.set(key, authConfig.getProperty(key));
- }
- LOG.debug("Added authentication properties to config successfully.");
- }
-
- public static UserGroupInformation loginAndReturnUGI(Configuration conf, String username)
- throws IOException {
- String hostname = InetAddress.getLocalHost().getHostName();
- String keyTabFileConfKey = "hbase." + username + ".keytab.file";
- String keyTabFileLocation = conf.get(keyTabFileConfKey);
- String principalConfKey = "hbase." + username + ".kerberos.principal";
- String principal = SecurityUtil.getServerPrincipal(conf.get(principalConfKey), hostname);
- if (keyTabFileLocation == null || principal == null) {
- LOG.warn("Principal or key tab file null for : " + principalConfKey + ", "
- + keyTabFileConfKey);
- }
- UserGroupInformation ugi =
- UserGroupInformation.loginUserFromKeytabAndReturnUGI(principal, keyTabFileLocation);
- return ugi;
- }
-}
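
For reference, the WorkerThread.run() code in the file above already shows how the tool launches itself through ToolRunner. A minimal sketch of driving the relocated LoadTestTool the same way from user code follows; it assumes the class keeps the org.apache.hadoop.hbase.util package after the module move, and the option values are purely illustrative (-tn, -write, -read and -num_keys are the options declared in addOptions() above).

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.util.LoadTestTool;
import org.apache.hadoop.util.ToolRunner;

public class LoadTestToolLauncher {
  public static void main(String[] args) throws Exception {
    // Same pattern as WorkerThread.run() above: ToolRunner drives the tool.
    Configuration conf = HBaseConfiguration.create();
    // Example values only: 10 columns per key, 100-byte values, verify 100%
    // of reads with 20 reader threads, over one million keys.
    String[] toolArgs = {
        "-tn", "cluster_test",
        "-write", "10:100",
        "-read", "100:20",
        "-num_keys", "1000000"
    };
    int exitCode = ToolRunner.run(conf, new LoadTestTool(), toolArgs);
    System.exit(exitCode);
  }
}
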
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedAction.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedAction.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedAction.java
index 1d2e9a6..6550baa 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedAction.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedAction.java
@@ -49,7 +49,7 @@ import org.apache.hadoop.util.StringUtils;
/**
* Common base class for reader and writer parts of multi-thread HBase load
- * test ({@link LoadTestTool}).
+ * test (See LoadTestTool).
*/
public abstract class MultiThreadedAction {
private static final Log LOG = LogFactory.getLog(MultiThreadedAction.class);
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedReaderWithACL.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedReaderWithACL.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedReaderWithACL.java
index 1e7e341..e951175 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedReaderWithACL.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedReaderWithACL.java
@@ -28,6 +28,7 @@ import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.security.HBaseKerberosUtils;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.util.test.LoadTestDataGenerator;
import org.apache.hadoop.security.UserGroupInformation;
@@ -121,7 +122,7 @@ public class MultiThreadedReaderWithACL extends MultiThreadedReader {
UserGroupInformation realUserUgi;
if(!users.containsKey(userNames[mod])) {
if(User.isHBaseSecurityEnabled(conf)) {
- realUserUgi = LoadTestTool.loginAndReturnUGI(conf, userNames[mod]);
+ realUserUgi = HBaseKerberosUtils.loginAndReturnUGI(conf, userNames[mod]);
} else {
realUserUgi = UserGroupInformation.createRemoteUser(userNames[mod]);
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedUpdaterWithACL.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedUpdaterWithACL.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedUpdaterWithACL.java
index 40e23fb..9d9bb63 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedUpdaterWithACL.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedUpdaterWithACL.java
@@ -37,6 +37,7 @@ import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException;
import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.security.HBaseKerberosUtils;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.util.test.LoadTestDataGenerator;
import org.apache.hadoop.security.UserGroupInformation;
@@ -138,7 +139,7 @@ public class MultiThreadedUpdaterWithACL extends MultiThreadedUpdater {
try {
if (!users.containsKey(userNames[mod])) {
if (User.isHBaseSecurityEnabled(conf)) {
- realUserUgi = LoadTestTool.loginAndReturnUGI(conf, userNames[mod]);
+ realUserUgi = HBaseKerberosUtils.loginAndReturnUGI(conf, userNames[mod]);
} else {
realUserUgi = UserGroupInformation.createRemoteUser(userNames[mod]);
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/util/RestartMetaTest.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/RestartMetaTest.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/RestartMetaTest.java
index 6beb2e6..7972855 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/RestartMetaTest.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/RestartMetaTest.java
@@ -81,7 +81,7 @@ public class RestartMetaTest extends AbstractHBaseTool {
// start the writers
LoadTestDataGenerator dataGen = new MultiThreadedAction.DefaultDataGenerator(
minColDataSize, maxColDataSize, minColsPerKey, maxColsPerKey,
- LoadTestTool.DEFAULT_COLUMN_FAMILY);
+ HFileTestUtil.DEFAULT_COLUMN_FAMILY);
MultiThreadedWriter writer = new MultiThreadedWriter(dataGen, conf, TABLE_NAME);
writer.setMultiPut(true);
writer.start(startKey, endKey, numThreads);
@@ -101,7 +101,7 @@ public class RestartMetaTest extends AbstractHBaseTool {
// create tables if needed
HBaseTestingUtility.createPreSplitLoadTestTable(conf, TABLE_NAME,
- LoadTestTool.DEFAULT_COLUMN_FAMILY, Compression.Algorithm.NONE,
+ HFileTestUtil.DEFAULT_COLUMN_FAMILY, Compression.Algorithm.NONE,
DataBlockEncoding.NONE);
LOG.debug("Loading data....\n\n");
@@ -143,8 +143,8 @@ public class RestartMetaTest extends AbstractHBaseTool {
@Override
protected void addOptions() {
addOptWithArg(OPT_NUM_RS, "Number of Region Servers");
- addOptWithArg(LoadTestTool.OPT_DATA_BLOCK_ENCODING,
- LoadTestTool.OPT_DATA_BLOCK_ENCODING_USAGE);
+ addOptWithArg(HFileTestUtil.OPT_DATA_BLOCK_ENCODING,
+ HFileTestUtil.OPT_DATA_BLOCK_ENCODING_USAGE);
}
@Override
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestRegionSizeCalculator.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestRegionSizeCalculator.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestRegionSizeCalculator.java
deleted file mode 100644
index 51dc238..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestRegionSizeCalculator.java
+++ /dev/null
@@ -1,159 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.util;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.HRegionLocation;
-import org.apache.hadoop.hbase.RegionLoad;
-import org.apache.hadoop.hbase.ServerName;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Admin;
-import org.apache.hadoop.hbase.testclassification.MiscTests;
-import org.apache.hadoop.hbase.testclassification.SmallTests;
-import org.apache.hadoop.hbase.client.RegionLocator;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.mockito.Mockito;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.TreeMap;
-
-import static org.apache.hadoop.hbase.HConstants.DEFAULT_REGIONSERVER_PORT;
-import static org.junit.Assert.assertEquals;
-import static org.mockito.Mockito.when;
-
-@Category({MiscTests.class, SmallTests.class})
-public class TestRegionSizeCalculator {
-
- private Configuration configuration = new Configuration();
- private final long megabyte = 1024L * 1024L;
- private final ServerName sn = ServerName.valueOf("local-rs", DEFAULT_REGIONSERVER_PORT,
- ServerName.NON_STARTCODE);
-
- @Test
- public void testSimpleTestCase() throws Exception {
-
- RegionLocator regionLocator = mockRegionLocator("region1", "region2", "region3");
-
- Admin admin = mockAdmin(
- mockRegion("region1", 123),
- mockRegion("region3", 1232),
- mockRegion("region2", 54321)
- );
-
- RegionSizeCalculator calculator = new RegionSizeCalculator(regionLocator, admin);
-
- assertEquals(123 * megabyte, calculator.getRegionSize("region1".getBytes()));
- assertEquals(54321 * megabyte, calculator.getRegionSize("region2".getBytes()));
- assertEquals(1232 * megabyte, calculator.getRegionSize("region3".getBytes()));
- // if regionCalculator does not know about a region, it should return 0
- assertEquals(0 * megabyte, calculator.getRegionSize("otherTableRegion".getBytes()));
-
- assertEquals(3, calculator.getRegionSizeMap().size());
- }
-
-
- /**
- * When the size of a region in megabytes is larger than the largest possible integer, there
- * could be an error caused by loss of precision.
- * */
- @Test
- public void testLargeRegion() throws Exception {
-
- RegionLocator regionLocator = mockRegionLocator("largeRegion");
-
- Admin admin = mockAdmin(
- mockRegion("largeRegion", Integer.MAX_VALUE)
- );
-
- RegionSizeCalculator calculator = new RegionSizeCalculator(regionLocator, admin);
-
- assertEquals(((long) Integer.MAX_VALUE) * megabyte, calculator.getRegionSize("largeRegion".getBytes()));
- }
-
- /** When calculator is disabled, it should return 0 for each request.*/
- @Test
- public void testDisabled() throws Exception {
- String regionName = "cz.goout:/index.html";
- RegionLocator table = mockRegionLocator(regionName);
-
- Admin admin = mockAdmin(
- mockRegion(regionName, 999)
- );
-
- //first request on enabled calculator
- RegionSizeCalculator calculator = new RegionSizeCalculator(table, admin);
- assertEquals(999 * megabyte, calculator.getRegionSize(regionName.getBytes()));
-
- //then disabled calculator.
- configuration.setBoolean(RegionSizeCalculator.ENABLE_REGIONSIZECALCULATOR, false);
- RegionSizeCalculator disabledCalculator = new RegionSizeCalculator(table, admin);
- assertEquals(0 * megabyte, disabledCalculator.getRegionSize(regionName.getBytes()));
-
- assertEquals(0, disabledCalculator.getRegionSizeMap().size());
- }
-
- /**
- * Makes a mock table (RegionLocator) with the given region names.
- * */
- private RegionLocator mockRegionLocator(String... regionNames) throws IOException {
- RegionLocator mockedTable = Mockito.mock(RegionLocator.class);
- when(mockedTable.getName()).thenReturn(TableName.valueOf("sizeTestTable"));
- List<HRegionLocation> regionLocations = new ArrayList<>(regionNames.length);
- when(mockedTable.getAllRegionLocations()).thenReturn(regionLocations);
-
- for (String regionName : regionNames) {
- HRegionInfo info = Mockito.mock(HRegionInfo.class);
- when(info.getRegionName()).thenReturn(regionName.getBytes());
- regionLocations.add(new HRegionLocation(info, sn));
- }
-
- return mockedTable;
- }
-
- /**
- * Creates a mock Admin returning RegionLoad info for the given regions.
- */
- private Admin mockAdmin(RegionLoad... regionLoadArray) throws Exception {
- Admin mockAdmin = Mockito.mock(Admin.class);
- Map<byte[], RegionLoad> regionLoads = new TreeMap<>(Bytes.BYTES_COMPARATOR);
- for (RegionLoad regionLoad : regionLoadArray) {
- regionLoads.put(regionLoad.getName(), regionLoad);
- }
- when(mockAdmin.getConfiguration()).thenReturn(configuration);
- when(mockAdmin.getRegionLoad(sn, TableName.valueOf("sizeTestTable"))).thenReturn(regionLoads);
- return mockAdmin;
- }
-
- /**
- * Creates mock of region with given name and size.
- *
- * @param fileSizeMb number of megabytes occupied by the region in the file store
- * */
- private RegionLoad mockRegion(String regionName, int fileSizeMb) {
- RegionLoad region = Mockito.mock(RegionLoad.class);
- when(region.getName()).thenReturn(regionName.getBytes());
- when(region.getNameAsString()).thenReturn(regionName);
- when(region.getStorefileSizeMB()).thenReturn(fileSizeMb);
- return region;
- }
-}
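
The deleted test above documents the RegionSizeCalculator contract: construct it from a RegionLocator and an Admin, then ask for per-region sizes, with unknown regions reported as 0. A minimal sketch of that usage against a live cluster follows; the table name "my_table" is hypothetical, and the import assumes the pre-move org.apache.hadoop.hbase.util package (this commit relocates the class to the new module).

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.util.RegionSizeCalculator;

public class RegionSizeExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    try (Connection conn = ConnectionFactory.createConnection(conf);
         RegionLocator locator = conn.getRegionLocator(TableName.valueOf("my_table"));
         Admin admin = conn.getAdmin()) {
      // Constructor signature exercised by the test: (RegionLocator, Admin).
      RegionSizeCalculator calculator = new RegionSizeCalculator(locator, admin);
      // Per the test, a region the calculator does not know about yields 0.
      long unknown = calculator.getRegionSize("not_a_region".getBytes());
      System.out.println("Unknown region size: " + unknown);
      for (long sizeBytes : calculator.getRegionSizeMap().values()) {
        System.out.println(sizeBytes + " bytes");
      }
    }
  }
}
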
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/util/test/LoadTestDataGenerator.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/test/LoadTestDataGenerator.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/test/LoadTestDataGenerator.java
index bf7bf45..2deba00 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/test/LoadTestDataGenerator.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/test/LoadTestDataGenerator.java
@@ -17,6 +17,7 @@
package org.apache.hadoop.hbase.util.test;
import java.io.IOException;
+import java.util.Random;
import java.util.Set;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
@@ -56,6 +57,28 @@ public abstract class LoadTestDataGenerator {
this.kvGenerator = new LoadTestKVGenerator(minValueSize, maxValueSize);
}
+ public static byte[] generateData(final Random r, int length) {
+ byte [] b = new byte [length];
+ int i = 0;
+
+ for(i = 0; i < (length-8); i += 8) {
+ b[i] = (byte) (65 + r.nextInt(26));
+ b[i+1] = b[i];
+ b[i+2] = b[i];
+ b[i+3] = b[i];
+ b[i+4] = b[i];
+ b[i+5] = b[i];
+ b[i+6] = b[i];
+ b[i+7] = b[i];
+ }
+
+ byte a = (byte) (65 + r.nextInt(26));
+ for(; i < length; i++) {
+ b[i] = a;
+ }
+ return b;
+ }
+
/**
* initialize the LoadTestDataGenerator
*
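
The generateData(Random, int) helper added here (and removed from LoadTestTool earlier in this patch) fills the value in 8-byte runs of a single random uppercase ASCII letter, with any remaining tail bytes filled by one more letter. A small sketch of calling it, using an arbitrary seed and length:

import java.nio.charset.StandardCharsets;
import java.util.Random;

import org.apache.hadoop.hbase.util.test.LoadTestDataGenerator;

public class GenerateDataDemo {
  public static void main(String[] args) {
    // Seed (42) and length (20) are arbitrary example values.
    byte[] value = LoadTestDataGenerator.generateData(new Random(42), 20);
    // With length 20 this yields two 8-byte runs plus a 4-byte tail,
    // e.g. "AAAAAAAABBBBBBBBCCCC" (the actual letters depend on the seed).
    System.out.println(new String(value, StandardCharsets.US_ASCII));
  }
}
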
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/resources/org/apache/hadoop/hbase/PerformanceEvaluation_Counter.properties
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/resources/org/apache/hadoop/hbase/PerformanceEvaluation_Counter.properties b/hbase-server/src/test/resources/org/apache/hadoop/hbase/PerformanceEvaluation_Counter.properties
deleted file mode 100644
index 6fca96a..0000000
--- a/hbase-server/src/test/resources/org/apache/hadoop/hbase/PerformanceEvaluation_Counter.properties
+++ /dev/null
@@ -1,28 +0,0 @@
-# ResourceBundle properties file for Map-Reduce counters
-
-#/**
-# * Licensed to the Apache Software Foundation (ASF) under one
-# * or more contributor license agreements. See the NOTICE file
-# * distributed with this work for additional information
-# * regarding copyright ownership. The ASF licenses this file
-# * to you under the Apache License, Version 2.0 (the
-# * "License"); you may not use this file except in compliance
-# * with the License. You may obtain a copy of the License at
-# *
-# * http://www.apache.org/licenses/LICENSE-2.0
-# *
-# * Unless required by applicable law or agreed to in writing, software
-# * distributed under the License is distributed on an "AS IS" BASIS,
-# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# * See the License for the specific language governing permissions and
-# * limitations under the License.
-# */
-
-CounterGroupName= HBase Performance Evaluation
-ELAPSED_TIME.name= Elapsed time in milliseconds
-ROWS.name= Row count
-# ResourceBundle properties file for Map-Reduce counters
-
-CounterGroupName= HBase Performance Evaluation
-ELAPSED_TIME.name= Elapsed time in milliseconds
-ROWS.name= Row count
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/resources/org/apache/hadoop/hbase/mapreduce/exportedTableIn94Format
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/resources/org/apache/hadoop/hbase/mapreduce/exportedTableIn94Format b/hbase-server/src/test/resources/org/apache/hadoop/hbase/mapreduce/exportedTableIn94Format
deleted file mode 100755
index 762ddd7..0000000
Binary files a/hbase-server/src/test/resources/org/apache/hadoop/hbase/mapreduce/exportedTableIn94Format and /dev/null differ
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-spark/pom.xml
----------------------------------------------------------------------
diff --git a/hbase-spark/pom.xml b/hbase-spark/pom.xml
index 51e67fa..d4ffb6b 100644
--- a/hbase-spark/pom.xml
+++ b/hbase-spark/pom.xml
@@ -488,6 +488,10 @@
<type>test-jar</type>
</dependency>
<dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-mapreduce</artifactId>
+ </dependency>
+ <dependency>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
</dependency>
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-spark/src/test/java/org/apache/hadoop/hbase/spark/TestJavaHBaseContext.java
----------------------------------------------------------------------
diff --git a/hbase-spark/src/test/java/org/apache/hadoop/hbase/spark/TestJavaHBaseContext.java b/hbase-spark/src/test/java/org/apache/hadoop/hbase/spark/TestJavaHBaseContext.java
index 93cd939..bfacbe8 100644
--- a/hbase-spark/src/test/java/org/apache/hadoop/hbase/spark/TestJavaHBaseContext.java
+++ b/hbase-spark/src/test/java/org/apache/hadoop/hbase/spark/TestJavaHBaseContext.java
@@ -27,7 +27,6 @@ import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 464d42f..e610c22 100755
--- a/pom.xml
+++ b/pom.xml
@@ -64,6 +64,7 @@
<module>hbase-build-support</module>
<module>hbase-build-configuration</module>
<module>hbase-replication</module>
+ <module>hbase-mapreduce</module>
<module>hbase-resource-bundle</module>
<module>hbase-server</module>
<module>hbase-thrift</module>
@@ -1628,6 +1629,18 @@
<scope>test</scope>
</dependency>
<dependency>
+ <artifactId>hbase-mapreduce</artifactId>
+ <groupId>org.apache.hbase</groupId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <artifactId>hbase-mapreduce</artifactId>
+ <groupId>org.apache.hbase</groupId>
+ <version>${project.version}</version>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
<artifactId>hbase-endpoint</artifactId>
<groupId>org.apache.hbase</groupId>
<version>${project.version}</version>
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/src/main/asciidoc/_chapters/ops_mgt.adoc
----------------------------------------------------------------------
diff --git a/src/main/asciidoc/_chapters/ops_mgt.adoc b/src/main/asciidoc/_chapters/ops_mgt.adoc
index 6181b13..f96cd6c 100644
--- a/src/main/asciidoc/_chapters/ops_mgt.adoc
+++ b/src/main/asciidoc/_chapters/ops_mgt.adoc
@@ -2478,7 +2478,7 @@ void rename(Admin admin, String oldTableName, TableName newTableName) {
RegionServer Grouping (A.K.A `rsgroup`) is an advanced feature for
partitioning regionservers into distinctive groups for strict isolation. It
should only be used by users who are sophisticated enough to understand the
-full implications and have a sufficient background in managing HBase clusters.
+full implications and have a sufficient background in managing HBase clusters.
It was developed by Yahoo! and they run it at scale on their large grid cluster.
See link:http://www.slideshare.net/HBaseCon/keynote-apache-hbase-at-yahoo-scale[HBase at Yahoo! Scale].
@@ -2491,20 +2491,20 @@ rsgroup at a time. By default, all tables and regionservers belong to the
APIs. A custom balancer implementation tracks assignments per rsgroup and makes
sure to move regions to the relevant regionservers in that rsgroup. The rsgroup
information is stored in a regular HBase table, and a zookeeper-based read-only
-cache is used at cluster bootstrap time.
+cache is used at cluster bootstrap time.
-To enable, add the following to your hbase-site.xml and restart your Master:
+To enable, add the following to your hbase-site.xml and restart your Master:
[source,xml]
----
- <property>
- <name>hbase.coprocessor.master.classes</name>
- <value>org.apache.hadoop.hbase.rsgroup.RSGroupAdminEndpoint</value>
- </property>
- <property>
- <name>hbase.master.loadbalancer.class</name>
- <value>org.apache.hadoop.hbase.rsgroup.RSGroupBasedLoadBalancer</value>
- </property>
+ <property>
+ <name>hbase.coprocessor.master.classes</name>
+ <value>org.apache.hadoop.hbase.rsgroup.RSGroupAdminEndpoint</value>
+ </property>
+ <property>
+ <name>hbase.master.loadbalancer.class</name>
+ <value>org.apache.hadoop.hbase.rsgroup.RSGroupBasedLoadBalancer</value>
+ </property>
----
Then use the shell _rsgroup_ commands to create and manipulate RegionServer
@@ -2514,7 +2514,7 @@ rsgroup commands available in the hbase shell type:
[source, bash]
----
hbase(main):008:0> help ‘rsgroup’
- Took 0.5610 seconds
+ Took 0.5610 seconds
----
High level, you create a rsgroup that is other than the `default` group using
@@ -2531,8 +2531,8 @@ Here is example using a few of the rsgroup commands. To add a group, do as foll
[source, bash]
----
- hbase(main):008:0> add_rsgroup 'my_group'
- Took 0.5610 seconds
+ hbase(main):008:0> add_rsgroup 'my_group'
+ Took 0.5610 seconds
----
@@ -2556,11 +2556,11 @@ ERROR: org.apache.hadoop.hbase.exceptions.UnknownProtocolException: No registere
====
Add a server (specified by hostname + port) to the just-made group using the
-_move_servers_rsgroup_ command as follows:
+_move_servers_rsgroup_ command as follows:
[source, bash]
----
- hbase(main):010:0> move_servers_rsgroup 'my_group',['k.att.net:51129']
+ hbase(main):010:0> move_servers_rsgroup 'my_group',['k.att.net:51129']
----
.Hostname and Port vs ServerName
[11/41] hbase git commit: HBASE-18640 Move mapreduce out of
hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java
deleted file mode 100644
index e80410f..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java
+++ /dev/null
@@ -1,1111 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.snapshot;
-
-import java.io.BufferedInputStream;
-import java.io.FileNotFoundException;
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.LinkedList;
-import java.util.List;
-
-import org.apache.commons.cli.CommandLine;
-import org.apache.commons.cli.Option;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileChecksum;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.FileUtil;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.permission.FsPermission;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.io.FileLink;
-import org.apache.hadoop.hbase.io.HFileLink;
-import org.apache.hadoop.hbase.io.WALLink;
-import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
-import org.apache.hadoop.hbase.mob.MobUtils;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotFileInfo;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
-import org.apache.hadoop.hbase.util.AbstractHBaseTool;
-import org.apache.hadoop.hbase.util.FSUtils;
-import org.apache.hadoop.hbase.util.HFileArchiveUtil;
-import org.apache.hadoop.hbase.util.Pair;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.IOUtils;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.InputFormat;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
-import org.apache.hadoop.mapreduce.security.TokenCache;
-import org.apache.hadoop.hbase.io.hadoopbackport.ThrottledInputStream;
-import org.apache.hadoop.util.StringUtils;
-import org.apache.hadoop.util.Tool;
-
-/**
- * Export the specified snapshot to a given FileSystem.
- *
- * The .snapshot/name folder is copied to the destination cluster
- * and then all the hfiles/wals are copied using a Map-Reduce Job in the .archive/ location.
- * When everything is done, the second cluster can restore the snapshot.
- */
-@InterfaceAudience.Public
-public class ExportSnapshot extends AbstractHBaseTool implements Tool {
- public static final String NAME = "exportsnapshot";
- /** Configuration prefix for overrides for the source filesystem */
- public static final String CONF_SOURCE_PREFIX = NAME + ".from.";
- /** Configuration prefix for overrides for the destination filesystem */
- public static final String CONF_DEST_PREFIX = NAME + ".to.";
-
- private static final Log LOG = LogFactory.getLog(ExportSnapshot.class);
-
- private static final String MR_NUM_MAPS = "mapreduce.job.maps";
- private static final String CONF_NUM_SPLITS = "snapshot.export.format.splits";
- private static final String CONF_SNAPSHOT_NAME = "snapshot.export.format.snapshot.name";
- private static final String CONF_SNAPSHOT_DIR = "snapshot.export.format.snapshot.dir";
- private static final String CONF_FILES_USER = "snapshot.export.files.attributes.user";
- private static final String CONF_FILES_GROUP = "snapshot.export.files.attributes.group";
- private static final String CONF_FILES_MODE = "snapshot.export.files.attributes.mode";
- private static final String CONF_CHECKSUM_VERIFY = "snapshot.export.checksum.verify";
- private static final String CONF_OUTPUT_ROOT = "snapshot.export.output.root";
- private static final String CONF_INPUT_ROOT = "snapshot.export.input.root";
- private static final String CONF_BUFFER_SIZE = "snapshot.export.buffer.size";
- private static final String CONF_MAP_GROUP = "snapshot.export.default.map.group";
- private static final String CONF_BANDWIDTH_MB = "snapshot.export.map.bandwidth.mb";
- protected static final String CONF_SKIP_TMP = "snapshot.export.skip.tmp";
-
- static class Testing {
- static final String CONF_TEST_FAILURE = "test.snapshot.export.failure";
- static final String CONF_TEST_FAILURE_COUNT = "test.snapshot.export.failure.count";
- int failuresCountToInject = 0;
- int injectedFailureCount = 0;
- }
-
- // Command line options and defaults.
- static final class Options {
- static final Option SNAPSHOT = new Option(null, "snapshot", true, "Snapshot to restore.");
- static final Option TARGET_NAME = new Option(null, "target", true,
- "Target name for the snapshot.");
- static final Option COPY_TO = new Option(null, "copy-to", true, "Remote "
- + "destination hdfs://");
- static final Option COPY_FROM = new Option(null, "copy-from", true,
- "Input folder hdfs:// (default hbase.rootdir)");
- static final Option NO_CHECKSUM_VERIFY = new Option(null, "no-checksum-verify", false,
- "Do not verify checksum, use name+length only.");
- static final Option NO_TARGET_VERIFY = new Option(null, "no-target-verify", false,
- "Do not verify the integrity of the exported snapshot.");
- static final Option OVERWRITE = new Option(null, "overwrite", false,
- "Rewrite the snapshot manifest if already exists.");
- static final Option CHUSER = new Option(null, "chuser", true,
- "Change the owner of the files to the specified one.");
- static final Option CHGROUP = new Option(null, "chgroup", true,
- "Change the group of the files to the specified one.");
- static final Option CHMOD = new Option(null, "chmod", true,
- "Change the permission of the files to the specified one.");
- static final Option MAPPERS = new Option(null, "mappers", true,
- "Number of mappers to use during the copy (mapreduce.job.maps).");
- static final Option BANDWIDTH = new Option(null, "bandwidth", true,
- "Limit bandwidth to this value in MB/second.");
- }
-
- // Export Map-Reduce Counters, to keep track of the progress
- public enum Counter {
- MISSING_FILES, FILES_COPIED, FILES_SKIPPED, COPY_FAILED,
- BYTES_EXPECTED, BYTES_SKIPPED, BYTES_COPIED
- }
-
- private static class ExportMapper extends Mapper<BytesWritable, NullWritable,
- NullWritable, NullWritable> {
- private static final Log LOG = LogFactory.getLog(ExportMapper.class);
- final static int REPORT_SIZE = 1 * 1024 * 1024;
- final static int BUFFER_SIZE = 64 * 1024;
-
- private boolean verifyChecksum;
- private String filesGroup;
- private String filesUser;
- private short filesMode;
- private int bufferSize;
-
- private FileSystem outputFs;
- private Path outputArchive;
- private Path outputRoot;
-
- private FileSystem inputFs;
- private Path inputArchive;
- private Path inputRoot;
-
- private static Testing testing = new Testing();
-
- @Override
- public void setup(Context context) throws IOException {
- Configuration conf = context.getConfiguration();
-
- Configuration srcConf = HBaseConfiguration.createClusterConf(conf, null, CONF_SOURCE_PREFIX);
- Configuration destConf = HBaseConfiguration.createClusterConf(conf, null, CONF_DEST_PREFIX);
-
- verifyChecksum = conf.getBoolean(CONF_CHECKSUM_VERIFY, true);
-
- filesGroup = conf.get(CONF_FILES_GROUP);
- filesUser = conf.get(CONF_FILES_USER);
- filesMode = (short)conf.getInt(CONF_FILES_MODE, 0);
- outputRoot = new Path(conf.get(CONF_OUTPUT_ROOT));
- inputRoot = new Path(conf.get(CONF_INPUT_ROOT));
-
- inputArchive = new Path(inputRoot, HConstants.HFILE_ARCHIVE_DIRECTORY);
- outputArchive = new Path(outputRoot, HConstants.HFILE_ARCHIVE_DIRECTORY);
-
- try {
- srcConf.setBoolean("fs." + inputRoot.toUri().getScheme() + ".impl.disable.cache", true);
- inputFs = FileSystem.get(inputRoot.toUri(), srcConf);
- } catch (IOException e) {
- throw new IOException("Could not get the input FileSystem with root=" + inputRoot, e);
- }
-
- try {
- destConf.setBoolean("fs." + outputRoot.toUri().getScheme() + ".impl.disable.cache", true);
- outputFs = FileSystem.get(outputRoot.toUri(), destConf);
- } catch (IOException e) {
- throw new IOException("Could not get the output FileSystem with root="+ outputRoot, e);
- }
-
- // Use the default block size of the outputFs if bigger
- int defaultBlockSize = Math.max((int) outputFs.getDefaultBlockSize(outputRoot), BUFFER_SIZE);
- bufferSize = conf.getInt(CONF_BUFFER_SIZE, defaultBlockSize);
- LOG.info("Using bufferSize=" + StringUtils.humanReadableInt(bufferSize));
-
- for (Counter c : Counter.values()) {
- context.getCounter(c).increment(0);
- }
- if (context.getConfiguration().getBoolean(Testing.CONF_TEST_FAILURE, false)) {
- testing.failuresCountToInject = conf.getInt(Testing.CONF_TEST_FAILURE_COUNT, 0);
- // Get number of times we have already injected failure based on attempt number of this
- // task.
- testing.injectedFailureCount = context.getTaskAttemptID().getId();
- }
- }
-
- @Override
- protected void cleanup(Context context) {
- IOUtils.closeStream(inputFs);
- IOUtils.closeStream(outputFs);
- }
-
- @Override
- public void map(BytesWritable key, NullWritable value, Context context)
- throws InterruptedException, IOException {
- SnapshotFileInfo inputInfo = SnapshotFileInfo.parseFrom(key.copyBytes());
- Path outputPath = getOutputPath(inputInfo);
-
- copyFile(context, inputInfo, outputPath);
- }
-
- /**
- * Returns the location where the inputPath will be copied.
- */
- private Path getOutputPath(final SnapshotFileInfo inputInfo) throws IOException {
- Path path = null;
- switch (inputInfo.getType()) {
- case HFILE:
- Path inputPath = new Path(inputInfo.getHfile());
- String family = inputPath.getParent().getName();
- TableName table = HFileLink.getReferencedTableName(inputPath.getName());
- String region = HFileLink.getReferencedRegionName(inputPath.getName());
- String hfile = HFileLink.getReferencedHFileName(inputPath.getName());
- path = new Path(FSUtils.getTableDir(new Path("./"), table),
- new Path(region, new Path(family, hfile)));
- break;
- case WAL:
- LOG.warn("snapshot does not keeps WALs: " + inputInfo);
- break;
- default:
- throw new IOException("Invalid File Type: " + inputInfo.getType().toString());
- }
- return new Path(outputArchive, path);
- }
-
- /**
- * Used by TestExportSnapshot to test for retries when failures happen.
- * Failure is injected in {@link #copyFile(Context, SnapshotFileInfo, Path)}.
- */
- private void injectTestFailure(final Context context, final SnapshotFileInfo inputInfo)
- throws IOException {
- if (!context.getConfiguration().getBoolean(Testing.CONF_TEST_FAILURE, false)) return;
- if (testing.injectedFailureCount >= testing.failuresCountToInject) return;
- testing.injectedFailureCount++;
- context.getCounter(Counter.COPY_FAILED).increment(1);
- LOG.debug("Injecting failure. Count: " + testing.injectedFailureCount);
- throw new IOException(String.format("TEST FAILURE (%d of max %d): Unable to copy input=%s",
- testing.injectedFailureCount, testing.failuresCountToInject, inputInfo));
- }
-
- private void copyFile(final Context context, final SnapshotFileInfo inputInfo,
- final Path outputPath) throws IOException {
- // Get the file information
- FileStatus inputStat = getSourceFileStatus(context, inputInfo);
-
- // Verify if the output file exists and is the same that we want to copy
- if (outputFs.exists(outputPath)) {
- FileStatus outputStat = outputFs.getFileStatus(outputPath);
- if (outputStat != null && sameFile(inputStat, outputStat)) {
- LOG.info("Skip copy " + inputStat.getPath() + " to " + outputPath + ", same file.");
- context.getCounter(Counter.FILES_SKIPPED).increment(1);
- context.getCounter(Counter.BYTES_SKIPPED).increment(inputStat.getLen());
- return;
- }
- }
-
- InputStream in = openSourceFile(context, inputInfo);
- int bandwidthMB = context.getConfiguration().getInt(CONF_BANDWIDTH_MB, 100);
- if (Integer.MAX_VALUE != bandwidthMB) {
- in = new ThrottledInputStream(new BufferedInputStream(in), bandwidthMB * 1024 * 1024L);
- }
-
- try {
- context.getCounter(Counter.BYTES_EXPECTED).increment(inputStat.getLen());
-
- // Ensure that the output folder is there and copy the file
- createOutputPath(outputPath.getParent());
- FSDataOutputStream out = outputFs.create(outputPath, true);
- try {
- copyData(context, inputStat.getPath(), in, outputPath, out, inputStat.getLen());
- } finally {
- out.close();
- }
-
- // Try to Preserve attributes
- if (!preserveAttributes(outputPath, inputStat)) {
- LOG.warn("You may have to run manually chown on: " + outputPath);
- }
- } finally {
- in.close();
- injectTestFailure(context, inputInfo);
- }
- }
-
- /**
- * Create the output folder and optionally set ownership.
- */
- private void createOutputPath(final Path path) throws IOException {
- if (filesUser == null && filesGroup == null) {
- outputFs.mkdirs(path);
- } else {
- Path parent = path.getParent();
- if (!outputFs.exists(parent) && !parent.isRoot()) {
- createOutputPath(parent);
- }
- outputFs.mkdirs(path);
- if (filesUser != null || filesGroup != null) {
- // override the owner when non-null user/group is specified
- outputFs.setOwner(path, filesUser, filesGroup);
- }
- if (filesMode > 0) {
- outputFs.setPermission(path, new FsPermission(filesMode));
- }
- }
- }
-
- /**
- * Try to preserve the file attributes selected by the user, copying them from the source file.
- * This is only required when you are exporting as a different user than "hbase" or on a system
- * that doesn't have the "hbase" user.
- *
- * This is not considered a blocking failure since the user can force a chmod with a user
- * that is known to be available on the system.
- */
- private boolean preserveAttributes(final Path path, final FileStatus refStat) {
- FileStatus stat;
- try {
- stat = outputFs.getFileStatus(path);
- } catch (IOException e) {
- LOG.warn("Unable to get the status for file=" + path);
- return false;
- }
-
- try {
- if (filesMode > 0 && stat.getPermission().toShort() != filesMode) {
- outputFs.setPermission(path, new FsPermission(filesMode));
- } else if (refStat != null && !stat.getPermission().equals(refStat.getPermission())) {
- outputFs.setPermission(path, refStat.getPermission());
- }
- } catch (IOException e) {
- LOG.warn("Unable to set the permission for file="+ stat.getPath() +": "+ e.getMessage());
- return false;
- }
-
- boolean hasRefStat = (refStat != null);
- String user = stringIsNotEmpty(filesUser) || !hasRefStat ? filesUser : refStat.getOwner();
- String group = stringIsNotEmpty(filesGroup) || !hasRefStat ? filesGroup : refStat.getGroup();
- if (stringIsNotEmpty(user) || stringIsNotEmpty(group)) {
- try {
- if (!(user.equals(stat.getOwner()) && group.equals(stat.getGroup()))) {
- outputFs.setOwner(path, user, group);
- }
- } catch (IOException e) {
- LOG.warn("Unable to set the owner/group for file="+ stat.getPath() +": "+ e.getMessage());
- LOG.warn("The user/group may not exist on the destination cluster: user=" +
- user + " group=" + group);
- return false;
- }
- }
-
- return true;
- }
-
- private boolean stringIsNotEmpty(final String str) {
- return str != null && str.length() > 0;
- }
-
- private void copyData(final Context context,
- final Path inputPath, final InputStream in,
- final Path outputPath, final FSDataOutputStream out,
- final long inputFileSize)
- throws IOException {
- final String statusMessage = "copied %s/" + StringUtils.humanReadableInt(inputFileSize) +
- " (%.1f%%)";
-
- try {
- byte[] buffer = new byte[bufferSize];
- long totalBytesWritten = 0;
- int reportBytes = 0;
- int bytesRead;
-
- long stime = System.currentTimeMillis();
- while ((bytesRead = in.read(buffer)) > 0) {
- out.write(buffer, 0, bytesRead);
- totalBytesWritten += bytesRead;
- reportBytes += bytesRead;
-
- if (reportBytes >= REPORT_SIZE) {
- context.getCounter(Counter.BYTES_COPIED).increment(reportBytes);
- context.setStatus(String.format(statusMessage,
- StringUtils.humanReadableInt(totalBytesWritten),
- (totalBytesWritten/(float)inputFileSize) * 100.0f) +
- " from " + inputPath + " to " + outputPath);
- reportBytes = 0;
- }
- }
- long etime = System.currentTimeMillis();
-
- context.getCounter(Counter.BYTES_COPIED).increment(reportBytes);
- context.setStatus(String.format(statusMessage,
- StringUtils.humanReadableInt(totalBytesWritten),
- (totalBytesWritten/(float)inputFileSize) * 100.0f) +
- " from " + inputPath + " to " + outputPath);
-
- // Verify that the written size match
- if (totalBytesWritten != inputFileSize) {
- String msg = "number of bytes copied not matching copied=" + totalBytesWritten +
- " expected=" + inputFileSize + " for file=" + inputPath;
- throw new IOException(msg);
- }
-
- LOG.info("copy completed for input=" + inputPath + " output=" + outputPath);
- LOG.info("size=" + totalBytesWritten +
- " (" + StringUtils.humanReadableInt(totalBytesWritten) + ")" +
- " time=" + StringUtils.formatTimeDiff(etime, stime) +
- String.format(" %.3fM/sec", (totalBytesWritten / ((etime - stime)/1000.0))/1048576.0));
- context.getCounter(Counter.FILES_COPIED).increment(1);
- } catch (IOException e) {
- LOG.error("Error copying " + inputPath + " to " + outputPath, e);
- context.getCounter(Counter.COPY_FAILED).increment(1);
- throw e;
- }
- }
-
- /**
- * Try to open the "source" file.
- * Throws an IOException if the communication with the inputFs fails or
- * if the file is not found.
- */
- private FSDataInputStream openSourceFile(Context context, final SnapshotFileInfo fileInfo)
- throws IOException {
- try {
- Configuration conf = context.getConfiguration();
- FileLink link = null;
- switch (fileInfo.getType()) {
- case HFILE:
- Path inputPath = new Path(fileInfo.getHfile());
- link = getFileLink(inputPath, conf);
- break;
- case WAL:
- String serverName = fileInfo.getWalServer();
- String logName = fileInfo.getWalName();
- link = new WALLink(inputRoot, serverName, logName);
- break;
- default:
- throw new IOException("Invalid File Type: " + fileInfo.getType().toString());
- }
- return link.open(inputFs);
- } catch (IOException e) {
- context.getCounter(Counter.MISSING_FILES).increment(1);
- LOG.error("Unable to open source file=" + fileInfo.toString(), e);
- throw e;
- }
- }
-
- private FileStatus getSourceFileStatus(Context context, final SnapshotFileInfo fileInfo)
- throws IOException {
- try {
- Configuration conf = context.getConfiguration();
- FileLink link = null;
- switch (fileInfo.getType()) {
- case HFILE:
- Path inputPath = new Path(fileInfo.getHfile());
- link = getFileLink(inputPath, conf);
- break;
- case WAL:
- link = new WALLink(inputRoot, fileInfo.getWalServer(), fileInfo.getWalName());
- break;
- default:
- throw new IOException("Invalid File Type: " + fileInfo.getType().toString());
- }
- return link.getFileStatus(inputFs);
- } catch (FileNotFoundException e) {
- context.getCounter(Counter.MISSING_FILES).increment(1);
- LOG.error("Unable to get the status for source file=" + fileInfo.toString(), e);
- throw e;
- } catch (IOException e) {
- LOG.error("Unable to get the status for source file=" + fileInfo.toString(), e);
- throw e;
- }
- }
-
- private FileLink getFileLink(Path path, Configuration conf) throws IOException{
- String regionName = HFileLink.getReferencedRegionName(path.getName());
- TableName tableName = HFileLink.getReferencedTableName(path.getName());
- if(MobUtils.getMobRegionInfo(tableName).getEncodedName().equals(regionName)) {
- return HFileLink.buildFromHFileLinkPattern(MobUtils.getQualifiedMobRootDir(conf),
- HFileArchiveUtil.getArchivePath(conf), path);
- }
- return HFileLink.buildFromHFileLinkPattern(inputRoot, inputArchive, path);
- }
-
- private FileChecksum getFileChecksum(final FileSystem fs, final Path path) {
- try {
- return fs.getFileChecksum(path);
- } catch (IOException e) {
- LOG.warn("Unable to get checksum for file=" + path, e);
- return null;
- }
- }
-
- /**
- * Check if the two files are equal by looking at the file length,
- * and at the checksum (if user has specified the verifyChecksum flag).
- */
- private boolean sameFile(final FileStatus inputStat, final FileStatus outputStat) {
- // Not matching length
- if (inputStat.getLen() != outputStat.getLen()) return false;
-
- // Mark files as equals, since user asked for no checksum verification
- if (!verifyChecksum) return true;
-
- // If checksums are not available, files are not the same.
- FileChecksum inChecksum = getFileChecksum(inputFs, inputStat.getPath());
- if (inChecksum == null) return false;
-
- FileChecksum outChecksum = getFileChecksum(outputFs, outputStat.getPath());
- if (outChecksum == null) return false;
-
- return inChecksum.equals(outChecksum);
- }
- }
-
- // ==========================================================================
- // Input Format
- // ==========================================================================
-
- /**
- * Extract the list of files (HFiles/WALs) to copy using Map-Reduce.
- * @return list of files referenced by the snapshot (pair of path and size)
- */
- private static List<Pair<SnapshotFileInfo, Long>> getSnapshotFiles(final Configuration conf,
- final FileSystem fs, final Path snapshotDir) throws IOException {
- SnapshotDescription snapshotDesc = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
-
- final List<Pair<SnapshotFileInfo, Long>> files = new ArrayList<>();
- final TableName table = TableName.valueOf(snapshotDesc.getTable());
-
- // Get snapshot files
- LOG.info("Loading Snapshot '" + snapshotDesc.getName() + "' hfile list");
- SnapshotReferenceUtil.visitReferencedFiles(conf, fs, snapshotDir, snapshotDesc,
- new SnapshotReferenceUtil.SnapshotVisitor() {
- @Override
- public void storeFile(final HRegionInfo regionInfo, final String family,
- final SnapshotRegionManifest.StoreFile storeFile) throws IOException {
- // for storeFile.hasReference() case, copied as part of the manifest
- if (!storeFile.hasReference()) {
- String region = regionInfo.getEncodedName();
- String hfile = storeFile.getName();
- Path path = HFileLink.createPath(table, region, family, hfile);
-
- SnapshotFileInfo fileInfo = SnapshotFileInfo.newBuilder()
- .setType(SnapshotFileInfo.Type.HFILE)
- .setHfile(path.toString())
- .build();
-
- long size;
- if (storeFile.hasFileSize()) {
- size = storeFile.getFileSize();
- } else {
- size = HFileLink.buildFromHFileLinkPattern(conf, path).getFileStatus(fs).getLen();
- }
- files.add(new Pair<>(fileInfo, size));
- }
- }
- });
-
- return files;
- }
-
- /**
- * Given a list of file paths and sizes, create around ngroups in as balanced a way as possible.
- * The groups created will have similar amounts of bytes.
- * <p>
- * The algorithm used is pretty straightforward; the file list is sorted by size,
- * and then each group fetches the biggest file available, iterating through the groups
- * while alternating direction.
- */
- static List<List<Pair<SnapshotFileInfo, Long>>> getBalancedSplits(
- final List<Pair<SnapshotFileInfo, Long>> files, final int ngroups) {
- // Sort files by size, from small to big
- Collections.sort(files, new Comparator<Pair<SnapshotFileInfo, Long>>() {
- public int compare(Pair<SnapshotFileInfo, Long> a, Pair<SnapshotFileInfo, Long> b) {
- long r = a.getSecond() - b.getSecond();
- return (r < 0) ? -1 : ((r > 0) ? 1 : 0);
- }
- });
-
- // create balanced groups
- List<List<Pair<SnapshotFileInfo, Long>>> fileGroups = new LinkedList<>();
- long[] sizeGroups = new long[ngroups];
- int hi = files.size() - 1;
- int lo = 0;
-
- List<Pair<SnapshotFileInfo, Long>> group;
- int dir = 1;
- int g = 0;
-
- while (hi >= lo) {
- if (g == fileGroups.size()) {
- group = new LinkedList<>();
- fileGroups.add(group);
- } else {
- group = fileGroups.get(g);
- }
-
- Pair<SnapshotFileInfo, Long> fileInfo = files.get(hi--);
-
- // add the hi one
- sizeGroups[g] += fileInfo.getSecond();
- group.add(fileInfo);
-
- // change direction when at the end or the beginning
- g += dir;
- if (g == ngroups) {
- dir = -1;
- g = ngroups - 1;
- } else if (g < 0) {
- dir = 1;
- g = 0;
- }
- }
-
- if (LOG.isDebugEnabled()) {
- for (int i = 0; i < sizeGroups.length; ++i) {
- LOG.debug("export split=" + i + " size=" + StringUtils.humanReadableInt(sizeGroups[i]));
- }
- }
-
- return fileGroups;
- }
-
- private static class ExportSnapshotInputFormat extends InputFormat<BytesWritable, NullWritable> {
- @Override
- public RecordReader<BytesWritable, NullWritable> createRecordReader(InputSplit split,
- TaskAttemptContext tac) throws IOException, InterruptedException {
- return new ExportSnapshotRecordReader(((ExportSnapshotInputSplit)split).getSplitKeys());
- }
-
- @Override
- public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
- Configuration conf = context.getConfiguration();
- Path snapshotDir = new Path(conf.get(CONF_SNAPSHOT_DIR));
- FileSystem fs = FileSystem.get(snapshotDir.toUri(), conf);
-
- List<Pair<SnapshotFileInfo, Long>> snapshotFiles = getSnapshotFiles(conf, fs, snapshotDir);
- int mappers = conf.getInt(CONF_NUM_SPLITS, 0);
- if (mappers == 0 && snapshotFiles.size() > 0) {
- mappers = 1 + (snapshotFiles.size() / conf.getInt(CONF_MAP_GROUP, 10));
- mappers = Math.min(mappers, snapshotFiles.size());
- conf.setInt(CONF_NUM_SPLITS, mappers);
- conf.setInt(MR_NUM_MAPS, mappers);
- }
-
- List<List<Pair<SnapshotFileInfo, Long>>> groups = getBalancedSplits(snapshotFiles, mappers);
- List<InputSplit> splits = new ArrayList(groups.size());
- for (List<Pair<SnapshotFileInfo, Long>> files: groups) {
- splits.add(new ExportSnapshotInputSplit(files));
- }
- return splits;
- }
-
- private static class ExportSnapshotInputSplit extends InputSplit implements Writable {
- private List<Pair<BytesWritable, Long>> files;
- private long length;
-
- public ExportSnapshotInputSplit() {
- this.files = null;
- }
-
- public ExportSnapshotInputSplit(final List<Pair<SnapshotFileInfo, Long>> snapshotFiles) {
- this.files = new ArrayList(snapshotFiles.size());
- for (Pair<SnapshotFileInfo, Long> fileInfo: snapshotFiles) {
- this.files.add(new Pair<>(
- new BytesWritable(fileInfo.getFirst().toByteArray()), fileInfo.getSecond()));
- this.length += fileInfo.getSecond();
- }
- }
-
- private List<Pair<BytesWritable, Long>> getSplitKeys() {
- return files;
- }
-
- @Override
- public long getLength() throws IOException, InterruptedException {
- return length;
- }
-
- @Override
- public String[] getLocations() throws IOException, InterruptedException {
- return new String[] {};
- }
-
- @Override
- public void readFields(DataInput in) throws IOException {
- int count = in.readInt();
- files = new ArrayList<>(count);
- length = 0;
- for (int i = 0; i < count; ++i) {
- BytesWritable fileInfo = new BytesWritable();
- fileInfo.readFields(in);
- long size = in.readLong();
- files.add(new Pair<>(fileInfo, size));
- length += size;
- }
- }
-
- @Override
- public void write(DataOutput out) throws IOException {
- out.writeInt(files.size());
- for (final Pair<BytesWritable, Long> fileInfo: files) {
- fileInfo.getFirst().write(out);
- out.writeLong(fileInfo.getSecond());
- }
- }
- }
-
- private static class ExportSnapshotRecordReader
- extends RecordReader<BytesWritable, NullWritable> {
- private final List<Pair<BytesWritable, Long>> files;
- private long totalSize = 0;
- private long procSize = 0;
- private int index = -1;
-
- ExportSnapshotRecordReader(final List<Pair<BytesWritable, Long>> files) {
- this.files = files;
- for (Pair<BytesWritable, Long> fileInfo: files) {
- totalSize += fileInfo.getSecond();
- }
- }
-
- @Override
- public void close() { }
-
- @Override
- public BytesWritable getCurrentKey() { return files.get(index).getFirst(); }
-
- @Override
- public NullWritable getCurrentValue() { return NullWritable.get(); }
-
- @Override
- public float getProgress() { return (float)procSize / totalSize; }
-
- @Override
- public void initialize(InputSplit split, TaskAttemptContext tac) { }
-
- @Override
- public boolean nextKeyValue() {
- if (index >= 0) {
- procSize += files.get(index).getSecond();
- }
- return(++index < files.size());
- }
- }
- }
-
- // ==========================================================================
- // Tool
- // ==========================================================================
-
- /**
- * Run Map-Reduce Job to perform the files copy.
- */
- private void runCopyJob(final Path inputRoot, final Path outputRoot,
- final String snapshotName, final Path snapshotDir, final boolean verifyChecksum,
- final String filesUser, final String filesGroup, final int filesMode,
- final int mappers, final int bandwidthMB)
- throws IOException, InterruptedException, ClassNotFoundException {
- Configuration conf = getConf();
- if (filesGroup != null) conf.set(CONF_FILES_GROUP, filesGroup);
- if (filesUser != null) conf.set(CONF_FILES_USER, filesUser);
- if (mappers > 0) {
- conf.setInt(CONF_NUM_SPLITS, mappers);
- conf.setInt(MR_NUM_MAPS, mappers);
- }
- conf.setInt(CONF_FILES_MODE, filesMode);
- conf.setBoolean(CONF_CHECKSUM_VERIFY, verifyChecksum);
- conf.set(CONF_OUTPUT_ROOT, outputRoot.toString());
- conf.set(CONF_INPUT_ROOT, inputRoot.toString());
- conf.setInt(CONF_BANDWIDTH_MB, bandwidthMB);
- conf.set(CONF_SNAPSHOT_NAME, snapshotName);
- conf.set(CONF_SNAPSHOT_DIR, snapshotDir.toString());
-
- Job job = new Job(conf);
- job.setJobName("ExportSnapshot-" + snapshotName);
- job.setJarByClass(ExportSnapshot.class);
- TableMapReduceUtil.addDependencyJars(job);
- job.setMapperClass(ExportMapper.class);
- job.setInputFormatClass(ExportSnapshotInputFormat.class);
- job.setOutputFormatClass(NullOutputFormat.class);
- job.setMapSpeculativeExecution(false);
- job.setNumReduceTasks(0);
-
- // Acquire the delegation Tokens
- Configuration srcConf = HBaseConfiguration.createClusterConf(conf, null, CONF_SOURCE_PREFIX);
- TokenCache.obtainTokensForNamenodes(job.getCredentials(),
- new Path[] { inputRoot }, srcConf);
- Configuration destConf = HBaseConfiguration.createClusterConf(conf, null, CONF_DEST_PREFIX);
- TokenCache.obtainTokensForNamenodes(job.getCredentials(),
- new Path[] { outputRoot }, destConf);
-
- // Run the MR Job
- if (!job.waitForCompletion(true)) {
- // TODO: Replace the fixed string with job.getStatus().getFailureInfo()
- // when it will be available on all the supported versions.
- throw new ExportSnapshotException("Copy Files Map-Reduce Job failed");
- }
- }
-
- private void verifySnapshot(final Configuration baseConf,
- final FileSystem fs, final Path rootDir, final Path snapshotDir) throws IOException {
- // Update the conf with the current root dir, since may be a different cluster
- Configuration conf = new Configuration(baseConf);
- FSUtils.setRootDir(conf, rootDir);
- FSUtils.setFsDefault(conf, FSUtils.getRootDir(conf));
- SnapshotDescription snapshotDesc = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
- SnapshotReferenceUtil.verifySnapshot(conf, fs, snapshotDir, snapshotDesc);
- }
-
- /**
- * Set path ownership.
- */
- private void setOwner(final FileSystem fs, final Path path, final String user,
- final String group, final boolean recursive) throws IOException {
- if (user != null || group != null) {
- if (recursive && fs.isDirectory(path)) {
- for (FileStatus child : fs.listStatus(path)) {
- setOwner(fs, child.getPath(), user, group, recursive);
- }
- }
- fs.setOwner(path, user, group);
- }
- }
-
- /**
- * Set path permission.
- */
- private void setPermission(final FileSystem fs, final Path path, final short filesMode,
- final boolean recursive) throws IOException {
- if (filesMode > 0) {
- FsPermission perm = new FsPermission(filesMode);
- if (recursive && fs.isDirectory(path)) {
- for (FileStatus child : fs.listStatus(path)) {
- setPermission(fs, child.getPath(), filesMode, recursive);
- }
- }
- fs.setPermission(path, perm);
- }
- }
-
- private boolean verifyTarget = true;
- private boolean verifyChecksum = true;
- private String snapshotName = null;
- private String targetName = null;
- private boolean overwrite = false;
- private String filesGroup = null;
- private String filesUser = null;
- private Path outputRoot = null;
- private Path inputRoot = null;
- private int bandwidthMB = Integer.MAX_VALUE;
- private int filesMode = 0;
- private int mappers = 0;
-
- @Override
- protected void processOptions(CommandLine cmd) {
- snapshotName = cmd.getOptionValue(Options.SNAPSHOT.getLongOpt(), snapshotName);
- targetName = cmd.getOptionValue(Options.TARGET_NAME.getLongOpt(), targetName);
- if (cmd.hasOption(Options.COPY_TO.getLongOpt())) {
- outputRoot = new Path(cmd.getOptionValue(Options.COPY_TO.getLongOpt()));
- }
- if (cmd.hasOption(Options.COPY_FROM.getLongOpt())) {
- inputRoot = new Path(cmd.getOptionValue(Options.COPY_FROM.getLongOpt()));
- }
- mappers = getOptionAsInt(cmd, Options.MAPPERS.getLongOpt(), mappers);
- filesUser = cmd.getOptionValue(Options.CHUSER.getLongOpt(), filesUser);
- filesGroup = cmd.getOptionValue(Options.CHGROUP.getLongOpt(), filesGroup);
- filesMode = getOptionAsInt(cmd, Options.CHMOD.getLongOpt(), filesMode);
- bandwidthMB = getOptionAsInt(cmd, Options.BANDWIDTH.getLongOpt(), bandwidthMB);
- overwrite = cmd.hasOption(Options.OVERWRITE.getLongOpt());
- // And verifyChecksum and verifyTarget with values read from old args in processOldArgs(...).
- verifyChecksum = !cmd.hasOption(Options.NO_CHECKSUM_VERIFY.getLongOpt());
- verifyTarget = !cmd.hasOption(Options.NO_TARGET_VERIFY.getLongOpt());
- }
-
- /**
- * Execute the export snapshot by copying the snapshot metadata, hfiles and wals.
- * @return 0 on success, and != 0 upon failure.
- */
- @Override
- public int doWork() throws IOException {
- Configuration conf = getConf();
-
- // Check user options
- if (snapshotName == null) {
- System.err.println("Snapshot name not provided.");
- LOG.error("Use -h or --help for usage instructions.");
- return 0;
- }
-
- if (outputRoot == null) {
- System.err.println("Destination file-system (--" + Options.COPY_TO.getLongOpt()
- + ") not provided.");
- LOG.error("Use -h or --help for usage instructions.");
- return 0;
- }
-
- if (targetName == null) {
- targetName = snapshotName;
- }
- if (inputRoot == null) {
- inputRoot = FSUtils.getRootDir(conf);
- } else {
- FSUtils.setRootDir(conf, inputRoot);
- }
-
- Configuration srcConf = HBaseConfiguration.createClusterConf(conf, null, CONF_SOURCE_PREFIX);
- srcConf.setBoolean("fs." + inputRoot.toUri().getScheme() + ".impl.disable.cache", true);
- FileSystem inputFs = FileSystem.get(inputRoot.toUri(), srcConf);
- LOG.debug("inputFs=" + inputFs.getUri().toString() + " inputRoot=" + inputRoot);
- Configuration destConf = HBaseConfiguration.createClusterConf(conf, null, CONF_DEST_PREFIX);
- destConf.setBoolean("fs." + outputRoot.toUri().getScheme() + ".impl.disable.cache", true);
- FileSystem outputFs = FileSystem.get(outputRoot.toUri(), destConf);
- LOG.debug("outputFs=" + outputFs.getUri().toString() + " outputRoot=" + outputRoot.toString());
-
- boolean skipTmp = conf.getBoolean(CONF_SKIP_TMP, false);
-
- Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, inputRoot);
- Path snapshotTmpDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(targetName, outputRoot);
- Path outputSnapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(targetName, outputRoot);
- Path initialOutputSnapshotDir = skipTmp ? outputSnapshotDir : snapshotTmpDir;
-
- // Find the necessary directory which need to change owner and group
- Path needSetOwnerDir = SnapshotDescriptionUtils.getSnapshotRootDir(outputRoot);
- if (outputFs.exists(needSetOwnerDir)) {
- if (skipTmp) {
- needSetOwnerDir = outputSnapshotDir;
- } else {
- needSetOwnerDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(outputRoot);
- if (outputFs.exists(needSetOwnerDir)) {
- needSetOwnerDir = snapshotTmpDir;
- }
- }
- }
-
- // Check if the snapshot already exists
- if (outputFs.exists(outputSnapshotDir)) {
- if (overwrite) {
- if (!outputFs.delete(outputSnapshotDir, true)) {
- System.err.println("Unable to remove existing snapshot directory: " + outputSnapshotDir);
- return 1;
- }
- } else {
- System.err.println("The snapshot '" + targetName +
- "' already exists in the destination: " + outputSnapshotDir);
- return 1;
- }
- }
-
- if (!skipTmp) {
- // Check if the snapshot already in-progress
- if (outputFs.exists(snapshotTmpDir)) {
- if (overwrite) {
- if (!outputFs.delete(snapshotTmpDir, true)) {
- System.err.println("Unable to remove existing snapshot tmp directory: "+snapshotTmpDir);
- return 1;
- }
- } else {
- System.err.println("A snapshot with the same name '"+ targetName +"' may be in-progress");
- System.err.println("Please check "+snapshotTmpDir+". If the snapshot has completed, ");
- System.err.println("consider removing "+snapshotTmpDir+" by using the -overwrite option");
- return 1;
- }
- }
- }
-
- // Step 1 - Copy fs1:/.snapshot/<snapshot> to fs2:/.snapshot/.tmp/<snapshot>
- // The snapshot references must be copied before the hfiles otherwise the cleaner
- // will remove them because they are unreferenced.
- try {
- LOG.info("Copy Snapshot Manifest");
- FileUtil.copy(inputFs, snapshotDir, outputFs, initialOutputSnapshotDir, false, false, conf);
- } catch (IOException e) {
- throw new ExportSnapshotException("Failed to copy the snapshot directory: from=" +
- snapshotDir + " to=" + initialOutputSnapshotDir, e);
- } finally {
- if (filesUser != null || filesGroup != null) {
- LOG.warn((filesUser == null ? "" : "Change the owner of " + needSetOwnerDir + " to "
- + filesUser)
- + (filesGroup == null ? "" : ", Change the group of " + needSetOwnerDir + " to "
- + filesGroup));
- setOwner(outputFs, needSetOwnerDir, filesUser, filesGroup, true);
- }
- if (filesMode > 0) {
- LOG.warn("Change the permission of " + needSetOwnerDir + " to " + filesMode);
- setPermission(outputFs, needSetOwnerDir, (short)filesMode, true);
- }
- }
-
- // Write a new .snapshotinfo if the target name is different from the source name
- if (!targetName.equals(snapshotName)) {
- SnapshotDescription snapshotDesc =
- SnapshotDescriptionUtils.readSnapshotInfo(inputFs, snapshotDir)
- .toBuilder()
- .setName(targetName)
- .build();
- SnapshotDescriptionUtils.writeSnapshotInfo(snapshotDesc, initialOutputSnapshotDir, outputFs);
- if (filesUser != null || filesGroup != null) {
- outputFs.setOwner(new Path(initialOutputSnapshotDir,
- SnapshotDescriptionUtils.SNAPSHOTINFO_FILE), filesUser, filesGroup);
- }
- if (filesMode > 0) {
- outputFs.setPermission(new Path(initialOutputSnapshotDir,
- SnapshotDescriptionUtils.SNAPSHOTINFO_FILE), new FsPermission((short)filesMode));
- }
- }
-
- // Step 2 - Start MR Job to copy files
- // The snapshot references must be copied before the files otherwise the files gets removed
- // by the HFileArchiver, since they have no references.
- try {
- runCopyJob(inputRoot, outputRoot, snapshotName, snapshotDir, verifyChecksum,
- filesUser, filesGroup, filesMode, mappers, bandwidthMB);
-
- LOG.info("Finalize the Snapshot Export");
- if (!skipTmp) {
- // Step 3 - Rename fs2:/.snapshot/.tmp/<snapshot> fs2:/.snapshot/<snapshot>
- if (!outputFs.rename(snapshotTmpDir, outputSnapshotDir)) {
- throw new ExportSnapshotException("Unable to rename snapshot directory from=" +
- snapshotTmpDir + " to=" + outputSnapshotDir);
- }
- }
-
- // Step 4 - Verify snapshot integrity
- if (verifyTarget) {
- LOG.info("Verify snapshot integrity");
- verifySnapshot(destConf, outputFs, outputRoot, outputSnapshotDir);
- }
-
- LOG.info("Export Completed: " + targetName);
- return 0;
- } catch (Exception e) {
- LOG.error("Snapshot export failed", e);
- if (!skipTmp) {
- outputFs.delete(snapshotTmpDir, true);
- }
- outputFs.delete(outputSnapshotDir, true);
- return 1;
- } finally {
- IOUtils.closeStream(inputFs);
- IOUtils.closeStream(outputFs);
- }
- }
-
- @Override
- protected void printUsage() {
- super.printUsage();
- System.out.println("\n"
- + "Examples:\n"
- + " hbase snapshot export \\\n"
- + " --snapshot MySnapshot --copy-to hdfs://srv2:8082/hbase \\\n"
- + " --chuser MyUser --chgroup MyGroup --chmod 700 --mappers 16\n"
- + "\n"
- + " hbase snapshot export \\\n"
- + " --snapshot MySnapshot --copy-from hdfs://srv2:8082/hbase \\\n"
- + " --copy-to hdfs://srv1:50070/hbase");
- }
-
- @Override protected void addOptions() {
- addRequiredOption(Options.SNAPSHOT);
- addOption(Options.COPY_TO);
- addOption(Options.COPY_FROM);
- addOption(Options.TARGET_NAME);
- addOption(Options.NO_CHECKSUM_VERIFY);
- addOption(Options.NO_TARGET_VERIFY);
- addOption(Options.OVERWRITE);
- addOption(Options.CHUSER);
- addOption(Options.CHGROUP);
- addOption(Options.CHMOD);
- addOption(Options.MAPPERS);
- addOption(Options.BANDWIDTH);
- }
-
- public static void main(String[] args) {
- new ExportSnapshot().doStaticMain(args);
- }
-}
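ExportSnapshot implements Tool, so besides the hbase snapshot export invocation shown in printUsage() it can also be driven from a small Java launcher. A minimal sketch, reusing the option names defined above; the snapshot name and destination URI are placeholder values taken from the usage example:

    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.snapshot.ExportSnapshot;
    import org.apache.hadoop.util.ToolRunner;

    public class ExportSnapshotLauncher {
      public static void main(String[] args) throws Exception {
        // Option names match the Options block above; values are placeholders.
        String[] toolArgs = {
            "--snapshot", "MySnapshot",
            "--copy-to", "hdfs://srv2:8082/hbase",
            "--mappers", "16"
        };
        int exitCode = ToolRunner.run(HBaseConfiguration.create(), new ExportSnapshot(), toolArgs);
        System.exit(exitCode);
      }
    }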
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/util/MapreduceDependencyClasspathTool.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/MapreduceDependencyClasspathTool.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/MapreduceDependencyClasspathTool.java
deleted file mode 100644
index e8f073d..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/MapreduceDependencyClasspathTool.java
+++ /dev/null
@@ -1,73 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.util;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.HBaseInterfaceAudience;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-import org.apache.log4j.Level;
-import org.apache.log4j.Logger;
-
-/**
- * Generate a classpath string containing any jars required by mapreduce jobs. Specify
- * additional values by providing a comma-separated list of paths via -Dtmpjars.
- */
-@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
-public class MapreduceDependencyClasspathTool implements Tool {
-
- private Configuration conf;
-
- @Override
- public void setConf(Configuration conf) {
- this.conf = conf;
- }
-
- @Override
- public Configuration getConf() {
- return conf;
- }
-
- @Override
- public int run(String[] args) throws Exception {
- if (args.length > 0) {
- System.err.println("Usage: hbase mapredcp [-Dtmpjars=...]");
- System.err.println(" Construct a CLASSPATH containing dependency jars required to run a mapreduce");
- System.err.println(" job. By default, includes any jars detected by TableMapReduceUtils. Provide");
- System.err.println(" additional entries by specifying a comma-separated list in tmpjars.");
- return 0;
- }
-
- TableMapReduceUtil.addHBaseDependencyJars(getConf());
- System.out.println(TableMapReduceUtil.buildDependencyClasspath(getConf()));
- return 0;
- }
-
- public static void main(String[] argv) throws Exception {
- // Silence the usual noise. This is probably fragile...
- Logger logger = Logger.getLogger("org.apache.hadoop.hbase");
- if (logger != null) {
- logger.setLevel(Level.WARN);
- }
- System.exit(ToolRunner.run(
- HBaseConfiguration.create(), new MapreduceDependencyClasspathTool(), argv));
- }
-}
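The tool above is essentially a thin wrapper over two TableMapReduceUtil calls. A driver that wants the same dependency classpath without shelling out to "hbase mapredcp" could compute it directly; a short sketch using only the calls already present in the class:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;

    public class MapredCpSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // Same two calls the tool makes: register the HBase dependency jars,
        // then build the classpath string from whatever ended up in tmpjars.
        TableMapReduceUtil.addHBaseDependencyJars(conf);
        System.out.println(TableMapReduceUtil.buildDependencyClasspath(conf));
      }
    }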
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/util/RegionSizeCalculator.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/RegionSizeCalculator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/RegionSizeCalculator.java
deleted file mode 100644
index 99769b7..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/RegionSizeCalculator.java
+++ /dev/null
@@ -1,146 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.util;
-
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.TreeMap;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.Sets;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.HRegionLocation;
-import org.apache.hadoop.hbase.RegionLoad;
-import org.apache.hadoop.hbase.ServerName;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.classification.InterfaceStability;
-import org.apache.hadoop.hbase.client.Admin;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.RegionLocator;
-import org.apache.hadoop.hbase.client.Table;
-
-/**
- * Computes size of each region for given table and given column families.
- * The value is used by MapReduce for better scheduling.
- * */
-@InterfaceStability.Evolving
-@InterfaceAudience.Private
-public class RegionSizeCalculator {
-
- private static final Log LOG = LogFactory.getLog(RegionSizeCalculator.class);
-
- /**
- * Maps each region to its size in bytes.
- * */
- private final Map<byte[], Long> sizeMap = new TreeMap<>(Bytes.BYTES_COMPARATOR);
-
- static final String ENABLE_REGIONSIZECALCULATOR = "hbase.regionsizecalculator.enable";
- private static final long MEGABYTE = 1024L * 1024L;
-
- /**
- * Computes size of each region for table and given column families.
- *
- * @deprecated Use {@link #RegionSizeCalculator(RegionLocator, Admin)} instead.
- */
- @Deprecated
- public RegionSizeCalculator(Table table) throws IOException {
- try (Connection conn = ConnectionFactory.createConnection(table.getConfiguration());
- RegionLocator locator = conn.getRegionLocator(table.getName());
- Admin admin = conn.getAdmin()) {
- init(locator, admin);
- }
- }
-
- /**
- * Computes size of each region for table and given column families.
- * */
- public RegionSizeCalculator(RegionLocator regionLocator, Admin admin) throws IOException {
- init(regionLocator, admin);
- }
-
- private void init(RegionLocator regionLocator, Admin admin)
- throws IOException {
- if (!enabled(admin.getConfiguration())) {
- LOG.info("Region size calculation disabled.");
- return;
- }
-
- if (regionLocator.getName().isSystemTable()) {
- LOG.info("Region size calculation disabled for system tables.");
- return;
- }
-
- LOG.info("Calculating region sizes for table \"" + regionLocator.getName() + "\".");
-
- // Get the servers which host regions of the table
- Set<ServerName> tableServers = getRegionServersOfTable(regionLocator);
-
- for (ServerName tableServerName : tableServers) {
- Map<byte[], RegionLoad> regionLoads =
- admin.getRegionLoad(tableServerName, regionLocator.getName());
- for (RegionLoad regionLoad : regionLoads.values()) {
-
- byte[] regionId = regionLoad.getName();
- long regionSizeBytes = regionLoad.getStorefileSizeMB() * MEGABYTE;
- sizeMap.put(regionId, regionSizeBytes);
-
- if (LOG.isDebugEnabled()) {
- LOG.debug("Region " + regionLoad.getNameAsString() + " has size " + regionSizeBytes);
- }
- }
- }
- LOG.debug("Region sizes calculated");
- }
-
- private Set<ServerName> getRegionServersOfTable(RegionLocator regionLocator)
- throws IOException {
-
- Set<ServerName> tableServers = Sets.newHashSet();
- for (HRegionLocation regionLocation : regionLocator.getAllRegionLocations()) {
- tableServers.add(regionLocation.getServerName());
- }
- return tableServers;
- }
-
- boolean enabled(Configuration configuration) {
- return configuration.getBoolean(ENABLE_REGIONSIZECALCULATOR, true);
- }
-
- /**
- * Returns size of given region in bytes. Returns 0 if region was not found.
- * */
- public long getRegionSize(byte[] regionId) {
- Long size = sizeMap.get(regionId);
- if (size == null) {
- LOG.debug("Unknown region:" + Arrays.toString(regionId));
- return 0;
- } else {
- return size;
- }
- }
-
- public Map<byte[], Long> getRegionSizeMap() {
- return Collections.unmodifiableMap(sizeMap);
- }
-}
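The javadoc above notes that the computed sizes are consumed by MapReduce for split scheduling. A standalone usage sketch of the non-deprecated constructor follows; the table name is a placeholder and the import uses the pre-move package shown in this diff:

    import java.util.Map;

    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.TableName;
    import org.apache.hadoop.hbase.client.Admin;
    import org.apache.hadoop.hbase.client.Connection;
    import org.apache.hadoop.hbase.client.ConnectionFactory;
    import org.apache.hadoop.hbase.client.RegionLocator;
    import org.apache.hadoop.hbase.util.Bytes;
    import org.apache.hadoop.hbase.util.RegionSizeCalculator;

    public class RegionSizeSketch {
      public static void main(String[] args) throws Exception {
        try (Connection conn = ConnectionFactory.createConnection(HBaseConfiguration.create());
             RegionLocator locator = conn.getRegionLocator(TableName.valueOf("my_table"));
             Admin admin = conn.getAdmin()) {
          // Non-deprecated constructor from the class above; sizes are derived from RegionLoad.
          RegionSizeCalculator calc = new RegionSizeCalculator(locator, admin);
          for (Map.Entry<byte[], Long> e : calc.getRegionSizeMap().entrySet()) {
            System.out.println(Bytes.toStringBinary(e.getKey()) + " = " + e.getValue() + " bytes");
          }
        }
      }
    }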
[35/41] hbase git commit: HBASE-18640 Move mapreduce out of
hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/SyncTable.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/SyncTable.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/SyncTable.java
new file mode 100644
index 0000000..c72a0c3
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/SyncTable.java
@@ -0,0 +1,786 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.Collections;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellComparator;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Mutation;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapreduce.Counters;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.base.Throwables;
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.Iterators;
+
+public class SyncTable extends Configured implements Tool {
+
+ private static final Log LOG = LogFactory.getLog(SyncTable.class);
+
+ static final String SOURCE_HASH_DIR_CONF_KEY = "sync.table.source.hash.dir";
+ static final String SOURCE_TABLE_CONF_KEY = "sync.table.source.table.name";
+ static final String TARGET_TABLE_CONF_KEY = "sync.table.target.table.name";
+ static final String SOURCE_ZK_CLUSTER_CONF_KEY = "sync.table.source.zk.cluster";
+ static final String TARGET_ZK_CLUSTER_CONF_KEY = "sync.table.target.zk.cluster";
+ static final String DRY_RUN_CONF_KEY="sync.table.dry.run";
+
+ Path sourceHashDir;
+ String sourceTableName;
+ String targetTableName;
+
+ String sourceZkCluster;
+ String targetZkCluster;
+ boolean dryRun;
+
+ Counters counters;
+
+ public SyncTable(Configuration conf) {
+ super(conf);
+ }
+
+ public Job createSubmittableJob(String[] args) throws IOException {
+ FileSystem fs = sourceHashDir.getFileSystem(getConf());
+ if (!fs.exists(sourceHashDir)) {
+ throw new IOException("Source hash dir not found: " + sourceHashDir);
+ }
+
+ HashTable.TableHash tableHash = HashTable.TableHash.read(getConf(), sourceHashDir);
+ LOG.info("Read source hash manifest: " + tableHash);
+ LOG.info("Read " + tableHash.partitions.size() + " partition keys");
+ if (!tableHash.tableName.equals(sourceTableName)) {
+ LOG.warn("Table name mismatch - manifest indicates hash was taken from: "
+ + tableHash.tableName + " but job is reading from: " + sourceTableName);
+ }
+ if (tableHash.numHashFiles != tableHash.partitions.size() + 1) {
+ throw new RuntimeException("Hash data appears corrupt. The number of of hash files created"
+ + " should be 1 more than the number of partition keys. However, the manifest file "
+ + " says numHashFiles=" + tableHash.numHashFiles + " but the number of partition keys"
+ + " found in the partitions file is " + tableHash.partitions.size());
+ }
+
+ Path dataDir = new Path(sourceHashDir, HashTable.HASH_DATA_DIR);
+ int dataSubdirCount = 0;
+ for (FileStatus file : fs.listStatus(dataDir)) {
+ if (file.getPath().getName().startsWith(HashTable.OUTPUT_DATA_FILE_PREFIX)) {
+ dataSubdirCount++;
+ }
+ }
+
+ if (dataSubdirCount != tableHash.numHashFiles) {
+ throw new RuntimeException("Hash data appears corrupt. The number of of hash files created"
+ + " should be 1 more than the number of partition keys. However, the number of data dirs"
+ + " found is " + dataSubdirCount + " but the number of partition keys"
+ + " found in the partitions file is " + tableHash.partitions.size());
+ }
+
+ Job job = Job.getInstance(getConf(), getConf().get("mapreduce.job.name",
+ "syncTable_" + sourceTableName + "-" + targetTableName));
+ Configuration jobConf = job.getConfiguration();
+ job.setJarByClass(HashTable.class);
+ jobConf.set(SOURCE_HASH_DIR_CONF_KEY, sourceHashDir.toString());
+ jobConf.set(SOURCE_TABLE_CONF_KEY, sourceTableName);
+ jobConf.set(TARGET_TABLE_CONF_KEY, targetTableName);
+ if (sourceZkCluster != null) {
+ jobConf.set(SOURCE_ZK_CLUSTER_CONF_KEY, sourceZkCluster);
+ }
+ if (targetZkCluster != null) {
+ jobConf.set(TARGET_ZK_CLUSTER_CONF_KEY, targetZkCluster);
+ }
+ jobConf.setBoolean(DRY_RUN_CONF_KEY, dryRun);
+
+ TableMapReduceUtil.initTableMapperJob(targetTableName, tableHash.initScan(),
+ SyncMapper.class, null, null, job);
+
+ job.setNumReduceTasks(0);
+
+ if (dryRun) {
+ job.setOutputFormatClass(NullOutputFormat.class);
+ } else {
+ // No reducers. Just write straight to table. Call initTableReducerJob
+ // because it sets up the TableOutputFormat.
+ TableMapReduceUtil.initTableReducerJob(targetTableName, null, job, null,
+ targetZkCluster, null, null);
+
+ // would be nice to add an option for bulk load instead
+ }
+
+ // Obtain an authentication token, for the specified cluster, on behalf of the current user
+ if (sourceZkCluster != null) {
+ Configuration peerConf =
+ HBaseConfiguration.createClusterConf(job.getConfiguration(), sourceZkCluster);
+ TableMapReduceUtil.initCredentialsForCluster(job, peerConf);
+ }
+ return job;
+ }
+
+ public static class SyncMapper extends TableMapper<ImmutableBytesWritable, Mutation> {
+ Path sourceHashDir;
+
+ Connection sourceConnection;
+ Connection targetConnection;
+ Table sourceTable;
+ Table targetTable;
+ boolean dryRun;
+
+ HashTable.TableHash sourceTableHash;
+ HashTable.TableHash.Reader sourceHashReader;
+ ImmutableBytesWritable currentSourceHash;
+ ImmutableBytesWritable nextSourceKey;
+ HashTable.ResultHasher targetHasher;
+
+ Throwable mapperException;
+
+ public static enum Counter {BATCHES, HASHES_MATCHED, HASHES_NOT_MATCHED, SOURCEMISSINGROWS,
+ SOURCEMISSINGCELLS, TARGETMISSINGROWS, TARGETMISSINGCELLS, ROWSWITHDIFFS, DIFFERENTCELLVALUES,
+ MATCHINGROWS, MATCHINGCELLS, EMPTY_BATCHES, RANGESMATCHED, RANGESNOTMATCHED};
+
+ @Override
+ protected void setup(Context context) throws IOException {
+
+ Configuration conf = context.getConfiguration();
+ sourceHashDir = new Path(conf.get(SOURCE_HASH_DIR_CONF_KEY));
+ sourceConnection = openConnection(conf, SOURCE_ZK_CLUSTER_CONF_KEY, null);
+ targetConnection = openConnection(conf, TARGET_ZK_CLUSTER_CONF_KEY,
+ TableOutputFormat.OUTPUT_CONF_PREFIX);
+ sourceTable = openTable(sourceConnection, conf, SOURCE_TABLE_CONF_KEY);
+ targetTable = openTable(targetConnection, conf, TARGET_TABLE_CONF_KEY);
+ dryRun = conf.getBoolean(DRY_RUN_CONF_KEY, false);
+
+ sourceTableHash = HashTable.TableHash.read(conf, sourceHashDir);
+ LOG.info("Read source hash manifest: " + sourceTableHash);
+ LOG.info("Read " + sourceTableHash.partitions.size() + " partition keys");
+
+ TableSplit split = (TableSplit) context.getInputSplit();
+ ImmutableBytesWritable splitStartKey = new ImmutableBytesWritable(split.getStartRow());
+
+ sourceHashReader = sourceTableHash.newReader(conf, splitStartKey);
+ findNextKeyHashPair();
+
+ // create a hasher, but don't start it right away
+ // instead, find the first hash batch at or after the start row
+ // and skip any rows that come before. they will be caught by the previous task
+ targetHasher = new HashTable.ResultHasher();
+ }
+
+ private static Connection openConnection(Configuration conf, String zkClusterConfKey,
+ String configPrefix)
+ throws IOException {
+ String zkCluster = conf.get(zkClusterConfKey);
+ Configuration clusterConf = HBaseConfiguration.createClusterConf(conf,
+ zkCluster, configPrefix);
+ return ConnectionFactory.createConnection(clusterConf);
+ }
+
+ private static Table openTable(Connection connection, Configuration conf,
+ String tableNameConfKey) throws IOException {
+ return connection.getTable(TableName.valueOf(conf.get(tableNameConfKey)));
+ }
+
+ /**
+ * Attempt to read the next source key/hash pair.
+ * If there are no more, set nextSourceKey to null
+ */
+ private void findNextKeyHashPair() throws IOException {
+ boolean hasNext = sourceHashReader.next();
+ if (hasNext) {
+ nextSourceKey = sourceHashReader.getCurrentKey();
+ } else {
+ // no more keys - last hash goes to the end
+ nextSourceKey = null;
+ }
+ }
+
+ @Override
+ protected void map(ImmutableBytesWritable key, Result value, Context context)
+ throws IOException, InterruptedException {
+ try {
+ // first, finish any hash batches that end before the scanned row
+ while (nextSourceKey != null && key.compareTo(nextSourceKey) >= 0) {
+ moveToNextBatch(context);
+ }
+
+ // next, add the scanned row (as long as we've reached the first batch)
+ if (targetHasher.isBatchStarted()) {
+ targetHasher.hashResult(value);
+ }
+ } catch (Throwable t) {
+ mapperException = t;
+ Throwables.propagateIfInstanceOf(t, IOException.class);
+ Throwables.propagateIfInstanceOf(t, InterruptedException.class);
+ Throwables.propagate(t);
+ }
+ }
+
+ /**
+ * If there is an open hash batch, complete it and sync if there are diffs.
+ * Start a new batch, and seek to read the next source key/hash pair.
+ */
+ private void moveToNextBatch(Context context) throws IOException, InterruptedException {
+ if (targetHasher.isBatchStarted()) {
+ finishBatchAndCompareHashes(context);
+ }
+ targetHasher.startBatch(nextSourceKey);
+ currentSourceHash = sourceHashReader.getCurrentHash();
+
+ findNextKeyHashPair();
+ }
+
+ /**
+ * Finish the currently open hash batch.
+ * Compare the target hash to the given source hash.
+ * If they do not match, then sync the covered key range.
+ */
+ private void finishBatchAndCompareHashes(Context context)
+ throws IOException, InterruptedException {
+ targetHasher.finishBatch();
+ context.getCounter(Counter.BATCHES).increment(1);
+ if (targetHasher.getBatchSize() == 0) {
+ context.getCounter(Counter.EMPTY_BATCHES).increment(1);
+ }
+ ImmutableBytesWritable targetHash = targetHasher.getBatchHash();
+ if (targetHash.equals(currentSourceHash)) {
+ context.getCounter(Counter.HASHES_MATCHED).increment(1);
+ } else {
+ context.getCounter(Counter.HASHES_NOT_MATCHED).increment(1);
+
+ ImmutableBytesWritable stopRow = nextSourceKey == null
+ ? new ImmutableBytesWritable(sourceTableHash.stopRow)
+ : nextSourceKey;
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Hash mismatch. Key range: " + toHex(targetHasher.getBatchStartKey())
+ + " to " + toHex(stopRow)
+ + " sourceHash: " + toHex(currentSourceHash)
+ + " targetHash: " + toHex(targetHash));
+ }
+
+ syncRange(context, targetHasher.getBatchStartKey(), stopRow);
+ }
+ }
+
+ private static String toHex(ImmutableBytesWritable bytes) {
+ return Bytes.toHex(bytes.get(), bytes.getOffset(), bytes.getLength());
+ }
+
+ private static final CellScanner EMPTY_CELL_SCANNER
+ = new CellScanner(Collections.<Result>emptyIterator());
+
+ /**
+ * Rescan the given range directly from the source and target tables.
+ * Count and log differences, and if this is not a dry run, output Puts and Deletes
+ * to make the target table match the source table for this range
+ */
+ private void syncRange(Context context, ImmutableBytesWritable startRow,
+ ImmutableBytesWritable stopRow) throws IOException, InterruptedException {
+ Scan scan = sourceTableHash.initScan();
+ scan.setStartRow(startRow.copyBytes());
+ scan.setStopRow(stopRow.copyBytes());
+
+ ResultScanner sourceScanner = sourceTable.getScanner(scan);
+ CellScanner sourceCells = new CellScanner(sourceScanner.iterator());
+
+ ResultScanner targetScanner = targetTable.getScanner(new Scan(scan));
+ CellScanner targetCells = new CellScanner(targetScanner.iterator());
+
+ boolean rangeMatched = true;
+ byte[] nextSourceRow = sourceCells.nextRow();
+ byte[] nextTargetRow = targetCells.nextRow();
+ while(nextSourceRow != null || nextTargetRow != null) {
+ boolean rowMatched;
+ int rowComparison = compareRowKeys(nextSourceRow, nextTargetRow);
+ if (rowComparison < 0) {
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Target missing row: " + Bytes.toHex(nextSourceRow));
+ }
+ context.getCounter(Counter.TARGETMISSINGROWS).increment(1);
+
+ rowMatched = syncRowCells(context, nextSourceRow, sourceCells, EMPTY_CELL_SCANNER);
+ nextSourceRow = sourceCells.nextRow(); // advance only source to next row
+ } else if (rowComparison > 0) {
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Source missing row: " + Bytes.toHex(nextTargetRow));
+ }
+ context.getCounter(Counter.SOURCEMISSINGROWS).increment(1);
+
+ rowMatched = syncRowCells(context, nextTargetRow, EMPTY_CELL_SCANNER, targetCells);
+ nextTargetRow = targetCells.nextRow(); // advance only target to next row
+ } else {
+ // current row is the same on both sides, compare cell by cell
+ rowMatched = syncRowCells(context, nextSourceRow, sourceCells, targetCells);
+ nextSourceRow = sourceCells.nextRow();
+ nextTargetRow = targetCells.nextRow();
+ }
+
+ if (!rowMatched) {
+ rangeMatched = false;
+ }
+ }
+
+ sourceScanner.close();
+ targetScanner.close();
+
+ context.getCounter(rangeMatched ? Counter.RANGESMATCHED : Counter.RANGESNOTMATCHED)
+ .increment(1);
+ }
+
+ private static class CellScanner {
+ private final Iterator<Result> results;
+
+ private byte[] currentRow;
+ private Result currentRowResult;
+ private int nextCellInRow;
+
+ private Result nextRowResult;
+
+ public CellScanner(Iterator<Result> results) {
+ this.results = results;
+ }
+
+ /**
+ * Advance to the next row and return its row key.
+ * Returns null iff there are no more rows.
+ */
+ public byte[] nextRow() {
+ if (nextRowResult == null) {
+ // no cached row - check scanner for more
+ while (results.hasNext()) {
+ nextRowResult = results.next();
+ Cell nextCell = nextRowResult.rawCells()[0];
+ if (currentRow == null
+ || !Bytes.equals(currentRow, 0, currentRow.length, nextCell.getRowArray(),
+ nextCell.getRowOffset(), nextCell.getRowLength())) {
+ // found next row
+ break;
+ } else {
+ // found another result from current row, keep scanning
+ nextRowResult = null;
+ }
+ }
+
+ if (nextRowResult == null) {
+ // end of data, no more rows
+ currentRowResult = null;
+ currentRow = null;
+ return null;
+ }
+ }
+
+ // advance to cached result for next row
+ currentRowResult = nextRowResult;
+ nextCellInRow = 0;
+ currentRow = currentRowResult.getRow();
+ nextRowResult = null;
+ return currentRow;
+ }
+
+ /**
+ * Returns the next Cell in the current row or null iff none remain.
+ */
+ public Cell nextCellInRow() {
+ if (currentRowResult == null) {
+ // nothing left in current row
+ return null;
+ }
+
+ Cell nextCell = currentRowResult.rawCells()[nextCellInRow];
+ nextCellInRow++;
+ if (nextCellInRow == currentRowResult.size()) {
+ if (results.hasNext()) {
+ Result result = results.next();
+ Cell cell = result.rawCells()[0];
+ if (Bytes.equals(currentRow, 0, currentRow.length, cell.getRowArray(),
+ cell.getRowOffset(), cell.getRowLength())) {
+ // result is part of current row
+ currentRowResult = result;
+ nextCellInRow = 0;
+ } else {
+ // result is part of next row, cache it
+ nextRowResult = result;
+ // current row is complete
+ currentRowResult = null;
+ }
+ } else {
+ // end of data
+ currentRowResult = null;
+ }
+ }
+ return nextCell;
+ }
+ }
+
+ /**
+ * Compare the cells for the given row from the source and target tables.
+ * Count and log any differences.
+ * If not a dry run, output a Put and/or Delete needed to sync the target table
+ * to match the source table.
+ */
+ private boolean syncRowCells(Context context, byte[] rowKey, CellScanner sourceCells,
+ CellScanner targetCells) throws IOException, InterruptedException {
+ Put put = null;
+ Delete delete = null;
+ long matchingCells = 0;
+ boolean matchingRow = true;
+ Cell sourceCell = sourceCells.nextCellInRow();
+ Cell targetCell = targetCells.nextCellInRow();
+ while (sourceCell != null || targetCell != null) {
+
+ int cellKeyComparison = compareCellKeysWithinRow(sourceCell, targetCell);
+ if (cellKeyComparison < 0) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Target missing cell: " + sourceCell);
+ }
+ context.getCounter(Counter.TARGETMISSINGCELLS).increment(1);
+ matchingRow = false;
+
+ if (!dryRun) {
+ if (put == null) {
+ put = new Put(rowKey);
+ }
+ put.add(sourceCell);
+ }
+
+ sourceCell = sourceCells.nextCellInRow();
+ } else if (cellKeyComparison > 0) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Source missing cell: " + targetCell);
+ }
+ context.getCounter(Counter.SOURCEMISSINGCELLS).increment(1);
+ matchingRow = false;
+
+ if (!dryRun) {
+ if (delete == null) {
+ delete = new Delete(rowKey);
+ }
+ // add a tombstone to exactly match the target cell that is missing on the source
+ delete.addColumn(CellUtil.cloneFamily(targetCell),
+ CellUtil.cloneQualifier(targetCell), targetCell.getTimestamp());
+ }
+
+ targetCell = targetCells.nextCellInRow();
+ } else {
+ // the cell keys are equal, now check values
+ if (CellUtil.matchingValue(sourceCell, targetCell)) {
+ matchingCells++;
+ } else {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Different values: ");
+ LOG.debug(" source cell: " + sourceCell
+ + " value: " + Bytes.toHex(sourceCell.getValueArray(),
+ sourceCell.getValueOffset(), sourceCell.getValueLength()));
+ LOG.debug(" target cell: " + targetCell
+ + " value: " + Bytes.toHex(targetCell.getValueArray(),
+ targetCell.getValueOffset(), targetCell.getValueLength()));
+ }
+ context.getCounter(Counter.DIFFERENTCELLVALUES).increment(1);
+ matchingRow = false;
+
+ if (!dryRun) {
+ // overwrite target cell
+ if (put == null) {
+ put = new Put(rowKey);
+ }
+ put.add(sourceCell);
+ }
+ }
+ sourceCell = sourceCells.nextCellInRow();
+ targetCell = targetCells.nextCellInRow();
+ }
+
+ if (!dryRun && sourceTableHash.scanBatch > 0) {
+ if (put != null && put.size() >= sourceTableHash.scanBatch) {
+ context.write(new ImmutableBytesWritable(rowKey), put);
+ put = null;
+ }
+ if (delete != null && delete.size() >= sourceTableHash.scanBatch) {
+ context.write(new ImmutableBytesWritable(rowKey), delete);
+ delete = null;
+ }
+ }
+ }
+
+ if (!dryRun) {
+ if (put != null) {
+ context.write(new ImmutableBytesWritable(rowKey), put);
+ }
+ if (delete != null) {
+ context.write(new ImmutableBytesWritable(rowKey), delete);
+ }
+ }
+
+ if (matchingCells > 0) {
+ context.getCounter(Counter.MATCHINGCELLS).increment(matchingCells);
+ }
+ if (matchingRow) {
+ context.getCounter(Counter.MATCHINGROWS).increment(1);
+ return true;
+ } else {
+ context.getCounter(Counter.ROWSWITHDIFFS).increment(1);
+ return false;
+ }
+ }
+
+ /**
+ * Compare the given row keys.
+ * Null sorts after non-null.
+ */
+ private static int compareRowKeys(byte[] r1, byte[] r2) {
+ if (r1 == null) {
+ return 1; // source missing row
+ } else if (r2 == null) {
+ return -1; // target missing row
+ } else {
+ // SyncTable is never run against META tables, so we can compare plain row keys directly,
+ // doing what CellComparator does internally without ever dispatching to MetaCellComparator.
+ return Bytes.compareTo(r1, 0, r1.length, r2, 0, r2.length);
+ }
+ }
+
+ /**
+ * Compare families, qualifiers, and timestamps of the given Cells.
+ * They are assumed to be of the same row.
+ * Nulls are after non-nulls.
+ */
+ private static int compareCellKeysWithinRow(Cell c1, Cell c2) {
+ if (c1 == null) {
+ return 1; // source missing cell
+ }
+ if (c2 == null) {
+ return -1; // target missing cell
+ }
+
+ int result = CellComparator.compareFamilies(c1, c2);
+ if (result != 0) {
+ return result;
+ }
+
+ result = CellComparator.compareQualifiers(c1, c2);
+ if (result != 0) {
+ return result;
+ }
+
+ // note timestamp comparison is inverted - more recent cells first
+ return CellComparator.compareTimestamps(c1, c2);
+ }
+
+ @Override
+ protected void cleanup(Context context)
+ throws IOException, InterruptedException {
+ if (mapperException == null) {
+ try {
+ finishRemainingHashRanges(context);
+ } catch (Throwable t) {
+ mapperException = t;
+ }
+ }
+
+ try {
+ sourceTable.close();
+ targetTable.close();
+ sourceConnection.close();
+ targetConnection.close();
+ } catch (Throwable t) {
+ if (mapperException == null) {
+ mapperException = t;
+ } else {
+ LOG.error("Suppressing exception from closing tables", t);
+ }
+ }
+
+ // propagate first exception
+ if (mapperException != null) {
+ Throwables.propagateIfInstanceOf(mapperException, IOException.class);
+ Throwables.propagateIfInstanceOf(mapperException, InterruptedException.class);
+ Throwables.propagate(mapperException);
+ }
+ }
+
+ private void finishRemainingHashRanges(Context context) throws IOException,
+ InterruptedException {
+ TableSplit split = (TableSplit) context.getInputSplit();
+ byte[] splitEndRow = split.getEndRow();
+ boolean reachedEndOfTable = HashTable.isTableEndRow(splitEndRow);
+
+ // if there are more hash batches that begin before the end of this split move to them
+ while (nextSourceKey != null
+ && (nextSourceKey.compareTo(splitEndRow) < 0 || reachedEndOfTable)) {
+ moveToNextBatch(context);
+ }
+
+ if (targetHasher.isBatchStarted()) {
+ // need to complete the final open hash batch
+
+ if ((nextSourceKey != null && nextSourceKey.compareTo(splitEndRow) > 0)
+ || (nextSourceKey == null && !Bytes.equals(splitEndRow, sourceTableHash.stopRow))) {
+ // the open hash range continues past the end of this region
+ // add a scan to complete the current hash range
+ Scan scan = sourceTableHash.initScan();
+ scan.setStartRow(splitEndRow);
+ if (nextSourceKey == null) {
+ scan.setStopRow(sourceTableHash.stopRow);
+ } else {
+ scan.setStopRow(nextSourceKey.copyBytes());
+ }
+
+ ResultScanner targetScanner = null;
+ try {
+ targetScanner = targetTable.getScanner(scan);
+ for (Result row : targetScanner) {
+ targetHasher.hashResult(row);
+ }
+ } finally {
+ if (targetScanner != null) {
+ targetScanner.close();
+ }
+ }
+ } // else current batch ends exactly at split end row
+
+ finishBatchAndCompareHashes(context);
+ }
+ }
+ }
+
+ private static final int NUM_ARGS = 3;
+ private static void printUsage(final String errorMsg) {
+ if (errorMsg != null && errorMsg.length() > 0) {
+ System.err.println("ERROR: " + errorMsg);
+ System.err.println();
+ }
+ System.err.println("Usage: SyncTable [options] <sourcehashdir> <sourcetable> <targettable>");
+ System.err.println();
+ System.err.println("Options:");
+
+ System.err.println(" sourcezkcluster ZK cluster key of the source table");
+ System.err.println(" (defaults to cluster in classpath's config)");
+ System.err.println(" targetzkcluster ZK cluster key of the target table");
+ System.err.println(" (defaults to cluster in classpath's config)");
+ System.err.println(" dryrun if true, output counters but no writes");
+ System.err.println(" (defaults to false)");
+ System.err.println();
+ System.err.println("Args:");
+ System.err.println(" sourcehashdir path to HashTable output dir for source table");
+ System.err.println(" (see org.apache.hadoop.hbase.mapreduce.HashTable)");
+ System.err.println(" sourcetable Name of the source table to sync from");
+ System.err.println(" targettable Name of the target table to sync to");
+ System.err.println();
+ System.err.println("Examples:");
+ System.err.println(" For a dry run SyncTable of tableA from a remote source cluster");
+ System.err.println(" to a local target cluster:");
+ System.err.println(" $ hbase " +
+ "org.apache.hadoop.hbase.mapreduce.SyncTable --dryrun=true"
+ + " --sourcezkcluster=zk1.example.com,zk2.example.com,zk3.example.com:2181:/hbase"
+ + " hdfs://nn:9000/hashes/tableA tableA tableA");
+ }
+
+ private boolean doCommandLine(final String[] args) {
+ if (args.length < NUM_ARGS) {
+ printUsage(null);
+ return false;
+ }
+ try {
+ sourceHashDir = new Path(args[args.length - 3]);
+ sourceTableName = args[args.length - 2];
+ targetTableName = args[args.length - 1];
+
+ for (int i = 0; i < args.length - NUM_ARGS; i++) {
+ String cmd = args[i];
+ if (cmd.equals("-h") || cmd.startsWith("--h")) {
+ printUsage(null);
+ return false;
+ }
+
+ final String sourceZkClusterKey = "--sourcezkcluster=";
+ if (cmd.startsWith(sourceZkClusterKey)) {
+ sourceZkCluster = cmd.substring(sourceZkClusterKey.length());
+ continue;
+ }
+
+ final String targetZkClusterKey = "--targetzkcluster=";
+ if (cmd.startsWith(targetZkClusterKey)) {
+ targetZkCluster = cmd.substring(targetZkClusterKey.length());
+ continue;
+ }
+
+ final String dryRunKey = "--dryrun=";
+ if (cmd.startsWith(dryRunKey)) {
+ dryRun = Boolean.parseBoolean(cmd.substring(dryRunKey.length()));
+ continue;
+ }
+
+ printUsage("Invalid argument '" + cmd + "'");
+ return false;
+ }
+
+
+ } catch (Exception e) {
+ e.printStackTrace();
+ printUsage("Can't start because " + e.getMessage());
+ return false;
+ }
+ return true;
+ }
+
+ /**
+ * Main entry point.
+ */
+ public static void main(String[] args) throws Exception {
+ int ret = ToolRunner.run(new SyncTable(HBaseConfiguration.create()), args);
+ System.exit(ret);
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+ String[] otherArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs();
+ if (!doCommandLine(otherArgs)) {
+ return 1;
+ }
+
+ Job job = createSubmittableJob(otherArgs);
+ if (!job.waitForCompletion(true)) {
+ LOG.info("Map-reduce job failed!");
+ return 1;
+ }
+ counters = job.getCounters();
+ return 0;
+ }
+
+}
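For reference, a minimal sketch of driving the tool programmatically rather than from the shell, mirroring the main() above (the HDFS hash directory and table names are hypothetical placeholders taken from the usage example printed by printUsage):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.mapreduce.SyncTable;
    import org.apache.hadoop.util.ToolRunner;

    public class SyncTableDryRunExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // Argument layout: [options] <sourcehashdir> <sourcetable> <targettable>
        int exit = ToolRunner.run(conf, new SyncTable(conf), new String[] {
            "--dryrun=true",                    // report counters only, write nothing
            "hdfs://nn:9000/hashes/tableA",     // HashTable output dir (hypothetical)
            "tableA",                           // source table
            "tableA"                            // target table
        });
        System.exit(exit);
      }
    }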
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormat.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormat.java
new file mode 100644
index 0000000..63868da
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormat.java
@@ -0,0 +1,294 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+import java.util.Locale;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.util.StringUtils;
+
+/**
+ * Convert HBase tabular data into a format that is consumable by Map/Reduce.
+ */
+@InterfaceAudience.Public
+public class TableInputFormat extends TableInputFormatBase
+implements Configurable {
+
+ @SuppressWarnings("hiding")
+ private static final Log LOG = LogFactory.getLog(TableInputFormat.class);
+
+ /** Job parameter that specifies the input table. */
+ public static final String INPUT_TABLE = "hbase.mapreduce.inputtable";
+ /**
+ * If specified, use start keys of this table to split.
+ * This is useful when you are preparing data for bulkload.
+ */
+ private static final String SPLIT_TABLE = "hbase.mapreduce.splittable";
+ /** Base-64 encoded scanner. All other SCAN_ confs are ignored if this is specified.
+ * See {@link TableMapReduceUtil#convertScanToString(Scan)} for more details.
+ */
+ public static final String SCAN = "hbase.mapreduce.scan";
+ /** Scan start row */
+ public static final String SCAN_ROW_START = "hbase.mapreduce.scan.row.start";
+ /** Scan stop row */
+ public static final String SCAN_ROW_STOP = "hbase.mapreduce.scan.row.stop";
+ /** Column Family to Scan */
+ public static final String SCAN_COLUMN_FAMILY = "hbase.mapreduce.scan.column.family";
+ /** Space delimited list of columns and column families to scan. */
+ public static final String SCAN_COLUMNS = "hbase.mapreduce.scan.columns";
+ /** The timestamp used to filter columns with a specific timestamp. */
+ public static final String SCAN_TIMESTAMP = "hbase.mapreduce.scan.timestamp";
+ /** The starting timestamp used to filter columns with a specific range of versions. */
+ public static final String SCAN_TIMERANGE_START = "hbase.mapreduce.scan.timerange.start";
+ /** The ending timestamp used to filter columns with a specific range of versions. */
+ public static final String SCAN_TIMERANGE_END = "hbase.mapreduce.scan.timerange.end";
+ /** The maximum number of version to return. */
+ public static final String SCAN_MAXVERSIONS = "hbase.mapreduce.scan.maxversions";
+ /** Set to false to disable server-side caching of blocks for this scan. */
+ public static final String SCAN_CACHEBLOCKS = "hbase.mapreduce.scan.cacheblocks";
+ /** The number of rows for caching that will be passed to scanners. */
+ public static final String SCAN_CACHEDROWS = "hbase.mapreduce.scan.cachedrows";
+ /** Set the maximum number of values to return for each call to next(). */
+ public static final String SCAN_BATCHSIZE = "hbase.mapreduce.scan.batchsize";
+ /** Specify if we have to shuffle the map tasks. */
+ public static final String SHUFFLE_MAPS = "hbase.mapreduce.inputtable.shufflemaps";
+
+ /** The configuration. */
+ private Configuration conf = null;
+
+ /**
+ * Returns the current configuration.
+ *
+ * @return The current configuration.
+ * @see org.apache.hadoop.conf.Configurable#getConf()
+ */
+ @Override
+ public Configuration getConf() {
+ return conf;
+ }
+
+ /**
+ * Sets the configuration. This is used to set the details for the table to
+ * be scanned.
+ *
+ * @param configuration The configuration to set.
+ * @see org.apache.hadoop.conf.Configurable#setConf(
+ * org.apache.hadoop.conf.Configuration)
+ */
+ @Override
+ @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="REC_CATCH_EXCEPTION",
+ justification="Intentional")
+ public void setConf(Configuration configuration) {
+ this.conf = configuration;
+
+ Scan scan = null;
+
+ if (conf.get(SCAN) != null) {
+ try {
+ scan = TableMapReduceUtil.convertStringToScan(conf.get(SCAN));
+ } catch (IOException e) {
+ LOG.error("An error occurred.", e);
+ }
+ } else {
+ try {
+ scan = createScanFromConfiguration(conf);
+ } catch (Exception e) {
+ LOG.error(StringUtils.stringifyException(e));
+ }
+ }
+
+ setScan(scan);
+ }
+
+ /**
+ * Sets up a {@link Scan} instance, applying settings from the configuration property
+ * constants defined in {@code TableInputFormat}. This allows specifying things such as:
+ * <ul>
+ * <li>start and stop rows</li>
+ * <li>column qualifiers or families</li>
+ * <li>timestamps or timerange</li>
+ * <li>scanner caching and batch size</li>
+ * </ul>
+ */
+ public static Scan createScanFromConfiguration(Configuration conf) throws IOException {
+ Scan scan = new Scan();
+
+ if (conf.get(SCAN_ROW_START) != null) {
+ scan.setStartRow(Bytes.toBytesBinary(conf.get(SCAN_ROW_START)));
+ }
+
+ if (conf.get(SCAN_ROW_STOP) != null) {
+ scan.setStopRow(Bytes.toBytesBinary(conf.get(SCAN_ROW_STOP)));
+ }
+
+ if (conf.get(SCAN_COLUMNS) != null) {
+ addColumns(scan, conf.get(SCAN_COLUMNS));
+ }
+
+ for (String columnFamily : conf.getTrimmedStrings(SCAN_COLUMN_FAMILY)) {
+ scan.addFamily(Bytes.toBytes(columnFamily));
+ }
+
+ if (conf.get(SCAN_TIMESTAMP) != null) {
+ scan.setTimeStamp(Long.parseLong(conf.get(SCAN_TIMESTAMP)));
+ }
+
+ if (conf.get(SCAN_TIMERANGE_START) != null && conf.get(SCAN_TIMERANGE_END) != null) {
+ scan.setTimeRange(
+ Long.parseLong(conf.get(SCAN_TIMERANGE_START)),
+ Long.parseLong(conf.get(SCAN_TIMERANGE_END)));
+ }
+
+ if (conf.get(SCAN_MAXVERSIONS) != null) {
+ scan.setMaxVersions(Integer.parseInt(conf.get(SCAN_MAXVERSIONS)));
+ }
+
+ if (conf.get(SCAN_CACHEDROWS) != null) {
+ scan.setCaching(Integer.parseInt(conf.get(SCAN_CACHEDROWS)));
+ }
+
+ if (conf.get(SCAN_BATCHSIZE) != null) {
+ scan.setBatch(Integer.parseInt(conf.get(SCAN_BATCHSIZE)));
+ }
+
+ // false by default, full table scans generate too much BC churn
+ scan.setCacheBlocks((conf.getBoolean(SCAN_CACHEBLOCKS, false)));
+
+ return scan;
+ }
+
+ @Override
+ protected void initialize(JobContext context) throws IOException {
+ // Do we have to worry about mis-matches between the Configuration from setConf and the one
+ // in this context?
+ TableName tableName = TableName.valueOf(conf.get(INPUT_TABLE));
+ try {
+ initializeTable(ConnectionFactory.createConnection(new Configuration(conf)), tableName);
+ } catch (Exception e) {
+ LOG.error(StringUtils.stringifyException(e));
+ }
+ }
+
+ /**
+ * Parses a combined family and qualifier and adds either both or just the
+ * family in case there is no qualifier. This assumes the older colon
+ * divided notation, e.g. "family:qualifier".
+ *
+ * @param scan The Scan to update.
+ * @param familyAndQualifier family and qualifier
+ * @throws IllegalArgumentException When familyAndQualifier is invalid.
+ */
+ private static void addColumn(Scan scan, byte[] familyAndQualifier) {
+ byte [][] fq = KeyValue.parseColumn(familyAndQualifier);
+ if (fq.length == 1) {
+ scan.addFamily(fq[0]);
+ } else if (fq.length == 2) {
+ scan.addColumn(fq[0], fq[1]);
+ } else {
+ throw new IllegalArgumentException("Invalid familyAndQualifier provided.");
+ }
+ }
+
+ /**
+ * Adds an array of columns specified using old format, family:qualifier.
+ * <p>
+ * Overrides previous calls to {@link Scan#addColumn(byte[], byte[])} for any families in the
+ * input.
+ *
+ * @param scan The Scan to update.
+ * @param columns array of columns, formatted as <code>family:qualifier</code>
+ * @see Scan#addColumn(byte[], byte[])
+ */
+ public static void addColumns(Scan scan, byte [][] columns) {
+ for (byte[] column : columns) {
+ addColumn(scan, column);
+ }
+ }
+
+ /**
+ * Calculates the splits that will serve as input for the map tasks. The
+ * number of splits matches the number of regions in a table. Splits are shuffled if
+ * required.
+ * @param context The current job context.
+ * @return The list of input splits.
+ * @throws IOException When creating the list of splits fails.
+ * @see org.apache.hadoop.mapreduce.InputFormat#getSplits(
+ * org.apache.hadoop.mapreduce.JobContext)
+ */
+ @Override
+ public List<InputSplit> getSplits(JobContext context) throws IOException {
+ List<InputSplit> splits = super.getSplits(context);
+ if ((conf.get(SHUFFLE_MAPS) != null)
+ && "true".equals(conf.get(SHUFFLE_MAPS).toLowerCase(Locale.ROOT))) {
+ Collections.shuffle(splits);
+ }
+ return splits;
+ }
+
+ /**
+ * Convenience method to parse a string representation of an array of column specifiers.
+ *
+ * @param scan The Scan to update.
+ * @param columns The columns to parse.
+ */
+ private static void addColumns(Scan scan, String columns) {
+ String[] cols = columns.split(" ");
+ for (String col : cols) {
+ addColumn(scan, Bytes.toBytes(col));
+ }
+ }
+
+ @Override
+ protected Pair<byte[][], byte[][]> getStartEndKeys() throws IOException {
+ if (conf.get(SPLIT_TABLE) != null) {
+ TableName splitTableName = TableName.valueOf(conf.get(SPLIT_TABLE));
+ try (Connection conn = ConnectionFactory.createConnection(getConf())) {
+ try (RegionLocator rl = conn.getRegionLocator(splitTableName)) {
+ return rl.getStartEndKeys();
+ }
+ }
+ }
+
+ return super.getStartEndKeys();
+ }
+
+ /**
+ * Sets split table in map-reduce job.
+ */
+ public static void configureSplitTable(Job job, TableName tableName) {
+ job.getConfiguration().set(SPLIT_TABLE, tableName.getNameAsString());
+ }
+}
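As a rough illustration of the configuration keys defined above, the sketch below drives TableInputFormat purely through those properties (the table name "exampleTable", the column family "d", the row bounds, and the job name are hypothetical; a real job would normally set up a TableMapper via TableMapReduceUtil.initTableMapperJob rather than wiring these keys by hand):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;

    public class TableInputFormatConfigExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set(TableInputFormat.INPUT_TABLE, "exampleTable");   // required: table to scan
        conf.set(TableInputFormat.SCAN_COLUMN_FAMILY, "d");       // limit the scan to one family
        conf.set(TableInputFormat.SCAN_ROW_START, "row-000");     // optional start row
        conf.set(TableInputFormat.SCAN_ROW_STOP, "row-999");      // optional stop row (exclusive)
        conf.set(TableInputFormat.SCAN_CACHEDROWS, "500");        // scanner caching
        conf.set(TableInputFormat.SHUFFLE_MAPS, "true");          // shuffle splits across map tasks

        Job job = Job.getInstance(conf, "scan-exampleTable");
        job.setInputFormatClass(TableInputFormat.class);          // builds its Scan in setConf()
        job.setNumReduceTasks(0);
        job.setOutputFormatClass(NullOutputFormat.class);         // discard map output in this sketch
        System.exit(job.waitForCompletion(true) ? 0 : 1);
      }
    }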
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java
new file mode 100644
index 0000000..fb38ebe
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java
@@ -0,0 +1,652 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.net.InetAddress;
+import java.net.InetSocketAddress;
+import java.net.UnknownHostException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionLocation;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Addressing;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.hbase.util.Strings;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.net.DNS;
+import org.apache.hadoop.util.StringUtils;
+
+/**
+ * A base for {@link TableInputFormat}s. Receives a {@link Connection}, a {@link TableName},
+ * an {@link Scan} instance that defines the input columns etc. Subclasses may use
+ * other TableRecordReader implementations.
+ *
+ * Subclasses MUST ensure initializeTable(Connection, TableName) is called for an instance to
+ * function properly. Each of the entry points to this class used by the MapReduce framework,
+ * {@link #createRecordReader(InputSplit, TaskAttemptContext)} and {@link #getSplits(JobContext)},
+ * will call {@link #initialize(JobContext)} as a convenient centralized location to handle
+ * retrieving the necessary configuration information. If your subclass overrides either of these
+ * methods, either call the parent version or call initialize yourself.
+ *
+ * <p>
+ * An example of a subclass:
+ * <pre>
+ * class ExampleTIF extends TableInputFormatBase {
+ *
+ * {@literal @}Override
+ * protected void initialize(JobContext context) throws IOException {
+ * // We are responsible for the lifecycle of this connection until we hand it over in
+ * // initializeTable.
+ * Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create(
+ * job.getConfiguration()));
+ * TableName tableName = TableName.valueOf("exampleTable");
+ * // mandatory. once passed here, TableInputFormatBase will handle closing the connection.
+ * initializeTable(connection, tableName);
+ * byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
+ * Bytes.toBytes("columnB") };
+ * // optional, by default we'll get everything for the table.
+ * Scan scan = new Scan();
+ * for (byte[] family : inputColumns) {
+ * scan.addFamily(family);
+ * }
+ * Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*"));
+ * scan.setFilter(exampleFilter);
+ * setScan(scan);
+ * }
+ * }
+ * </pre>
+ */
+@InterfaceAudience.Public
+public abstract class TableInputFormatBase
+extends InputFormat<ImmutableBytesWritable, Result> {
+
+ /** Specify if we enable auto-balance for input in M/R jobs.*/
+ public static final String MAPREDUCE_INPUT_AUTOBALANCE = "hbase.mapreduce.input.autobalance";
+ /** Specify the maximum ratio of data skew tolerated in M/R jobs; it is used together with the
+ * hbase.mapreduce.input.autobalance property.*/
+ public static final String INPUT_AUTOBALANCE_MAXSKEWRATIO = "hbase.mapreduce.input.autobalance" +
+ ".maxskewratio";
+ /** Specify whether the row keys in the table are text (ASCII between 32 and 126);
+ * default is true. False means the table uses binary row keys.*/
+ public static final String TABLE_ROW_TEXTKEY = "hbase.table.row.textkey";
+
+ private static final Log LOG = LogFactory.getLog(TableInputFormatBase.class);
+
+ private static final String NOT_INITIALIZED = "The input format instance has not been properly " +
+ "initialized. Ensure you call initializeTable either in your constructor or initialize " +
+ "method";
+ private static final String INITIALIZATION_ERROR = "Cannot create a record reader because of a" +
+ " previous error. Please look at the previous logs lines from" +
+ " the task's full log for more details.";
+
+ /** Holds the details for the internal scanner.
+ *
+ * @see Scan */
+ private Scan scan = null;
+ /** The {@link Admin}. */
+ private Admin admin;
+ /** The {@link Table} to scan. */
+ private Table table;
+ /** The {@link RegionLocator} of the table. */
+ private RegionLocator regionLocator;
+ /** The reader scanning the table, can be a custom one. */
+ private TableRecordReader tableRecordReader = null;
+ /** The underlying {@link Connection} of the table. */
+ private Connection connection;
+
+
+ /** The reverse DNS lookup cache mapping: IPAddress => HostName */
+ private HashMap<InetAddress, String> reverseDNSCacheMap = new HashMap<>();
+
+ /**
+ * Builds a {@link TableRecordReader}. If no {@link TableRecordReader} was provided, uses
+ * the default.
+ *
+ * @param split The split to work with.
+ * @param context The current context.
+ * @return The newly created record reader.
+ * @throws IOException When creating the reader fails.
+ * @see org.apache.hadoop.mapreduce.InputFormat#createRecordReader(
+ * org.apache.hadoop.mapreduce.InputSplit,
+ * org.apache.hadoop.mapreduce.TaskAttemptContext)
+ */
+ @Override
+ public RecordReader<ImmutableBytesWritable, Result> createRecordReader(
+ InputSplit split, TaskAttemptContext context)
+ throws IOException {
+ // Just in case a subclass is relying on JobConfigurable magic.
+ if (table == null) {
+ initialize(context);
+ }
+ // null check in case our child overrides getTable to not throw.
+ try {
+ if (getTable() == null) {
+ // initialize() must not have been implemented in the subclass.
+ throw new IOException(INITIALIZATION_ERROR);
+ }
+ } catch (IllegalStateException exception) {
+ throw new IOException(INITIALIZATION_ERROR, exception);
+ }
+ TableSplit tSplit = (TableSplit) split;
+ LOG.info("Input split length: " + StringUtils.humanReadableInt(tSplit.getLength()) + " bytes.");
+ final TableRecordReader trr =
+ this.tableRecordReader != null ? this.tableRecordReader : new TableRecordReader();
+ Scan sc = new Scan(this.scan);
+ sc.setStartRow(tSplit.getStartRow());
+ sc.setStopRow(tSplit.getEndRow());
+ trr.setScan(sc);
+ trr.setTable(getTable());
+ return new RecordReader<ImmutableBytesWritable, Result>() {
+
+ @Override
+ public void close() throws IOException {
+ trr.close();
+ closeTable();
+ }
+
+ @Override
+ public ImmutableBytesWritable getCurrentKey() throws IOException, InterruptedException {
+ return trr.getCurrentKey();
+ }
+
+ @Override
+ public Result getCurrentValue() throws IOException, InterruptedException {
+ return trr.getCurrentValue();
+ }
+
+ @Override
+ public float getProgress() throws IOException, InterruptedException {
+ return trr.getProgress();
+ }
+
+ @Override
+ public void initialize(InputSplit inputsplit, TaskAttemptContext context) throws IOException,
+ InterruptedException {
+ trr.initialize(inputsplit, context);
+ }
+
+ @Override
+ public boolean nextKeyValue() throws IOException, InterruptedException {
+ return trr.nextKeyValue();
+ }
+ };
+ }
+
+ protected Pair<byte[][],byte[][]> getStartEndKeys() throws IOException {
+ return getRegionLocator().getStartEndKeys();
+ }
+
+ /**
+ * Calculates the splits that will serve as input for the map tasks. The
+ * number of splits matches the number of regions in a table.
+ *
+ * @param context The current job context.
+ * @return The list of input splits.
+ * @throws IOException When creating the list of splits fails.
+ * @see org.apache.hadoop.mapreduce.InputFormat#getSplits(
+ * org.apache.hadoop.mapreduce.JobContext)
+ */
+ @Override
+ public List<InputSplit> getSplits(JobContext context) throws IOException {
+ boolean closeOnFinish = false;
+
+ // Just in case a subclass is relying on JobConfigurable magic.
+ if (table == null) {
+ initialize(context);
+ closeOnFinish = true;
+ }
+
+ // null check in case our child overrides getTable to not throw.
+ try {
+ if (getTable() == null) {
+ // initialize() must not have been implemented in the subclass.
+ throw new IOException(INITIALIZATION_ERROR);
+ }
+ } catch (IllegalStateException exception) {
+ throw new IOException(INITIALIZATION_ERROR, exception);
+ }
+
+ try {
+ RegionSizeCalculator sizeCalculator =
+ new RegionSizeCalculator(getRegionLocator(), getAdmin());
+
+ TableName tableName = getTable().getName();
+
+ Pair<byte[][], byte[][]> keys = getStartEndKeys();
+ if (keys == null || keys.getFirst() == null ||
+ keys.getFirst().length == 0) {
+ HRegionLocation regLoc =
+ getRegionLocator().getRegionLocation(HConstants.EMPTY_BYTE_ARRAY, false);
+ if (null == regLoc) {
+ throw new IOException("Expecting at least one region.");
+ }
+ List<InputSplit> splits = new ArrayList<>(1);
+ long regionSize = sizeCalculator.getRegionSize(regLoc.getRegionInfo().getRegionName());
+ TableSplit split = new TableSplit(tableName, scan,
+ HConstants.EMPTY_BYTE_ARRAY, HConstants.EMPTY_BYTE_ARRAY, regLoc
+ .getHostnamePort().split(Addressing.HOSTNAME_PORT_SEPARATOR)[0], regionSize);
+ splits.add(split);
+ return splits;
+ }
+ List<InputSplit> splits = new ArrayList<>(keys.getFirst().length);
+ for (int i = 0; i < keys.getFirst().length; i++) {
+ if (!includeRegionInSplit(keys.getFirst()[i], keys.getSecond()[i])) {
+ continue;
+ }
+
+ byte[] startRow = scan.getStartRow();
+ byte[] stopRow = scan.getStopRow();
+ // determine if the given start and stop keys fall into the region
+ if ((startRow.length == 0 || keys.getSecond()[i].length == 0 ||
+ Bytes.compareTo(startRow, keys.getSecond()[i]) < 0) &&
+ (stopRow.length == 0 ||
+ Bytes.compareTo(stopRow, keys.getFirst()[i]) > 0)) {
+ byte[] splitStart = startRow.length == 0 ||
+ Bytes.compareTo(keys.getFirst()[i], startRow) >= 0 ?
+ keys.getFirst()[i] : startRow;
+ byte[] splitStop = (stopRow.length == 0 ||
+ Bytes.compareTo(keys.getSecond()[i], stopRow) <= 0) &&
+ keys.getSecond()[i].length > 0 ?
+ keys.getSecond()[i] : stopRow;
+
+ HRegionLocation location = getRegionLocator().getRegionLocation(keys.getFirst()[i], false);
+ // The below InetSocketAddress creation does a name resolution.
+ InetSocketAddress isa = new InetSocketAddress(location.getHostname(), location.getPort());
+ if (isa.isUnresolved()) {
+ LOG.warn("Failed resolve " + isa);
+ }
+ InetAddress regionAddress = isa.getAddress();
+ String regionLocation;
+ regionLocation = reverseDNS(regionAddress);
+
+ byte[] regionName = location.getRegionInfo().getRegionName();
+ String encodedRegionName = location.getRegionInfo().getEncodedName();
+ long regionSize = sizeCalculator.getRegionSize(regionName);
+ TableSplit split = new TableSplit(tableName, scan,
+ splitStart, splitStop, regionLocation, encodedRegionName, regionSize);
+ splits.add(split);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("getSplits: split -> " + i + " -> " + split);
+ }
+ }
+ }
+ //The default value of "hbase.mapreduce.input.autobalance" is false, which means not enabled.
+ boolean enableAutoBalance = context.getConfiguration()
+ .getBoolean(MAPREDUCE_INPUT_AUTOBALANCE, false);
+ if (enableAutoBalance) {
+ long totalRegionSize=0;
+ for (int i = 0; i < splits.size(); i++){
+ TableSplit ts = (TableSplit)splits.get(i);
+ totalRegionSize += ts.getLength();
+ }
+ long averageRegionSize = totalRegionSize / splits.size();
+ // the averageRegionSize must be positive.
+ if (averageRegionSize <= 0) {
+ LOG.warn("The averageRegionSize is not positive: "+ averageRegionSize + ", " +
+ "set it to 1.");
+ averageRegionSize = 1;
+ }
+ return calculateRebalancedSplits(splits, context, averageRegionSize);
+ } else {
+ return splits;
+ }
+ } finally {
+ if (closeOnFinish) {
+ closeTable();
+ }
+ }
+ }
+
+ String reverseDNS(InetAddress ipAddress) throws UnknownHostException {
+ String hostName = this.reverseDNSCacheMap.get(ipAddress);
+ if (hostName == null) {
+ String ipAddressString = null;
+ try {
+ ipAddressString = DNS.reverseDns(ipAddress, null);
+ } catch (Exception e) {
+ // We can use InetAddress in case the jndi failed to pull up the reverse DNS entry from the
+ // name service. Also, in case of ipv6, we need to use the InetAddress since resolving
+ // reverse DNS using jndi doesn't work well with ipv6 addresses.
+ ipAddressString = InetAddress.getByName(ipAddress.getHostAddress()).getHostName();
+ }
+ if (ipAddressString == null) throw new UnknownHostException("No host found for " + ipAddress);
+ hostName = Strings.domainNamePointerToHostName(ipAddressString);
+ this.reverseDNSCacheMap.put(ipAddress, hostName);
+ }
+ return hostName;
+ }
+
+ /**
+ * Calculates the number of MapReduce input splits for the map tasks. The number of
+ * MapReduce input splits depends on the average region size and the "data skew ratio" the user
+ * sets in the configuration.
+ *
+ * @param list The list of input splits before balance.
+ * @param context The current job context.
+ * @param average The average size of all regions .
+ * @return The list of input splits.
+ * @throws IOException When creating the list of splits fails.
+ * @see org.apache.hadoop.mapreduce.InputFormat#getSplits(
+ * org.apache.hadoop.mapreduce.JobContext)
+ */
+ private List<InputSplit> calculateRebalancedSplits(List<InputSplit> list, JobContext context,
+ long average) throws IOException {
+ List<InputSplit> resultList = new ArrayList<>();
+ Configuration conf = context.getConfiguration();
+ //The default data skew ratio is 3
+ long dataSkewRatio = conf.getLong(INPUT_AUTOBALANCE_MAXSKEWRATIO, 3);
+ //It determines which mode to use: text key mode or binary key mode. The default is text mode.
+ boolean isTextKey = context.getConfiguration().getBoolean(TABLE_ROW_TEXTKEY, true);
+ long dataSkewThreshold = dataSkewRatio * average;
+ int count = 0;
+ while (count < list.size()) {
+ TableSplit ts = (TableSplit)list.get(count);
+ TableName tableName = ts.getTable();
+ String regionLocation = ts.getRegionLocation();
+ String encodedRegionName = ts.getEncodedRegionName();
+ long regionSize = ts.getLength();
+ if (regionSize >= dataSkewThreshold) {
+ // if the current region size is larger than the data skew threshold,
+ // split the region into two MapReduce input splits.
+ byte[] splitKey = getSplitKey(ts.getStartRow(), ts.getEndRow(), isTextKey);
+ if (Arrays.equals(ts.getEndRow(), splitKey)) {
+ // Not splitting since the end key is the same as the split key
+ resultList.add(ts);
+ } else {
+ // Set the size of each child TableSplit to half of the region size. The exact size of the
+ // resulting MapReduce input splits is not far off.
+ TableSplit t1 = new TableSplit(tableName, scan, ts.getStartRow(), splitKey,
+ regionLocation, regionSize / 2);
+ TableSplit t2 = new TableSplit(tableName, scan, splitKey, ts.getEndRow(), regionLocation,
+ regionSize - regionSize / 2);
+ resultList.add(t1);
+ resultList.add(t2);
+ }
+ count++;
+ } else if (regionSize >= average) {
+ // if the region size is between the average size and the data skew threshold,
+ // make this region one MapReduce input split.
+ resultList.add(ts);
+ count++;
+ } else {
+ // if the total size of several small contiguous regions is less than the average region size,
+ // combine them into one MapReduce input split.
+ long totalSize = regionSize;
+ byte[] splitStartKey = ts.getStartRow();
+ byte[] splitEndKey = ts.getEndRow();
+ count++;
+ for (; count < list.size(); count++) {
+ TableSplit nextRegion = (TableSplit)list.get(count);
+ long nextRegionSize = nextRegion.getLength();
+ if (totalSize + nextRegionSize <= dataSkewThreshold) {
+ totalSize = totalSize + nextRegionSize;
+ splitEndKey = nextRegion.getEndRow();
+ } else {
+ break;
+ }
+ }
+ TableSplit t = new TableSplit(tableName, scan, splitStartKey, splitEndKey,
+ regionLocation, encodedRegionName, totalSize);
+ resultList.add(t);
+ }
+ }
+ return resultList;
+ }
+
+ /**
+ * Select a split point in the region. The selection of the split point is based on a uniform
+ * distribution assumption for the keys in the region.
+ * Here are some examples:
+ *
+ * <table>
+ * <tr>
+ * <th>start key</th>
+ * <th>end key</th>
+ * <th>is text</th>
+ * <th>split point</th>
+ * </tr>
+ * <tr>
+ * <td>'a', 'a', 'a', 'b', 'c', 'd', 'e', 'f', 'g'</td>
+ * <td>'a', 'a', 'a', 'f', 'f', 'f'</td>
+ * <td>true</td>
+ * <td>'a', 'a', 'a', 'd', 'd', -78, 50, -77, 51</td>
+ * </tr>
+ * <tr>
+ * <td>'1', '1', '1', '0', '0', '0'</td>
+ * <td>'1', '1', '2', '5', '7', '9', '0'</td>
+ * <td>true</td>
+ * <td>'1', '1', '1', -78, -77, -76, -104</td>
+ * </tr>
+ * <tr>
+ * <td>'1', '1', '1', '0'</td>
+ * <td>'1', '1', '2', '0'</td>
+ * <td>true</td>
+ * <td>'1', '1', '1', -80</td>
+ * </tr>
+ * <tr>
+ * <td>13, -19, 126, 127</td>
+ * <td>13, -19, 127, 0</td>
+ * <td>false</td>
+ * <td>13, -19, 126, -65</td>
+ * </tr>
+ * </table>
+ *
+ * Set this function as "public static", make it easier for test.
+ *
+ * @param start Start key of the region
+ * @param end End key of the region
+ * @param isText It determines to use text key mode or binary key mode
+ * @return The split point in the region.
+ */
+ @InterfaceAudience.Private
+ public static byte[] getSplitKey(byte[] start, byte[] end, boolean isText) {
+ byte upperLimitByte;
+ byte lowerLimitByte;
+ //Use text mode or binary mode.
+ if (isText) {
+ //The range of text char set in ASCII is [32,126], the lower limit is space and the upper
+ // limit is '~'.
+ upperLimitByte = '~';
+ lowerLimitByte = ' ';
+ } else {
+ upperLimitByte = -1;
+ lowerLimitByte = 0;
+ }
+ // Special cases:
+ // Example 1 : startkey=null, endkey="hhhqqqwww", splitKey="h"
+ // Example 2 (text key mode): startKey="ffffaaa", endKey=null, splitkey="f~~~~~~"
+ if (start.length == 0 && end.length == 0){
+ return new byte[]{(byte) ((lowerLimitByte + upperLimitByte) / 2)};
+ }
+ if (start.length == 0 && end.length != 0){
+ return new byte[]{ end[0] };
+ }
+ if (start.length != 0 && end.length == 0){
+ byte[] result =new byte[start.length];
+ result[0]=start[0];
+ for (int k = 1; k < start.length; k++){
+ result[k] = upperLimitByte;
+ }
+ return result;
+ }
+ return Bytes.split(start, end, false, 1)[1];
+ }
+
+ /**
+ * Test if the given region is to be included in the InputSplit while splitting
+ * the regions of a table.
+ * <p>
+ * This optimization is effective when there is a specific reason to exclude an entire
+ * region from the M-R job (and hence not contribute an InputSplit), given its start and
+ * end keys. <br>
+ * Useful when we need to remember the last-processed top record and continuously revisit
+ * the [last, current) interval for M-R processing. Besides reducing the number of
+ * InputSplits, this also reduces the load on the region server, due to the ordering of
+ * the keys. <br>
+ * <br>
+ * Note: it is possible that <code>endKey.length() == 0</code> for the last (most recent)
+ * region. <br>
+ * Override this method if you want to bulk exclude regions from M-R. By default, no
+ * region is excluded (i.e. all regions are included).
+ *
+ *
+ * @param startKey Start key of the region
+ * @param endKey End key of the region
+ * @return true, if this region needs to be included as part of the input (default).
+ *
+ */
+ protected boolean includeRegionInSplit(final byte[] startKey, final byte [] endKey) {
+ return true;
+ }
+
+ /**
+ * Allows subclasses to get the {@link RegionLocator}.
+ */
+ protected RegionLocator getRegionLocator() {
+ if (regionLocator == null) {
+ throw new IllegalStateException(NOT_INITIALIZED);
+ }
+ return regionLocator;
+ }
+
+ /**
+ * Allows subclasses to get the {@link Table}.
+ */
+ protected Table getTable() {
+ if (table == null) {
+ throw new IllegalStateException(NOT_INITIALIZED);
+ }
+ return table;
+ }
+
+ /**
+ * Allows subclasses to get the {@link Admin}.
+ */
+ protected Admin getAdmin() {
+ if (admin == null) {
+ throw new IllegalStateException(NOT_INITIALIZED);
+ }
+ return admin;
+ }
+
+ /**
+ * Allows subclasses to initialize the table information.
+ *
+ * @param connection The Connection to the HBase cluster. MUST be unmanaged. We will close.
+ * @param tableName The {@link TableName} of the table to process.
+ * @throws IOException
+ */
+ protected void initializeTable(Connection connection, TableName tableName) throws IOException {
+ if (this.table != null || this.connection != null) {
+ LOG.warn("initializeTable called multiple times. Overwriting connection and table " +
+ "reference; TableInputFormatBase will not close these old references when done.");
+ }
+ this.table = connection.getTable(tableName);
+ this.regionLocator = connection.getRegionLocator(tableName);
+ this.admin = connection.getAdmin();
+ this.connection = connection;
+ }
+
+ /**
+ * Gets the scan defining the actual details like columns etc.
+ *
+ * @return The internal scan instance.
+ */
+ public Scan getScan() {
+ if (this.scan == null) this.scan = new Scan();
+ return scan;
+ }
+
+ /**
+ * Sets the scan defining the actual details like columns etc.
+ *
+ * @param scan The scan to set.
+ */
+ public void setScan(Scan scan) {
+ this.scan = scan;
+ }
+
+ /**
+ * Allows subclasses to set the {@link TableRecordReader}.
+ *
+ * @param tableRecordReader A different {@link TableRecordReader}
+ * implementation.
+ */
+ protected void setTableRecordReader(TableRecordReader tableRecordReader) {
+ this.tableRecordReader = tableRecordReader;
+ }
+
+ /**
+ * Handle subclass specific set up.
+ * Each of the entry points used by the MapReduce framework,
+ * {@link #createRecordReader(InputSplit, TaskAttemptContext)} and {@link #getSplits(JobContext)},
+ * will call {@link #initialize(JobContext)} as a convenient centralized location to handle
+ * retrieving the necessary configuration information and calling
+ * {@link #initializeTable(Connection, TableName)}.
+ *
+ * Subclasses should implement their initialize call such that it is safe to call multiple times.
+ * The current TableInputFormatBase implementation relies on a non-null table reference to decide
+ * if an initialize call is needed, but this behavior may change in the future. In particular,
+ * it is critical that initializeTable not be called multiple times since this will leak
+ * Connection instances.
+ *
+ */
+ protected void initialize(JobContext context) throws IOException {
+ }
+
+ /**
+ * Close the Table and related objects that were initialized via
+ * {@link #initializeTable(Connection, TableName)}.
+ *
+ * @throws IOException
+ */
+ protected void closeTable() throws IOException {
+ close(admin, table, regionLocator, connection);
+ admin = null;
+ table = null;
+ regionLocator = null;
+ connection = null;
+ }
+
+ private void close(Closeable... closables) throws IOException {
+ for (Closeable c : closables) {
+ if(c != null) { c.close(); }
+ }
+ }
+
+}
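As a rough illustration of the contract described in the javadoc above, a subclass might wire up
initialize(JobContext) and includeRegionInSplit(byte[], byte[]) along the lines of the sketch below.
This is illustrative only and not part of this patch; the class name, the table name, and the
"last processed key" policy are assumptions.

    import java.io.IOException;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.HConstants;
    import org.apache.hadoop.hbase.TableName;
    import org.apache.hadoop.hbase.client.Connection;
    import org.apache.hadoop.hbase.client.ConnectionFactory;
    import org.apache.hadoop.hbase.client.Scan;
    import org.apache.hadoop.hbase.util.Bytes;
    import org.apache.hadoop.mapreduce.JobContext;

    public class LastKeyAwareTableInputFormat extends TableInputFormatBase {
      private Connection conn;
      // Key up to which rows were handled by a previous run; illustrative only.
      private byte[] lastProcessedKey = HConstants.EMPTY_BYTE_ARRAY;

      @Override
      protected void initialize(JobContext context) throws IOException {
        // Guard against repeated calls: initializeTable must only run once per instance,
        // otherwise Connection instances leak.
        if (conn == null) {
          conn = ConnectionFactory.createConnection(
              HBaseConfiguration.create(context.getConfiguration()));
          initializeTable(conn, TableName.valueOf("my_table"));
          setScan(new Scan());
        }
      }

      @Override
      protected boolean includeRegionInSplit(byte[] startKey, byte[] endKey) {
        // Skip regions that end at or before the last processed key; the last region
        // (endKey.length == 0) is always kept.
        return endKey.length == 0 || Bytes.compareTo(endKey, lastProcessedKey) > 0;
      }
    }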
[34/41] hbase git commit: HBASE-18640 Move mapreduce out of
hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java
new file mode 100644
index 0000000..ff458ff
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java
@@ -0,0 +1,1027 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.URL;
+import java.net.URLDecoder;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Enumeration;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipFile;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.MetaTableAccessor;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos;
+import org.apache.hadoop.hbase.security.User;
+import org.apache.hadoop.hbase.security.UserProvider;
+import org.apache.hadoop.hbase.security.token.TokenUtil;
+import org.apache.hadoop.hbase.util.Base64;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.zookeeper.ZKConfig;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.util.StringUtils;
+
+import com.codahale.metrics.MetricRegistry;
+
+/**
+ * Utility for {@link TableMapper} and {@link TableReducer}
+ */
+@SuppressWarnings({ "rawtypes", "unchecked" })
+@InterfaceAudience.Public
+public class TableMapReduceUtil {
+ private static final Log LOG = LogFactory.getLog(TableMapReduceUtil.class);
+
+ /**
+ * Use this before submitting a TableMap job. It will appropriately set up
+ * the job.
+ *
+ * @param table The table name to read from.
+ * @param scan The scan instance with the columns, time range etc.
+ * @param mapper The mapper class to use.
+ * @param outputKeyClass The class of the output key.
+ * @param outputValueClass The class of the output value.
+ * @param job The current job to adjust. Make sure the passed job is
+ * carrying all necessary HBase configuration.
+ * @throws IOException When setting up the details fails.
+ */
+ public static void initTableMapperJob(String table, Scan scan,
+ Class<? extends TableMapper> mapper,
+ Class<?> outputKeyClass,
+ Class<?> outputValueClass, Job job)
+ throws IOException {
+ initTableMapperJob(table, scan, mapper, outputKeyClass, outputValueClass,
+ job, true);
+ }
+
+
+ /**
+ * Use this before submitting a TableMap job. It will appropriately set up
+ * the job.
+ *
+ * @param table The table name to read from.
+ * @param scan The scan instance with the columns, time range etc.
+ * @param mapper The mapper class to use.
+ * @param outputKeyClass The class of the output key.
+ * @param outputValueClass The class of the output value.
+ * @param job The current job to adjust. Make sure the passed job is
+ * carrying all necessary HBase configuration.
+ * @throws IOException When setting up the details fails.
+ */
+ public static void initTableMapperJob(TableName table,
+ Scan scan,
+ Class<? extends TableMapper> mapper,
+ Class<?> outputKeyClass,
+ Class<?> outputValueClass,
+ Job job) throws IOException {
+ initTableMapperJob(table.getNameAsString(),
+ scan,
+ mapper,
+ outputKeyClass,
+ outputValueClass,
+ job,
+ true);
+ }
+
+ /**
+ * Use this before submitting a TableMap job. It will appropriately set up
+ * the job.
+ *
+ * @param table Binary representation of the table name to read from.
+ * @param scan The scan instance with the columns, time range etc.
+ * @param mapper The mapper class to use.
+ * @param outputKeyClass The class of the output key.
+ * @param outputValueClass The class of the output value.
+ * @param job The current job to adjust. Make sure the passed job is
+ * carrying all necessary HBase configuration.
+ * @throws IOException When setting up the details fails.
+ */
+ public static void initTableMapperJob(byte[] table, Scan scan,
+ Class<? extends TableMapper> mapper,
+ Class<?> outputKeyClass,
+ Class<?> outputValueClass, Job job)
+ throws IOException {
+ initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass, outputValueClass,
+ job, true);
+ }
+
+ /**
+ * Use this before submitting a TableMap job. It will appropriately set up
+ * the job.
+ *
+ * @param table The table name to read from.
+ * @param scan The scan instance with the columns, time range etc.
+ * @param mapper The mapper class to use.
+ * @param outputKeyClass The class of the output key.
+ * @param outputValueClass The class of the output value.
+ * @param job The current job to adjust. Make sure the passed job is
+ * carrying all necessary HBase configuration.
+ * @param addDependencyJars upload HBase jars and jars for any of the configured
+ * job classes via the distributed cache (tmpjars).
+ * @throws IOException When setting up the details fails.
+ */
+ public static void initTableMapperJob(String table, Scan scan,
+ Class<? extends TableMapper> mapper,
+ Class<?> outputKeyClass,
+ Class<?> outputValueClass, Job job,
+ boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass)
+ throws IOException {
+ initTableMapperJob(table, scan, mapper, outputKeyClass, outputValueClass, job,
+ addDependencyJars, true, inputFormatClass);
+ }
+
+
+ /**
+ * Use this before submitting a TableMap job. It will appropriately set up
+ * the job.
+ *
+ * @param table The table name to read from.
+ * @param scan The scan instance with the columns, time range etc.
+ * @param mapper The mapper class to use.
+ * @param outputKeyClass The class of the output key.
+ * @param outputValueClass The class of the output value.
+ * @param job The current job to adjust. Make sure the passed job is
+ * carrying all necessary HBase configuration.
+ * @param addDependencyJars upload HBase jars and jars for any of the configured
+ * job classes via the distributed cache (tmpjars).
+ * @param initCredentials whether to initialize hbase auth credentials for the job
+ * @param inputFormatClass the input format
+ * @throws IOException When setting up the details fails.
+ */
+ public static void initTableMapperJob(String table, Scan scan,
+ Class<? extends TableMapper> mapper,
+ Class<?> outputKeyClass,
+ Class<?> outputValueClass, Job job,
+ boolean addDependencyJars, boolean initCredentials,
+ Class<? extends InputFormat> inputFormatClass)
+ throws IOException {
+ job.setInputFormatClass(inputFormatClass);
+ if (outputValueClass != null) job.setMapOutputValueClass(outputValueClass);
+ if (outputKeyClass != null) job.setMapOutputKeyClass(outputKeyClass);
+ job.setMapperClass(mapper);
+ if (Put.class.equals(outputValueClass)) {
+ job.setCombinerClass(PutCombiner.class);
+ }
+ Configuration conf = job.getConfiguration();
+ HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
+ conf.set(TableInputFormat.INPUT_TABLE, table);
+ conf.set(TableInputFormat.SCAN, convertScanToString(scan));
+ conf.setStrings("io.serializations", conf.get("io.serializations"),
+ MutationSerialization.class.getName(), ResultSerialization.class.getName(),
+ KeyValueSerialization.class.getName());
+ if (addDependencyJars) {
+ addDependencyJars(job);
+ }
+ if (initCredentials) {
+ initCredentials(job);
+ }
+ }
+
+ /**
+ * Use this before submitting a TableMap job. It will appropriately set up
+ * the job.
+ *
+ * @param table Binary representation of the table name to read from.
+ * @param scan The scan instance with the columns, time range etc.
+ * @param mapper The mapper class to use.
+ * @param outputKeyClass The class of the output key.
+ * @param outputValueClass The class of the output value.
+ * @param job The current job to adjust. Make sure the passed job is
+ * carrying all necessary HBase configuration.
+ * @param addDependencyJars upload HBase jars and jars for any of the configured
+ * job classes via the distributed cache (tmpjars).
+ * @param inputFormatClass The class of the input format
+ * @throws IOException When setting up the details fails.
+ */
+ public static void initTableMapperJob(byte[] table, Scan scan,
+ Class<? extends TableMapper> mapper,
+ Class<?> outputKeyClass,
+ Class<?> outputValueClass, Job job,
+ boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass)
+ throws IOException {
+ initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass,
+ outputValueClass, job, addDependencyJars, inputFormatClass);
+ }
+
+ /**
+ * Use this before submitting a TableMap job. It will appropriately set up
+ * the job.
+ *
+ * @param table Binary representation of the table name to read from.
+ * @param scan The scan instance with the columns, time range etc.
+ * @param mapper The mapper class to use.
+ * @param outputKeyClass The class of the output key.
+ * @param outputValueClass The class of the output value.
+ * @param job The current job to adjust. Make sure the passed job is
+ * carrying all necessary HBase configuration.
+ * @param addDependencyJars upload HBase jars and jars for any of the configured
+ * job classes via the distributed cache (tmpjars).
+ * @throws IOException When setting up the details fails.
+ */
+ public static void initTableMapperJob(byte[] table, Scan scan,
+ Class<? extends TableMapper> mapper,
+ Class<?> outputKeyClass,
+ Class<?> outputValueClass, Job job,
+ boolean addDependencyJars)
+ throws IOException {
+ initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass,
+ outputValueClass, job, addDependencyJars, TableInputFormat.class);
+ }
+
+ /**
+ * Use this before submitting a TableMap job. It will appropriately set up
+ * the job.
+ *
+ * @param table The table name to read from.
+ * @param scan The scan instance with the columns, time range etc.
+ * @param mapper The mapper class to use.
+ * @param outputKeyClass The class of the output key.
+ * @param outputValueClass The class of the output value.
+ * @param job The current job to adjust. Make sure the passed job is
+ * carrying all necessary HBase configuration.
+ * @param addDependencyJars upload HBase jars and jars for any of the configured
+ * job classes via the distributed cache (tmpjars).
+ * @throws IOException When setting up the details fails.
+ */
+ public static void initTableMapperJob(String table, Scan scan,
+ Class<? extends TableMapper> mapper,
+ Class<?> outputKeyClass,
+ Class<?> outputValueClass, Job job,
+ boolean addDependencyJars)
+ throws IOException {
+ initTableMapperJob(table, scan, mapper, outputKeyClass,
+ outputValueClass, job, addDependencyJars, TableInputFormat.class);
+ }
+
+ /**
+ * Enable a basic on-heap cache for these jobs. Any BlockCache implementation based on
+ * direct memory will likely cause the map tasks to OOM when opening the region. This
+ * is done here instead of in TableSnapshotRegionRecordReader in case an advanced user
+ * wants to override this behavior in their job.
+ */
+ public static void resetCacheConfig(Configuration conf) {
+ conf.setFloat(
+ HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT);
+ conf.setFloat(HConstants.BUCKET_CACHE_SIZE_KEY, 0f);
+ conf.unset(HConstants.BUCKET_CACHE_IOENGINE_KEY);
+ }
+
+ /**
+ * Sets up the job for reading from one or more table snapshots, with one or more scans
+ * per snapshot.
+ * It bypasses HBase servers and reads directly from snapshot files.
+ *
+ * @param snapshotScans map of snapshot name to scans on that snapshot.
+ * @param mapper The mapper class to use.
+ * @param outputKeyClass The class of the output key.
+ * @param outputValueClass The class of the output value.
+ * @param job The current job to adjust. Make sure the passed job is
+ * carrying all necessary HBase configuration.
+ * @param addDependencyJars upload HBase jars and jars for any of the configured
+ * job classes via the distributed cache (tmpjars).
+ */
+ public static void initMultiTableSnapshotMapperJob(Map<String, Collection<Scan>> snapshotScans,
+ Class<? extends TableMapper> mapper, Class<?> outputKeyClass, Class<?> outputValueClass,
+ Job job, boolean addDependencyJars, Path tmpRestoreDir) throws IOException {
+ MultiTableSnapshotInputFormat.setInput(job.getConfiguration(), snapshotScans, tmpRestoreDir);
+
+ job.setInputFormatClass(MultiTableSnapshotInputFormat.class);
+ if (outputValueClass != null) {
+ job.setMapOutputValueClass(outputValueClass);
+ }
+ if (outputKeyClass != null) {
+ job.setMapOutputKeyClass(outputKeyClass);
+ }
+ job.setMapperClass(mapper);
+ Configuration conf = job.getConfiguration();
+ HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
+
+ if (addDependencyJars) {
+ addDependencyJars(job);
+ addDependencyJarsForClasses(job.getConfiguration(), MetricRegistry.class);
+ }
+
+ resetCacheConfig(job.getConfiguration());
+ }
+
+ /**
+ * Sets up the job for reading from a table snapshot. It bypasses HBase servers
+ * and reads directly from snapshot files.
+ *
+ * @param snapshotName The name of the snapshot (of a table) to read from.
+ * @param scan The scan instance with the columns, time range etc.
+ * @param mapper The mapper class to use.
+ * @param outputKeyClass The class of the output key.
+ * @param outputValueClass The class of the output value.
+ * @param job The current job to adjust. Make sure the passed job is
+ * carrying all necessary HBase configuration.
+ * @param addDependencyJars upload HBase jars and jars for any of the configured
+ * job classes via the distributed cache (tmpjars).
+ *
+ * @param tmpRestoreDir a temporary directory to copy the snapshot files into. The current user
+ * should have write permissions to this directory, and it should not be a subdirectory of rootdir.
+ * The restore directory can be deleted after the job is finished.
+ * @throws IOException When setting up the details fails.
+ * @see TableSnapshotInputFormat
+ */
+ public static void initTableSnapshotMapperJob(String snapshotName, Scan scan,
+ Class<? extends TableMapper> mapper,
+ Class<?> outputKeyClass,
+ Class<?> outputValueClass, Job job,
+ boolean addDependencyJars, Path tmpRestoreDir)
+ throws IOException {
+ TableSnapshotInputFormat.setInput(job, snapshotName, tmpRestoreDir);
+ initTableMapperJob(snapshotName, scan, mapper, outputKeyClass,
+ outputValueClass, job, addDependencyJars, false, TableSnapshotInputFormat.class);
+ resetCacheConfig(job.getConfiguration());
+ }
+
+ /**
+ * Use this before submitting a Multi TableMap job. It will appropriately set
+ * up the job.
+ *
+ * @param scans The list of {@link Scan} objects to read from.
+ * @param mapper The mapper class to use.
+ * @param outputKeyClass The class of the output key.
+ * @param outputValueClass The class of the output value.
+ * @param job The current job to adjust. Make sure the passed job is carrying
+ * all necessary HBase configuration.
+ * @throws IOException When setting up the details fails.
+ */
+ public static void initTableMapperJob(List<Scan> scans,
+ Class<? extends TableMapper> mapper,
+ Class<?> outputKeyClass,
+ Class<?> outputValueClass, Job job) throws IOException {
+ initTableMapperJob(scans, mapper, outputKeyClass, outputValueClass, job,
+ true);
+ }
+
+ /**
+ * Use this before submitting a Multi TableMap job. It will appropriately set
+ * up the job.
+ *
+ * @param scans The list of {@link Scan} objects to read from.
+ * @param mapper The mapper class to use.
+ * @param outputKeyClass The class of the output key.
+ * @param outputValueClass The class of the output value.
+ * @param job The current job to adjust. Make sure the passed job is carrying
+ * all necessary HBase configuration.
+ * @param addDependencyJars upload HBase jars and jars for any of the
+ * configured job classes via the distributed cache (tmpjars).
+ * @throws IOException When setting up the details fails.
+ */
+ public static void initTableMapperJob(List<Scan> scans,
+ Class<? extends TableMapper> mapper,
+ Class<?> outputKeyClass,
+ Class<?> outputValueClass, Job job,
+ boolean addDependencyJars) throws IOException {
+ initTableMapperJob(scans, mapper, outputKeyClass, outputValueClass, job,
+ addDependencyJars, true);
+ }
+
+ /**
+ * Use this before submitting a Multi TableMap job. It will appropriately set
+ * up the job.
+ *
+ * @param scans The list of {@link Scan} objects to read from.
+ * @param mapper The mapper class to use.
+ * @param outputKeyClass The class of the output key.
+ * @param outputValueClass The class of the output value.
+ * @param job The current job to adjust. Make sure the passed job is carrying
+ * all necessary HBase configuration.
+ * @param addDependencyJars upload HBase jars and jars for any of the
+ * configured job classes via the distributed cache (tmpjars).
+ * @param initCredentials whether to initialize hbase auth credentials for the job
+ * @throws IOException When setting up the details fails.
+ */
+ public static void initTableMapperJob(List<Scan> scans,
+ Class<? extends TableMapper> mapper,
+ Class<?> outputKeyClass,
+ Class<?> outputValueClass, Job job,
+ boolean addDependencyJars,
+ boolean initCredentials) throws IOException {
+ job.setInputFormatClass(MultiTableInputFormat.class);
+ if (outputValueClass != null) {
+ job.setMapOutputValueClass(outputValueClass);
+ }
+ if (outputKeyClass != null) {
+ job.setMapOutputKeyClass(outputKeyClass);
+ }
+ job.setMapperClass(mapper);
+ Configuration conf = job.getConfiguration();
+ HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
+ List<String> scanStrings = new ArrayList<>();
+
+ for (Scan scan : scans) {
+ scanStrings.add(convertScanToString(scan));
+ }
+ job.getConfiguration().setStrings(MultiTableInputFormat.SCANS,
+ scanStrings.toArray(new String[scanStrings.size()]));
+
+ if (addDependencyJars) {
+ addDependencyJars(job);
+ }
+
+ if (initCredentials) {
+ initCredentials(job);
+ }
+ }
+
+ public static void initCredentials(Job job) throws IOException {
+ UserProvider userProvider = UserProvider.instantiate(job.getConfiguration());
+ if (userProvider.isHadoopSecurityEnabled()) {
+ // propagate delegation related props from launcher job to MR job
+ if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
+ job.getConfiguration().set("mapreduce.job.credentials.binary",
+ System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
+ }
+ }
+
+ if (userProvider.isHBaseSecurityEnabled()) {
+ try {
+ // init credentials for remote cluster
+ String quorumAddress = job.getConfiguration().get(TableOutputFormat.QUORUM_ADDRESS);
+ User user = userProvider.getCurrent();
+ if (quorumAddress != null) {
+ Configuration peerConf = HBaseConfiguration.createClusterConf(job.getConfiguration(),
+ quorumAddress, TableOutputFormat.OUTPUT_CONF_PREFIX);
+ Connection peerConn = ConnectionFactory.createConnection(peerConf);
+ try {
+ TokenUtil.addTokenForJob(peerConn, user, job);
+ } finally {
+ peerConn.close();
+ }
+ }
+
+ Connection conn = ConnectionFactory.createConnection(job.getConfiguration());
+ try {
+ TokenUtil.addTokenForJob(conn, user, job);
+ } finally {
+ conn.close();
+ }
+ } catch (InterruptedException ie) {
+ LOG.info("Interrupted obtaining user authentication token");
+ Thread.currentThread().interrupt();
+ }
+ }
+ }
+
+ /**
+ * Obtain an authentication token, for the specified cluster, on behalf of the current user
+ * and add it to the credentials for the given map reduce job.
+ *
+ * The quorumAddress is the key to the ZK ensemble, which contains:
+ * hbase.zookeeper.quorum, hbase.zookeeper.client.port and
+ * zookeeper.znode.parent
+ *
+ * @param job The job that requires the permission.
+ * @param quorumAddress string that contains the 3 required configurations
+ * @throws IOException When the authentication token cannot be obtained.
+ * @deprecated Since 1.2.0, use {@link #initCredentialsForCluster(Job, Configuration)} instead.
+ */
+ @Deprecated
+ public static void initCredentialsForCluster(Job job, String quorumAddress)
+ throws IOException {
+ Configuration peerConf = HBaseConfiguration.createClusterConf(job.getConfiguration(),
+ quorumAddress);
+ initCredentialsForCluster(job, peerConf);
+ }
+
+ /**
+ * Obtain an authentication token, for the specified cluster, on behalf of the current user
+ * and add it to the credentials for the given map reduce job.
+ *
+ * @param job The job that requires the permission.
+ * @param conf The configuration to use in connecting to the peer cluster
+ * @throws IOException When the authentication token cannot be obtained.
+ */
+ public static void initCredentialsForCluster(Job job, Configuration conf)
+ throws IOException {
+ UserProvider userProvider = UserProvider.instantiate(job.getConfiguration());
+ if (userProvider.isHBaseSecurityEnabled()) {
+ try {
+ Connection peerConn = ConnectionFactory.createConnection(conf);
+ try {
+ TokenUtil.addTokenForJob(peerConn, userProvider.getCurrent(), job);
+ } finally {
+ peerConn.close();
+ }
+ } catch (InterruptedException e) {
+ LOG.info("Interrupted obtaining user authentication token");
+ Thread.interrupted();
+ }
+ }
+ }
+
+ /**
+ * Writes the given scan into a Base64 encoded string.
+ *
+ * @param scan The scan to write out.
+ * @return The scan saved in a Base64 encoded string.
+ * @throws IOException When writing the scan fails.
+ */
+ public static String convertScanToString(Scan scan) throws IOException {
+ ClientProtos.Scan proto = ProtobufUtil.toScan(scan);
+ return Base64.encodeBytes(proto.toByteArray());
+ }
+
+ /**
+ * Converts the given Base64 string back into a Scan instance.
+ *
+ * @param base64 The scan details.
+ * @return The newly created Scan instance.
+ * @throws IOException When reading the scan instance fails.
+ */
+ public static Scan convertStringToScan(String base64) throws IOException {
+ byte [] decoded = Base64.decode(base64);
+ return ProtobufUtil.toScan(ClientProtos.Scan.parseFrom(decoded));
+ }
+
+ /**
+ * Use this before submitting a TableReduce job. It will
+ * appropriately set up the JobConf.
+ *
+ * @param table The output table.
+ * @param reducer The reducer class to use.
+ * @param job The current job to adjust.
+ * @throws IOException When determining the region count fails.
+ */
+ public static void initTableReducerJob(String table,
+ Class<? extends TableReducer> reducer, Job job)
+ throws IOException {
+ initTableReducerJob(table, reducer, job, null);
+ }
+
+ /**
+ * Use this before submitting a TableReduce job. It will
+ * appropriately set up the JobConf.
+ *
+ * @param table The output table.
+ * @param reducer The reducer class to use.
+ * @param job The current job to adjust.
+ * @param partitioner Partitioner to use. Pass <code>null</code> to use
+ * default partitioner.
+ * @throws IOException When determining the region count fails.
+ */
+ public static void initTableReducerJob(String table,
+ Class<? extends TableReducer> reducer, Job job,
+ Class partitioner) throws IOException {
+ initTableReducerJob(table, reducer, job, partitioner, null, null, null);
+ }
+
+ /**
+ * Use this before submitting a TableReduce job. It will
+ * appropriately set up the JobConf.
+ *
+ * @param table The output table.
+ * @param reducer The reducer class to use.
+ * @param job The current job to adjust. Make sure the passed job is
+ * carrying all necessary HBase configuration.
+ * @param partitioner Partitioner to use. Pass <code>null</code> to use
+ * default partitioner.
+ * @param quorumAddress Distant cluster to write to; default is null for
+ * output to the cluster that is designated in <code>hbase-site.xml</code>.
+ * Set this String to the zookeeper ensemble of an alternate remote cluster
+ * when you want the reduce to write to a cluster other than the default;
+ * e.g. when copying tables between clusters, the source would be designated
+ * by <code>hbase-site.xml</code> and this param would carry the ensemble
+ * address of the remote cluster. The format to pass is particular.
+ * Pass <code> <hbase.zookeeper.quorum>:<
+ * hbase.zookeeper.client.port>:<zookeeper.znode.parent>
+ * </code> such as <code>server,server2,server3:2181:/hbase</code>.
+ * @param serverClass redefined hbase.regionserver.class
+ * @param serverImpl redefined hbase.regionserver.impl
+ * @throws IOException When determining the region count fails.
+ */
+ public static void initTableReducerJob(String table,
+ Class<? extends TableReducer> reducer, Job job,
+ Class partitioner, String quorumAddress, String serverClass,
+ String serverImpl) throws IOException {
+ initTableReducerJob(table, reducer, job, partitioner, quorumAddress,
+ serverClass, serverImpl, true);
+ }
+
+ /**
+ * Use this before submitting a TableReduce job. It will
+ * appropriately set up the JobConf.
+ *
+ * @param table The output table.
+ * @param reducer The reducer class to use.
+ * @param job The current job to adjust. Make sure the passed job is
+ * carrying all necessary HBase configuration.
+ * @param partitioner Partitioner to use. Pass <code>null</code> to use
+ * default partitioner.
+ * @param quorumAddress Distant cluster to write to; default is null for
+ * output to the cluster that is designated in <code>hbase-site.xml</code>.
+ * Set this String to the zookeeper ensemble of an alternate remote cluster
+ * when you want the reduce to write to a cluster other than the default;
+ * e.g. when copying tables between clusters, the source would be designated
+ * by <code>hbase-site.xml</code> and this param would carry the ensemble
+ * address of the remote cluster. The format to pass is particular.
+ * Pass <code> <hbase.zookeeper.quorum>:<
+ * hbase.zookeeper.client.port>:<zookeeper.znode.parent>
+ * </code> such as <code>server,server2,server3:2181:/hbase</code>.
+ * @param serverClass redefined hbase.regionserver.class
+ * @param serverImpl redefined hbase.regionserver.impl
+ * @param addDependencyJars upload HBase jars and jars for any of the configured
+ * job classes via the distributed cache (tmpjars).
+ * @throws IOException When determining the region count fails.
+ */
+ public static void initTableReducerJob(String table,
+ Class<? extends TableReducer> reducer, Job job,
+ Class partitioner, String quorumAddress, String serverClass,
+ String serverImpl, boolean addDependencyJars) throws IOException {
+
+ Configuration conf = job.getConfiguration();
+ HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
+ job.setOutputFormatClass(TableOutputFormat.class);
+ if (reducer != null) job.setReducerClass(reducer);
+ conf.set(TableOutputFormat.OUTPUT_TABLE, table);
+ conf.setStrings("io.serializations", conf.get("io.serializations"),
+ MutationSerialization.class.getName(), ResultSerialization.class.getName());
+ // If passed a quorum/ensemble address, pass it on to TableOutputFormat.
+ if (quorumAddress != null) {
+ // Calling this will validate the format
+ ZKConfig.validateClusterKey(quorumAddress);
+ conf.set(TableOutputFormat.QUORUM_ADDRESS, quorumAddress);
+ }
+ if (serverClass != null && serverImpl != null) {
+ conf.set(TableOutputFormat.REGION_SERVER_CLASS, serverClass);
+ conf.set(TableOutputFormat.REGION_SERVER_IMPL, serverImpl);
+ }
+ job.setOutputKeyClass(ImmutableBytesWritable.class);
+ job.setOutputValueClass(Writable.class);
+ if (partitioner == HRegionPartitioner.class) {
+ job.setPartitionerClass(HRegionPartitioner.class);
+ int regions = MetaTableAccessor.getRegionCount(conf, TableName.valueOf(table));
+ if (job.getNumReduceTasks() > regions) {
+ job.setNumReduceTasks(regions);
+ }
+ } else if (partitioner != null) {
+ job.setPartitionerClass(partitioner);
+ }
+
+ if (addDependencyJars) {
+ addDependencyJars(job);
+ }
+
+ initCredentials(job);
+ }
+
+ /**
+ * Ensures that the given number of reduce tasks for the given job
+ * configuration does not exceed the number of regions for the given table.
+ *
+ * @param table The table to get the region count for.
+ * @param job The current job to adjust.
+ * @throws IOException When retrieving the table details fails.
+ */
+ public static void limitNumReduceTasks(String table, Job job)
+ throws IOException {
+ int regions =
+ MetaTableAccessor.getRegionCount(job.getConfiguration(), TableName.valueOf(table));
+ if (job.getNumReduceTasks() > regions)
+ job.setNumReduceTasks(regions);
+ }
+
+ /**
+ * Sets the number of reduce tasks for the given job configuration to the
+ * number of regions the given table has.
+ *
+ * @param table The table to get the region count for.
+ * @param job The current job to adjust.
+ * @throws IOException When retrieving the table details fails.
+ */
+ public static void setNumReduceTasks(String table, Job job)
+ throws IOException {
+ job.setNumReduceTasks(MetaTableAccessor.getRegionCount(job.getConfiguration(),
+ TableName.valueOf(table)));
+ }
+
+ /**
+ * Sets the number of rows to return and cache with each scanner iteration.
+ * Higher caching values will enable faster mapreduce jobs at the expense of
+ * requiring more heap to contain the cached rows.
+ *
+ * @param job The current job to adjust.
+ * @param batchSize The number of rows to return in batch with each scanner
+ * iteration.
+ */
+ public static void setScannerCaching(Job job, int batchSize) {
+ job.getConfiguration().setInt("hbase.client.scanner.caching", batchSize);
+ }
+
+ /**
+ * Add HBase and its dependencies (only) to the job configuration.
+ * <p>
+ * This is intended as a low-level API, facilitating code reuse between this
+ * class and its mapred counterpart. It is also of use to external tools that
+ * need to build a MapReduce job that interacts with HBase but want
+ * fine-grained control over the jars shipped to the cluster.
+ * </p>
+ * @param conf The Configuration object to extend with dependencies.
+ * @see org.apache.hadoop.hbase.mapred.TableMapReduceUtil
+ * @see <a href="https://issues.apache.org/jira/browse/PIG-3285">PIG-3285</a>
+ */
+ public static void addHBaseDependencyJars(Configuration conf) throws IOException {
+
+ // PrefixTreeCodec is part of the hbase-prefix-tree module. If not included in MR jobs jar
+ // dependencies, MR jobs that write encoded hfiles will fail.
+ // We use reflection here to prevent a circular module dependency.
+ // TODO - if we extract the MR into a module, make it depend on hbase-prefix-tree.
+ Class prefixTreeCodecClass = null;
+ try {
+ prefixTreeCodecClass =
+ Class.forName("org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeCodec");
+ } catch (ClassNotFoundException e) {
+ // this will show up in unit tests but should not show in real deployments
+ LOG.warn("The hbase-prefix-tree module jar containing PrefixTreeCodec is not present." +
+ " Continuing without it.");
+ }
+
+ addDependencyJarsForClasses(conf,
+ // explicitly pull a class from each module
+ org.apache.hadoop.hbase.HConstants.class, // hbase-common
+ org.apache.hadoop.hbase.protobuf.generated.ClientProtos.class, // hbase-protocol
+ org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.class, // hbase-protocol-shaded
+ org.apache.hadoop.hbase.client.Put.class, // hbase-client
+ org.apache.hadoop.hbase.CompatibilityFactory.class, // hbase-hadoop-compat
+ org.apache.hadoop.hbase.mapreduce.JobUtil.class, // hbase-hadoop2-compat
+ org.apache.hadoop.hbase.mapreduce.TableMapper.class, // hbase-server
+ org.apache.hadoop.hbase.metrics.impl.FastLongHistogram.class, // hbase-metrics
+ org.apache.hadoop.hbase.metrics.Snapshot.class, // hbase-metrics-api
+ prefixTreeCodecClass, // hbase-prefix-tree (if null will be skipped)
+ // pull necessary dependencies
+ org.apache.zookeeper.ZooKeeper.class,
+ org.apache.hadoop.hbase.shaded.io.netty.channel.Channel.class,
+ com.google.protobuf.Message.class,
+ org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists.class,
+ org.apache.htrace.Trace.class,
+ com.codahale.metrics.MetricRegistry.class);
+ }
+
+ /**
+ * Returns a classpath string built from the content of the "tmpjars" value in {@code conf}.
+ * Also exposed to shell scripts via `bin/hbase mapredcp`.
+ */
+ public static String buildDependencyClasspath(Configuration conf) {
+ if (conf == null) {
+ throw new IllegalArgumentException("Must provide a configuration object.");
+ }
+ Set<String> paths = new HashSet<>(conf.getStringCollection("tmpjars"));
+ if (paths.isEmpty()) {
+ throw new IllegalArgumentException("Configuration contains no tmpjars.");
+ }
+ StringBuilder sb = new StringBuilder();
+ for (String s : paths) {
+ // entries can take the form 'file:/path/to/file.jar'.
+ int idx = s.indexOf(":");
+ if (idx != -1) s = s.substring(idx + 1);
+ if (sb.length() > 0) sb.append(File.pathSeparator);
+ sb.append(s);
+ }
+ return sb.toString();
+ }
+
+ /**
+ * Add the HBase dependency jars as well as jars for any of the configured
+ * job classes to the job configuration, so that JobClient will ship them
+ * to the cluster and add them to the DistributedCache.
+ */
+ public static void addDependencyJars(Job job) throws IOException {
+ addHBaseDependencyJars(job.getConfiguration());
+ try {
+ addDependencyJarsForClasses(job.getConfiguration(),
+ // when making changes here, consider also mapred.TableMapReduceUtil
+ // pull job classes
+ job.getMapOutputKeyClass(),
+ job.getMapOutputValueClass(),
+ job.getInputFormatClass(),
+ job.getOutputKeyClass(),
+ job.getOutputValueClass(),
+ job.getOutputFormatClass(),
+ job.getPartitionerClass(),
+ job.getCombinerClass());
+ } catch (ClassNotFoundException e) {
+ throw new IOException(e);
+ }
+ }
+
+ /**
+ * Add the jars containing the given classes to the job's configuration
+ * such that JobClient will ship them to the cluster and add them to
+ * the DistributedCache.
+ * @deprecated rely on {@link #addDependencyJars(Job)} instead.
+ */
+ @Deprecated
+ public static void addDependencyJars(Configuration conf,
+ Class<?>... classes) throws IOException {
+ LOG.warn("The addDependencyJars(Configuration, Class<?>...) method has been deprecated since it"
+ + " is easy to use incorrectly. Most users should rely on addDependencyJars(Job) " +
+ "instead. See HBASE-8386 for more details.");
+ addDependencyJarsForClasses(conf, classes);
+ }
+
+ /**
+ * Add the jars containing the given classes to the job's configuration
+ * such that JobClient will ship them to the cluster and add them to
+ * the DistributedCache.
+ *
+ * N.B. that this method at most adds one jar per class given. If there is more than one
+ * jar available containing a class with the same name as a given class, we don't define
+ * which of those jars might be chosen.
+ *
+ * @param conf The Hadoop Configuration to modify
+ * @param classes will add just those dependencies needed to find the given classes
+ * @throws IOException if an underlying library call fails.
+ */
+ @InterfaceAudience.Private
+ public static void addDependencyJarsForClasses(Configuration conf,
+ Class<?>... classes) throws IOException {
+
+ FileSystem localFs = FileSystem.getLocal(conf);
+ Set<String> jars = new HashSet<>();
+ // Add jars that are already in the tmpjars variable
+ jars.addAll(conf.getStringCollection("tmpjars"));
+
+ // add jars as we find them to a map of contents jar name so that we can avoid
+ // creating new jars for classes that have already been packaged.
+ Map<String, String> packagedClasses = new HashMap<>();
+
+ // Add jars containing the specified classes
+ for (Class<?> clazz : classes) {
+ if (clazz == null) continue;
+
+ Path path = findOrCreateJar(clazz, localFs, packagedClasses);
+ if (path == null) {
+ LOG.warn("Could not find jar for class " + clazz +
+ " in order to ship it to the cluster.");
+ continue;
+ }
+ if (!localFs.exists(path)) {
+ LOG.warn("Could not validate jar file " + path + " for class "
+ + clazz);
+ continue;
+ }
+ jars.add(path.toString());
+ }
+ if (jars.isEmpty()) return;
+
+ conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[jars.size()])));
+ }
+
+ /**
+ * Finds the Jar for a class or creates it if it doesn't exist. If the class is in
+ * a directory in the classpath, it creates a Jar on the fly with the
+ * contents of the directory and returns the path to that Jar. If a Jar is
+ * created, it is created in the system temporary directory. Otherwise,
+ * returns an existing jar that contains a class of the same name. Maintains
+ * a mapping from jar contents to the tmp jar created.
+ * @param my_class the class to find.
+ * @param fs the FileSystem with which to qualify the returned path.
+ * @param packagedClasses a map of class name to path.
+ * @return a jar file that contains the class.
+ * @throws IOException
+ */
+ private static Path findOrCreateJar(Class<?> my_class, FileSystem fs,
+ Map<String, String> packagedClasses)
+ throws IOException {
+ // attempt to locate an existing jar for the class.
+ String jar = findContainingJar(my_class, packagedClasses);
+ if (null == jar || jar.isEmpty()) {
+ jar = getJar(my_class);
+ updateMap(jar, packagedClasses);
+ }
+
+ if (null == jar || jar.isEmpty()) {
+ return null;
+ }
+
+ LOG.debug(String.format("For class %s, using jar %s", my_class.getName(), jar));
+ return new Path(jar).makeQualified(fs);
+ }
+
+ /**
+ * Add entries to <code>packagedClasses</code> corresponding to class files
+ * contained in <code>jar</code>.
+ * @param jar The jar whose content to list.
+ * @param packagedClasses map[class -> jar]
+ */
+ private static void updateMap(String jar, Map<String, String> packagedClasses) throws IOException {
+ if (null == jar || jar.isEmpty()) {
+ return;
+ }
+ ZipFile zip = null;
+ try {
+ zip = new ZipFile(jar);
+ for (Enumeration<? extends ZipEntry> iter = zip.entries(); iter.hasMoreElements();) {
+ ZipEntry entry = iter.nextElement();
+ if (entry.getName().endsWith("class")) {
+ packagedClasses.put(entry.getName(), jar);
+ }
+ }
+ } finally {
+ if (null != zip) zip.close();
+ }
+ }
+
+ /**
+ * Find a jar that contains a class of the same name, if any. It will return
+ * a jar file, even if that is not the first thing on the class path that
+ * has a class with the same name. Looks first on the classpath and then in
+ * the <code>packagedClasses</code> map.
+ * @param my_class the class to find.
+ * @return a jar file that contains the class, or null.
+ * @throws IOException
+ */
+ private static String findContainingJar(Class<?> my_class, Map<String, String> packagedClasses)
+ throws IOException {
+ ClassLoader loader = my_class.getClassLoader();
+
+ String class_file = my_class.getName().replaceAll("\\.", "/") + ".class";
+
+ if (loader != null) {
+ // first search the classpath
+ for (Enumeration<URL> itr = loader.getResources(class_file); itr.hasMoreElements();) {
+ URL url = itr.nextElement();
+ if ("jar".equals(url.getProtocol())) {
+ String toReturn = url.getPath();
+ if (toReturn.startsWith("file:")) {
+ toReturn = toReturn.substring("file:".length());
+ }
+ // URLDecoder is a misnamed class, since it actually decodes
+ // x-www-form-urlencoded MIME type rather than actual
+ // URL encoding (which the file path has). Therefore it would
+ // decode +s to ' 's which is incorrect (spaces are actually
+ // either unencoded or encoded as "%20"). Replace +s first, so
+ // that they are kept sacred during the decoding process.
+ toReturn = toReturn.replaceAll("\\+", "%2B");
+ toReturn = URLDecoder.decode(toReturn, "UTF-8");
+ return toReturn.replaceAll("!.*$", "");
+ }
+ }
+ }
+
+ // now look in any jars we've packaged using JarFinder. Returns null when
+ // no jar is found.
+ return packagedClasses.get(class_file);
+ }
+
+ /**
+ * Invoke 'getJar' on a custom JarFinder implementation. Useful for some job
+ * configuration contexts (HBASE-8140) and also for testing on MRv2;
+ * check whether HADOOP-9426 is available.
+ * @param my_class the class to find.
+ * @return a jar file that contains the class, or null.
+ */
+ private static String getJar(Class<?> my_class) {
+ String ret = null;
+ try {
+ ret = JarFinder.getJar(my_class);
+ } catch (Exception e) {
+ // rethrow any failure (typically reflection-related) as a RuntimeException
+ throw new RuntimeException("getJar invocation failed.", e);
+ }
+
+ return ret;
+ }
+}
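To make the helpers above concrete, a typical driver might be wired up as sketched below. This is
illustrative only and not part of this patch; the table names, the MyDriver and MyMapper class
names, and the caching value are assumptions.

    // Inside a driver main(String[]) that declares "throws Exception".
    Configuration conf = HBaseConfiguration.create();
    Job job = Job.getInstance(conf, "example-copy-job");
    job.setJarByClass(MyDriver.class);

    Scan scan = new Scan();
    scan.setCacheBlocks(false);                       // typical for full-table MR scans
    TableMapReduceUtil.setScannerCaching(job, 500);   // rows fetched per scanner RPC

    TableMapReduceUtil.initTableMapperJob("source_table", scan, MyMapper.class,
        ImmutableBytesWritable.class, Put.class, job);
    TableMapReduceUtil.initTableReducerJob("target_table", null, job); // null = no reducer class
    System.exit(job.waitForCompletion(true) ? 0 : 1);

Behind the scenes, initTableMapperJob serializes the Scan into the job configuration via
convertScanToString (Base64), and initCredentials obtains HBase delegation tokens when security is
enabled.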
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapper.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapper.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapper.java
new file mode 100644
index 0000000..9a7dcb7
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapper.java
@@ -0,0 +1,38 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.mapreduce.Mapper;
+
+/**
+ * Extends the base <code>Mapper</code> class to add the required input key
+ * and value classes.
+ *
+ * @param <KEYOUT> The type of the key.
+ * @param <VALUEOUT> The type of the value.
+ * @see org.apache.hadoop.mapreduce.Mapper
+ */
+@InterfaceAudience.Public
+public abstract class TableMapper<KEYOUT, VALUEOUT>
+extends Mapper<ImmutableBytesWritable, Result, KEYOUT, VALUEOUT> {
+
+}
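A minimal concrete subclass might look like the sketch below (illustrative only; the MyMapper name
and the column family/qualifier are assumptions). It reads one cell of each source row and emits a
Put keyed by the same row, matching the mapper used in the driver sketch above.

    import java.io.IOException;
    import org.apache.hadoop.hbase.client.Put;
    import org.apache.hadoop.hbase.client.Result;
    import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
    import org.apache.hadoop.hbase.util.Bytes;

    public class MyMapper extends TableMapper<ImmutableBytesWritable, Put> {
      @Override
      protected void map(ImmutableBytesWritable rowKey, Result columns, Context context)
          throws IOException, InterruptedException {
        // Copy one cell of the source row into a Put destined for the output table.
        byte[] value = columns.getValue(Bytes.toBytes("cf"), Bytes.toBytes("q"));
        if (value != null) {
          Put put = new Put(rowKey.get());
          put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q"), value);
          context.write(rowKey, put);
        }
      }
    }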
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableOutputCommitter.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableOutputCommitter.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableOutputCommitter.java
new file mode 100644
index 0000000..749fd85
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableOutputCommitter.java
@@ -0,0 +1,67 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.OutputCommitter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+/**
+ * Small committer class that does not do anything.
+ */
+@InterfaceAudience.Public
+public class TableOutputCommitter extends OutputCommitter {
+
+ @Override
+ public void abortTask(TaskAttemptContext arg0) throws IOException {
+ }
+
+ @Override
+ public void cleanupJob(JobContext arg0) throws IOException {
+ }
+
+ @Override
+ public void commitTask(TaskAttemptContext arg0) throws IOException {
+ }
+
+ @Override
+ public boolean needsTaskCommit(TaskAttemptContext arg0) throws IOException {
+ return false;
+ }
+
+ @Override
+ public void setupJob(JobContext arg0) throws IOException {
+ }
+
+ @Override
+ public void setupTask(TaskAttemptContext arg0) throws IOException {
+ }
+
+ public boolean isRecoverySupported() {
+ return true;
+ }
+
+ public void recoverTask(TaskAttemptContext taskContext)
+ throws IOException
+ {
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableOutputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableOutputFormat.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableOutputFormat.java
new file mode 100644
index 0000000..604ef00
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableOutputFormat.java
@@ -0,0 +1,239 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.TableNotEnabledException;
+import org.apache.hadoop.hbase.TableNotFoundException;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.BufferedMutator;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Mutation;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.OutputCommitter;
+import org.apache.hadoop.mapreduce.OutputFormat;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+/**
+ * Convert Map/Reduce output and write it to an HBase table. The KEY is ignored
+ * while the output value <u>must</u> be either a {@link Put} or a
+ * {@link Delete} instance.
+ */
+@InterfaceAudience.Public
+public class TableOutputFormat<KEY> extends OutputFormat<KEY, Mutation>
+implements Configurable {
+
+ private static final Log LOG = LogFactory.getLog(TableOutputFormat.class);
+
+ /** Job parameter that specifies the output table. */
+ public static final String OUTPUT_TABLE = "hbase.mapred.outputtable";
+
+ /**
+ * Prefix for configuration property overrides to apply in {@link #setConf(Configuration)}.
+ * For keys matching this prefix, the prefix is stripped, and the value is set in the
+ * configuration with the resulting key, i.e. the entry "hbase.mapred.output.key1 = value1"
+ * would be set in the configuration as "key1 = value1". Use this to set properties
+ * which should only be applied to the {@code TableOutputFormat} configuration and not the
+ * input configuration.
+ */
+ public static final String OUTPUT_CONF_PREFIX = "hbase.mapred.output.";
+
+ /**
+ * Optional job parameter to specify a peer cluster.
+ * Used to specify a remote cluster when copying between HBase clusters (the
+ * source is picked up from <code>hbase-site.xml</code>).
+ * @see TableMapReduceUtil#initTableReducerJob(String, Class, org.apache.hadoop.mapreduce.Job, Class, String, String, String)
+ */
+ public static final String QUORUM_ADDRESS = OUTPUT_CONF_PREFIX + "quorum";
+
+ /** Optional job parameter to specify peer cluster's ZK client port */
+ public static final String QUORUM_PORT = OUTPUT_CONF_PREFIX + "quorum.port";
+
+ /** Optional specification of the rs class name of the peer cluster */
+ public static final String
+ REGION_SERVER_CLASS = OUTPUT_CONF_PREFIX + "rs.class";
+ /** Optional specification of the rs impl name of the peer cluster */
+ public static final String
+ REGION_SERVER_IMPL = OUTPUT_CONF_PREFIX + "rs.impl";
+
+ /** The configuration. */
+ private Configuration conf = null;
+
+ /**
+ * Writes the reducer output to an HBase table.
+ */
+ protected class TableRecordWriter
+ extends RecordWriter<KEY, Mutation> {
+
+ private Connection connection;
+ private BufferedMutator mutator;
+
+ /**
+ * Creates a connection and a {@link BufferedMutator} for the configured output table.
+ *
+ * @throws IOException When the connection to the cluster or the mutator cannot be created.
+ */
+ public TableRecordWriter() throws IOException {
+ String tableName = conf.get(OUTPUT_TABLE);
+ this.connection = ConnectionFactory.createConnection(conf);
+ this.mutator = connection.getBufferedMutator(TableName.valueOf(tableName));
+ LOG.info("Created table instance for " + tableName);
+ }
+ /**
+ * Closes the writer, in this case flush table commits.
+ *
+ * @param context The context.
+ * @throws IOException When closing the writer fails.
+ * @see RecordWriter#close(TaskAttemptContext)
+ */
+ @Override
+ public void close(TaskAttemptContext context) throws IOException {
+ try {
+ if (mutator != null) {
+ mutator.close();
+ }
+ } finally {
+ if (connection != null) {
+ connection.close();
+ }
+ }
+ }
+
+ /**
+ * Writes a key/value pair into the table.
+ *
+ * @param key The key.
+ * @param value The value.
+ * @throws IOException When writing fails.
+ * @see RecordWriter#write(Object, Object)
+ */
+ @Override
+ public void write(KEY key, Mutation value)
+ throws IOException {
+ if (!(value instanceof Put) && !(value instanceof Delete)) {
+ throw new IOException("Pass a Delete or a Put");
+ }
+ mutator.mutate(value);
+ }
+ }
+
+ /**
+ * Creates a new record writer.
+ *
+ * Be aware that the baseline javadoc gives the impression that there is a single
+ * {@link RecordWriter} per job but in HBase, it is more natural if we give you a new
+ * RecordWriter per call of this method. You must close the returned RecordWriter when done.
+ * Failure to do so will drop writes.
+ *
+ * @param context The current task context.
+ * @return The newly created writer instance.
+ * @throws IOException When creating the writer fails.
+ * @throws InterruptedException When the job is cancelled.
+ */
+ @Override
+ public RecordWriter<KEY, Mutation> getRecordWriter(TaskAttemptContext context)
+ throws IOException, InterruptedException {
+ return new TableRecordWriter();
+ }
+
+ /**
+ * Checks if the output table exists and is enabled.
+ *
+ * @param context The current context.
+ * @throws IOException When the check fails.
+ * @throws InterruptedException When the job is aborted.
+ * @see OutputFormat#checkOutputSpecs(JobContext)
+ */
+ @Override
+ public void checkOutputSpecs(JobContext context) throws IOException,
+ InterruptedException {
+
+ try (Admin admin = ConnectionFactory.createConnection(getConf()).getAdmin()) {
+ TableName tableName = TableName.valueOf(this.conf.get(OUTPUT_TABLE));
+ if (!admin.tableExists(tableName)) {
+ throw new TableNotFoundException("Can't write, table does not exist:" +
+ tableName.getNameAsString());
+ }
+
+ if (!admin.isTableEnabled(tableName)) {
+ throw new TableNotEnabledException("Can't write, table is not enabled: " +
+ tableName.getNameAsString());
+ }
+ }
+ }
+
+ /**
+ * Returns the output committer.
+ *
+ * @param context The current context.
+ * @return The committer.
+ * @throws IOException When creating the committer fails.
+ * @throws InterruptedException When the job is aborted.
+ * @see OutputFormat#getOutputCommitter(TaskAttemptContext)
+ */
+ @Override
+ public OutputCommitter getOutputCommitter(TaskAttemptContext context)
+ throws IOException, InterruptedException {
+ return new TableOutputCommitter();
+ }
+
+ @Override
+ public Configuration getConf() {
+ return conf;
+ }
+
+ @Override
+ public void setConf(Configuration otherConf) {
+ String tableName = otherConf.get(OUTPUT_TABLE);
+ if (tableName == null || tableName.length() <= 0) {
+ throw new IllegalArgumentException("Must specify table name");
+ }
+
+ String address = otherConf.get(QUORUM_ADDRESS);
+ int zkClientPort = otherConf.getInt(QUORUM_PORT, 0);
+ String serverClass = otherConf.get(REGION_SERVER_CLASS);
+ String serverImpl = otherConf.get(REGION_SERVER_IMPL);
+
+ try {
+ this.conf = HBaseConfiguration.createClusterConf(otherConf, address, OUTPUT_CONF_PREFIX);
+
+ if (serverClass != null) {
+ this.conf.set(HConstants.REGION_SERVER_IMPL, serverImpl);
+ }
+ if (zkClientPort != 0) {
+ this.conf.setInt(HConstants.ZOOKEEPER_CLIENT_PORT, zkClientPort);
+ }
+ } catch(IOException e) {
+ LOG.error(e);
+ throw new RuntimeException(e);
+ }
+ }
+}
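The OUTPUT_CONF_PREFIX keys above can also be set directly when a job is not wired up through
TableMapReduceUtil.initTableReducerJob. The sketch below is illustrative only and not part of this
patch; the quorum string, table name, and the write-buffer override are assumptions, and an
existing Job instance is assumed.

    Configuration conf = job.getConfiguration();
    conf.set(TableOutputFormat.OUTPUT_TABLE, "target_table");
    // Write to a peer cluster: <hbase.zookeeper.quorum>:<client port>:<znode parent>
    conf.set(TableOutputFormat.QUORUM_ADDRESS, "zk1,zk2,zk3:2181:/hbase");
    // Prefixed keys are stripped and applied only to the output-side configuration in setConf().
    conf.set(TableOutputFormat.OUTPUT_CONF_PREFIX + "hbase.client.write.buffer", "4194304");
    job.setOutputFormatClass(TableOutputFormat.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Mutation.class);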
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableRecordReader.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableRecordReader.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableRecordReader.java
new file mode 100644
index 0000000..f66520b
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableRecordReader.java
@@ -0,0 +1,147 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+/**
+ * Iterates over HBase table data and returns (ImmutableBytesWritable, Result)
+ * pairs.
+ */
+@InterfaceAudience.Public
+public class TableRecordReader
+extends RecordReader<ImmutableBytesWritable, Result> {
+
+ private TableRecordReaderImpl recordReaderImpl = new TableRecordReaderImpl();
+
+ /**
+ * Restart from survivable exceptions by creating a new scanner.
+ *
+ * @param firstRow The first row to start at.
+ * @throws IOException When restarting fails.
+ */
+ public void restart(byte[] firstRow) throws IOException {
+ this.recordReaderImpl.restart(firstRow);
+ }
+
+ /**
+ * @param table the {@link Table} to scan.
+ */
+ public void setTable(Table table) {
+ this.recordReaderImpl.setHTable(table);
+ }
+
+ /**
+ * Sets the scan defining the actual details like columns etc.
+ *
+ * @param scan The scan to set.
+ */
+ public void setScan(Scan scan) {
+ this.recordReaderImpl.setScan(scan);
+ }
+
+ /**
+ * Closes the split.
+ *
+ * @see org.apache.hadoop.mapreduce.RecordReader#close()
+ */
+ @Override
+ public void close() {
+ this.recordReaderImpl.close();
+ }
+
+ /**
+ * Returns the current key.
+ *
+ * @return The current key.
+ * @throws IOException
+ * @throws InterruptedException When the job is aborted.
+ * @see org.apache.hadoop.mapreduce.RecordReader#getCurrentKey()
+ */
+ @Override
+ public ImmutableBytesWritable getCurrentKey() throws IOException,
+ InterruptedException {
+ return this.recordReaderImpl.getCurrentKey();
+ }
+
+ /**
+ * Returns the current value.
+ *
+ * @return The current value.
+ * @throws IOException When the value is faulty.
+ * @throws InterruptedException When the job is aborted.
+ * @see org.apache.hadoop.mapreduce.RecordReader#getCurrentValue()
+ */
+ @Override
+ public Result getCurrentValue() throws IOException, InterruptedException {
+ return this.recordReaderImpl.getCurrentValue();
+ }
+
+ /**
+ * Initializes the reader.
+ *
+ * @param inputsplit The split to work with.
+ * @param context The current task context.
+ * @throws IOException When setting up the reader fails.
+ * @throws InterruptedException When the job is aborted.
+ * @see org.apache.hadoop.mapreduce.RecordReader#initialize(
+ * org.apache.hadoop.mapreduce.InputSplit,
+ * org.apache.hadoop.mapreduce.TaskAttemptContext)
+ */
+ @Override
+ public void initialize(InputSplit inputsplit,
+ TaskAttemptContext context) throws IOException,
+ InterruptedException {
+ this.recordReaderImpl.initialize(inputsplit, context);
+ }
+
+ /**
+ * Positions the record reader to the next record.
+ *
+ * @return <code>true</code> if there was another record.
+ * @throws IOException When reading the record failed.
+ * @throws InterruptedException When the job was aborted.
+ * @see org.apache.hadoop.mapreduce.RecordReader#nextKeyValue()
+ */
+ @Override
+ public boolean nextKeyValue() throws IOException, InterruptedException {
+ return this.recordReaderImpl.nextKeyValue();
+ }
+
+ /**
+ * The current progress of the record reader through its data.
+ *
+ * @return A number between 0.0 and 1.0, the fraction of the data read.
+ * @see org.apache.hadoop.mapreduce.RecordReader#getProgress()
+ */
+ @Override
+ public float getProgress() {
+ return this.recordReaderImpl.getProgress();
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableRecordReaderImpl.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableRecordReaderImpl.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableRecordReaderImpl.java
new file mode 100644
index 0000000..5f85537
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableRecordReaderImpl.java
@@ -0,0 +1,315 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.lang.reflect.Method;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.ScannerCallable;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
+import org.apache.hadoop.hbase.DoNotRetryIOException;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapreduce.Counter;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.util.StringUtils;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.annotations.VisibleForTesting;
+
+/**
+ * Iterates over HBase table data and returns (ImmutableBytesWritable, Result)
+ * pairs.
+ */
+@InterfaceAudience.Public
+public class TableRecordReaderImpl {
+ public static final String LOG_PER_ROW_COUNT
+ = "hbase.mapreduce.log.scanner.rowcount";
+
+ private static final Log LOG = LogFactory.getLog(TableRecordReaderImpl.class);
+
+ // HBASE_COUNTER_GROUP_NAME is the name of mapreduce counter group for HBase
+ @VisibleForTesting
+ static final String HBASE_COUNTER_GROUP_NAME = "HBase Counters";
+ private ResultScanner scanner = null;
+ private Scan scan = null;
+ private Scan currentScan = null;
+ private Table htable = null;
+ private byte[] lastSuccessfulRow = null;
+ private ImmutableBytesWritable key = null;
+ private Result value = null;
+ private TaskAttemptContext context = null;
+ private Method getCounter = null;
+ private long numRestarts = 0;
+ private long numStale = 0;
+ private long timestamp;
+ private int rowcount;
+ private boolean logScannerActivity = false;
+ private int logPerRowCount = 100;
+
+ /**
+ * Restart from survivable exceptions by creating a new scanner.
+ *
+ * @param firstRow The first row to start at.
+ * @throws IOException When restarting fails.
+ */
+ public void restart(byte[] firstRow) throws IOException {
+ currentScan = new Scan(scan);
+ currentScan.withStartRow(firstRow);
+ currentScan.setScanMetricsEnabled(true);
+ if (this.scanner != null) {
+ if (logScannerActivity) {
+ LOG.info("Closing the previously opened scanner object.");
+ }
+ this.scanner.close();
+ }
+ this.scanner = this.htable.getScanner(currentScan);
+ if (logScannerActivity) {
+ LOG.info("Current scan=" + currentScan.toString());
+ timestamp = System.currentTimeMillis();
+ rowcount = 0;
+ }
+ }
+
+ /**
+ * In the new mapreduce APIs, TaskAttemptContext has two getCounter methods.
+ * This checks whether the getCounter(String, String) variant is available.
+ * @return The getCounter method or null if not available.
+ * @throws IOException
+ */
+ protected static Method retrieveGetCounterWithStringsParams(TaskAttemptContext context)
+ throws IOException {
+ Method m = null;
+ try {
+ m = context.getClass().getMethod("getCounter",
+ new Class [] {String.class, String.class});
+ } catch (SecurityException e) {
+ throw new IOException("Failed test for getCounter", e);
+ } catch (NoSuchMethodException e) {
+ // Ignore
+ }
+ return m;
+ }
+
+ /**
+ * Sets the HBase table.
+ *
+ * @param htable The {@link org.apache.hadoop.hbase.client.Table} to scan.
+ */
+ public void setHTable(Table htable) {
+ Configuration conf = htable.getConfiguration();
+ logScannerActivity = conf.getBoolean(
+ ScannerCallable.LOG_SCANNER_ACTIVITY, false);
+ logPerRowCount = conf.getInt(LOG_PER_ROW_COUNT, 100);
+ this.htable = htable;
+ }
+
+ /**
+ * Sets the scan defining the actual details like columns etc.
+ *
+ * @param scan The scan to set.
+ */
+ public void setScan(Scan scan) {
+ this.scan = scan;
+ }
+
+ /**
+ * Build the scanner. Not done in constructor to allow for extension.
+ *
+ * @throws IOException
+ * @throws InterruptedException
+ */
+ public void initialize(InputSplit inputsplit,
+ TaskAttemptContext context) throws IOException,
+ InterruptedException {
+ if (context != null) {
+ this.context = context;
+ getCounter = retrieveGetCounterWithStringsParams(context);
+ }
+ restart(scan.getStartRow());
+ }
+
+ /**
+ * Closes the split.
+ */
+ public void close() {
+ if (this.scanner != null) {
+ this.scanner.close();
+ }
+ try {
+ this.htable.close();
+ } catch (IOException ioe) {
+ LOG.warn("Error closing table", ioe);
+ }
+ }
+
+ /**
+ * Returns the current key.
+ *
+ * @return The current key.
+ * @throws IOException
+ * @throws InterruptedException When the job is aborted.
+ */
+ public ImmutableBytesWritable getCurrentKey() throws IOException,
+ InterruptedException {
+ return key;
+ }
+
+ /**
+ * Returns the current value.
+ *
+ * @return The current value.
+ * @throws IOException When the value is faulty.
+ * @throws InterruptedException When the job is aborted.
+ */
+ public Result getCurrentValue() throws IOException, InterruptedException {
+ return value;
+ }
+
+
+ /**
+ * Positions the record reader to the next record.
+ *
+ * @return <code>true</code> if there was another record.
+ * @throws IOException When reading the record failed.
+ * @throws InterruptedException When the job was aborted.
+ */
+ public boolean nextKeyValue() throws IOException, InterruptedException {
+ if (key == null) key = new ImmutableBytesWritable();
+ if (value == null) value = new Result();
+ try {
+ try {
+ value = this.scanner.next();
+ if (value != null && value.isStale()) numStale++;
+ if (logScannerActivity) {
+ rowcount ++;
+ if (rowcount >= logPerRowCount) {
+ long now = System.currentTimeMillis();
+ LOG.info("Mapper took " + (now-timestamp)
+ + "ms to process " + rowcount + " rows");
+ timestamp = now;
+ rowcount = 0;
+ }
+ }
+ } catch (IOException e) {
+ // do not retry if the exception tells us not to do so
+ if (e instanceof DoNotRetryIOException) {
+ throw e;
+ }
+ // try to handle all other IOExceptions by restarting
+ // the scanner, if the second call fails, it will be rethrown
+ LOG.info("recovered from " + StringUtils.stringifyException(e));
+ if (lastSuccessfulRow == null) {
+ LOG.warn("We are restarting the first next() invocation," +
+ " if your mapper has restarted a few other times like this" +
+ " then you should consider killing this job and investigate" +
+ " why it's taking so long.");
+ }
+ if (lastSuccessfulRow == null) {
+ restart(scan.getStartRow());
+ } else {
+ restart(lastSuccessfulRow);
+ scanner.next(); // skip presumed already mapped row
+ }
+ value = scanner.next();
+ if (value != null && value.isStale()) numStale++;
+ numRestarts++;
+ }
+ if (value != null && value.size() > 0) {
+ key.set(value.getRow());
+ lastSuccessfulRow = key.get();
+ return true;
+ }
+
+ updateCounters();
+ return false;
+ } catch (IOException ioe) {
+ if (logScannerActivity) {
+ long now = System.currentTimeMillis();
+ LOG.info("Mapper took " + (now-timestamp)
+ + "ms to process " + rowcount + " rows");
+ LOG.info(ioe);
+ String lastRow = lastSuccessfulRow == null ?
+ "null" : Bytes.toStringBinary(lastSuccessfulRow);
+ LOG.info("lastSuccessfulRow=" + lastRow);
+ }
+ throw ioe;
+ }
+ }
+
+ /**
+ * If HBase runs on a new version of mapreduce, the RecordReader has access to
+ * counters and can update them based on scanMetrics.
+ * If HBase runs on an old version of mapreduce, it won't be able to get
+ * access to counters and TableRecordReader can't update counter values.
+ * @throws IOException
+ */
+ private void updateCounters() throws IOException {
+ ScanMetrics scanMetrics = scanner.getScanMetrics();
+ if (scanMetrics == null) {
+ return;
+ }
+
+ updateCounters(scanMetrics, numRestarts, getCounter, context, numStale);
+ }
+
+ protected static void updateCounters(ScanMetrics scanMetrics, long numScannerRestarts,
+ Method getCounter, TaskAttemptContext context, long numStale) {
+ // we can get access to counters only if hbase uses new mapreduce APIs
+ if (getCounter == null) {
+ return;
+ }
+
+ try {
+ for (Map.Entry<String, Long> entry:scanMetrics.getMetricsMap().entrySet()) {
+ Counter ct = (Counter)getCounter.invoke(context,
+ HBASE_COUNTER_GROUP_NAME, entry.getKey());
+
+ ct.increment(entry.getValue());
+ }
+ ((Counter) getCounter.invoke(context, HBASE_COUNTER_GROUP_NAME,
+ "NUM_SCANNER_RESTARTS")).increment(numScannerRestarts);
+ ((Counter) getCounter.invoke(context, HBASE_COUNTER_GROUP_NAME,
+ "NUM_SCAN_RESULTS_STALE")).increment(numStale);
+ } catch (Exception e) {
+ LOG.debug("can't update counter." + StringUtils.stringifyException(e));
+ }
+ }
+
+ /**
+ * The current progress of the record reader through its data.
+ *
+ * @return A number between 0.0 and 1.0, the fraction of the data read.
+ */
+ public float getProgress() {
+ // Depends on the total number of tuples
+ return 0;
+ }
+
+}
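Since the reader above only logs scanner progress when scanner-activity logging is switched on, a short sketch of the relevant knobs may help; the values used here are illustrative.

    // Illustrative: enable per-scanner progress logging for TableRecordReaderImpl.
    Configuration conf = HBaseConfiguration.create();
    conf.setBoolean(ScannerCallable.LOG_SCANNER_ACTIVITY, true);
    // Log every 500 rows instead of the default 100.
    conf.setInt(TableRecordReaderImpl.LOG_PER_ROW_COUNT, 500);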
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableReducer.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableReducer.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableReducer.java
new file mode 100644
index 0000000..f0bfc74
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableReducer.java
@@ -0,0 +1,45 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Mutation;
+import org.apache.hadoop.mapreduce.Reducer;
+
+/**
+ * Extends the basic <code>Reducer</code> class to add the required key and
+ * value input/output classes. While the input key and value as well as the
+ * output key can be anything handed in from the previous map phase, the output
+ * value <u>must</u> be either a {@link org.apache.hadoop.hbase.client.Put Put}
+ * or a {@link org.apache.hadoop.hbase.client.Delete Delete} instance when
+ * using the {@link TableOutputFormat} class.
+ * <p>
+ * This class is extended by {@link IdentityTableReducer} but can also be
+ * subclassed to implement similar features or any custom code needed. It has
+ * the advantage of enforcing the output value to a specific basic type.
+ *
+ * @param <KEYIN> The type of the input key.
+ * @param <VALUEIN> The type of the input value.
+ * @param <KEYOUT> The type of the output key.
+ * @see org.apache.hadoop.mapreduce.Reducer
+ */
+@InterfaceAudience.Public
+public abstract class TableReducer<KEYIN, VALUEIN, KEYOUT>
+extends Reducer<KEYIN, VALUEIN, KEYOUT, Mutation> {
+}
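A hedged sketch of a concrete subclass, with purely illustrative column family and qualifier names, showing the Put-emitting pattern the javadoc describes:

    // Illustrative subclass: sums integer values per key and writes one Put per row.
    public class SumTableReducer extends TableReducer<Text, IntWritable, ImmutableBytesWritable> {
      private static final byte[] CF = Bytes.toBytes("cf");      // placeholder family
      private static final byte[] QUAL = Bytes.toBytes("sum");   // placeholder qualifier

      @Override
      protected void reduce(Text key, Iterable<IntWritable> values, Context context)
          throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable v : values) {
          sum += v.get();
        }
        Put put = new Put(Bytes.toBytes(key.toString()));
        put.addColumn(CF, QUAL, Bytes.toBytes(sum));
        // The output value must be a Put or Delete so TableOutputFormat can apply it.
        context.write(new ImmutableBytesWritable(put.getRow()), put);
      }
    }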
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormat.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormat.java
new file mode 100644
index 0000000..691f0c5
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormat.java
@@ -0,0 +1,209 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.annotations.VisibleForTesting;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.lang.reflect.Method;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * TableSnapshotInputFormat allows a MapReduce job to run over a table snapshot. The job
+ * bypasses HBase servers and directly accesses the underlying files (hfiles, recovered edits,
+ * WALs, etc.) to provide maximum performance. The snapshot is not required to be
+ * restored to the live cluster or cloned. This also allows running the mapreduce job from an
+ * online or offline hbase cluster. The snapshot files can be exported by using the
+ * {@link org.apache.hadoop.hbase.snapshot.ExportSnapshot} tool, to a pure-hdfs cluster,
+ * and this InputFormat can be used to run the mapreduce job directly over the snapshot files.
+ * The snapshot should not be deleted while there are jobs reading from snapshot files.
+ * <p>
+ * Usage is similar to TableInputFormat, and
+ * {@link TableMapReduceUtil#initTableSnapshotMapperJob(String, Scan, Class, Class, Class, Job, boolean, Path)}
+ * can be used to configure the job.
+ * <pre>{@code
+ * Job job = new Job(conf);
+ * Scan scan = new Scan();
+ * TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
+ * scan, MyTableMapper.class, MyMapKeyOutput.class,
+ * MyMapOutputValueWritable.class, job, true);
+ * }
+ * </pre>
+ * <p>
+ * Internally, this input format restores the snapshot into the given tmp directory. Similar to
+ * {@link TableInputFormat} an InputSplit is created per region. The region is opened for reading
+ * from each RecordReader. An internal RegionScanner is used to execute the
+ * {@link org.apache.hadoop.hbase.CellScanner} obtained from the user.
+ * <p>
+ * HBase owns all the data and snapshot files on the filesystem. Only the 'hbase' user can read from
+ * snapshot files and data files.
+ * To read snapshot files directly from the file system, the user running the MR job
+ * must have sufficient permissions to access snapshot and reference files.
+ * This means that to run mapreduce over snapshot files, the MR job has to be run as the HBase
+ * user or the user must have group or other privileges in the filesystem (See HBASE-8369).
+ * Note that granting other users read access to snapshot/data files will completely circumvent
+ * the access control enforced by HBase.
+ * @see org.apache.hadoop.hbase.client.TableSnapshotScanner
+ */
+@InterfaceAudience.Public
+public class TableSnapshotInputFormat extends InputFormat<ImmutableBytesWritable, Result> {
+
+ public static class TableSnapshotRegionSplit extends InputSplit implements Writable {
+ private TableSnapshotInputFormatImpl.InputSplit delegate;
+
+ // constructor for mapreduce framework / Writable
+ public TableSnapshotRegionSplit() {
+ this.delegate = new TableSnapshotInputFormatImpl.InputSplit();
+ }
+
+ public TableSnapshotRegionSplit(TableSnapshotInputFormatImpl.InputSplit delegate) {
+ this.delegate = delegate;
+ }
+
+ public TableSnapshotRegionSplit(HTableDescriptor htd, HRegionInfo regionInfo,
+ List<String> locations, Scan scan, Path restoreDir) {
+ this.delegate =
+ new TableSnapshotInputFormatImpl.InputSplit(htd, regionInfo, locations, scan, restoreDir);
+ }
+
+ @Override
+ public long getLength() throws IOException, InterruptedException {
+ return delegate.getLength();
+ }
+
+ @Override
+ public String[] getLocations() throws IOException, InterruptedException {
+ return delegate.getLocations();
+ }
+
+ @Override
+ public void write(DataOutput out) throws IOException {
+ delegate.write(out);
+ }
+
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ delegate.readFields(in);
+ }
+
+ public HRegionInfo getRegionInfo() {
+ return delegate.getRegionInfo();
+ }
+
+ }
+
+ @VisibleForTesting
+ static class TableSnapshotRegionRecordReader extends
+ RecordReader<ImmutableBytesWritable, Result> {
+ private TableSnapshotInputFormatImpl.RecordReader delegate =
+ new TableSnapshotInputFormatImpl.RecordReader();
+ private TaskAttemptContext context;
+ private Method getCounter;
+
+ @Override
+ public void initialize(InputSplit split, TaskAttemptContext context) throws IOException,
+ InterruptedException {
+ this.context = context;
+ getCounter = TableRecordReaderImpl.retrieveGetCounterWithStringsParams(context);
+ delegate.initialize(
+ ((TableSnapshotRegionSplit) split).delegate,
+ context.getConfiguration());
+ }
+
+ @Override
+ public boolean nextKeyValue() throws IOException, InterruptedException {
+ boolean result = delegate.nextKeyValue();
+ if (result) {
+ ScanMetrics scanMetrics = delegate.getScanner().getScanMetrics();
+ if (scanMetrics != null && context != null) {
+ TableRecordReaderImpl.updateCounters(scanMetrics, 0, getCounter, context, 0);
+ }
+ }
+ return result;
+ }
+
+ @Override
+ public ImmutableBytesWritable getCurrentKey() throws IOException, InterruptedException {
+ return delegate.getCurrentKey();
+ }
+
+ @Override
+ public Result getCurrentValue() throws IOException, InterruptedException {
+ return delegate.getCurrentValue();
+ }
+
+ @Override
+ public float getProgress() throws IOException, InterruptedException {
+ return delegate.getProgress();
+ }
+
+ @Override
+ public void close() throws IOException {
+ delegate.close();
+ }
+ }
+
+ @Override
+ public RecordReader<ImmutableBytesWritable, Result> createRecordReader(
+ InputSplit split, TaskAttemptContext context) throws IOException {
+ return new TableSnapshotRegionRecordReader();
+ }
+
+ @Override
+ public List<InputSplit> getSplits(JobContext job) throws IOException, InterruptedException {
+ List<InputSplit> results = new ArrayList<>();
+ for (TableSnapshotInputFormatImpl.InputSplit split :
+ TableSnapshotInputFormatImpl.getSplits(job.getConfiguration())) {
+ results.add(new TableSnapshotRegionSplit(split));
+ }
+ return results;
+ }
+
+ /**
+ * Configures the job to use TableSnapshotInputFormat to read from a snapshot.
+ * @param job the job to configure
+ * @param snapshotName the name of the snapshot to read from
+ * @param restoreDir a temporary directory to restore the snapshot into. Current user should
+ * have write permissions to this directory, and this should not be a subdirectory of rootdir.
+ * After the job is finished, restoreDir can be deleted.
+ * @throws IOException if an error occurs
+ */
+ public static void setInput(Job job, String snapshotName, Path restoreDir)
+ throws IOException {
+ TableSnapshotInputFormatImpl.setInput(job.getConfiguration(), snapshotName, restoreDir);
+ }
+}
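When a job is not configured through TableMapReduceUtil, the setInput() helper above can be called directly. A short, hedged sketch; the snapshot name and restore path below are placeholders:

    // Illustrative: point a job at snapshot "snap1", restored into a scratch directory.
    Job job = Job.getInstance(HBaseConfiguration.create(), "scan-snapshot");
    Path restoreDir = new Path("/tmp/snap1-restore");   // must not live under the HBase rootdir
    job.setInputFormatClass(TableSnapshotInputFormat.class);
    TableSnapshotInputFormat.setInput(job, "snap1", restoreDir);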
[28/41] hbase git commit: HBASE-18640 Move mapreduce out of
hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableOutputFormatConnectionExhaust.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableOutputFormatConnectionExhaust.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableOutputFormatConnectionExhaust.java
new file mode 100644
index 0000000..835117c
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableOutputFormatConnectionExhaust.java
@@ -0,0 +1,104 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordWriter;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import java.io.IOException;
+
+import static org.junit.Assert.fail;
+
+/**
+ * Spark creates many instances of TableOutputFormat within a single process. We need to make
+ * sure we can have many instances and not leak connections.
+ *
+ * This test creates a few TableOutputFormats and shouldn't fail due to ZK connection exhaustion.
+ */
+@Category(MediumTests.class)
+public class TestTableOutputFormatConnectionExhaust {
+
+ private static final Log LOG =
+ LogFactory.getLog(TestTableOutputFormatConnectionExhaust.class);
+
+ private final static HBaseTestingUtility UTIL = new HBaseTestingUtility();
+ static final String TABLE = "TestTableOutputFormatConnectionExhaust";
+ static final String FAMILY = "family";
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ // Default in ZookeeperMiniCluster is 1000, setting artificially low to trigger exhaustion.
+ // need min of 7 to properly start the default mini HBase cluster
+ UTIL.getConfiguration().setInt(HConstants.ZOOKEEPER_MAX_CLIENT_CNXNS, 10);
+ UTIL.startMiniCluster();
+ }
+
+ @AfterClass
+ public static void afterClass() throws Exception {
+ UTIL.shutdownMiniCluster();
+ }
+
+ @Before
+ public void before() throws IOException {
+ LOG.info("before");
+ UTIL.ensureSomeRegionServersAvailable(1);
+ LOG.info("before done");
+ }
+
+ /**
+ * Open and close a TableOutputFormat. Closing the RecordWriter should release HBase
+ * Connection (ZK) resources, and will throw an exception if they are exhausted.
+ */
+ static void openCloseTableOutputFormat(int iter) throws IOException {
+ LOG.info("Instantiating TableOutputFormat connection " + iter);
+ JobConf conf = new JobConf();
+ conf.addResource(UTIL.getConfiguration());
+ conf.set(TableOutputFormat.OUTPUT_TABLE, TABLE);
+ TableMapReduceUtil.initTableMapJob(TABLE, FAMILY, TableMap.class,
+ ImmutableBytesWritable.class, ImmutableBytesWritable.class, conf);
+ TableOutputFormat tof = new TableOutputFormat();
+ RecordWriter rw = tof.getRecordWriter(null, conf, TABLE, null);
+ rw.close(null);
+ }
+
+ @Test
+ public void testConnectionExhaustion() throws IOException {
+ int MAX_INSTANCES = 5; // fails on iteration 3 if zk connections leak
+ for (int i = 0; i < MAX_INSTANCES; i++) {
+ final int iter = i;
+ try {
+ openCloseTableOutputFormat(iter);
+ } catch (Exception e) {
+ LOG.error("Exception encountered", e);
+ fail("Failed on iteration " + i);
+ }
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableSnapshotInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableSnapshotInputFormat.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableSnapshotInputFormat.java
new file mode 100644
index 0000000..1c72f2a
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableSnapshotInputFormat.java
@@ -0,0 +1,271 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mapred;
+
+import static org.mockito.Mockito.mock;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormatTestBase;
+import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.RunningJob;
+import org.apache.hadoop.mapred.lib.NullOutputFormat;
+import org.junit.Assert;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+@Category({VerySlowMapReduceTests.class, LargeTests.class})
+public class TestTableSnapshotInputFormat extends TableSnapshotInputFormatTestBase {
+
+ private static final byte[] aaa = Bytes.toBytes("aaa");
+ private static final byte[] after_zzz = Bytes.toBytes("zz{"); // 'z' + 1 => '{'
+ private static final String COLUMNS =
+ Bytes.toString(FAMILIES[0]) + " " + Bytes.toString(FAMILIES[1]);
+
+ @Rule
+ public TestName name = new TestName();
+
+ @Override
+ protected byte[] getStartRow() {
+ return aaa;
+ }
+
+ @Override
+ protected byte[] getEndRow() {
+ return after_zzz;
+ }
+
+ static class TestTableSnapshotMapper extends MapReduceBase
+ implements TableMap<ImmutableBytesWritable, NullWritable> {
+ @Override
+ public void map(ImmutableBytesWritable key, Result value,
+ OutputCollector<ImmutableBytesWritable, NullWritable> collector, Reporter reporter)
+ throws IOException {
+ verifyRowFromMap(key, value);
+ collector.collect(key, NullWritable.get());
+ }
+ }
+
+ public static class TestTableSnapshotReducer extends MapReduceBase
+ implements Reducer<ImmutableBytesWritable, NullWritable, NullWritable, NullWritable> {
+ HBaseTestingUtility.SeenRowTracker rowTracker =
+ new HBaseTestingUtility.SeenRowTracker(aaa, after_zzz);
+
+ @Override
+ public void reduce(ImmutableBytesWritable key, Iterator<NullWritable> values,
+ OutputCollector<NullWritable, NullWritable> collector, Reporter reporter)
+ throws IOException {
+ rowTracker.addRow(key.get());
+ }
+
+ @Override
+ public void close() {
+ rowTracker.validate();
+ }
+ }
+
+ @Test
+ public void testInitTableSnapshotMapperJobConfig() throws Exception {
+ setupCluster();
+ final TableName tableName = TableName.valueOf(name.getMethodName());
+ String snapshotName = "foo";
+
+ try {
+ createTableAndSnapshot(UTIL, tableName, snapshotName, getStartRow(), getEndRow(), 1);
+ JobConf job = new JobConf(UTIL.getConfiguration());
+ Path tmpTableDir = UTIL.getDataTestDirOnTestFS(snapshotName);
+
+ TableMapReduceUtil.initTableSnapshotMapJob(snapshotName,
+ COLUMNS, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
+ NullWritable.class, job, false, tmpTableDir);
+
+ // TODO: would be better to examine directly the cache instance that results from this
+ // config. Currently this is not possible because BlockCache initialization is static.
+ Assert.assertEquals(
+ "Snapshot job should be configured for default LruBlockCache.",
+ HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT,
+ job.getFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, -1), 0.01);
+ Assert.assertEquals(
+ "Snapshot job should not use BucketCache.",
+ 0, job.getFloat("hbase.bucketcache.size", -1), 0.01);
+ } finally {
+ UTIL.getAdmin().deleteSnapshot(snapshotName);
+ UTIL.deleteTable(tableName);
+ tearDownCluster();
+ }
+ }
+
+ // TODO: mapred does not support limiting input range by startrow, endrow.
+ // Thus the following tests must override parameter verification.
+
+ @Test
+ @Override
+ public void testWithMockedMapReduceMultiRegion() throws Exception {
+ testWithMockedMapReduce(UTIL, "testWithMockedMapReduceMultiRegion", 10, 10);
+ }
+
+ @Test
+ @Override
+ public void testWithMapReduceMultiRegion() throws Exception {
+ testWithMapReduce(UTIL, "testWithMapReduceMultiRegion", 10, 10, false);
+ }
+
+ @Test
+ @Override
+ // run the MR job while HBase is offline
+ public void testWithMapReduceAndOfflineHBaseMultiRegion() throws Exception {
+ testWithMapReduce(UTIL, "testWithMapReduceAndOfflineHBaseMultiRegion", 10, 10, true);
+ }
+
+ @Override
+ public void testRestoreSnapshotDoesNotCreateBackRefLinksInit(TableName tableName,
+ String snapshotName, Path tmpTableDir) throws Exception {
+ JobConf job = new JobConf(UTIL.getConfiguration());
+ TableMapReduceUtil.initTableSnapshotMapJob(snapshotName,
+ COLUMNS, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
+ NullWritable.class, job, false, tmpTableDir);
+ }
+
+ @Override
+ protected void testWithMockedMapReduce(HBaseTestingUtility util, String snapshotName,
+ int numRegions, int expectedNumSplits) throws Exception {
+ setupCluster();
+ final TableName tableName = TableName.valueOf(name.getMethodName());
+ try {
+ createTableAndSnapshot(
+ util, tableName, snapshotName, getStartRow(), getEndRow(), numRegions);
+
+ JobConf job = new JobConf(util.getConfiguration());
+ Path tmpTableDir = util.getDataTestDirOnTestFS(snapshotName);
+
+ TableMapReduceUtil.initTableSnapshotMapJob(snapshotName,
+ COLUMNS, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
+ NullWritable.class, job, false, tmpTableDir);
+
+ // mapred doesn't support start and end keys? o.O
+ verifyWithMockedMapReduce(job, numRegions, expectedNumSplits, getStartRow(), getEndRow());
+
+ } finally {
+ util.getAdmin().deleteSnapshot(snapshotName);
+ util.deleteTable(tableName);
+ tearDownCluster();
+ }
+ }
+
+ private void verifyWithMockedMapReduce(JobConf job, int numRegions, int expectedNumSplits,
+ byte[] startRow, byte[] stopRow) throws IOException, InterruptedException {
+ TableSnapshotInputFormat tsif = new TableSnapshotInputFormat();
+ InputSplit[] splits = tsif.getSplits(job, 0);
+
+ Assert.assertEquals(expectedNumSplits, splits.length);
+
+ HBaseTestingUtility.SeenRowTracker rowTracker =
+ new HBaseTestingUtility.SeenRowTracker(startRow, stopRow);
+
+ for (int i = 0; i < splits.length; i++) {
+ // validate input split
+ InputSplit split = splits[i];
+ Assert.assertTrue(split instanceof TableSnapshotInputFormat.TableSnapshotRegionSplit);
+
+ // validate record reader
+ OutputCollector collector = mock(OutputCollector.class);
+ Reporter reporter = mock(Reporter.class);
+ RecordReader<ImmutableBytesWritable, Result> rr = tsif.getRecordReader(split, job, reporter);
+
+ // validate we can read all the data back
+ ImmutableBytesWritable key = rr.createKey();
+ Result value = rr.createValue();
+ while (rr.next(key, value)) {
+ verifyRowFromMap(key, value);
+ rowTracker.addRow(key.copyBytes());
+ }
+
+ rr.close();
+ }
+
+ // validate all rows are seen
+ rowTracker.validate();
+ }
+
+ @Override
+ protected void testWithMapReduceImpl(HBaseTestingUtility util, TableName tableName,
+ String snapshotName, Path tableDir, int numRegions, int expectedNumSplits,
+ boolean shutdownCluster) throws Exception {
+ doTestWithMapReduce(util, tableName, snapshotName, getStartRow(), getEndRow(), tableDir,
+ numRegions, expectedNumSplits, shutdownCluster);
+ }
+
+ // this is also called by the IntegrationTestTableSnapshotInputFormat
+ public static void doTestWithMapReduce(HBaseTestingUtility util, TableName tableName,
+ String snapshotName, byte[] startRow, byte[] endRow, Path tableDir, int numRegions,
+ int expectedNumSplits, boolean shutdownCluster) throws Exception {
+
+ //create the table and snapshot
+ createTableAndSnapshot(util, tableName, snapshotName, startRow, endRow, numRegions);
+
+ if (shutdownCluster) {
+ util.shutdownMiniHBaseCluster();
+ }
+
+ try {
+ // create the job
+ JobConf jobConf = new JobConf(util.getConfiguration());
+
+ jobConf.setJarByClass(util.getClass());
+ org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addDependencyJarsForClasses(jobConf,
+ TestTableSnapshotInputFormat.class);
+
+ TableMapReduceUtil.initTableSnapshotMapJob(snapshotName, COLUMNS,
+ TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
+ NullWritable.class, jobConf, true, tableDir);
+
+ jobConf.setReducerClass(TestTableSnapshotInputFormat.TestTableSnapshotReducer.class);
+ jobConf.setNumReduceTasks(1);
+ jobConf.setOutputFormat(NullOutputFormat.class);
+
+ RunningJob job = JobClient.runJob(jobConf);
+ Assert.assertTrue(job.isSuccessful());
+ } finally {
+ if (!shutdownCluster) {
+ util.getAdmin().deleteSnapshot(snapshotName);
+ util.deleteTable(tableName);
+ }
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/HadoopSecurityEnabledUserProviderForTesting.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/HadoopSecurityEnabledUserProviderForTesting.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/HadoopSecurityEnabledUserProviderForTesting.java
new file mode 100644
index 0000000..b342f64
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/HadoopSecurityEnabledUserProviderForTesting.java
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.hadoop.hbase.security.UserProvider;
+
+/**
+ * A {@link UserProvider} that always says hadoop security is enabled, regardless of the underlying
+ * configuration. HBase security is <i>not enabled</i> as this is used to determine if SASL is used
+ * to do the authentication, which requires a Kerberos ticket (which we currently don't have in
+ * tests).
+ * <p>
+ * This should only be used for <b>TESTING</b>.
+ */
+public class HadoopSecurityEnabledUserProviderForTesting extends UserProvider {
+
+ @Override
+ public boolean isHBaseSecurityEnabled() {
+ return false;
+ }
+
+ @Override
+ public boolean isHadoopSecurityEnabled() {
+ return true;
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatTestBase.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatTestBase.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatTestBase.java
new file mode 100644
index 0000000..c717fa9
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatTestBase.java
@@ -0,0 +1,277 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.CategoryBasedTimeout;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestRule;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.NavigableMap;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Base set of tests and setup for input formats touching multiple tables.
+ */
+public abstract class MultiTableInputFormatTestBase {
+ @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
+ withTimeout(this.getClass()).withLookingForStuckThread(true).build();
+ static final Log LOG = LogFactory.getLog(TestMultiTableInputFormat.class);
+ public static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+ static final String TABLE_NAME = "scantest";
+ static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
+ static final String KEY_STARTROW = "startRow";
+ static final String KEY_LASTROW = "stpRow";
+
+ static List<String> TABLES = Lists.newArrayList();
+
+ static {
+ for (int i = 0; i < 3; i++) {
+ TABLES.add(TABLE_NAME + String.valueOf(i));
+ }
+ }
+
+ @BeforeClass
+ public static void setUpBeforeClass() throws Exception {
+ // switch TIF to log at DEBUG level
+ TEST_UTIL.enableDebug(MultiTableInputFormatBase.class);
+ // start mini hbase cluster
+ TEST_UTIL.startMiniCluster(3);
+ // create and fill table
+ for (String tableName : TABLES) {
+ try (Table table =
+ TEST_UTIL.createMultiRegionTable(TableName.valueOf(tableName),
+ INPUT_FAMILY, 4)) {
+ TEST_UTIL.loadTable(table, INPUT_FAMILY, false);
+ }
+ }
+ }
+
+ @AfterClass
+ public static void tearDownAfterClass() throws Exception {
+ TEST_UTIL.shutdownMiniCluster();
+ }
+
+ @After
+ public void tearDown() throws Exception {
+ Configuration c = TEST_UTIL.getConfiguration();
+ FileUtil.fullyDelete(new File(c.get("hadoop.tmp.dir")));
+ }
+
+ /**
+ * Pass the key and value to reducer.
+ */
+ public static class ScanMapper extends
+ TableMapper<ImmutableBytesWritable, ImmutableBytesWritable> {
+ /**
+ * Pass the key and value to reduce.
+ *
+ * @param key The key, here "aaa", "aab" etc.
+ * @param value The value is the same as the key.
+ * @param context The task context.
+ * @throws IOException When reading the rows fails.
+ */
+ @Override
+ public void map(ImmutableBytesWritable key, Result value, Context context)
+ throws IOException, InterruptedException {
+ makeAssertions(key, value);
+ context.write(key, key);
+ }
+
+ public void makeAssertions(ImmutableBytesWritable key, Result value) throws IOException {
+ if (value.size() != 1) {
+ throw new IOException("There should only be one input column");
+ }
+ Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> cf =
+ value.getMap();
+ if (!cf.containsKey(INPUT_FAMILY)) {
+ throw new IOException("Wrong input columns. Missing: '" +
+ Bytes.toString(INPUT_FAMILY) + "'.");
+ }
+ String val = Bytes.toStringBinary(value.getValue(INPUT_FAMILY, null));
+ LOG.debug("map: key -> " + Bytes.toStringBinary(key.get()) +
+ ", value -> " + val);
+ }
+ }
+
+ /**
+ * Checks the last and first keys seen against the scanner boundaries.
+ */
+ public static class ScanReducer
+ extends
+ Reducer<ImmutableBytesWritable, ImmutableBytesWritable,
+ NullWritable, NullWritable> {
+ private String first = null;
+ private String last = null;
+
+ @Override
+ protected void reduce(ImmutableBytesWritable key,
+ Iterable<ImmutableBytesWritable> values, Context context)
+ throws IOException, InterruptedException {
+ makeAssertions(key, values);
+ }
+
+ protected void makeAssertions(ImmutableBytesWritable key,
+ Iterable<ImmutableBytesWritable> values) {
+ int count = 0;
+ for (ImmutableBytesWritable value : values) {
+ String val = Bytes.toStringBinary(value.get());
+ LOG.debug("reduce: key[" + count + "] -> " +
+ Bytes.toStringBinary(key.get()) + ", value -> " + val);
+ if (first == null) first = val;
+ last = val;
+ count++;
+ }
+ assertEquals(3, count);
+ }
+
+ @Override
+ protected void cleanup(Context context) throws IOException,
+ InterruptedException {
+ Configuration c = context.getConfiguration();
+ cleanup(c);
+ }
+
+ protected void cleanup(Configuration c) {
+ String startRow = c.get(KEY_STARTROW);
+ String lastRow = c.get(KEY_LASTROW);
+ LOG.info("cleanup: first -> \"" + first + "\", start row -> \"" +
+ startRow + "\"");
+ LOG.info("cleanup: last -> \"" + last + "\", last row -> \"" + lastRow +
+ "\"");
+ if (startRow != null && startRow.length() > 0) {
+ assertEquals(startRow, first);
+ }
+ if (lastRow != null && lastRow.length() > 0) {
+ assertEquals(lastRow, last);
+ }
+ }
+ }
+
+ @Test
+ public void testScanEmptyToEmpty() throws IOException, InterruptedException,
+ ClassNotFoundException {
+ testScan(null, null, null);
+ }
+
+ @Test
+ public void testScanEmptyToAPP() throws IOException, InterruptedException,
+ ClassNotFoundException {
+ testScan(null, "app", "apo");
+ }
+
+ @Test
+ public void testScanOBBToOPP() throws IOException, InterruptedException,
+ ClassNotFoundException {
+ testScan("obb", "opp", "opo");
+ }
+
+ @Test
+ public void testScanYZYToEmpty() throws IOException, InterruptedException,
+ ClassNotFoundException {
+ testScan("yzy", null, "zzz");
+ }
+
+ /**
+ * Tests a MR scan using specific start and stop rows.
+ *
+ * @throws IOException
+ * @throws ClassNotFoundException
+ * @throws InterruptedException
+ */
+ private void testScan(String start, String stop, String last)
+ throws IOException, InterruptedException, ClassNotFoundException {
+ String jobName =
+ "Scan" + (start != null ? start.toUpperCase(Locale.ROOT) : "Empty") + "To" +
+ (stop != null ? stop.toUpperCase(Locale.ROOT) : "Empty");
+ LOG.info("Before map/reduce startup - job " + jobName);
+ Configuration c = new Configuration(TEST_UTIL.getConfiguration());
+
+ c.set(KEY_STARTROW, start != null ? start : "");
+ c.set(KEY_LASTROW, last != null ? last : "");
+
+ List<Scan> scans = new ArrayList<>();
+
+ for (String tableName : TABLES) {
+ Scan scan = new Scan();
+
+ scan.addFamily(INPUT_FAMILY);
+ scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(tableName));
+
+ if (start != null) {
+ scan.setStartRow(Bytes.toBytes(start));
+ }
+ if (stop != null) {
+ scan.setStopRow(Bytes.toBytes(stop));
+ }
+
+ scans.add(scan);
+
+ LOG.info("scan before: " + scan);
+ }
+
+ runJob(jobName, c, scans);
+ }
+
+ protected void runJob(String jobName, Configuration c, List<Scan> scans)
+ throws IOException, InterruptedException, ClassNotFoundException {
+ Job job = new Job(c, jobName);
+
+ initJob(scans, job);
+ job.setReducerClass(ScanReducer.class);
+ job.setNumReduceTasks(1); // one to get final "first" and "last" key
+ FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
+ LOG.info("Started " + job.getJobName());
+ job.waitForCompletion(true);
+ assertTrue(job.isSuccessful());
+ LOG.info("After map/reduce completion - job " + jobName);
+ }
+
+ protected abstract void initJob(List<Scan> scans, Job job) throws IOException;
+
+
+}
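A concrete subclass only needs to supply initJob(). A plausible implementation, assuming the multi-scan variant of TableMapReduceUtil.initTableMapperJob, might look like this:

    // Illustrative initJob() implementation for a subclass of this base test.
    @Override
    protected void initJob(List<Scan> scans, Job job) throws IOException {
      TableMapReduceUtil.initTableMapperJob(scans, ScanMapper.class,
          ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
    }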
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/NMapInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/NMapInputFormat.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/NMapInputFormat.java
new file mode 100644
index 0000000..3203f0c
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/NMapInputFormat.java
@@ -0,0 +1,134 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+/**
+ * Input format that creates a configurable number of map tasks
+ * each provided with a single row of NullWritables. This can be
+ * useful when trying to write mappers which don't have any real
+ * input (e.g. when the mapper is simply producing random data as output).
+ */
+public class NMapInputFormat extends InputFormat<NullWritable, NullWritable> {
+ private static final String NMAPS_KEY = "nmapinputformat.num.maps";
+
+ @Override
+ public RecordReader<NullWritable, NullWritable> createRecordReader(
+ InputSplit split,
+ TaskAttemptContext tac) throws IOException, InterruptedException {
+ return new SingleRecordReader<>(NullWritable.get(), NullWritable.get());
+ }
+
+ @Override
+ public List<InputSplit> getSplits(JobContext context) throws IOException,
+ InterruptedException {
+ int count = getNumMapTasks(context.getConfiguration());
+ List<InputSplit> splits = new ArrayList<>(count);
+ for (int i = 0; i < count; i++) {
+ splits.add(new NullInputSplit());
+ }
+ return splits;
+ }
+
+ public static void setNumMapTasks(Configuration conf, int numTasks) {
+ conf.setInt(NMAPS_KEY, numTasks);
+ }
+
+ public static int getNumMapTasks(Configuration conf) {
+ return conf.getInt(NMAPS_KEY, 1);
+ }
+
+ private static class NullInputSplit extends InputSplit implements Writable {
+ @Override
+ public long getLength() throws IOException, InterruptedException {
+ return 0;
+ }
+
+ @Override
+ public String[] getLocations() throws IOException, InterruptedException {
+ return new String[] {};
+ }
+
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ }
+
+ @Override
+ public void write(DataOutput out) throws IOException {
+ }
+ }
+
+ private static class SingleRecordReader<K, V>
+ extends RecordReader<K, V> {
+
+ private final K key;
+ private final V value;
+ boolean providedKey = false;
+
+ SingleRecordReader(K key, V value) {
+ this.key = key;
+ this.value = value;
+ }
+
+ @Override
+ public void close() {
+ }
+
+ @Override
+ public K getCurrentKey() {
+ return key;
+ }
+
+ @Override
+ public V getCurrentValue(){
+ return value;
+ }
+
+ @Override
+ public float getProgress() {
+ return 0;
+ }
+
+ @Override
+ public void initialize(InputSplit split, TaskAttemptContext tac) {
+ }
+
+ @Override
+ public boolean nextKeyValue() {
+ if (providedKey) return false;
+ providedKey = true;
+ return true;
+ }
+
+ }
+}
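For context, a minimal usage sketch for NMapInputFormat follows; the job name and the WriteRandomDataMapper class are assumptions for illustration and are not part of this change:

    // Minimal sketch (assumed driver code, not part of this commit).
    // Imports from org.apache.hadoop.conf, org.apache.hadoop.mapreduce and
    // org.apache.hadoop.hbase are omitted for brevity.
    Configuration conf = HBaseConfiguration.create();
    NMapInputFormat.setNumMapTasks(conf, 8);            // launch 8 map tasks with no real input
    Job job = Job.getInstance(conf, "random-data-generator");
    job.setInputFormatClass(NMapInputFormat.class);     // each task receives one NullWritable record
    job.setMapperClass(WriteRandomDataMapper.class);    // hypothetical mapper that generates the output
    job.setNumReduceTasks(0);
    job.waitForCompletion(true);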
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatTestBase.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatTestBase.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatTestBase.java
new file mode 100644
index 0000000..fa47253
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatTestBase.java
@@ -0,0 +1,231 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.CategoryBasedTimeout;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellScanner;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.HFileLink;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
+import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
+import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.util.HFileArchiveUtil;
+import org.junit.Assert;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestRule;
+
+import static org.junit.Assert.assertFalse;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+public abstract class TableSnapshotInputFormatTestBase {
+ private static final Log LOG = LogFactory.getLog(TableSnapshotInputFormatTestBase.class);
+ @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
+ withTimeout(this.getClass()).withLookingForStuckThread(true).build();
+ protected final HBaseTestingUtility UTIL = new HBaseTestingUtility();
+ protected static final int NUM_REGION_SERVERS = 2;
+ protected static final byte[][] FAMILIES = {Bytes.toBytes("f1"), Bytes.toBytes("f2")};
+
+ protected FileSystem fs;
+ protected Path rootDir;
+
+ public void setupCluster() throws Exception {
+ setupConf(UTIL.getConfiguration());
+ UTIL.startMiniCluster(NUM_REGION_SERVERS, true);
+ rootDir = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir();
+ fs = rootDir.getFileSystem(UTIL.getConfiguration());
+ }
+
+ public void tearDownCluster() throws Exception {
+ UTIL.shutdownMiniCluster();
+ }
+
+ private static void setupConf(Configuration conf) {
+ // Enable snapshot
+ conf.setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true);
+ }
+
+ protected abstract void testWithMockedMapReduce(HBaseTestingUtility util, String snapshotName,
+ int numRegions, int expectedNumSplits) throws Exception;
+
+ protected abstract void testWithMapReduceImpl(HBaseTestingUtility util, TableName tableName,
+ String snapshotName, Path tableDir, int numRegions, int expectedNumSplits,
+ boolean shutdownCluster) throws Exception;
+
+ protected abstract byte[] getStartRow();
+
+ protected abstract byte[] getEndRow();
+
+ @Test
+ public void testWithMockedMapReduceSingleRegion() throws Exception {
+ testWithMockedMapReduce(UTIL, "testWithMockedMapReduceSingleRegion", 1, 1);
+ }
+
+ @Test
+ public void testWithMockedMapReduceMultiRegion() throws Exception {
+ testWithMockedMapReduce(UTIL, "testWithMockedMapReduceMultiRegion", 10, 8);
+ }
+
+ @Test
+ public void testWithMapReduceSingleRegion() throws Exception {
+ testWithMapReduce(UTIL, "testWithMapReduceSingleRegion", 1, 1, false);
+ }
+
+ @Test
+ public void testWithMapReduceMultiRegion() throws Exception {
+ testWithMapReduce(UTIL, "testWithMapReduceMultiRegion", 10, 8, false);
+ }
+
+ @Test
+ // run the MR job while HBase is offline
+ public void testWithMapReduceAndOfflineHBaseMultiRegion() throws Exception {
+ testWithMapReduce(UTIL, "testWithMapReduceAndOfflineHBaseMultiRegion", 10, 8, true);
+ }
+
+ // Test that snapshot restore does not create back references in the HBase root dir.
+ @Test
+ public void testRestoreSnapshotDoesNotCreateBackRefLinks() throws Exception {
+ setupCluster();
+ TableName tableName = TableName.valueOf("testRestoreSnapshotDoesNotCreateBackRefLinks");
+ String snapshotName = "foo";
+
+ try {
+ createTableAndSnapshot(UTIL, tableName, snapshotName, getStartRow(), getEndRow(), 1);
+
+ Path tmpTableDir = UTIL.getDataTestDirOnTestFS(snapshotName);
+
+ testRestoreSnapshotDoesNotCreateBackRefLinksInit(tableName, snapshotName,tmpTableDir);
+
+ Path rootDir = FSUtils.getRootDir(UTIL.getConfiguration());
+ for (Path regionDir : FSUtils.getRegionDirs(fs, FSUtils.getTableDir(rootDir, tableName))) {
+ for (Path storeDir : FSUtils.getFamilyDirs(fs, regionDir)) {
+ for (FileStatus status : fs.listStatus(storeDir)) {
+ System.out.println(status.getPath());
+ if (StoreFileInfo.isValid(status)) {
+ Path archiveStoreDir = HFileArchiveUtil.getStoreArchivePath(UTIL.getConfiguration(),
+ tableName, regionDir.getName(), storeDir.getName());
+
+ Path path = HFileLink.getBackReferencesDir(storeDir, status.getPath().getName());
+ // assert back references directory is empty
+ assertFalse("There is a back reference in " + path, fs.exists(path));
+
+ path = HFileLink.getBackReferencesDir(archiveStoreDir, status.getPath().getName());
+ // assert back references directory is empty
+ assertFalse("There is a back reference in " + path, fs.exists(path));
+ }
+ }
+ }
+ }
+ } finally {
+ UTIL.getAdmin().deleteSnapshot(snapshotName);
+ UTIL.deleteTable(tableName);
+ tearDownCluster();
+ }
+ }
+
+ public abstract void testRestoreSnapshotDoesNotCreateBackRefLinksInit(TableName tableName,
+ String snapshotName, Path tmpTableDir) throws Exception;
+
+ protected void testWithMapReduce(HBaseTestingUtility util, String snapshotName,
+ int numRegions, int expectedNumSplits, boolean shutdownCluster) throws Exception {
+ setupCluster();
+ try {
+ Path tableDir = util.getDataTestDirOnTestFS(snapshotName);
+ TableName tableName = TableName.valueOf("testWithMapReduce");
+ testWithMapReduceImpl(util, tableName, snapshotName, tableDir, numRegions,
+ expectedNumSplits, shutdownCluster);
+ } finally {
+ tearDownCluster();
+ }
+ }
+
+ protected static void verifyRowFromMap(ImmutableBytesWritable key, Result result)
+ throws IOException {
+ byte[] row = key.get();
+ CellScanner scanner = result.cellScanner();
+ while (scanner.advance()) {
+ Cell cell = scanner.current();
+
+ //assert that all Cells in the Result have the same key
+ Assert.assertEquals(0, Bytes.compareTo(row, 0, row.length,
+ cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()));
+ }
+
+ for (int j = 0; j < FAMILIES.length; j++) {
+ byte[] actual = result.getValue(FAMILIES[j], FAMILIES[j]);
+ Assert.assertArrayEquals("Row in snapshot does not match, expected:" + Bytes.toString(row)
+ + " ,actual:" + Bytes.toString(actual), row, actual);
+ }
+ }
+
+ protected static void createTableAndSnapshot(HBaseTestingUtility util, TableName tableName,
+ String snapshotName, byte[] startRow, byte[] endRow, int numRegions)
+ throws Exception {
+ try {
+ LOG.debug("Ensuring table doesn't exist.");
+ util.deleteTable(tableName);
+ } catch(Exception ex) {
+ // ignore
+ }
+
+ LOG.info("creating table '" + tableName + "'");
+ if (numRegions > 1) {
+ util.createTable(tableName, FAMILIES, 1, startRow, endRow, numRegions);
+ } else {
+ util.createTable(tableName, FAMILIES);
+ }
+ Admin admin = util.getAdmin();
+
+ LOG.info("put some stuff in the table");
+ Table table = util.getConnection().getTable(tableName);
+ util.loadTable(table, FAMILIES);
+
+ Path rootDir = FSUtils.getRootDir(util.getConfiguration());
+ FileSystem fs = rootDir.getFileSystem(util.getConfiguration());
+
+ LOG.info("snapshot");
+ SnapshotTestingUtils.createSnapshotAndValidate(admin, tableName,
+ Arrays.asList(FAMILIES), null, snapshotName, rootDir, fs, true);
+
+ LOG.info("load different values");
+ byte[] value = Bytes.toBytes("after_snapshot_value");
+ util.loadTable(table, FAMILIES, value);
+
+ LOG.info("cause flush to create new files in the region");
+ admin.flush(tableName);
+ table.close();
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCellCounter.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCellCounter.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCellCounter.java
new file mode 100644
index 0000000..ff623cb
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCellCounter.java
@@ -0,0 +1,376 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.LauncherSecurityManager;
+import org.apache.hadoop.util.ToolRunner;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+
+import java.io.*;
+
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
+
+@Category({MapReduceTests.class, LargeTests.class})
+public class TestCellCounter {
+ private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
+ private static final byte[] ROW1 = Bytes.toBytesBinary("\\x01row1");
+ private static final byte[] ROW2 = Bytes.toBytesBinary("\\x01row2");
+ private static final String FAMILY_A_STRING = "a";
+ private static final String FAMILY_B_STRING = "b";
+ private static final byte[] FAMILY_A = Bytes.toBytes(FAMILY_A_STRING);
+ private static final byte[] FAMILY_B = Bytes.toBytes(FAMILY_B_STRING);
+ private static final byte[] QUALIFIER = Bytes.toBytes("q");
+
+ private static Path FQ_OUTPUT_DIR;
+ private static final String OUTPUT_DIR = "target" + File.separator + "test-data" + File.separator
+ + "output";
+ private static long now = System.currentTimeMillis();
+
+ @Rule
+ public TestName name = new TestName();
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ UTIL.startMiniCluster();
+ FQ_OUTPUT_DIR = new Path(OUTPUT_DIR).makeQualified(new LocalFileSystem());
+ FileUtil.fullyDelete(new File(OUTPUT_DIR));
+ }
+
+ @AfterClass
+ public static void afterClass() throws Exception {
+ UTIL.shutdownMiniCluster();
+ }
+
+ /**
+ * Test CellCounter; all data should print to the output.
+ */
+ @Test (timeout=300000)
+ public void testCellCounter() throws Exception {
+ final TableName sourceTable = TableName.valueOf(name.getMethodName());
+ byte[][] families = { FAMILY_A, FAMILY_B };
+ Table t = UTIL.createTable(sourceTable, families);
+ try{
+ Put p = new Put(ROW1);
+ p.addColumn(FAMILY_A, QUALIFIER, now, Bytes.toBytes("Data11"));
+ p.addColumn(FAMILY_B, QUALIFIER, now + 1, Bytes.toBytes("Data12"));
+ p.addColumn(FAMILY_A, QUALIFIER, now + 2, Bytes.toBytes("Data13"));
+ t.put(p);
+ p = new Put(ROW2);
+ p.addColumn(FAMILY_B, QUALIFIER, now, Bytes.toBytes("Dat21"));
+ p.addColumn(FAMILY_A, QUALIFIER, now + 1, Bytes.toBytes("Data22"));
+ p.addColumn(FAMILY_B, QUALIFIER, now + 2, Bytes.toBytes("Data23"));
+ t.put(p);
+ String[] args = { sourceTable.getNameAsString(), FQ_OUTPUT_DIR.toString(), ";", "^row1" };
+ runCount(args);
+ FileInputStream inputStream = new FileInputStream(OUTPUT_DIR + File.separator +
+ "part-r-00000");
+ String data = IOUtils.toString(inputStream);
+ inputStream.close();
+ assertTrue(data.contains("Total Families Across all Rows" + "\t" + "2"));
+ assertTrue(data.contains("Total Qualifiers across all Rows" + "\t" + "2"));
+ assertTrue(data.contains("Total ROWS" + "\t" + "1"));
+ assertTrue(data.contains("b;q" + "\t" + "1"));
+ assertTrue(data.contains("a;q" + "\t" + "1"));
+ assertTrue(data.contains("row1;a;q_Versions" + "\t" + "1"));
+ assertTrue(data.contains("row1;b;q_Versions" + "\t" + "1"));
+ }finally{
+ t.close();
+ FileUtil.fullyDelete(new File(OUTPUT_DIR));
+ }
+ }
+
+ /**
+ * Test CellCounter with a row prefix; matching data should print to the output.
+ */
+ @Test(timeout = 300000)
+ public void testCellCounterPrefix() throws Exception {
+ final TableName sourceTable = TableName.valueOf(name.getMethodName());
+ byte[][] families = { FAMILY_A, FAMILY_B };
+ Table t = UTIL.createTable(sourceTable, families);
+ try {
+ Put p = new Put(ROW1);
+ p.addColumn(FAMILY_A, QUALIFIER, now, Bytes.toBytes("Data11"));
+ p.addColumn(FAMILY_B, QUALIFIER, now + 1, Bytes.toBytes("Data12"));
+ p.addColumn(FAMILY_A, QUALIFIER, now + 2, Bytes.toBytes("Data13"));
+ t.put(p);
+ p = new Put(ROW2);
+ p.addColumn(FAMILY_B, QUALIFIER, now, Bytes.toBytes("Dat21"));
+ p.addColumn(FAMILY_A, QUALIFIER, now + 1, Bytes.toBytes("Data22"));
+ p.addColumn(FAMILY_B, QUALIFIER, now + 2, Bytes.toBytes("Data23"));
+ t.put(p);
+ String[] args = { sourceTable.getNameAsString(), FQ_OUTPUT_DIR.toString(), ";", "\\x01row1" };
+ runCount(args);
+ FileInputStream inputStream =
+ new FileInputStream(OUTPUT_DIR + File.separator + "part-r-00000");
+ String data = IOUtils.toString(inputStream);
+ inputStream.close();
+ assertTrue(data.contains("Total Families Across all Rows" + "\t" + "2"));
+ assertTrue(data.contains("Total Qualifiers across all Rows" + "\t" + "2"));
+ assertTrue(data.contains("Total ROWS" + "\t" + "1"));
+ assertTrue(data.contains("b;q" + "\t" + "1"));
+ assertTrue(data.contains("a;q" + "\t" + "1"));
+ assertTrue(data.contains("row1;a;q_Versions" + "\t" + "1"));
+ assertTrue(data.contains("row1;b;q_Versions" + "\t" + "1"));
+ } finally {
+ t.close();
+ FileUtil.fullyDelete(new File(OUTPUT_DIR));
+ }
+ }
+
+ /**
+ * Test CellCounter with a start/end time range; matching data should print to the output.
+ */
+ @Test (timeout=300000)
+ public void testCellCounterStartTimeRange() throws Exception {
+ final TableName sourceTable = TableName.valueOf(name.getMethodName());
+ byte[][] families = { FAMILY_A, FAMILY_B };
+ Table t = UTIL.createTable(sourceTable, families);
+ try{
+ Put p = new Put(ROW1);
+ p.addColumn(FAMILY_A, QUALIFIER, now, Bytes.toBytes("Data11"));
+ p.addColumn(FAMILY_B, QUALIFIER, now + 1, Bytes.toBytes("Data12"));
+ p.addColumn(FAMILY_A, QUALIFIER, now + 2, Bytes.toBytes("Data13"));
+ t.put(p);
+ p = new Put(ROW2);
+ p.addColumn(FAMILY_B, QUALIFIER, now, Bytes.toBytes("Dat21"));
+ p.addColumn(FAMILY_A, QUALIFIER, now + 1, Bytes.toBytes("Data22"));
+ p.addColumn(FAMILY_B, QUALIFIER, now + 2, Bytes.toBytes("Data23"));
+ t.put(p);
+ String[] args = {
+ sourceTable.getNameAsString(), FQ_OUTPUT_DIR.toString(), ";", "^row1",
+ "--starttime=" + now,
+ "--endtime=" + now + 2 };
+ runCount(args);
+ FileInputStream inputStream = new FileInputStream(OUTPUT_DIR + File.separator +
+ "part-r-00000");
+ String data = IOUtils.toString(inputStream);
+ inputStream.close();
+ assertTrue(data.contains("Total Families Across all Rows" + "\t" + "2"));
+ assertTrue(data.contains("Total Qualifiers across all Rows" + "\t" + "2"));
+ assertTrue(data.contains("Total ROWS" + "\t" + "1"));
+ assertTrue(data.contains("b;q" + "\t" + "1"));
+ assertTrue(data.contains("a;q" + "\t" + "1"));
+ assertTrue(data.contains("row1;a;q_Versions" + "\t" + "1"));
+ assertTrue(data.contains("row1;b;q_Versions" + "\t" + "1"));
+ }finally{
+ t.close();
+ FileUtil.fullyDelete(new File(OUTPUT_DIR));
+ }
+ }
+
+ /**
+ * Test CellCounter with an end time range; matching data should print to the output.
+ */
+ @Test (timeout=300000)
+ public void testCellCounterEndTimeRange() throws Exception {
+ final TableName sourceTable = TableName.valueOf(name.getMethodName());
+ byte[][] families = { FAMILY_A, FAMILY_B };
+ Table t = UTIL.createTable(sourceTable, families);
+ try{
+ Put p = new Put(ROW1);
+ p.addColumn(FAMILY_A, QUALIFIER, now, Bytes.toBytes("Data11"));
+ p.addColumn(FAMILY_B, QUALIFIER, now + 1, Bytes.toBytes("Data12"));
+ p.addColumn(FAMILY_A, QUALIFIER, now + 2, Bytes.toBytes("Data13"));
+ t.put(p);
+ p = new Put(ROW2);
+ p.addColumn(FAMILY_B, QUALIFIER, now, Bytes.toBytes("Dat21"));
+ p.addColumn(FAMILY_A, QUALIFIER, now + 1, Bytes.toBytes("Data22"));
+ p.addColumn(FAMILY_B, QUALIFIER, now + 2, Bytes.toBytes("Data23"));
+ t.put(p);
+ String[] args = {
+ sourceTable.getNameAsString(), FQ_OUTPUT_DIR.toString(), ";", "^row1",
+ "--endtime=" + now + 1 };
+ runCount(args);
+ FileInputStream inputStream = new FileInputStream(OUTPUT_DIR + File.separator +
+ "part-r-00000");
+ String data = IOUtils.toString(inputStream);
+ inputStream.close();
+ assertTrue(data.contains("Total Families Across all Rows" + "\t" + "2"));
+ assertTrue(data.contains("Total Qualifiers across all Rows" + "\t" + "2"));
+ assertTrue(data.contains("Total ROWS" + "\t" + "1"));
+ assertTrue(data.contains("b;q" + "\t" + "1"));
+ assertTrue(data.contains("a;q" + "\t" + "1"));
+ assertTrue(data.contains("row1;a;q_Versions" + "\t" + "1"));
+ assertTrue(data.contains("row1;b;q_Versions" + "\t" + "1"));
+ }finally{
+ t.close();
+ FileUtil.fullyDelete(new File(OUTPUT_DIR));
+ }
+ }
+
+ /**
+ * Test CellCounter with a time range that matches no data; nothing should be emitted to the output.
+ */
+ @Test (timeout=300000)
+ public void testCellCounterOutOfTimeRange() throws Exception {
+ final TableName sourceTable = TableName.valueOf(name.getMethodName());
+ byte[][] families = { FAMILY_A, FAMILY_B };
+ Table t = UTIL.createTable(sourceTable, families);
+ try{
+ Put p = new Put(ROW1);
+ p.addColumn(FAMILY_A, QUALIFIER, now, Bytes.toBytes("Data11"));
+ p.addColumn(FAMILY_B, QUALIFIER, now + 1, Bytes.toBytes("Data12"));
+ p.addColumn(FAMILY_A, QUALIFIER, now + 2, Bytes.toBytes("Data13"));
+ t.put(p);
+ p = new Put(ROW2);
+ p.addColumn(FAMILY_B, QUALIFIER, now, Bytes.toBytes("Dat21"));
+ p.addColumn(FAMILY_A, QUALIFIER, now + 1, Bytes.toBytes("Data22"));
+ p.addColumn(FAMILY_B, QUALIFIER, now + 2, Bytes.toBytes("Data23"));
+ t.put(p);
+ String[] args = {
+ sourceTable.getNameAsString(), FQ_OUTPUT_DIR.toString(), ";", "--starttime=" + now + 1,
+ "--endtime=" + now + 2 };
+
+ runCount(args);
+ FileInputStream inputStream = new FileInputStream(OUTPUT_DIR + File.separator +
+ "part-r-00000");
+ String data = IOUtils.toString(inputStream);
+ inputStream.close();
+ // nothing should have been emitted to the reducer
+ assertTrue(data.isEmpty());
+ }finally{
+ t.close();
+ FileUtil.fullyDelete(new File(OUTPUT_DIR));
+ }
+ }
+
+
+ private boolean runCount(String[] args) throws Exception {
+ // need to make a copy of the configuration to make sure
+ // different temp dirs are used.
+ int status = ToolRunner.run(new Configuration(UTIL.getConfiguration()), new CellCounter(),
+ args);
+ return status == 0;
+ }
+
+ /**
+ * Test main method of CellCounter
+ */
+ @Test (timeout=300000)
+ public void testCellCounterMain() throws Exception {
+
+ PrintStream oldPrintStream = System.err;
+ SecurityManager SECURITY_MANAGER = System.getSecurityManager();
+ LauncherSecurityManager newSecurityManager= new LauncherSecurityManager();
+ System.setSecurityManager(newSecurityManager);
+ ByteArrayOutputStream data = new ByteArrayOutputStream();
+ String[] args = {};
+ System.setErr(new PrintStream(data));
+ try {
+ System.setErr(new PrintStream(data));
+
+ try {
+ CellCounter.main(args);
+ fail("should be SecurityException");
+ } catch (SecurityException e) {
+ assertEquals(-1, newSecurityManager.getExitCode());
+ assertTrue(data.toString().contains("ERROR: Wrong number of parameters:"));
+ // should be information about usage
+ assertTrue(data.toString().contains("Usage:"));
+ }
+
+ } finally {
+ System.setErr(oldPrintStream);
+ System.setSecurityManager(SECURITY_MANAGER);
+ }
+ }
+
+ /**
+ * Test CellCounter for a complete table; all data should print to the output.
+ */
+ @Test(timeout = 600000)
+ public void testCellCounterForCompleteTable() throws Exception {
+ final TableName sourceTable = TableName.valueOf(name.getMethodName());
+ String outputPath = OUTPUT_DIR + sourceTable;
+ LocalFileSystem localFileSystem = new LocalFileSystem();
+ Path outputDir =
+ new Path(outputPath).makeQualified(localFileSystem.getUri(),
+ localFileSystem.getWorkingDirectory());
+ byte[][] families = { FAMILY_A, FAMILY_B };
+ Table t = UTIL.createTable(sourceTable, families);
+ try {
+ Put p = new Put(ROW1);
+ p.addColumn(FAMILY_A, QUALIFIER, now, Bytes.toBytes("Data11"));
+ p.addColumn(FAMILY_B, QUALIFIER, now + 1, Bytes.toBytes("Data12"));
+ p.addColumn(FAMILY_A, QUALIFIER, now + 2, Bytes.toBytes("Data13"));
+ t.put(p);
+ p = new Put(ROW2);
+ p.addColumn(FAMILY_B, QUALIFIER, now, Bytes.toBytes("Dat21"));
+ p.addColumn(FAMILY_A, QUALIFIER, now + 1, Bytes.toBytes("Data22"));
+ p.addColumn(FAMILY_B, QUALIFIER, now + 2, Bytes.toBytes("Data23"));
+ t.put(p);
+ String[] args = { sourceTable.getNameAsString(), outputDir.toString(), ";" };
+ runCount(args);
+ FileInputStream inputStream =
+ new FileInputStream(outputPath + File.separator + "part-r-00000");
+ String data = IOUtils.toString(inputStream);
+ inputStream.close();
+ assertTrue(data.contains("Total Families Across all Rows" + "\t" + "2"));
+ assertTrue(data.contains("Total Qualifiers across all Rows" + "\t" + "4"));
+ assertTrue(data.contains("Total ROWS" + "\t" + "2"));
+ assertTrue(data.contains("b;q" + "\t" + "2"));
+ assertTrue(data.contains("a;q" + "\t" + "2"));
+ assertTrue(data.contains("row1;a;q_Versions" + "\t" + "1"));
+ assertTrue(data.contains("row1;b;q_Versions" + "\t" + "1"));
+ assertTrue(data.contains("row2;a;q_Versions" + "\t" + "1"));
+ assertTrue(data.contains("row2;b;q_Versions" + "\t" + "1"));
+
+ FileUtil.fullyDelete(new File(outputPath));
+ args = new String[] { "-D " + TableInputFormat.SCAN_COLUMN_FAMILY + "=a, b",
+ sourceTable.getNameAsString(), outputDir.toString(), ";"};
+ runCount(args);
+ inputStream = new FileInputStream(outputPath + File.separator + "part-r-00000");
+ String data2 = IOUtils.toString(inputStream);
+ inputStream.close();
+ assertEquals(data, data2);
+ } finally {
+ t.close();
+ localFileSystem.close();
+ FileUtil.fullyDelete(new File(outputPath));
+ }
+ }
+
+ @Test
+ public void testCellCounterWithoutOutputDir() throws Exception {
+ String[] args = new String[] { "tableName" };
+ assertEquals("CellCounter should exit with -1 as output directory is not specified.", -1,
+ ToolRunner.run(HBaseConfiguration.create(), new CellCounter(), args));
+ }
+}
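For reference, the argument order these tests exercise (table name, output directory, report separator, optional row regex or prefix, then optional --starttime/--endtime) can be driven the same way runCount() does; the table name and output path below are placeholders:

    // Hypothetical invocation mirroring runCount() above; values are placeholders.
    int exitCode = ToolRunner.run(HBaseConfiguration.create(), new CellCounter(),
        new String[] { "myTable", "/tmp/cellcounter-out", ";", "^row1" });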
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCopyTable.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCopyTable.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCopyTable.java
new file mode 100644
index 0000000..0bec03b
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCopyTable.java
@@ -0,0 +1,262 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.ByteArrayOutputStream;
+import java.io.PrintStream;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.LauncherSecurityManager;
+import org.apache.hadoop.util.ToolRunner;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+
+/**
+ * Basic test for the CopyTable M/R tool
+ */
+@Category({MapReduceTests.class, LargeTests.class})
+public class TestCopyTable {
+ private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+ private static final byte[] ROW1 = Bytes.toBytes("row1");
+ private static final byte[] ROW2 = Bytes.toBytes("row2");
+ private static final String FAMILY_A_STRING = "a";
+ private static final String FAMILY_B_STRING = "b";
+ private static final byte[] FAMILY_A = Bytes.toBytes(FAMILY_A_STRING);
+ private static final byte[] FAMILY_B = Bytes.toBytes(FAMILY_B_STRING);
+ private static final byte[] QUALIFIER = Bytes.toBytes("q");
+
+ @Rule
+ public TestName name = new TestName();
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ TEST_UTIL.startMiniCluster(3);
+ }
+
+ @AfterClass
+ public static void afterClass() throws Exception {
+ TEST_UTIL.shutdownMiniCluster();
+ }
+
+ private void doCopyTableTest(boolean bulkload) throws Exception {
+ final TableName tableName1 = TableName.valueOf(name.getMethodName() + "1");
+ final TableName tableName2 = TableName.valueOf(name.getMethodName() + "2");
+ final byte[] FAMILY = Bytes.toBytes("family");
+ final byte[] COLUMN1 = Bytes.toBytes("c1");
+
+ try (Table t1 = TEST_UTIL.createTable(tableName1, FAMILY);
+ Table t2 = TEST_UTIL.createTable(tableName2, FAMILY);) {
+ // put rows into the first table
+ for (int i = 0; i < 10; i++) {
+ Put p = new Put(Bytes.toBytes("row" + i));
+ p.addColumn(FAMILY, COLUMN1, COLUMN1);
+ t1.put(p);
+ }
+
+ CopyTable copy = new CopyTable();
+
+ int code;
+ if (bulkload) {
+ code = ToolRunner.run(new Configuration(TEST_UTIL.getConfiguration()),
+ copy, new String[] { "--new.name=" + tableName2.getNameAsString(),
+ "--bulkload", tableName1.getNameAsString() });
+ } else {
+ code = ToolRunner.run(new Configuration(TEST_UTIL.getConfiguration()),
+ copy, new String[] { "--new.name=" + tableName2.getNameAsString(),
+ tableName1.getNameAsString() });
+ }
+ assertEquals("copy job failed", 0, code);
+
+ // verify the data was copied into table 2
+ for (int i = 0; i < 10; i++) {
+ Get g = new Get(Bytes.toBytes("row" + i));
+ Result r = t2.get(g);
+ assertEquals(1, r.size());
+ assertTrue(CellUtil.matchingQualifier(r.rawCells()[0], COLUMN1));
+ }
+ } finally {
+ TEST_UTIL.deleteTable(tableName1);
+ TEST_UTIL.deleteTable(tableName2);
+ }
+ }
+
+ /**
+ * Simple end-to-end test
+ * @throws Exception
+ */
+ @Test
+ public void testCopyTable() throws Exception {
+ doCopyTableTest(false);
+ }
+
+ /**
+ * Simple end-to-end test with bulkload.
+ */
+ @Test
+ public void testCopyTableWithBulkload() throws Exception {
+ doCopyTableTest(true);
+ }
+
+ @Test
+ public void testStartStopRow() throws Exception {
+ final TableName tableName1 = TableName.valueOf(name.getMethodName() + "1");
+ final TableName tableName2 = TableName.valueOf(name.getMethodName() + "2");
+ final byte[] FAMILY = Bytes.toBytes("family");
+ final byte[] COLUMN1 = Bytes.toBytes("c1");
+ final byte[] ROW0 = Bytes.toBytesBinary("\\x01row0");
+ final byte[] ROW1 = Bytes.toBytesBinary("\\x01row1");
+ final byte[] ROW2 = Bytes.toBytesBinary("\\x01row2");
+
+ Table t1 = TEST_UTIL.createTable(tableName1, FAMILY);
+ Table t2 = TEST_UTIL.createTable(tableName2, FAMILY);
+
+ // put rows into the first table
+ Put p = new Put(ROW0);
+ p.addColumn(FAMILY, COLUMN1, COLUMN1);
+ t1.put(p);
+ p = new Put(ROW1);
+ p.addColumn(FAMILY, COLUMN1, COLUMN1);
+ t1.put(p);
+ p = new Put(ROW2);
+ p.addColumn(FAMILY, COLUMN1, COLUMN1);
+ t1.put(p);
+
+ CopyTable copy = new CopyTable();
+ assertEquals(
+ 0,
+ ToolRunner.run(new Configuration(TEST_UTIL.getConfiguration()),
+ copy, new String[] { "--new.name=" + tableName2, "--startrow=\\x01row1",
+ "--stoprow=\\x01row2", tableName1.getNameAsString() }));
+
+ // verify the data was copied into table 2
+ // row1 exist, row0, row2 do not exist
+ Get g = new Get(ROW1);
+ Result r = t2.get(g);
+ assertEquals(1, r.size());
+ assertTrue(CellUtil.matchingQualifier(r.rawCells()[0], COLUMN1));
+
+ g = new Get(ROW0);
+ r = t2.get(g);
+ assertEquals(0, r.size());
+
+ g = new Get(ROW2);
+ r = t2.get(g);
+ assertEquals(0, r.size());
+
+ t1.close();
+ t2.close();
+ TEST_UTIL.deleteTable(tableName1);
+ TEST_UTIL.deleteTable(tableName2);
+ }
+
+ /**
+ * Test a copy from sourceTable to targetTable of all rows from family a.
+ */
+ @Test
+ public void testRenameFamily() throws Exception {
+ final TableName sourceTable = TableName.valueOf(name.getMethodName() + "source");
+ final TableName targetTable = TableName.valueOf(name.getMethodName() + "-target");
+
+ byte[][] families = { FAMILY_A, FAMILY_B };
+
+ Table t = TEST_UTIL.createTable(sourceTable, families);
+ Table t2 = TEST_UTIL.createTable(targetTable, families);
+ Put p = new Put(ROW1);
+ p.addColumn(FAMILY_A, QUALIFIER, Bytes.toBytes("Data11"));
+ p.addColumn(FAMILY_B, QUALIFIER, Bytes.toBytes("Data12"));
+ p.addColumn(FAMILY_A, QUALIFIER, Bytes.toBytes("Data13"));
+ t.put(p);
+ p = new Put(ROW2);
+ p.addColumn(FAMILY_B, QUALIFIER, Bytes.toBytes("Dat21"));
+ p.addColumn(FAMILY_A, QUALIFIER, Bytes.toBytes("Data22"));
+ p.addColumn(FAMILY_B, QUALIFIER, Bytes.toBytes("Data23"));
+ t.put(p);
+
+ long currentTime = System.currentTimeMillis();
+ String[] args = new String[] { "--new.name=" + targetTable, "--families=a:b", "--all.cells",
+ "--starttime=" + (currentTime - 100000), "--endtime=" + (currentTime + 100000),
+ "--versions=1", sourceTable.getNameAsString() };
+ assertNull(t2.get(new Get(ROW1)).getRow());
+
+ assertTrue(runCopy(args));
+
+ assertNotNull(t2.get(new Get(ROW1)).getRow());
+ Result res = t2.get(new Get(ROW1));
+ byte[] b1 = res.getValue(FAMILY_B, QUALIFIER);
+ assertEquals("Data13", new String(b1));
+ assertNotNull(t2.get(new Get(ROW2)).getRow());
+ res = t2.get(new Get(ROW2));
+ b1 = res.getValue(FAMILY_A, QUALIFIER);
+ // Data from family B was not copied
+ assertNull(b1);
+
+ }
+
+ /**
+ * Test main method of CopyTable.
+ */
+ @Test
+ public void testMainMethod() throws Exception {
+ String[] emptyArgs = { "-h" };
+ PrintStream oldWriter = System.err;
+ ByteArrayOutputStream data = new ByteArrayOutputStream();
+ PrintStream writer = new PrintStream(data);
+ System.setErr(writer);
+ SecurityManager SECURITY_MANAGER = System.getSecurityManager();
+ LauncherSecurityManager newSecurityManager= new LauncherSecurityManager();
+ System.setSecurityManager(newSecurityManager);
+ try {
+ CopyTable.main(emptyArgs);
+ fail("should be exit");
+ } catch (SecurityException e) {
+ assertEquals(1, newSecurityManager.getExitCode());
+ } finally {
+ System.setErr(oldWriter);
+ System.setSecurityManager(SECURITY_MANAGER);
+ }
+ assertTrue(data.toString().contains("rs.class"));
+ // should print usage information
+ assertTrue(data.toString().contains("Usage:"));
+ }
+
+ private boolean runCopy(String[] args) throws Exception {
+ int status = ToolRunner.run(new Configuration(TEST_UTIL.getConfiguration()), new CopyTable(),
+ args);
+ return status == 0;
+ }
+}
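The same ToolRunner pattern used by runCopy() applies to the flags exercised above (--new.name, --startrow/--stoprow, --families, --all.cells, --bulkload); the table names in this sketch are placeholders:

    // Hypothetical invocation mirroring runCopy() above; table names are placeholders.
    int exitCode = ToolRunner.run(HBaseConfiguration.create(), new CopyTable(),
        new String[] { "--new.name=backupTable", "--families=a", "sourceTable" });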
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestGroupingTableMapper.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestGroupingTableMapper.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestGroupingTableMapper.java
new file mode 100644
index 0000000..7e36602
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestGroupingTableMapper.java
@@ -0,0 +1,68 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
+ * agreements. See the NOTICE file distributed with this work for additional information regarding
+ * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with the License. You may
+ * obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import static org.mockito.Mockito.*;
+
+@Category({MapReduceTests.class, SmallTests.class})
+public class TestGroupingTableMapper {
+
+ /**
+ * Test GroupingTableMapper class
+ */
+ @Test
+ public void testGroupingTableMapper() throws Exception {
+
+ GroupingTableMapper mapper = new GroupingTableMapper();
+ Configuration configuration = new Configuration();
+ configuration.set(GroupingTableMapper.GROUP_COLUMNS, "family1:clm family2:clm");
+ mapper.setConf(configuration);
+
+ Result result = mock(Result.class);
+ @SuppressWarnings("unchecked")
+ Mapper<ImmutableBytesWritable, Result, ImmutableBytesWritable, Result>.Context context =
+ mock(Mapper.Context.class);
+ context.write(any(ImmutableBytesWritable.class), any(Result.class));
+ List<Cell> keyValue = new ArrayList<>();
+ byte[] row = {};
+ keyValue.add(new KeyValue(row, Bytes.toBytes("family2"), Bytes.toBytes("clm"), Bytes
+ .toBytes("value1")));
+ keyValue.add(new KeyValue(row, Bytes.toBytes("family1"), Bytes.toBytes("clm"), Bytes
+ .toBytes("value2")));
+ when(result.listCells()).thenReturn(keyValue);
+ mapper.map(null, result, context);
+ // template data
+ byte[][] data = { Bytes.toBytes("value1"), Bytes.toBytes("value2") };
+ ImmutableBytesWritable ibw = mapper.createGroupKey(data);
+ verify(context).write(ibw, result);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableSnapshotInputFormatImpl.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableSnapshotInputFormatImpl.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableSnapshotInputFormatImpl.java
new file mode 100644
index 0000000..4331c0f
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableSnapshotInputFormatImpl.java
@@ -0,0 +1,252 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.Maps;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.classification.InterfaceStability;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper;
+import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
+import org.apache.hadoop.hbase.util.ConfigurationUtil;
+import org.apache.hadoop.hbase.util.FSUtils;
+
+import java.io.IOException;
+import java.util.AbstractMap;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.UUID;
+
+/**
+ * Shared implementation of mapreduce code over multiple table snapshots.
+ * Utilized by both the mapreduce
+ * ({@link org.apache.hadoop.hbase.mapreduce.MultiTableSnapshotInputFormat}) and mapred
+ * ({@link org.apache.hadoop.hbase.mapred.MultiTableSnapshotInputFormat}) implementations.
+ */
+@InterfaceAudience.LimitedPrivate({ "HBase" })
+@InterfaceStability.Evolving
+public class MultiTableSnapshotInputFormatImpl {
+
+ private static final Log LOG = LogFactory.getLog(MultiTableSnapshotInputFormatImpl.class);
+
+ public static final String RESTORE_DIRS_KEY =
+ "hbase.MultiTableSnapshotInputFormat.restore.snapshotDirMapping";
+ public static final String SNAPSHOT_TO_SCANS_KEY =
+ "hbase.MultiTableSnapshotInputFormat.snapshotsToScans";
+
+ /**
+ * Configure conf to read from snapshotScans, with snapshots restored to a subdirectory of
+ * restoreDir.
+ * Sets: {@link #RESTORE_DIRS_KEY}, {@link #SNAPSHOT_TO_SCANS_KEY}
+ *
+ * @param conf configuration to update
+ * @param snapshotScans map from snapshot name to the scans to run against it
+ * @param restoreDir base directory under which the snapshots will be restored
+ * @throws IOException if the snapshots cannot be restored
+ */
+ public void setInput(Configuration conf, Map<String, Collection<Scan>> snapshotScans,
+ Path restoreDir) throws IOException {
+ Path rootDir = FSUtils.getRootDir(conf);
+ FileSystem fs = rootDir.getFileSystem(conf);
+
+ setSnapshotToScans(conf, snapshotScans);
+ Map<String, Path> restoreDirs =
+ generateSnapshotToRestoreDirMapping(snapshotScans.keySet(), restoreDir);
+ setSnapshotDirs(conf, restoreDirs);
+ restoreSnapshots(conf, restoreDirs, fs);
+ }
+
+ /**
+ * Return the list of splits extracted from the scans/snapshots pushed to conf by
+ * {@link
+ * #setInput(org.apache.hadoop.conf.Configuration, java.util.Map, org.apache.hadoop.fs.Path)}
+ *
+ * @param conf Configuration to determine splits from
+ * @return Return the list of splits extracted from the scans/snapshots pushed to conf
+ * @throws IOException
+ */
+ public List<TableSnapshotInputFormatImpl.InputSplit> getSplits(Configuration conf)
+ throws IOException {
+ Path rootDir = FSUtils.getRootDir(conf);
+ FileSystem fs = rootDir.getFileSystem(conf);
+
+ List<TableSnapshotInputFormatImpl.InputSplit> rtn = Lists.newArrayList();
+
+ Map<String, Collection<Scan>> snapshotsToScans = getSnapshotsToScans(conf);
+ Map<String, Path> snapshotsToRestoreDirs = getSnapshotDirs(conf);
+ for (Map.Entry<String, Collection<Scan>> entry : snapshotsToScans.entrySet()) {
+ String snapshotName = entry.getKey();
+
+ Path restoreDir = snapshotsToRestoreDirs.get(snapshotName);
+
+ SnapshotManifest manifest =
+ TableSnapshotInputFormatImpl.getSnapshotManifest(conf, snapshotName, rootDir, fs);
+ List<HRegionInfo> regionInfos =
+ TableSnapshotInputFormatImpl.getRegionInfosFromManifest(manifest);
+
+ for (Scan scan : entry.getValue()) {
+ List<TableSnapshotInputFormatImpl.InputSplit> splits =
+ TableSnapshotInputFormatImpl.getSplits(scan, manifest, regionInfos, restoreDir, conf);
+ rtn.addAll(splits);
+ }
+ }
+ return rtn;
+ }
+
+ /**
+ * Retrieve the snapshot name -> list<scan> mapping pushed to configuration by
+ * {@link #setSnapshotToScans(org.apache.hadoop.conf.Configuration, java.util.Map)}
+ *
+ * @param conf Configuration to extract name -> list<scan> mappings from.
+ * @return the snapshot name -> list<scan> mapping pushed to configuration
+ * @throws IOException
+ */
+ public Map<String, Collection<Scan>> getSnapshotsToScans(Configuration conf) throws IOException {
+
+ Map<String, Collection<Scan>> rtn = Maps.newHashMap();
+
+ for (Map.Entry<String, String> entry : ConfigurationUtil
+ .getKeyValues(conf, SNAPSHOT_TO_SCANS_KEY)) {
+ String snapshotName = entry.getKey();
+ String scan = entry.getValue();
+
+ Collection<Scan> snapshotScans = rtn.get(snapshotName);
+ if (snapshotScans == null) {
+ snapshotScans = Lists.newArrayList();
+ rtn.put(snapshotName, snapshotScans);
+ }
+
+ snapshotScans.add(TableMapReduceUtil.convertStringToScan(scan));
+ }
+
+ return rtn;
+ }
+
+ /**
+ * Push snapshotScans to conf (under the key {@link #SNAPSHOT_TO_SCANS_KEY})
+ *
+ * @param conf configuration to update
+ * @param snapshotScans map from snapshot name to the scans to serialize into it
+ * @throws IOException if a scan cannot be serialized
+ */
+ public void setSnapshotToScans(Configuration conf, Map<String, Collection<Scan>> snapshotScans)
+ throws IOException {
+ // flatten out snapshotScans for serialization to the job conf
+ List<Map.Entry<String, String>> snapshotToSerializedScans = Lists.newArrayList();
+
+ for (Map.Entry<String, Collection<Scan>> entry : snapshotScans.entrySet()) {
+ String snapshotName = entry.getKey();
+ Collection<Scan> scans = entry.getValue();
+
+ // serialize all scans and map them to the appropriate snapshot
+ for (Scan scan : scans) {
+ snapshotToSerializedScans.add(new AbstractMap.SimpleImmutableEntry<>(snapshotName,
+ TableMapReduceUtil.convertScanToString(scan)));
+ }
+ }
+
+ ConfigurationUtil.setKeyValues(conf, SNAPSHOT_TO_SCANS_KEY, snapshotToSerializedScans);
+ }
+
+ /**
+ * Retrieve the directories into which snapshots have been restored from
+ * ({@link #RESTORE_DIRS_KEY})
+ *
+ * @param conf Configuration to extract restore directories from
+ * @return the directories into which snapshots have been restored from
+ * @throws IOException
+ */
+ public Map<String, Path> getSnapshotDirs(Configuration conf) throws IOException {
+ List<Map.Entry<String, String>> kvps = ConfigurationUtil.getKeyValues(conf, RESTORE_DIRS_KEY);
+ Map<String, Path> rtn = Maps.newHashMapWithExpectedSize(kvps.size());
+
+ for (Map.Entry<String, String> kvp : kvps) {
+ rtn.put(kvp.getKey(), new Path(kvp.getValue()));
+ }
+
+ return rtn;
+ }
+
+ public void setSnapshotDirs(Configuration conf, Map<String, Path> snapshotDirs) {
+ Map<String, String> toSet = Maps.newHashMap();
+
+ for (Map.Entry<String, Path> entry : snapshotDirs.entrySet()) {
+ toSet.put(entry.getKey(), entry.getValue().toString());
+ }
+
+ ConfigurationUtil.setKeyValues(conf, RESTORE_DIRS_KEY, toSet.entrySet());
+ }
+
+ /**
+ * Generate a random path underneath baseRestoreDir for each snapshot in snapshots and
+ * return a map from the snapshot to the restore directory.
+ *
+ * @param snapshots collection of snapshot names to restore
+ * @param baseRestoreDir base directory under which all snapshots in snapshots will be restored
+ * @return a mapping from snapshot name to the directory in which that snapshot has been restored
+ */
+ private Map<String, Path> generateSnapshotToRestoreDirMapping(Collection<String> snapshots,
+ Path baseRestoreDir) {
+ Map<String, Path> rtn = Maps.newHashMap();
+
+ for (String snapshotName : snapshots) {
+ Path restoreSnapshotDir =
+ new Path(baseRestoreDir, snapshotName + "__" + UUID.randomUUID().toString());
+ rtn.put(snapshotName, restoreSnapshotDir);
+ }
+
+ return rtn;
+ }
+
+ /**
+ * Restore each (snapshot name, restore directory) pair in snapshotToDir
+ *
+ * @param conf configuration to restore with
+ * @param snapshotToDir mapping from snapshot names to restore directories
+ * @param fs filesystem to do snapshot restoration on
+ * @throws IOException
+ */
+ public void restoreSnapshots(Configuration conf, Map<String, Path> snapshotToDir, FileSystem fs)
+ throws IOException {
+ // TODO: restore from record readers to parallelize.
+ Path rootDir = FSUtils.getRootDir(conf);
+
+ for (Map.Entry<String, Path> entry : snapshotToDir.entrySet()) {
+ String snapshotName = entry.getKey();
+ Path restoreDir = entry.getValue();
+ LOG.info("Restoring snapshot " + snapshotName + " into " + restoreDir
+ + " for MultiTableSnapshotInputFormat");
+ restoreSnapshot(conf, snapshotName, rootDir, restoreDir, fs);
+ }
+ }
+
+ void restoreSnapshot(Configuration conf, String snapshotName, Path rootDir, Path restoreDir,
+ FileSystem fs) throws IOException {
+ RestoreSnapshotHelper.copySnapshotForScanner(conf, fs, rootDir, restoreDir, snapshotName);
+ }
+
+}
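As a rough illustration of the setInput() flow implemented above, the sketch below pushes one snapshot/scan pair into a job configuration; the snapshot name and restore directory are made up, and real jobs would normally go through TableMapReduceUtil rather than calling the Impl class directly:

    // Hypothetical sketch of the configuration flow; names and paths are assumptions.
    Configuration conf = HBaseConfiguration.create();
    Map<String, Collection<Scan>> snapshotScans = new HashMap<>();
    snapshotScans.put("my_snapshot",                           // hypothetical snapshot name
        Collections.singletonList(new Scan().addFamily(Bytes.toBytes("f1"))));
    Path restoreDir = new Path("/tmp/snapshot-restore");       // hypothetical restore directory
    // Serializes the scans, records the per-snapshot restore dirs, and restores each snapshot.
    new MultiTableSnapshotInputFormatImpl().setInput(conf, snapshotScans, restoreDir);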
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultithreadedTableMapper.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultithreadedTableMapper.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultithreadedTableMapper.java
new file mode 100644
index 0000000..a505379
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultithreadedTableMapper.java
@@ -0,0 +1,301 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Method;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.mapreduce.Counter;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.MapContext;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.OutputCommitter;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.StatusReporter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.TaskAttemptID;
+import org.apache.hadoop.util.ReflectionUtils;
+
+
+/**
+ * Multithreaded implementation for {@link org.apache.hadoop.hbase.mapreduce.TableMapper}.
+ * <p>
+ * It can be used instead when the Map operation is not CPU
+ * bound in order to improve throughput.
+ * <p>
+ * Mapper implementations using this MapRunnable must be thread-safe.
+ * <p>
+ * The Map-Reduce job has to be configured with the mapper to use via
+ * {@link #setMapperClass} and the number of threads the thread pool can use with the
+ * {@link #setNumberOfThreads} method. The default value is 10 threads.
+ * <p>
+ */
+
+public class MultithreadedTableMapper<K2, V2> extends TableMapper<K2, V2> {
+ private static final Log LOG = LogFactory.getLog(MultithreadedTableMapper.class);
+ private Class<? extends Mapper<ImmutableBytesWritable, Result,K2,V2>> mapClass;
+ private Context outer;
+ private ExecutorService executor;
+ public static final String NUMBER_OF_THREADS = "hbase.mapreduce.multithreadedmapper.threads";
+ public static final String MAPPER_CLASS = "hbase.mapreduce.multithreadedmapper.mapclass";
+
+ /**
+ * The number of threads in the thread pool that will run the map function.
+ * @param job the job
+ * @return the number of threads
+ */
+ public static int getNumberOfThreads(JobContext job) {
+ return job.getConfiguration().
+ getInt(NUMBER_OF_THREADS, 10);
+ }
+
+ /**
+ * Set the number of threads in the pool for running maps.
+ * @param job the job to modify
+ * @param threads the new number of threads
+ */
+ public static void setNumberOfThreads(Job job, int threads) {
+ job.getConfiguration().setInt(NUMBER_OF_THREADS,
+ threads);
+ }
+
+ /**
+ * Get the application's mapper class.
+ * @param <K2> the map's output key type
+ * @param <V2> the map's output value type
+ * @param job the job
+ * @return the mapper class to run
+ */
+ @SuppressWarnings("unchecked")
+ public static <K2,V2>
+ Class<Mapper<ImmutableBytesWritable, Result,K2,V2>> getMapperClass(JobContext job) {
+ return (Class<Mapper<ImmutableBytesWritable, Result,K2,V2>>)
+ job.getConfiguration().getClass( MAPPER_CLASS,
+ Mapper.class);
+ }
+
+ /**
+ * Set the application's mapper class.
+ * @param <K2> the map output key type
+ * @param <V2> the map output value type
+ * @param job the job to modify
+ * @param cls the class to use as the mapper
+ */
+ public static <K2,V2>
+ void setMapperClass(Job job,
+ Class<? extends Mapper<ImmutableBytesWritable, Result,K2,V2>> cls) {
+ if (MultithreadedTableMapper.class.isAssignableFrom(cls)) {
+ throw new IllegalArgumentException("Can't have recursive " +
+ "MultithreadedTableMapper instances.");
+ }
+ job.getConfiguration().setClass(MAPPER_CLASS,
+ cls, Mapper.class);
+ }
+
+ /**
+ * Run the application's maps using a thread pool.
+ */
+ @Override
+ public void run(Context context) throws IOException, InterruptedException {
+ outer = context;
+ int numberOfThreads = getNumberOfThreads(context);
+ mapClass = getMapperClass(context);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Configuring multithread runner to use " + numberOfThreads +
+ " threads");
+ }
+ executor = Executors.newFixedThreadPool(numberOfThreads);
+ for(int i=0; i < numberOfThreads; ++i) {
+ MapRunner thread = new MapRunner(context);
+ executor.execute(thread);
+ }
+ executor.shutdown();
+ while (!executor.isTerminated()) {
+ // wait till all the threads are done
+ Thread.sleep(1000);
+ }
+ }
+
+ private class SubMapRecordReader
+ extends RecordReader<ImmutableBytesWritable, Result> {
+ private ImmutableBytesWritable key;
+ private Result value;
+ private Configuration conf;
+
+ @Override
+ public void close() throws IOException {
+ }
+
+ @Override
+ public float getProgress() throws IOException, InterruptedException {
+ return 0;
+ }
+
+ @Override
+ public void initialize(InputSplit split,
+ TaskAttemptContext context
+ ) throws IOException, InterruptedException {
+ conf = context.getConfiguration();
+ }
+
+ @Override
+ public boolean nextKeyValue() throws IOException, InterruptedException {
+ synchronized (outer) {
+ if (!outer.nextKeyValue()) {
+ return false;
+ }
+ key = ReflectionUtils.copy(outer.getConfiguration(),
+ outer.getCurrentKey(), key);
+ value = ReflectionUtils.copy(conf, outer.getCurrentValue(), value);
+ return true;
+ }
+ }
+
+ public ImmutableBytesWritable getCurrentKey() {
+ return key;
+ }
+
+ @Override
+ public Result getCurrentValue() {
+ return value;
+ }
+ }
+
+ private class SubMapRecordWriter extends RecordWriter<K2,V2> {
+
+ @Override
+ public void close(TaskAttemptContext context) throws IOException,
+ InterruptedException {
+ }
+
+ @Override
+ public void write(K2 key, V2 value) throws IOException,
+ InterruptedException {
+ synchronized (outer) {
+ outer.write(key, value);
+ }
+ }
+ }
+
+ private class SubMapStatusReporter extends StatusReporter {
+
+ @Override
+ public Counter getCounter(Enum<?> name) {
+ return outer.getCounter(name);
+ }
+
+ @Override
+ public Counter getCounter(String group, String name) {
+ return outer.getCounter(group, name);
+ }
+
+ @Override
+ public void progress() {
+ outer.progress();
+ }
+
+ @Override
+ public void setStatus(String status) {
+ outer.setStatus(status);
+ }
+
+ public float getProgress() {
+ return 0;
+ }
+ }
+
+ @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="REC_CATCH_EXCEPTION",
+ justification="Don't understand why FB is complaining about this one. We do throw exception")
+ private class MapRunner implements Runnable {
+ private Mapper<ImmutableBytesWritable, Result, K2,V2> mapper;
+ private Context subcontext;
+
+ @SuppressWarnings({ "rawtypes", "unchecked" })
+ MapRunner(Context context) throws IOException, InterruptedException {
+ mapper = ReflectionUtils.newInstance(mapClass,
+ context.getConfiguration());
+ try {
+ Constructor c = context.getClass().getConstructor(
+ Mapper.class,
+ Configuration.class,
+ TaskAttemptID.class,
+ RecordReader.class,
+ RecordWriter.class,
+ OutputCommitter.class,
+ StatusReporter.class,
+ InputSplit.class);
+ c.setAccessible(true);
+ subcontext = (Context) c.newInstance(
+ mapper,
+ outer.getConfiguration(),
+ outer.getTaskAttemptID(),
+ new SubMapRecordReader(),
+ new SubMapRecordWriter(),
+ context.getOutputCommitter(),
+ new SubMapStatusReporter(),
+ outer.getInputSplit());
+ } catch (Exception e) {
+ try {
+ Constructor c = Class.forName("org.apache.hadoop.mapreduce.task.MapContextImpl").getConstructor(
+ Configuration.class,
+ TaskAttemptID.class,
+ RecordReader.class,
+ RecordWriter.class,
+ OutputCommitter.class,
+ StatusReporter.class,
+ InputSplit.class);
+ c.setAccessible(true);
+ MapContext mc = (MapContext) c.newInstance(
+ outer.getConfiguration(),
+ outer.getTaskAttemptID(),
+ new SubMapRecordReader(),
+ new SubMapRecordWriter(),
+ context.getOutputCommitter(),
+ new SubMapStatusReporter(),
+ outer.getInputSplit());
+ Class<?> wrappedMapperClass = Class.forName("org.apache.hadoop.mapreduce.lib.map.WrappedMapper");
+ Method getMapContext = wrappedMapperClass.getMethod("getMapContext", MapContext.class);
+ subcontext = (Context) getMapContext.invoke(wrappedMapperClass.newInstance(), mc);
+ } catch (Exception ee) { // FindBugs: REC_CATCH_EXCEPTION
+ // rethrow as IOE
+ throw new IOException(e);
+ }
+ }
+ }
+
+ @Override
+ public void run() {
+ try {
+ mapper.run(subcontext);
+ } catch (Throwable ie) {
+ LOG.error("Problem in running map.", ie);
+ }
+ }
+ }
+}
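A minimal wiring sketch for the class above, assuming a driver with a Configuration named conf and a hypothetical application mapper MyMapper (a TableMapper emitting Text/Text); the thread count is read from the job configuration via getNumberOfThreads:

    Job job = Job.getInstance(conf, "multithreaded-scan");
    TableMapReduceUtil.initTableMapperJob("exampleTable", new Scan(),
        MultithreadedTableMapper.class, Text.class, Text.class, job);
    // Run MyMapper instances inside the thread pool managed by MultithreadedTableMapper.
    MultithreadedTableMapper.setMapperClass(job, MyMapper.class);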
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MutationSerialization.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MutationSerialization.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MutationSerialization.java
new file mode 100644
index 0000000..d5faab5
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MutationSerialization.java
@@ -0,0 +1,98 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Mutation;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.MutationProto;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.MutationProto.MutationType;
+import org.apache.hadoop.io.serializer.Deserializer;
+import org.apache.hadoop.io.serializer.Serialization;
+import org.apache.hadoop.io.serializer.Serializer;
+
+@InterfaceAudience.Public
+public class MutationSerialization implements Serialization<Mutation> {
+ @Override
+ public boolean accept(Class<?> c) {
+ return Mutation.class.isAssignableFrom(c);
+ }
+
+ @Override
+ public Deserializer<Mutation> getDeserializer(Class<Mutation> c) {
+ return new MutationDeserializer();
+ }
+
+ @Override
+ public Serializer<Mutation> getSerializer(Class<Mutation> c) {
+ return new MutationSerializer();
+ }
+
+ private static class MutationDeserializer implements Deserializer<Mutation> {
+ private InputStream in;
+
+ @Override
+ public void close() throws IOException {
+ in.close();
+ }
+
+ @Override
+ public Mutation deserialize(Mutation mutation) throws IOException {
+ MutationProto proto = MutationProto.parseDelimitedFrom(in);
+ return ProtobufUtil.toMutation(proto);
+ }
+
+ @Override
+ public void open(InputStream in) throws IOException {
+ this.in = in;
+ }
+
+ }
+ private static class MutationSerializer implements Serializer<Mutation> {
+ private OutputStream out;
+
+ @Override
+ public void close() throws IOException {
+ out.close();
+ }
+
+ @Override
+ public void open(OutputStream out) throws IOException {
+ this.out = out;
+ }
+
+ @Override
+ public void serialize(Mutation mutation) throws IOException {
+ MutationType type;
+ if (mutation instanceof Put) {
+ type = MutationType.PUT;
+ } else if (mutation instanceof Delete) {
+ type = MutationType.DELETE;
+ } else {
+ throw new IllegalArgumentException("Only Put and Delete are supported");
+ }
+ ProtobufUtil.toMutation(type, mutation).writeDelimitedTo(out);
+ }
+ }
+}
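MutationSerialization is registered on the job's io.serializations list so that Put and Delete values can be passed between map and reduce tasks, as the TableMapReduceUtil helpers below do. A hand-rolled sketch of the same registration, assuming an existing Job named job:

    Configuration conf = job.getConfiguration();
    // Append the HBase serializations to whatever Hadoop already registered.
    conf.setStrings("io.serializations", conf.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName());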
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/PutCombiner.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/PutCombiner.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/PutCombiner.java
new file mode 100644
index 0000000..f01e84f
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/PutCombiner.java
@@ -0,0 +1,98 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map.Entry;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.KeyValueUtil;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.mapreduce.Reducer;
+
+/**
+ * Combine Puts. Merges Put instances grouped by <code>K</code> into a single
+ * instance.
+ * @see TableMapReduceUtil
+ */
+@InterfaceAudience.Public
+public class PutCombiner<K> extends Reducer<K, Put, K, Put> {
+ private static final Log LOG = LogFactory.getLog(PutCombiner.class);
+
+ @Override
+ protected void reduce(K row, Iterable<Put> vals, Context context)
+ throws IOException, InterruptedException {
+ // Using HeapSize to create an upper bound on the memory size of
+ // the puts and flush some portion of the content while looping. This
+ // flush could result in multiple Puts for a single rowkey. That is
+ // acceptable because Combiner is run as an optimization and it's not
+ // critical that all Puts are grouped perfectly.
+ long threshold = context.getConfiguration().getLong(
+ "putcombiner.row.threshold", 1L * (1<<30));
+ int cnt = 0;
+ long curSize = 0;
+ Put put = null;
+ Map<byte[], List<Cell>> familyMap = null;
+ for (Put p : vals) {
+ cnt++;
+ if (put == null) {
+ put = p;
+ familyMap = put.getFamilyCellMap();
+ } else {
+ for (Entry<byte[], List<Cell>> entry : p.getFamilyCellMap()
+ .entrySet()) {
+ List<Cell> cells = familyMap.get(entry.getKey());
+ List<Cell> kvs = (cells != null) ? (List<Cell>) cells : null;
+ for (Cell cell : entry.getValue()) {
+ KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
+ curSize += kv.heapSize();
+ if (kvs != null) {
+ kvs.add(kv);
+ }
+ }
+ if (cells == null) {
+ familyMap.put(entry.getKey(), entry.getValue());
+ }
+ }
+ if (cnt % 10 == 0) context.setStatus("Combine " + cnt);
+ if (curSize > threshold) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(String.format("Combined %d Put(s) into %d.", cnt, 1));
+ }
+ context.write(row, put);
+ put = null;
+ curSize = 0;
+ cnt = 0;
+ }
+ }
+ }
+ if (put != null) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(String.format("Combined %d Put(s) into %d.", cnt, 1));
+ }
+ context.write(row, put);
+ }
+ }
+}
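A sketch of enabling the combiner on a job whose mapper emits Put values, assuming an existing Job named job; the flush threshold key and its 1 GB default come from the reduce() implementation above:

    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(Put.class);
    job.setCombinerClass(PutCombiner.class);
    // Optional: lower the per-row flush threshold from the 1 GB default.
    job.getConfiguration().setLong("putcombiner.row.threshold", 256L * 1024 * 1024);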
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/PutSortReducer.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/PutSortReducer.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/PutSortReducer.java
new file mode 100644
index 0000000..17ab9cb
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/PutSortReducer.java
@@ -0,0 +1,147 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.TreeSet;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.ArrayBackedTag;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellComparator;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.KeyValueUtil;
+import org.apache.hadoop.hbase.Tag;
+import org.apache.hadoop.hbase.TagType;
+import org.apache.hadoop.hbase.TagUtil;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.exceptions.DeserializationException;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.security.visibility.CellVisibility;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.util.StringUtils;
+
+/**
+ * Emits sorted KeyValues.
+ * Reads in all Puts from the passed Iterator, converts them to KeyValues,
+ * sorts them, and then emits the KeyValues in sorted order. Rows with many
+ * columns will use a lot of memory during the sort.
+ * @see HFileOutputFormat2
+ * @see KeyValueSortReducer
+ */
+@InterfaceAudience.Public
+public class PutSortReducer extends
+ Reducer<ImmutableBytesWritable, Put, ImmutableBytesWritable, KeyValue> {
+ // the cell creator
+ private CellCreator kvCreator;
+
+ @Override
+ protected void
+ setup(Reducer<ImmutableBytesWritable, Put, ImmutableBytesWritable, KeyValue>.Context context)
+ throws IOException, InterruptedException {
+ Configuration conf = context.getConfiguration();
+ this.kvCreator = new CellCreator(conf);
+ }
+
+ @Override
+ protected void reduce(
+ ImmutableBytesWritable row,
+ java.lang.Iterable<Put> puts,
+ Reducer<ImmutableBytesWritable, Put,
+ ImmutableBytesWritable, KeyValue>.Context context)
+ throws java.io.IOException, InterruptedException
+ {
+ // although reduce() is called per-row, handle pathological case
+ long threshold = context.getConfiguration().getLong(
+ "putsortreducer.row.threshold", 1L * (1<<30));
+ Iterator<Put> iter = puts.iterator();
+ while (iter.hasNext()) {
+ TreeSet<KeyValue> map = new TreeSet<>(CellComparator.COMPARATOR);
+ long curSize = 0;
+ // stop at the end or the RAM threshold
+ List<Tag> tags = new ArrayList<>();
+ while (iter.hasNext() && curSize < threshold) {
+ // clear the tags
+ tags.clear();
+ Put p = iter.next();
+ long t = p.getTTL();
+ if (t != Long.MAX_VALUE) {
+ // add TTL tag if found
+ tags.add(new ArrayBackedTag(TagType.TTL_TAG_TYPE, Bytes.toBytes(t)));
+ }
+ byte[] acl = p.getACL();
+ if (acl != null) {
+ // add ACL tag if found
+ tags.add(new ArrayBackedTag(TagType.ACL_TAG_TYPE, acl));
+ }
+ try {
+ CellVisibility cellVisibility = p.getCellVisibility();
+ if (cellVisibility != null) {
+ // add the visibility labels if any
+ tags.addAll(kvCreator.getVisibilityExpressionResolver()
+ .createVisibilityExpTags(cellVisibility.getExpression()));
+ }
+ } catch (DeserializationException e) {
+ // We just throw exception here. Should we allow other mutations to proceed by
+ // just ignoring the bad one?
+ throw new IOException("Invalid visibility expression found in mutation " + p, e);
+ }
+ for (List<Cell> cells: p.getFamilyCellMap().values()) {
+ for (Cell cell: cells) {
+ // Creating the KV which needs to be directly written to HFiles. Using the Facade
+ // KVCreator for creation of kvs.
+ KeyValue kv = null;
+ TagUtil.carryForwardTags(tags, cell);
+ if (!tags.isEmpty()) {
+ kv = (KeyValue) kvCreator.create(cell.getRowArray(), cell.getRowOffset(),
+ cell.getRowLength(), cell.getFamilyArray(), cell.getFamilyOffset(),
+ cell.getFamilyLength(), cell.getQualifierArray(), cell.getQualifierOffset(),
+ cell.getQualifierLength(), cell.getTimestamp(), cell.getValueArray(),
+ cell.getValueOffset(), cell.getValueLength(), tags);
+ } else {
+ kv = KeyValueUtil.ensureKeyValue(cell);
+ }
+ if (map.add(kv)) {// don't count duplicated kv into size
+ curSize += kv.heapSize();
+ }
+ }
+ }
+ }
+ context.setStatus("Read " + map.size() + " entries of " + map.getClass()
+ + "(" + StringUtils.humanReadableInt(curSize) + ")");
+ int index = 0;
+ for (KeyValue kv : map) {
+ context.write(row, kv);
+ if (++index % 100 == 0)
+ context.setStatus("Wrote " + index);
+ }
+
+ // if we have more entries to process
+ if (iter.hasNext()) {
+ // force flush because we cannot guarantee intra-row sorted order
+ context.write(null, null);
+ }
+ }
+ }
+}
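A hand-wired sketch for a bulk-load style job, assuming an existing Job named job; in practice HFileOutputFormat2's incremental-load setup typically selects this reducer for Put-emitting mappers:

    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(Put.class);
    job.setReducerClass(PutSortReducer.class);
    // Optional: tune the in-memory sort threshold used by reduce() above.
    job.getConfiguration().setLong("putsortreducer.row.threshold", 512L * 1024 * 1024);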
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RegionSizeCalculator.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RegionSizeCalculator.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RegionSizeCalculator.java
new file mode 100644
index 0000000..f14cd90
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RegionSizeCalculator.java
@@ -0,0 +1,127 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.Sets;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HRegionLocation;
+import org.apache.hadoop.hbase.RegionLoad;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.hadoop.hbase.util.Bytes;
+
+/**
+ * Computes the size of each region for the given table and column families.
+ * The value is used by MapReduce for better scheduling.
+ * */
+@InterfaceAudience.Private
+public class RegionSizeCalculator {
+
+ private static final Log LOG = LogFactory.getLog(RegionSizeCalculator.class);
+
+ /**
+ * Maps each region to its size in bytes.
+ * */
+ private final Map<byte[], Long> sizeMap = new TreeMap<>(Bytes.BYTES_COMPARATOR);
+
+ static final String ENABLE_REGIONSIZECALCULATOR = "hbase.regionsizecalculator.enable";
+ private static final long MEGABYTE = 1024L * 1024L;
+
+ /**
+ * Computes the size of each region for the given table and column families.
+ * */
+ public RegionSizeCalculator(RegionLocator regionLocator, Admin admin) throws IOException {
+ init(regionLocator, admin);
+ }
+
+ private void init(RegionLocator regionLocator, Admin admin)
+ throws IOException {
+ if (!enabled(admin.getConfiguration())) {
+ LOG.info("Region size calculation disabled.");
+ return;
+ }
+
+ if (regionLocator.getName().isSystemTable()) {
+ LOG.info("Region size calculation disabled for system tables.");
+ return;
+ }
+
+ LOG.info("Calculating region sizes for table \"" + regionLocator.getName() + "\".");
+
+ // Get the servers which host regions of the table
+ Set<ServerName> tableServers = getRegionServersOfTable(regionLocator);
+
+ for (ServerName tableServerName : tableServers) {
+ Map<byte[], RegionLoad> regionLoads =
+ admin.getRegionLoad(tableServerName, regionLocator.getName());
+ for (RegionLoad regionLoad : regionLoads.values()) {
+
+ byte[] regionId = regionLoad.getName();
+ long regionSizeBytes = regionLoad.getStorefileSizeMB() * MEGABYTE;
+ sizeMap.put(regionId, regionSizeBytes);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Region " + regionLoad.getNameAsString() + " has size " + regionSizeBytes);
+ }
+ }
+ }
+ LOG.debug("Region sizes calculated");
+ }
+
+ private Set<ServerName> getRegionServersOfTable(RegionLocator regionLocator)
+ throws IOException {
+
+ Set<ServerName> tableServers = Sets.newHashSet();
+ for (HRegionLocation regionLocation : regionLocator.getAllRegionLocations()) {
+ tableServers.add(regionLocation.getServerName());
+ }
+ return tableServers;
+ }
+
+ boolean enabled(Configuration configuration) {
+ return configuration.getBoolean(ENABLE_REGIONSIZECALCULATOR, true);
+ }
+
+ /**
+ * Returns size of given region in bytes. Returns 0 if region was not found.
+ * */
+ public long getRegionSize(byte[] regionId) {
+ Long size = sizeMap.get(regionId);
+ if (size == null) {
+ LOG.debug("Unknown region:" + Arrays.toString(regionId));
+ return 0;
+ } else {
+ return size;
+ }
+ }
+
+ public Map<byte[], Long> getRegionSizeMap() {
+ return Collections.unmodifiableMap(sizeMap);
+ }
+}
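A usage sketch, assuming a Configuration named conf and an illustrative table called exampleTable:

    try (Connection connection = ConnectionFactory.createConnection(conf);
         Admin admin = connection.getAdmin();
         RegionLocator locator = connection.getRegionLocator(TableName.valueOf("exampleTable"))) {
      RegionSizeCalculator calculator = new RegionSizeCalculator(locator, admin);
      // Print the storefile-derived size of every region of the table.
      for (Map.Entry<byte[], Long> entry : calculator.getRegionSizeMap().entrySet()) {
        System.out.println(Bytes.toStringBinary(entry.getKey()) + " => " + entry.getValue() + " bytes");
      }
    }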
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ResultSerialization.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ResultSerialization.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ResultSerialization.java
new file mode 100644
index 0000000..dff04b6
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ResultSerialization.java
@@ -0,0 +1,158 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.DataInput;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.serializer.Deserializer;
+import org.apache.hadoop.io.serializer.Serialization;
+import org.apache.hadoop.io.serializer.Serializer;
+
+@InterfaceAudience.Public
+public class ResultSerialization extends Configured implements Serialization<Result> {
+ private static final Log LOG = LogFactory.getLog(ResultSerialization.class);
+ // The following configuration property indicates import file format version.
+ public static final String IMPORT_FORMAT_VER = "hbase.import.version";
+
+ @Override
+ public boolean accept(Class<?> c) {
+ return Result.class.isAssignableFrom(c);
+ }
+
+ @Override
+ public Deserializer<Result> getDeserializer(Class<Result> c) {
+ // check input format version
+ Configuration conf = getConf();
+ if (conf != null) {
+ String inputVersion = conf.get(IMPORT_FORMAT_VER);
+ if (inputVersion != null && inputVersion.equals("0.94")) {
+ LOG.info("Load exported file using deserializer for HBase 0.94 format");
+ return new Result94Deserializer();
+ }
+ }
+
+ return new ResultDeserializer();
+ }
+
+ @Override
+ public Serializer<Result> getSerializer(Class<Result> c) {
+ return new ResultSerializer();
+ }
+
+ /**
+ * The following deserializer class is used to load exported file of 0.94
+ */
+ private static class Result94Deserializer implements Deserializer<Result> {
+ private DataInputStream in;
+
+ @Override
+ public void close() throws IOException {
+ in.close();
+ }
+
+ @Override
+ public Result deserialize(Result mutation) throws IOException {
+ int totalBuffer = in.readInt();
+ if (totalBuffer == 0) {
+ return Result.EMPTY_RESULT;
+ }
+ byte[] buf = new byte[totalBuffer];
+ readChunked(in, buf, 0, totalBuffer);
+ List<Cell> kvs = new ArrayList<>();
+ int offset = 0;
+ while (offset < totalBuffer) {
+ int keyLength = Bytes.toInt(buf, offset);
+ offset += Bytes.SIZEOF_INT;
+ kvs.add(new KeyValue(buf, offset, keyLength));
+ offset += keyLength;
+ }
+ return Result.create(kvs);
+ }
+
+ @Override
+ public void open(InputStream in) throws IOException {
+ if (!(in instanceof DataInputStream)) {
+ throw new IOException("Wrong input stream instance passed in");
+ }
+ this.in = (DataInputStream) in;
+ }
+
+ private void readChunked(final DataInput in, byte[] dest, int ofs, int len) throws IOException {
+ int maxRead = 8192;
+
+ for (; ofs < len; ofs += maxRead)
+ in.readFully(dest, ofs, Math.min(len - ofs, maxRead));
+ }
+ }
+
+ private static class ResultDeserializer implements Deserializer<Result> {
+ private InputStream in;
+
+ @Override
+ public void close() throws IOException {
+ in.close();
+ }
+
+ @Override
+ public Result deserialize(Result mutation) throws IOException {
+ ClientProtos.Result proto = ClientProtos.Result.parseDelimitedFrom(in);
+ return ProtobufUtil.toResult(proto);
+ }
+
+ @Override
+ public void open(InputStream in) throws IOException {
+ this.in = in;
+ }
+ }
+
+ private static class ResultSerializer implements Serializer<Result> {
+ private OutputStream out;
+
+ @Override
+ public void close() throws IOException {
+ out.close();
+ }
+
+ @Override
+ public void open(OutputStream out) throws IOException {
+ this.out = out;
+ }
+
+ @Override
+ public void serialize(Result result) throws IOException {
+ ProtobufUtil.toResult(result).writeDelimitedTo(out);
+ }
+ }
+}
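A sketch of reading files exported by HBase 0.94, assuming an existing Job named job; setting hbase.import.version selects the legacy deserializer above:

    Configuration conf = job.getConfiguration();
    conf.set(ResultSerialization.IMPORT_FORMAT_VER, "0.94");
    conf.setStrings("io.serializations", conf.get("io.serializations"),
        ResultSerialization.class.getName());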
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java
new file mode 100644
index 0000000..2e0591e
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java
@@ -0,0 +1,265 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.ArrayList;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.filter.FilterBase;
+import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
+import org.apache.hadoop.hbase.filter.MultiRowRangeFilter;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapreduce.Counter;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * A job with just a map phase to count rows. Map outputs table rows IF the
+ * input row has columns that have content.
+ */
+@InterfaceAudience.Public
+public class RowCounter extends Configured implements Tool {
+
+ private static final Log LOG = LogFactory.getLog(RowCounter.class);
+
+ /** Name of this 'program'. */
+ static final String NAME = "rowcounter";
+
+ private final static String JOB_NAME_CONF_KEY = "mapreduce.job.name";
+ private final static String EXPECTED_COUNT_KEY = RowCounter.class.getName() + ".expected_count";
+
+ /**
+ * Mapper that runs the count.
+ */
+ static class RowCounterMapper
+ extends TableMapper<ImmutableBytesWritable, Result> {
+
+ /** Counter enumeration to count the actual rows. */
+ public static enum Counters {ROWS}
+
+ /**
+ * Maps the data.
+ *
+ * @param row The current table row key.
+ * @param values The columns.
+ * @param context The current context.
+ * @throws IOException When something is broken with the data.
+ * @see org.apache.hadoop.mapreduce.Mapper#map(Object, Object, Context)
+ */
+ @Override
+ public void map(ImmutableBytesWritable row, Result values,
+ Context context)
+ throws IOException {
+ // Count every row containing data, whether it's in qualifiers or values
+ context.getCounter(Counters.ROWS).increment(1);
+ }
+ }
+
+ /**
+ * Sets up the actual job.
+ *
+ * @param conf The current configuration.
+ * @param args The command line parameters.
+ * @return The newly created job.
+ * @throws IOException When setting up the job fails.
+ */
+ public static Job createSubmittableJob(Configuration conf, String[] args)
+ throws IOException {
+ String tableName = args[0];
+ List<MultiRowRangeFilter.RowRange> rowRangeList = null;
+ long startTime = 0;
+ long endTime = 0;
+
+ StringBuilder sb = new StringBuilder();
+
+ final String rangeSwitch = "--range=";
+ final String startTimeArgKey = "--starttime=";
+ final String endTimeArgKey = "--endtime=";
+ final String expectedCountArg = "--expected-count=";
+
+ // First argument is table name, starting from second
+ for (int i = 1; i < args.length; i++) {
+ if (args[i].startsWith(rangeSwitch)) {
+ try {
+ rowRangeList = parseRowRangeParameter(args[i], rangeSwitch);
+ } catch (IllegalArgumentException e) {
+ return null;
+ }
+ continue;
+ }
+ if (args[i].startsWith(startTimeArgKey)) {
+ startTime = Long.parseLong(args[i].substring(startTimeArgKey.length()));
+ continue;
+ }
+ if (args[i].startsWith(endTimeArgKey)) {
+ endTime = Long.parseLong(args[i].substring(endTimeArgKey.length()));
+ continue;
+ }
+ if (args[i].startsWith(expectedCountArg)) {
+ conf.setLong(EXPECTED_COUNT_KEY,
+ Long.parseLong(args[i].substring(expectedCountArg.length())));
+ continue;
+ }
+ // if no switch, assume column names
+ sb.append(args[i]);
+ sb.append(" ");
+ }
+ if (endTime < startTime) {
+ printUsage("--endtime=" + endTime + " needs to be greater than --starttime=" + startTime);
+ return null;
+ }
+
+ Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
+ job.setJarByClass(RowCounter.class);
+ Scan scan = new Scan();
+ scan.setCacheBlocks(false);
+ setScanFilter(scan, rowRangeList);
+ if (sb.length() > 0) {
+ for (String columnName : sb.toString().trim().split(" ")) {
+ String family = StringUtils.substringBefore(columnName, ":");
+ String qualifier = StringUtils.substringAfter(columnName, ":");
+
+ if (StringUtils.isBlank(qualifier)) {
+ scan.addFamily(Bytes.toBytes(family));
+ }
+ else {
+ scan.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier));
+ }
+ }
+ }
+ scan.setTimeRange(startTime, endTime == 0 ? HConstants.LATEST_TIMESTAMP : endTime);
+ job.setOutputFormatClass(NullOutputFormat.class);
+ TableMapReduceUtil.initTableMapperJob(tableName, scan,
+ RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);
+ job.setNumReduceTasks(0);
+ return job;
+ }
+
+ private static List<MultiRowRangeFilter.RowRange> parseRowRangeParameter(
+ String arg, String rangeSwitch) {
+ final String[] ranges = arg.substring(rangeSwitch.length()).split(";");
+ final List<MultiRowRangeFilter.RowRange> rangeList = new ArrayList<>();
+ for (String range : ranges) {
+ String[] startEnd = range.split(",", 2);
+ if (startEnd.length != 2 || startEnd[1].contains(",")) {
+ printUsage("Please specify range in such format as \"--range=a,b\" " +
+ "or, with only one boundary, \"--range=,b\" or \"--range=a,\"");
+ throw new IllegalArgumentException("Wrong range specification: " + range);
+ }
+ String startKey = startEnd[0];
+ String endKey = startEnd[1];
+ rangeList.add(new MultiRowRangeFilter.RowRange(
+ Bytes.toBytesBinary(startKey), true,
+ Bytes.toBytesBinary(endKey), false));
+ }
+ return rangeList;
+ }
+
+ /**
+ * Sets a filter {@link FilterBase} on the {@link Scan} instance.
+ * If the provided rowRangeList contains more than one element,
+ * the filter is an instance of {@link MultiRowRangeFilter};
+ * otherwise it is an instance of {@link FirstKeyOnlyFilter}.
+ * If rowRangeList contains exactly one element, startRow and stopRow are also set on the scan.
+ * @param scan
+ * @param rowRangeList
+ */
+ private static void setScanFilter(Scan scan, List<MultiRowRangeFilter.RowRange> rowRangeList) {
+ final int size = rowRangeList == null ? 0 : rowRangeList.size();
+ if (size <= 1) {
+ scan.setFilter(new FirstKeyOnlyFilter());
+ }
+ if (size == 1) {
+ MultiRowRangeFilter.RowRange range = rowRangeList.get(0);
+ scan.setStartRow(range.getStartRow()); //inclusive
+ scan.setStopRow(range.getStopRow()); //exclusive
+ } else if (size > 1) {
+ scan.setFilter(new MultiRowRangeFilter(rowRangeList));
+ }
+ }
+
+ /*
+ * @param errorMessage Can attach a message when error occurs.
+ */
+ private static void printUsage(String errorMessage) {
+ System.err.println("ERROR: " + errorMessage);
+ printUsage();
+ }
+
+ /**
+ * Prints usage without error message.
+ * Note that we don't document --expected-count, because it's intended for test.
+ */
+ private static void printUsage() {
+ System.err.println("Usage: RowCounter [options] <tablename> " +
+ "[--starttime=[start] --endtime=[end] " +
+ "[--range=[startKey],[endKey][;[startKey],[endKey]...]] [<column1> <column2>...]");
+ System.err.println("For performance consider the following options:\n"
+ + "-Dhbase.client.scanner.caching=100\n"
+ + "-Dmapreduce.map.speculative=false");
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+ if (args.length < 1) {
+ printUsage("Wrong number of parameters: " + args.length);
+ return -1;
+ }
+ Job job = createSubmittableJob(getConf(), args);
+ if (job == null) {
+ return -1;
+ }
+ boolean success = job.waitForCompletion(true);
+ final long expectedCount = getConf().getLong(EXPECTED_COUNT_KEY, -1);
+ if (success && expectedCount != -1) {
+ final Counter counter = job.getCounters().findCounter(RowCounterMapper.Counters.ROWS);
+ success = expectedCount == counter.getValue();
+ if (!success) {
+ LOG.error("Failing job because count of '" + counter.getValue() +
+ "' does not match expected count of '" + expectedCount + "'");
+ }
+ }
+ return (success ? 0 : 1);
+ }
+
+ /**
+ * Main entry point.
+ * @param args The command line parameters.
+ * @throws Exception When running the job fails.
+ */
+ public static void main(String[] args) throws Exception {
+ int errCode = ToolRunner.run(HBaseConfiguration.create(), new RowCounter(), args);
+ System.exit(errCode);
+ }
+
+}
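A sketch of driving the tool programmatically with the same arguments the command line accepts (table name first, then optional switches and column specs); the table name, timestamps, range and column are illustrative values:

    String[] countArgs = new String[] {
        "usertable",
        "--starttime=1500000000000",
        "--endtime=1500086400000",
        "--range=row-a,row-z",
        "cf:qual"
    };
    int exitCode = ToolRunner.run(HBaseConfiguration.create(), new RowCounter(), countArgs);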
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/SimpleTotalOrderPartitioner.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/SimpleTotalOrderPartitioner.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/SimpleTotalOrderPartitioner.java
new file mode 100644
index 0000000..01a919c
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/SimpleTotalOrderPartitioner.java
@@ -0,0 +1,143 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Base64;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapreduce.Partitioner;
+
+/**
+ * A partitioner that takes start and end keys and uses BigDecimal to determine
+ * which reducer a key belongs to. Pass the start and end
+ * keys in the Configuration using <code>hbase.simpletotalorder.start</code>
+ * and <code>hbase.simpletotalorder.end</code>. The end key needs to be
+ * exclusive; i.e. one larger than the biggest key in your key space.
+ * You may be surprised at how this class partitions the space; it may not
+ * align with preconceptions; e.g. a start key of zero and an end key of 100
+ * divided in ten will not make regions whose range is 0-10, 10-20, and so on.
+ * Make your own partitioner if you need the region spacing to come out a
+ * particular way.
+ * @param <VALUE>
+ * @see #START
+ * @see #END
+ */
+@InterfaceAudience.Public
+public class SimpleTotalOrderPartitioner<VALUE> extends Partitioner<ImmutableBytesWritable, VALUE>
+implements Configurable {
+ private final static Log LOG = LogFactory.getLog(SimpleTotalOrderPartitioner.class);
+
+ @Deprecated
+ public static final String START = "hbase.simpletotalorder.start";
+ @Deprecated
+ public static final String END = "hbase.simpletotalorder.end";
+
+ static final String START_BASE64 = "hbase.simpletotalorder.start.base64";
+ static final String END_BASE64 = "hbase.simpletotalorder.end.base64";
+
+ private Configuration c;
+ private byte [] startkey;
+ private byte [] endkey;
+ private byte [][] splits;
+ private int lastReduces = -1;
+
+ public static void setStartKey(Configuration conf, byte[] startKey) {
+ conf.set(START_BASE64, Base64.encodeBytes(startKey));
+ }
+
+ public static void setEndKey(Configuration conf, byte[] endKey) {
+ conf.set(END_BASE64, Base64.encodeBytes(endKey));
+ }
+
+ @SuppressWarnings("deprecation")
+ static byte[] getStartKey(Configuration conf) {
+ return getKeyFromConf(conf, START_BASE64, START);
+ }
+
+ @SuppressWarnings("deprecation")
+ static byte[] getEndKey(Configuration conf) {
+ return getKeyFromConf(conf, END_BASE64, END);
+ }
+
+ private static byte[] getKeyFromConf(Configuration conf,
+ String base64Key, String deprecatedKey) {
+ String encoded = conf.get(base64Key);
+ if (encoded != null) {
+ return Base64.decode(encoded);
+ }
+ String oldStyleVal = conf.get(deprecatedKey);
+ if (oldStyleVal == null) {
+ return null;
+ }
+ LOG.warn("Using deprecated configuration " + deprecatedKey +
+ " - please use static accessor methods instead.");
+ return Bytes.toBytesBinary(oldStyleVal);
+ }
+
+ @Override
+ public int getPartition(final ImmutableBytesWritable key, final VALUE value,
+ final int reduces) {
+ if (reduces == 1) return 0;
+ if (this.lastReduces != reduces) {
+ this.splits = Bytes.split(this.startkey, this.endkey, reduces - 1);
+ for (int i = 0; i < splits.length; i++) {
+ LOG.info(Bytes.toStringBinary(splits[i]));
+ }
+ this.lastReduces = reduces;
+ }
+ int pos = Bytes.binarySearch(this.splits, key.get(), key.getOffset(),
+ key.getLength());
+ // Below code is from hfile index search.
+ if (pos < 0) {
+ pos++;
+ pos *= -1;
+ if (pos == 0) {
+ // falls before the beginning of the file.
+ throw new RuntimeException("Key outside start/stop range: " +
+ key.toString());
+ }
+ pos--;
+ }
+ return pos;
+ }
+
+ @Override
+ public Configuration getConf() {
+ return this.c;
+ }
+
+ @Override
+ public void setConf(Configuration conf) {
+ this.c = conf;
+ this.startkey = getStartKey(conf);
+ this.endkey = getEndKey(conf);
+ if (startkey == null || endkey == null) {
+ throw new RuntimeException(this.getClass() + " not configured");
+ }
+ LOG.info("startkey=" + Bytes.toStringBinary(startkey) +
+ ", endkey=" + Bytes.toStringBinary(endkey));
+ // Reset last reduces count on change of Start / End key
+ this.lastReduces = -1;
+ }
+}
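A configuration sketch, assuming an existing Job named job and illustrative row keys; the end key is exclusive, as described in the class comment:

    Configuration conf = job.getConfiguration();
    SimpleTotalOrderPartitioner.setStartKey(conf, Bytes.toBytes("row-aaa"));
    SimpleTotalOrderPartitioner.setEndKey(conf, Bytes.toBytes("row-zzz"));  // exclusive
    job.setPartitionerClass(SimpleTotalOrderPartitioner.class);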
[40/41] hbase git commit: HBASE-18640 Move mapreduce out of
hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableInputFormatBase.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableInputFormatBase.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableInputFormatBase.java
new file mode 100644
index 0000000..9811a97
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableInputFormatBase.java
@@ -0,0 +1,313 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import java.io.Closeable;
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.filter.Filter;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
+
+/**
+ * A base for {@link TableInputFormat}s. Receives a {@link Table}, a
+ * byte[][] of input columns and optionally a {@link Filter}.
+ * Subclasses may use other TableRecordReader implementations.
+ *
+ * Subclasses MUST ensure initializeTable(Connection, TableName) is called for an instance to
+ * function properly. Each of the entry points to this class used by the MapReduce framework,
+ * {@link #getRecordReader(InputSplit, JobConf, Reporter)} and {@link #getSplits(JobConf, int)},
+ * will call {@link #initialize(JobConf)} as a convenient centralized location to handle
+ * retrieving the necessary configuration information. If your subclass overrides either of these
+ * methods, either call the parent version or call initialize yourself.
+ *
+ * <p>
+ * An example of a subclass:
+ * <pre>
+ * class ExampleTIF extends TableInputFormatBase {
+ *
+ * {@literal @}Override
+ * protected void initialize(JobConf context) throws IOException {
+ * // We are responsible for the lifecycle of this connection until we hand it over in
+ * // initializeTable.
+ * Connection connection =
+ * ConnectionFactory.createConnection(HBaseConfiguration.create(job));
+ * TableName tableName = TableName.valueOf("exampleTable");
+ * // mandatory. once passed here, TableInputFormatBase will handle closing the connection.
+ * initializeTable(connection, tableName);
+ * byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
+ * Bytes.toBytes("columnB") };
+ * // mandatory
+ * setInputColumns(inputColumns);
+ * // optional, by default we'll get everything for the given columns.
+ * Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*"));
+ * setRowFilter(exampleFilter);
+ * }
+ * }
+ * </pre>
+ */
+
+@InterfaceAudience.Public
+public abstract class TableInputFormatBase
+implements InputFormat<ImmutableBytesWritable, Result> {
+ private static final Log LOG = LogFactory.getLog(TableInputFormatBase.class);
+ private byte [][] inputColumns;
+ private Table table;
+ private RegionLocator regionLocator;
+ private Connection connection;
+ private TableRecordReader tableRecordReader;
+ private Filter rowFilter;
+
+ private static final String NOT_INITIALIZED = "The input format instance has not been properly " +
+ "initialized. Ensure you call initializeTable either in your constructor or initialize " +
+ "method";
+ private static final String INITIALIZATION_ERROR = "Cannot create a record reader because of a" +
+ " previous error. Please look at the previous logs lines from" +
+ " the task's full log for more details.";
+
+ /**
+ * Builds a TableRecordReader. If no TableRecordReader was provided, uses
+ * the default.
+ *
+ * @see org.apache.hadoop.mapred.InputFormat#getRecordReader(InputSplit,
+ * JobConf, Reporter)
+ */
+ public RecordReader<ImmutableBytesWritable, Result> getRecordReader(
+ InputSplit split, JobConf job, Reporter reporter)
+ throws IOException {
+ // In case a subclass uses the deprecated approach or calls initializeTable directly
+ if (table == null) {
+ initialize(job);
+ }
+ // null check in case our child overrides getTable to not throw.
+ try {
+ if (getTable() == null) {
+ // initialize() must not have been implemented in the subclass.
+ throw new IOException(INITIALIZATION_ERROR);
+ }
+ } catch (IllegalStateException exception) {
+ throw new IOException(INITIALIZATION_ERROR, exception);
+ }
+
+ TableSplit tSplit = (TableSplit) split;
+ // if no table record reader was provided use default
+ final TableRecordReader trr = this.tableRecordReader == null ? new TableRecordReader() :
+ this.tableRecordReader;
+ trr.setStartRow(tSplit.getStartRow());
+ trr.setEndRow(tSplit.getEndRow());
+ trr.setHTable(this.table);
+ trr.setInputColumns(this.inputColumns);
+ trr.setRowFilter(this.rowFilter);
+ trr.init();
+ return new RecordReader<ImmutableBytesWritable, Result>() {
+
+ @Override
+ public void close() throws IOException {
+ trr.close();
+ closeTable();
+ }
+
+ @Override
+ public ImmutableBytesWritable createKey() {
+ return trr.createKey();
+ }
+
+ @Override
+ public Result createValue() {
+ return trr.createValue();
+ }
+
+ @Override
+ public long getPos() throws IOException {
+ return trr.getPos();
+ }
+
+ @Override
+ public float getProgress() throws IOException {
+ return trr.getProgress();
+ }
+
+ @Override
+ public boolean next(ImmutableBytesWritable key, Result value) throws IOException {
+ return trr.next(key, value);
+ }
+ };
+ }
+
+ /**
+ * Calculates the splits that will serve as input for the map tasks.
+ *
+ * Splits are created in a number equal to the smaller of numSplits and
+ * the number of {@link org.apache.hadoop.hbase.regionserver.HRegion}s in the table.
+ * If the number of splits is smaller than the number of
+ * {@link org.apache.hadoop.hbase.regionserver.HRegion}s then splits are spanned across
+ * multiple {@link org.apache.hadoop.hbase.regionserver.HRegion}s
+ * and are grouped as evenly as possible. When the
+ * splits are uneven, the bigger splits are placed first in the
+ * {@link InputSplit} array.
+ *
+ * @param job the map task {@link JobConf}
+ * @param numSplits a hint to calculate the number of splits (mapred.map.tasks).
+ *
+ * @return the input splits
+ *
+ * @see org.apache.hadoop.mapred.InputFormat#getSplits(org.apache.hadoop.mapred.JobConf, int)
+ */
+ public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
+ if (this.table == null) {
+ initialize(job);
+ }
+ // null check in case our child overrides getTable to not throw.
+ try {
+ if (getTable() == null) {
+ // initialize() must not have been implemented in the subclass.
+ throw new IOException(INITIALIZATION_ERROR);
+ }
+ } catch (IllegalStateException exception) {
+ throw new IOException(INITIALIZATION_ERROR, exception);
+ }
+
+ byte [][] startKeys = this.regionLocator.getStartKeys();
+ if (startKeys == null || startKeys.length == 0) {
+ throw new IOException("Expecting at least one region");
+ }
+ if (this.inputColumns == null || this.inputColumns.length == 0) {
+ throw new IOException("Expecting at least one column");
+ }
+ int realNumSplits = numSplits > startKeys.length? startKeys.length:
+ numSplits;
+ InputSplit[] splits = new InputSplit[realNumSplits];
+ int middle = startKeys.length / realNumSplits;
+ int startPos = 0;
+ for (int i = 0; i < realNumSplits; i++) {
+ int lastPos = startPos + middle;
+ lastPos = startKeys.length % realNumSplits > i ? lastPos + 1 : lastPos;
+ String regionLocation = regionLocator.getRegionLocation(startKeys[startPos]).
+ getHostname();
+ splits[i] = new TableSplit(this.table.getName(),
+ startKeys[startPos], ((i + 1) < realNumSplits) ? startKeys[lastPos]:
+ HConstants.EMPTY_START_ROW, regionLocation);
+ LOG.info("split: " + i + "->" + splits[i]);
+ startPos = lastPos;
+ }
+ return splits;
+ }
+
+ /**
+ * Allows subclasses to initialize the table information.
+ *
+ * @param connection The Connection to the HBase cluster. MUST be unmanaged. We will close.
+ * @param tableName The {@link TableName} of the table to process.
+ * @throws IOException
+ */
+ protected void initializeTable(Connection connection, TableName tableName) throws IOException {
+ if (this.table != null || this.connection != null) {
+ LOG.warn("initializeTable called multiple times. Overwriting connection and table " +
+ "reference; TableInputFormatBase will not close these old references when done.");
+ }
+ this.table = connection.getTable(tableName);
+ this.regionLocator = connection.getRegionLocator(tableName);
+ this.connection = connection;
+ }
+
+ /**
+ * @param inputColumns to be passed in {@link Result} to the map task.
+ */
+ protected void setInputColumns(byte [][] inputColumns) {
+ this.inputColumns = inputColumns;
+ }
+
+ /**
+ * Allows subclasses to get the {@link Table}.
+ */
+ protected Table getTable() {
+ if (table == null) {
+ throw new IllegalStateException(NOT_INITIALIZED);
+ }
+ return this.table;
+ }
+
+ /**
+ * Allows subclasses to set the {@link TableRecordReader}.
+ *
+ * @param tableRecordReader
+ * to provide other {@link TableRecordReader} implementations.
+ */
+ protected void setTableRecordReader(TableRecordReader tableRecordReader) {
+ this.tableRecordReader = tableRecordReader;
+ }
+
+ /**
+ * Allows subclasses to set the {@link Filter} to be used.
+ *
+ * @param rowFilter
+ */
+ protected void setRowFilter(Filter rowFilter) {
+ this.rowFilter = rowFilter;
+ }
+
+ /**
+ * Handle subclass specific set up.
+ * Each of the entry points used by the MapReduce framework,
+ * {@link #getRecordReader(InputSplit, JobConf, Reporter)} and {@link #getSplits(JobConf, int)},
+ * will call {@link #initialize(JobConf)} as a convenient centralized location to handle
+ * retrieving the necessary configuration information and calling
+ * {@link #initializeTable(Connection, TableName)}.
+ *
+ * Subclasses should implement their initialize call such that it is safe to call multiple times.
+ * The current TableInputFormatBase implementation relies on a non-null table reference to decide
+ * if an initialize call is needed, but this behavior may change in the future. In particular,
+ * it is critical that initializeTable not be called multiple times since this will leak
+ * Connection instances.
+ *
+ */
+ protected void initialize(JobConf job) throws IOException {
+ }
+
+ /**
+ * Close the Table and related objects that were initialized via
+ * {@link #initializeTable(Connection, TableName)}.
+ *
+ * @throws IOException
+ */
+ protected void closeTable() throws IOException {
+ close(table, connection);
+ table = null;
+ connection = null;
+ }
+
+ private void close(Closeable... closables) throws IOException {
+ for (Closeable c : closables) {
+ if(c != null) { c.close(); }
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableMap.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableMap.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableMap.java
new file mode 100644
index 0000000..a9f1e61
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableMap.java
@@ -0,0 +1,38 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapred.Mapper;
+
+/**
+ * Scan an HBase table to sort by a specified sort column.
+ * If the column does not exist, the record is not passed to Reduce.
+ *
+ * @param <K> WritableComparable key class
+ * @param <V> Writable value class
+ */
+@InterfaceAudience.Public
+public interface TableMap<K extends WritableComparable<? super K>, V>
+extends Mapper<ImmutableBytesWritable, Result, K, V> {
+
+}
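A minimal sketch of an old-API mapper implementing this interface, assuming an illustrative column family cf and qualifier q:

    public class ExampleTableMap extends MapReduceBase
        implements TableMap<ImmutableBytesWritable, ImmutableBytesWritable> {
      @Override
      public void map(ImmutableBytesWritable row, Result value,
          OutputCollector<ImmutableBytesWritable, ImmutableBytesWritable> output,
          Reporter reporter) throws IOException {
        // Emit the row key and the bytes stored under cf:q, if present.
        byte[] cell = value.getValue(Bytes.toBytes("cf"), Bytes.toBytes("q"));
        if (cell != null) {
          output.collect(row, new ImmutableBytesWritable(cell));
        }
      }
    }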
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableMapReduceUtil.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableMapReduceUtil.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableMapReduceUtil.java
new file mode 100644
index 0000000..63ec418
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableMapReduceUtil.java
@@ -0,0 +1,376 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.MetaTableAccessor;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.MutationSerialization;
+import org.apache.hadoop.hbase.mapreduce.ResultSerialization;
+import org.apache.hadoop.hbase.security.User;
+import org.apache.hadoop.hbase.security.UserProvider;
+import org.apache.hadoop.hbase.security.token.TokenUtil;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.OutputFormat;
+import org.apache.hadoop.mapred.TextInputFormat;
+import org.apache.hadoop.mapred.TextOutputFormat;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Map;
+
+/**
+ * Utility for {@link TableMap} and {@link TableReduce}
+ */
+@InterfaceAudience.Public
+@SuppressWarnings({ "rawtypes", "unchecked" })
+public class TableMapReduceUtil {
+
+ /**
+ * Use this before submitting a TableMap job. It will
+ * appropriately set up the JobConf.
+ *
+ * @param table The table name to read from.
+ * @param columns The columns to scan.
+ * @param mapper The mapper class to use.
+ * @param outputKeyClass The class of the output key.
+ * @param outputValueClass The class of the output value.
+ * @param job The current job configuration to adjust.
+ */
+ public static void initTableMapJob(String table, String columns,
+ Class<? extends TableMap> mapper,
+ Class<?> outputKeyClass,
+ Class<?> outputValueClass, JobConf job) {
+ initTableMapJob(table, columns, mapper, outputKeyClass, outputValueClass, job,
+ true, TableInputFormat.class);
+ }
+
+ public static void initTableMapJob(String table, String columns,
+ Class<? extends TableMap> mapper,
+ Class<?> outputKeyClass,
+ Class<?> outputValueClass, JobConf job, boolean addDependencyJars) {
+ initTableMapJob(table, columns, mapper, outputKeyClass, outputValueClass, job,
+ addDependencyJars, TableInputFormat.class);
+ }
+
+ /**
+ * Use this before submitting a TableMap job. It will
+ * appropriately set up the JobConf.
+ *
+ * @param table The table name to read from.
+ * @param columns The columns to scan.
+ * @param mapper The mapper class to use.
+ * @param outputKeyClass The class of the output key.
+ * @param outputValueClass The class of the output value.
+ * @param job The current job configuration to adjust.
+ * @param addDependencyJars upload HBase jars and jars for any of the configured
+ * job classes via the distributed cache (tmpjars).
+ */
+ public static void initTableMapJob(String table, String columns,
+ Class<? extends TableMap> mapper,
+ Class<?> outputKeyClass,
+ Class<?> outputValueClass, JobConf job, boolean addDependencyJars,
+ Class<? extends InputFormat> inputFormat) {
+
+ job.setInputFormat(inputFormat);
+ job.setMapOutputValueClass(outputValueClass);
+ job.setMapOutputKeyClass(outputKeyClass);
+ job.setMapperClass(mapper);
+ job.setStrings("io.serializations", job.get("io.serializations"),
+ MutationSerialization.class.getName(), ResultSerialization.class.getName());
+ FileInputFormat.addInputPaths(job, table);
+ job.set(TableInputFormat.COLUMN_LIST, columns);
+ if (addDependencyJars) {
+ try {
+ addDependencyJars(job);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+ try {
+ initCredentials(job);
+ } catch (IOException ioe) {
+ // just spit out the stack trace? really?
+ ioe.printStackTrace();
+ }
+ }
+
+ /**
+ * Sets up the job for reading from one or more table snapshots, with one or more scans
+ * per snapshot.
+ * It bypasses HBase servers and reads directly from snapshot files.
+ *
+ * @param snapshotScans map of snapshot name to scans on that snapshot.
+ * @param mapper The mapper class to use.
+ * @param outputKeyClass The class of the output key.
+ * @param outputValueClass The class of the output value.
+ * @param job The current job to adjust. Make sure the passed job is
+ * carrying all necessary HBase configuration.
+ * @param addDependencyJars upload HBase jars and jars for any of the configured
+ * job classes via the distributed cache (tmpjars).
+ */
+ public static void initMultiTableSnapshotMapperJob(Map<String, Collection<Scan>> snapshotScans,
+ Class<? extends TableMap> mapper, Class<?> outputKeyClass, Class<?> outputValueClass,
+ JobConf job, boolean addDependencyJars, Path tmpRestoreDir) throws IOException {
+ MultiTableSnapshotInputFormat.setInput(job, snapshotScans, tmpRestoreDir);
+
+ job.setInputFormat(MultiTableSnapshotInputFormat.class);
+ if (outputValueClass != null) {
+ job.setMapOutputValueClass(outputValueClass);
+ }
+ if (outputKeyClass != null) {
+ job.setMapOutputKeyClass(outputKeyClass);
+ }
+ job.setMapperClass(mapper);
+ if (addDependencyJars) {
+ addDependencyJars(job);
+ }
+
+ org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.resetCacheConfig(job);
+ }
+
+ /**
+ * Sets up the job for reading from a table snapshot. It bypasses HBase servers
+ * and reads directly from snapshot files.
+ *
+ * @param snapshotName The name of the snapshot (of a table) to read from.
+ * @param columns The columns to scan.
+ * @param mapper The mapper class to use.
+ * @param outputKeyClass The class of the output key.
+ * @param outputValueClass The class of the output value.
+ * @param job The current job to adjust. Make sure the passed job is
+ * carrying all necessary HBase configuration.
+ * @param addDependencyJars upload HBase jars and jars for any of the configured
+ * job classes via the distributed cache (tmpjars).
+ * @param tmpRestoreDir a temporary directory to copy the snapshot files into. The current user
+ * should have write permissions to this directory, and it should not be a subdirectory of
+ * rootdir. After the job is finished, the restore directory can be deleted.
+ * @throws IOException When setting up the details fails.
+ * @see TableSnapshotInputFormat
+ */
+ public static void initTableSnapshotMapJob(String snapshotName, String columns,
+ Class<? extends TableMap> mapper,
+ Class<?> outputKeyClass,
+ Class<?> outputValueClass, JobConf job,
+ boolean addDependencyJars, Path tmpRestoreDir)
+ throws IOException {
+ TableSnapshotInputFormat.setInput(job, snapshotName, tmpRestoreDir);
+ initTableMapJob(snapshotName, columns, mapper, outputKeyClass, outputValueClass, job,
+ addDependencyJars, TableSnapshotInputFormat.class);
+ org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.resetCacheConfig(job);
+ }
+
+ /**
+ * Use this before submitting a TableReduce job. It will
+ * appropriately set up the JobConf.
+ *
+ * @param table The output table.
+ * @param reducer The reducer class to use.
+ * @param job The current job configuration to adjust.
+ * @throws IOException When determining the region count fails.
+ */
+ public static void initTableReduceJob(String table,
+ Class<? extends TableReduce> reducer, JobConf job)
+ throws IOException {
+ initTableReduceJob(table, reducer, job, null);
+ }
+
+ /**
+ * Use this before submitting a TableReduce job. It will
+ * appropriately set up the JobConf.
+ *
+ * @param table The output table.
+ * @param reducer The reducer class to use.
+ * @param job The current job configuration to adjust.
+ * @param partitioner Partitioner to use. Pass <code>null</code> to use
+ * default partitioner.
+ * @throws IOException When determining the region count fails.
+ */
+ public static void initTableReduceJob(String table,
+ Class<? extends TableReduce> reducer, JobConf job, Class partitioner)
+ throws IOException {
+ initTableReduceJob(table, reducer, job, partitioner, true);
+ }
+
+ /**
+ * Use this before submitting a TableReduce job. It will
+ * appropriately set up the JobConf.
+ *
+ * @param table The output table.
+ * @param reducer The reducer class to use.
+ * @param job The current job configuration to adjust.
+ * @param partitioner Partitioner to use. Pass <code>null</code> to use
+ * default partitioner.
+ * @param addDependencyJars upload HBase jars and jars for any of the configured
+ * job classes via the distributed cache (tmpjars).
+ * @throws IOException When determining the region count fails.
+ */
+ public static void initTableReduceJob(String table,
+ Class<? extends TableReduce> reducer, JobConf job, Class partitioner,
+ boolean addDependencyJars) throws IOException {
+ job.setOutputFormat(TableOutputFormat.class);
+ job.setReducerClass(reducer);
+ job.set(TableOutputFormat.OUTPUT_TABLE, table);
+ job.setOutputKeyClass(ImmutableBytesWritable.class);
+ job.setOutputValueClass(Put.class);
+ job.setStrings("io.serializations", job.get("io.serializations"),
+ MutationSerialization.class.getName(), ResultSerialization.class.getName());
+ if (partitioner == HRegionPartitioner.class) {
+ job.setPartitionerClass(HRegionPartitioner.class);
+ int regions =
+ MetaTableAccessor.getRegionCount(HBaseConfiguration.create(job), TableName.valueOf(table));
+ if (job.getNumReduceTasks() > regions) {
+ job.setNumReduceTasks(regions);
+ }
+ } else if (partitioner != null) {
+ job.setPartitionerClass(partitioner);
+ }
+ if (addDependencyJars) {
+ addDependencyJars(job);
+ }
+ initCredentials(job);
+ }
+
+ public static void initCredentials(JobConf job) throws IOException {
+ UserProvider userProvider = UserProvider.instantiate(job);
+ if (userProvider.isHadoopSecurityEnabled()) {
+ // propagate delegation related props from launcher job to MR job
+ if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
+ job.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
+ }
+ }
+
+ if (userProvider.isHBaseSecurityEnabled()) {
+ Connection conn = ConnectionFactory.createConnection(job);
+ try {
+ // login the server principal (if using secure Hadoop)
+ User user = userProvider.getCurrent();
+ TokenUtil.addTokenForJob(conn, job, user);
+ } catch (InterruptedException ie) {
+ ie.printStackTrace();
+ Thread.currentThread().interrupt();
+ } finally {
+ conn.close();
+ }
+ }
+ }
+
+ /**
+ * Ensures that the given number of reduce tasks for the given job
+ * configuration does not exceed the number of regions for the given table.
+ *
+ * @param table The table to get the region count for.
+ * @param job The current job configuration to adjust.
+ * @throws IOException When retrieving the table details fails.
+ */
+ // Used by tests.
+ public static void limitNumReduceTasks(String table, JobConf job)
+ throws IOException {
+ int regions =
+ MetaTableAccessor.getRegionCount(HBaseConfiguration.create(job), TableName.valueOf(table));
+ if (job.getNumReduceTasks() > regions)
+ job.setNumReduceTasks(regions);
+ }
+
+ /**
+ * Ensures that the given number of map tasks for the given job
+ * configuration does not exceed the number of regions for the given table.
+ *
+ * @param table The table to get the region count for.
+ * @param job The current job configuration to adjust.
+ * @throws IOException When retrieving the table details fails.
+ */
+ // Used by tests.
+ public static void limitNumMapTasks(String table, JobConf job)
+ throws IOException {
+ int regions =
+ MetaTableAccessor.getRegionCount(HBaseConfiguration.create(job), TableName.valueOf(table));
+ if (job.getNumMapTasks() > regions)
+ job.setNumMapTasks(regions);
+ }
+
+ /**
+ * Sets the number of reduce tasks for the given job configuration to the
+ * number of regions the given table has.
+ *
+ * @param table The table to get the region count for.
+ * @param job The current job configuration to adjust.
+ * @throws IOException When retrieving the table details fails.
+ */
+ public static void setNumReduceTasks(String table, JobConf job)
+ throws IOException {
+ job.setNumReduceTasks(MetaTableAccessor.getRegionCount(HBaseConfiguration.create(job),
+ TableName.valueOf(table)));
+ }
+
+ /**
+ * Sets the number of map tasks for the given job configuration to the
+ * number of regions the given table has.
+ *
+ * @param table The table to get the region count for.
+ * @param job The current job configuration to adjust.
+ * @throws IOException When retrieving the table details fails.
+ */
+ public static void setNumMapTasks(String table, JobConf job)
+ throws IOException {
+ job.setNumMapTasks(MetaTableAccessor.getRegionCount(HBaseConfiguration.create(job),
+ TableName.valueOf(table)));
+ }
+
+ /**
+ * Sets the number of rows to return and cache with each scanner iteration.
+ * Higher caching values will enable faster mapreduce jobs at the expense of
+ * requiring more heap to contain the cached rows.
+ *
+ * @param job The current job configuration to adjust.
+ * @param batchSize The number of rows to return in batch with each scanner
+ * iteration.
+ */
+ public static void setScannerCaching(JobConf job, int batchSize) {
+ job.setInt("hbase.client.scanner.caching", batchSize);
+ }
+
+ /**
+ * @see org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil#addDependencyJars(org.apache.hadoop.mapreduce.Job)
+ */
+ public static void addDependencyJars(JobConf job) throws IOException {
+ org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addHBaseDependencyJars(job);
+ org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addDependencyJarsForClasses(
+ job,
+ // when making changes here, consider also mapreduce.TableMapReduceUtil
+ // pull job classes
+ job.getMapOutputKeyClass(),
+ job.getMapOutputValueClass(),
+ job.getOutputKeyClass(),
+ job.getOutputValueClass(),
+ job.getPartitionerClass(),
+ job.getClass("mapred.input.format.class", TextInputFormat.class, InputFormat.class),
+ job.getClass("mapred.output.format.class", TextOutputFormat.class, OutputFormat.class),
+ job.getCombinerClass());
+ }
+}
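
For illustration, a sketch of a driver that wires the utility methods above together; the table names "mytable" and "outtable", the column "f1:col", and the MyTableMap / MyTableReduce classes (hypothetical implementations of TableMap<Text, Text> and TableReduce<Text, Text>) are assumptions, not part of this class:

    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.mapred.TableMapReduceUtil;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.JobClient;
    import org.apache.hadoop.mapred.JobConf;

    public class ExampleDriver {
      public static void main(String[] args) throws Exception {
        JobConf job = new JobConf(HBaseConfiguration.create(), ExampleDriver.class);
        job.setJobName("example-table-copy");
        // Read column f1:col from "mytable" with the assumed MyTableMap mapper,
        // emitting Text keys and values from the map phase.
        TableMapReduceUtil.initTableMapJob("mytable", "f1:col",
            MyTableMap.class, Text.class, Text.class, job);
        // Write Puts produced by the assumed MyTableReduce reducer into "outtable".
        TableMapReduceUtil.initTableReduceJob("outtable", MyTableReduce.class, job);
        // Larger scanner caching trades mapper heap for fewer round trips.
        TableMapReduceUtil.setScannerCaching(job, 500);
        JobClient.runJob(job);
      }
    }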
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableOutputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableOutputFormat.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableOutputFormat.java
new file mode 100644
index 0000000..06b28ed
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableOutputFormat.java
@@ -0,0 +1,134 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import java.io.IOException;
+
+import org.apache.hadoop.fs.FileAlreadyExistsException;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.BufferedMutator;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.InvalidJobConfException;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordWriter;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.util.Progressable;
+
+/**
+ * Convert Map/Reduce output and write it to an HBase table
+ */
+@InterfaceAudience.Public
+public class TableOutputFormat extends FileOutputFormat<ImmutableBytesWritable, Put> {
+
+ /** JobConf parameter that specifies the output table */
+ public static final String OUTPUT_TABLE = "hbase.mapred.outputtable";
+
+ /**
+ * Write Reduce output (ImmutableBytesWritable, Put) pairs
+ * to an HBase table.
+ */
+ protected static class TableRecordWriter implements RecordWriter<ImmutableBytesWritable, Put> {
+ private BufferedMutator m_mutator;
+ private Connection conn;
+
+
+ /**
+ * Instantiate a TableRecordWriter with an externally supplied BufferedMutator for writing.
+ *
+ * @deprecated Please use {@code #TableRecordWriter(JobConf)}. This version does not clean up
+ * connections and will leak connections (removed in 2.0)
+ */
+ @Deprecated
+ public TableRecordWriter(final BufferedMutator mutator) throws IOException {
+ this.m_mutator = mutator;
+ this.conn = null;
+ }
+
+ /**
+ * Instantiate a TableRecordWriter with a BufferedMutator for batch writing.
+ */
+ public TableRecordWriter(JobConf job) throws IOException {
+ // expecting exactly one path
+ TableName tableName = TableName.valueOf(job.get(OUTPUT_TABLE));
+ try {
+ this.conn = ConnectionFactory.createConnection(job);
+ this.m_mutator = conn.getBufferedMutator(tableName);
+ } finally {
+ if (this.m_mutator == null) {
+ conn.close();
+ conn = null;
+ }
+ }
+ }
+
+ public void close(Reporter reporter) throws IOException {
+ try {
+ if (this.m_mutator != null) {
+ this.m_mutator.close();
+ }
+ } finally {
+ if (conn != null) {
+ this.conn.close();
+ }
+ }
+ }
+
+ public void write(ImmutableBytesWritable key, Put value) throws IOException {
+ m_mutator.mutate(new Put(value));
+ }
+ }
+
+ /**
+ * Creates a new record writer.
+ *
+ * Be aware that the baseline javadoc gives the impression that there is a single
+ * {@link RecordWriter} per job, but in HBase each call to this method returns a new
+ * RecordWriter. You must close the returned RecordWriter when done;
+ * failure to do so will drop writes.
+ *
+ * @param ignored Ignored filesystem
+ * @param job Current JobConf
+ * @param name Name of the job
+ * @param progress
+ * @return The newly created writer instance.
+ * @throws IOException When creating the writer fails.
+ */
+ @Override
+ public RecordWriter getRecordWriter(FileSystem ignored, JobConf job, String name,
+ Progressable progress)
+ throws IOException {
+ // Clear write buffer on fail is true by default so no need to reset it.
+ return new TableRecordWriter(job);
+ }
+
+ @Override
+ public void checkOutputSpecs(FileSystem ignored, JobConf job)
+ throws FileAlreadyExistsException, InvalidJobConfException, IOException {
+ String tableName = job.get(OUTPUT_TABLE);
+ if (tableName == null) {
+ throw new IOException("Must specify table name");
+ }
+ }
+}
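
The same output configuration that TableMapReduceUtil.initTableReduceJob applies can be set by hand, which is mainly useful for map-only jobs that write directly to a table. A sketch, assuming an existing output table named "outtable":

    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.Put;
    import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
    import org.apache.hadoop.hbase.mapred.TableOutputFormat;
    import org.apache.hadoop.mapred.JobConf;

    public class OutputFormatSetup {
      static JobConf configure() {
        JobConf job = new JobConf(HBaseConfiguration.create());
        job.setOutputFormat(TableOutputFormat.class);
        // checkOutputSpecs() rejects the job unless the output table is named here.
        job.set(TableOutputFormat.OUTPUT_TABLE, "outtable");   // assumed existing table
        job.setOutputKeyClass(ImmutableBytesWritable.class);
        job.setOutputValueClass(Put.class);
        return job;
      }
    }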
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableRecordReader.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableRecordReader.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableRecordReader.java
new file mode 100644
index 0000000..cecef7d
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableRecordReader.java
@@ -0,0 +1,139 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import java.io.IOException;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.filter.Filter;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.mapred.RecordReader;
+
+
+/**
+ * Iterate over HBase table data, returning (ImmutableBytesWritable, Result) pairs.
+ */
+@InterfaceAudience.Public
+public class TableRecordReader
+implements RecordReader<ImmutableBytesWritable, Result> {
+
+ private TableRecordReaderImpl recordReaderImpl = new TableRecordReaderImpl();
+
+ /**
+ * Restart from survivable exceptions by creating a new scanner.
+ *
+ * @param firstRow
+ * @throws IOException
+ */
+ public void restart(byte[] firstRow) throws IOException {
+ this.recordReaderImpl.restart(firstRow);
+ }
+
+ /**
+ * Build the scanner. Not done in constructor to allow for extension.
+ *
+ * @throws IOException
+ */
+ public void init() throws IOException {
+ this.recordReaderImpl.restart(this.recordReaderImpl.getStartRow());
+ }
+
+ /**
+ * @param htable the {@link org.apache.hadoop.hbase.client.Table} to scan.
+ */
+ public void setHTable(Table htable) {
+ this.recordReaderImpl.setHTable(htable);
+ }
+
+ /**
+ * @param inputColumns the columns to be placed in {@link Result}.
+ */
+ public void setInputColumns(final byte [][] inputColumns) {
+ this.recordReaderImpl.setInputColumns(inputColumns);
+ }
+
+ /**
+ * @param startRow the first row in the split
+ */
+ public void setStartRow(final byte [] startRow) {
+ this.recordReaderImpl.setStartRow(startRow);
+ }
+
+ /**
+ *
+ * @param endRow the last row in the split
+ */
+ public void setEndRow(final byte [] endRow) {
+ this.recordReaderImpl.setEndRow(endRow);
+ }
+
+ /**
+ * @param rowFilter the {@link Filter} to be used.
+ */
+ public void setRowFilter(Filter rowFilter) {
+ this.recordReaderImpl.setRowFilter(rowFilter);
+ }
+
+ public void close() {
+ this.recordReaderImpl.close();
+ }
+
+ /**
+ * @return ImmutableBytesWritable
+ *
+ * @see org.apache.hadoop.mapred.RecordReader#createKey()
+ */
+ public ImmutableBytesWritable createKey() {
+ return this.recordReaderImpl.createKey();
+ }
+
+ /**
+ * @return Result
+ *
+ * @see org.apache.hadoop.mapred.RecordReader#createValue()
+ */
+ public Result createValue() {
+ return this.recordReaderImpl.createValue();
+ }
+
+ public long getPos() {
+
+ // This should be the ordinal tuple in the range;
+ // not clear how to calculate...
+ return this.recordReaderImpl.getPos();
+ }
+
+ public float getProgress() {
+ // Depends on the total number of tuples and getPos
+ return this.recordReaderImpl.getPos();
+ }
+
+ /**
+ * @param key ImmutableBytesWritable to fill with the input key.
+ * @param value Result to fill with the input value.
+ * @return true if there was more data
+ * @throws IOException
+ */
+ public boolean next(ImmutableBytesWritable key, Result value)
+ throws IOException {
+ return this.recordReaderImpl.next(key, value);
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableRecordReaderImpl.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableRecordReaderImpl.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableRecordReaderImpl.java
new file mode 100644
index 0000000..f6b79c3
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableRecordReaderImpl.java
@@ -0,0 +1,259 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.ScannerCallable;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.DoNotRetryIOException;
+import org.apache.hadoop.hbase.filter.Filter;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.util.StringUtils;
+
+import static org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl.LOG_PER_ROW_COUNT;
+
+/**
+ * Iterate over HBase table data, returning (ImmutableBytesWritable, Result) pairs.
+ */
+@InterfaceAudience.Public
+public class TableRecordReaderImpl {
+ private static final Log LOG = LogFactory.getLog(TableRecordReaderImpl.class);
+
+ private byte [] startRow;
+ private byte [] endRow;
+ private byte [] lastSuccessfulRow;
+ private Filter trrRowFilter;
+ private ResultScanner scanner;
+ private Table htable;
+ private byte [][] trrInputColumns;
+ private long timestamp;
+ private int rowcount;
+ private boolean logScannerActivity = false;
+ private int logPerRowCount = 100;
+
+ /**
+ * Restart from survivable exceptions by creating a new scanner.
+ *
+ * @param firstRow
+ * @throws IOException
+ */
+ public void restart(byte[] firstRow) throws IOException {
+ Scan currentScan;
+ if ((endRow != null) && (endRow.length > 0)) {
+ if (trrRowFilter != null) {
+ Scan scan = new Scan(firstRow, endRow);
+ TableInputFormat.addColumns(scan, trrInputColumns);
+ scan.setFilter(trrRowFilter);
+ scan.setCacheBlocks(false);
+ this.scanner = this.htable.getScanner(scan);
+ currentScan = scan;
+ } else {
+ LOG.debug("TIFB.restart, firstRow: " +
+ Bytes.toStringBinary(firstRow) + ", endRow: " +
+ Bytes.toStringBinary(endRow));
+ Scan scan = new Scan(firstRow, endRow);
+ TableInputFormat.addColumns(scan, trrInputColumns);
+ this.scanner = this.htable.getScanner(scan);
+ currentScan = scan;
+ }
+ } else {
+ LOG.debug("TIFB.restart, firstRow: " +
+ Bytes.toStringBinary(firstRow) + ", no endRow");
+
+ Scan scan = new Scan(firstRow);
+ TableInputFormat.addColumns(scan, trrInputColumns);
+ scan.setFilter(trrRowFilter);
+ this.scanner = this.htable.getScanner(scan);
+ currentScan = scan;
+ }
+ if (logScannerActivity) {
+ LOG.info("Current scan=" + currentScan.toString());
+ timestamp = System.currentTimeMillis();
+ rowcount = 0;
+ }
+ }
+
+ /**
+ * Build the scanner. Not done in constructor to allow for extension.
+ *
+ * @throws IOException
+ */
+ public void init() throws IOException {
+ restart(startRow);
+ }
+
+ byte[] getStartRow() {
+ return this.startRow;
+ }
+ /**
+ * @param htable the {@link org.apache.hadoop.hbase.client.Table} to scan.
+ */
+ public void setHTable(Table htable) {
+ Configuration conf = htable.getConfiguration();
+ logScannerActivity = conf.getBoolean(
+ ScannerCallable.LOG_SCANNER_ACTIVITY, false);
+ logPerRowCount = conf.getInt(LOG_PER_ROW_COUNT, 100);
+ this.htable = htable;
+ }
+
+ /**
+ * @param inputColumns the columns to be placed in {@link Result}.
+ */
+ public void setInputColumns(final byte [][] inputColumns) {
+ this.trrInputColumns = inputColumns;
+ }
+
+ /**
+ * @param startRow the first row in the split
+ */
+ public void setStartRow(final byte [] startRow) {
+ this.startRow = startRow;
+ }
+
+ /**
+ *
+ * @param endRow the last row in the split
+ */
+ public void setEndRow(final byte [] endRow) {
+ this.endRow = endRow;
+ }
+
+ /**
+ * @param rowFilter the {@link Filter} to be used.
+ */
+ public void setRowFilter(Filter rowFilter) {
+ this.trrRowFilter = rowFilter;
+ }
+
+ public void close() {
+ if (this.scanner != null) {
+ this.scanner.close();
+ }
+ try {
+ this.htable.close();
+ } catch (IOException ioe) {
+ LOG.warn("Error closing table", ioe);
+ }
+ }
+
+ /**
+ * @return ImmutableBytesWritable
+ *
+ * @see org.apache.hadoop.mapred.RecordReader#createKey()
+ */
+ public ImmutableBytesWritable createKey() {
+ return new ImmutableBytesWritable();
+ }
+
+ /**
+ * @return Result
+ *
+ * @see org.apache.hadoop.mapred.RecordReader#createValue()
+ */
+ public Result createValue() {
+ return new Result();
+ }
+
+ public long getPos() {
+ // This should be the ordinal tuple in the range;
+ // not clear how to calculate...
+ return 0;
+ }
+
+ public float getProgress() {
+ // Depends on the total number of tuples and getPos
+ return 0;
+ }
+
+ /**
+ * @param key ImmutableBytesWritable to fill with the input key.
+ * @param value Result to fill with the input value.
+ * @return true if there was more data
+ * @throws IOException
+ */
+ public boolean next(ImmutableBytesWritable key, Result value)
+ throws IOException {
+ Result result;
+ try {
+ try {
+ result = this.scanner.next();
+ if (logScannerActivity) {
+ rowcount ++;
+ if (rowcount >= logPerRowCount) {
+ long now = System.currentTimeMillis();
+ LOG.info("Mapper took " + (now-timestamp)
+ + "ms to process " + rowcount + " rows");
+ timestamp = now;
+ rowcount = 0;
+ }
+ }
+ } catch (IOException e) {
+ // do not retry if the exception tells us not to do so
+ if (e instanceof DoNotRetryIOException) {
+ throw e;
+ }
+ // try to handle all other IOExceptions by restarting
+ // the scanner, if the second call fails, it will be rethrown
+ LOG.debug("recovered from " + StringUtils.stringifyException(e));
+ if (lastSuccessfulRow == null) {
+ LOG.warn("We are restarting the first next() invocation," +
+ " if your mapper has restarted a few other times like this" +
+ " then you should consider killing this job and investigate" +
+ " why it's taking so long.");
+ }
+ if (lastSuccessfulRow == null) {
+ restart(startRow);
+ } else {
+ restart(lastSuccessfulRow);
+ this.scanner.next(); // skip presumed already mapped row
+ }
+ result = this.scanner.next();
+ }
+
+ if (result != null && result.size() > 0) {
+ key.set(result.getRow());
+ lastSuccessfulRow = key.get();
+ value.copyFrom(result);
+ return true;
+ }
+ return false;
+ } catch (IOException ioe) {
+ if (logScannerActivity) {
+ long now = System.currentTimeMillis();
+ LOG.info("Mapper took " + (now-timestamp)
+ + "ms to process " + rowcount + " rows");
+ LOG.info(ioe);
+ String lastRow = lastSuccessfulRow == null ?
+ "null" : Bytes.toStringBinary(lastSuccessfulRow);
+ LOG.info("lastSuccessfulRow=" + lastRow);
+ }
+ throw ioe;
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableReduce.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableReduce.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableReduce.java
new file mode 100644
index 0000000..91fb4a1
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableReduce.java
@@ -0,0 +1,38 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapred.Reducer;
+
+/**
+ * Write a table, sorting by the input key
+ *
+ * @param <K> key class
+ * @param <V> value class
+ */
+@InterfaceAudience.Public
+@SuppressWarnings("unchecked")
+public interface TableReduce<K extends WritableComparable, V>
+extends Reducer<K, V, ImmutableBytesWritable, Put> {
+
+}
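
For illustration, a sketch of a TableReduce implementation that counts the map values seen per key and writes the count back as a single cell; the class name, the family "f1" and the qualifier "count" are assumptions for the example:

    import java.io.IOException;
    import java.util.Iterator;
    import org.apache.hadoop.hbase.client.Put;
    import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
    import org.apache.hadoop.hbase.mapred.TableReduce;
    import org.apache.hadoop.hbase.util.Bytes;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.MapReduceBase;
    import org.apache.hadoop.mapred.OutputCollector;
    import org.apache.hadoop.mapred.Reporter;

    public class MyTableReduce extends MapReduceBase implements TableReduce<Text, Text> {
      @Override
      public void reduce(Text key, Iterator<Text> values,
          OutputCollector<ImmutableBytesWritable, Put> output, Reporter reporter)
          throws IOException {
        int count = 0;
        while (values.hasNext()) {
          values.next();
          count++;
        }
        byte[] row = Bytes.toBytes(key.toString());
        Put put = new Put(row);
        // One cell per output row: f1:count holds the number of map outputs for this key.
        put.addColumn(Bytes.toBytes("f1"), Bytes.toBytes("count"), Bytes.toBytes(count));
        output.collect(new ImmutableBytesWritable(row), put);
      }
    }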
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableSnapshotInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableSnapshotInputFormat.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableSnapshotInputFormat.java
new file mode 100644
index 0000000..d7b49ff
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableSnapshotInputFormat.java
@@ -0,0 +1,166 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mapred;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormatImpl;
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.List;
+
+/**
+ * TableSnapshotInputFormat allows a MapReduce job to run over a table snapshot. Further
+ * documentation available on {@link org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat}.
+ *
+ * @see org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat
+ */
+@InterfaceAudience.Public
+public class TableSnapshotInputFormat implements InputFormat<ImmutableBytesWritable, Result> {
+
+ public static class TableSnapshotRegionSplit implements InputSplit {
+ private TableSnapshotInputFormatImpl.InputSplit delegate;
+
+ // constructor for mapreduce framework / Writable
+ public TableSnapshotRegionSplit() {
+ this.delegate = new TableSnapshotInputFormatImpl.InputSplit();
+ }
+
+ public TableSnapshotRegionSplit(TableSnapshotInputFormatImpl.InputSplit delegate) {
+ this.delegate = delegate;
+ }
+
+ public TableSnapshotRegionSplit(HTableDescriptor htd, HRegionInfo regionInfo,
+ List<String> locations, Scan scan, Path restoreDir) {
+ this.delegate =
+ new TableSnapshotInputFormatImpl.InputSplit(htd, regionInfo, locations, scan, restoreDir);
+ }
+
+ @Override
+ public long getLength() throws IOException {
+ return delegate.getLength();
+ }
+
+ @Override
+ public String[] getLocations() throws IOException {
+ return delegate.getLocations();
+ }
+
+ @Override
+ public void write(DataOutput out) throws IOException {
+ delegate.write(out);
+ }
+
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ delegate.readFields(in);
+ }
+ }
+
+ static class TableSnapshotRecordReader
+ implements RecordReader<ImmutableBytesWritable, Result> {
+
+ private TableSnapshotInputFormatImpl.RecordReader delegate;
+
+ public TableSnapshotRecordReader(TableSnapshotRegionSplit split, JobConf job)
+ throws IOException {
+ delegate = new TableSnapshotInputFormatImpl.RecordReader();
+ delegate.initialize(split.delegate, job);
+ }
+
+ @Override
+ public boolean next(ImmutableBytesWritable key, Result value) throws IOException {
+ if (!delegate.nextKeyValue()) {
+ return false;
+ }
+ ImmutableBytesWritable currentKey = delegate.getCurrentKey();
+ key.set(currentKey.get(), currentKey.getOffset(), currentKey.getLength());
+ value.copyFrom(delegate.getCurrentValue());
+ return true;
+ }
+
+ @Override
+ public ImmutableBytesWritable createKey() {
+ return new ImmutableBytesWritable();
+ }
+
+ @Override
+ public Result createValue() {
+ return new Result();
+ }
+
+ @Override
+ public long getPos() throws IOException {
+ return delegate.getPos();
+ }
+
+ @Override
+ public void close() throws IOException {
+ delegate.close();
+ }
+
+ @Override
+ public float getProgress() throws IOException {
+ return delegate.getProgress();
+ }
+ }
+
+ @Override
+ public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
+ List<TableSnapshotInputFormatImpl.InputSplit> splits =
+ TableSnapshotInputFormatImpl.getSplits(job);
+ InputSplit[] results = new InputSplit[splits.size()];
+ for (int i = 0; i < splits.size(); i++) {
+ results[i] = new TableSnapshotRegionSplit(splits.get(i));
+ }
+ return results;
+ }
+
+ @Override
+ public RecordReader<ImmutableBytesWritable, Result>
+ getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException {
+ return new TableSnapshotRecordReader((TableSnapshotRegionSplit) split, job);
+ }
+
+ /**
+ * Configures the job to use TableSnapshotInputFormat to read from a snapshot.
+ * @param job the job to configure
+ * @param snapshotName the name of the snapshot to read from
+ * @param restoreDir a temporary directory to restore the snapshot into. Current user should
+ * have write permissions to this directory, and this should not be a subdirectory of rootdir.
+ * After the job is finished, restoreDir can be deleted.
+ * @throws IOException if an error occurs
+ */
+ public static void setInput(JobConf job, String snapshotName, Path restoreDir)
+ throws IOException {
+ TableSnapshotInputFormatImpl.setInput(job, snapshotName, restoreDir);
+ }
+}
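
For illustration, a sketch of configuring a snapshot-backed job through TableMapReduceUtil.initTableSnapshotMapJob, which calls the setInput method above; the snapshot name "mysnapshot", the restore directory, the column "f1:col" and the hypothetical MyTableMap mapper are assumptions:

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.mapred.TableMapReduceUtil;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.JobClient;
    import org.apache.hadoop.mapred.JobConf;

    public class SnapshotScanDriver {
      public static void main(String[] args) throws Exception {
        JobConf job = new JobConf(HBaseConfiguration.create(), SnapshotScanDriver.class);
        // Scratch directory the submitting user can write to; must not live under hbase.rootdir.
        Path restoreDir = new Path("/tmp/snapshot-restore");
        TableMapReduceUtil.initTableSnapshotMapJob("mysnapshot", "f1:col",
            MyTableMap.class, Text.class, Text.class, job,
            true, restoreDir);
        JobClient.runJob(job);
      }
    }

Because the splits read snapshot files straight from the filesystem, the job never touches the region servers; the restore directory can be deleted once the job finishes.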
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableSplit.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableSplit.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableSplit.java
new file mode 100644
index 0000000..0784e5e
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableSplit.java
@@ -0,0 +1,154 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapred.InputSplit;
+
+/**
+ * A table split corresponds to a key range [low, high)
+ */
+@InterfaceAudience.Public
+public class TableSplit implements InputSplit, Comparable<TableSplit> {
+ private TableName m_tableName;
+ private byte [] m_startRow;
+ private byte [] m_endRow;
+ private String m_regionLocation;
+
+ /** default constructor */
+ public TableSplit() {
+ this((TableName)null, HConstants.EMPTY_BYTE_ARRAY,
+ HConstants.EMPTY_BYTE_ARRAY, "");
+ }
+
+ /**
+ * Constructor
+ * @param tableName
+ * @param startRow
+ * @param endRow
+ * @param location
+ */
+ public TableSplit(TableName tableName, byte [] startRow, byte [] endRow,
+ final String location) {
+ this.m_tableName = tableName;
+ this.m_startRow = startRow;
+ this.m_endRow = endRow;
+ this.m_regionLocation = location;
+ }
+
+ public TableSplit(byte [] tableName, byte [] startRow, byte [] endRow,
+ final String location) {
+ this(TableName.valueOf(tableName), startRow, endRow,
+ location);
+ }
+
+ /** @return table name */
+ public TableName getTable() {
+ return this.m_tableName;
+ }
+
+ /** @return table name */
+ public byte [] getTableName() {
+ return this.m_tableName.getName();
+ }
+
+ /** @return starting row key */
+ public byte [] getStartRow() {
+ return this.m_startRow;
+ }
+
+ /** @return end row key */
+ public byte [] getEndRow() {
+ return this.m_endRow;
+ }
+
+ /** @return the region's hostname */
+ public String getRegionLocation() {
+ return this.m_regionLocation;
+ }
+
+ public String[] getLocations() {
+ return new String[] {this.m_regionLocation};
+ }
+
+ public long getLength() {
+ // Not clear how to obtain this... seems to be used only for sorting splits
+ return 0;
+ }
+
+ public void readFields(DataInput in) throws IOException {
+ this.m_tableName = TableName.valueOf(Bytes.readByteArray(in));
+ this.m_startRow = Bytes.readByteArray(in);
+ this.m_endRow = Bytes.readByteArray(in);
+ this.m_regionLocation = Bytes.toString(Bytes.readByteArray(in));
+ }
+
+ public void write(DataOutput out) throws IOException {
+ Bytes.writeByteArray(out, this.m_tableName.getName());
+ Bytes.writeByteArray(out, this.m_startRow);
+ Bytes.writeByteArray(out, this.m_endRow);
+ Bytes.writeByteArray(out, Bytes.toBytes(this.m_regionLocation));
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("HBase table split(");
+ sb.append("table name: ").append(m_tableName);
+ sb.append(", start row: ").append(Bytes.toStringBinary(m_startRow));
+ sb.append(", end row: ").append(Bytes.toStringBinary(m_endRow));
+ sb.append(", region location: ").append(m_regionLocation);
+ sb.append(")");
+ return sb.toString();
+ }
+
+ @Override
+ public int compareTo(TableSplit o) {
+ return Bytes.compareTo(getStartRow(), o.getStartRow());
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (o == null || !(o instanceof TableSplit)) {
+ return false;
+ }
+ TableSplit other = (TableSplit)o;
+ return m_tableName.equals(other.m_tableName) &&
+ Bytes.equals(m_startRow, other.m_startRow) &&
+ Bytes.equals(m_endRow, other.m_endRow) &&
+ m_regionLocation.equals(other.m_regionLocation);
+ }
+
+ @Override
+ public int hashCode() {
+ int result = m_tableName != null ? m_tableName.hashCode() : 0;
+ result = 31 * result + Arrays.hashCode(m_startRow);
+ result = 31 * result + Arrays.hashCode(m_endRow);
+ result = 31 * result + (m_regionLocation != null ? m_regionLocation.hashCode() : 0);
+ return result;
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/package-info.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/package-info.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/package-info.java
new file mode 100644
index 0000000..1da3a52
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/package-info.java
@@ -0,0 +1,26 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+Provides HBase <a href="http://wiki.apache.org/hadoop/HadoopMapReduce">MapReduce</a>
+Input/OutputFormats, a table indexing MapReduce job, and utility methods.
+
+<p>See <a href="http://hbase.apache.org/book.html#mapreduce">HBase and MapReduce</a>
+in the HBase Reference Guide for documentation on running MapReduce over HBase.
+*/
+package org.apache.hadoop.hbase.mapred;
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCounter.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCounter.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCounter.java
new file mode 100644
index 0000000..078033e
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCounter.java
@@ -0,0 +1,333 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.filter.CompareFilter;
+import org.apache.hadoop.hbase.filter.Filter;
+import org.apache.hadoop.hbase.filter.PrefixFilter;
+import org.apache.hadoop.hbase.filter.RegexStringComparator;
+import org.apache.hadoop.hbase.filter.RowFilter;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.base.Preconditions;
+
+
+/**
+ * A job with a map and reduce phase to count cells in a table.
+ * The counter lists the following stats for a given table:
+ * <pre>
+ * 1. Total number of rows in the table
+ * 2. Total number of CFs across all rows
+ * 3. Total qualifiers across all rows
+ * 4. Total occurrence of each CF
+ * 5. Total occurrence of each qualifier
+ * 6. Total number of versions of each qualifier.
+ * </pre>
+ *
+ * CellCounter takes optional parameters: a user-supplied separator string
+ * used between row, family and qualifier in the report; a regex-based or
+ * prefix-based row filter to restrict the count to a subset of rows; and a
+ * start time and/or end time to limit the count to a time range.
+ */
+@InterfaceAudience.Public
+public class CellCounter extends Configured implements Tool {
+ private static final Log LOG =
+ LogFactory.getLog(CellCounter.class.getName());
+
+
+ /**
+ * Name of this 'program'.
+ */
+ static final String NAME = "CellCounter";
+
+ private final static String JOB_NAME_CONF_KEY = "mapreduce.job.name";
+
+ /**
+ * Mapper that runs the count.
+ */
+ static class CellCounterMapper
+ extends TableMapper<Text, IntWritable> {
+ /**
+ * Counter enumeration to count the actual rows.
+ */
+ public static enum Counters {
+ ROWS,
+ CELLS
+ }
+
+ private Configuration conf;
+ private String separator;
+
+ // state of current row, family, column needs to persist across map() invocations
+ // in order to properly handle scanner batching, where a single qualifier may have too
+ // many versions for a single map() call
+ private byte[] lastRow;
+ private String currentRowKey;
+ byte[] currentFamily = null;
+ String currentFamilyName = null;
+ byte[] currentQualifier = null;
+ // family + qualifier
+ String currentQualifierName = null;
+ // rowkey + family + qualifier
+ String currentRowQualifierName = null;
+
+ @Override
+ protected void setup(Context context) throws IOException, InterruptedException {
+ conf = context.getConfiguration();
+ separator = conf.get("ReportSeparator",":");
+ }
+
+ /**
+ * Maps the data.
+ *
+ * @param row The current table row key.
+ * @param values The columns.
+ * @param context The current context.
+ * @throws IOException When something is broken with the data.
+ * @see org.apache.hadoop.mapreduce.Mapper#map(KEYIN, VALUEIN,
+ * org.apache.hadoop.mapreduce.Mapper.Context)
+ */
+
+ @Override
+ @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="NP_NULL_ON_SOME_PATH",
+ justification="Findbugs is blind to the Precondition null check")
+ public void map(ImmutableBytesWritable row, Result values,
+ Context context)
+ throws IOException {
+ Preconditions.checkState(values != null,
+ "values passed to the map is null");
+
+ try {
+ byte[] currentRow = values.getRow();
+ if (lastRow == null || !Bytes.equals(lastRow, currentRow)) {
+ lastRow = currentRow;
+ currentRowKey = Bytes.toStringBinary(currentRow);
+ currentFamily = null;
+ currentQualifier = null;
+ context.getCounter(Counters.ROWS).increment(1);
+ context.write(new Text("Total ROWS"), new IntWritable(1));
+ }
+ if (!values.isEmpty()) {
+ int cellCount = 0;
+ for (Cell value : values.listCells()) {
+ cellCount++;
+ if (currentFamily == null || !CellUtil.matchingFamily(value, currentFamily)) {
+ currentFamily = CellUtil.cloneFamily(value);
+ currentFamilyName = Bytes.toStringBinary(currentFamily);
+ currentQualifier = null;
+ context.getCounter("CF", currentFamilyName).increment(1);
+ if (1 == context.getCounter("CF", currentFamilyName).getValue()) {
+ context.write(new Text("Total Families Across all Rows"), new IntWritable(1));
+ context.write(new Text(currentFamily), new IntWritable(1));
+ }
+ }
+ if (currentQualifier == null || !CellUtil.matchingQualifier(value, currentQualifier)) {
+ currentQualifier = CellUtil.cloneQualifier(value);
+ currentQualifierName = currentFamilyName + separator +
+ Bytes.toStringBinary(currentQualifier);
+ currentRowQualifierName = currentRowKey + separator + currentQualifierName;
+
+ context.write(new Text("Total Qualifiers across all Rows"),
+ new IntWritable(1));
+ context.write(new Text(currentQualifierName), new IntWritable(1));
+ }
+ // Increment versions
+ context.write(new Text(currentRowQualifierName + "_Versions"), new IntWritable(1));
+ }
+ context.getCounter(Counters.CELLS).increment(cellCount);
+ }
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ }
+ }
+ }
+
+ static class IntSumReducer<Key> extends Reducer<Key, IntWritable,
+ Key, IntWritable> {
+
+ private IntWritable result = new IntWritable();
+ public void reduce(Key key, Iterable<IntWritable> values,
+ Context context)
+ throws IOException, InterruptedException {
+ int sum = 0;
+ for (IntWritable val : values) {
+ sum += val.get();
+ }
+ result.set(sum);
+ context.write(key, result);
+ }
+ }
+
+ /**
+ * Sets up the actual job.
+ *
+ * @param conf The current configuration.
+ * @param args The command line parameters.
+ * @return The newly created job.
+ * @throws IOException When setting up the job fails.
+ */
+ public static Job createSubmittableJob(Configuration conf, String[] args)
+ throws IOException {
+ String tableName = args[0];
+ Path outputDir = new Path(args[1]);
+ String reportSeparatorString = (args.length > 2) ? args[2]: ":";
+ conf.set("ReportSeparator", reportSeparatorString);
+ Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
+ job.setJarByClass(CellCounter.class);
+ Scan scan = getConfiguredScanForJob(conf, args);
+ TableMapReduceUtil.initTableMapperJob(tableName, scan,
+ CellCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);
+ job.setNumReduceTasks(1);
+ job.setMapOutputKeyClass(Text.class);
+ job.setMapOutputValueClass(IntWritable.class);
+ job.setOutputFormatClass(TextOutputFormat.class);
+ job.setOutputKeyClass(Text.class);
+ job.setOutputValueClass(IntWritable.class);
+ FileOutputFormat.setOutputPath(job, outputDir);
+ job.setReducerClass(IntSumReducer.class);
+ return job;
+ }
+
+ private static Scan getConfiguredScanForJob(Configuration conf, String[] args)
+ throws IOException {
+ // create scan with any properties set from TableInputFormat
+ Scan s = TableInputFormat.createScanFromConfiguration(conf);
+ // Set Scan Versions
+ if (conf.get(TableInputFormat.SCAN_MAXVERSIONS) == null) {
+ // default to all versions unless explicitly set
+ s.setMaxVersions(Integer.MAX_VALUE);
+ }
+ s.setCacheBlocks(false);
+ // Set RowFilter or Prefix Filter if applicable.
+ Filter rowFilter = getRowFilter(args);
+ if (rowFilter!= null) {
+ LOG.info("Setting Row Filter for counter.");
+ s.setFilter(rowFilter);
+ }
+ // Set TimeRange if defined
+ long timeRange[] = getTimeRange(args);
+ if (timeRange != null) {
+ LOG.info("Setting TimeRange for counter.");
+ s.setTimeRange(timeRange[0], timeRange[1]);
+ }
+ return s;
+ }
+
+
+ private static Filter getRowFilter(String[] args) {
+ Filter rowFilter = null;
+ String filterCriteria = (args.length > 3) ? args[3]: null;
+ if (filterCriteria == null) return null;
+ if (filterCriteria.startsWith("^")) {
+ String regexPattern = filterCriteria.substring(1, filterCriteria.length());
+ rowFilter = new RowFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator(regexPattern));
+ } else {
+ rowFilter = new PrefixFilter(Bytes.toBytesBinary(filterCriteria));
+ }
+ return rowFilter;
+ }
+
+ private static long[] getTimeRange(String[] args) throws IOException {
+ final String startTimeArgKey = "--starttime=";
+ final String endTimeArgKey = "--endtime=";
+ long startTime = 0L;
+ long endTime = 0L;
+
+ for (int i = 1; i < args.length; i++) {
+ System.out.println("i:" + i + "arg[i]" + args[i]);
+ if (args[i].startsWith(startTimeArgKey)) {
+ startTime = Long.parseLong(args[i].substring(startTimeArgKey.length()));
+ }
+ if (args[i].startsWith(endTimeArgKey)) {
+ endTime = Long.parseLong(args[i].substring(endTimeArgKey.length()));
+ }
+ }
+
+ if (startTime == 0 && endTime == 0)
+ return null;
+
+ endTime = endTime == 0 ? HConstants.LATEST_TIMESTAMP : endTime;
+ return new long [] {startTime, endTime};
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+ if (args.length < 2) {
+ System.err.println("ERROR: Wrong number of parameters: " + args.length);
+ System.err.println("Usage: CellCounter ");
+ System.err.println(" <tablename> <outputDir> <reportSeparator> [^[regex pattern] or " +
+ "[Prefix] for row filter]] --starttime=[starttime] --endtime=[endtime]");
+ System.err.println(" Note: -D properties will be applied to the conf used. ");
+ System.err.println(" Additionally, all of the SCAN properties from TableInputFormat");
+ System.err.println(" can be specified to get fine grained control on what is counted..");
+ System.err.println(" -D " + TableInputFormat.SCAN_ROW_START + "=<rowkey>");
+ System.err.println(" -D " + TableInputFormat.SCAN_ROW_STOP + "=<rowkey>");
+ System.err.println(" -D " + TableInputFormat.SCAN_COLUMNS + "=\"<col1> <col2>...\"");
+ System.err.println(" -D " + TableInputFormat.SCAN_COLUMN_FAMILY + "=<family1>,<family2>, ...");
+ System.err.println(" -D " + TableInputFormat.SCAN_TIMESTAMP + "=<timestamp>");
+ System.err.println(" -D " + TableInputFormat.SCAN_TIMERANGE_START + "=<timestamp>");
+ System.err.println(" -D " + TableInputFormat.SCAN_TIMERANGE_END + "=<timestamp>");
+ System.err.println(" -D " + TableInputFormat.SCAN_MAXVERSIONS + "=<count>");
+ System.err.println(" -D " + TableInputFormat.SCAN_CACHEDROWS + "=<count>");
+ System.err.println(" -D " + TableInputFormat.SCAN_BATCHSIZE + "=<count>");
+ System.err.println(" <reportSeparator> parameter can be used to override the default report separator " +
+ "string : used to separate the rowId/column family name and qualifier name.");
+ System.err.println(" [^[regex pattern] or [Prefix] parameter can be used to limit the cell counter count " +
+ "operation to a limited subset of rows from the table based on regex or prefix pattern.");
+ return -1;
+ }
+ Job job = createSubmittableJob(getConf(), args);
+ return (job.waitForCompletion(true) ? 0 : 1);
+ }
+
+ /**
+ * Main entry point.
+ * @param args The command line parameters.
+ * @throws Exception When running the job fails.
+ */
+ public static void main(String[] args) throws Exception {
+ int errCode = ToolRunner.run(HBaseConfiguration.create(), new CellCounter(), args);
+ System.exit(errCode);
+ }
+
+}
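
For reference, a minimal sketch of driving the tool above through ToolRunner,
mirroring main() and the usage text printed by run(). The launcher class name,
table, output directory, and row-filter argument below are hypothetical:

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.hbase.HBaseConfiguration;
  import org.apache.hadoop.hbase.mapreduce.CellCounter;
  import org.apache.hadoop.util.ToolRunner;

  public class CellCounterLauncher {
    public static void main(String[] args) throws Exception {
      Configuration conf = HBaseConfiguration.create();
      // <tablename> <outputDir> <reportSeparator> [row filter]; a leading '^'
      // selects a regex row filter, any other value is treated as a row prefix.
      String[] toolArgs = new String[] {
          "mytable", "/tmp/cellcounter-out", ":", "^row-.*" };
      System.exit(ToolRunner.run(conf, new CellCounter(), toolArgs));
    }
  }
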
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCreator.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCreator.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCreator.java
new file mode 100644
index 0000000..1d4d37b
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCreator.java
@@ -0,0 +1,134 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.Tag;
+import org.apache.hadoop.util.ReflectionUtils;
+
+/**
+ * Facade to create Cells for HFileOutputFormat. The created Cells are of <code>Put</code> type.
+ */
+@InterfaceAudience.Public
+public class CellCreator {
+
+ public static final String VISIBILITY_EXP_RESOLVER_CLASS =
+ "hbase.mapreduce.visibility.expression.resolver.class";
+
+ private VisibilityExpressionResolver visExpResolver;
+
+ public CellCreator(Configuration conf) {
+ Class<? extends VisibilityExpressionResolver> clazz = conf.getClass(
+ VISIBILITY_EXP_RESOLVER_CLASS, DefaultVisibilityExpressionResolver.class,
+ VisibilityExpressionResolver.class);
+ this.visExpResolver = ReflectionUtils.newInstance(clazz, conf);
+ this.visExpResolver.init();
+ }
+
+ /**
+ * @param row row key
+ * @param roffset row offset
+ * @param rlength row length
+ * @param family family name
+ * @param foffset family offset
+ * @param flength family length
+ * @param qualifier column qualifier
+ * @param qoffset qualifier offset
+ * @param qlength qualifier length
+ * @param timestamp version timestamp
+ * @param value column value
+ * @param voffset value offset
+ * @param vlength value length
+ * @return created Cell
+ * @throws IOException
+ */
+ public Cell create(byte[] row, int roffset, int rlength, byte[] family, int foffset, int flength,
+ byte[] qualifier, int qoffset, int qlength, long timestamp, byte[] value, int voffset,
+ int vlength) throws IOException {
+ return create(row, roffset, rlength, family, foffset, flength, qualifier, qoffset, qlength,
+ timestamp, value, voffset, vlength, (List<Tag>)null);
+ }
+
+ /**
+ * @param row row key
+ * @param roffset row offset
+ * @param rlength row length
+ * @param family family name
+ * @param foffset family offset
+ * @param flength family length
+ * @param qualifier column qualifier
+ * @param qoffset qualifier offset
+ * @param qlength qualifier length
+ * @param timestamp version timestamp
+ * @param value column value
+ * @param voffset value offset
+ * @param vlength value length
+ * @param visExpression visibility expression to be associated with cell
+ * @return created Cell
+ * @throws IOException
+ */
+ @Deprecated
+ public Cell create(byte[] row, int roffset, int rlength, byte[] family, int foffset, int flength,
+ byte[] qualifier, int qoffset, int qlength, long timestamp, byte[] value, int voffset,
+ int vlength, String visExpression) throws IOException {
+ List<Tag> visTags = null;
+ if (visExpression != null) {
+ visTags = this.visExpResolver.createVisibilityExpTags(visExpression);
+ }
+ return new KeyValue(row, roffset, rlength, family, foffset, flength, qualifier, qoffset,
+ qlength, timestamp, KeyValue.Type.Put, value, voffset, vlength, visTags);
+ }
+
+ /**
+ * @param row row key
+ * @param roffset row offset
+ * @param rlength row length
+ * @param family family name
+ * @param foffset family offset
+ * @param flength family length
+ * @param qualifier column qualifier
+ * @param qoffset qualifier offset
+ * @param qlength qualifier length
+ * @param timestamp version timestamp
+ * @param value column value
+ * @param voffset value offset
+ * @param vlength value length
+ * @param tags tags to be associated with the created cell
+ * @return created Cell
+ * @throws IOException
+ */
+ public Cell create(byte[] row, int roffset, int rlength, byte[] family, int foffset, int flength,
+ byte[] qualifier, int qoffset, int qlength, long timestamp, byte[] value, int voffset,
+ int vlength, List<Tag> tags) throws IOException {
+ return new KeyValue(row, roffset, rlength, family, foffset, flength, qualifier, qoffset,
+ qlength, timestamp, KeyValue.Type.Put, value, voffset, vlength, tags);
+ }
+
+ /**
+ * @return Visibility expression resolver
+ */
+ public VisibilityExpressionResolver getVisibilityExpressionResolver() {
+ return this.visExpResolver;
+ }
+}
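
A rough usage sketch for the facade above, assuming an HBase configuration that
the configured VisibilityExpressionResolver can initialize against; the example
class plus the row, family, qualifier, and value coordinates are made up:

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.hbase.Cell;
  import org.apache.hadoop.hbase.HBaseConfiguration;
  import org.apache.hadoop.hbase.mapreduce.CellCreator;
  import org.apache.hadoop.hbase.util.Bytes;

  public class CellCreatorExample {
    public static void main(String[] args) throws Exception {
      Configuration conf = HBaseConfiguration.create();
      CellCreator creator = new CellCreator(conf);   // resolver is initialized here
      byte[] row = Bytes.toBytes("row1");            // hypothetical coordinates
      byte[] family = Bytes.toBytes("f");
      byte[] qualifier = Bytes.toBytes("q");
      byte[] value = Bytes.toBytes("v");
      // Builds a Put-type Cell; the overloads taking a List<Tag> or a visibility
      // expression attach tags to the created cell.
      Cell cell = creator.create(row, 0, row.length, family, 0, family.length,
          qualifier, 0, qualifier.length, System.currentTimeMillis(),
          value, 0, value.length);
      System.out.println(cell);
    }
  }
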
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/ScanPerformanceEvaluation.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/ScanPerformanceEvaluation.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/ScanPerformanceEvaluation.java
new file mode 100644
index 0000000..e669f14
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/ScanPerformanceEvaluation.java
@@ -0,0 +1,406 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase;
+
+import java.io.IOException;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.client.TableSnapshotScanner;
+import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
+import org.apache.hadoop.hbase.mapreduce.TableMapper;
+import org.apache.hadoop.hbase.util.AbstractHBaseTool;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.Counters;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.util.ToolRunner;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.base.Stopwatch;
+
+/**
+ * A simple performance evaluation tool for single-client scans, MapReduce scans,
+ * and snapshot scans.
+ */
+@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
+public class ScanPerformanceEvaluation extends AbstractHBaseTool {
+
+ private static final String HBASE_COUNTER_GROUP_NAME = "HBase Counters";
+
+ private String type;
+ private String file;
+ private String tablename;
+ private String snapshotName;
+ private String restoreDir;
+ private String caching;
+
+ @Override
+ public void setConf(Configuration conf) {
+ super.setConf(conf);
+ Path rootDir;
+ try {
+ rootDir = FSUtils.getRootDir(conf);
+ rootDir.getFileSystem(conf);
+ } catch (IOException ex) {
+ throw new RuntimeException(ex);
+ }
+ }
+
+ @Override
+ protected void addOptions() {
+ this.addRequiredOptWithArg("t", "type", "the type of the test. One of the following: streaming|scan|snapshotscan|scanmapreduce|snapshotscanmapreduce");
+ this.addOptWithArg("f", "file", "the filename to read from");
+ this.addOptWithArg("tn", "table", "the tablename to read from");
+ this.addOptWithArg("sn", "snapshot", "the snapshot name to read from");
+ this.addOptWithArg("rs", "restoredir", "the directory to restore the snapshot");
+ this.addOptWithArg("ch", "caching", "scanner caching value");
+ }
+
+ @Override
+ protected void processOptions(CommandLine cmd) {
+ type = cmd.getOptionValue("type");
+ file = cmd.getOptionValue("file");
+ tablename = cmd.getOptionValue("table");
+ snapshotName = cmd.getOptionValue("snapshot");
+ restoreDir = cmd.getOptionValue("restoredir");
+ caching = cmd.getOptionValue("caching");
+ }
+
+ protected void testHdfsStreaming(Path filename) throws IOException {
+ byte[] buf = new byte[1024];
+ FileSystem fs = filename.getFileSystem(getConf());
+
+ // read the file from start to finish
+ Stopwatch fileOpenTimer = Stopwatch.createUnstarted();
+ Stopwatch streamTimer = Stopwatch.createUnstarted();
+
+ fileOpenTimer.start();
+ FSDataInputStream in = fs.open(filename);
+ fileOpenTimer.stop();
+
+ long totalBytes = 0;
+ streamTimer.start();
+ while (true) {
+ int read = in.read(buf);
+ if (read < 0) {
+ break;
+ }
+ totalBytes += read;
+ }
+ streamTimer.stop();
+
+ double throughput = (double)totalBytes / streamTimer.elapsed(TimeUnit.SECONDS);
+
+ System.out.println("HDFS streaming: ");
+ System.out.println("total time to open: " +
+ fileOpenTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
+ System.out.println("total time to read: " + streamTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
+ System.out.println("total bytes: " + totalBytes + " bytes ("
+ + StringUtils.humanReadableInt(totalBytes) + ")");
+ System.out.println("throghput : " + StringUtils.humanReadableInt((long)throughput) + "B/s");
+ }
+
+ private Scan getScan() {
+ Scan scan = new Scan(); // default scan settings
+ scan.setCacheBlocks(false);
+ scan.setMaxVersions(1);
+ scan.setScanMetricsEnabled(true);
+ if (caching != null) {
+ scan.setCaching(Integer.parseInt(caching));
+ }
+
+ return scan;
+ }
+
+ public void testScan() throws IOException {
+ Stopwatch tableOpenTimer = Stopwatch.createUnstarted();
+ Stopwatch scanOpenTimer = Stopwatch.createUnstarted();
+ Stopwatch scanTimer = Stopwatch.createUnstarted();
+
+ tableOpenTimer.start();
+ Connection connection = ConnectionFactory.createConnection(getConf());
+ Table table = connection.getTable(TableName.valueOf(tablename));
+ tableOpenTimer.stop();
+
+ Scan scan = getScan();
+ scanOpenTimer.start();
+ ResultScanner scanner = table.getScanner(scan);
+ scanOpenTimer.stop();
+
+ long numRows = 0;
+ long numCells = 0;
+ scanTimer.start();
+ while (true) {
+ Result result = scanner.next();
+ if (result == null) {
+ break;
+ }
+ numRows++;
+
+ numCells += result.rawCells().length;
+ }
+ scanTimer.stop();
+ scanner.close();
+ table.close();
+ connection.close();
+
+ ScanMetrics metrics = scan.getScanMetrics();
+ long totalBytes = metrics.countOfBytesInResults.get();
+ double throughput = (double)totalBytes / scanTimer.elapsed(TimeUnit.SECONDS);
+ double throughputRows = (double)numRows / scanTimer.elapsed(TimeUnit.SECONDS);
+ double throughputCells = (double)numCells / scanTimer.elapsed(TimeUnit.SECONDS);
+
+ System.out.println("HBase scan: ");
+ System.out.println("total time to open table: " +
+ tableOpenTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
+ System.out.println("total time to open scanner: " +
+ scanOpenTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
+ System.out.println("total time to scan: " +
+ scanTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
+
+ System.out.println("Scan metrics:\n" + metrics.getMetricsMap());
+
+ System.out.println("total bytes: " + totalBytes + " bytes ("
+ + StringUtils.humanReadableInt(totalBytes) + ")");
+ System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughput) + "B/s");
+ System.out.println("total rows : " + numRows);
+ System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughputRows) + " rows/s");
+ System.out.println("total cells : " + numCells);
+ System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughputCells) + " cells/s");
+ }
+
+
+ public void testSnapshotScan() throws IOException {
+ Stopwatch snapshotRestoreTimer = Stopwatch.createUnstarted();
+ Stopwatch scanOpenTimer = Stopwatch.createUnstarted();
+ Stopwatch scanTimer = Stopwatch.createUnstarted();
+
+ Path restoreDir = new Path(this.restoreDir);
+
+ snapshotRestoreTimer.start();
+ restoreDir.getFileSystem(conf).delete(restoreDir, true);
+ snapshotRestoreTimer.stop();
+
+ Scan scan = getScan();
+ scanOpenTimer.start();
+ TableSnapshotScanner scanner = new TableSnapshotScanner(conf, restoreDir, snapshotName, scan);
+ scanOpenTimer.stop();
+
+ long numRows = 0;
+ long numCells = 0;
+ scanTimer.start();
+ while (true) {
+ Result result = scanner.next();
+ if (result == null) {
+ break;
+ }
+ numRows++;
+
+ numCells += result.rawCells().length;
+ }
+ scanTimer.stop();
+ scanner.close();
+
+ ScanMetrics metrics = scanner.getScanMetrics();
+ long totalBytes = metrics.countOfBytesInResults.get();
+ double throughput = (double)totalBytes / scanTimer.elapsed(TimeUnit.SECONDS);
+ double throughputRows = (double)numRows / scanTimer.elapsed(TimeUnit.SECONDS);
+ double throughputCells = (double)numCells / scanTimer.elapsed(TimeUnit.SECONDS);
+
+ System.out.println("HBase scan snapshot: ");
+ System.out.println("total time to restore snapshot: " +
+ snapshotRestoreTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
+ System.out.println("total time to open scanner: " +
+ scanOpenTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
+ System.out.println("total time to scan: " +
+ scanTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
+
+ System.out.println("Scan metrics:\n" + metrics.getMetricsMap());
+
+ System.out.println("total bytes: " + totalBytes + " bytes ("
+ + StringUtils.humanReadableInt(totalBytes) + ")");
+ System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughput) + "B/s");
+ System.out.println("total rows : " + numRows);
+ System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughputRows) + " rows/s");
+ System.out.println("total cells : " + numCells);
+ System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughputCells) + " cells/s");
+
+ }
+
+ public static enum ScanCounter {
+ NUM_ROWS,
+ NUM_CELLS,
+ }
+
+ public static class MyMapper<KEYOUT, VALUEOUT> extends TableMapper<KEYOUT, VALUEOUT> {
+ @Override
+ protected void map(ImmutableBytesWritable key, Result value,
+ Context context) throws IOException,
+ InterruptedException {
+ context.getCounter(ScanCounter.NUM_ROWS).increment(1);
+ context.getCounter(ScanCounter.NUM_CELLS).increment(value.rawCells().length);
+ }
+ }
+
+ public void testScanMapReduce() throws IOException, InterruptedException, ClassNotFoundException {
+ Stopwatch scanOpenTimer = Stopwatch.createUnstarted();
+ Stopwatch scanTimer = Stopwatch.createUnstarted();
+
+ Scan scan = getScan();
+
+ String jobName = "testScanMapReduce";
+
+ Job job = new Job(conf);
+ job.setJobName(jobName);
+
+ job.setJarByClass(getClass());
+
+ TableMapReduceUtil.initTableMapperJob(
+ this.tablename,
+ scan,
+ MyMapper.class,
+ NullWritable.class,
+ NullWritable.class,
+ job
+ );
+
+ job.setNumReduceTasks(0);
+ job.setOutputKeyClass(NullWritable.class);
+ job.setOutputValueClass(NullWritable.class);
+ job.setOutputFormatClass(NullOutputFormat.class);
+
+ scanTimer.start();
+ job.waitForCompletion(true);
+ scanTimer.stop();
+
+ Counters counters = job.getCounters();
+ long numRows = counters.findCounter(ScanCounter.NUM_ROWS).getValue();
+ long numCells = counters.findCounter(ScanCounter.NUM_CELLS).getValue();
+
+ long totalBytes = counters.findCounter(HBASE_COUNTER_GROUP_NAME, "BYTES_IN_RESULTS").getValue();
+ double throughput = (double)totalBytes / scanTimer.elapsed(TimeUnit.SECONDS);
+ double throughputRows = (double)numRows / scanTimer.elapsed(TimeUnit.SECONDS);
+ double throughputCells = (double)numCells / scanTimer.elapsed(TimeUnit.SECONDS);
+
+ System.out.println("HBase scan mapreduce: ");
+ System.out.println("total time to open scanner: " +
+ scanOpenTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
+ System.out.println("total time to scan: " + scanTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
+
+ System.out.println("total bytes: " + totalBytes + " bytes ("
+ + StringUtils.humanReadableInt(totalBytes) + ")");
+ System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughput) + "B/s");
+ System.out.println("total rows : " + numRows);
+ System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughputRows) + " rows/s");
+ System.out.println("total cells : " + numCells);
+ System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughputCells) + " cells/s");
+ }
+
+ public void testSnapshotScanMapReduce() throws IOException, InterruptedException, ClassNotFoundException {
+ Stopwatch scanOpenTimer = Stopwatch.createUnstarted();
+ Stopwatch scanTimer = Stopwatch.createUnstarted();
+
+ Scan scan = getScan();
+
+ String jobName = "testSnapshotScanMapReduce";
+
+ Job job = new Job(conf);
+ job.setJobName(jobName);
+
+ job.setJarByClass(getClass());
+
+ TableMapReduceUtil.initTableSnapshotMapperJob(
+ this.snapshotName,
+ scan,
+ MyMapper.class,
+ NullWritable.class,
+ NullWritable.class,
+ job,
+ true,
+ new Path(restoreDir)
+ );
+
+ job.setNumReduceTasks(0);
+ job.setOutputKeyClass(NullWritable.class);
+ job.setOutputValueClass(NullWritable.class);
+ job.setOutputFormatClass(NullOutputFormat.class);
+
+ scanTimer.start();
+ job.waitForCompletion(true);
+ scanTimer.stop();
+
+ Counters counters = job.getCounters();
+ long numRows = counters.findCounter(ScanCounter.NUM_ROWS).getValue();
+ long numCells = counters.findCounter(ScanCounter.NUM_CELLS).getValue();
+
+ long totalBytes = counters.findCounter(HBASE_COUNTER_GROUP_NAME, "BYTES_IN_RESULTS").getValue();
+ double throughput = (double)totalBytes / scanTimer.elapsed(TimeUnit.SECONDS);
+ double throughputRows = (double)numRows / scanTimer.elapsed(TimeUnit.SECONDS);
+ double throughputCells = (double)numCells / scanTimer.elapsed(TimeUnit.SECONDS);
+
+ System.out.println("HBase scan mapreduce: ");
+ System.out.println("total time to open scanner: " +
+ scanOpenTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
+ System.out.println("total time to scan: " + scanTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
+
+ System.out.println("total bytes: " + totalBytes + " bytes ("
+ + StringUtils.humanReadableInt(totalBytes) + ")");
+ System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughput) + "B/s");
+ System.out.println("total rows : " + numRows);
+ System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughputRows) + " rows/s");
+ System.out.println("total cells : " + numCells);
+ System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughputCells) + " cells/s");
+ }
+
+ @Override
+ protected int doWork() throws Exception {
+ if (type.equals("streaming")) {
+ testHdfsStreaming(new Path(file));
+ } else if (type.equals("scan")){
+ testScan();
+ } else if (type.equals("snapshotscan")) {
+ testSnapshotScan();
+ } else if (type.equals("scanmapreduce")) {
+ testScanMapReduce();
+ } else if (type.equals("snapshotscanmapreduce")) {
+ testSnapshotScanMapReduce();
+ }
+ return 0;
+ }
+
+ public static void main (String[] args) throws Exception {
+ int ret = ToolRunner.run(HBaseConfiguration.create(), new ScanPerformanceEvaluation(), args);
+ System.exit(ret);
+ }
+}
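
A minimal sketch of launching the tool above for the plain "scan" test, using the
option names declared in addOptions(); the launcher class, table name, and caching
value are hypothetical, and the tool itself lives in test sources:

  import org.apache.hadoop.hbase.HBaseConfiguration;
  import org.apache.hadoop.hbase.ScanPerformanceEvaluation;
  import org.apache.hadoop.util.ToolRunner;

  public class ScanPerfLauncher {
    public static void main(String[] args) throws Exception {
      // -t is required: streaming|scan|snapshotscan|scanmapreduce|snapshotscanmapreduce
      String[] toolArgs = new String[] {
          "-t", "scan",
          "-tn", "testtable",   // hypothetical table
          "-ch", "100" };       // scanner caching
      System.exit(ToolRunner.run(HBaseConfiguration.create(),
          new ScanPerformanceEvaluation(), toolArgs));
    }
  }
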
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/TestPerformanceEvaluation.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/TestPerformanceEvaluation.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/TestPerformanceEvaluation.java
new file mode 100644
index 0000000..86a3d3f
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/TestPerformanceEvaluation.java
@@ -0,0 +1,218 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase;
+
+import static org.junit.Assert.*;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
+import java.util.NoSuchElementException;
+import java.util.Queue;
+import java.util.Random;
+import java.util.LinkedList;
+
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.PerformanceEvaluation.RandomReadTest;
+import org.apache.hadoop.hbase.PerformanceEvaluation.TestOptions;
+import org.apache.hadoop.hbase.testclassification.MiscTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.codehaus.jackson.JsonGenerationException;
+import org.codehaus.jackson.map.JsonMappingException;
+import org.codehaus.jackson.map.ObjectMapper;
+import org.junit.Ignore;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import com.codahale.metrics.Histogram;
+import com.codahale.metrics.Snapshot;
+import com.codahale.metrics.UniformReservoir;
+
+@Category({MiscTests.class, SmallTests.class})
+public class TestPerformanceEvaluation {
+ private static final HBaseTestingUtility HTU = new HBaseTestingUtility();
+
+ @Test
+ public void testSerialization()
+ throws JsonGenerationException, JsonMappingException, IOException {
+ PerformanceEvaluation.TestOptions options = new PerformanceEvaluation.TestOptions();
+ assertTrue(!options.isAutoFlush());
+ options.setAutoFlush(true);
+ ObjectMapper mapper = new ObjectMapper();
+ String optionsString = mapper.writeValueAsString(options);
+ PerformanceEvaluation.TestOptions optionsDeserialized =
+ mapper.readValue(optionsString, PerformanceEvaluation.TestOptions.class);
+ assertTrue(optionsDeserialized.isAutoFlush());
+ }
+
+ /**
+ * Exercise the mr spec writing. Simple assertions to make sure it is basically working.
+ * @throws IOException
+ */
+ @Ignore @Test
+ public void testWriteInputFile() throws IOException {
+ TestOptions opts = new PerformanceEvaluation.TestOptions();
+ final int clients = 10;
+ opts.setNumClientThreads(clients);
+ opts.setPerClientRunRows(10);
+ Path dir =
+ PerformanceEvaluation.writeInputFile(HTU.getConfiguration(), opts, HTU.getDataTestDir());
+ FileSystem fs = FileSystem.get(HTU.getConfiguration());
+ Path p = new Path(dir, PerformanceEvaluation.JOB_INPUT_FILENAME);
+ long len = fs.getFileStatus(p).getLen();
+ assertTrue(len > 0);
+ byte [] content = new byte[(int)len];
+ FSDataInputStream dis = fs.open(p);
+ try {
+ dis.readFully(content);
+ BufferedReader br =
+ new BufferedReader(new InputStreamReader(new ByteArrayInputStream(content)));
+ int count = 0;
+ while (br.readLine() != null) {
+ count++;
+ }
+ assertEquals(clients, count);
+ } finally {
+ dis.close();
+ }
+ }
+
+ @Test
+ public void testSizeCalculation() {
+ TestOptions opts = new PerformanceEvaluation.TestOptions();
+ opts = PerformanceEvaluation.calculateRowsAndSize(opts);
+ int rows = opts.getPerClientRunRows();
+ // Default row count
+ final int defaultPerClientRunRows = 1024 * 1024;
+ assertEquals(defaultPerClientRunRows, rows);
+ // If size is 2G, then twice the row count.
+ opts.setSize(2.0f);
+ opts = PerformanceEvaluation.calculateRowsAndSize(opts);
+ assertEquals(defaultPerClientRunRows * 2, opts.getPerClientRunRows());
+ // If two clients, then they get half the rows each.
+ opts.setNumClientThreads(2);
+ opts = PerformanceEvaluation.calculateRowsAndSize(opts);
+ assertEquals(defaultPerClientRunRows, opts.getPerClientRunRows());
+ // If valueSize is 'random', values average half of valueSize, so twice the rows.
+ opts.valueRandom = true;
+ opts = PerformanceEvaluation.calculateRowsAndSize(opts);
+ assertEquals(defaultPerClientRunRows * 2, opts.getPerClientRunRows());
+ }
+
+ @Test
+ public void testRandomReadCalculation() {
+ TestOptions opts = new PerformanceEvaluation.TestOptions();
+ opts = PerformanceEvaluation.calculateRowsAndSize(opts);
+ int rows = opts.getPerClientRunRows();
+ // Default row count
+ final int defaultPerClientRunRows = 1024 * 1024;
+ assertEquals(defaultPerClientRunRows, rows);
+ // If size is 2G, then twice the row count.
+ opts.setSize(2.0f);
+ opts.setPerClientRunRows(1000);
+ opts.setCmdName(PerformanceEvaluation.RANDOM_READ);
+ opts = PerformanceEvaluation.calculateRowsAndSize(opts);
+ assertEquals(1000, opts.getPerClientRunRows());
+ // If two clients, then they get half the rows each.
+ opts.setNumClientThreads(2);
+ opts = PerformanceEvaluation.calculateRowsAndSize(opts);
+ assertEquals(1000, opts.getPerClientRunRows());
+ Random random = new Random();
+ // assuming we will get one before this loop expires
+ boolean foundValue = false;
+ for (int i = 0; i < 10000000; i++) {
+ int randomRow = PerformanceEvaluation.generateRandomRow(random, opts.totalRows);
+ if (randomRow > 1000) {
+ foundValue = true;
+ break;
+ }
+ }
+ assertTrue("We need to get a value more than 1000", foundValue);
+ }
+
+ @Test
+ public void testZipfian()
+ throws NoSuchMethodException, SecurityException, InstantiationException, IllegalAccessException,
+ IllegalArgumentException, InvocationTargetException {
+ TestOptions opts = new PerformanceEvaluation.TestOptions();
+ opts.setValueZipf(true);
+ final int valueSize = 1024;
+ opts.setValueSize(valueSize);
+ RandomReadTest rrt = new RandomReadTest(null, opts, null);
+ Constructor<?> ctor =
+ Histogram.class.getDeclaredConstructor(com.codahale.metrics.Reservoir.class);
+ ctor.setAccessible(true);
+ Histogram histogram = (Histogram)ctor.newInstance(new UniformReservoir(1024 * 500));
+ for (int i = 0; i < 100; i++) {
+ histogram.update(rrt.getValueLength(null));
+ }
+ Snapshot snapshot = histogram.getSnapshot();
+ double stddev = snapshot.getStdDev();
+ assertTrue(stddev != 0 && stddev != 1.0);
+ assertTrue(snapshot.getStdDev() != 0);
+ double median = snapshot.getMedian();
+ assertTrue(median != 0 && median != 1 && median != valueSize);
+ }
+
+ @Test
+ public void testParseOptsWithThreads() {
+ Queue<String> opts = new LinkedList<>();
+ String cmdName = "sequentialWrite";
+ int threads = 1;
+ opts.offer(cmdName);
+ opts.offer(String.valueOf(threads));
+ PerformanceEvaluation.TestOptions options = PerformanceEvaluation.parseOpts(opts);
+ assertNotNull(options);
+ assertNotNull(options.getCmdName());
+ assertEquals(cmdName, options.getCmdName());
+ assertEquals(threads, options.getNumClientThreads());
+ }
+
+ @Test
+ public void testParseOptsWrongThreads() {
+ Queue<String> opts = new LinkedList<>();
+ String cmdName = "sequentialWrite";
+ opts.offer(cmdName);
+ opts.offer("qq");
+ try {
+ PerformanceEvaluation.parseOpts(opts);
+ fail("Expected IllegalArgumentException for a non-numeric thread count");
+ } catch (IllegalArgumentException e) {
+ System.out.println(e.getMessage());
+ assertEquals("Command " + cmdName + " does not have threads number", e.getMessage());
+ assertTrue(e.getCause() instanceof NumberFormatException);
+ }
+ }
+
+ @Test
+ public void testParseOptsNoThreads() {
+ Queue<String> opts = new LinkedList<>();
+ String cmdName = "sequentialWrite";
+ try {
+ PerformanceEvaluation.parseOpts(opts);
+ fail("Expected IllegalArgumentException when no thread count is supplied");
+ } catch (IllegalArgumentException e) {
+ System.out.println(e.getMessage());
+ assertEquals("Command " + cmdName + " does not have threads number", e.getMessage());
+ assertTrue(e.getCause() instanceof NoSuchElementException);
+ }
+ }
+}
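
A condensed sketch of the parseOpts() behaviour the tests above exercise; the
command name and thread count are arbitrary, and the class is declared in the
same package as PerformanceEvaluation so package-visible members are reachable,
as in the test:

  package org.apache.hadoop.hbase;

  import java.util.LinkedList;
  import java.util.Queue;

  public class ParseOptsExample {
    public static void main(String[] args) {
      Queue<String> opts = new LinkedList<>();
      opts.offer("sequentialWrite");   // command name
      opts.offer("4");                 // thread count; must parse as an integer
      PerformanceEvaluation.TestOptions options = PerformanceEvaluation.parseOpts(opts);
      System.out.println(options.getCmdName() + " with "
          + options.getNumClientThreads() + " client thread(s)");
    }
  }
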
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestDriver.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestDriver.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestDriver.java
new file mode 100644
index 0000000..d085c21
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestDriver.java
@@ -0,0 +1,41 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.util.ProgramDriver;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.mockito.Mockito;
+
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.verify;
+
+@Category({MapReduceTests.class, SmallTests.class})
+public class TestDriver {
+
+ @Test
+ public void testDriverMainMethod() throws Throwable {
+ ProgramDriver programDriverMock = mock(ProgramDriver.class);
+ Driver.setProgramDriver(programDriverMock);
+ Driver.main(new String[]{});
+ verify(programDriverMock).driver(Mockito.any(String[].class));
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestGroupingTableMap.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestGroupingTableMap.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestGroupingTableMap.java
new file mode 100644
index 0000000..7131cf9
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestGroupingTableMap.java
@@ -0,0 +1,181 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertNull;
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.verifyZeroInteractions;
+import static org.mockito.Mockito.verifyNoMoreInteractions;
+import static org.mockito.Mockito.when;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.ImmutableList;
+
+@Category({MapReduceTests.class, SmallTests.class})
+public class TestGroupingTableMap {
+
+ @Test
+ @SuppressWarnings({ "deprecation", "unchecked" })
+ public void shouldNotCallCollectSinceFindUniqueKeyValueMoreThanOnce()
+ throws Exception {
+ GroupingTableMap gTableMap = null;
+ try {
+ Result result = mock(Result.class);
+ Reporter reporter = mock(Reporter.class);
+ gTableMap = new GroupingTableMap();
+ Configuration cfg = new Configuration();
+ cfg.set(GroupingTableMap.GROUP_COLUMNS, "familyA:qualifierA familyB:qualifierB");
+ JobConf jobConf = new JobConf(cfg);
+ gTableMap.configure(jobConf);
+
+ byte[] row = {};
+ List<Cell> keyValues = ImmutableList.<Cell>of(
+ new KeyValue(row, "familyA".getBytes(), "qualifierA".getBytes(), Bytes.toBytes("1111")),
+ new KeyValue(row, "familyA".getBytes(), "qualifierA".getBytes(), Bytes.toBytes("2222")),
+ new KeyValue(row, "familyB".getBytes(), "qualifierB".getBytes(), Bytes.toBytes("3333")));
+ when(result.listCells()).thenReturn(keyValues);
+ OutputCollector<ImmutableBytesWritable, Result> outputCollectorMock =
+ mock(OutputCollector.class);
+ gTableMap.map(null, result, outputCollectorMock, reporter);
+ verify(result).listCells();
+ verifyZeroInteractions(outputCollectorMock);
+ } finally {
+ if (gTableMap != null)
+ gTableMap.close();
+ }
+ }
+
+ @Test
+ @SuppressWarnings({ "deprecation", "unchecked" })
+ public void shouldCreateNewKeyAlthoughExtraKey() throws Exception {
+ GroupingTableMap gTableMap = null;
+ try {
+ Result result = mock(Result.class);
+ Reporter reporter = mock(Reporter.class);
+ gTableMap = new GroupingTableMap();
+ Configuration cfg = new Configuration();
+ cfg.set(GroupingTableMap.GROUP_COLUMNS, "familyA:qualifierA familyB:qualifierB");
+ JobConf jobConf = new JobConf(cfg);
+ gTableMap.configure(jobConf);
+
+ byte[] row = {};
+ List<Cell> keyValues = ImmutableList.<Cell>of(
+ new KeyValue(row, "familyA".getBytes(), "qualifierA".getBytes(), Bytes.toBytes("1111")),
+ new KeyValue(row, "familyB".getBytes(), "qualifierB".getBytes(), Bytes.toBytes("2222")),
+ new KeyValue(row, "familyC".getBytes(), "qualifierC".getBytes(), Bytes.toBytes("3333")));
+ when(result.listCells()).thenReturn(keyValues);
+ OutputCollector<ImmutableBytesWritable, Result> outputCollectorMock =
+ mock(OutputCollector.class);
+ gTableMap.map(null, result, outputCollectorMock, reporter);
+ verify(result).listCells();
+ verify(outputCollectorMock, times(1))
+ .collect(any(ImmutableBytesWritable.class), any(Result.class));
+ verifyNoMoreInteractions(outputCollectorMock);
+ } finally {
+ if (gTableMap != null)
+ gTableMap.close();
+ }
+ }
+
+ @Test
+ @SuppressWarnings({ "deprecation" })
+ public void shouldCreateNewKey() throws Exception {
+ GroupingTableMap gTableMap = null;
+ try {
+ Result result = mock(Result.class);
+ Reporter reporter = mock(Reporter.class);
+ final byte[] bSeparator = Bytes.toBytes(" ");
+ gTableMap = new GroupingTableMap();
+ Configuration cfg = new Configuration();
+ cfg.set(GroupingTableMap.GROUP_COLUMNS, "familyA:qualifierA familyB:qualifierB");
+ JobConf jobConf = new JobConf(cfg);
+ gTableMap.configure(jobConf);
+
+ final byte[] firstPartKeyValue = Bytes.toBytes("34879512738945");
+ final byte[] secondPartKeyValue = Bytes.toBytes("35245142671437");
+ byte[] row = {};
+ List<Cell> cells = ImmutableList.<Cell>of(
+ new KeyValue(row, "familyA".getBytes(), "qualifierA".getBytes(), firstPartKeyValue),
+ new KeyValue(row, "familyB".getBytes(), "qualifierB".getBytes(), secondPartKeyValue));
+ when(result.listCells()).thenReturn(cells);
+
+ final AtomicBoolean outputCollected = new AtomicBoolean();
+ OutputCollector<ImmutableBytesWritable, Result> outputCollector =
+ new OutputCollector<ImmutableBytesWritable, Result>() {
+ @Override
+ public void collect(ImmutableBytesWritable arg, Result result) throws IOException {
+ assertArrayEquals(org.apache.hadoop.hbase.shaded.com.google.common.primitives.
+ Bytes.concat(firstPartKeyValue, bSeparator,
+ secondPartKeyValue), arg.copyBytes());
+ outputCollected.set(true);
+ }
+ };
+
+ gTableMap.map(null, result, outputCollector, reporter);
+ verify(result).listCells();
+ Assert.assertTrue("Output not received", outputCollected.get());
+
+ final byte[] firstPartValue = Bytes.toBytes("238947928");
+ final byte[] secondPartValue = Bytes.toBytes("4678456942345");
+ byte[][] data = { firstPartValue, secondPartValue };
+ ImmutableBytesWritable byteWritable = gTableMap.createGroupKey(data);
+ assertArrayEquals(org.apache.hadoop.hbase.shaded.com.google.common.primitives.
+ Bytes.concat(firstPartValue,
+ bSeparator, secondPartValue), byteWritable.get());
+ } finally {
+ if (gTableMap != null)
+ gTableMap.close();
+ }
+ }
+
+ @Test
+ @SuppressWarnings({ "deprecation" })
+ public void shouldReturnNullFromCreateGroupKey() throws Exception {
+ GroupingTableMap gTableMap = null;
+ try {
+ gTableMap = new GroupingTableMap();
+ assertNull(gTableMap.createGroupKey(null));
+ } finally {
+ if(gTableMap != null)
+ gTableMap.close();
+ }
+ }
+}
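
A small sketch of the group-key behaviour verified above; the grouped columns and
values are arbitrary, and the class is placed in the same package as
GroupingTableMap (like the test) so package-visible members are reachable:

  package org.apache.hadoop.hbase.mapred;

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
  import org.apache.hadoop.hbase.util.Bytes;
  import org.apache.hadoop.mapred.JobConf;

  public class GroupKeyExample {
    public static void main(String[] args) throws Exception {
      Configuration cfg = new Configuration();
      cfg.set(GroupingTableMap.GROUP_COLUMNS, "familyA:qualifierA familyB:qualifierB");
      GroupingTableMap map = new GroupingTableMap();
      map.configure(new JobConf(cfg));
      // Values of the grouped columns are concatenated with a single space separator.
      ImmutableBytesWritable key = map.createGroupKey(new byte[][] {
          Bytes.toBytes("valueA"), Bytes.toBytes("valueB") });
      System.out.println(Bytes.toString(key.get()));
      map.close();
    }
  }
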
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestIdentityTableMap.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestIdentityTableMap.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestIdentityTableMap.java
new file mode 100644
index 0000000..e222d0b
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestIdentityTableMap.java
@@ -0,0 +1,64 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+
+import java.io.IOException;
+
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.mockito.Mockito;
+
+@Category({MapReduceTests.class, SmallTests.class})
+public class TestIdentityTableMap {
+
+ @Test
+ @SuppressWarnings({ "deprecation", "unchecked" })
+ public void shouldCollectPredefinedTimes() throws IOException {
+ int recordNumber = 999;
+ Result resultMock = mock(Result.class);
+ IdentityTableMap identityTableMap = null;
+ try {
+ Reporter reporterMock = mock(Reporter.class);
+ identityTableMap = new IdentityTableMap();
+ ImmutableBytesWritable bytesWritableMock = mock(ImmutableBytesWritable.class);
+ OutputCollector<ImmutableBytesWritable, Result> outputCollectorMock =
+ mock(OutputCollector.class);
+
+ for (int i = 0; i < recordNumber; i++)
+ identityTableMap.map(bytesWritableMock, resultMock, outputCollectorMock,
+ reporterMock);
+
+ verify(outputCollectorMock, times(recordNumber)).collect(
+ Mockito.any(ImmutableBytesWritable.class), Mockito.any(Result.class));
+ } finally {
+ if (identityTableMap != null)
+ identityTableMap.close();
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestMultiTableSnapshotInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestMultiTableSnapshotInputFormat.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestMultiTableSnapshotInputFormat.java
new file mode 100644
index 0000000..665c547
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestMultiTableSnapshotInputFormat.java
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mapred;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.RunningJob;
+import org.junit.experimental.categories.Category;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.List;
+
+import static org.junit.Assert.assertTrue;
+
+@Category({ VerySlowMapReduceTests.class, LargeTests.class })
+public class TestMultiTableSnapshotInputFormat
+ extends org.apache.hadoop.hbase.mapreduce.TestMultiTableSnapshotInputFormat {
+
+ private static final Log LOG = LogFactory.getLog(TestMultiTableSnapshotInputFormat.class);
+
+ @Override
+ protected void runJob(String jobName, Configuration c, List<Scan> scans)
+ throws IOException, InterruptedException, ClassNotFoundException {
+ JobConf job = new JobConf(TEST_UTIL.getConfiguration());
+
+ job.setJobName(jobName);
+ job.setMapperClass(Mapper.class);
+ job.setReducerClass(Reducer.class);
+
+ TableMapReduceUtil.initMultiTableSnapshotMapperJob(getSnapshotScanMapping(scans), Mapper.class,
+ ImmutableBytesWritable.class, ImmutableBytesWritable.class, job, true, restoreDir);
+
+ TableMapReduceUtil.addDependencyJars(job);
+
+ job.setReducerClass(Reducer.class);
+ job.setNumReduceTasks(1); // one to get final "first" and "last" key
+ FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
+ LOG.info("Started " + job.getJobName());
+
+ RunningJob runningJob = JobClient.runJob(job);
+ runningJob.waitForCompletion();
+ assertTrue(runningJob.isSuccessful());
+ LOG.info("After map/reduce completion - job " + jobName);
+ }
+
+ public static class Mapper extends TestMultiTableSnapshotInputFormat.ScanMapper
+ implements TableMap<ImmutableBytesWritable, ImmutableBytesWritable> {
+
+ @Override
+ public void map(ImmutableBytesWritable key, Result value,
+ OutputCollector<ImmutableBytesWritable, ImmutableBytesWritable> outputCollector,
+ Reporter reporter) throws IOException {
+ makeAssertions(key, value);
+ outputCollector.collect(key, key);
+ }
+
+ /**
+ * Closes this stream and releases any system resources associated
+ * with it. If the stream is already closed then invoking this
+ * method has no effect.
+ *
+ * @throws IOException if an I/O error occurs
+ */
+ @Override
+ public void close() throws IOException {
+ }
+
+ @Override
+ public void configure(JobConf jobConf) {
+
+ }
+ }
+
+ public static class Reducer extends TestMultiTableSnapshotInputFormat.ScanReducer implements
+ org.apache.hadoop.mapred.Reducer<ImmutableBytesWritable, ImmutableBytesWritable,
+ NullWritable, NullWritable> {
+
+ private JobConf jobConf;
+
+ @Override
+ public void reduce(ImmutableBytesWritable key, Iterator<ImmutableBytesWritable> values,
+ OutputCollector<NullWritable, NullWritable> outputCollector, Reporter reporter)
+ throws IOException {
+ makeAssertions(key, Lists.newArrayList(values));
+ }
+
+ /**
+ * Closes this stream and releases any system resources associated
+ * with it. If the stream is already closed then invoking this
+ * method has no effect.
+ *
+ * @throws IOException if an I/O error occurs
+ */
+ @Override
+ public void close() throws IOException {
+ super.cleanup(this.jobConf);
+ }
+
+ @Override
+ public void configure(JobConf jobConf) {
+ this.jobConf = jobConf;
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestRowCounter.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestRowCounter.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestRowCounter.java
new file mode 100644
index 0000000..4ebd8bf
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestRowCounter.java
@@ -0,0 +1,163 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.anyInt;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapred.RowCounter.RowCounterMapper;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.mockito.Mockito;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.base.Joiner;
+
+@Category({MapReduceTests.class, SmallTests.class})
+public class TestRowCounter {
+
+ @Test
+ @SuppressWarnings("deprecation")
+ public void shouldPrintUsage() throws Exception {
+ String expectedOutput = "rowcounter <outputdir> <tablename> <column1> [<column2>...]";
+ String result = new OutputReader(System.out) {
+ @Override
+ void doRead() {
+ assertEquals(-1, RowCounter.printUsage());
+ }
+ }.read();
+
+ assertTrue(result.startsWith(expectedOutput));
+ }
+
+ @Test
+ @SuppressWarnings("deprecation")
+ public void shouldExitAndPrintUsageSinceParameterNumberLessThanThree()
+ throws Exception {
+ final String[] args = new String[] { "one", "two" };
+ String line = "ERROR: Wrong number of parameters: " + args.length;
+ String result = new OutputReader(System.err) {
+ @Override
+ void doRead() throws Exception {
+ assertEquals(-1, new RowCounter().run(args));
+ }
+ }.read();
+
+ assertTrue(result.startsWith(line));
+ }
+
+ @Test
+ @SuppressWarnings({ "deprecation", "unchecked" })
+ public void shouldRegInReportEveryIncomingRow() throws IOException {
+ int iterationNumber = 999;
+ RowCounter.RowCounterMapper mapper = new RowCounter.RowCounterMapper();
+ Reporter reporter = mock(Reporter.class);
+ for (int i = 0; i < iterationNumber; i++)
+ mapper.map(mock(ImmutableBytesWritable.class), mock(Result.class),
+ mock(OutputCollector.class), reporter);
+
+ Mockito.verify(reporter, times(iterationNumber)).incrCounter(
+ any(Enum.class), anyInt());
+ }
+
+ @Test
+ @SuppressWarnings({ "deprecation" })
+ public void shouldCreateAndRunSubmittableJob() throws Exception {
+ RowCounter rCounter = new RowCounter();
+ rCounter.setConf(HBaseConfiguration.create());
+ String[] args = new String[] { "\temp", "tableA", "column1", "column2",
+ "column3" };
+ JobConf jobConfig = rCounter.createSubmittableJob(args);
+
+ assertNotNull(jobConfig);
+ assertEquals(0, jobConfig.getNumReduceTasks());
+ assertEquals("rowcounter", jobConfig.getJobName());
+ assertEquals(jobConfig.getMapOutputValueClass(), Result.class);
+ assertEquals(jobConfig.getMapperClass(), RowCounterMapper.class);
+ assertEquals(jobConfig.get(TableInputFormat.COLUMN_LIST), Joiner.on(' ')
+ .join("column1", "column2", "column3"));
+ assertEquals(jobConfig.getMapOutputKeyClass(), ImmutableBytesWritable.class);
+ }
+
+ enum Outs {
+ OUT, ERR
+ }
+
+ private static abstract class OutputReader {
+ private final PrintStream ps;
+ private PrintStream oldPrintStream;
+ private Outs outs;
+
+ protected OutputReader(PrintStream ps) {
+ this.ps = ps;
+ }
+
+ protected String read() throws Exception {
+ ByteArrayOutputStream outBytes = new ByteArrayOutputStream();
+ if (ps == System.out) {
+ oldPrintStream = System.out;
+ outs = Outs.OUT;
+ System.setOut(new PrintStream(outBytes));
+ } else if (ps == System.err) {
+ oldPrintStream = System.err;
+ outs = Outs.ERR;
+ System.setErr(new PrintStream(outBytes));
+ } else {
+ throw new IllegalStateException("OutputReader: unsupported PrintStream");
+ }
+
+ try {
+ doRead();
+ return new String(outBytes.toByteArray());
+ } finally {
+ switch (outs) {
+ case OUT: {
+ System.setOut(oldPrintStream);
+ break;
+ }
+ case ERR: {
+ System.setErr(oldPrintStream);
+ break;
+ }
+ default:
+ throw new IllegalStateException(
+ "OutputReader: unsupported PrintStream");
+ }
+ }
+ }
+
+ abstract void doRead() throws Exception;
+ }
+}
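For reference, the OutputReader helper above works by swapping System.out or System.err for a PrintStream backed by an in-memory buffer, running the code under test, and then restoring the original stream. A standalone sketch of that capture idiom (the class name and sample output are illustrative only, not part of the patch):

import java.io.ByteArrayOutputStream;
import java.io.PrintStream;

public class CaptureStdoutSketch {
  public static void main(String[] args) {
    ByteArrayOutputStream captured = new ByteArrayOutputStream();
    PrintStream original = System.out;
    System.setOut(new PrintStream(captured));           // redirect stdout into the buffer
    try {
      System.out.println("rowcounter usage goes here"); // stands in for the code under test
    } finally {
      System.setOut(original);                          // always restore the real stream
    }
    String output = new String(captured.toByteArray());
    System.out.println("captured: " + output.trim());
  }
}

The try/finally is the important part: the original stream is restored even when the assertion inside doRead() throws.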
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestSplitTable.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestSplitTable.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestSplitTable.java
new file mode 100644
index 0000000..2655ac2
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestSplitTable.java
@@ -0,0 +1,116 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.Assert;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+
+@Category({MapReduceTests.class, SmallTests.class})
+public class TestSplitTable {
+ @Rule
+ public TestName name = new TestName();
+
+ @Test
+ @SuppressWarnings("deprecation")
+ public void testSplitTableCompareTo() {
+ TableSplit aTableSplit = new TableSplit(Bytes.toBytes("tableA"),
+ Bytes.toBytes("aaa"), Bytes.toBytes("ddd"), "locationA");
+
+ TableSplit bTableSplit = new TableSplit(Bytes.toBytes("tableA"),
+ Bytes.toBytes("iii"), Bytes.toBytes("kkk"), "locationA");
+
+ TableSplit cTableSplit = new TableSplit(Bytes.toBytes("tableA"),
+ Bytes.toBytes("lll"), Bytes.toBytes("zzz"), "locationA");
+
+ assertTrue(aTableSplit.compareTo(aTableSplit) == 0);
+ assertTrue(bTableSplit.compareTo(bTableSplit) == 0);
+ assertTrue(cTableSplit.compareTo(cTableSplit) == 0);
+
+ assertTrue(aTableSplit.compareTo(bTableSplit) < 0);
+ assertTrue(bTableSplit.compareTo(aTableSplit) > 0);
+
+ assertTrue(aTableSplit.compareTo(cTableSplit) < 0);
+ assertTrue(cTableSplit.compareTo(aTableSplit) > 0);
+
+ assertTrue(bTableSplit.compareTo(cTableSplit) < 0);
+ assertTrue(cTableSplit.compareTo(bTableSplit) > 0);
+
+ assertTrue(cTableSplit.compareTo(aTableSplit) > 0);
+ }
+
+ @Test
+ @SuppressWarnings("deprecation")
+ public void testSplitTableEquals() {
+ byte[] tableA = Bytes.toBytes("tableA");
+ byte[] aaa = Bytes.toBytes("aaa");
+ byte[] ddd = Bytes.toBytes("ddd");
+ String locationA = "locationA";
+
+ TableSplit tablesplit = new TableSplit(tableA, aaa, ddd, locationA);
+
+ TableSplit tableB = new TableSplit(Bytes.toBytes("tableB"), aaa, ddd, locationA);
+ assertNotEquals(tablesplit.hashCode(), tableB.hashCode());
+ assertNotEquals(tablesplit, tableB);
+
+ TableSplit startBbb = new TableSplit(tableA, Bytes.toBytes("bbb"), ddd, locationA);
+ assertNotEquals(tablesplit.hashCode(), startBbb.hashCode());
+ assertNotEquals(tablesplit, startBbb);
+
+ TableSplit endEee = new TableSplit(tableA, aaa, Bytes.toBytes("eee"), locationA);
+ assertNotEquals(tablesplit.hashCode(), endEee.hashCode());
+ assertNotEquals(tablesplit, endEee);
+
+ TableSplit locationB = new TableSplit(tableA, aaa, ddd, "locationB");
+ assertNotEquals(tablesplit.hashCode(), locationB.hashCode());
+ assertNotEquals(tablesplit, locationB);
+
+ TableSplit same = new TableSplit(tableA, aaa, ddd, locationA);
+ assertEquals(tablesplit.hashCode(), same.hashCode());
+ assertEquals(tablesplit, same);
+ }
+
+ @Test
+ @SuppressWarnings("deprecation")
+ public void testToString() {
+ TableSplit split =
+ new TableSplit(TableName.valueOf(name.getMethodName()), "row-start".getBytes(), "row-end".getBytes(),
+ "location");
+ String str =
+ "HBase table split(table name: " + name.getMethodName() + ", start row: row-start, "
+ + "end row: row-end, region location: location)";
+ Assert.assertEquals(str, split.toString());
+
+ split = new TableSplit((TableName) null, null, null, null);
+ str =
+ "HBase table split(table name: null, start row: null, "
+ + "end row: null, region location: null)";
+ Assert.assertEquals(str, split.toString());
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableInputFormat.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableInputFormat.java
new file mode 100644
index 0000000..f39a7f5
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableInputFormat.java
@@ -0,0 +1,460 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Matchers.anyObject;
+import static org.mockito.Mockito.doAnswer;
+import static org.mockito.Mockito.doReturn;
+import static org.mockito.Mockito.doThrow;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.spy;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.NotServingRegionException;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
+import org.apache.hadoop.hbase.filter.Filter;
+import org.apache.hadoop.hbase.filter.RegexStringComparator;
+import org.apache.hadoop.hbase.filter.RowFilter;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.JobConfigurable;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.RunningJob;
+import org.apache.hadoop.mapred.lib.NullOutputFormat;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.stubbing.Answer;
+
+/**
+ * This tests the TableInputFormat and its recovery semantics
+ */
+@Category({MapReduceTests.class, LargeTests.class})
+public class TestTableInputFormat {
+
+ private static final Log LOG = LogFactory.getLog(TestTableInputFormat.class);
+
+ private final static HBaseTestingUtility UTIL = new HBaseTestingUtility();
+
+ static final byte[] FAMILY = Bytes.toBytes("family");
+
+ private static final byte[][] columns = new byte[][] { FAMILY };
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ UTIL.startMiniCluster();
+ }
+
+ @AfterClass
+ public static void afterClass() throws Exception {
+ UTIL.shutdownMiniCluster();
+ }
+
+ @Before
+ public void before() throws IOException {
+ LOG.info("before");
+ UTIL.ensureSomeRegionServersAvailable(1);
+ LOG.info("before done");
+ }
+
+ /**
+ * Set up a table with two rows and values.
+ *
+ * @param tableName the name of the table to create
+ * @return the created table, populated with rows "aaa" and "bbb"
+ * @throws IOException if table creation or the puts fail
+ */
+ public static Table createTable(byte[] tableName) throws IOException {
+ return createTable(tableName, new byte[][] { FAMILY });
+ }
+
+ /**
+ * Set up a table with two rows and values per column family.
+ *
+ * @param tableName the name of the table to create
+ * @param families the column families to create and populate
+ * @return the created table, populated with rows "aaa" and "bbb"
+ * @throws IOException if table creation or the puts fail
+ */
+ public static Table createTable(byte[] tableName, byte[][] families) throws IOException {
+ Table table = UTIL.createTable(TableName.valueOf(tableName), families);
+ Put p = new Put("aaa".getBytes());
+ for (byte[] family : families) {
+ p.addColumn(family, null, "value aaa".getBytes());
+ }
+ table.put(p);
+ p = new Put("bbb".getBytes());
+ for (byte[] family : families) {
+ p.addColumn(family, null, "value bbb".getBytes());
+ }
+ table.put(p);
+ return table;
+ }
+
+ /**
+ * Verify that the result and key have expected values.
+ *
+ * @param r
+ * @param key
+ * @param expectedKey
+ * @param expectedValue
+ * @return
+ */
+ static boolean checkResult(Result r, ImmutableBytesWritable key,
+ byte[] expectedKey, byte[] expectedValue) {
+ assertEquals(0, key.compareTo(expectedKey));
+ Map<byte[], byte[]> vals = r.getFamilyMap(FAMILY);
+ byte[] value = vals.values().iterator().next();
+ assertTrue(Arrays.equals(value, expectedValue));
+ return true; // if succeed
+ }
+
+ /**
+ * Create table data and run tests on specified htable using the
+ * o.a.h.hbase.mapred API.
+ *
+ * @param table
+ * @throws IOException
+ */
+ static void runTestMapred(Table table) throws IOException {
+ org.apache.hadoop.hbase.mapred.TableRecordReader trr =
+ new org.apache.hadoop.hbase.mapred.TableRecordReader();
+ trr.setStartRow("aaa".getBytes());
+ trr.setEndRow("zzz".getBytes());
+ trr.setHTable(table);
+ trr.setInputColumns(columns);
+
+ trr.init();
+ Result r = new Result();
+ ImmutableBytesWritable key = new ImmutableBytesWritable();
+
+ boolean more = trr.next(key, r);
+ assertTrue(more);
+ checkResult(r, key, "aaa".getBytes(), "value aaa".getBytes());
+
+ more = trr.next(key, r);
+ assertTrue(more);
+ checkResult(r, key, "bbb".getBytes(), "value bbb".getBytes());
+
+ // no more data
+ more = trr.next(key, r);
+ assertFalse(more);
+ }
+
+ /**
+ * Create a table that IOE's on first scanner next call
+ *
+ * @throws IOException
+ */
+ static Table createIOEScannerTable(byte[] name, final int failCnt)
+ throws IOException {
+ // build up mock scanner machinery that fails for the first failCnt invocations
+ Answer<ResultScanner> a = new Answer<ResultScanner>() {
+ int cnt = 0;
+
+ @Override
+ public ResultScanner answer(InvocationOnMock invocation) throws Throwable {
+ // for the first failCnt invocations, return the broken mock scanner
+ if (cnt++ < failCnt) {
+ // create mock ResultScanner that always fails.
+ Scan scan = mock(Scan.class);
+ doReturn("bogus".getBytes()).when(scan).getStartRow(); // avoid npe
+ ResultScanner scanner = mock(ResultScanner.class);
+ // simulate TimeoutException / IOException
+ doThrow(new IOException("Injected exception")).when(scanner).next();
+ return scanner;
+ }
+
+ // otherwise return the real scanner.
+ return (ResultScanner) invocation.callRealMethod();
+ }
+ };
+
+ Table htable = spy(createTable(name));
+ doAnswer(a).when(htable).getScanner((Scan) anyObject());
+ return htable;
+ }
+
+ /**
+ * Create a table that throws a DoNotRetryIOException on the first scanner
+ * next() call.
+ *
+ * @throws IOException
+ */
+ static Table createDNRIOEScannerTable(byte[] name, final int failCnt)
+ throws IOException {
+ // build up mock scanner machinery that fails for the first failCnt invocations
+ Answer<ResultScanner> a = new Answer<ResultScanner>() {
+ int cnt = 0;
+
+ @Override
+ public ResultScanner answer(InvocationOnMock invocation) throws Throwable {
+ // for the first failCnt invocations, return the broken mock scanner
+ if (cnt++ < failCnt) {
+ // create mock ResultScanner that always fails.
+ Scan scan = mock(Scan.class);
+ doReturn("bogus".getBytes()).when(scan).getStartRow(); // avoid npe
+ ResultScanner scanner = mock(ResultScanner.class);
+
+ invocation.callRealMethod(); // simulate NotServingRegionException
+ doThrow(
+ new NotServingRegionException("Injected simulated TimeoutException"))
+ .when(scanner).next();
+ return scanner;
+ }
+
+ // otherwise return the real scanner.
+ return (ResultScanner) invocation.callRealMethod();
+ }
+ };
+
+ Table htable = spy(createTable(name));
+ doAnswer(a).when(htable).getScanner((Scan) anyObject());
+ return htable;
+ }
+
+ /**
+ * Run test assuming no errors using mapred api.
+ *
+ * @throws IOException
+ */
+ @Test
+ public void testTableRecordReader() throws IOException {
+ Table table = createTable("table1".getBytes());
+ runTestMapred(table);
+ }
+
+ /**
+ * Run test assuming Scanner IOException failure using mapred api.
+ *
+ * @throws IOException
+ */
+ @Test
+ public void testTableRecordReaderScannerFail() throws IOException {
+ Table htable = createIOEScannerTable("table2".getBytes(), 1);
+ runTestMapred(htable);
+ }
+
+ /**
+ * Run test assuming Scanner IOException failure using mapred api.
+ *
+ * @throws IOException
+ */
+ @Test(expected = IOException.class)
+ public void testTableRecordReaderScannerFailTwice() throws IOException {
+ Table htable = createIOEScannerTable("table3".getBytes(), 2);
+ runTestMapred(htable);
+ }
+
+ /**
+ * Run test assuming NotServingRegionException using mapred api.
+ *
+ * @throws org.apache.hadoop.hbase.DoNotRetryIOException
+ */
+ @Test
+ public void testTableRecordReaderScannerTimeout() throws IOException {
+ Table htable = createDNRIOEScannerTable("table4".getBytes(), 1);
+ runTestMapred(htable);
+ }
+
+ /**
+ * Run test assuming NotServingRegionException using mapred api.
+ *
+ * @throws org.apache.hadoop.hbase.DoNotRetryIOException
+ */
+ @Test(expected = org.apache.hadoop.hbase.NotServingRegionException.class)
+ public void testTableRecordReaderScannerTimeoutTwice() throws IOException {
+ Table htable = createDNRIOEScannerTable("table5".getBytes(), 2);
+ runTestMapred(htable);
+ }
+
+ /**
+ * Verify the example we present in javadocs on TableInputFormatBase
+ */
+ @Test
+ public void testExtensionOfTableInputFormatBase() throws IOException {
+ LOG.info("testing use of an InputFormat taht extends InputFormatBase");
+ final Table table = createTable(Bytes.toBytes("exampleTable"),
+ new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") });
+ testInputFormat(ExampleTIF.class);
+ }
+
+ @Test
+ public void testDeprecatedExtensionOfTableInputFormatBase() throws IOException {
+ LOG.info("testing use of an InputFormat taht extends InputFormatBase, "
+ + "as it was given in 0.98.");
+ final Table table = createTable(Bytes.toBytes("exampleDeprecatedTable"),
+ new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") });
+ testInputFormat(ExampleDeprecatedTIF.class);
+ }
+
+ @Test
+ public void testJobConfigurableExtensionOfTableInputFormatBase() throws IOException {
+ LOG.info("testing use of an InputFormat taht extends InputFormatBase, "
+ + "using JobConfigurable.");
+ final Table table = createTable(Bytes.toBytes("exampleJobConfigurableTable"),
+ new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") });
+ testInputFormat(ExampleJobConfigurableTIF.class);
+ }
+
+ void testInputFormat(Class<? extends InputFormat> clazz) throws IOException {
+ Configuration conf = UTIL.getConfiguration();
+ final JobConf job = new JobConf(conf);
+ job.setInputFormat(clazz);
+ job.setOutputFormat(NullOutputFormat.class);
+ job.setMapperClass(ExampleVerifier.class);
+ job.setNumReduceTasks(0);
+ LOG.debug("submitting job.");
+ final RunningJob run = JobClient.runJob(job);
+ assertTrue("job failed!", run.isSuccessful());
+ assertEquals("Saw the wrong number of instances of the filtered-for row.", 2, run.getCounters()
+ .findCounter(TestTableInputFormat.class.getName() + ":row", "aaa").getCounter());
+ assertEquals("Saw any instances of the filtered out row.", 0, run.getCounters()
+ .findCounter(TestTableInputFormat.class.getName() + ":row", "bbb").getCounter());
+ assertEquals("Saw the wrong number of instances of columnA.", 1, run.getCounters()
+ .findCounter(TestTableInputFormat.class.getName() + ":family", "columnA").getCounter());
+ assertEquals("Saw the wrong number of instances of columnB.", 1, run.getCounters()
+ .findCounter(TestTableInputFormat.class.getName() + ":family", "columnB").getCounter());
+ assertEquals("Saw the wrong count of values for the filtered-for row.", 2, run.getCounters()
+ .findCounter(TestTableInputFormat.class.getName() + ":value", "value aaa").getCounter());
+ assertEquals("Saw the wrong count of values for the filtered-out row.", 0, run.getCounters()
+ .findCounter(TestTableInputFormat.class.getName() + ":value", "value bbb").getCounter());
+ }
+
+ public static class ExampleVerifier implements TableMap<NullWritable, NullWritable> {
+
+ @Override
+ public void configure(JobConf conf) {
+ }
+
+ @Override
+ public void map(ImmutableBytesWritable key, Result value,
+ OutputCollector<NullWritable,NullWritable> output,
+ Reporter reporter) throws IOException {
+ for (Cell cell : value.listCells()) {
+ reporter.getCounter(TestTableInputFormat.class.getName() + ":row",
+ Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()))
+ .increment(1L);
+ reporter.getCounter(TestTableInputFormat.class.getName() + ":family",
+ Bytes.toString(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength()))
+ .increment(1L);
+ reporter.getCounter(TestTableInputFormat.class.getName() + ":value",
+ Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()))
+ .increment(1L);
+ }
+ }
+
+ @Override
+ public void close() {
+ }
+
+ }
+
+ public static class ExampleDeprecatedTIF extends TableInputFormatBase implements JobConfigurable {
+
+ @Override
+ public void configure(JobConf job) {
+ try {
+ Connection connection = ConnectionFactory.createConnection(job);
+ Table exampleTable = connection.getTable(TableName.valueOf("exampleDeprecatedTable"));
+ // mandatory
+ initializeTable(connection, exampleTable.getName());
+ byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
+ Bytes.toBytes("columnB") };
+ // mandatory
+ setInputColumns(inputColumns);
+ Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*"));
+ // optional
+ setRowFilter(exampleFilter);
+ } catch (IOException exception) {
+ throw new RuntimeException("Failed to configure for job.", exception);
+ }
+ }
+
+ }
+
+ public static class ExampleJobConfigurableTIF extends ExampleTIF implements JobConfigurable {
+
+ @Override
+ public void configure(JobConf job) {
+ try {
+ initialize(job);
+ } catch (IOException exception) {
+ throw new RuntimeException("Failed to initialize.", exception);
+ }
+ }
+
+ @Override
+ protected void initialize(JobConf job) throws IOException {
+ initialize(job, "exampleJobConfigurableTable");
+ }
+ }
+
+
+ public static class ExampleTIF extends TableInputFormatBase {
+
+ @Override
+ protected void initialize(JobConf job) throws IOException {
+ initialize(job, "exampleTable");
+ }
+
+ protected void initialize(JobConf job, String table) throws IOException {
+ Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create(job));
+ TableName tableName = TableName.valueOf(table);
+ // mandatory
+ initializeTable(connection, tableName);
+ byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
+ Bytes.toBytes("columnB") };
+ // mandatory
+ setInputColumns(inputColumns);
+ Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*"));
+ // optional
+ setRowFilter(exampleFilter);
+ }
+
+ }
+
+}
+
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableMapReduce.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableMapReduce.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableMapReduce.java
new file mode 100644
index 0000000..3f905cf
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableMapReduce.java
@@ -0,0 +1,103 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.TestTableMapReduceBase;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.RunningJob;
+import org.junit.experimental.categories.Category;
+
+/**
+ * Test Map/Reduce job over HBase tables. The map/reduce process we're testing
+ * on our tables is simple - take every row in the table, reverse the value of
+ * a particular cell, and write it back to the table.
+ */
+@Category({MapReduceTests.class, LargeTests.class})
+@SuppressWarnings("deprecation")
+public class TestTableMapReduce extends TestTableMapReduceBase {
+ private static final Log LOG =
+ LogFactory.getLog(TestTableMapReduce.class.getName());
+
+ protected Log getLog() { return LOG; }
+
+ /**
+ * Pass the given key and processed record to reduce
+ */
+ static class ProcessContentsMapper extends MapReduceBase implements
+ TableMap<ImmutableBytesWritable, Put> {
+
+ /**
+ * Pass the key and reversed value to reduce
+ */
+ public void map(ImmutableBytesWritable key, Result value,
+ OutputCollector<ImmutableBytesWritable, Put> output,
+ Reporter reporter)
+ throws IOException {
+ output.collect(key, TestTableMapReduceBase.map(key, value));
+ }
+ }
+
+ @Override
+ protected void runTestOnTable(Table table) throws IOException {
+ JobConf jobConf = null;
+ try {
+ LOG.info("Before map/reduce startup");
+ jobConf = new JobConf(UTIL.getConfiguration(), TestTableMapReduce.class);
+ jobConf.setJobName("process column contents");
+ jobConf.setNumReduceTasks(1);
+ TableMapReduceUtil.initTableMapJob(table.getName().getNameAsString(),
+ Bytes.toString(INPUT_FAMILY), ProcessContentsMapper.class,
+ ImmutableBytesWritable.class, Put.class, jobConf);
+ TableMapReduceUtil.initTableReduceJob(table.getName().getNameAsString(),
+ IdentityTableReduce.class, jobConf);
+
+ LOG.info("Started " + table.getName());
+ RunningJob job = JobClient.runJob(jobConf);
+ assertTrue(job.isSuccessful());
+ LOG.info("After map/reduce completion");
+
+ // verify map-reduce results
+ verify(table.getName());
+ } finally {
+ if (jobConf != null) {
+ FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
+ }
+ }
+ }
+}
+
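The per-row transformation described in the class javadoc above (read one cell, reverse its value, write it back) is implemented in TestTableMapReduceBase.map(...), which this class delegates to. A rough sketch of that transformation, with assumed family names since the real constants live in the base class:

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;

class ReverseCellSketch {
  // Assumed family names for illustration; the real ones come from TestTableMapReduceBase.
  static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
  static final byte[] OUTPUT_FAMILY = Bytes.toBytes("text");

  static Put reverseCell(ImmutableBytesWritable key, Result value) {
    // Read the input cell, reverse its string value, and emit a Put that writes it back.
    String original = Bytes.toString(value.getValue(INPUT_FAMILY, null));
    String reversed = new StringBuilder(original).reverse().toString();
    Put put = new Put(key.get());
    put.addColumn(OUTPUT_FAMILY, null, Bytes.toBytes(reversed));
    return put;
  }
}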
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableMapReduceUtil.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableMapReduceUtil.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableMapReduceUtil.java
new file mode 100644
index 0000000..ac2f20d
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableMapReduceUtil.java
@@ -0,0 +1,272 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.RunningJob;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.ImmutableMap;
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.ImmutableSet;
+
+@Category({MapReduceTests.class, LargeTests.class})
+public class TestTableMapReduceUtil {
+
+ private static final Log LOG = LogFactory
+ .getLog(TestTableMapReduceUtil.class);
+
+ private static Table presidentsTable;
+ private static final String TABLE_NAME = "People";
+
+ private static final byte[] COLUMN_FAMILY = Bytes.toBytes("info");
+ private static final byte[] COLUMN_QUALIFIER = Bytes.toBytes("name");
+
+ private static ImmutableSet<String> presidentsRowKeys = ImmutableSet.of(
+ "president1", "president2", "president3");
+ private static Iterator<String> presidentNames = ImmutableSet.of(
+ "John F. Kennedy", "George W. Bush", "Barack Obama").iterator();
+
+ private static ImmutableSet<String> actorsRowKeys = ImmutableSet.of("actor1",
+ "actor2");
+ private static Iterator<String> actorNames = ImmutableSet.of(
+ "Jack Nicholson", "Martin Freeman").iterator();
+
+ private static String PRESIDENT_PATTERN = "president";
+ private static String ACTOR_PATTERN = "actor";
+ private static ImmutableMap<String, ImmutableSet<String>> relation = ImmutableMap
+ .of(PRESIDENT_PATTERN, presidentsRowKeys, ACTOR_PATTERN, actorsRowKeys);
+
+ private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ UTIL.startMiniCluster();
+ presidentsTable = createAndFillTable(TableName.valueOf(TABLE_NAME));
+ }
+
+ @AfterClass
+ public static void afterClass() throws Exception {
+ UTIL.shutdownMiniCluster();
+ }
+
+ @Before
+ public void before() throws IOException {
+ LOG.info("before");
+ UTIL.ensureSomeRegionServersAvailable(1);
+ LOG.info("before done");
+ }
+
+ public static Table createAndFillTable(TableName tableName) throws IOException {
+ Table table = UTIL.createTable(tableName, COLUMN_FAMILY);
+ createPutCommand(table);
+ return table;
+ }
+
+ private static void createPutCommand(Table table) throws IOException {
+ for (String president : presidentsRowKeys) {
+ if (presidentNames.hasNext()) {
+ Put p = new Put(Bytes.toBytes(president));
+ p.addColumn(COLUMN_FAMILY, COLUMN_QUALIFIER, Bytes.toBytes(presidentNames.next()));
+ table.put(p);
+ }
+ }
+
+ for (String actor : actorsRowKeys) {
+ if (actorNames.hasNext()) {
+ Put p = new Put(Bytes.toBytes(actor));
+ p.addColumn(COLUMN_FAMILY, COLUMN_QUALIFIER, Bytes.toBytes(actorNames.next()));
+ table.put(p);
+ }
+ }
+ }
+
+ /**
+ * Check that the number of reduce tasks for the given job configuration
+ * does not exceed the number of regions for the given table.
+ */
+ @Test
+ public void shouldNumberOfReduceTaskNotExceedNumberOfRegionsForGivenTable()
+ throws IOException {
+ Assert.assertNotNull(presidentsTable);
+ Configuration cfg = UTIL.getConfiguration();
+ JobConf jobConf = new JobConf(cfg);
+ TableMapReduceUtil.setNumReduceTasks(TABLE_NAME, jobConf);
+ TableMapReduceUtil.limitNumReduceTasks(TABLE_NAME, jobConf);
+ TableMapReduceUtil.setScannerCaching(jobConf, 100);
+ assertEquals(1, jobConf.getNumReduceTasks());
+ assertEquals(100, jobConf.getInt("hbase.client.scanner.caching", 0));
+
+ jobConf.setNumReduceTasks(10);
+ TableMapReduceUtil.setNumMapTasks(TABLE_NAME, jobConf);
+ TableMapReduceUtil.limitNumReduceTasks(TABLE_NAME, jobConf);
+ assertEquals(1, jobConf.getNumReduceTasks());
+ }
+
+ @Test
+ public void shouldNumberOfMapTaskNotExceedNumberOfRegionsForGivenTable()
+ throws IOException {
+ Configuration cfg = UTIL.getConfiguration();
+ JobConf jobConf = new JobConf(cfg);
+ TableMapReduceUtil.setNumReduceTasks(TABLE_NAME, jobConf);
+ TableMapReduceUtil.limitNumMapTasks(TABLE_NAME, jobConf);
+ assertEquals(1, jobConf.getNumMapTasks());
+
+ jobConf.setNumMapTasks(10);
+ TableMapReduceUtil.setNumMapTasks(TABLE_NAME, jobConf);
+ TableMapReduceUtil.limitNumMapTasks(TABLE_NAME, jobConf);
+ assertEquals(1, jobConf.getNumMapTasks());
+ }
+
+ @Test
+ @SuppressWarnings("deprecation")
+ public void shouldBeValidMapReduceEvaluation() throws Exception {
+ Configuration cfg = UTIL.getConfiguration();
+ JobConf jobConf = new JobConf(cfg);
+ try {
+ jobConf.setJobName("process row task");
+ jobConf.setNumReduceTasks(1);
+ TableMapReduceUtil.initTableMapJob(TABLE_NAME, new String(COLUMN_FAMILY),
+ ClassificatorMapper.class, ImmutableBytesWritable.class, Put.class,
+ jobConf);
+ TableMapReduceUtil.initTableReduceJob(TABLE_NAME,
+ ClassificatorRowReduce.class, jobConf);
+ RunningJob job = JobClient.runJob(jobConf);
+ assertTrue(job.isSuccessful());
+ } finally {
+ if (jobConf != null)
+ FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
+ }
+ }
+
+ @Test
+ @SuppressWarnings("deprecation")
+ public void shouldBeValidMapReduceWithPartitionerEvaluation()
+ throws IOException {
+ Configuration cfg = UTIL.getConfiguration();
+ JobConf jobConf = new JobConf(cfg);
+ try {
+ jobConf.setJobName("process row task");
+ jobConf.setNumReduceTasks(2);
+ TableMapReduceUtil.initTableMapJob(TABLE_NAME, new String(COLUMN_FAMILY),
+ ClassificatorMapper.class, ImmutableBytesWritable.class, Put.class,
+ jobConf);
+
+ TableMapReduceUtil.initTableReduceJob(TABLE_NAME,
+ ClassificatorRowReduce.class, jobConf, HRegionPartitioner.class);
+ RunningJob job = JobClient.runJob(jobConf);
+ assertTrue(job.isSuccessful());
+ } finally {
+ if (jobConf != null)
+ FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
+ }
+ }
+
+ @SuppressWarnings("deprecation")
+ static class ClassificatorRowReduce extends MapReduceBase implements
+ TableReduce<ImmutableBytesWritable, Put> {
+
+ @Override
+ public void reduce(ImmutableBytesWritable key, Iterator<Put> values,
+ OutputCollector<ImmutableBytesWritable, Put> output, Reporter reporter)
+ throws IOException {
+ String strKey = Bytes.toString(key.get());
+ List<Put> result = new ArrayList<>();
+ while (values.hasNext())
+ result.add(values.next());
+
+ if (relation.keySet().contains(strKey)) {
+ Set<String> set = relation.get(strKey);
+ if (set != null) {
+ assertEquals(set.size(), result.size());
+ } else {
+ throwAccertionError("Test infrastructure error: set is null");
+ }
+ } else {
+ throwAccertionError("Test infrastructure error: key not found in map");
+ }
+ }
+
+ private void throwAssertionError(String errorMessage) throws AssertionError {
+ throw new AssertionError(errorMessage);
+ }
+ }
+
+ @SuppressWarnings("deprecation")
+ static class ClassificatorMapper extends MapReduceBase implements
+ TableMap<ImmutableBytesWritable, Put> {
+
+ @Override
+ public void map(ImmutableBytesWritable row, Result result,
+ OutputCollector<ImmutableBytesWritable, Put> outCollector,
+ Reporter reporter) throws IOException {
+ String rowKey = Bytes.toString(result.getRow());
+ final ImmutableBytesWritable pKey = new ImmutableBytesWritable(
+ Bytes.toBytes(PRESIDENT_PATTERN));
+ final ImmutableBytesWritable aKey = new ImmutableBytesWritable(
+ Bytes.toBytes(ACTOR_PATTERN));
+ ImmutableBytesWritable outKey = null;
+
+ if (rowKey.startsWith(PRESIDENT_PATTERN)) {
+ outKey = pKey;
+ } else if (rowKey.startsWith(ACTOR_PATTERN)) {
+ outKey = aKey;
+ } else {
+ throw new AssertionError("unexpected rowKey");
+ }
+
+ String name = Bytes.toString(result.getValue(COLUMN_FAMILY,
+ COLUMN_QUALIFIER));
+ outCollector.collect(outKey,
+ new Put(Bytes.toBytes("rowKey2"))
+ .addColumn(COLUMN_FAMILY, COLUMN_QUALIFIER, Bytes.toBytes(name)));
+ }
+ }
+}
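The tests above exercise the classic-API (org.apache.hadoop.hbase.mapred) TableMapReduceUtil helpers: initTableMapJob/initTableReduceJob wire a table scan into a JobConf, and limitNumMapTasks/limitNumReduceTasks cap parallelism at the table's region count. A minimal map-only sketch of the same wiring, assuming a reachable cluster; the table name and column family below are illustrative, and IdentityTableMap simply re-emits each (row key, Result) pair:

package org.apache.hadoop.hbase.mapred;   // same package as the helpers above (sketch only)

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.lib.NullOutputFormat;

public class ClassicTableJobSketch {
  public static void main(String[] args) throws Exception {
    JobConf jobConf = new JobConf(HBaseConfiguration.create(), ClassicTableJobSketch.class);
    jobConf.setJobName("identity scan of table People");    // table name is illustrative
    // Map-only job: IdentityTableMap re-emits each (row key, Result) pair unchanged.
    TableMapReduceUtil.initTableMapJob("People", "info",
        IdentityTableMap.class, ImmutableBytesWritable.class, Result.class, jobConf);
    // Cap map parallelism at the table's region count, as the tests above verify.
    TableMapReduceUtil.limitNumMapTasks("People", jobConf);
    jobConf.setNumReduceTasks(0);
    jobConf.setOutputFormat(NullOutputFormat.class);
    RunningJob job = JobClient.runJob(jobConf);              // blocks until completion
    System.out.println("job successful: " + job.isSuccessful());
  }
}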
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java
new file mode 100644
index 0000000..7b6e684
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java
@@ -0,0 +1,571 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.UUID;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.TableNotFoundException;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.hfile.CacheConfig;
+import org.apache.hadoop.hbase.io.hfile.HFile;
+import org.apache.hadoop.hbase.io.hfile.HFileScanner;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.Utils.OutputFileUtils.OutputFilesFilter;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.ExpectedException;
+
+@Category({VerySlowMapReduceTests.class, LargeTests.class})
+public class TestImportTsv implements Configurable {
+
+ private static final Log LOG = LogFactory.getLog(TestImportTsv.class);
+ protected static final String NAME = TestImportTsv.class.getSimpleName();
+ protected static HBaseTestingUtility util = new HBaseTestingUtility();
+
+ // Delete the tmp directory after running doMROnTableTest. Boolean. Default is true.
+ protected static final String DELETE_AFTER_LOAD_CONF = NAME + ".deleteAfterLoad";
+
+ /**
+ * Force use of combiner in doMROnTableTest. Boolean. Default is true.
+ */
+ protected static final String FORCE_COMBINER_CONF = NAME + ".forceCombiner";
+
+ private final String FAMILY = "FAM";
+ private TableName tn;
+ private Map<String, String> args;
+
+ @Rule
+ public ExpectedException exception = ExpectedException.none();
+
+ public Configuration getConf() {
+ return util.getConfiguration();
+ }
+
+ public void setConf(Configuration conf) {
+ throw new IllegalArgumentException("setConf not supported");
+ }
+
+ @BeforeClass
+ public static void provisionCluster() throws Exception {
+ util.startMiniCluster();
+ }
+
+ @AfterClass
+ public static void releaseCluster() throws Exception {
+ util.shutdownMiniCluster();
+ }
+
+ @Before
+ public void setup() throws Exception {
+ tn = TableName.valueOf("test-" + UUID.randomUUID());
+ args = new HashMap<>();
+ // Prepare the arguments required for the test.
+ args.put(ImportTsv.COLUMNS_CONF_KEY, "HBASE_ROW_KEY,FAM:A,FAM:B");
+ args.put(ImportTsv.SEPARATOR_CONF_KEY, "\u001b");
+ }
+
+ @Test
+ public void testMROnTable() throws Exception {
+ util.createTable(tn, FAMILY);
+ doMROnTableTest(null, 1);
+ util.deleteTable(tn);
+ }
+
+ @Test
+ public void testMROnTableWithTimestamp() throws Exception {
+ util.createTable(tn, FAMILY);
+ args.put(ImportTsv.COLUMNS_CONF_KEY, "HBASE_ROW_KEY,HBASE_TS_KEY,FAM:A,FAM:B");
+ args.put(ImportTsv.SEPARATOR_CONF_KEY, ",");
+ String data = "KEY,1234,VALUE1,VALUE2\n";
+
+ doMROnTableTest(data, 1);
+ util.deleteTable(tn);
+ }
+
+ @Test
+ public void testMROnTableWithCustomMapper()
+ throws Exception {
+ util.createTable(tn, FAMILY);
+ args.put(ImportTsv.MAPPER_CONF_KEY,
+ "org.apache.hadoop.hbase.mapreduce.TsvImporterCustomTestMapper");
+
+ doMROnTableTest(null, 3);
+ util.deleteTable(tn);
+ }
+
+ @Test
+ public void testBulkOutputWithoutAnExistingTable() throws Exception {
+ // Prepare the arguments required for the test.
+ Path hfiles = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");
+ args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, hfiles.toString());
+
+ doMROnTableTest(null, 3);
+ util.deleteTable(tn);
+ }
+
+ @Test
+ public void testBulkOutputWithAnExistingTable() throws Exception {
+ util.createTable(tn, FAMILY);
+
+ // Prepare the arguments required for the test.
+ Path hfiles = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");
+ args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, hfiles.toString());
+
+ doMROnTableTest(null, 3);
+ util.deleteTable(tn);
+ }
+
+ @Test
+ public void testBulkOutputWithAnExistingTableNoStrictTrue() throws Exception {
+ util.createTable(tn, FAMILY);
+
+ // Prepare the arguments required for the test.
+ Path hfiles = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");
+ args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, hfiles.toString());
+ args.put(ImportTsv.NO_STRICT_COL_FAMILY, "true");
+ doMROnTableTest(null, 3);
+ util.deleteTable(tn);
+ }
+
+ @Test
+ public void testJobConfigurationsWithTsvImporterTextMapper() throws Exception {
+ Path bulkOutputPath = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()),"hfiles");
+ String INPUT_FILE = "InputFile1.csv";
+ // Prepare the arguments required for the test.
+ String[] args =
+ new String[] {
+ "-D" + ImportTsv.MAPPER_CONF_KEY
+ + "=org.apache.hadoop.hbase.mapreduce.TsvImporterTextMapper",
+ "-D" + ImportTsv.COLUMNS_CONF_KEY
+ + "=HBASE_ROW_KEY,FAM:A,FAM:B",
+ "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=,",
+ "-D" + ImportTsv.BULK_OUTPUT_CONF_KEY + "=" + bulkOutputPath.toString(),
+ tn.getNameAsString(),
+ INPUT_FILE
+ };
+ assertEquals("running test job configuration failed.", 0, ToolRunner.run(
+ new Configuration(util.getConfiguration()),
+ new ImportTsv() {
+ @Override
+ public int run(String[] args) throws Exception {
+ Job job = createSubmittableJob(getConf(), args);
+ assertTrue(job.getMapperClass().equals(TsvImporterTextMapper.class));
+ assertTrue(job.getReducerClass().equals(TextSortReducer.class));
+ assertTrue(job.getMapOutputValueClass().equals(Text.class));
+ return 0;
+ }
+ }, args));
+ // Delete table created by createSubmittableJob.
+ util.deleteTable(tn);
+ }
+
+ @Test
+ public void testBulkOutputWithTsvImporterTextMapper() throws Exception {
+ Path bulkOutputPath = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()),"hfiles");
+ args.put(ImportTsv.MAPPER_CONF_KEY, "org.apache.hadoop.hbase.mapreduce.TsvImporterTextMapper");
+ args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, bulkOutputPath.toString());
+ String data = "KEY\u001bVALUE4\u001bVALUE8\n";
+ doMROnTableTest(data, 4);
+ util.deleteTable(tn);
+ }
+
+ @Test
+ public void testWithoutAnExistingTableAndCreateTableSetToNo() throws Exception {
+ String[] args = new String[] { tn.getNameAsString(), "/inputFile" };
+
+ Configuration conf = new Configuration(util.getConfiguration());
+ conf.set(ImportTsv.COLUMNS_CONF_KEY, "HBASE_ROW_KEY,FAM:A");
+ conf.set(ImportTsv.BULK_OUTPUT_CONF_KEY, "/output");
+ conf.set(ImportTsv.CREATE_TABLE_CONF_KEY, "no");
+ exception.expect(TableNotFoundException.class);
+ assertEquals("running test job configuration failed.", 0,
+ ToolRunner.run(new Configuration(util.getConfiguration()), new ImportTsv() {
+ @Override public int run(String[] args) throws Exception {
+ createSubmittableJob(getConf(), args);
+ return 0;
+ }
+ }, args));
+ }
+
+ @Test
+ public void testMRWithoutAnExistingTable() throws Exception {
+ String[] args =
+ new String[] { tn.getNameAsString(), "/inputFile" };
+
+ exception.expect(TableNotFoundException.class);
+ assertEquals("running test job configuration failed.", 0, ToolRunner.run(
+ new Configuration(util.getConfiguration()),
+ new ImportTsv() {
+ @Override
+ public int run(String[] args) throws Exception {
+ createSubmittableJob(getConf(), args);
+ return 0;
+ }
+ }, args));
+ }
+
+ @Test
+ public void testJobConfigurationsWithDryMode() throws Exception {
+ Path bulkOutputPath = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()),"hfiles");
+ String INPUT_FILE = "InputFile1.csv";
+ // Prepare the arguments required for the test.
+ String[] argsArray = new String[] {
+ "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B",
+ "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=,",
+ "-D" + ImportTsv.BULK_OUTPUT_CONF_KEY + "=" + bulkOutputPath.toString(),
+ "-D" + ImportTsv.DRY_RUN_CONF_KEY + "=true",
+ tn.getNameAsString(),
+ INPUT_FILE };
+ assertEquals("running test job configuration failed.", 0, ToolRunner.run(
+ new Configuration(util.getConfiguration()),
+ new ImportTsv() {
+ @Override
+ public int run(String[] args) throws Exception {
+ Job job = createSubmittableJob(getConf(), args);
+ assertTrue(job.getOutputFormatClass().equals(NullOutputFormat.class));
+ return 0;
+ }
+ }, argsArray));
+ // Delete table created by createSubmittableJob.
+ util.deleteTable(tn);
+ }
+
+ @Test
+ public void testDryModeWithoutBulkOutputAndTableExists() throws Exception {
+ util.createTable(tn, FAMILY);
+ args.put(ImportTsv.DRY_RUN_CONF_KEY, "true");
+ doMROnTableTest(null, 1);
+ // Dry mode should not delete an existing table. If it's not present,
+ // this will throw TableNotFoundException.
+ util.deleteTable(tn);
+ }
+
+ /**
+ * If table is not present in non-bulk mode, dry run should fail just like
+ * normal mode.
+ */
+ @Test
+ public void testDryModeWithoutBulkOutputAndTableDoesNotExists() throws Exception {
+ args.put(ImportTsv.DRY_RUN_CONF_KEY, "true");
+ exception.expect(TableNotFoundException.class);
+ doMROnTableTest(null, 1);
+ }
+
+ @Test public void testDryModeWithBulkOutputAndTableExists() throws Exception {
+ util.createTable(tn, FAMILY);
+ // Prepare the arguments required for the test.
+ Path hfiles = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");
+ args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, hfiles.toString());
+ args.put(ImportTsv.DRY_RUN_CONF_KEY, "true");
+ doMROnTableTest(null, 1);
+ // Dry mode should not delete an existing table. If it's not present,
+ // this will throw TableNotFoundException.
+ util.deleteTable(tn);
+ }
+
+ /**
+ * If table is not present in bulk mode and create.table is not set to yes,
+ * import should fail with TableNotFoundException.
+ */
+ @Test
+ public void testDryModeWithBulkOutputAndTableDoesNotExistsCreateTableSetToNo() throws
+ Exception {
+ // Prepare the arguments required for the test.
+ Path hfiles = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");
+ args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, hfiles.toString());
+ args.put(ImportTsv.DRY_RUN_CONF_KEY, "true");
+ args.put(ImportTsv.CREATE_TABLE_CONF_KEY, "no");
+ exception.expect(TableNotFoundException.class);
+ doMROnTableTest(null, 1);
+ }
+
+ @Test
+ public void testDryModeWithBulkModeAndTableDoesNotExistsCreateTableSetToYes() throws Exception {
+ // Prepare the arguments required for the test.
+ Path hfiles = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");
+ args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, hfiles.toString());
+ args.put(ImportTsv.DRY_RUN_CONF_KEY, "true");
+ args.put(ImportTsv.CREATE_TABLE_CONF_KEY, "yes");
+ doMROnTableTest(null, 1);
+ // Verify temporary table was deleted.
+ exception.expect(TableNotFoundException.class);
+ util.deleteTable(tn);
+ }
+
+ /**
+ * If there are invalid data rows as inputs, then only those rows should be ignored.
+ */
+ @Test
+ public void testTsvImporterTextMapperWithInvalidData() throws Exception {
+ Path bulkOutputPath = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");
+ args.put(ImportTsv.MAPPER_CONF_KEY, "org.apache.hadoop.hbase.mapreduce.TsvImporterTextMapper");
+ args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, bulkOutputPath.toString());
+ args.put(ImportTsv.COLUMNS_CONF_KEY, "HBASE_ROW_KEY,HBASE_TS_KEY,FAM:A,FAM:B");
+ args.put(ImportTsv.SEPARATOR_CONF_KEY, ",");
+ // 3 Rows of data as input. 2 Rows are valid and 1 row is invalid as it doesn't have TS
+ String data = "KEY,1234,VALUE1,VALUE2\nKEY\nKEY,1235,VALUE1,VALUE2\n";
+ doMROnTableTest(util, tn, FAMILY, data, args, 1, 4);
+ util.deleteTable(tn);
+ }
+
+ @Test
+ public void testSkipEmptyColumns() throws Exception {
+ Path bulkOutputPath = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");
+ args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, bulkOutputPath.toString());
+ args.put(ImportTsv.COLUMNS_CONF_KEY, "HBASE_ROW_KEY,HBASE_TS_KEY,FAM:A,FAM:B");
+ args.put(ImportTsv.SEPARATOR_CONF_KEY, ",");
+ args.put(ImportTsv.SKIP_EMPTY_COLUMNS, "true");
+ // 2 rows of data as input. Both rows are valid and only 3 of the 4 columns are non-empty
+ String data = "KEY,1234,VALUE1,VALUE2\nKEY,1235,,VALUE2\n";
+ doMROnTableTest(util, tn, FAMILY, data, args, 1, 3);
+ util.deleteTable(tn);
+ }
+
+ private Tool doMROnTableTest(String data, int valueMultiplier) throws Exception {
+ return doMROnTableTest(util, tn, FAMILY, data, args, valueMultiplier,-1);
+ }
+
+ protected static Tool doMROnTableTest(HBaseTestingUtility util, TableName table,
+ String family, String data, Map<String, String> args) throws Exception {
+ return doMROnTableTest(util, table, family, data, args, 1,-1);
+ }
+
+ /**
+ * Run an ImportTsv job and perform basic validation on the results.
+ * Returns the ImportTsv <code>Tool</code> instance so that other tests can
+ * inspect it for further validation as necessary. This method is static to
+ * ensure non-reliance on the instance's util/conf facilities.
+ * @param args Any arguments to pass BEFORE inputFile path is appended.
+ * @return The Tool instance used to run the test.
+ */
+ protected static Tool doMROnTableTest(HBaseTestingUtility util, TableName table,
+ String family, String data, Map<String, String> args, int valueMultiplier,int expectedKVCount)
+ throws Exception {
+ Configuration conf = new Configuration(util.getConfiguration());
+
+ // populate input file
+ FileSystem fs = FileSystem.get(conf);
+ Path inputPath = fs.makeQualified(
+ new Path(util.getDataTestDirOnTestFS(table.getNameAsString()), "input.dat"));
+ FSDataOutputStream op = fs.create(inputPath, true);
+ if (data == null) {
+ data = "KEY\u001bVALUE1\u001bVALUE2\n";
+ }
+ op.write(Bytes.toBytes(data));
+ op.close();
+ LOG.debug(String.format("Wrote test data to file: %s", inputPath));
+
+ if (conf.getBoolean(FORCE_COMBINER_CONF, true)) {
+ LOG.debug("Forcing combiner.");
+ conf.setInt("mapreduce.map.combine.minspills", 1);
+ }
+
+ // Build args array.
+ String[] argsArray = new String[args.size() + 2];
+ Iterator it = args.entrySet().iterator();
+ int i = 0;
+ while (it.hasNext()) {
+ Map.Entry pair = (Map.Entry) it.next();
+ argsArray[i] = "-D" + pair.getKey() + "=" + pair.getValue();
+ i++;
+ }
+ argsArray[i] = table.getNameAsString();
+ argsArray[i + 1] = inputPath.toString();
+
+ // run the import
+ Tool tool = new ImportTsv();
+ LOG.debug("Running ImportTsv with arguments: " + argsArray);
+ assertEquals(0, ToolRunner.run(conf, tool, argsArray));
+
+ // Perform basic validation. If the input args did not include
+ // ImportTsv.BULK_OUTPUT_CONF_KEY then validate data in the table.
+ // Otherwise, validate presence of hfiles.
+ boolean isDryRun = args.containsKey(ImportTsv.DRY_RUN_CONF_KEY) &&
+ "true".equalsIgnoreCase(args.get(ImportTsv.DRY_RUN_CONF_KEY));
+ if (args.containsKey(ImportTsv.BULK_OUTPUT_CONF_KEY)) {
+ if (isDryRun) {
+ assertFalse(String.format("Dry run mode, %s should not have been created.",
+ ImportTsv.BULK_OUTPUT_CONF_KEY),
+ fs.exists(new Path(ImportTsv.BULK_OUTPUT_CONF_KEY)));
+ } else {
+ validateHFiles(fs, args.get(ImportTsv.BULK_OUTPUT_CONF_KEY), family,expectedKVCount);
+ }
+ } else {
+ validateTable(conf, table, family, valueMultiplier, isDryRun);
+ }
+
+ if (conf.getBoolean(DELETE_AFTER_LOAD_CONF, true)) {
+ LOG.debug("Deleting test subdirectory");
+ util.cleanupDataTestDirOnTestFS(table.getNameAsString());
+ }
+ return tool;
+ }
+
+ /**
+ * Confirm ImportTsv via data in online table.
+ */
+ private static void validateTable(Configuration conf, TableName tableName,
+ String family, int valueMultiplier, boolean isDryRun) throws IOException {
+
+ LOG.debug("Validating table.");
+ Connection connection = ConnectionFactory.createConnection(conf);
+ Table table = connection.getTable(tableName);
+ boolean verified = false;
+ long pause = conf.getLong("hbase.client.pause", 5 * 1000);
+ int numRetries = conf.getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 5);
+ for (int i = 0; i < numRetries; i++) {
+ try {
+ Scan scan = new Scan();
+ // Scan entire family.
+ scan.addFamily(Bytes.toBytes(family));
+ ResultScanner resScanner = table.getScanner(scan);
+ int numRows = 0;
+ for (Result res : resScanner) {
+ numRows++;
+ assertEquals(2, res.size());
+ List<Cell> kvs = res.listCells();
+ assertTrue(CellUtil.matchingRow(kvs.get(0), Bytes.toBytes("KEY")));
+ assertTrue(CellUtil.matchingRow(kvs.get(1), Bytes.toBytes("KEY")));
+ assertTrue(CellUtil.matchingValue(kvs.get(0), Bytes.toBytes("VALUE" + valueMultiplier)));
+ assertTrue(CellUtil.matchingValue(kvs.get(1), Bytes.toBytes("VALUE" + 2 * valueMultiplier)));
+ // Only one result set is expected, so let it loop.
+ }
+ if (isDryRun) {
+ assertEquals(0, numRows);
+ } else {
+ assertEquals(1, numRows);
+ }
+ verified = true;
+ break;
+ } catch (NullPointerException e) {
+ // If here, a cell was empty. Presume it's because updates came in
+ // after the scanner had been opened. Wait a while and retry.
+ }
+ try {
+ Thread.sleep(pause);
+ } catch (InterruptedException e) {
+ // continue
+ }
+ }
+ table.close();
+ connection.close();
+ assertTrue(verified);
+ }
+
+ /**
+ * Confirm ImportTsv via HFiles on fs.
+ */
+ private static void validateHFiles(FileSystem fs, String outputPath, String family,
+ int expectedKVCount) throws IOException {
+ // validate number and content of output columns
+ LOG.debug("Validating HFiles.");
+ Set<String> configFamilies = new HashSet<>();
+ configFamilies.add(family);
+ Set<String> foundFamilies = new HashSet<>();
+ int actualKVCount = 0;
+ for (FileStatus cfStatus : fs.listStatus(new Path(outputPath), new OutputFilesFilter())) {
+ String[] elements = cfStatus.getPath().toString().split(Path.SEPARATOR);
+ String cf = elements[elements.length - 1];
+ foundFamilies.add(cf);
+ assertTrue(
+ String.format(
+ "HFile output contains a column family (%s) not present in input families (%s)",
+ cf, configFamilies),
+ configFamilies.contains(cf));
+ for (FileStatus hfile : fs.listStatus(cfStatus.getPath())) {
+ assertTrue(
+ String.format("HFile %s appears to contain no data.", hfile.getPath()),
+ hfile.getLen() > 0);
+ // count the number of KVs from all the hfiles
+ if (expectedKVCount > -1) {
+ actualKVCount += getKVCountFromHfile(fs, hfile.getPath());
+ }
+ }
+ }
+ assertTrue(String.format("HFile output does not contain the input family '%s'.", family),
+ foundFamilies.contains(family));
+ if (expectedKVCount > -1) {
+ assertTrue(String.format(
+ "KV count in ouput hfile=<%d> doesn't match with expected KV count=<%d>", actualKVCount,
+ expectedKVCount), actualKVCount == expectedKVCount);
+ }
+ }
+
+ /**
+ * Returns the total number of KVs in the given HFile.
+ * @param fs the FileSystem holding the HFile
+ * @param p the HFile path
+ * @return KV count in the given HFile
+ * @throws IOException if the HFile cannot be read
+ */
+ private static int getKVCountFromHfile(FileSystem fs, Path p) throws IOException {
+ Configuration conf = util.getConfiguration();
+ HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf);
+ reader.loadFileInfo();
+ HFileScanner scanner = reader.getScanner(false, false);
+ scanner.seekTo();
+ int count = 0;
+ do {
+ count++;
+ } while (scanner.next());
+ reader.close();
+ return count;
+ }
+}
+
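For orientation, here is a minimal sketch (not part of this patch) of the kind of ImportTsv invocation whose output the validateTable and validateHFiles helpers above check. The table name, column spec, and paths are hypothetical, and it assumes ImportTsv is driven as a Hadoop Tool:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.ImportTsv;
import org.apache.hadoop.util.ToolRunner;

public class ImportTsvBulkOutputSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // Hypothetical column spec, output directory, table name, and input path.
    String[] toolArgs = new String[] {
        "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B",
        // With bulk output set, ImportTsv writes HFiles under this directory
        // (one subdirectory per column family) instead of issuing Puts.
        "-D" + ImportTsv.BULK_OUTPUT_CONF_KEY + "=/tmp/importtsv-hfiles",
        "myTable",
        "/tmp/input.tsv"
    };
    System.exit(ToolRunner.run(conf, new ImportTsv(), toolArgs));
  }
}

Depending on whether bulk output or dry-run mode is configured, the assertions above then inspect either the online table contents or those per-family HFile directories.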
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsvParser.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsvParser.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsvParser.java
new file mode 100644
index 0000000..3c38102
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsvParser.java
@@ -0,0 +1,314 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.util.ArrayList;
+
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser;
+import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser.BadTsvLineException;
+import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser.ParsedLine;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.Pair;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.base.Joiner;
+import org.apache.hadoop.hbase.shaded.com.google.common.base.Splitter;
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.Iterables;
+
+/**
+ * Tests for {@link TsvParser}.
+ */
+@Category({MapReduceTests.class, SmallTests.class})
+public class TestImportTsvParser {
+
+ private void assertBytesEquals(byte[] a, byte[] b) {
+ assertEquals(Bytes.toStringBinary(a), Bytes.toStringBinary(b));
+ }
+
+ private void checkParsing(ParsedLine parsed, Iterable<String> expected) {
+ ArrayList<String> parsedCols = new ArrayList<>();
+ for (int i = 0; i < parsed.getColumnCount(); i++) {
+ parsedCols.add(Bytes.toString(parsed.getLineBytes(), parsed.getColumnOffset(i),
+ parsed.getColumnLength(i)));
+ }
+ if (!Iterables.elementsEqual(parsedCols, expected)) {
+ fail("Expected: " + Joiner.on(",").join(expected) + "\n" + "Got:"
+ + Joiner.on(",").join(parsedCols));
+ }
+ }
+
+ @Test
+ public void testTsvParserSpecParsing() {
+ TsvParser parser;
+
+ parser = new TsvParser("HBASE_ROW_KEY", "\t");
+ assertNull(parser.getFamily(0));
+ assertNull(parser.getQualifier(0));
+ assertEquals(0, parser.getRowKeyColumnIndex());
+ assertFalse(parser.hasTimestamp());
+
+ parser = new TsvParser("HBASE_ROW_KEY,col1:scol1", "\t");
+ assertNull(parser.getFamily(0));
+ assertNull(parser.getQualifier(0));
+ assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1));
+ assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1));
+ assertEquals(0, parser.getRowKeyColumnIndex());
+ assertFalse(parser.hasTimestamp());
+
+ parser = new TsvParser("HBASE_ROW_KEY,col1:scol1,col1:scol2", "\t");
+ assertNull(parser.getFamily(0));
+ assertNull(parser.getQualifier(0));
+ assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1));
+ assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1));
+ assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(2));
+ assertBytesEquals(Bytes.toBytes("scol2"), parser.getQualifier(2));
+ assertEquals(0, parser.getRowKeyColumnIndex());
+ assertFalse(parser.hasTimestamp());
+
+ parser = new TsvParser("HBASE_ROW_KEY,col1:scol1,HBASE_TS_KEY,col1:scol2", "\t");
+ assertNull(parser.getFamily(0));
+ assertNull(parser.getQualifier(0));
+ assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1));
+ assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1));
+ assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(3));
+ assertBytesEquals(Bytes.toBytes("scol2"), parser.getQualifier(3));
+ assertEquals(0, parser.getRowKeyColumnIndex());
+ assertTrue(parser.hasTimestamp());
+ assertEquals(2, parser.getTimestampKeyColumnIndex());
+
+ parser = new TsvParser("HBASE_ROW_KEY,col1:scol1,HBASE_TS_KEY,col1:scol2,HBASE_ATTRIBUTES_KEY",
+ "\t");
+ assertNull(parser.getFamily(0));
+ assertNull(parser.getQualifier(0));
+ assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1));
+ assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1));
+ assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(3));
+ assertBytesEquals(Bytes.toBytes("scol2"), parser.getQualifier(3));
+ assertEquals(0, parser.getRowKeyColumnIndex());
+ assertTrue(parser.hasTimestamp());
+ assertEquals(2, parser.getTimestampKeyColumnIndex());
+ assertEquals(4, parser.getAttributesKeyColumnIndex());
+
+ parser = new TsvParser("HBASE_ATTRIBUTES_KEY,col1:scol1,HBASE_TS_KEY,col1:scol2,HBASE_ROW_KEY",
+ "\t");
+ assertNull(parser.getFamily(0));
+ assertNull(parser.getQualifier(0));
+ assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1));
+ assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1));
+ assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(3));
+ assertBytesEquals(Bytes.toBytes("scol2"), parser.getQualifier(3));
+ assertEquals(4, parser.getRowKeyColumnIndex());
+ assertTrue(parser.hasTimestamp());
+ assertEquals(2, parser.getTimestampKeyColumnIndex());
+ assertEquals(0, parser.getAttributesKeyColumnIndex());
+ }
+
+ @Test
+ public void testTsvParser() throws BadTsvLineException {
+ TsvParser parser = new TsvParser("col_a,col_b:qual,HBASE_ROW_KEY,col_d", "\t");
+ assertBytesEquals(Bytes.toBytes("col_a"), parser.getFamily(0));
+ assertBytesEquals(HConstants.EMPTY_BYTE_ARRAY, parser.getQualifier(0));
+ assertBytesEquals(Bytes.toBytes("col_b"), parser.getFamily(1));
+ assertBytesEquals(Bytes.toBytes("qual"), parser.getQualifier(1));
+ assertNull(parser.getFamily(2));
+ assertNull(parser.getQualifier(2));
+ assertEquals(2, parser.getRowKeyColumnIndex());
+
+ assertEquals(TsvParser.DEFAULT_TIMESTAMP_COLUMN_INDEX, parser.getTimestampKeyColumnIndex());
+
+ byte[] line = Bytes.toBytes("val_a\tval_b\tval_c\tval_d");
+ ParsedLine parsed = parser.parse(line, line.length);
+ checkParsing(parsed, Splitter.on("\t").split(Bytes.toString(line)));
+ }
+
+ @Test
+ public void testTsvParserWithTimestamp() throws BadTsvLineException {
+ TsvParser parser = new TsvParser("HBASE_ROW_KEY,HBASE_TS_KEY,col_a,", "\t");
+ assertNull(parser.getFamily(0));
+ assertNull(parser.getQualifier(0));
+ assertNull(parser.getFamily(1));
+ assertNull(parser.getQualifier(1));
+ assertBytesEquals(Bytes.toBytes("col_a"), parser.getFamily(2));
+ assertBytesEquals(HConstants.EMPTY_BYTE_ARRAY, parser.getQualifier(2));
+ assertEquals(0, parser.getRowKeyColumnIndex());
+ assertEquals(1, parser.getTimestampKeyColumnIndex());
+
+ byte[] line = Bytes.toBytes("rowkey\t1234\tval_a");
+ ParsedLine parsed = parser.parse(line, line.length);
+ assertEquals(1234L, parsed.getTimestamp(-1));
+ checkParsing(parsed, Splitter.on("\t").split(Bytes.toString(line)));
+ }
+
+ /**
+ * Test cases that throw BadTsvLineException
+ */
+ @Test(expected = BadTsvLineException.class)
+ public void testTsvParserBadTsvLineExcessiveColumns() throws BadTsvLineException {
+ TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a", "\t");
+ byte[] line = Bytes.toBytes("val_a\tval_b\tval_c");
+ parser.parse(line, line.length);
+ }
+
+ @Test(expected = BadTsvLineException.class)
+ public void testTsvParserBadTsvLineZeroColumn() throws BadTsvLineException {
+ TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a", "\t");
+ byte[] line = Bytes.toBytes("");
+ parser.parse(line, line.length);
+ }
+
+ @Test(expected = BadTsvLineException.class)
+ public void testTsvParserBadTsvLineOnlyKey() throws BadTsvLineException {
+ TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a", "\t");
+ byte[] line = Bytes.toBytes("key_only");
+ parser.parse(line, line.length);
+ }
+
+ @Test(expected = BadTsvLineException.class)
+ public void testTsvParserBadTsvLineNoRowKey() throws BadTsvLineException {
+ TsvParser parser = new TsvParser("col_a,HBASE_ROW_KEY", "\t");
+ byte[] line = Bytes.toBytes("only_cola_data_and_no_row_key");
+ parser.parse(line, line.length);
+ }
+
+ @Test(expected = BadTsvLineException.class)
+ public void testTsvParserInvalidTimestamp() throws BadTsvLineException {
+ TsvParser parser = new TsvParser("HBASE_ROW_KEY,HBASE_TS_KEY,col_a,", "\t");
+ assertEquals(1, parser.getTimestampKeyColumnIndex());
+ byte[] line = Bytes.toBytes("rowkey\ttimestamp\tval_a");
+ ParsedLine parsed = parser.parse(line, line.length);
+ assertEquals(-1, parsed.getTimestamp(-1));
+ checkParsing(parsed, Splitter.on("\t").split(Bytes.toString(line)));
+ }
+
+ @Test(expected = BadTsvLineException.class)
+ public void testTsvParserNoTimestampValue() throws BadTsvLineException {
+ TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a,HBASE_TS_KEY", "\t");
+ assertEquals(2, parser.getTimestampKeyColumnIndex());
+ byte[] line = Bytes.toBytes("rowkey\tval_a");
+ parser.parse(line, line.length);
+ }
+
+ @Test
+ public void testTsvParserParseRowKey() throws BadTsvLineException {
+ TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a,HBASE_TS_KEY", "\t");
+ assertEquals(0, parser.getRowKeyColumnIndex());
+ byte[] line = Bytes.toBytes("rowkey\tval_a\t1234");
+ Pair<Integer, Integer> rowKeyOffsets = parser.parseRowKey(line, line.length);
+ assertEquals(0, rowKeyOffsets.getFirst().intValue());
+ assertEquals(6, rowKeyOffsets.getSecond().intValue());
+ try {
+ line = Bytes.toBytes("\t\tval_a\t1234");
+ parser.parseRowKey(line, line.length);
+ fail("Should get BadTsvLineException on empty rowkey.");
+ } catch (BadTsvLineException b) {
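+ // expected: empty row key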
+
+ }
+ parser = new TsvParser("col_a,HBASE_ROW_KEY,HBASE_TS_KEY", "\t");
+ assertEquals(1, parser.getRowKeyColumnIndex());
+ line = Bytes.toBytes("val_a\trowkey\t1234");
+ rowKeyOffsets = parser.parseRowKey(line, line.length);
+ assertEquals(6, rowKeyOffsets.getFirst().intValue());
+ assertEquals(6, rowKeyOffsets.getSecond().intValue());
+ try {
+ line = Bytes.toBytes("val_a");
+ rowKeyOffsets = parser.parseRowKey(line, line.length);
+ fail("Should get BadTsvLineException when number of columns less than rowkey position.");
+ } catch (BadTsvLineException b) {
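+ // expected: too few columns for the row key position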
+
+ }
+ parser = new TsvParser("col_a,HBASE_TS_KEY,HBASE_ROW_KEY", "\t");
+ assertEquals(2, parser.getRowKeyColumnIndex());
+ line = Bytes.toBytes("val_a\t1234\trowkey");
+ rowKeyOffsets = parser.parseRowKey(line, line.length);
+ assertEquals(11, rowKeyOffsets.getFirst().intValue());
+ assertEquals(6, rowKeyOffsets.getSecond().intValue());
+ }
+
+ @Test
+ public void testTsvParseAttributesKey() throws BadTsvLineException {
+ TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a,HBASE_TS_KEY,HBASE_ATTRIBUTES_KEY", "\t");
+ assertEquals(0, parser.getRowKeyColumnIndex());
+ byte[] line = Bytes.toBytes("rowkey\tval_a\t1234\tkey=>value");
+ ParsedLine parse = parser.parse(line, line.length);
+ assertEquals(18, parse.getAttributeKeyOffset());
+ assertEquals(3, parser.getAttributesKeyColumnIndex());
+ String[] attributes = parse.getIndividualAttributes();
+ assertEquals(attributes[0], "key=>value");
+ try {
+ line = Bytes.toBytes("rowkey\tval_a\t1234");
+ parser.parse(line, line.length);
+ fail("Should get BadTsvLineException on empty rowkey.");
+ } catch (BadTsvLineException b) {
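+ // expected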
+
+ }
+ parser = new TsvParser("HBASE_ATTRIBUTES_KEY,col_a,HBASE_ROW_KEY,HBASE_TS_KEY", "\t");
+ assertEquals(2, parser.getRowKeyColumnIndex());
+ line = Bytes.toBytes("key=>value\tval_a\trowkey\t1234");
+ parse = parser.parse(line, line.length);
+ assertEquals(0, parse.getAttributeKeyOffset());
+ assertEquals(0, parser.getAttributesKeyColumnIndex());
+ attributes = parse.getIndividualAttributes();
+ assertEquals(attributes[0], "key=>value");
+ try {
+ line = Bytes.toBytes("val_a");
+ ParsedLine parse2 = parser.parse(line, line.length);
+ fail("Should get BadTsvLineException when number of columns less than rowkey position.");
+ } catch (BadTsvLineException b) {
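+ // expected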
+
+ }
+ parser = new TsvParser("col_a,HBASE_ATTRIBUTES_KEY,HBASE_TS_KEY,HBASE_ROW_KEY", "\t");
+ assertEquals(3, parser.getRowKeyColumnIndex());
+ line = Bytes.toBytes("val_a\tkey0=>value0,key1=>value1,key2=>value2\t1234\trowkey");
+ parse = parser.parse(line, line.length);
+ assertEquals(1, parser.getAttributesKeyColumnIndex());
+ assertEquals(6, parse.getAttributeKeyOffset());
+ String[] attr = parse.getIndividualAttributes();
+ int i = 0;
+ for (String str : attr) {
+ assertEquals("key" + i + "=>" + "value" + i, str);
+ i++;
+ }
+ }
+
+ @Test
+ public void testTsvParserWithCellVisibilityCol() throws BadTsvLineException {
+ TsvParser parser = new TsvParser(
+ "HBASE_ROW_KEY,col_a,HBASE_TS_KEY,HBASE_ATTRIBUTES_KEY,HBASE_CELL_VISIBILITY", "\t");
+ assertEquals(0, parser.getRowKeyColumnIndex());
+ assertEquals(4, parser.getCellVisibilityColumnIndex());
+ byte[] line = Bytes.toBytes("rowkey\tval_a\t1234\tkey=>value\tPRIVATE&SECRET");
+ ParsedLine parse = parser.parse(line, line.length);
+ assertEquals(18, parse.getAttributeKeyOffset());
+ assertEquals(3, parser.getAttributesKeyColumnIndex());
+ String[] attributes = parse.getIndividualAttributes();
+ assertEquals(attributes[0], "key=>value");
+ assertEquals(29, parse.getCellVisibilityColumnOffset());
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestJarFinder.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestJarFinder.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestJarFinder.java
new file mode 100644
index 0000000..8187b73
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestJarFinder.java
@@ -0,0 +1,132 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.Writer;
+import java.text.MessageFormat;
+import java.util.Properties;
+import java.util.jar.JarInputStream;
+import java.util.jar.JarOutputStream;
+import java.util.jar.Manifest;
+
+/**
+ * This file was forked from hadoop/common/branches/branch-2@1350012.
+ */
+@Category(SmallTests.class)
+public class TestJarFinder {
+
+ @Test
+ public void testJar() throws Exception {
+
+ //picking a class that is for sure in a JAR in the classpath
+ String jar = JarFinder.getJar(LogFactory.class);
+ Assert.assertTrue(new File(jar).exists());
+ }
+
+ private static void delete(File file) throws IOException {
+ if (file.getAbsolutePath().length() < 5) {
+ throw new IllegalArgumentException(
+ MessageFormat.format("Path [{0}] is too short, not deleting",
+ file.getAbsolutePath()));
+ }
+ if (file.exists()) {
+ if (file.isDirectory()) {
+ File[] children = file.listFiles();
+ if (children != null) {
+ for (File child : children) {
+ delete(child);
+ }
+ }
+ }
+ if (!file.delete()) {
+ throw new RuntimeException(
+ MessageFormat.format("Could not delete path [{0}]",
+ file.getAbsolutePath()));
+ }
+ }
+ }
+
+ @Test
+ public void testExpandedClasspath() throws Exception {
+ //picking a class that is for sure in a directory in the classpath
+ //in this case the JAR is created on the fly
+ String jar = JarFinder.getJar(TestJarFinder.class);
+ Assert.assertTrue(new File(jar).exists());
+ }
+
+ @Test
+ public void testExistingManifest() throws Exception {
+ File dir = new File(System.getProperty("test.build.dir", "target/test-dir"),
+ TestJarFinder.class.getName() + "-testExistingManifest");
+ delete(dir);
+ dir.mkdirs();
+
+ File metaInfDir = new File(dir, "META-INF");
+ metaInfDir.mkdirs();
+ File manifestFile = new File(metaInfDir, "MANIFEST.MF");
+ Manifest manifest = new Manifest();
+ OutputStream os = new FileOutputStream(manifestFile);
+ manifest.write(os);
+ os.close();
+
+ File propsFile = new File(dir, "props.properties");
+ Writer writer = new FileWriter(propsFile);
+ new Properties().store(writer, "");
+ writer.close();
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ JarOutputStream zos = new JarOutputStream(baos);
+ JarFinder.jarDir(dir, "", zos);
+ JarInputStream jis =
+ new JarInputStream(new ByteArrayInputStream(baos.toByteArray()));
+ Assert.assertNotNull(jis.getManifest());
+ jis.close();
+ }
+
+ @Test
+ public void testNoManifest() throws Exception {
+ File dir = new File(System.getProperty("test.build.dir", "target/test-dir"),
+ TestJarFinder.class.getName() + "-testNoManifest");
+ delete(dir);
+ dir.mkdirs();
+ File propsFile = new File(dir, "props.properties");
+ Writer writer = new FileWriter(propsFile);
+ new Properties().store(writer, "");
+ writer.close();
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ JarOutputStream zos = new JarOutputStream(baos);
+ JarFinder.jarDir(dir, "", zos);
+ JarInputStream jis =
+ new JarInputStream(new ByteArrayInputStream(baos.toByteArray()));
+ Assert.assertNotNull(jis.getManifest());
+ jis.close();
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFilesSplitRecovery.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFilesSplitRecovery.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFilesSplitRecovery.java
new file mode 100644
index 0000000..529a448
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFilesSplitRecovery.java
@@ -0,0 +1,669 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Collection;
+import java.util.Deque;
+import java.util.List;
+import java.util.Map;
+import java.util.NavigableMap;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HRegionLocation;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.MetaTableAccessor;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableExistsException;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.ClientServiceCallable;
+import org.apache.hadoop.hbase.client.ClusterConnection;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
+import org.apache.hadoop.hbase.ipc.RpcControllerFactory;
+import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.BulkLoadHFileRequest;
+import org.apache.hadoop.hbase.regionserver.HRegionServer;
+import org.apache.hadoop.hbase.regionserver.TestHRegionServerBulkLoad;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.util.Pair;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+import org.mockito.Mockito;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.Multimap;
+
+import org.apache.hadoop.hbase.shaded.com.google.protobuf.RpcController;
+import org.apache.hadoop.hbase.shaded.com.google.protobuf.ServiceException;
+
+/**
+ * Test cases for the atomic load error handling of the bulk load functionality.
+ */
+@Category({MapReduceTests.class, LargeTests.class})
+public class TestLoadIncrementalHFilesSplitRecovery {
+ private static final Log LOG = LogFactory.getLog(TestLoadIncrementalHFilesSplitRecovery.class);
+
+ static HBaseTestingUtility util;
+ //used by secure subclass
+ static boolean useSecure = false;
+
+ final static int NUM_CFS = 10;
+ final static byte[] QUAL = Bytes.toBytes("qual");
+ final static int ROWCOUNT = 100;
+
+ private final static byte[][] families = new byte[NUM_CFS][];
+
+ @Rule
+ public TestName name = new TestName();
+
+ static {
+ for (int i = 0; i < NUM_CFS; i++) {
+ families[i] = Bytes.toBytes(family(i));
+ }
+ }
+
+ static byte[] rowkey(int i) {
+ return Bytes.toBytes(String.format("row_%08d", i));
+ }
+
+ static String family(int i) {
+ return String.format("family_%04d", i);
+ }
+
+ static byte[] value(int i) {
+ return Bytes.toBytes(String.format("%010d", i));
+ }
+
+ public static void buildHFiles(FileSystem fs, Path dir, int value)
+ throws IOException {
+ byte[] val = value(value);
+ for (int i = 0; i < NUM_CFS; i++) {
+ Path testIn = new Path(dir, family(i));
+
+ TestHRegionServerBulkLoad.createHFile(fs, new Path(testIn, "hfile_" + i),
+ Bytes.toBytes(family(i)), QUAL, val, ROWCOUNT);
+ }
+ }
+
+ /**
+ * Creates a table with given table name and specified number of column
+ * families if the table does not already exist.
+ */
+ private void setupTable(final Connection connection, TableName table, int cfs)
+ throws IOException {
+ try {
+ LOG.info("Creating table " + table);
+ HTableDescriptor htd = new HTableDescriptor(table);
+ for (int i = 0; i < cfs; i++) {
+ htd.addFamily(new HColumnDescriptor(family(i)));
+ }
+ try (Admin admin = connection.getAdmin()) {
+ admin.createTable(htd);
+ }
+ } catch (TableExistsException tee) {
+ LOG.info("Table " + table + " already exists");
+ }
+ }
+
+ /**
+ * Creates a table with the given table name, the specified number of column families,
+ * and the given split keys if the table does not already exist.
+ * @param table the table to create
+ * @param cfs number of column families
+ * @param SPLIT_KEYS split keys for the table's regions
+ */
+ private void setupTableWithSplitkeys(TableName table, int cfs, byte[][] SPLIT_KEYS)
+ throws IOException {
+ try {
+ LOG.info("Creating table " + table);
+ HTableDescriptor htd = new HTableDescriptor(table);
+ for (int i = 0; i < cfs; i++) {
+ htd.addFamily(new HColumnDescriptor(family(i)));
+ }
+
+ util.createTable(htd, SPLIT_KEYS);
+ } catch (TableExistsException tee) {
+ LOG.info("Table " + table + " already exists");
+ }
+ }
+
+ private Path buildBulkFiles(TableName table, int value) throws Exception {
+ Path dir = util.getDataTestDirOnTestFS(table.getNameAsString());
+ Path bulk1 = new Path(dir, table.getNameAsString() + value);
+ FileSystem fs = util.getTestFileSystem();
+ buildHFiles(fs, bulk1, value);
+ return bulk1;
+ }
+
+ /**
+ * Populate table with known values.
+ */
+ private void populateTable(final Connection connection, TableName table, int value)
+ throws Exception {
+ // create HFiles for different column families
+ LoadIncrementalHFiles lih = new LoadIncrementalHFiles(util.getConfiguration());
+ Path bulk1 = buildBulkFiles(table, value);
+ try (Table t = connection.getTable(table);
+ RegionLocator locator = connection.getRegionLocator(table);
+ Admin admin = connection.getAdmin()) {
+ lih.doBulkLoad(bulk1, admin, t, locator);
+ }
+ }
+
+ /**
+ * Splits the known table in half. (This is hard-coded for this test suite.)
+ */
+ private void forceSplit(TableName table) {
+ try {
+ // A synchronous split via the region server would be preferable, but that API is not
+ // visible here, so split asynchronously and poll for completion below.
+ HRegionServer hrs = util.getRSForFirstRegionInTable(table);
+
+ for (HRegionInfo hri :
+ ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices())) {
+ if (hri.getTable().equals(table)) {
+ util.getAdmin().splitRegionAsync(hri.getRegionName(), rowkey(ROWCOUNT / 2));
+ //ProtobufUtil.split(null, hrs.getRSRpcServices(), hri, rowkey(ROWCOUNT / 2));
+ }
+ }
+
+ // verify that split completed.
+ int regions;
+ do {
+ regions = 0;
+ for (HRegionInfo hri :
+ ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices())) {
+ if (hri.getTable().equals(table)) {
+ regions++;
+ }
+ }
+ if (regions != 2) {
+ LOG.info("Taking some time to complete split...");
+ Thread.sleep(250);
+ }
+ } while (regions != 2);
+ } catch (IOException e) {
+ e.printStackTrace();
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ }
+ }
+
+ @BeforeClass
+ public static void setupCluster() throws Exception {
+ util = new HBaseTestingUtility();
+ util.getConfiguration().set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY, "");
+ util.startMiniCluster(1);
+ }
+
+ @AfterClass
+ public static void teardownCluster() throws Exception {
+ util.shutdownMiniCluster();
+ }
+
+ /**
+ * Checks that all columns have the expected value and that there is the
+ * expected number of rows.
+ * @throws IOException
+ */
+ void assertExpectedTable(TableName table, int count, int value) throws IOException {
+ HTableDescriptor [] htds = util.getAdmin().listTables(table.getNameAsString());
+ assertEquals(htds.length, 1);
+ Table t = null;
+ try {
+ t = util.getConnection().getTable(table);
+ Scan s = new Scan();
+ ResultScanner sr = t.getScanner(s);
+ int i = 0;
+ for (Result r : sr) {
+ i++;
+ for (NavigableMap<byte[], byte[]> nm : r.getNoVersionMap().values()) {
+ for (byte[] val : nm.values()) {
+ assertTrue(Bytes.equals(val, value(value)));
+ }
+ }
+ }
+ assertEquals(count, i);
+ } catch (IOException e) {
+ fail("Failed due to exception");
+ } finally {
+ if (t != null) t.close();
+ }
+ }
+
+ /**
+ * Test that shows that an exception thrown from the RS side will result in an
+ * exception on the LoadIncrementalHFiles client.
+ */
+ @Test(expected=IOException.class, timeout=120000)
+ public void testBulkLoadPhaseFailure() throws Exception {
+ final TableName table = TableName.valueOf(name.getMethodName());
+ final AtomicInteger attemptedCalls = new AtomicInteger();
+ final AtomicInteger failedCalls = new AtomicInteger();
+ util.getConfiguration().setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 2);
+ try (Connection connection = ConnectionFactory.createConnection(util
+ .getConfiguration())) {
+ setupTable(connection, table, 10);
+ LoadIncrementalHFiles lih = new LoadIncrementalHFiles(
+ util.getConfiguration()) {
+ @Override
+ protected List<LoadQueueItem> tryAtomicRegionLoad(
+ ClientServiceCallable<byte[]> serviceCallable, TableName tableName, final byte[] first,
+ Collection<LoadQueueItem> lqis) throws IOException {
+ int i = attemptedCalls.incrementAndGet();
+ if (i == 1) {
+ Connection errConn;
+ try {
+ errConn = getMockedConnection(util.getConfiguration());
+ serviceCallable = this.buildClientServiceCallable(errConn, table, first, lqis, true);
+ } catch (Exception e) {
+ LOG.fatal("mocking cruft, should never happen", e);
+ throw new RuntimeException("mocking cruft, should never happen");
+ }
+ failedCalls.incrementAndGet();
+ return super.tryAtomicRegionLoad(serviceCallable, tableName, first, lqis);
+ }
+
+ return super.tryAtomicRegionLoad(serviceCallable, tableName, first, lqis);
+ }
+ };
+ try {
+ // create HFiles for different column families
+ Path dir = buildBulkFiles(table, 1);
+ try (Table t = connection.getTable(table);
+ RegionLocator locator = connection.getRegionLocator(table);
+ Admin admin = connection.getAdmin()) {
+ lih.doBulkLoad(dir, admin, t, locator);
+ }
+ } finally {
+ util.getConfiguration().setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER,
+ HConstants.DEFAULT_HBASE_CLIENT_RETRIES_NUMBER);
+ }
+ fail("doBulkLoad should have thrown an exception");
+ }
+ }
+
+ /**
+ * Test that shows that an exception thrown from the RS side will result in the
+ * expected number of retries set by {@link HConstants#HBASE_CLIENT_RETRIES_NUMBER}
+ * when {@link LoadIncrementalHFiles#RETRY_ON_IO_EXCEPTION} is set.
+ */
+ @Test
+ public void testRetryOnIOException() throws Exception {
+ final TableName table = TableName.valueOf(name.getMethodName());
+ final AtomicInteger calls = new AtomicInteger(1);
+ final Connection conn = ConnectionFactory.createConnection(util
+ .getConfiguration());
+ util.getConfiguration().setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 2);
+ util.getConfiguration().setBoolean(
+ LoadIncrementalHFiles.RETRY_ON_IO_EXCEPTION, true);
+ final LoadIncrementalHFiles lih = new LoadIncrementalHFiles(
+ util.getConfiguration()) {
+ @Override
+ protected List<LoadQueueItem> tryAtomicRegionLoad(
+ ClientServiceCallable<byte[]> serverCallable, TableName tableName,
+ final byte[] first, Collection<LoadQueueItem> lqis)
+ throws IOException {
+ if (calls.getAndIncrement() < util.getConfiguration().getInt(
+ HConstants.HBASE_CLIENT_RETRIES_NUMBER,
+ HConstants.DEFAULT_HBASE_CLIENT_RETRIES_NUMBER) - 1) {
+ ClientServiceCallable<byte[]> newServerCallable = new ClientServiceCallable<byte[]>(
+ conn, tableName, first, new RpcControllerFactory(
+ util.getConfiguration()).newController(), HConstants.PRIORITY_UNSET) {
+ @Override
+ public byte[] rpcCall() throws Exception {
+ throw new IOException("Error calling something on RegionServer");
+ }
+ };
+ return super.tryAtomicRegionLoad(newServerCallable, tableName, first, lqis);
+ } else {
+ return super.tryAtomicRegionLoad(serverCallable, tableName, first, lqis);
+ }
+ }
+ };
+ setupTable(conn, table, 10);
+ Path dir = buildBulkFiles(table, 1);
+ lih.doBulkLoad(dir, conn.getAdmin(), conn.getTable(table),
+ conn.getRegionLocator(table));
+ util.getConfiguration().setBoolean(
+ LoadIncrementalHFiles.RETRY_ON_IO_EXCEPTION, false);
+
+ }
+
+ @SuppressWarnings("deprecation")
+ private ClusterConnection getMockedConnection(final Configuration conf)
+ throws IOException, org.apache.hadoop.hbase.shaded.com.google.protobuf.ServiceException {
+ ClusterConnection c = Mockito.mock(ClusterConnection.class);
+ Mockito.when(c.getConfiguration()).thenReturn(conf);
+ Mockito.doNothing().when(c).close();
+ // Make it so we return a particular location when asked.
+ final HRegionLocation loc = new HRegionLocation(HRegionInfo.FIRST_META_REGIONINFO,
+ ServerName.valueOf("example.org", 1234, 0));
+ Mockito.when(c.getRegionLocation((TableName) Mockito.any(),
+ (byte[]) Mockito.any(), Mockito.anyBoolean())).
+ thenReturn(loc);
+ Mockito.when(c.locateRegion((TableName) Mockito.any(), (byte[]) Mockito.any())).
+ thenReturn(loc);
+ ClientProtos.ClientService.BlockingInterface hri =
+ Mockito.mock(ClientProtos.ClientService.BlockingInterface.class);
+ Mockito.when(hri.bulkLoadHFile((RpcController)Mockito.any(), (BulkLoadHFileRequest)Mockito.any())).
+ thenThrow(new ServiceException(new IOException("injecting bulk load error")));
+ Mockito.when(c.getClient(Mockito.any(ServerName.class))).
+ thenReturn(hri);
+ return c;
+ }
+
+ /**
+ * This test exercises the path where there is a split after initial
+ * validation but before the atomic bulk load call. We cannot use presplitting
+ * to test this path, so we actually inject a split just before the atomic
+ * region load.
+ */
+ @Test (timeout=120000)
+ public void testSplitWhileBulkLoadPhase() throws Exception {
+ final TableName table = TableName.valueOf(name.getMethodName());
+ try (Connection connection = ConnectionFactory.createConnection(util.getConfiguration())) {
+ setupTable(connection, table, 10);
+ populateTable(connection, table, 1);
+ assertExpectedTable(table, ROWCOUNT, 1);
+
+ // Now let's cause trouble. This will occur after checks and cause bulk
+ // files to fail when attempt to atomically import. This is recoverable.
+ final AtomicInteger attemptedCalls = new AtomicInteger();
+ LoadIncrementalHFiles lih2 = new LoadIncrementalHFiles(util.getConfiguration()) {
+ @Override
+ protected void bulkLoadPhase(final Table htable, final Connection conn,
+ ExecutorService pool, Deque<LoadQueueItem> queue,
+ final Multimap<ByteBuffer, LoadQueueItem> regionGroups, boolean copyFile,
+ Map<LoadQueueItem, ByteBuffer> item2RegionMap)
+ throws IOException {
+ int i = attemptedCalls.incrementAndGet();
+ if (i == 1) {
+ // On first attempt force a split.
+ forceSplit(table);
+ }
+ super.bulkLoadPhase(htable, conn, pool, queue, regionGroups, copyFile, item2RegionMap);
+ }
+ };
+
+ // create HFiles for different column families
+ try (Table t = connection.getTable(table);
+ RegionLocator locator = connection.getRegionLocator(table);
+ Admin admin = connection.getAdmin()) {
+ Path bulk = buildBulkFiles(table, 2);
+ lih2.doBulkLoad(bulk, admin, t, locator);
+ }
+
+ // check that data was loaded
+ // The three expected attempts are 1) a failure because the region needs to split,
+ // 2) the load of the split top, and 3) the load of the split bottom.
+ assertEquals(attemptedCalls.get(), 3);
+ assertExpectedTable(table, ROWCOUNT, 2);
+ }
+ }
+
+ /**
+ * This test splits a table and attempts to bulk load. The bulk import files
+ * should be split before atomically importing.
+ */
+ @Test (timeout=120000)
+ public void testGroupOrSplitPresplit() throws Exception {
+ final TableName table = TableName.valueOf(name.getMethodName());
+ try (Connection connection = ConnectionFactory.createConnection(util.getConfiguration())) {
+ setupTable(connection, table, 10);
+ populateTable(connection, table, 1);
+ assertExpectedTable(connection, table, ROWCOUNT, 1);
+ forceSplit(table);
+
+ final AtomicInteger countedLqis = new AtomicInteger();
+ LoadIncrementalHFiles lih = new LoadIncrementalHFiles(
+ util.getConfiguration()) {
+ @Override
+ protected Pair<List<LoadQueueItem>, String> groupOrSplit(
+ Multimap<ByteBuffer, LoadQueueItem> regionGroups,
+ final LoadQueueItem item, final Table htable,
+ final Pair<byte[][], byte[][]> startEndKeys) throws IOException {
+ Pair<List<LoadQueueItem>, String> lqis = super.groupOrSplit(regionGroups, item, htable,
+ startEndKeys);
+ if (lqis != null && lqis.getFirst() != null) {
+ countedLqis.addAndGet(lqis.getFirst().size());
+ }
+ return lqis;
+ }
+ };
+
+ // create HFiles for different column families
+ Path bulk = buildBulkFiles(table, 2);
+ try (Table t = connection.getTable(table);
+ RegionLocator locator = connection.getRegionLocator(table);
+ Admin admin = connection.getAdmin()) {
+ lih.doBulkLoad(bulk, admin, t, locator);
+ }
+ assertExpectedTable(connection, table, ROWCOUNT, 2);
+ assertEquals(20, countedLqis.get());
+ }
+ }
+
+ /**
+ * This test creates a table with many small regions. The bulk load files
+ * will be split multiple times before all of them can be loaded successfully.
+ */
+ @Test (timeout=120000)
+ public void testSplitTmpFileCleanUp() throws Exception {
+ final TableName table = TableName.valueOf(name.getMethodName());
+ byte[][] SPLIT_KEYS = new byte[][] { Bytes.toBytes("row_00000010"),
+ Bytes.toBytes("row_00000020"), Bytes.toBytes("row_00000030"),
+ Bytes.toBytes("row_00000040"), Bytes.toBytes("row_00000050")};
+ try (Connection connection = ConnectionFactory.createConnection(util.getConfiguration())) {
+ setupTableWithSplitkeys(table, 10, SPLIT_KEYS);
+
+ LoadIncrementalHFiles lih = new LoadIncrementalHFiles(util.getConfiguration());
+
+ // create HFiles
+ Path bulk = buildBulkFiles(table, 2);
+ try (Table t = connection.getTable(table);
+ RegionLocator locator = connection.getRegionLocator(table);
+ Admin admin = connection.getAdmin()) {
+ lih.doBulkLoad(bulk, admin, t, locator);
+ }
+ // family path
+ Path tmpPath = new Path(bulk, family(0));
+ // TMP_DIR under family path
+ tmpPath = new Path(tmpPath, LoadIncrementalHFiles.TMP_DIR);
+ FileSystem fs = bulk.getFileSystem(util.getConfiguration());
+ // HFiles have been split, so TMP_DIR exists
+ assertTrue(fs.exists(tmpPath));
+ // TMP_DIR should have been cleaned-up
+ assertNull(LoadIncrementalHFiles.TMP_DIR + " should be empty.",
+ FSUtils.listStatus(fs, tmpPath));
+ assertExpectedTable(connection, table, ROWCOUNT, 2);
+ }
+ }
+
+ /**
+ * This simulates a remote exception which should cause LIHF to exit with an
+ * exception.
+ */
+ @Test(expected = IOException.class, timeout=120000)
+ public void testGroupOrSplitFailure() throws Exception {
+ final TableName tableName = TableName.valueOf(name.getMethodName());
+ try (Connection connection = ConnectionFactory.createConnection(util.getConfiguration())) {
+ setupTable(connection, tableName, 10);
+
+ LoadIncrementalHFiles lih = new LoadIncrementalHFiles(
+ util.getConfiguration()) {
+ int i = 0;
+
+ @Override
+ protected Pair<List<LoadQueueItem>, String> groupOrSplit(
+ Multimap<ByteBuffer, LoadQueueItem> regionGroups,
+ final LoadQueueItem item, final Table table,
+ final Pair<byte[][], byte[][]> startEndKeys) throws IOException {
+ i++;
+
+ if (i == 5) {
+ throw new IOException("failure");
+ }
+ return super.groupOrSplit(regionGroups, item, table, startEndKeys);
+ }
+ };
+
+ // create HFiles for different column families
+ Path dir = buildBulkFiles(tableName, 1);
+ try (Table t = connection.getTable(tableName);
+ RegionLocator locator = connection.getRegionLocator(tableName);
+ Admin admin = connection.getAdmin()) {
+ lih.doBulkLoad(dir, admin, t, locator);
+ }
+ }
+
+ fail("doBulkLoad should have thrown an exception");
+ }
+
+ @Test (timeout=120000)
+ public void testGroupOrSplitWhenRegionHoleExistsInMeta() throws Exception {
+ final TableName tableName = TableName.valueOf(name.getMethodName());
+ byte[][] SPLIT_KEYS = new byte[][] { Bytes.toBytes("row_00000100") };
+ // Share the connection. We were failing to find the table with our new reverse scan because it
+ // looks for the first region, not any region -- that is how it works now. The code below removes
+ // the first region in the test; previously we relied on the Connection cache holding the first region.
+ Connection connection = ConnectionFactory.createConnection(util.getConfiguration());
+ Table table = connection.getTable(tableName);
+
+ setupTableWithSplitkeys(tableName, 10, SPLIT_KEYS);
+ Path dir = buildBulkFiles(tableName, 2);
+
+ final AtomicInteger countedLqis = new AtomicInteger();
+ LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration()) {
+
+ @Override
+ protected Pair<List<LoadQueueItem>, String> groupOrSplit(
+ Multimap<ByteBuffer, LoadQueueItem> regionGroups,
+ final LoadQueueItem item, final Table htable,
+ final Pair<byte[][], byte[][]> startEndKeys) throws IOException {
+ Pair<List<LoadQueueItem>, String> lqis = super.groupOrSplit(regionGroups, item, htable,
+ startEndKeys);
+ if (lqis != null && lqis.getFirst() != null) {
+ countedLqis.addAndGet(lqis.getFirst().size());
+ }
+ return lqis;
+ }
+ };
+
+ // do bulkload when there is no region hole in hbase:meta.
+ try (Table t = connection.getTable(tableName);
+ RegionLocator locator = connection.getRegionLocator(tableName);
+ Admin admin = connection.getAdmin()) {
+ loader.doBulkLoad(dir, admin, t, locator);
+ } catch (Exception e) {
+ LOG.error("exeception=", e);
+ }
+ // check if all the data are loaded into the table.
+ this.assertExpectedTable(tableName, ROWCOUNT, 2);
+
+ dir = buildBulkFiles(tableName, 3);
+
+ // Mess it up by leaving a hole in the hbase:meta
+ List<HRegionInfo> regionInfos = MetaTableAccessor.getTableRegions(connection, tableName);
+ for (HRegionInfo regionInfo : regionInfos) {
+ if (Bytes.equals(regionInfo.getStartKey(), HConstants.EMPTY_BYTE_ARRAY)) {
+ MetaTableAccessor.deleteRegion(connection, regionInfo);
+ break;
+ }
+ }
+
+ try (Table t = connection.getTable(tableName);
+ RegionLocator locator = connection.getRegionLocator(tableName);
+ Admin admin = connection.getAdmin()) {
+ loader.doBulkLoad(dir, admin, t, locator);
+ } catch (Exception e) {
+ LOG.error("exception=", e);
+ assertTrue("IOException expected", e instanceof IOException);
+ }
+
+ table.close();
+
+ // Make sure at least the one region that still exists can be found.
+ regionInfos = MetaTableAccessor.getTableRegions(connection, tableName);
+ assertTrue(regionInfos.size() >= 1);
+
+ this.assertExpectedTable(connection, tableName, ROWCOUNT, 2);
+ connection.close();
+ }
+
+ /**
+ * Checks that all columns have the expected value and that there is the
+ * expected number of rows.
+ * @throws IOException
+ */
+ void assertExpectedTable(final Connection connection, TableName table, int count, int value)
+ throws IOException {
+ HTableDescriptor [] htds = util.getAdmin().listTables(table.getNameAsString());
+ assertEquals(htds.length, 1);
+ Table t = null;
+ try {
+ t = connection.getTable(table);
+ Scan s = new Scan();
+ ResultScanner sr = t.getScanner(s);
+ int i = 0;
+ for (Result r : sr) {
+ i++;
+ for (NavigableMap<byte[], byte[]> nm : r.getNoVersionMap().values()) {
+ for (byte[] val : nm.values()) {
+ assertTrue(Bytes.equals(val, value(value)));
+ }
+ }
+ }
+ assertEquals(count, i);
+ } catch (IOException e) {
+ fail("Failed due to exception");
+ } finally {
+ if (t != null) t.close();
+ }
+ }
+}
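Stripped of the fault injection, the plain client-side pattern these recovery tests stress looks roughly like the following sketch (not part of the commit; the table name and HFile directory are made up):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;

public class BulkLoadSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    TableName tableName = TableName.valueOf("myTable");   // hypothetical table
    Path hfileDir = new Path("/tmp/importtsv-hfiles");    // per-family HFile subdirectories
    try (Connection conn = ConnectionFactory.createConnection(conf);
        Table table = conn.getTable(tableName);
        RegionLocator locator = conn.getRegionLocator(tableName);
        Admin admin = conn.getAdmin()) {
      // Groups the HFiles by region, splitting them on the fly if region boundaries
      // have moved (e.g. after a split), and loads each group atomically per region.
      new LoadIncrementalHFiles(conf).doBulkLoad(hfileDir, admin, table, locator);
    }
  }
}

The tests above exercise exactly the paths where that grouping or the per-region load fails partway and must be retried or re-split.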
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableInputFormat.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableInputFormat.java
new file mode 100644
index 0000000..0c5207b
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableInputFormat.java
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
+import org.apache.hadoop.mapreduce.Job;
+import org.junit.BeforeClass;
+import org.junit.experimental.categories.Category;
+
+import java.io.IOException;
+import java.util.List;
+
+/**
+ * Tests various scan start and stop row scenarios. These are set on a Scan and
+ * verified in a MapReduce job to confirm that they are handed over and applied
+ * properly.
+ */
+@Category({VerySlowMapReduceTests.class, LargeTests.class})
+public class TestMultiTableInputFormat extends MultiTableInputFormatTestBase {
+
+ @BeforeClass
+ public static void setupLogging() {
+ TEST_UTIL.enableDebug(MultiTableInputFormat.class);
+ }
+
+ @Override
+ protected void initJob(List<Scan> scans, Job job) throws IOException {
+ TableMapReduceUtil.initTableMapperJob(scans, ScanMapper.class,
+ ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableSnapshotInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableSnapshotInputFormat.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableSnapshotInputFormat.java
new file mode 100644
index 0000000..530d9c5
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableSnapshotInputFormat.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.base.Function;
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.ImmutableList;
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.Multimaps;
+import edu.umd.cs.findbugs.annotations.Nullable;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.mapreduce.Job;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.experimental.categories.Category;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+
+@Category({ VerySlowMapReduceTests.class, LargeTests.class })
+public class TestMultiTableSnapshotInputFormat extends MultiTableInputFormatTestBase {
+
+ protected Path restoreDir;
+
+ @BeforeClass
+ public static void setUpSnapshots() throws Exception {
+
+ TEST_UTIL.enableDebug(MultiTableSnapshotInputFormat.class);
+ TEST_UTIL.enableDebug(MultiTableSnapshotInputFormatImpl.class);
+
+ // take a snapshot of every table we have.
+ for (String tableName : TABLES) {
+ SnapshotTestingUtils
+ .createSnapshotAndValidate(TEST_UTIL.getAdmin(), TableName.valueOf(tableName),
+ ImmutableList.of(INPUT_FAMILY), null,
+ snapshotNameForTable(tableName), FSUtils.getRootDir(TEST_UTIL.getConfiguration()),
+ TEST_UTIL.getTestFileSystem(), true);
+ }
+ }
+
+ @Before
+ public void setUp() throws Exception {
+ this.restoreDir = TEST_UTIL.getRandomDir();
+ }
+
+ @Override
+ protected void initJob(List<Scan> scans, Job job) throws IOException {
+ TableMapReduceUtil
+ .initMultiTableSnapshotMapperJob(getSnapshotScanMapping(scans), ScanMapper.class,
+ ImmutableBytesWritable.class, ImmutableBytesWritable.class, job, true, restoreDir);
+ }
+
+ protected Map<String, Collection<Scan>> getSnapshotScanMapping(final List<Scan> scans) {
+ return Multimaps.index(scans, new Function<Scan, String>() {
+ @Nullable
+ @Override
+ public String apply(Scan input) {
+ return snapshotNameForTable(
+ Bytes.toStringBinary(input.getAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME)));
+ }
+ }).asMap();
+ }
+
+ public static String snapshotNameForTable(String tableName) {
+ return tableName + "_snapshot";
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableSnapshotInputFormatImpl.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableSnapshotInputFormatImpl.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableSnapshotInputFormatImpl.java
new file mode 100644
index 0000000..1c33848
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableSnapshotInputFormatImpl.java
@@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.ImmutableList;
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.ImmutableMap;
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.Maps;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.mockito.Mockito;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+
+import static org.junit.Assert.assertEquals;
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.eq;
+import static org.mockito.Mockito.doNothing;
+import static org.mockito.Mockito.verify;
+
+@Category({ SmallTests.class })
+public class TestMultiTableSnapshotInputFormatImpl {
+
+ private MultiTableSnapshotInputFormatImpl subject;
+ private Map<String, Collection<Scan>> snapshotScans;
+ private Path restoreDir;
+ private Configuration conf;
+ private Path rootDir;
+
+ @Before
+ public void setUp() throws Exception {
+ this.subject = Mockito.spy(new MultiTableSnapshotInputFormatImpl());
+
+ // mock out restoreSnapshot
+ // TODO: this is kind of meh; it'd be much nicer to just inject the RestoreSnapshotHelper
+ // dependency into the input format. However, we need a new RestoreSnapshotHelper per snapshot
+ // in the current design, and it *also* feels weird to introduce a RestoreSnapshotHelperFactory
+ // and inject that, which would probably be the more "pure" way of doing things. This is the
+ // lesser of two evils, perhaps?
+ doNothing().when(this.subject).
+ restoreSnapshot(any(Configuration.class), any(String.class), any(Path.class),
+ any(Path.class), any(FileSystem.class));
+
+ this.conf = new Configuration();
+ this.rootDir = new Path("file:///test-root-dir");
+ FSUtils.setRootDir(conf, rootDir);
+ this.snapshotScans = ImmutableMap.<String, Collection<Scan>>of("snapshot1",
+ ImmutableList.of(new Scan(Bytes.toBytes("1"), Bytes.toBytes("2"))), "snapshot2",
+ ImmutableList.of(new Scan(Bytes.toBytes("3"), Bytes.toBytes("4")),
+ new Scan(Bytes.toBytes("5"), Bytes.toBytes("6"))));
+
+ this.restoreDir = new Path(FSUtils.getRootDir(conf), "restore-dir");
+
+ }
+
+ public void callSetInput() throws IOException {
+ subject.setInput(this.conf, snapshotScans, restoreDir);
+ }
+
+ public Map<String, Collection<ScanWithEquals>> toScanWithEquals(
+ Map<String, Collection<Scan>> snapshotScans) throws IOException {
+ Map<String, Collection<ScanWithEquals>> rtn = Maps.newHashMap();
+
+ for (Map.Entry<String, Collection<Scan>> entry : snapshotScans.entrySet()) {
+ List<ScanWithEquals> scans = Lists.newArrayList();
+
+ for (Scan scan : entry.getValue()) {
+ scans.add(new ScanWithEquals(scan));
+ }
+ rtn.put(entry.getKey(), scans);
+ }
+
+ return rtn;
+ }
+
+ public static class ScanWithEquals {
+
+ private final String startRow;
+ private final String stopRow;
+
+ /**
+ * Creates a new instance of this class while copying all values.
+ *
+ * @param scan The scan instance to copy from.
+ * @throws java.io.IOException When copying the values fails.
+ */
+ public ScanWithEquals(Scan scan) throws IOException {
+ this.startRow = Bytes.toStringBinary(scan.getStartRow());
+ this.stopRow = Bytes.toStringBinary(scan.getStopRow());
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (!(obj instanceof ScanWithEquals)) {
+ return false;
+ }
+ ScanWithEquals otherScan = (ScanWithEquals) obj;
+ return Objects.equals(this.startRow, otherScan.startRow) && Objects
+ .equals(this.stopRow, otherScan.stopRow);
+ }
+
+ @Override
+ public String toString() {
+ return org.apache.hadoop.hbase.shaded.com.google.common.base.MoreObjects.
+ toStringHelper(this).add("startRow", startRow)
+ .add("stopRow", stopRow).toString();
+ }
+ }
+
+ @Test
+ public void testSetInputSetsSnapshotToScans() throws Exception {
+
+ callSetInput();
+
+ Map<String, Collection<Scan>> actual = subject.getSnapshotsToScans(conf);
+
+ // convert to scans we can use .equals on
+ Map<String, Collection<ScanWithEquals>> actualWithEquals = toScanWithEquals(actual);
+ Map<String, Collection<ScanWithEquals>> expectedWithEquals = toScanWithEquals(snapshotScans);
+
+ assertEquals(expectedWithEquals, actualWithEquals);
+ }
+
+ @Test
+ public void testSetInputPushesRestoreDirectories() throws Exception {
+ callSetInput();
+
+ Map<String, Path> restoreDirs = subject.getSnapshotDirs(conf);
+
+ assertEquals(this.snapshotScans.keySet(), restoreDirs.keySet());
+ }
+
+ @Test
+ public void testSetInputCreatesRestoreDirectoriesUnderRootRestoreDir() throws Exception {
+ callSetInput();
+
+ Map<String, Path> restoreDirs = subject.getSnapshotDirs(conf);
+
+ for (Path snapshotDir : restoreDirs.values()) {
+ assertEquals("Expected " + snapshotDir + " to be a child of " + restoreDir, restoreDir,
+ snapshotDir.getParent());
+ }
+ }
+
+ @Test
+ public void testSetInputRestoresSnapshots() throws Exception {
+ callSetInput();
+
+ Map<String, Path> snapshotDirs = subject.getSnapshotDirs(conf);
+
+ for (Map.Entry<String, Path> entry : snapshotDirs.entrySet()) {
+ verify(this.subject).restoreSnapshot(eq(this.conf), eq(entry.getKey()), eq(this.rootDir),
+ eq(entry.getValue()), any(FileSystem.class));
+ }
+ }
+}
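For reference, a minimal sketch of how MultiTableSnapshotInputFormat is typically wired into a job on top of the setInput()/getSnapshotsToScans() plumbing tested above, assuming the TableMapReduceUtil.initMultiTableSnapshotMapperJob helper; the mapper class, output types and restore path are illustrative:

  Configuration conf = HBaseConfiguration.create();
  Job job = Job.getInstance(conf, "multi-snapshot-scan");
  // snapshot name -> scans to run over the restored snapshot files
  Map<String, Collection<Scan>> snapshotScans = ImmutableMap.<String, Collection<Scan>>of(
      "snapshot1", ImmutableList.of(new Scan(Bytes.toBytes("1"), Bytes.toBytes("2"))));
  TableMapReduceUtil.initMultiTableSnapshotMapperJob(snapshotScans,
      MyMapper.class,                              // hypothetical TableMapper subclass
      ImmutableBytesWritable.class, Result.class,  // mapper output key/value types
      job, true,
      new Path("/tmp/snapshot-restore"));          // restore dir; illustrative path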
[22/41] hbase git commit: HBASE-18640 Move mapreduce out of
hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TsvImporterCustomTestMapperForOprAttr.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TsvImporterCustomTestMapperForOprAttr.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TsvImporterCustomTestMapperForOprAttr.java
new file mode 100644
index 0000000..a9da98b
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TsvImporterCustomTestMapperForOprAttr.java
@@ -0,0 +1,57 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser.BadTsvLineException;
+import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser.ParsedLine;
+import org.apache.hadoop.hbase.util.Bytes;
+
+/**
+ * Just shows a simple example of how the attributes can be extracted and added
+ * to the puts
+ */
+public class TsvImporterCustomTestMapperForOprAttr extends TsvImporterMapper {
+ @Override
+ protected void populatePut(byte[] lineBytes, ParsedLine parsed, Put put, int i)
+ throws BadTsvLineException, IOException {
+ KeyValue kv;
+ kv = new KeyValue(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength(),
+ parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0,
+ parser.getQualifier(i).length, ts, KeyValue.Type.Put, lineBytes, parsed.getColumnOffset(i),
+ parsed.getColumnLength(i));
+ if (parsed.getIndividualAttributes() != null) {
+ String[] attributes = parsed.getIndividualAttributes();
+ for (String attr : attributes) {
+ String[] split = attr.split(ImportTsv.DEFAULT_ATTRIBUTES_SEPERATOR);
+ if (split == null || split.length <= 1) {
+ throw new BadTsvLineException("Invalid attributes seperator specified" + attributes);
+ } else {
+ if (split[0].length() <= 0 || split[1].length() <= 0) {
+ throw new BadTsvLineException("Invalid attributes seperator specified" + attributes);
+ }
+ put.setAttribute(split[0], Bytes.toBytes(split[1]));
+ }
+ }
+ }
+ put.add(kv);
+ }
+}
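For reference, a minimal sketch of pointing ImportTsv at the custom mapper above; the configuration keys follow ImportTsv's property names, while the column spec and column family are illustrative assumptions:

  Configuration conf = HBaseConfiguration.create();
  // use the custom mapper instead of the default TsvImporterMapper
  conf.set("importtsv.mapper.class",
      "org.apache.hadoop.hbase.mapreduce.TsvImporterCustomTestMapperForOprAttr");
  // row key, one data column, and the operation-attributes column (illustrative spec)
  conf.set("importtsv.columns", "HBASE_ROW_KEY,d:c1,HBASE_ATTRIBUTES_KEY");
  // each entry in the attributes column is expected as
  // key + ImportTsv.DEFAULT_ATTRIBUTES_SEPERATOR + value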
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationSmallTests.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationSmallTests.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationSmallTests.java
new file mode 100644
index 0000000..69c4c7c
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationSmallTests.java
@@ -0,0 +1,1059 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.replication;
+
+import static org.junit.Assert.*;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.NavigableMap;
+import java.util.TreeMap;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.Waiter;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.client.replication.ReplicationAdmin;
+import org.apache.hadoop.hbase.mapreduce.replication.VerifyReplication;
+import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl;
+import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
+import org.apache.hadoop.hbase.replication.regionserver.Replication;
+import org.apache.hadoop.hbase.replication.regionserver.ReplicationSource;
+import org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceInterface;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos;
+import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.ReplicationTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.util.JVMClusterUtil;
+import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
+import org.apache.hadoop.hbase.wal.WAL;
+import org.apache.hadoop.hbase.wal.WALKey;
+import org.apache.hadoop.mapreduce.Job;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
+
+@Category({ReplicationTests.class, LargeTests.class})
+public class TestReplicationSmallTests extends TestReplicationBase {
+
+ private static final Log LOG = LogFactory.getLog(TestReplicationSmallTests.class);
+ private static final String PEER_ID = "2";
+
+ @Rule
+ public TestName name = new TestName();
+
+ /**
+ * @throws java.lang.Exception
+ */
+ @Before
+ public void setUp() throws Exception {
+ // Starting and stopping replication can make us miss new logs,
+ // rolling like this makes sure the most recent one gets added to the queue
+ for ( JVMClusterUtil.RegionServerThread r :
+ utility1.getHBaseCluster().getRegionServerThreads()) {
+ utility1.getAdmin().rollWALWriter(r.getRegionServer().getServerName());
+ }
+ int rowCount = utility1.countRows(tableName);
+ utility1.deleteTableData(tableName);
+ // truncating the table will send one Delete per row to the slave cluster
+ // in an async fashion, which is why we cannot just call deleteTableData on
+ // utility2 since late writes could make it to the slave in some way.
+ // Instead, we truncate the first table and wait for all the Deletes to
+ // make it to the slave.
+ Scan scan = new Scan();
+ int lastCount = 0;
+ for (int i = 0; i < NB_RETRIES; i++) {
+ if (i==NB_RETRIES-1) {
+ fail("Waited too much time for truncate");
+ }
+ ResultScanner scanner = htable2.getScanner(scan);
+ Result[] res = scanner.next(rowCount);
+ scanner.close();
+ if (res.length != 0) {
+ if (res.length < lastCount) {
+ i--; // Don't increment timeout if we make progress
+ }
+ lastCount = res.length;
+ LOG.info("Still got " + res.length + " rows");
+ Thread.sleep(SLEEP_TIME);
+ } else {
+ break;
+ }
+ }
+ }
+
+ /**
+ * Verify that version and column delete marker types are replicated
+ * correctly.
+ * @throws Exception
+ */
+ @Test(timeout=300000)
+ public void testDeleteTypes() throws Exception {
+ LOG.info("testDeleteTypes");
+ final byte[] v1 = Bytes.toBytes("v1");
+ final byte[] v2 = Bytes.toBytes("v2");
+ final byte[] v3 = Bytes.toBytes("v3");
+ htable1 = utility1.getConnection().getTable(tableName);
+
+ long t = EnvironmentEdgeManager.currentTime();
+ // create three versions for "row"
+ Put put = new Put(row);
+ put.addColumn(famName, row, t, v1);
+ htable1.put(put);
+
+ put = new Put(row);
+ put.addColumn(famName, row, t + 1, v2);
+ htable1.put(put);
+
+ put = new Put(row);
+ put.addColumn(famName, row, t + 2, v3);
+ htable1.put(put);
+
+ Get get = new Get(row);
+ get.setMaxVersions();
+ for (int i = 0; i < NB_RETRIES; i++) {
+ if (i==NB_RETRIES-1) {
+ fail("Waited too much time for put replication");
+ }
+ Result res = htable2.get(get);
+ if (res.size() < 3) {
+ LOG.info("Rows not available");
+ Thread.sleep(SLEEP_TIME);
+ } else {
+ assertArrayEquals(CellUtil.cloneValue(res.rawCells()[0]), v3);
+ assertArrayEquals(CellUtil.cloneValue(res.rawCells()[1]), v2);
+ assertArrayEquals(CellUtil.cloneValue(res.rawCells()[2]), v1);
+ break;
+ }
+ }
+ // place a version delete marker (delete last version)
+ Delete d = new Delete(row);
+ d.addColumn(famName, row, t);
+ htable1.delete(d);
+
+ get = new Get(row);
+ get.setMaxVersions();
+ for (int i = 0; i < NB_RETRIES; i++) {
+ if (i==NB_RETRIES-1) {
+ fail("Waited too much time for put replication");
+ }
+ Result res = htable2.get(get);
+ if (res.size() > 2) {
+ LOG.info("Version not deleted");
+ Thread.sleep(SLEEP_TIME);
+ } else {
+ assertArrayEquals(CellUtil.cloneValue(res.rawCells()[0]), v3);
+ assertArrayEquals(CellUtil.cloneValue(res.rawCells()[1]), v2);
+ break;
+ }
+ }
+
+ // place a column delete marker
+ d = new Delete(row);
+ d.addColumns(famName, row, t+2);
+ htable1.delete(d);
+
+ // now *both* of the remaining version should be deleted
+ // at the replica
+ get = new Get(row);
+ for (int i = 0; i < NB_RETRIES; i++) {
+ if (i==NB_RETRIES-1) {
+ fail("Waited too much time for del replication");
+ }
+ Result res = htable2.get(get);
+ if (res.size() >= 1) {
+ LOG.info("Rows not deleted");
+ Thread.sleep(SLEEP_TIME);
+ } else {
+ break;
+ }
+ }
+ }
+
+ /**
+ * Add a row, check it's replicated, delete it, check it's gone.
+ * @throws Exception
+ */
+ @Test(timeout=300000)
+ public void testSimplePutDelete() throws Exception {
+ LOG.info("testSimplePutDelete");
+ Put put = new Put(row);
+ put.addColumn(famName, row, row);
+
+ htable1 = utility1.getConnection().getTable(tableName);
+ htable1.put(put);
+
+ Get get = new Get(row);
+ for (int i = 0; i < NB_RETRIES; i++) {
+ if (i==NB_RETRIES-1) {
+ fail("Waited too much time for put replication");
+ }
+ Result res = htable2.get(get);
+ if (res.isEmpty()) {
+ LOG.info("Row not available");
+ Thread.sleep(SLEEP_TIME);
+ } else {
+ assertArrayEquals(res.value(), row);
+ break;
+ }
+ }
+
+ Delete del = new Delete(row);
+ htable1.delete(del);
+
+ get = new Get(row);
+ for (int i = 0; i < NB_RETRIES; i++) {
+ if (i==NB_RETRIES-1) {
+ fail("Waited too much time for del replication");
+ }
+ Result res = htable2.get(get);
+ if (res.size() >= 1) {
+ LOG.info("Row not deleted");
+ Thread.sleep(SLEEP_TIME);
+ } else {
+ break;
+ }
+ }
+ }
+
+ /**
+ * Try a small batch upload using the write buffer, check it's replicated
+ * @throws Exception
+ */
+ @Test(timeout=300000)
+ public void testSmallBatch() throws Exception {
+ LOG.info("testSmallBatch");
+ // normal Batch tests
+ loadData("", row);
+
+ Scan scan = new Scan();
+
+ ResultScanner scanner1 = htable1.getScanner(scan);
+ Result[] res1 = scanner1.next(NB_ROWS_IN_BATCH);
+ scanner1.close();
+ assertEquals(NB_ROWS_IN_BATCH, res1.length);
+
+ waitForReplication(NB_ROWS_IN_BATCH, NB_RETRIES);
+ }
+
+ private void waitForReplication(int expectedRows, int retries) throws IOException, InterruptedException {
+ Scan scan;
+ for (int i = 0; i < retries; i++) {
+ scan = new Scan();
+ if (i== retries -1) {
+ fail("Waited too much time for normal batch replication");
+ }
+ ResultScanner scanner = htable2.getScanner(scan);
+ Result[] res = scanner.next(expectedRows);
+ scanner.close();
+ if (res.length != expectedRows) {
+ LOG.info("Only got " + res.length + " rows");
+ Thread.sleep(SLEEP_TIME);
+ } else {
+ break;
+ }
+ }
+ }
+
+ private void loadData(String prefix, byte[] row) throws IOException {
+ List<Put> puts = new ArrayList<>(NB_ROWS_IN_BATCH);
+ for (int i = 0; i < NB_ROWS_IN_BATCH; i++) {
+ Put put = new Put(Bytes.toBytes(prefix + Integer.toString(i)));
+ put.addColumn(famName, row, row);
+ puts.add(put);
+ }
+ htable1.put(puts);
+ }
+
+ /**
+ * Test disabling replication: insert a row and make sure it is not replicated,
+ * then re-enable the peer and verify the insert eventually shows up on the slave.
+ *
+ * @throws Exception
+ */
+ @Test(timeout = 300000)
+ public void testDisableEnable() throws Exception {
+
+ // Test disabling replication
+ admin.disablePeer(PEER_ID);
+
+ byte[] rowkey = Bytes.toBytes("disable enable");
+ Put put = new Put(rowkey);
+ put.addColumn(famName, row, row);
+ htable1.put(put);
+
+ Get get = new Get(rowkey);
+ for (int i = 0; i < NB_RETRIES; i++) {
+ Result res = htable2.get(get);
+ if (res.size() >= 1) {
+ fail("Replication wasn't disabled");
+ } else {
+ LOG.info("Row not replicated, let's wait a bit more...");
+ Thread.sleep(SLEEP_TIME);
+ }
+ }
+
+ // Test enable replication
+ admin.enablePeer(PEER_ID);
+
+ for (int i = 0; i < NB_RETRIES; i++) {
+ Result res = htable2.get(get);
+ if (res.isEmpty()) {
+ LOG.info("Row not available");
+ Thread.sleep(SLEEP_TIME);
+ } else {
+ assertArrayEquals(res.value(), row);
+ return;
+ }
+ }
+ fail("Waited too much time for put replication");
+ }
+
+ /**
+ * Integration test for TestReplicationAdmin: removes and re-adds a peer
+ * cluster.
+ *
+ * @throws Exception
+ */
+ @Test(timeout=300000)
+ public void testAddAndRemoveClusters() throws Exception {
+ LOG.info("testAddAndRemoveClusters");
+ admin.removePeer(PEER_ID);
+ Thread.sleep(SLEEP_TIME);
+ byte[] rowKey = Bytes.toBytes("Won't be replicated");
+ Put put = new Put(rowKey);
+ put.addColumn(famName, row, row);
+ htable1.put(put);
+
+ Get get = new Get(rowKey);
+ for (int i = 0; i < NB_RETRIES; i++) {
+ if (i == NB_RETRIES-1) {
+ break;
+ }
+ Result res = htable2.get(get);
+ if (res.size() >= 1) {
+ fail("Not supposed to be replicated");
+ } else {
+ LOG.info("Row not replicated, let's wait a bit more...");
+ Thread.sleep(SLEEP_TIME);
+ }
+ }
+ ReplicationPeerConfig rpc = new ReplicationPeerConfig();
+ rpc.setClusterKey(utility2.getClusterKey());
+ admin.addPeer(PEER_ID, rpc, null);
+ Thread.sleep(SLEEP_TIME);
+ rowKey = Bytes.toBytes("do rep");
+ put = new Put(rowKey);
+ put.addColumn(famName, row, row);
+ LOG.info("Adding new row");
+ htable1.put(put);
+
+ get = new Get(rowKey);
+ for (int i = 0; i < NB_RETRIES; i++) {
+ if (i==NB_RETRIES-1) {
+ fail("Waited too much time for put replication");
+ }
+ Result res = htable2.get(get);
+ if (res.isEmpty()) {
+ LOG.info("Row not available");
+ Thread.sleep(SLEEP_TIME*i);
+ } else {
+ assertArrayEquals(res.value(), row);
+ break;
+ }
+ }
+ }
+
+
+ /**
+ * Do a more intense version of testSmallBatch, one that will trigger
+ * wal rolling and other non-trivial code paths
+ * @throws Exception
+ */
+ @Test(timeout=300000)
+ public void testLoading() throws Exception {
+ LOG.info("Writing out rows to table1 in testLoading");
+ List<Put> puts = new ArrayList<>(NB_ROWS_IN_BIG_BATCH);
+ for (int i = 0; i < NB_ROWS_IN_BIG_BATCH; i++) {
+ Put put = new Put(Bytes.toBytes(i));
+ put.addColumn(famName, row, row);
+ puts.add(put);
+ }
+ // The puts will be iterated through and flushed only when the buffer
+ // size is reached.
+ htable1.put(puts);
+
+ Scan scan = new Scan();
+
+ ResultScanner scanner = htable1.getScanner(scan);
+ Result[] res = scanner.next(NB_ROWS_IN_BIG_BATCH);
+ scanner.close();
+
+ assertEquals(NB_ROWS_IN_BIG_BATCH, res.length);
+
+ LOG.info("Looking in table2 for replicated rows in testLoading");
+ long start = System.currentTimeMillis();
+ // Retry more than NB_RETRIES. As it was, retries were done in 5 seconds and we'd fail
+ // sometimes.
+ final long retries = NB_RETRIES * 10;
+ for (int i = 0; i < retries; i++) {
+ scan = new Scan();
+ scanner = htable2.getScanner(scan);
+ res = scanner.next(NB_ROWS_IN_BIG_BATCH);
+ scanner.close();
+ if (res.length != NB_ROWS_IN_BIG_BATCH) {
+ if (i == retries - 1) {
+ int lastRow = -1;
+ for (Result result : res) {
+ int currentRow = Bytes.toInt(result.getRow());
+ for (int row = lastRow+1; row < currentRow; row++) {
+ LOG.error("Row missing: " + row);
+ }
+ lastRow = currentRow;
+ }
+ LOG.error("Last row: " + lastRow);
+ fail("Waited too much time for normal batch replication, " +
+ res.length + " instead of " + NB_ROWS_IN_BIG_BATCH + "; waited=" +
+ (System.currentTimeMillis() - start) + "ms");
+ } else {
+ LOG.info("Only got " + res.length + " rows... retrying");
+ Thread.sleep(SLEEP_TIME);
+ }
+ } else {
+ break;
+ }
+ }
+ }
+
+ /**
+ * Do a small loading into a table, make sure the data is really the same,
+ * then run the VerifyReplication job to check the results. Do a second
+ * comparison where all the cells are different.
+ * @throws Exception
+ */
+ @Test(timeout=300000)
+ public void testVerifyRepJob() throws Exception {
+ // Populate the tables, at the same time it guarantees that the tables are
+ // identical since it does the check
+ testSmallBatch();
+
+ String[] args = new String[] {PEER_ID, tableName.getNameAsString()};
+ runVerifyReplication(args, NB_ROWS_IN_BATCH, 0);
+
+ Scan scan = new Scan();
+ ResultScanner rs = htable2.getScanner(scan);
+ Put put = null;
+ for (Result result : rs) {
+ put = new Put(result.getRow());
+ Cell firstVal = result.rawCells()[0];
+ put.addColumn(CellUtil.cloneFamily(firstVal), CellUtil.cloneQualifier(firstVal),
+ Bytes.toBytes("diff data"));
+ htable2.put(put);
+ }
+ Delete delete = new Delete(put.getRow());
+ htable2.delete(delete);
+ runVerifyReplication(args, 0, NB_ROWS_IN_BATCH);
+ }
+
+ /**
+ * Load a row into a table, make sure the data is really the same,
+ * delete the row, make sure the delete marker is replicated,
+ * run verify replication with and without raw to check the results.
+ * @throws Exception
+ */
+ @Test(timeout=300000)
+ public void testVerifyRepJobWithRawOptions() throws Exception {
+ LOG.info(name.getMethodName());
+
+ final TableName tableName = TableName.valueOf(name.getMethodName());
+ byte[] familyname = Bytes.toBytes("fam_raw");
+ byte[] row = Bytes.toBytes("row_raw");
+
+ Table lHtable1 = null;
+ Table lHtable2 = null;
+
+ try {
+ HTableDescriptor table = new HTableDescriptor(tableName);
+ HColumnDescriptor fam = new HColumnDescriptor(familyname);
+ fam.setMaxVersions(100);
+ fam.setScope(HConstants.REPLICATION_SCOPE_GLOBAL);
+ table.addFamily(fam);
+ scopes = new TreeMap<>(Bytes.BYTES_COMPARATOR);
+ for (HColumnDescriptor f : table.getColumnFamilies()) {
+ scopes.put(f.getName(), f.getScope());
+ }
+
+ Connection connection1 = ConnectionFactory.createConnection(conf1);
+ Connection connection2 = ConnectionFactory.createConnection(conf2);
+ try (Admin admin1 = connection1.getAdmin()) {
+ admin1.createTable(table, HBaseTestingUtility.KEYS_FOR_HBA_CREATE_TABLE);
+ }
+ try (Admin admin2 = connection2.getAdmin()) {
+ admin2.createTable(table, HBaseTestingUtility.KEYS_FOR_HBA_CREATE_TABLE);
+ }
+ utility1.waitUntilAllRegionsAssigned(tableName);
+ utility2.waitUntilAllRegionsAssigned(tableName);
+
+ lHtable1 = utility1.getConnection().getTable(tableName);
+ lHtable2 = utility2.getConnection().getTable(tableName);
+
+ Put put = new Put(row);
+ put.addColumn(familyname, row, row);
+ lHtable1.put(put);
+
+ Get get = new Get(row);
+ for (int i = 0; i < NB_RETRIES; i++) {
+ if (i==NB_RETRIES-1) {
+ fail("Waited too much time for put replication");
+ }
+ Result res = lHtable2.get(get);
+ if (res.isEmpty()) {
+ LOG.info("Row not available");
+ Thread.sleep(SLEEP_TIME);
+ } else {
+ assertArrayEquals(res.value(), row);
+ break;
+ }
+ }
+
+ Delete del = new Delete(row);
+ lHtable1.delete(del);
+
+ get = new Get(row);
+ for (int i = 0; i < NB_RETRIES; i++) {
+ if (i==NB_RETRIES-1) {
+ fail("Waited too much time for del replication");
+ }
+ Result res = lHtable2.get(get);
+ if (res.size() >= 1) {
+ LOG.info("Row not deleted");
+ Thread.sleep(SLEEP_TIME);
+ } else {
+ break;
+ }
+ }
+
+ // Checking verifyReplication for the default behavior.
+ String[] argsWithoutRaw = new String[] {PEER_ID, tableName.getNameAsString()};
+ runVerifyReplication(argsWithoutRaw, 0, 0);
+
+ // Checking verifyReplication with raw
+ String[] argsWithRawAsTrue = new String[] {"--raw", PEER_ID, tableName.getNameAsString()};
+ runVerifyReplication(argsWithRawAsTrue, 1, 0);
+ } finally {
+ if (lHtable1 != null) {
+ lHtable1.close();
+ }
+ if (lHtable2 != null) {
+ lHtable2.close();
+ }
+ }
+ }
+
+ private void runVerifyReplication(String[] args, int expectedGoodRows, int expectedBadRows)
+ throws IOException, InterruptedException, ClassNotFoundException {
+ Job job = new VerifyReplication().createSubmittableJob(new Configuration(conf1), args);
+ if (job == null) {
+ fail("Job wasn't created, see the log");
+ }
+ if (!job.waitForCompletion(true)) {
+ fail("Job failed, see the log");
+ }
+ assertEquals(expectedGoodRows, job.getCounters().
+ findCounter(VerifyReplication.Verifier.Counters.GOODROWS).getValue());
+ assertEquals(expectedBadRows, job.getCounters().
+ findCounter(VerifyReplication.Verifier.Counters.BADROWS).getValue());
+ }
+
+ @Test(timeout=300000)
+ // VerifyReplication should honor versions option
+ public void testHBase14905() throws Exception {
+ // normal Batch tests
+ byte[] qualifierName = Bytes.toBytes("f1");
+ Put put = new Put(Bytes.toBytes("r1"));
+ put.addColumn(famName, qualifierName, Bytes.toBytes("v1002"));
+ htable1.put(put);
+ put.addColumn(famName, qualifierName, Bytes.toBytes("v1001"));
+ htable1.put(put);
+ put.addColumn(famName, qualifierName, Bytes.toBytes("v1112"));
+ htable1.put(put);
+
+ Scan scan = new Scan();
+ scan.setMaxVersions(100);
+ ResultScanner scanner1 = htable1.getScanner(scan);
+ Result[] res1 = scanner1.next(1);
+ scanner1.close();
+
+ assertEquals(1, res1.length);
+ assertEquals(3, res1[0].getColumnCells(famName, qualifierName).size());
+
+ for (int i = 0; i < NB_RETRIES; i++) {
+ scan = new Scan();
+ scan.setMaxVersions(100);
+ scanner1 = htable2.getScanner(scan);
+ res1 = scanner1.next(1);
+ scanner1.close();
+ if (res1.length != 1) {
+ LOG.info("Only got " + res1.length + " rows");
+ Thread.sleep(SLEEP_TIME);
+ } else {
+ int cellNumber = res1[0].getColumnCells(famName, Bytes.toBytes("f1")).size();
+ if (cellNumber != 3) {
+ LOG.info("Only got " + cellNumber + " cells");
+ Thread.sleep(SLEEP_TIME);
+ } else {
+ break;
+ }
+ }
+ if (i == NB_RETRIES-1) {
+ fail("Waited too much time for normal batch replication");
+ }
+ }
+
+ put.addColumn(famName, qualifierName, Bytes.toBytes("v1111"));
+ htable2.put(put);
+ put.addColumn(famName, qualifierName, Bytes.toBytes("v1112"));
+ htable2.put(put);
+
+ scan = new Scan();
+ scan.setMaxVersions(100);
+ scanner1 = htable2.getScanner(scan);
+ res1 = scanner1.next(NB_ROWS_IN_BATCH);
+ scanner1.close();
+
+ assertEquals(1, res1.length);
+ assertEquals(5, res1[0].getColumnCells(famName, qualifierName).size());
+
+ String[] args = new String[] {"--versions=100", PEER_ID, tableName.getNameAsString()};
+ runVerifyReplication(args, 0, 1);
+ }
+
+ @Test(timeout=300000)
+ // VerifyReplication should honor versions option
+ public void testVersionMismatchHBase14905() throws Exception {
+ // normal Batch tests
+ byte[] qualifierName = Bytes.toBytes("f1");
+ Put put = new Put(Bytes.toBytes("r1"));
+ long ts = System.currentTimeMillis();
+ put.addColumn(famName, qualifierName, ts + 1, Bytes.toBytes("v1"));
+ htable1.put(put);
+ put.addColumn(famName, qualifierName, ts + 2, Bytes.toBytes("v2"));
+ htable1.put(put);
+ put.addColumn(famName, qualifierName, ts + 3, Bytes.toBytes("v3"));
+ htable1.put(put);
+
+ Scan scan = new Scan();
+ scan.setMaxVersions(100);
+ ResultScanner scanner1 = htable1.getScanner(scan);
+ Result[] res1 = scanner1.next(1);
+ scanner1.close();
+
+ assertEquals(1, res1.length);
+ assertEquals(3, res1[0].getColumnCells(famName, qualifierName).size());
+
+ for (int i = 0; i < NB_RETRIES; i++) {
+ scan = new Scan();
+ scan.setMaxVersions(100);
+ scanner1 = htable2.getScanner(scan);
+ res1 = scanner1.next(1);
+ scanner1.close();
+ if (res1.length != 1) {
+ LOG.info("Only got " + res1.length + " rows");
+ Thread.sleep(SLEEP_TIME);
+ } else {
+ int cellNumber = res1[0].getColumnCells(famName, Bytes.toBytes("f1")).size();
+ if (cellNumber != 3) {
+ LOG.info("Only got " + cellNumber + " cells");
+ Thread.sleep(SLEEP_TIME);
+ } else {
+ break;
+ }
+ }
+ if (i == NB_RETRIES-1) {
+ fail("Waited too much time for normal batch replication");
+ }
+ }
+
+ try {
+ // Disabling replication and modifying the particular version of the cell to validate the feature.
+ admin.disablePeer(PEER_ID);
+ Put put2 = new Put(Bytes.toBytes("r1"));
+ put2.addColumn(famName, qualifierName, ts +2, Bytes.toBytes("v99"));
+ htable2.put(put2);
+
+ scan = new Scan();
+ scan.setMaxVersions(100);
+ scanner1 = htable2.getScanner(scan);
+ res1 = scanner1.next(NB_ROWS_IN_BATCH);
+ scanner1.close();
+ assertEquals(1, res1.length);
+ assertEquals(3, res1[0].getColumnCells(famName, qualifierName).size());
+
+ String[] args = new String[] {"--versions=100", PEER_ID, tableName.getNameAsString()};
+ runVerifyReplication(args, 0, 1);
+ }
+ finally {
+ admin.enablePeer(PEER_ID);
+ }
+ }
+
+ /**
+ * Test for HBASE-9038, Replication.scopeWALEdits would NPE if it wasn't filtering out
+ * the compaction WALEdit
+ * @throws Exception
+ */
+ @Test(timeout=300000)
+ public void testCompactionWALEdits() throws Exception {
+ WALProtos.CompactionDescriptor compactionDescriptor =
+ WALProtos.CompactionDescriptor.getDefaultInstance();
+ HRegionInfo hri = new HRegionInfo(htable1.getName(),
+ HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW);
+ WALEdit edit = WALEdit.createCompaction(hri, compactionDescriptor);
+ Replication.scopeWALEdits(new WALKey(), edit,
+ htable1.getConfiguration(), null);
+ }
+
+ /**
+ * Test for HBASE-8663
+ * Create three new tables with column families enabled for replication, then run
+ * ReplicationAdmin.listReplicated(). Finally verify the table:colfamilies. Note:
+ * TestReplicationAdmin is a better place for this testing but it would need mocks.
+ * @throws Exception
+ */
+ @Test(timeout = 300000)
+ public void testVerifyListReplicatedTable() throws Exception {
+ LOG.info("testVerifyListReplicatedTable");
+
+ final String tName = "VerifyListReplicated_";
+ final String colFam = "cf1";
+ final int numOfTables = 3;
+
+ Admin hadmin = utility1.getAdmin();
+
+ // Create Tables
+ for (int i = 0; i < numOfTables; i++) {
+ HTableDescriptor ht = new HTableDescriptor(TableName.valueOf(tName + i));
+ HColumnDescriptor cfd = new HColumnDescriptor(colFam);
+ cfd.setScope(HConstants.REPLICATION_SCOPE_GLOBAL);
+ ht.addFamily(cfd);
+ hadmin.createTable(ht);
+ }
+
+ // verify the result
+ List<HashMap<String, String>> replicationColFams = admin.listReplicated();
+ int[] match = new int[numOfTables]; // array of 3 with init value of zero
+
+ for (int i = 0; i < replicationColFams.size(); i++) {
+ HashMap<String, String> replicationEntry = replicationColFams.get(i);
+ String tn = replicationEntry.get(ReplicationAdmin.TNAME);
+ if ((tn.startsWith(tName)) && replicationEntry.get(ReplicationAdmin.CFNAME).equals(colFam)) {
+ int m = Integer.parseInt(tn.substring(tn.length() - 1)); // get the last digit
+ match[m]++; // should only increase once
+ }
+ }
+
+ // check the matching result
+ for (int i = 0; i < match.length; i++) {
+ assertTrue("listReplicated() does not match table " + i, (match[i] == 1));
+ }
+
+ // drop tables
+ for (int i = 0; i < numOfTables; i++) {
+ TableName tableName = TableName.valueOf(tName + i);
+ hadmin.disableTable(tableName);
+ hadmin.deleteTable(tableName);
+ }
+
+ hadmin.close();
+ }
+
+ /**
+ * Test for HBASE-15259: verify that WALEdits marked as replay are not replicated.
+ */
+ @Test
+ public void testReplicationInReplay() throws Exception {
+ final TableName tableName = htable1.getName();
+
+ HRegion region = utility1.getMiniHBaseCluster().getRegions(tableName).get(0);
+ HRegionInfo hri = region.getRegionInfo();
+ NavigableMap<byte[], Integer> scopes = new TreeMap<>(Bytes.BYTES_COMPARATOR);
+ for (byte[] fam : htable1.getTableDescriptor().getFamiliesKeys()) {
+ scopes.put(fam, 1);
+ }
+ final MultiVersionConcurrencyControl mvcc = new MultiVersionConcurrencyControl();
+ int index = utility1.getMiniHBaseCluster().getServerWith(hri.getRegionName());
+ WAL wal = utility1.getMiniHBaseCluster().getRegionServer(index).getWAL(region.getRegionInfo());
+ final byte[] rowName = Bytes.toBytes("testReplicationInReplay");
+ final byte[] qualifier = Bytes.toBytes("q");
+ final byte[] value = Bytes.toBytes("v");
+ WALEdit edit = new WALEdit(true);
+ long now = EnvironmentEdgeManager.currentTime();
+ edit.add(new KeyValue(rowName, famName, qualifier,
+ now, value));
+ WALKey walKey = new WALKey(hri.getEncodedNameAsBytes(), tableName, now, mvcc, scopes);
+ wal.append(hri, walKey, edit, true);
+ wal.sync();
+
+ Get get = new Get(rowName);
+ for (int i = 0; i < NB_RETRIES; i++) {
+ if (i == NB_RETRIES-1) {
+ break;
+ }
+ Result res = htable2.get(get);
+ if (res.size() >= 1) {
+ fail("Not supposed to be replicated for " + Bytes.toString(res.getRow()));
+ } else {
+ LOG.info("Row not replicated, let's wait a bit more...");
+ Thread.sleep(SLEEP_TIME);
+ }
+ }
+ }
+
+ @Test(timeout=300000)
+ public void testVerifyReplicationPrefixFiltering() throws Exception {
+ final byte[] prefixRow = Bytes.toBytes("prefixrow");
+ final byte[] prefixRow2 = Bytes.toBytes("secondrow");
+ loadData("prefixrow", prefixRow);
+ loadData("secondrow", prefixRow2);
+ loadData("aaa", row);
+ loadData("zzz", row);
+ waitForReplication(NB_ROWS_IN_BATCH * 4, NB_RETRIES * 4);
+ String[] args = new String[] {"--row-prefixes=prefixrow,secondrow", PEER_ID,
+ tableName.getNameAsString()};
+ runVerifyReplication(args, NB_ROWS_IN_BATCH *2, 0);
+ }
+
+ @Test(timeout = 300000)
+ public void testVerifyReplicationSnapshotArguments() {
+ String[] args =
+ new String[] { "--sourceSnapshotName=snapshot1", "2", tableName.getNameAsString() };
+ assertFalse(Lists.newArrayList(args).toString(), new VerifyReplication().doCommandLine(args));
+
+ args = new String[] { "--sourceSnapshotTmpDir=tmp", "2", tableName.getNameAsString() };
+ assertFalse(Lists.newArrayList(args).toString(), new VerifyReplication().doCommandLine(args));
+
+ args = new String[] { "--sourceSnapshotName=snapshot1", "--sourceSnapshotTmpDir=tmp", "2",
+ tableName.getNameAsString() };
+ assertTrue(Lists.newArrayList(args).toString(), new VerifyReplication().doCommandLine(args));
+
+ args = new String[] { "--peerSnapshotName=snapshot1", "2", tableName.getNameAsString() };
+ assertFalse(Lists.newArrayList(args).toString(), new VerifyReplication().doCommandLine(args));
+
+ args = new String[] { "--peerSnapshotTmpDir=/tmp/", "2", tableName.getNameAsString() };
+ assertFalse(Lists.newArrayList(args).toString(), new VerifyReplication().doCommandLine(args));
+
+ args = new String[] { "--peerSnapshotName=snapshot1", "--peerSnapshotTmpDir=/tmp/",
+ "--peerFSAddress=tempfs", "--peerHBaseRootAddress=hdfs://tempfs:50070/hbase/", "2",
+ tableName.getNameAsString() };
+ assertTrue(Lists.newArrayList(args).toString(), new VerifyReplication().doCommandLine(args));
+
+ args = new String[] { "--sourceSnapshotName=snapshot1", "--sourceSnapshotTmpDir=/tmp/",
+ "--peerSnapshotName=snapshot2", "--peerSnapshotTmpDir=/tmp/", "--peerFSAddress=tempfs",
+ "--peerHBaseRootAddress=hdfs://tempfs:50070/hbase/", "2", tableName.getNameAsString() };
+
+ assertTrue(Lists.newArrayList(args).toString(), new VerifyReplication().doCommandLine(args));
+ }
+
+ @Test(timeout = 300000)
+ public void testVerifyReplicationWithSnapshotSupport() throws Exception {
+ // Populate the tables, at the same time it guarantees that the tables are
+ // identical since it does the check
+ testSmallBatch();
+
+ // Take source and target tables snapshot
+ Path rootDir = FSUtils.getRootDir(conf1);
+ FileSystem fs = rootDir.getFileSystem(conf1);
+ String sourceSnapshotName = "sourceSnapshot-" + System.currentTimeMillis();
+ SnapshotTestingUtils.createSnapshotAndValidate(utility1.getHBaseAdmin(), tableName,
+ new String(famName), sourceSnapshotName, rootDir, fs, true);
+
+ // Take target snapshot
+ Path peerRootDir = FSUtils.getRootDir(conf2);
+ FileSystem peerFs = peerRootDir.getFileSystem(conf2);
+ String peerSnapshotName = "peerSnapshot-" + System.currentTimeMillis();
+ SnapshotTestingUtils.createSnapshotAndValidate(utility2.getHBaseAdmin(), tableName,
+ new String(famName), peerSnapshotName, peerRootDir, peerFs, true);
+
+ String peerFSAddress = peerFs.getUri().toString();
+ String temPath1 = utility1.getRandomDir().toString();
+ String temPath2 = "/tmp2";
+
+ String[] args = new String[] { "--sourceSnapshotName=" + sourceSnapshotName,
+ "--sourceSnapshotTmpDir=" + temPath1, "--peerSnapshotName=" + peerSnapshotName,
+ "--peerSnapshotTmpDir=" + temPath2, "--peerFSAddress=" + peerFSAddress,
+ "--peerHBaseRootAddress=" + FSUtils.getRootDir(conf2), "2", tableName.getNameAsString() };
+
+ Job job = new VerifyReplication().createSubmittableJob(conf1, args);
+ if (job == null) {
+ fail("Job wasn't created, see the log");
+ }
+ if (!job.waitForCompletion(true)) {
+ fail("Job failed, see the log");
+ }
+ assertEquals(NB_ROWS_IN_BATCH,
+ job.getCounters().findCounter(VerifyReplication.Verifier.Counters.GOODROWS).getValue());
+ assertEquals(0,
+ job.getCounters().findCounter(VerifyReplication.Verifier.Counters.BADROWS).getValue());
+
+ Scan scan = new Scan();
+ ResultScanner rs = htable2.getScanner(scan);
+ Put put = null;
+ for (Result result : rs) {
+ put = new Put(result.getRow());
+ Cell firstVal = result.rawCells()[0];
+ put.addColumn(CellUtil.cloneFamily(firstVal), CellUtil.cloneQualifier(firstVal),
+ Bytes.toBytes("diff data"));
+ htable2.put(put);
+ }
+ Delete delete = new Delete(put.getRow());
+ htable2.delete(delete);
+
+ sourceSnapshotName = "sourceSnapshot-" + System.currentTimeMillis();
+ SnapshotTestingUtils.createSnapshotAndValidate(utility1.getHBaseAdmin(), tableName,
+ new String(famName), sourceSnapshotName, rootDir, fs, true);
+
+ peerSnapshotName = "peerSnapshot-" + System.currentTimeMillis();
+ SnapshotTestingUtils.createSnapshotAndValidate(utility2.getHBaseAdmin(), tableName,
+ new String(famName), peerSnapshotName, peerRootDir, peerFs, true);
+
+ args = new String[] { "--sourceSnapshotName=" + sourceSnapshotName,
+ "--sourceSnapshotTmpDir=" + temPath1, "--peerSnapshotName=" + peerSnapshotName,
+ "--peerSnapshotTmpDir=" + temPath2, "--peerFSAddress=" + peerFSAddress,
+ "--peerHBaseRootAddress=" + FSUtils.getRootDir(conf2), "2", tableName.getNameAsString() };
+
+ job = new VerifyReplication().createSubmittableJob(conf1, args);
+ if (job == null) {
+ fail("Job wasn't created, see the log");
+ }
+ if (!job.waitForCompletion(true)) {
+ fail("Job failed, see the log");
+ }
+ assertEquals(0,
+ job.getCounters().findCounter(VerifyReplication.Verifier.Counters.GOODROWS).getValue());
+ assertEquals(NB_ROWS_IN_BATCH,
+ job.getCounters().findCounter(VerifyReplication.Verifier.Counters.BADROWS).getValue());
+ }
+
+ @Test
+ public void testEmptyWALRecovery() throws Exception {
+ final int numRs = utility1.getHBaseCluster().getRegionServerThreads().size();
+
+ // for each RS, create an empty wal with same walGroupId
+ final List<Path> emptyWalPaths = new ArrayList<>();
+ long ts = System.currentTimeMillis();
+ for (int i = 0; i < numRs; i++) {
+ HRegionInfo regionInfo =
+ utility1.getHBaseCluster().getRegions(htable1.getName()).get(0).getRegionInfo();
+ WAL wal = utility1.getHBaseCluster().getRegionServer(i).getWAL(regionInfo);
+ Path currentWalPath = AbstractFSWALProvider.getCurrentFileName(wal);
+ String walGroupId = AbstractFSWALProvider.getWALPrefixFromWALName(currentWalPath.getName());
+ Path emptyWalPath = new Path(utility1.getDataTestDir(), walGroupId + "." + ts);
+ utility1.getTestFileSystem().create(emptyWalPath).close();
+ emptyWalPaths.add(emptyWalPath);
+ }
+
+ // inject our empty wal into the replication queue
+ for (int i = 0; i < numRs; i++) {
+ Replication replicationService =
+ (Replication) utility1.getHBaseCluster().getRegionServer(i).getReplicationSourceService();
+ replicationService.preLogRoll(null, emptyWalPaths.get(i));
+ replicationService.postLogRoll(null, emptyWalPaths.get(i));
+ }
+
+ // wait for ReplicationSource to start reading from our empty wal
+ waitForLogAdvance(numRs, emptyWalPaths, false);
+
+ // roll the original wal, which enqueues a new wal behind our empty wal
+ for (int i = 0; i < numRs; i++) {
+ HRegionInfo regionInfo =
+ utility1.getHBaseCluster().getRegions(htable1.getName()).get(0).getRegionInfo();
+ WAL wal = utility1.getHBaseCluster().getRegionServer(i).getWAL(regionInfo);
+ wal.rollWriter(true);
+ }
+
+ // ReplicationSource should advance past the empty wal, or else the test will fail
+ waitForLogAdvance(numRs, emptyWalPaths, true);
+
+ // we're now writing to the new wal
+ // if everything works, the source should've stopped reading from the empty wal, and start
+ // replicating from the new wal
+ testSimplePutDelete();
+ }
+
+ /**
+ * Waits for the ReplicationSource to start reading from the given paths
+ * @param numRs number of regionservers
+ * @param emptyWalPaths path for each regionserver
+ * @param invert if true, waits until ReplicationSource is NOT reading from the given paths
+ */
+ private void waitForLogAdvance(final int numRs, final List<Path> emptyWalPaths,
+ final boolean invert) throws Exception {
+ Waiter.waitFor(conf1, 10000, new Waiter.Predicate<Exception>() {
+ @Override
+ public boolean evaluate() throws Exception {
+ for (int i = 0; i < numRs; i++) {
+ Replication replicationService = (Replication) utility1.getHBaseCluster()
+ .getRegionServer(i).getReplicationSourceService();
+ for (ReplicationSourceInterface rsi : replicationService.getReplicationManager()
+ .getSources()) {
+ ReplicationSource source = (ReplicationSource) rsi;
+ if (!invert && !emptyWalPaths.get(i).equals(source.getCurrentPath())) {
+ return false;
+ }
+ if (invert && emptyWalPaths.get(i).equals(source.getCurrentPath())) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+ });
+ }
+}
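For reference, the runVerifyReplication() helper above mirrors how the tool is normally launched from the command line; a minimal sketch via ToolRunner, where the peer id "2" and the table name are placeholder values and the flags are the ones exercised in the tests (--versions, --row-prefixes):

  int ret = ToolRunner.run(HBaseConfiguration.create(), new VerifyReplication(),
      new String[] { "--versions=100", "--row-prefixes=prefixrow,secondrow",
                     "2", "myTable" });
  // the GOODROWS / BADROWS counters on the completed job carry the comparison result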
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
new file mode 100644
index 0000000..2e3cb5e
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
@@ -0,0 +1,381 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.snapshot;
+
+import static org.apache.hadoop.util.ToolRunner.run;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.net.URI;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.CategoryBasedTimeout;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.Ignore;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+import org.junit.rules.TestRule;
+
+/**
+ * Test Export Snapshot Tool
+ */
+@Ignore
+@Category({VerySlowMapReduceTests.class, LargeTests.class})
+public class TestExportSnapshot {
+ @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
+ withTimeout(this.getClass()).withLookingForStuckThread(true).build();
+ private static final Log LOG = LogFactory.getLog(TestExportSnapshot.class);
+
+ protected final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+
+ protected final static byte[] FAMILY = Bytes.toBytes("cf");
+
+ @Rule
+ public final TestName testName = new TestName();
+
+ protected TableName tableName;
+ private byte[] emptySnapshotName;
+ private byte[] snapshotName;
+ private int tableNumFiles;
+ private Admin admin;
+
+ public static void setUpBaseConf(Configuration conf) {
+ conf.setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true);
+ conf.setInt("hbase.regionserver.msginterval", 100);
+ conf.setInt("hbase.client.pause", 250);
+ conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 6);
+ conf.setBoolean("hbase.master.enabletable.roundrobin", true);
+ conf.setInt("mapreduce.map.maxattempts", 10);
+ // If a single node has enough failures (default 3), resource manager will blacklist it.
+ // With only 2 nodes and tests injecting faults, we don't want that.
+ conf.setInt("mapreduce.job.maxtaskfailures.per.tracker", 100);
+ }
+
+ @BeforeClass
+ public static void setUpBeforeClass() throws Exception {
+ setUpBaseConf(TEST_UTIL.getConfiguration());
+ TEST_UTIL.startMiniCluster(1, 3);
+ TEST_UTIL.startMiniMapReduceCluster();
+ }
+
+ @AfterClass
+ public static void tearDownAfterClass() throws Exception {
+ TEST_UTIL.shutdownMiniMapReduceCluster();
+ TEST_UTIL.shutdownMiniCluster();
+ }
+
+ /**
+ * Create a table and take a snapshot of the table used by the export test.
+ */
+ @Before
+ public void setUp() throws Exception {
+ this.admin = TEST_UTIL.getAdmin();
+
+ tableName = TableName.valueOf("testtb-" + testName.getMethodName());
+ snapshotName = Bytes.toBytes("snaptb0-" + testName.getMethodName());
+ emptySnapshotName = Bytes.toBytes("emptySnaptb0-" + testName.getMethodName());
+
+ // create Table
+ createTable();
+
+ // Take an empty snapshot
+ admin.snapshot(emptySnapshotName, tableName);
+
+ // Add some rows
+ SnapshotTestingUtils.loadData(TEST_UTIL, tableName, 50, FAMILY);
+ tableNumFiles = admin.getTableRegions(tableName).size();
+
+ // take a snapshot
+ admin.snapshot(snapshotName, tableName);
+ }
+
+ protected void createTable() throws Exception {
+ SnapshotTestingUtils.createPreSplitTable(TEST_UTIL, tableName, 2, FAMILY);
+ }
+
+ protected interface RegionPredicate {
+ boolean evaluate(final HRegionInfo regionInfo);
+ }
+
+ protected RegionPredicate getBypassRegionPredicate() {
+ return null;
+ }
+
+ @After
+ public void tearDown() throws Exception {
+ TEST_UTIL.deleteTable(tableName);
+ SnapshotTestingUtils.deleteAllSnapshots(TEST_UTIL.getAdmin());
+ SnapshotTestingUtils.deleteArchiveDirectory(TEST_UTIL);
+ }
+
+ /**
+ * Verify that the exported snapshot and copied files match the originals.
+ */
+ @Test
+ public void testExportFileSystemState() throws Exception {
+ testExportFileSystemState(tableName, snapshotName, snapshotName, tableNumFiles);
+ }
+
+ @Test
+ public void testExportFileSystemStateWithSkipTmp() throws Exception {
+ TEST_UTIL.getConfiguration().setBoolean(ExportSnapshot.CONF_SKIP_TMP, true);
+ try {
+ testExportFileSystemState(tableName, snapshotName, snapshotName, tableNumFiles);
+ } finally {
+ TEST_UTIL.getConfiguration().setBoolean(ExportSnapshot.CONF_SKIP_TMP, false);
+ }
+ }
+
+ @Test
+ public void testEmptyExportFileSystemState() throws Exception {
+ testExportFileSystemState(tableName, emptySnapshotName, emptySnapshotName, 0);
+ }
+
+ @Test
+ public void testConsecutiveExports() throws Exception {
+ Path copyDir = getLocalDestinationDir();
+ testExportFileSystemState(tableName, snapshotName, snapshotName, tableNumFiles, copyDir, false);
+ testExportFileSystemState(tableName, snapshotName, snapshotName, tableNumFiles, copyDir, true);
+ removeExportDir(copyDir);
+ }
+
+ @Test
+ public void testExportWithTargetName() throws Exception {
+ final byte[] targetName = Bytes.toBytes("testExportWithTargetName");
+ testExportFileSystemState(tableName, snapshotName, targetName, tableNumFiles);
+ }
+
+ private void testExportFileSystemState(final TableName tableName, final byte[] snapshotName,
+ final byte[] targetName, int filesExpected) throws Exception {
+ testExportFileSystemState(tableName, snapshotName, targetName,
+ filesExpected, getHdfsDestinationDir(), false);
+ }
+
+ protected void testExportFileSystemState(final TableName tableName,
+ final byte[] snapshotName, final byte[] targetName, int filesExpected,
+ Path copyDir, boolean overwrite) throws Exception {
+ testExportFileSystemState(TEST_UTIL.getConfiguration(), tableName, snapshotName, targetName,
+ filesExpected, TEST_UTIL.getDefaultRootDirPath(), copyDir,
+ overwrite, getBypassRegionPredicate(), true);
+ }
+
+ /**
+ * Creates destination directory, runs ExportSnapshot() tool, and runs some verifications.
+ */
+ protected static void testExportFileSystemState(final Configuration conf, final TableName tableName,
+ final byte[] snapshotName, final byte[] targetName, final int filesExpected,
+ final Path sourceDir, Path copyDir, final boolean overwrite,
+ final RegionPredicate bypassregionPredicate, boolean success) throws Exception {
+ URI hdfsUri = FileSystem.get(conf).getUri();
+ FileSystem fs = FileSystem.get(copyDir.toUri(), new Configuration());
+ copyDir = copyDir.makeQualified(fs);
+
+ List<String> opts = new ArrayList<>();
+ opts.add("--snapshot");
+ opts.add(Bytes.toString(snapshotName));
+ opts.add("--copy-to");
+ opts.add(copyDir.toString());
+ if (targetName != snapshotName) {
+ opts.add("--target");
+ opts.add(Bytes.toString(targetName));
+ }
+ if (overwrite) opts.add("--overwrite");
+
+ // Export Snapshot
+ int res = run(conf, new ExportSnapshot(), opts.toArray(new String[opts.size()]));
+ assertEquals(success ? 0 : 1, res);
+ if (!success) {
+ final Path targetDir = new Path(HConstants.SNAPSHOT_DIR_NAME, Bytes.toString(targetName));
+ assertFalse(fs.exists(new Path(copyDir, targetDir)));
+ return;
+ }
+
+ // Verify File-System state
+ FileStatus[] rootFiles = fs.listStatus(copyDir);
+ assertEquals(filesExpected > 0 ? 2 : 1, rootFiles.length);
+ for (FileStatus fileStatus: rootFiles) {
+ String name = fileStatus.getPath().getName();
+ assertTrue(fileStatus.isDirectory());
+ assertTrue(name.equals(HConstants.SNAPSHOT_DIR_NAME) ||
+ name.equals(HConstants.HFILE_ARCHIVE_DIRECTORY));
+ }
+
+ // compare the snapshot metadata and verify the hfiles
+ final FileSystem hdfs = FileSystem.get(hdfsUri, conf);
+ final Path snapshotDir = new Path(HConstants.SNAPSHOT_DIR_NAME, Bytes.toString(snapshotName));
+ final Path targetDir = new Path(HConstants.SNAPSHOT_DIR_NAME, Bytes.toString(targetName));
+ verifySnapshotDir(hdfs, new Path(sourceDir, snapshotDir),
+ fs, new Path(copyDir, targetDir));
+ Set<String> snapshotFiles = verifySnapshot(conf, fs, copyDir, tableName,
+ Bytes.toString(targetName), bypassregionPredicate);
+ assertEquals(filesExpected, snapshotFiles.size());
+ }
+
+ /**
+ * Check that ExportSnapshot will succeed if something fails but the retry succeeds.
+ */
+ @Test
+ public void testExportRetry() throws Exception {
+ Path copyDir = getLocalDestinationDir();
+ FileSystem fs = FileSystem.get(copyDir.toUri(), new Configuration());
+ copyDir = copyDir.makeQualified(fs);
+ Configuration conf = new Configuration(TEST_UTIL.getConfiguration());
+ conf.setBoolean(ExportSnapshot.Testing.CONF_TEST_FAILURE, true);
+ conf.setInt(ExportSnapshot.Testing.CONF_TEST_FAILURE_COUNT, 2);
+ conf.setInt("mapreduce.map.maxattempts", 3);
+ testExportFileSystemState(conf, tableName, snapshotName, snapshotName, tableNumFiles,
+ TEST_UTIL.getDefaultRootDirPath(), copyDir, true, getBypassRegionPredicate(), true);
+ }
+
+ /**
+ * Check that ExportSnapshot will fail if we inject failure more times than MR will retry.
+ */
+ @Test
+ public void testExportFailure() throws Exception {
+ Path copyDir = getLocalDestinationDir();
+ FileSystem fs = FileSystem.get(copyDir.toUri(), new Configuration());
+ copyDir = copyDir.makeQualified(fs);
+ Configuration conf = new Configuration(TEST_UTIL.getConfiguration());
+ conf.setBoolean(ExportSnapshot.Testing.CONF_TEST_FAILURE, true);
+ conf.setInt(ExportSnapshot.Testing.CONF_TEST_FAILURE_COUNT, 4);
+ conf.setInt("mapreduce.map.maxattempts", 3);
+ testExportFileSystemState(conf, tableName, snapshotName, snapshotName, tableNumFiles,
+ TEST_UTIL.getDefaultRootDirPath(), copyDir, true, getBypassRegionPredicate(), false);
+ }
+
+ /*
+ * Verify that the snapshot folder on file-system 1 matches the one on file-system 2.
+ */
+ protected static void verifySnapshotDir(final FileSystem fs1, final Path root1,
+ final FileSystem fs2, final Path root2) throws IOException {
+ assertEquals(listFiles(fs1, root1, root1), listFiles(fs2, root2, root2));
+ }
+
+ protected Set<String> verifySnapshot(final FileSystem fs, final Path rootDir,
+ final TableName tableName, final String snapshotName) throws IOException {
+ return verifySnapshot(TEST_UTIL.getConfiguration(), fs, rootDir, tableName,
+ snapshotName, getBypassRegionPredicate());
+ }
+
+ /*
+ * Verify that the referenced files exist.
+ */
+ protected static Set<String> verifySnapshot(final Configuration conf, final FileSystem fs,
+ final Path rootDir, final TableName tableName, final String snapshotName,
+ final RegionPredicate bypassregionPredicate) throws IOException {
+ final Path exportedSnapshot = new Path(rootDir,
+ new Path(HConstants.SNAPSHOT_DIR_NAME, snapshotName));
+ final Set<String> snapshotFiles = new HashSet<>();
+ final Path exportedArchive = new Path(rootDir, HConstants.HFILE_ARCHIVE_DIRECTORY);
+ SnapshotReferenceUtil.visitReferencedFiles(conf, fs, exportedSnapshot,
+ new SnapshotReferenceUtil.SnapshotVisitor() {
+ @Override
+ public void storeFile(final HRegionInfo regionInfo, final String family,
+ final SnapshotRegionManifest.StoreFile storeFile) throws IOException {
+ if (bypassregionPredicate != null && bypassregionPredicate.evaluate(regionInfo))
+ return;
+
+ String hfile = storeFile.getName();
+ snapshotFiles.add(hfile);
+ if (storeFile.hasReference()) {
+ // Nothing to do here, we have already the reference embedded
+ } else {
+ verifyNonEmptyFile(new Path(exportedArchive,
+ new Path(FSUtils.getTableDir(new Path("./"), tableName),
+ new Path(regionInfo.getEncodedName(), new Path(family, hfile)))));
+ }
+ }
+
+ private void verifyNonEmptyFile(final Path path) throws IOException {
+ assertTrue(path + " should exists", fs.exists(path));
+ assertTrue(path + " should not be empty", fs.getFileStatus(path).getLen() > 0);
+ }
+ });
+
+ // Verify Snapshot description
+ SnapshotDescription desc = SnapshotDescriptionUtils.readSnapshotInfo(fs, exportedSnapshot);
+ assertTrue(desc.getName().equals(snapshotName));
+ assertTrue(desc.getTable().equals(tableName.getNameAsString()));
+ return snapshotFiles;
+ }
+
+ private static Set<String> listFiles(final FileSystem fs, final Path root, final Path dir)
+ throws IOException {
+ Set<String> files = new HashSet<>();
+ int rootPrefix = root.makeQualified(fs).toString().length();
+ FileStatus[] list = FSUtils.listStatus(fs, dir);
+ if (list != null) {
+ for (FileStatus fstat: list) {
+ LOG.debug(fstat.getPath());
+ if (fstat.isDirectory()) {
+ files.addAll(listFiles(fs, root, fstat.getPath()));
+ } else {
+ files.add(fstat.getPath().makeQualified(fs).toString().substring(rootPrefix));
+ }
+ }
+ }
+ return files;
+ }
+
+ private Path getHdfsDestinationDir() {
+ Path rootDir = TEST_UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir();
+ Path path = new Path(new Path(rootDir, "export-test"), "export-" + System.currentTimeMillis());
+ LOG.info("HDFS export destination path: " + path);
+ return path;
+ }
+
+ private Path getLocalDestinationDir() {
+ Path path = TEST_UTIL.getDataTestDir("local-export-" + System.currentTimeMillis());
+ LOG.info("Local export destination path: " + path);
+ return path;
+ }
+
+ private static void removeExportDir(final Path path) throws IOException {
+ FileSystem fs = FileSystem.get(path.toUri(), new Configuration());
+ fs.delete(path, true);
+ }
+}
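For reference, the testExportFileSystemState() helper above builds the same argument list the tool takes on the command line; a minimal sketch via ToolRunner, with an illustrative snapshot name and destination URI:

  int ret = ToolRunner.run(HBaseConfiguration.create(), new ExportSnapshot(),
      new String[] { "--snapshot", "snaptb0-example",                   // placeholder name
                     "--copy-to", "hdfs://backup-cluster:8020/hbase",   // placeholder URI
                     "--overwrite" });
  // a return value of 0 means the export job completed successfully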
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotHelpers.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotHelpers.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotHelpers.java
new file mode 100644
index 0000000..e31e81e
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotHelpers.java
@@ -0,0 +1,91 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.snapshot;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotFileInfo;
+import org.apache.hadoop.hbase.testclassification.RegionServerTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.hbase.util.Pair;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+/**
+ * Test Export Snapshot Tool helpers
+ */
+@Category({RegionServerTests.class, SmallTests.class})
+public class TestExportSnapshotHelpers {
+ /**
+ * Verify the result of the getBalancedSplits() method.
+ * The result is a set of file groups, used as the input lists for the "export" mappers.
+ * All the groups should hold a similar amount of data.
+ *
+ * The input list is a pair of file path and length.
+ * getBalancedSplits() sorts it by length and assigns a file to each group,
+ * going back and forth through the groups.
+ */
+ @Test
+ public void testBalanceSplit() throws Exception {
+ // Create a list of files
+ List<Pair<SnapshotFileInfo, Long>> files = new ArrayList<>(21);
+ for (long i = 0; i <= 20; i++) {
+ SnapshotFileInfo fileInfo = SnapshotFileInfo.newBuilder()
+ .setType(SnapshotFileInfo.Type.HFILE)
+ .setHfile("file-" + i)
+ .build();
+ files.add(new Pair<>(fileInfo, i));
+ }
+
+ // Create 5 groups (total size 210)
+ // group 0: 20, 11, 10, 1, 0 (total size: 42)
+ // group 1: 19, 12, 9, 2 (total size: 42)
+ // group 2: 18, 13, 8, 3 (total size: 42)
+ // group 3: 17, 14, 7, 4 (total size: 42)
+ // group 4: 16, 15, 6, 5 (total size: 42)
+ List<List<Pair<SnapshotFileInfo, Long>>> splits = ExportSnapshot.getBalancedSplits(files, 5);
+ assertEquals(5, splits.size());
+
+ String[] split0 = new String[] {"file-20", "file-11", "file-10", "file-1", "file-0"};
+ verifyBalanceSplit(splits.get(0), split0, 42);
+ String[] split1 = new String[] {"file-19", "file-12", "file-9", "file-2"};
+ verifyBalanceSplit(splits.get(1), split1, 42);
+ String[] split2 = new String[] {"file-18", "file-13", "file-8", "file-3"};
+ verifyBalanceSplit(splits.get(2), split2, 42);
+ String[] split3 = new String[] {"file-17", "file-14", "file-7", "file-4"};
+ verifyBalanceSplit(splits.get(3), split3, 42);
+ String[] split4 = new String[] {"file-16", "file-15", "file-6", "file-5"};
+ verifyBalanceSplit(splits.get(4), split4, 42);
+ }
+
+ private void verifyBalanceSplit(final List<Pair<SnapshotFileInfo, Long>> split,
+ final String[] expected, final long expectedSize) {
+ assertEquals(expected.length, split.size());
+ long totalSize = 0;
+ for (int i = 0; i < expected.length; ++i) {
+ Pair<SnapshotFileInfo, Long> fileInfo = split.get(i);
+ assertEquals(expected[i], fileInfo.getFirst().getHfile());
+ totalSize += fileInfo.getSecond();
+ }
+ assertEquals(expectedSize, totalSize);
+ }
+}
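The balancing strategy the test above exercises can be sketched in isolation: sort the files by size in descending order, then deal them out to the groups in a back-and-forth ("snake") order so every group ends up with roughly the same total size. The following is a minimal standalone sketch, not the ExportSnapshot code itself; the class and method names are illustrative only.

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/** Illustrative sketch of the back-and-forth balancing verified by testBalanceSplit(). */
public class BalancedSplitSketch {

  /** Splits the given file sizes into ngroups lists with similar totals. */
  static List<List<Long>> balance(List<Long> sizes, int ngroups) {
    List<Long> sorted = new ArrayList<>(sizes);
    sorted.sort(Collections.reverseOrder());      // biggest files first

    List<List<Long>> groups = new ArrayList<>(ngroups);
    for (int i = 0; i < ngroups; i++) {
      groups.add(new ArrayList<Long>());
    }

    int g = 0;
    int dir = 1;                                  // +1 walking forward, -1 walking backward
    for (Long size : sorted) {
      groups.get(g).add(size);
      if (g + dir < 0 || g + dir >= ngroups) {
        dir = -dir;                               // bounce at either end; the same group takes the next file
      } else {
        g += dir;
      }
    }
    return groups;
  }

  public static void main(String[] args) {
    List<Long> sizes = new ArrayList<>();
    for (long i = 0; i <= 20; i++) {
      sizes.add(i);                               // same 0..20 sizes as the test above
    }
    for (List<Long> group : balance(sizes, 5)) {
      long total = group.stream().mapToLong(Long::longValue).sum();
      System.out.println(group + " -> total " + total);
    }
  }
}

Fed the same 0..20 sizes as the test, each of the five groups comes out with a total of 42, matching the splits asserted above.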
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotNoCluster.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotNoCluster.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotNoCluster.java
new file mode 100644
index 0000000..00778502
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotNoCluster.java
@@ -0,0 +1,112 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.snapshot;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.CategoryBasedTimeout;
+import org.apache.hadoop.hbase.HBaseCommonTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
+import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils.SnapshotMock;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestRule;
+
+/**
+ * Test Export Snapshot Tool
+ */
+@Category({MapReduceTests.class, MediumTests.class})
+public class TestExportSnapshotNoCluster {
+ @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
+ withTimeout(this.getClass()).withLookingForStuckThread(true).build();
+ private static final Log LOG = LogFactory.getLog(TestExportSnapshotNoCluster.class);
+
+ protected final static HBaseCommonTestingUtility TEST_UTIL = new HBaseCommonTestingUtility();
+
+ private static FileSystem fs;
+ private static Path testDir;
+
+ public static void setUpBaseConf(Configuration conf) {
+ conf.setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true);
+ conf.setInt("hbase.regionserver.msginterval", 100);
+ conf.setInt("hbase.client.pause", 250);
+ conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 6);
+ conf.setBoolean("hbase.master.enabletable.roundrobin", true);
+ conf.setInt("mapreduce.map.maxattempts", 10);
+ conf.set(HConstants.HBASE_DIR, testDir.toString());
+ }
+
+ @BeforeClass
+ public static void setUpBeforeClass() throws Exception {
+ testDir = TEST_UTIL.getDataTestDir();
+ fs = testDir.getFileSystem(TEST_UTIL.getConfiguration());
+
+ setUpBaseConf(TEST_UTIL.getConfiguration());
+ }
+
+ /**
+ * Mock a snapshot with files in the archive dir,
+ * two regions, and one reference file.
+ */
+ @Test
+ public void testSnapshotWithRefsExportFileSystemState() throws Exception {
+ SnapshotMock snapshotMock = new SnapshotMock(TEST_UTIL.getConfiguration(), fs, testDir);
+ SnapshotMock.SnapshotBuilder builder = snapshotMock.createSnapshotV2("tableWithRefsV1",
+ "tableWithRefsV1");
+ testSnapshotWithRefsExportFileSystemState(builder);
+
+ snapshotMock = new SnapshotMock(TEST_UTIL.getConfiguration(), fs, testDir);
+ builder = snapshotMock.createSnapshotV2("tableWithRefsV2", "tableWithRefsV2");
+ testSnapshotWithRefsExportFileSystemState(builder);
+ }
+
+ /**
+ * Generates a couple of regions for the specified SnapshotMock,
+ * then runs the export and verification.
+ */
+ private void testSnapshotWithRefsExportFileSystemState(SnapshotMock.SnapshotBuilder builder)
+ throws Exception {
+ Path[] r1Files = builder.addRegion();
+ Path[] r2Files = builder.addRegion();
+ builder.commit();
+ int snapshotFilesCount = r1Files.length + r2Files.length;
+
+ byte[] snapshotName = Bytes.toBytes(builder.getSnapshotDescription().getName());
+ TableName tableName = builder.getTableDescriptor().getTableName();
+ TestExportSnapshot.testExportFileSystemState(TEST_UTIL.getConfiguration(),
+ tableName, snapshotName, snapshotName, snapshotFilesCount,
+ testDir, getDestinationDir(), false, null, true);
+ }
+
+ private Path getDestinationDir() {
+ Path path = new Path(new Path(testDir, "export-test"), "export-" + System.currentTimeMillis());
+ LOG.info("HDFS export destination path: " + path);
+ return path;
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobExportSnapshot.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobExportSnapshot.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobExportSnapshot.java
new file mode 100644
index 0000000..7407a7d
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobExportSnapshot.java
@@ -0,0 +1,65 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.snapshot;
+
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.mob.MobConstants;
+import org.apache.hadoop.hbase.mob.MobUtils;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.VerySlowRegionServerTests;
+import org.junit.BeforeClass;
+import org.junit.Ignore;
+import org.junit.experimental.categories.Category;
+
+/**
+ * Test Export Snapshot Tool
+ */
+@Ignore
+@Category({VerySlowRegionServerTests.class, LargeTests.class})
+public class TestMobExportSnapshot extends TestExportSnapshot {
+
+ public static void setUpBaseConf(Configuration conf) {
+ TestExportSnapshot.setUpBaseConf(conf);
+ conf.setInt(MobConstants.MOB_FILE_CACHE_SIZE_KEY, 0);
+ }
+
+ @BeforeClass
+ public static void setUpBeforeClass() throws Exception {
+ setUpBaseConf(TEST_UTIL.getConfiguration());
+ TEST_UTIL.startMiniCluster(1, 3);
+ TEST_UTIL.startMiniMapReduceCluster();
+ }
+
+ @Override
+ protected void createTable() throws Exception {
+ MobSnapshotTestingUtils.createPreSplitMobTable(TEST_UTIL, tableName, 2, FAMILY);
+ }
+
+ @Override
+ protected RegionPredicate getBypassRegionPredicate() {
+ return new RegionPredicate() {
+ @Override
+ public boolean evaluate(final HRegionInfo regionInfo) {
+ return MobUtils.isMobRegionInfo(regionInfo);
+ }
+ };
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobSecureExportSnapshot.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobSecureExportSnapshot.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobSecureExportSnapshot.java
new file mode 100644
index 0000000..98d03c0
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobSecureExportSnapshot.java
@@ -0,0 +1,59 @@
+/**
+ * Copyright The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.snapshot;
+
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.VerySlowRegionServerTests;
+import org.apache.hadoop.hbase.mapreduce.HadoopSecurityEnabledUserProviderForTesting;
+import org.apache.hadoop.hbase.security.UserProvider;
+import org.apache.hadoop.hbase.security.access.AccessControlLists;
+import org.apache.hadoop.hbase.security.access.SecureTestUtil;
+
+import org.junit.BeforeClass;
+import org.junit.Ignore;
+import org.junit.experimental.categories.Category;
+
+/**
+ * Reruns TestMobExportSnapshot using ExportSnapshot in secure mode.
+ */
+@Ignore
+@Category({VerySlowRegionServerTests.class, LargeTests.class})
+public class TestMobSecureExportSnapshot extends TestMobExportSnapshot {
+ @BeforeClass
+ public static void setUpBeforeClass() throws Exception {
+ setUpBaseConf(TEST_UTIL.getConfiguration());
+ // Setup separate test-data directory for MR cluster and set corresponding configurations.
+ // Otherwise, different test classes running MR cluster can step on each other.
+ TEST_UTIL.getDataTestDir();
+
+ // set the always on security provider
+ UserProvider.setUserProviderForTesting(TEST_UTIL.getConfiguration(),
+ HadoopSecurityEnabledUserProviderForTesting.class);
+
+ // setup configuration
+ SecureTestUtil.enableSecurity(TEST_UTIL.getConfiguration());
+
+ TEST_UTIL.startMiniCluster(1, 3);
+ TEST_UTIL.startMiniMapReduceCluster();
+
+ // Wait for the ACL table to become available
+ TEST_UTIL.waitTableEnabled(AccessControlLists.ACL_TABLE_NAME);
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestSecureExportSnapshot.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestSecureExportSnapshot.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestSecureExportSnapshot.java
new file mode 100644
index 0000000..7d4832c
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestSecureExportSnapshot.java
@@ -0,0 +1,64 @@
+/**
+ * Copyright The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.snapshot;
+
+import org.apache.hadoop.hbase.CategoryBasedTimeout;
+import org.apache.hadoop.hbase.mapreduce.HadoopSecurityEnabledUserProviderForTesting;
+import org.apache.hadoop.hbase.security.UserProvider;
+import org.apache.hadoop.hbase.security.access.AccessControlLists;
+import org.apache.hadoop.hbase.security.access.SecureTestUtil;
+
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.VerySlowRegionServerTests;
+import org.junit.BeforeClass;
+import org.junit.Ignore;
+import org.junit.Rule;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestRule;
+
+/**
+ * Reruns TestExportSnapshot using ExportSnapshot in secure mode.
+ */
+@Ignore
+@Category({VerySlowRegionServerTests.class, LargeTests.class})
+public class TestSecureExportSnapshot extends TestExportSnapshot {
+ @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
+ withTimeout(this.getClass()).withLookingForStuckThread(true).build();
+ @BeforeClass
+ public static void setUpBeforeClass() throws Exception {
+ setUpBaseConf(TEST_UTIL.getConfiguration());
+ // Setup separate test-data directory for MR cluster and set corresponding configurations.
+ // Otherwise, different test classes running MR cluster can step on each other.
+ TEST_UTIL.getDataTestDir();
+
+ // set the always on security provider
+ UserProvider.setUserProviderForTesting(TEST_UTIL.getConfiguration(),
+ HadoopSecurityEnabledUserProviderForTesting.class);
+
+ // setup configuration
+ SecureTestUtil.enableSecurity(TEST_UTIL.getConfiguration());
+
+ TEST_UTIL.startMiniCluster(1, 3);
+ TEST_UTIL.startMiniMapReduceCluster();
+
+ // Wait for the ACL table to become available
+ TEST_UTIL.waitTableEnabled(AccessControlLists.ACL_TABLE_NAME);
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/RowCounter.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/RowCounter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/RowCounter.java
deleted file mode 100644
index 43560fd..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/RowCounter.java
+++ /dev/null
@@ -1,121 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import java.io.IOException;
-
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.mapred.FileOutputFormat;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-/**
- * A job with a map to count rows.
- * Map outputs table rows IF the input row has columns that have content.
- * Uses a org.apache.hadoop.mapred.lib.IdentityReducer
- */
-@InterfaceAudience.Public
-public class RowCounter extends Configured implements Tool {
- // Name of this 'program'
- static final String NAME = "rowcounter";
-
- /**
- * Mapper that runs the count.
- */
- static class RowCounterMapper
- implements TableMap<ImmutableBytesWritable, Result> {
- private static enum Counters {ROWS}
-
- public void map(ImmutableBytesWritable row, Result values,
- OutputCollector<ImmutableBytesWritable, Result> output,
- Reporter reporter)
- throws IOException {
- // Count every row containing data, whether it's in qualifiers or values
- reporter.incrCounter(Counters.ROWS, 1);
- }
-
- public void configure(JobConf jc) {
- // Nothing to do.
- }
-
- public void close() throws IOException {
- // Nothing to do.
- }
- }
-
- /**
- * @param args
- * @return the JobConf
- * @throws IOException
- */
- public JobConf createSubmittableJob(String[] args) throws IOException {
- JobConf c = new JobConf(getConf(), getClass());
- c.setJobName(NAME);
- // Columns are space delimited
- StringBuilder sb = new StringBuilder();
- final int columnoffset = 2;
- for (int i = columnoffset; i < args.length; i++) {
- if (i > columnoffset) {
- sb.append(" ");
- }
- sb.append(args[i]);
- }
- // Second argument is the table name.
- TableMapReduceUtil.initTableMapJob(args[1], sb.toString(),
- RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, c);
- c.setNumReduceTasks(0);
- // First arg is the output directory.
- FileOutputFormat.setOutputPath(c, new Path(args[0]));
- return c;
- }
-
- static int printUsage() {
- System.out.println(NAME +
- " <outputdir> <tablename> <column1> [<column2>...]");
- return -1;
- }
-
- public int run(final String[] args) throws Exception {
- // Make sure there are at least 3 parameters
- if (args.length < 3) {
- System.err.println("ERROR: Wrong number of parameters: " + args.length);
- return printUsage();
- }
- JobClient.runJob(createSubmittableJob(args));
- return 0;
- }
-
- /**
- * @param args
- * @throws Exception
- */
- public static void main(String[] args) throws Exception {
- int errCode = ToolRunner.run(HBaseConfiguration.create(), new RowCounter(), args);
- System.exit(errCode);
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableInputFormat.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableInputFormat.java
deleted file mode 100644
index 208849a..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableInputFormat.java
+++ /dev/null
@@ -1,90 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import java.io.IOException;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.JobConfigurable;
-import org.apache.hadoop.util.StringUtils;
-
-/**
- * Convert HBase tabular data into a format that is consumable by Map/Reduce.
- */
-@InterfaceAudience.Public
-public class TableInputFormat extends TableInputFormatBase implements
- JobConfigurable {
- private static final Log LOG = LogFactory.getLog(TableInputFormat.class);
-
- /**
- * space delimited list of columns
- */
- public static final String COLUMN_LIST = "hbase.mapred.tablecolumns";
-
- public void configure(JobConf job) {
- try {
- initialize(job);
- } catch (Exception e) {
- LOG.error(StringUtils.stringifyException(e));
- }
- }
-
- @Override
- protected void initialize(JobConf job) throws IOException {
- Path[] tableNames = FileInputFormat.getInputPaths(job);
- String colArg = job.get(COLUMN_LIST);
- String[] colNames = colArg.split(" ");
- byte [][] m_cols = new byte[colNames.length][];
- for (int i = 0; i < m_cols.length; i++) {
- m_cols[i] = Bytes.toBytes(colNames[i]);
- }
- setInputColumns(m_cols);
- Connection connection = ConnectionFactory.createConnection(job);
- initializeTable(connection, TableName.valueOf(tableNames[0].getName()));
- }
-
- public void validateInput(JobConf job) throws IOException {
- // expecting exactly one path
- Path [] tableNames = FileInputFormat.getInputPaths(job);
- if (tableNames == null || tableNames.length > 1) {
- throw new IOException("expecting one table name");
- }
-
- // connected to table?
- if (getTable() == null) {
- throw new IOException("could not connect to table '" +
- tableNames[0].getName() + "'");
- }
-
- // expecting at least one column
- String colArg = job.get(COLUMN_LIST);
- if (colArg == null || colArg.length() == 0) {
- throw new IOException("expecting at least one column");
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableInputFormatBase.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableInputFormatBase.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableInputFormatBase.java
deleted file mode 100644
index c65810f..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableInputFormatBase.java
+++ /dev/null
@@ -1,313 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import java.io.Closeable;
-import java.io.IOException;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.RegionLocator;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.filter.Filter;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.mapred.InputFormat;
-import org.apache.hadoop.mapred.InputSplit;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.RecordReader;
-import org.apache.hadoop.mapred.Reporter;
-
-/**
- * A Base for {@link TableInputFormat}s. Receives a {@link Table}, a
- * byte[] of input columns and optionally a {@link Filter}.
- * Subclasses may use other TableRecordReader implementations.
- *
- * Subclasses MUST ensure initializeTable(Connection, TableName) is called for an instance to
- * function properly. Each of the entry points to this class used by the MapReduce framework,
- * {@link #getRecordReader(InputSplit, JobConf, Reporter)} and {@link #getSplits(JobConf, int)},
- * will call {@link #initialize(JobConf)} as a convenient centralized location to handle
- * retrieving the necessary configuration information. If your subclass overrides either of these
- * methods, either call the parent version or call initialize yourself.
- *
- * <p>
- * An example of a subclass:
- * <pre>
- * class ExampleTIF extends TableInputFormatBase {
- *
- * {@literal @}Override
- * protected void initialize(JobConf context) throws IOException {
- * // We are responsible for the lifecycle of this connection until we hand it over in
- * // initializeTable.
- * Connection connection =
- * ConnectionFactory.createConnection(HBaseConfiguration.create(job));
- * TableName tableName = TableName.valueOf("exampleTable");
- * // mandatory. once passed here, TableInputFormatBase will handle closing the connection.
- * initializeTable(connection, tableName);
- * byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
- * Bytes.toBytes("columnB") };
- * // mandatory
- * setInputColumns(inputColumns);
- * // optional, by default we'll get everything for the given columns.
- * Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*"));
- * setRowFilter(exampleFilter);
- * }
- * }
- * </pre>
- */
-
-@InterfaceAudience.Public
-public abstract class TableInputFormatBase
-implements InputFormat<ImmutableBytesWritable, Result> {
- private static final Log LOG = LogFactory.getLog(TableInputFormatBase.class);
- private byte [][] inputColumns;
- private Table table;
- private RegionLocator regionLocator;
- private Connection connection;
- private TableRecordReader tableRecordReader;
- private Filter rowFilter;
-
- private static final String NOT_INITIALIZED = "The input format instance has not been properly " +
- "initialized. Ensure you call initializeTable either in your constructor or initialize " +
- "method";
- private static final String INITIALIZATION_ERROR = "Cannot create a record reader because of a" +
- " previous error. Please look at the previous log lines from" +
- " the task's full log for more details.";
-
- /**
- * Builds a TableRecordReader. If no TableRecordReader was provided, uses
- * the default.
- *
- * @see org.apache.hadoop.mapred.InputFormat#getRecordReader(InputSplit,
- * JobConf, Reporter)
- */
- public RecordReader<ImmutableBytesWritable, Result> getRecordReader(
- InputSplit split, JobConf job, Reporter reporter)
- throws IOException {
- // In case a subclass uses the deprecated approach or calls initializeTable directly
- if (table == null) {
- initialize(job);
- }
- // null check in case our child overrides getTable to not throw.
- try {
- if (getTable() == null) {
- // initialize() must not have been implemented in the subclass.
- throw new IOException(INITIALIZATION_ERROR);
- }
- } catch (IllegalStateException exception) {
- throw new IOException(INITIALIZATION_ERROR, exception);
- }
-
- TableSplit tSplit = (TableSplit) split;
- // if no table record reader was provided use default
- final TableRecordReader trr = this.tableRecordReader == null ? new TableRecordReader() :
- this.tableRecordReader;
- trr.setStartRow(tSplit.getStartRow());
- trr.setEndRow(tSplit.getEndRow());
- trr.setHTable(this.table);
- trr.setInputColumns(this.inputColumns);
- trr.setRowFilter(this.rowFilter);
- trr.init();
- return new RecordReader<ImmutableBytesWritable, Result>() {
-
- @Override
- public void close() throws IOException {
- trr.close();
- closeTable();
- }
-
- @Override
- public ImmutableBytesWritable createKey() {
- return trr.createKey();
- }
-
- @Override
- public Result createValue() {
- return trr.createValue();
- }
-
- @Override
- public long getPos() throws IOException {
- return trr.getPos();
- }
-
- @Override
- public float getProgress() throws IOException {
- return trr.getProgress();
- }
-
- @Override
- public boolean next(ImmutableBytesWritable key, Result value) throws IOException {
- return trr.next(key, value);
- }
- };
- }
-
- /**
- * Calculates the splits that will serve as input for the map tasks.
- *
- * Splits are created in number equal to the smallest between numSplits and
- * the number of {@link org.apache.hadoop.hbase.regionserver.HRegion}s in the table.
- * If the number of splits is smaller than the number of
- * {@link org.apache.hadoop.hbase.regionserver.HRegion}s then splits are spanned across
- * multiple {@link org.apache.hadoop.hbase.regionserver.HRegion}s
- * and are grouped as evenly as possible. When the
- * splits are uneven, the bigger splits are placed first in the
- * {@link InputSplit} array.
- *
- * @param job the map task {@link JobConf}
- * @param numSplits a hint to calculate the number of splits (mapred.map.tasks).
- *
- * @return the input splits
- *
- * @see org.apache.hadoop.mapred.InputFormat#getSplits(org.apache.hadoop.mapred.JobConf, int)
- */
- public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
- if (this.table == null) {
- initialize(job);
- }
- // null check in case our child overrides getTable to not throw.
- try {
- if (getTable() == null) {
- // initialize() must not have been implemented in the subclass.
- throw new IOException(INITIALIZATION_ERROR);
- }
- } catch (IllegalStateException exception) {
- throw new IOException(INITIALIZATION_ERROR, exception);
- }
-
- byte [][] startKeys = this.regionLocator.getStartKeys();
- if (startKeys == null || startKeys.length == 0) {
- throw new IOException("Expecting at least one region");
- }
- if (this.inputColumns == null || this.inputColumns.length == 0) {
- throw new IOException("Expecting at least one column");
- }
- int realNumSplits = numSplits > startKeys.length? startKeys.length:
- numSplits;
- InputSplit[] splits = new InputSplit[realNumSplits];
- int middle = startKeys.length / realNumSplits;
- int startPos = 0;
- for (int i = 0; i < realNumSplits; i++) {
- int lastPos = startPos + middle;
- lastPos = startKeys.length % realNumSplits > i ? lastPos + 1 : lastPos;
- String regionLocation = regionLocator.getRegionLocation(startKeys[startPos]).
- getHostname();
- splits[i] = new TableSplit(this.table.getName(),
- startKeys[startPos], ((i + 1) < realNumSplits) ? startKeys[lastPos]:
- HConstants.EMPTY_START_ROW, regionLocation);
- LOG.info("split: " + i + "->" + splits[i]);
- startPos = lastPos;
- }
- return splits;
- }
-
- /**
- * Allows subclasses to initialize the table information.
- *
- * @param connection The Connection to the HBase cluster. MUST be unmanaged. We will close.
- * @param tableName The {@link TableName} of the table to process.
- * @throws IOException
- */
- protected void initializeTable(Connection connection, TableName tableName) throws IOException {
- if (this.table != null || this.connection != null) {
- LOG.warn("initializeTable called multiple times. Overwriting connection and table " +
- "reference; TableInputFormatBase will not close these old references when done.");
- }
- this.table = connection.getTable(tableName);
- this.regionLocator = connection.getRegionLocator(tableName);
- this.connection = connection;
- }
-
- /**
- * @param inputColumns to be passed in {@link Result} to the map task.
- */
- protected void setInputColumns(byte [][] inputColumns) {
- this.inputColumns = inputColumns;
- }
-
- /**
- * Allows subclasses to get the {@link Table}.
- */
- protected Table getTable() {
- if (table == null) {
- throw new IllegalStateException(NOT_INITIALIZED);
- }
- return this.table;
- }
-
- /**
- * Allows subclasses to set the {@link TableRecordReader}.
- *
- * @param tableRecordReader
- * to provide other {@link TableRecordReader} implementations.
- */
- protected void setTableRecordReader(TableRecordReader tableRecordReader) {
- this.tableRecordReader = tableRecordReader;
- }
-
- /**
- * Allows subclasses to set the {@link Filter} to be used.
- *
- * @param rowFilter
- */
- protected void setRowFilter(Filter rowFilter) {
- this.rowFilter = rowFilter;
- }
-
- /**
- * Handle subclass specific set up.
- * Each of the entry points used by the MapReduce framework,
- * {@link #getRecordReader(InputSplit, JobConf, Reporter)} and {@link #getSplits(JobConf, int)},
- * will call {@link #initialize(JobConf)} as a convenient centralized location to handle
- * retrieving the necessary configuration information and calling
- * {@link #initializeTable(Connection, TableName)}.
- *
- * Subclasses should implement their initialize call such that it is safe to call multiple times.
- * The current TableInputFormatBase implementation relies on a non-null table reference to decide
- * if an initialize call is needed, but this behavior may change in the future. In particular,
- * it is critical that initializeTable not be called multiple times since this will leak
- * Connection instances.
- *
- */
- protected void initialize(JobConf job) throws IOException {
- }
-
- /**
- * Close the Table and related objects that were initialized via
- * {@link #initializeTable(Connection, TableName)}.
- *
- * @throws IOException
- */
- protected void closeTable() throws IOException {
- close(table, connection);
- table = null;
- connection = null;
- }
-
- private void close(Closeable... closables) throws IOException {
- for (Closeable c : closables) {
- if(c != null) { c.close(); }
- }
- }
-}
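The split calculation documented on getSplits() above distributes region start keys over the requested number of splits: each split receives startKeys.length / realNumSplits regions and the remainder is handed to the first splits, so the bigger splits come first. A rough standalone sketch of that index arithmetic (illustrative names, not the HBase code itself):

import java.util.ArrayList;
import java.util.List;

/** Illustrative sketch of how regions are divided among splits. */
public class SplitPlanSketch {

  /** Returns, for each split, the [firstRegion, lastRegionExclusive) index range. */
  static List<int[]> plan(int numRegions, int numSplits) {
    int realNumSplits = Math.min(numSplits, numRegions);
    int middle = numRegions / realNumSplits;
    List<int[]> ranges = new ArrayList<>(realNumSplits);
    int startPos = 0;
    for (int i = 0; i < realNumSplits; i++) {
      int lastPos = startPos + middle;
      if (numRegions % realNumSplits > i) {
        lastPos++;                                // spread the remainder over the first splits
      }
      ranges.add(new int[] { startPos, lastPos });
      startPos = lastPos;
    }
    return ranges;
  }

  public static void main(String[] args) {
    // 7 regions into 3 splits -> [0,3) [3,5) [5,7): sizes 3, 2, 2.
    for (int[] r : plan(7, 3)) {
      System.out.println("regions " + r[0] + " to " + (r[1] - 1));
    }
  }
}

In the real method the start row of split i is the start key of its first region and its end row is the start key of the next split's first region (or the empty row for the last split).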
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableMap.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableMap.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableMap.java
deleted file mode 100644
index a9f1e61..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableMap.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.mapred.Mapper;
-
-/**
- * Scan an HBase table to sort by a specified sort column.
- * If the column does not exist, the record is not passed to Reduce.
- *
- * @param <K> WritableComparable key class
- * @param <V> Writable value class
- */
-@InterfaceAudience.Public
-public interface TableMap<K extends WritableComparable<? super K>, V>
-extends Mapper<ImmutableBytesWritable, Result, K, V> {
-
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableMapReduceUtil.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableMapReduceUtil.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableMapReduceUtil.java
deleted file mode 100644
index 63ec418..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableMapReduceUtil.java
+++ /dev/null
@@ -1,376 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.MetaTableAccessor;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.mapreduce.MutationSerialization;
-import org.apache.hadoop.hbase.mapreduce.ResultSerialization;
-import org.apache.hadoop.hbase.security.User;
-import org.apache.hadoop.hbase.security.UserProvider;
-import org.apache.hadoop.hbase.security.token.TokenUtil;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.InputFormat;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.OutputFormat;
-import org.apache.hadoop.mapred.TextInputFormat;
-import org.apache.hadoop.mapred.TextOutputFormat;
-
-import java.io.IOException;
-import java.util.Collection;
-import java.util.Map;
-
-/**
- * Utility for {@link TableMap} and {@link TableReduce}
- */
-@InterfaceAudience.Public
-@SuppressWarnings({ "rawtypes", "unchecked" })
-public class TableMapReduceUtil {
-
- /**
- * Use this before submitting a TableMap job. It will
- * appropriately set up the JobConf.
- *
- * @param table The table name to read from.
- * @param columns The columns to scan.
- * @param mapper The mapper class to use.
- * @param outputKeyClass The class of the output key.
- * @param outputValueClass The class of the output value.
- * @param job The current job configuration to adjust.
- */
- public static void initTableMapJob(String table, String columns,
- Class<? extends TableMap> mapper,
- Class<?> outputKeyClass,
- Class<?> outputValueClass, JobConf job) {
- initTableMapJob(table, columns, mapper, outputKeyClass, outputValueClass, job,
- true, TableInputFormat.class);
- }
-
- public static void initTableMapJob(String table, String columns,
- Class<? extends TableMap> mapper,
- Class<?> outputKeyClass,
- Class<?> outputValueClass, JobConf job, boolean addDependencyJars) {
- initTableMapJob(table, columns, mapper, outputKeyClass, outputValueClass, job,
- addDependencyJars, TableInputFormat.class);
- }
-
- /**
- * Use this before submitting a TableMap job. It will
- * appropriately set up the JobConf.
- *
- * @param table The table name to read from.
- * @param columns The columns to scan.
- * @param mapper The mapper class to use.
- * @param outputKeyClass The class of the output key.
- * @param outputValueClass The class of the output value.
- * @param job The current job configuration to adjust.
- * @param addDependencyJars upload HBase jars and jars for any of the configured
- * job classes via the distributed cache (tmpjars).
- */
- public static void initTableMapJob(String table, String columns,
- Class<? extends TableMap> mapper,
- Class<?> outputKeyClass,
- Class<?> outputValueClass, JobConf job, boolean addDependencyJars,
- Class<? extends InputFormat> inputFormat) {
-
- job.setInputFormat(inputFormat);
- job.setMapOutputValueClass(outputValueClass);
- job.setMapOutputKeyClass(outputKeyClass);
- job.setMapperClass(mapper);
- job.setStrings("io.serializations", job.get("io.serializations"),
- MutationSerialization.class.getName(), ResultSerialization.class.getName());
- FileInputFormat.addInputPaths(job, table);
- job.set(TableInputFormat.COLUMN_LIST, columns);
- if (addDependencyJars) {
- try {
- addDependencyJars(job);
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- try {
- initCredentials(job);
- } catch (IOException ioe) {
- // just spit out the stack trace? really?
- ioe.printStackTrace();
- }
- }
-
- /**
- * Sets up the job for reading from one or more table snapshots, with one or more scans
- * per snapshot.
- * It bypasses HBase servers and reads directly from the snapshot files.
- *
- * @param snapshotScans map of snapshot name to scans on that snapshot.
- * @param mapper The mapper class to use.
- * @param outputKeyClass The class of the output key.
- * @param outputValueClass The class of the output value.
- * @param job The current job to adjust. Make sure the passed job is
- * carrying all necessary HBase configuration.
- * @param addDependencyJars upload HBase jars and jars for any of the configured
- * job classes via the distributed cache (tmpjars).
- */
- public static void initMultiTableSnapshotMapperJob(Map<String, Collection<Scan>> snapshotScans,
- Class<? extends TableMap> mapper, Class<?> outputKeyClass, Class<?> outputValueClass,
- JobConf job, boolean addDependencyJars, Path tmpRestoreDir) throws IOException {
- MultiTableSnapshotInputFormat.setInput(job, snapshotScans, tmpRestoreDir);
-
- job.setInputFormat(MultiTableSnapshotInputFormat.class);
- if (outputValueClass != null) {
- job.setMapOutputValueClass(outputValueClass);
- }
- if (outputKeyClass != null) {
- job.setMapOutputKeyClass(outputKeyClass);
- }
- job.setMapperClass(mapper);
- if (addDependencyJars) {
- addDependencyJars(job);
- }
-
- org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.resetCacheConfig(job);
- }
-
- /**
- * Sets up the job for reading from a table snapshot. It bypasses HBase servers
- * and reads directly from the snapshot files.
- *
- * @param snapshotName The name of the snapshot (of a table) to read from.
- * @param columns The columns to scan.
- * @param mapper The mapper class to use.
- * @param outputKeyClass The class of the output key.
- * @param outputValueClass The class of the output value.
- * @param job The current job to adjust. Make sure the passed job is
- * carrying all necessary HBase configuration.
- * @param addDependencyJars upload HBase jars and jars for any of the configured
- * job classes via the distributed cache (tmpjars).
- * @param tmpRestoreDir a temporary directory to copy the snapshot files into. Current user should
- * have write permissions to this directory, and this should not be a subdirectory of rootdir.
- * After the job is finished, restore directory can be deleted.
- * @throws IOException When setting up the details fails.
- * @see TableSnapshotInputFormat
- */
- public static void initTableSnapshotMapJob(String snapshotName, String columns,
- Class<? extends TableMap> mapper,
- Class<?> outputKeyClass,
- Class<?> outputValueClass, JobConf job,
- boolean addDependencyJars, Path tmpRestoreDir)
- throws IOException {
- TableSnapshotInputFormat.setInput(job, snapshotName, tmpRestoreDir);
- initTableMapJob(snapshotName, columns, mapper, outputKeyClass, outputValueClass, job,
- addDependencyJars, TableSnapshotInputFormat.class);
- org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.resetCacheConfig(job);
- }
-
- /**
- * Use this before submitting a TableReduce job. It will
- * appropriately set up the JobConf.
- *
- * @param table The output table.
- * @param reducer The reducer class to use.
- * @param job The current job configuration to adjust.
- * @throws IOException When determining the region count fails.
- */
- public static void initTableReduceJob(String table,
- Class<? extends TableReduce> reducer, JobConf job)
- throws IOException {
- initTableReduceJob(table, reducer, job, null);
- }
-
- /**
- * Use this before submitting a TableReduce job. It will
- * appropriately set up the JobConf.
- *
- * @param table The output table.
- * @param reducer The reducer class to use.
- * @param job The current job configuration to adjust.
- * @param partitioner Partitioner to use. Pass <code>null</code> to use
- * default partitioner.
- * @throws IOException When determining the region count fails.
- */
- public static void initTableReduceJob(String table,
- Class<? extends TableReduce> reducer, JobConf job, Class partitioner)
- throws IOException {
- initTableReduceJob(table, reducer, job, partitioner, true);
- }
-
- /**
- * Use this before submitting a TableReduce job. It will
- * appropriately set up the JobConf.
- *
- * @param table The output table.
- * @param reducer The reducer class to use.
- * @param job The current job configuration to adjust.
- * @param partitioner Partitioner to use. Pass <code>null</code> to use
- * default partitioner.
- * @param addDependencyJars upload HBase jars and jars for any of the configured
- * job classes via the distributed cache (tmpjars).
- * @throws IOException When determining the region count fails.
- */
- public static void initTableReduceJob(String table,
- Class<? extends TableReduce> reducer, JobConf job, Class partitioner,
- boolean addDependencyJars) throws IOException {
- job.setOutputFormat(TableOutputFormat.class);
- job.setReducerClass(reducer);
- job.set(TableOutputFormat.OUTPUT_TABLE, table);
- job.setOutputKeyClass(ImmutableBytesWritable.class);
- job.setOutputValueClass(Put.class);
- job.setStrings("io.serializations", job.get("io.serializations"),
- MutationSerialization.class.getName(), ResultSerialization.class.getName());
- if (partitioner == HRegionPartitioner.class) {
- job.setPartitionerClass(HRegionPartitioner.class);
- int regions =
- MetaTableAccessor.getRegionCount(HBaseConfiguration.create(job), TableName.valueOf(table));
- if (job.getNumReduceTasks() > regions) {
- job.setNumReduceTasks(regions);
- }
- } else if (partitioner != null) {
- job.setPartitionerClass(partitioner);
- }
- if (addDependencyJars) {
- addDependencyJars(job);
- }
- initCredentials(job);
- }
-
- public static void initCredentials(JobConf job) throws IOException {
- UserProvider userProvider = UserProvider.instantiate(job);
- if (userProvider.isHadoopSecurityEnabled()) {
- // propagate delegation related props from launcher job to MR job
- if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
- job.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
- }
- }
-
- if (userProvider.isHBaseSecurityEnabled()) {
- Connection conn = ConnectionFactory.createConnection(job);
- try {
- // login the server principal (if using secure Hadoop)
- User user = userProvider.getCurrent();
- TokenUtil.addTokenForJob(conn, job, user);
- } catch (InterruptedException ie) {
- ie.printStackTrace();
- Thread.currentThread().interrupt();
- } finally {
- conn.close();
- }
- }
- }
-
- /**
- * Ensures that the given number of reduce tasks for the given job
- * configuration does not exceed the number of regions for the given table.
- *
- * @param table The table to get the region count for.
- * @param job The current job configuration to adjust.
- * @throws IOException When retrieving the table details fails.
- */
- // Used by tests.
- public static void limitNumReduceTasks(String table, JobConf job)
- throws IOException {
- int regions =
- MetaTableAccessor.getRegionCount(HBaseConfiguration.create(job), TableName.valueOf(table));
- if (job.getNumReduceTasks() > regions)
- job.setNumReduceTasks(regions);
- }
-
- /**
- * Ensures that the given number of map tasks for the given job
- * configuration does not exceed the number of regions for the given table.
- *
- * @param table The table to get the region count for.
- * @param job The current job configuration to adjust.
- * @throws IOException When retrieving the table details fails.
- */
- // Used by tests.
- public static void limitNumMapTasks(String table, JobConf job)
- throws IOException {
- int regions =
- MetaTableAccessor.getRegionCount(HBaseConfiguration.create(job), TableName.valueOf(table));
- if (job.getNumMapTasks() > regions)
- job.setNumMapTasks(regions);
- }
-
- /**
- * Sets the number of reduce tasks for the given job configuration to the
- * number of regions the given table has.
- *
- * @param table The table to get the region count for.
- * @param job The current job configuration to adjust.
- * @throws IOException When retrieving the table details fails.
- */
- public static void setNumReduceTasks(String table, JobConf job)
- throws IOException {
- job.setNumReduceTasks(MetaTableAccessor.getRegionCount(HBaseConfiguration.create(job),
- TableName.valueOf(table)));
- }
-
- /**
- * Sets the number of map tasks for the given job configuration to the
- * number of regions the given table has.
- *
- * @param table The table to get the region count for.
- * @param job The current job configuration to adjust.
- * @throws IOException When retrieving the table details fails.
- */
- public static void setNumMapTasks(String table, JobConf job)
- throws IOException {
- job.setNumMapTasks(MetaTableAccessor.getRegionCount(HBaseConfiguration.create(job),
- TableName.valueOf(table)));
- }
-
- /**
- * Sets the number of rows to return and cache with each scanner iteration.
- * Higher caching values will enable faster mapreduce jobs at the expense of
- * requiring more heap to contain the cached rows.
- *
- * @param job The current job configuration to adjust.
- * @param batchSize The number of rows to return in batch with each scanner
- * iteration.
- */
- public static void setScannerCaching(JobConf job, int batchSize) {
- job.setInt("hbase.client.scanner.caching", batchSize);
- }
-
- /**
- * @see org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil#addDependencyJars(org.apache.hadoop.mapreduce.Job)
- */
- public static void addDependencyJars(JobConf job) throws IOException {
- org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addHBaseDependencyJars(job);
- org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addDependencyJarsForClasses(
- job,
- // when making changes here, consider also mapreduce.TableMapReduceUtil
- // pull job classes
- job.getMapOutputKeyClass(),
- job.getMapOutputValueClass(),
- job.getOutputKeyClass(),
- job.getOutputValueClass(),
- job.getPartitionerClass(),
- job.getClass("mapred.input.format.class", TextInputFormat.class, InputFormat.class),
- job.getClass("mapred.output.format.class", TextOutputFormat.class, OutputFormat.class),
- job.getCombinerClass());
- }
-}
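For orientation, a driver using the snapshot entry point documented above (initTableSnapshotMapJob, which reads restored snapshot files instead of going through region servers) might look roughly like the sketch below. The snapshot name, column list, restore path, and the mapper are illustrative placeholders, not part of this patch.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapred.TableMap;
import org.apache.hadoop.hbase.mapred.TableMapReduceUtil;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class SnapshotScanDriver {

  /** Trivial mapper that only counts the rows it sees (illustrative). */
  public static class MySnapshotMapper
      implements TableMap<ImmutableBytesWritable, Result> {
    private enum Counters { ROWS }

    public void map(ImmutableBytesWritable row, Result values,
        OutputCollector<ImmutableBytesWritable, Result> output, Reporter reporter)
        throws IOException {
      reporter.incrCounter(Counters.ROWS, 1);
    }

    public void configure(JobConf job) {
      // Nothing to do.
    }

    public void close() throws IOException {
      // Nothing to do.
    }
  }

  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf(HBaseConfiguration.create(), SnapshotScanDriver.class);
    job.setJobName("snapshot-scan");
    // Restore the snapshot under a temporary directory and read its files directly.
    TableMapReduceUtil.initTableSnapshotMapJob(
        "mySnapshot",                         // snapshot to read (illustrative name)
        "f:col1 f:col2",                      // space delimited columns (illustrative)
        MySnapshotMapper.class,
        ImmutableBytesWritable.class, Result.class,
        job, true, new Path("/tmp/snapshot-restore"));
    job.setNumReduceTasks(0);
    FileOutputFormat.setOutputPath(job, new Path(args[0]));
    JobClient.runJob(job);
  }
}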
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableOutputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableOutputFormat.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableOutputFormat.java
deleted file mode 100644
index 8878eee..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableOutputFormat.java
+++ /dev/null
@@ -1,134 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import java.io.IOException;
-
-import org.apache.hadoop.fs.FileAlreadyExistsException;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.BufferedMutator;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.mapred.FileOutputFormat;
-import org.apache.hadoop.mapred.InvalidJobConfException;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.RecordWriter;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.util.Progressable;
-
-/**
- * Convert Map/Reduce output and write it to an HBase table
- */
-@InterfaceAudience.Public
-public class TableOutputFormat extends FileOutputFormat<ImmutableBytesWritable, Put> {
-
- /** JobConf parameter that specifies the output table */
- public static final String OUTPUT_TABLE = "hbase.mapred.outputtable";
-
- /**
- * Convert Reduce output (key, value) to (ImmutableBytesWritable, Put)
- * and write to an HBase table.
- */
- protected static class TableRecordWriter implements RecordWriter<ImmutableBytesWritable, Put> {
- private BufferedMutator m_mutator;
- private Connection conn;
-
-
- /**
- * Instantiate a TableRecordWriter with the given BufferedMutator for writing.
- *
- * @deprecated Please use {@code #TableRecordWriter(JobConf)}. This version does not clean up
- * connections and will leak connections (removed in 2.0).
- */
- @Deprecated
- public TableRecordWriter(final BufferedMutator mutator) throws IOException {
- this.m_mutator = mutator;
- this.conn = null;
- }
-
- /**
- * Instantiate a TableRecordWriter with a BufferedMutator for batch writing.
- */
- public TableRecordWriter(JobConf job) throws IOException {
- // expecting exactly one path
- TableName tableName = TableName.valueOf(job.get(OUTPUT_TABLE));
- try {
- this.conn = ConnectionFactory.createConnection(job);
- this.m_mutator = conn.getBufferedMutator(tableName);
- } finally {
- if (this.m_mutator == null) {
- conn.close();
- conn = null;
- }
- }
- }
-
- public void close(Reporter reporter) throws IOException {
- try {
- if (this.m_mutator != null) {
- this.m_mutator.close();
- }
- } finally {
- if (conn != null) {
- this.conn.close();
- }
- }
- }
-
- public void write(ImmutableBytesWritable key, Put value) throws IOException {
- m_mutator.mutate(new Put(value));
- }
- }
-
- /**
- * Creates a new record writer.
- *
- * Be aware that the baseline javadoc gives the impression that there is a single
- * {@link RecordWriter} per job but in HBase, it is more natural if we give you a new
- * RecordWriter per call of this method. You must close the returned RecordWriter when done.
- * Failure to do so will drop writes.
- *
- * @param ignored Ignored filesystem
- * @param job Current JobConf
- * @param name Name of the job
- * @param progress Progressable used to report progress.
- * @return The newly created writer instance.
- * @throws IOException When creating the writer fails.
- */
- @Override
- public RecordWriter getRecordWriter(FileSystem ignored, JobConf job, String name,
- Progressable progress)
- throws IOException {
- // Clear write buffer on fail is true by default so no need to reset it.
- return new TableRecordWriter(job);
- }
-
- @Override
- public void checkOutputSpecs(FileSystem ignored, JobConf job)
- throws FileAlreadyExistsException, InvalidJobConfException, IOException {
- String tableName = job.get(OUTPUT_TABLE);
- if (tableName == null) {
- throw new IOException("Must specify table name");
- }
- }
-}
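A hedged sketch of wiring this output format into a JobConf; the table name "my_table" and class name are placeholders for illustration only.

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapred.TableOutputFormat;
import org.apache.hadoop.mapred.JobConf;

public class OutputWiringSketch {
  public static JobConf configure() {
    JobConf job = new JobConf(HBaseConfiguration.create(), OutputWiringSketch.class);
    // Name of the destination table ("my_table" is hypothetical).
    job.set(TableOutputFormat.OUTPUT_TABLE, "my_table");
    job.setOutputFormat(TableOutputFormat.class);
    // The record writer expects (ImmutableBytesWritable, Put) pairs.
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Put.class);
    return job;
  }
}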
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableRecordReader.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableRecordReader.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableRecordReader.java
deleted file mode 100644
index cecef7d..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableRecordReader.java
+++ /dev/null
@@ -1,139 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import java.io.IOException;
-
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.filter.Filter;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.mapred.RecordReader;
-
-
-/**
- * Iterate over HBase table data, returning (ImmutableBytesWritable, Result) pairs.
- */
-@InterfaceAudience.Public
-public class TableRecordReader
-implements RecordReader<ImmutableBytesWritable, Result> {
-
- private TableRecordReaderImpl recordReaderImpl = new TableRecordReaderImpl();
-
- /**
- * Restart from survivable exceptions by creating a new scanner.
- *
- * @param firstRow
- * @throws IOException
- */
- public void restart(byte[] firstRow) throws IOException {
- this.recordReaderImpl.restart(firstRow);
- }
-
- /**
- * Build the scanner. Not done in constructor to allow for extension.
- *
- * @throws IOException
- */
- public void init() throws IOException {
- this.recordReaderImpl.restart(this.recordReaderImpl.getStartRow());
- }
-
- /**
- * @param htable the {@link org.apache.hadoop.hbase.client.Table} to scan.
- */
- public void setHTable(Table htable) {
- this.recordReaderImpl.setHTable(htable);
- }
-
- /**
- * @param inputColumns the columns to be placed in {@link Result}.
- */
- public void setInputColumns(final byte [][] inputColumns) {
- this.recordReaderImpl.setInputColumns(inputColumns);
- }
-
- /**
- * @param startRow the first row in the split
- */
- public void setStartRow(final byte [] startRow) {
- this.recordReaderImpl.setStartRow(startRow);
- }
-
- /**
- *
- * @param endRow the last row in the split
- */
- public void setEndRow(final byte [] endRow) {
- this.recordReaderImpl.setEndRow(endRow);
- }
-
- /**
- * @param rowFilter the {@link Filter} to be used.
- */
- public void setRowFilter(Filter rowFilter) {
- this.recordReaderImpl.setRowFilter(rowFilter);
- }
-
- public void close() {
- this.recordReaderImpl.close();
- }
-
- /**
- * @return ImmutableBytesWritable
- *
- * @see org.apache.hadoop.mapred.RecordReader#createKey()
- */
- public ImmutableBytesWritable createKey() {
- return this.recordReaderImpl.createKey();
- }
-
- /**
- * @return Result
- *
- * @see org.apache.hadoop.mapred.RecordReader#createValue()
- */
- public Result createValue() {
- return this.recordReaderImpl.createValue();
- }
-
- public long getPos() {
-
- // This should be the ordinal tuple in the range;
- // not clear how to calculate...
- return this.recordReaderImpl.getPos();
- }
-
- public float getProgress() {
- // Depends on the total number of tuples and getPos
- return this.recordReaderImpl.getPos();
- }
-
- /**
- * @param key ImmutableBytesWritable as input key.
- * @param value Result as input value.
- * @return true if there was more data
- * @throws IOException
- */
- public boolean next(ImmutableBytesWritable key, Result value)
- throws IOException {
- return this.recordReaderImpl.next(key, value);
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableRecordReaderImpl.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableRecordReaderImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableRecordReaderImpl.java
deleted file mode 100644
index f6b79c3..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableRecordReaderImpl.java
+++ /dev/null
@@ -1,259 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import java.io.IOException;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.ScannerCallable;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.DoNotRetryIOException;
-import org.apache.hadoop.hbase.filter.Filter;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.util.StringUtils;
-
-import static org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl.LOG_PER_ROW_COUNT;
-
-/**
- * Iterate over HBase table data, returning (ImmutableBytesWritable, Result) pairs.
- */
-@InterfaceAudience.Public
-public class TableRecordReaderImpl {
- private static final Log LOG = LogFactory.getLog(TableRecordReaderImpl.class);
-
- private byte [] startRow;
- private byte [] endRow;
- private byte [] lastSuccessfulRow;
- private Filter trrRowFilter;
- private ResultScanner scanner;
- private Table htable;
- private byte [][] trrInputColumns;
- private long timestamp;
- private int rowcount;
- private boolean logScannerActivity = false;
- private int logPerRowCount = 100;
-
- /**
- * Restart from survivable exceptions by creating a new scanner.
- *
- * @param firstRow
- * @throws IOException
- */
- public void restart(byte[] firstRow) throws IOException {
- Scan currentScan;
- if ((endRow != null) && (endRow.length > 0)) {
- if (trrRowFilter != null) {
- Scan scan = new Scan(firstRow, endRow);
- TableInputFormat.addColumns(scan, trrInputColumns);
- scan.setFilter(trrRowFilter);
- scan.setCacheBlocks(false);
- this.scanner = this.htable.getScanner(scan);
- currentScan = scan;
- } else {
- LOG.debug("TIFB.restart, firstRow: " +
- Bytes.toStringBinary(firstRow) + ", endRow: " +
- Bytes.toStringBinary(endRow));
- Scan scan = new Scan(firstRow, endRow);
- TableInputFormat.addColumns(scan, trrInputColumns);
- this.scanner = this.htable.getScanner(scan);
- currentScan = scan;
- }
- } else {
- LOG.debug("TIFB.restart, firstRow: " +
- Bytes.toStringBinary(firstRow) + ", no endRow");
-
- Scan scan = new Scan(firstRow);
- TableInputFormat.addColumns(scan, trrInputColumns);
- scan.setFilter(trrRowFilter);
- this.scanner = this.htable.getScanner(scan);
- currentScan = scan;
- }
- if (logScannerActivity) {
- LOG.info("Current scan=" + currentScan.toString());
- timestamp = System.currentTimeMillis();
- rowcount = 0;
- }
- }
-
- /**
- * Build the scanner. Not done in constructor to allow for extension.
- *
- * @throws IOException
- */
- public void init() throws IOException {
- restart(startRow);
- }
-
- byte[] getStartRow() {
- return this.startRow;
- }
- /**
- * @param htable the {@link org.apache.hadoop.hbase.client.Table} to scan.
- */
- public void setHTable(Table htable) {
- Configuration conf = htable.getConfiguration();
- logScannerActivity = conf.getBoolean(
- ScannerCallable.LOG_SCANNER_ACTIVITY, false);
- logPerRowCount = conf.getInt(LOG_PER_ROW_COUNT, 100);
- this.htable = htable;
- }
-
- /**
- * @param inputColumns the columns to be placed in {@link Result}.
- */
- public void setInputColumns(final byte [][] inputColumns) {
- this.trrInputColumns = inputColumns;
- }
-
- /**
- * @param startRow the first row in the split
- */
- public void setStartRow(final byte [] startRow) {
- this.startRow = startRow;
- }
-
- /**
- *
- * @param endRow the last row in the split
- */
- public void setEndRow(final byte [] endRow) {
- this.endRow = endRow;
- }
-
- /**
- * @param rowFilter the {@link Filter} to be used.
- */
- public void setRowFilter(Filter rowFilter) {
- this.trrRowFilter = rowFilter;
- }
-
- public void close() {
- if (this.scanner != null) {
- this.scanner.close();
- }
- try {
- this.htable.close();
- } catch (IOException ioe) {
- LOG.warn("Error closing table", ioe);
- }
- }
-
- /**
- * @return ImmutableBytesWritable
- *
- * @see org.apache.hadoop.mapred.RecordReader#createKey()
- */
- public ImmutableBytesWritable createKey() {
- return new ImmutableBytesWritable();
- }
-
- /**
- * @return Result
- *
- * @see org.apache.hadoop.mapred.RecordReader#createValue()
- */
- public Result createValue() {
- return new Result();
- }
-
- public long getPos() {
- // This should be the ordinal tuple in the range;
- // not clear how to calculate...
- return 0;
- }
-
- public float getProgress() {
- // Depends on the total number of tuples and getPos
- return 0;
- }
-
- /**
- * @param key ImmutableBytesWritable as input key.
- * @param value Result as input value.
- * @return true if there was more data
- * @throws IOException
- */
- public boolean next(ImmutableBytesWritable key, Result value)
- throws IOException {
- Result result;
- try {
- try {
- result = this.scanner.next();
- if (logScannerActivity) {
- rowcount ++;
- if (rowcount >= logPerRowCount) {
- long now = System.currentTimeMillis();
- LOG.info("Mapper took " + (now-timestamp)
- + "ms to process " + rowcount + " rows");
- timestamp = now;
- rowcount = 0;
- }
- }
- } catch (IOException e) {
- // do not retry if the exception tells us not to do so
- if (e instanceof DoNotRetryIOException) {
- throw e;
- }
- // try to handle all other IOExceptions by restarting
- // the scanner, if the second call fails, it will be rethrown
- LOG.debug("recovered from " + StringUtils.stringifyException(e));
- if (lastSuccessfulRow == null) {
- LOG.warn("We are restarting the first next() invocation," +
- " if your mapper has restarted a few other times like this" +
- " then you should consider killing this job and investigate" +
- " why it's taking so long.");
- }
- if (lastSuccessfulRow == null) {
- restart(startRow);
- } else {
- restart(lastSuccessfulRow);
- this.scanner.next(); // skip presumed already mapped row
- }
- result = this.scanner.next();
- }
-
- if (result != null && result.size() > 0) {
- key.set(result.getRow());
- lastSuccessfulRow = key.get();
- value.copyFrom(result);
- return true;
- }
- return false;
- } catch (IOException ioe) {
- if (logScannerActivity) {
- long now = System.currentTimeMillis();
- LOG.info("Mapper took " + (now-timestamp)
- + "ms to process " + rowcount + " rows");
- LOG.info(ioe);
- String lastRow = lastSuccessfulRow == null ?
- "null" : Bytes.toStringBinary(lastSuccessfulRow);
- LOG.info("lastSuccessfulRow=" + lastRow);
- }
- throw ioe;
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableReduce.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableReduce.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableReduce.java
deleted file mode 100644
index 91fb4a1..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableReduce.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.mapred.Reducer;
-
-/**
- * Write a table, sorting by the input key
- *
- * @param <K> key class
- * @param <V> value class
- */
-@InterfaceAudience.Public
-@SuppressWarnings("unchecked")
-public interface TableReduce<K extends WritableComparable, V>
-extends Reducer<K, V, ImmutableBytesWritable, Put> {
-
-}
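As an illustration of the interface above, a minimal reducer sketch that sums LongWritable values per Text key and emits one Put per key. The class name, column family "f" and qualifier "sum" are assumptions for the example, not part of this change.

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapred.TableReduce;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class SumToTableReducer implements TableReduce<Text, LongWritable> {
  @Override
  public void reduce(Text key, Iterator<LongWritable> values,
      OutputCollector<ImmutableBytesWritable, Put> output, Reporter reporter)
      throws IOException {
    long sum = 0;
    while (values.hasNext()) {
      sum += values.next().get();
    }
    byte[] row = Bytes.toBytes(key.toString());
    Put put = new Put(row);
    // Hypothetical destination column f:sum.
    put.addColumn(Bytes.toBytes("f"), Bytes.toBytes("sum"), Bytes.toBytes(sum));
    output.collect(new ImmutableBytesWritable(row), put);
  }

  @Override
  public void configure(JobConf job) { }

  @Override
  public void close() { }
}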
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableSnapshotInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableSnapshotInputFormat.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableSnapshotInputFormat.java
deleted file mode 100644
index d7b49ff..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableSnapshotInputFormat.java
+++ /dev/null
@@ -1,166 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.mapred;
-
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormatImpl;
-import org.apache.hadoop.mapred.InputFormat;
-import org.apache.hadoop.mapred.InputSplit;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.RecordReader;
-import org.apache.hadoop.mapred.Reporter;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.List;
-
-/**
- * TableSnapshotInputFormat allows a MapReduce job to run over a table snapshot. Further
- * documentation available on {@link org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat}.
- *
- * @see org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat
- */
-@InterfaceAudience.Public
-public class TableSnapshotInputFormat implements InputFormat<ImmutableBytesWritable, Result> {
-
- public static class TableSnapshotRegionSplit implements InputSplit {
- private TableSnapshotInputFormatImpl.InputSplit delegate;
-
- // constructor for mapreduce framework / Writable
- public TableSnapshotRegionSplit() {
- this.delegate = new TableSnapshotInputFormatImpl.InputSplit();
- }
-
- public TableSnapshotRegionSplit(TableSnapshotInputFormatImpl.InputSplit delegate) {
- this.delegate = delegate;
- }
-
- public TableSnapshotRegionSplit(HTableDescriptor htd, HRegionInfo regionInfo,
- List<String> locations, Scan scan, Path restoreDir) {
- this.delegate =
- new TableSnapshotInputFormatImpl.InputSplit(htd, regionInfo, locations, scan, restoreDir);
- }
-
- @Override
- public long getLength() throws IOException {
- return delegate.getLength();
- }
-
- @Override
- public String[] getLocations() throws IOException {
- return delegate.getLocations();
- }
-
- @Override
- public void write(DataOutput out) throws IOException {
- delegate.write(out);
- }
-
- @Override
- public void readFields(DataInput in) throws IOException {
- delegate.readFields(in);
- }
- }
-
- static class TableSnapshotRecordReader
- implements RecordReader<ImmutableBytesWritable, Result> {
-
- private TableSnapshotInputFormatImpl.RecordReader delegate;
-
- public TableSnapshotRecordReader(TableSnapshotRegionSplit split, JobConf job)
- throws IOException {
- delegate = new TableSnapshotInputFormatImpl.RecordReader();
- delegate.initialize(split.delegate, job);
- }
-
- @Override
- public boolean next(ImmutableBytesWritable key, Result value) throws IOException {
- if (!delegate.nextKeyValue()) {
- return false;
- }
- ImmutableBytesWritable currentKey = delegate.getCurrentKey();
- key.set(currentKey.get(), currentKey.getOffset(), currentKey.getLength());
- value.copyFrom(delegate.getCurrentValue());
- return true;
- }
-
- @Override
- public ImmutableBytesWritable createKey() {
- return new ImmutableBytesWritable();
- }
-
- @Override
- public Result createValue() {
- return new Result();
- }
-
- @Override
- public long getPos() throws IOException {
- return delegate.getPos();
- }
-
- @Override
- public void close() throws IOException {
- delegate.close();
- }
-
- @Override
- public float getProgress() throws IOException {
- return delegate.getProgress();
- }
- }
-
- @Override
- public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
- List<TableSnapshotInputFormatImpl.InputSplit> splits =
- TableSnapshotInputFormatImpl.getSplits(job);
- InputSplit[] results = new InputSplit[splits.size()];
- for (int i = 0; i < splits.size(); i++) {
- results[i] = new TableSnapshotRegionSplit(splits.get(i));
- }
- return results;
- }
-
- @Override
- public RecordReader<ImmutableBytesWritable, Result>
- getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException {
- return new TableSnapshotRecordReader((TableSnapshotRegionSplit) split, job);
- }
-
- /**
- * Configures the job to use TableSnapshotInputFormat to read from a snapshot.
- * @param job the job to configure
- * @param snapshotName the name of the snapshot to read from
- * @param restoreDir a temporary directory to restore the snapshot into. Current user should
- * have write permissions to this directory, and this should not be a subdirectory of rootdir.
- * After the job is finished, restoreDir can be deleted.
- * @throws IOException if an error occurs
- */
- public static void setInput(JobConf job, String snapshotName, Path restoreDir)
- throws IOException {
- TableSnapshotInputFormatImpl.setInput(job, snapshotName, restoreDir);
- }
-}
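A minimal sketch of pointing a mapred job at a snapshot via setInput; the snapshot name and restore directory below are placeholders.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapred.TableSnapshotInputFormat;
import org.apache.hadoop.mapred.JobConf;

public class SnapshotInputSketch {
  public static JobConf configure() throws Exception {
    JobConf job = new JobConf(HBaseConfiguration.create(), SnapshotInputSketch.class);
    // "my_snapshot" and the restore directory are hypothetical; the restore dir must be
    // writable by the submitting user and must not live under hbase.rootdir.
    TableSnapshotInputFormat.setInput(job, "my_snapshot", new Path("/tmp/snapshot-restore"));
    job.setInputFormat(TableSnapshotInputFormat.class);
    return job;
  }
}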
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableSplit.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableSplit.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableSplit.java
deleted file mode 100644
index 0784e5e..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableSplit.java
+++ /dev/null
@@ -1,154 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.Arrays;
-
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapred.InputSplit;
-
-/**
- * A table split corresponds to a key range [low, high)
- */
-@InterfaceAudience.Public
-public class TableSplit implements InputSplit, Comparable<TableSplit> {
- private TableName m_tableName;
- private byte [] m_startRow;
- private byte [] m_endRow;
- private String m_regionLocation;
-
- /** default constructor */
- public TableSplit() {
- this((TableName)null, HConstants.EMPTY_BYTE_ARRAY,
- HConstants.EMPTY_BYTE_ARRAY, "");
- }
-
- /**
- * Constructor
- * @param tableName
- * @param startRow
- * @param endRow
- * @param location
- */
- public TableSplit(TableName tableName, byte [] startRow, byte [] endRow,
- final String location) {
- this.m_tableName = tableName;
- this.m_startRow = startRow;
- this.m_endRow = endRow;
- this.m_regionLocation = location;
- }
-
- public TableSplit(byte [] tableName, byte [] startRow, byte [] endRow,
- final String location) {
- this(TableName.valueOf(tableName), startRow, endRow,
- location);
- }
-
- /** @return table name */
- public TableName getTable() {
- return this.m_tableName;
- }
-
- /** @return table name */
- public byte [] getTableName() {
- return this.m_tableName.getName();
- }
-
- /** @return starting row key */
- public byte [] getStartRow() {
- return this.m_startRow;
- }
-
- /** @return end row key */
- public byte [] getEndRow() {
- return this.m_endRow;
- }
-
- /** @return the region's hostname */
- public String getRegionLocation() {
- return this.m_regionLocation;
- }
-
- public String[] getLocations() {
- return new String[] {this.m_regionLocation};
- }
-
- public long getLength() {
- // Not clear how to obtain this... seems to be used only for sorting splits
- return 0;
- }
-
- public void readFields(DataInput in) throws IOException {
- this.m_tableName = TableName.valueOf(Bytes.readByteArray(in));
- this.m_startRow = Bytes.readByteArray(in);
- this.m_endRow = Bytes.readByteArray(in);
- this.m_regionLocation = Bytes.toString(Bytes.readByteArray(in));
- }
-
- public void write(DataOutput out) throws IOException {
- Bytes.writeByteArray(out, this.m_tableName.getName());
- Bytes.writeByteArray(out, this.m_startRow);
- Bytes.writeByteArray(out, this.m_endRow);
- Bytes.writeByteArray(out, Bytes.toBytes(this.m_regionLocation));
- }
-
- @Override
- public String toString() {
- StringBuilder sb = new StringBuilder();
- sb.append("HBase table split(");
- sb.append("table name: ").append(m_tableName);
- sb.append(", start row: ").append(Bytes.toStringBinary(m_startRow));
- sb.append(", end row: ").append(Bytes.toStringBinary(m_endRow));
- sb.append(", region location: ").append(m_regionLocation);
- sb.append(")");
- return sb.toString();
- }
-
- @Override
- public int compareTo(TableSplit o) {
- return Bytes.compareTo(getStartRow(), o.getStartRow());
- }
-
- @Override
- public boolean equals(Object o) {
- if (o == null || !(o instanceof TableSplit)) {
- return false;
- }
- TableSplit other = (TableSplit)o;
- return m_tableName.equals(other.m_tableName) &&
- Bytes.equals(m_startRow, other.m_startRow) &&
- Bytes.equals(m_endRow, other.m_endRow) &&
- m_regionLocation.equals(other.m_regionLocation);
- }
-
- @Override
- public int hashCode() {
- int result = m_tableName != null ? m_tableName.hashCode() : 0;
- result = 31 * result + Arrays.hashCode(m_startRow);
- result = 31 * result + Arrays.hashCode(m_endRow);
- result = 31 * result + (m_regionLocation != null ? m_regionLocation.hashCode() : 0);
- return result;
- }
-}
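A small sketch showing the Writable round trip that the write/readFields pair above supports; the table name, row keys and hostname are placeholders.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.mapred.TableSplit;
import org.apache.hadoop.hbase.util.Bytes;

public class SplitRoundTrip {
  public static void main(String[] args) throws Exception {
    TableSplit split = new TableSplit(TableName.valueOf("my_table"),
        Bytes.toBytes("row-aaa"), Bytes.toBytes("row-mmm"), "rs1.example.com");
    // Serialize and deserialize the split, then compare.
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    split.write(new DataOutputStream(bytes));
    TableSplit copy = new TableSplit();
    copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
    System.out.println("round trip equal: " + split.equals(copy));
  }
}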
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/package-info.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/package-info.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/package-info.java
deleted file mode 100644
index 8a2a363..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/package-info.java
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
-Provides HBase <a href="http://wiki.apache.org/hadoop/HadoopMapReduce">MapReduce</a>
-Input/OutputFormats, a table indexing MapReduce job, and utility methods.
-
-<p>See <a href="http://hbase.apache.org/book.html#mapreduce">HBase and MapReduce</a>
-in the HBase Reference Guide for mapreduce over hbase documentation.
-*/
-package org.apache.hadoop.hbase.mapred;
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCounter.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCounter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCounter.java
deleted file mode 100644
index 078033e..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCounter.java
+++ /dev/null
@@ -1,333 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.filter.CompareFilter;
-import org.apache.hadoop.hbase.filter.Filter;
-import org.apache.hadoop.hbase.filter.PrefixFilter;
-import org.apache.hadoop.hbase.filter.RegexStringComparator;
-import org.apache.hadoop.hbase.filter.RowFilter;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.base.Preconditions;
-
-
-/**
- * A job with a map and reduce phase to count cells in a table.
- * The counter lists the following stats for a given table:
- * <pre>
- * 1. Total number of rows in the table
- * 2. Total number of CFs across all rows
- * 3. Total qualifiers across all rows
- * 4. Total occurrence of each CF
- * 5. Total occurrence of each qualifier
- * 6. Total number of versions of each qualifier.
- * </pre>
- *
- * CellCounter takes optional parameters: a user-supplied separator
- * string used for the row/family/qualifier names in the report, a
- * regex-based or prefix-based row filter to restrict the count
- * operation to a subset of rows from the table, and a start time
- * and/or end time to limit the count to a time range.
- */
-@InterfaceAudience.Public
-public class CellCounter extends Configured implements Tool {
- private static final Log LOG =
- LogFactory.getLog(CellCounter.class.getName());
-
-
- /**
- * Name of this 'program'.
- */
- static final String NAME = "CellCounter";
-
- private final static String JOB_NAME_CONF_KEY = "mapreduce.job.name";
-
- /**
- * Mapper that runs the count.
- */
- static class CellCounterMapper
- extends TableMapper<Text, IntWritable> {
- /**
- * Counter enumeration to count the actual rows.
- */
- public static enum Counters {
- ROWS,
- CELLS
- }
-
- private Configuration conf;
- private String separator;
-
- // state of current row, family, column needs to persist across map() invocations
- // in order to properly handle scanner batching, where a single qualifier may have too
- // many versions for a single map() call
- private byte[] lastRow;
- private String currentRowKey;
- byte[] currentFamily = null;
- String currentFamilyName = null;
- byte[] currentQualifier = null;
- // family + qualifier
- String currentQualifierName = null;
- // rowkey + family + qualifier
- String currentRowQualifierName = null;
-
- @Override
- protected void setup(Context context) throws IOException, InterruptedException {
- conf = context.getConfiguration();
- separator = conf.get("ReportSeparator",":");
- }
-
- /**
- * Maps the data.
- *
- * @param row The current table row key.
- * @param values The columns.
- * @param context The current context.
- * @throws IOException When something is broken with the data.
- * @see org.apache.hadoop.mapreduce.Mapper#map(KEYIN, VALUEIN,
- * org.apache.hadoop.mapreduce.Mapper.Context)
- */
-
- @Override
- @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="NP_NULL_ON_SOME_PATH",
- justification="Findbugs is blind to the Precondition null check")
- public void map(ImmutableBytesWritable row, Result values,
- Context context)
- throws IOException {
- Preconditions.checkState(values != null,
- "values passed to the map is null");
-
- try {
- byte[] currentRow = values.getRow();
- if (lastRow == null || !Bytes.equals(lastRow, currentRow)) {
- lastRow = currentRow;
- currentRowKey = Bytes.toStringBinary(currentRow);
- currentFamily = null;
- currentQualifier = null;
- context.getCounter(Counters.ROWS).increment(1);
- context.write(new Text("Total ROWS"), new IntWritable(1));
- }
- if (!values.isEmpty()) {
- int cellCount = 0;
- for (Cell value : values.listCells()) {
- cellCount++;
- if (currentFamily == null || !CellUtil.matchingFamily(value, currentFamily)) {
- currentFamily = CellUtil.cloneFamily(value);
- currentFamilyName = Bytes.toStringBinary(currentFamily);
- currentQualifier = null;
- context.getCounter("CF", currentFamilyName).increment(1);
- if (1 == context.getCounter("CF", currentFamilyName).getValue()) {
- context.write(new Text("Total Families Across all Rows"), new IntWritable(1));
- context.write(new Text(currentFamily), new IntWritable(1));
- }
- }
- if (currentQualifier == null || !CellUtil.matchingQualifier(value, currentQualifier)) {
- currentQualifier = CellUtil.cloneQualifier(value);
- currentQualifierName = currentFamilyName + separator +
- Bytes.toStringBinary(currentQualifier);
- currentRowQualifierName = currentRowKey + separator + currentQualifierName;
-
- context.write(new Text("Total Qualifiers across all Rows"),
- new IntWritable(1));
- context.write(new Text(currentQualifierName), new IntWritable(1));
- }
- // Increment versions
- context.write(new Text(currentRowQualifierName + "_Versions"), new IntWritable(1));
- }
- context.getCounter(Counters.CELLS).increment(cellCount);
- }
- } catch (InterruptedException e) {
- e.printStackTrace();
- }
- }
- }
-
- static class IntSumReducer<Key> extends Reducer<Key, IntWritable,
- Key, IntWritable> {
-
- private IntWritable result = new IntWritable();
- public void reduce(Key key, Iterable<IntWritable> values,
- Context context)
- throws IOException, InterruptedException {
- int sum = 0;
- for (IntWritable val : values) {
- sum += val.get();
- }
- result.set(sum);
- context.write(key, result);
- }
- }
-
- /**
- * Sets up the actual job.
- *
- * @param conf The current configuration.
- * @param args The command line parameters.
- * @return The newly created job.
- * @throws IOException When setting up the job fails.
- */
- public static Job createSubmittableJob(Configuration conf, String[] args)
- throws IOException {
- String tableName = args[0];
- Path outputDir = new Path(args[1]);
- String reportSeparatorString = (args.length > 2) ? args[2]: ":";
- conf.set("ReportSeparator", reportSeparatorString);
- Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
- job.setJarByClass(CellCounter.class);
- Scan scan = getConfiguredScanForJob(conf, args);
- TableMapReduceUtil.initTableMapperJob(tableName, scan,
- CellCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);
- job.setNumReduceTasks(1);
- job.setMapOutputKeyClass(Text.class);
- job.setMapOutputValueClass(IntWritable.class);
- job.setOutputFormatClass(TextOutputFormat.class);
- job.setOutputKeyClass(Text.class);
- job.setOutputValueClass(IntWritable.class);
- FileOutputFormat.setOutputPath(job, outputDir);
- job.setReducerClass(IntSumReducer.class);
- return job;
- }
-
- private static Scan getConfiguredScanForJob(Configuration conf, String[] args)
- throws IOException {
- // create scan with any properties set from TableInputFormat
- Scan s = TableInputFormat.createScanFromConfiguration(conf);
- // Set Scan Versions
- if (conf.get(TableInputFormat.SCAN_MAXVERSIONS) == null) {
- // default to all versions unless explicitly set
- s.setMaxVersions(Integer.MAX_VALUE);
- }
- s.setCacheBlocks(false);
- // Set RowFilter or Prefix Filter if applicable.
- Filter rowFilter = getRowFilter(args);
- if (rowFilter!= null) {
- LOG.info("Setting Row Filter for counter.");
- s.setFilter(rowFilter);
- }
- // Set TimeRange if defined
- long timeRange[] = getTimeRange(args);
- if (timeRange != null) {
- LOG.info("Setting TimeRange for counter.");
- s.setTimeRange(timeRange[0], timeRange[1]);
- }
- return s;
- }
-
-
- private static Filter getRowFilter(String[] args) {
- Filter rowFilter = null;
- String filterCriteria = (args.length > 3) ? args[3]: null;
- if (filterCriteria == null) return null;
- if (filterCriteria.startsWith("^")) {
- String regexPattern = filterCriteria.substring(1, filterCriteria.length());
- rowFilter = new RowFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator(regexPattern));
- } else {
- rowFilter = new PrefixFilter(Bytes.toBytesBinary(filterCriteria));
- }
- return rowFilter;
- }
-
- private static long[] getTimeRange(String[] args) throws IOException {
- final String startTimeArgKey = "--starttime=";
- final String endTimeArgKey = "--endtime=";
- long startTime = 0L;
- long endTime = 0L;
-
- for (int i = 1; i < args.length; i++) {
- System.out.println("i:" + i + "arg[i]" + args[i]);
- if (args[i].startsWith(startTimeArgKey)) {
- startTime = Long.parseLong(args[i].substring(startTimeArgKey.length()));
- }
- if (args[i].startsWith(endTimeArgKey)) {
- endTime = Long.parseLong(args[i].substring(endTimeArgKey.length()));
- }
- }
-
- if (startTime == 0 && endTime == 0)
- return null;
-
- endTime = endTime == 0 ? HConstants.LATEST_TIMESTAMP : endTime;
- return new long [] {startTime, endTime};
- }
-
- @Override
- public int run(String[] args) throws Exception {
- if (args.length < 2) {
- System.err.println("ERROR: Wrong number of parameters: " + args.length);
- System.err.println("Usage: CellCounter ");
- System.err.println(" <tablename> <outputDir> <reportSeparator> [^[regex pattern] or " +
- "[Prefix] for row filter]] --starttime=[starttime] --endtime=[endtime]");
- System.err.println(" Note: -D properties will be applied to the conf used. ");
- System.err.println(" Additionally, all of the SCAN properties from TableInputFormat");
- System.err.println(" can be specified to get fine grained control on what is counted..");
- System.err.println(" -D " + TableInputFormat.SCAN_ROW_START + "=<rowkey>");
- System.err.println(" -D " + TableInputFormat.SCAN_ROW_STOP + "=<rowkey>");
- System.err.println(" -D " + TableInputFormat.SCAN_COLUMNS + "=\"<col1> <col2>...\"");
- System.err.println(" -D " + TableInputFormat.SCAN_COLUMN_FAMILY + "=<family1>,<family2>, ...");
- System.err.println(" -D " + TableInputFormat.SCAN_TIMESTAMP + "=<timestamp>");
- System.err.println(" -D " + TableInputFormat.SCAN_TIMERANGE_START + "=<timestamp>");
- System.err.println(" -D " + TableInputFormat.SCAN_TIMERANGE_END + "=<timestamp>");
- System.err.println(" -D " + TableInputFormat.SCAN_MAXVERSIONS + "=<count>");
- System.err.println(" -D " + TableInputFormat.SCAN_CACHEDROWS + "=<count>");
- System.err.println(" -D " + TableInputFormat.SCAN_BATCHSIZE + "=<count>");
- System.err.println(" <reportSeparator> parameter can be used to override the default report separator " +
- "string : used to separate the rowId/column family name and qualifier name.");
- System.err.println(" [^[regex pattern] or [Prefix] parameter can be used to limit the cell counter count " +
- "operation to a limited subset of rows from the table based on regex or prefix pattern.");
- return -1;
- }
- Job job = createSubmittableJob(getConf(), args);
- return (job.waitForCompletion(true) ? 0 : 1);
- }
-
- /**
- * Main entry point.
- * @param args The command line parameters.
- * @throws Exception When running the job fails.
- */
- public static void main(String[] args) throws Exception {
- int errCode = ToolRunner.run(HBaseConfiguration.create(), new CellCounter(), args);
- System.exit(errCode);
- }
-
-}
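A sketch of invoking CellCounter programmatically through ToolRunner, equivalent to the command-line usage above; the table name, output directory and separator are placeholders.

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.CellCounter;
import org.apache.hadoop.util.ToolRunner;

public class RunCellCounter {
  public static void main(String[] args) throws Exception {
    // Count cells in "my_table", writing the report to /tmp/cellcount, using ';' as separator.
    int rc = ToolRunner.run(HBaseConfiguration.create(), new CellCounter(),
        new String[] { "my_table", "/tmp/cellcount", ";" });
    System.exit(rc);
  }
}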
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java
new file mode 100644
index 0000000..e80410f
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java
@@ -0,0 +1,1111 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.snapshot;
+
+import java.io.BufferedInputStream;
+import java.io.FileNotFoundException;
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.Option;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileChecksum;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.io.FileLink;
+import org.apache.hadoop.hbase.io.HFileLink;
+import org.apache.hadoop.hbase.io.WALLink;
+import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
+import org.apache.hadoop.hbase.mob.MobUtils;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotFileInfo;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
+import org.apache.hadoop.hbase.util.AbstractHBaseTool;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.util.HFileArchiveUtil;
+import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
+import org.apache.hadoop.mapreduce.security.TokenCache;
+import org.apache.hadoop.hbase.io.hadoopbackport.ThrottledInputStream;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.util.Tool;
+
+/**
+ * Export the specified snapshot to a given FileSystem.
+ *
+ * The .snapshot/name folder is copied to the destination cluster, and then all of the
+ * hfiles/wals are copied into the .archive/ location on the destination using a Map-Reduce job.
+ * When everything is done, the second cluster can restore the snapshot.
+ */
+@InterfaceAudience.Public
+public class ExportSnapshot extends AbstractHBaseTool implements Tool {
+ public static final String NAME = "exportsnapshot";
+ /** Configuration prefix for overrides for the source filesystem */
+ public static final String CONF_SOURCE_PREFIX = NAME + ".from.";
+ /** Configuration prefix for overrides for the destination filesystem */
+ public static final String CONF_DEST_PREFIX = NAME + ".to.";
+
+ private static final Log LOG = LogFactory.getLog(ExportSnapshot.class);
+
+ private static final String MR_NUM_MAPS = "mapreduce.job.maps";
+ private static final String CONF_NUM_SPLITS = "snapshot.export.format.splits";
+ private static final String CONF_SNAPSHOT_NAME = "snapshot.export.format.snapshot.name";
+ private static final String CONF_SNAPSHOT_DIR = "snapshot.export.format.snapshot.dir";
+ private static final String CONF_FILES_USER = "snapshot.export.files.attributes.user";
+ private static final String CONF_FILES_GROUP = "snapshot.export.files.attributes.group";
+ private static final String CONF_FILES_MODE = "snapshot.export.files.attributes.mode";
+ private static final String CONF_CHECKSUM_VERIFY = "snapshot.export.checksum.verify";
+ private static final String CONF_OUTPUT_ROOT = "snapshot.export.output.root";
+ private static final String CONF_INPUT_ROOT = "snapshot.export.input.root";
+ private static final String CONF_BUFFER_SIZE = "snapshot.export.buffer.size";
+ private static final String CONF_MAP_GROUP = "snapshot.export.default.map.group";
+ private static final String CONF_BANDWIDTH_MB = "snapshot.export.map.bandwidth.mb";
+ protected static final String CONF_SKIP_TMP = "snapshot.export.skip.tmp";
+
+ static class Testing {
+ static final String CONF_TEST_FAILURE = "test.snapshot.export.failure";
+ static final String CONF_TEST_FAILURE_COUNT = "test.snapshot.export.failure.count";
+ int failuresCountToInject = 0;
+ int injectedFailureCount = 0;
+ }
+
+ // Command line options and defaults.
+ static final class Options {
+ static final Option SNAPSHOT = new Option(null, "snapshot", true, "Snapshot to restore.");
+ static final Option TARGET_NAME = new Option(null, "target", true,
+ "Target name for the snapshot.");
+ static final Option COPY_TO = new Option(null, "copy-to", true, "Remote "
+ + "destination hdfs://");
+ static final Option COPY_FROM = new Option(null, "copy-from", true,
+ "Input folder hdfs:// (default hbase.rootdir)");
+ static final Option NO_CHECKSUM_VERIFY = new Option(null, "no-checksum-verify", false,
+ "Do not verify checksum, use name+length only.");
+ static final Option NO_TARGET_VERIFY = new Option(null, "no-target-verify", false,
+ "Do not verify the integrity of the exported snapshot.");
+ static final Option OVERWRITE = new Option(null, "overwrite", false,
+ "Rewrite the snapshot manifest if already exists.");
+ static final Option CHUSER = new Option(null, "chuser", true,
+ "Change the owner of the files to the specified one.");
+ static final Option CHGROUP = new Option(null, "chgroup", true,
+ "Change the group of the files to the specified one.");
+ static final Option CHMOD = new Option(null, "chmod", true,
+ "Change the permission of the files to the specified one.");
+ static final Option MAPPERS = new Option(null, "mappers", true,
+ "Number of mappers to use during the copy (mapreduce.job.maps).");
+ static final Option BANDWIDTH = new Option(null, "bandwidth", true,
+ "Limit bandwidth to this value in MB/second.");
+ }
+
+ // Export Map-Reduce Counters, to keep track of the progress
+ public enum Counter {
+ MISSING_FILES, FILES_COPIED, FILES_SKIPPED, COPY_FAILED,
+ BYTES_EXPECTED, BYTES_SKIPPED, BYTES_COPIED
+ }
+
+ private static class ExportMapper extends Mapper<BytesWritable, NullWritable,
+ NullWritable, NullWritable> {
+ private static final Log LOG = LogFactory.getLog(ExportMapper.class);
+ final static int REPORT_SIZE = 1 * 1024 * 1024;
+ final static int BUFFER_SIZE = 64 * 1024;
+
+ private boolean verifyChecksum;
+ private String filesGroup;
+ private String filesUser;
+ private short filesMode;
+ private int bufferSize;
+
+ private FileSystem outputFs;
+ private Path outputArchive;
+ private Path outputRoot;
+
+ private FileSystem inputFs;
+ private Path inputArchive;
+ private Path inputRoot;
+
+ private static Testing testing = new Testing();
+
+ @Override
+ public void setup(Context context) throws IOException {
+ Configuration conf = context.getConfiguration();
+
+ Configuration srcConf = HBaseConfiguration.createClusterConf(conf, null, CONF_SOURCE_PREFIX);
+ Configuration destConf = HBaseConfiguration.createClusterConf(conf, null, CONF_DEST_PREFIX);
+
+ verifyChecksum = conf.getBoolean(CONF_CHECKSUM_VERIFY, true);
+
+ filesGroup = conf.get(CONF_FILES_GROUP);
+ filesUser = conf.get(CONF_FILES_USER);
+ filesMode = (short)conf.getInt(CONF_FILES_MODE, 0);
+ outputRoot = new Path(conf.get(CONF_OUTPUT_ROOT));
+ inputRoot = new Path(conf.get(CONF_INPUT_ROOT));
+
+ inputArchive = new Path(inputRoot, HConstants.HFILE_ARCHIVE_DIRECTORY);
+ outputArchive = new Path(outputRoot, HConstants.HFILE_ARCHIVE_DIRECTORY);
+
+ try {
+ srcConf.setBoolean("fs." + inputRoot.toUri().getScheme() + ".impl.disable.cache", true);
+ inputFs = FileSystem.get(inputRoot.toUri(), srcConf);
+ } catch (IOException e) {
+ throw new IOException("Could not get the input FileSystem with root=" + inputRoot, e);
+ }
+
+ try {
+ destConf.setBoolean("fs." + outputRoot.toUri().getScheme() + ".impl.disable.cache", true);
+ outputFs = FileSystem.get(outputRoot.toUri(), destConf);
+ } catch (IOException e) {
+ throw new IOException("Could not get the output FileSystem with root="+ outputRoot, e);
+ }
+
+ // Use the default block size of the outputFs if bigger
+ int defaultBlockSize = Math.max((int) outputFs.getDefaultBlockSize(outputRoot), BUFFER_SIZE);
+ bufferSize = conf.getInt(CONF_BUFFER_SIZE, defaultBlockSize);
+ LOG.info("Using bufferSize=" + StringUtils.humanReadableInt(bufferSize));
+
+ for (Counter c : Counter.values()) {
+ context.getCounter(c).increment(0);
+ }
+ if (context.getConfiguration().getBoolean(Testing.CONF_TEST_FAILURE, false)) {
+ testing.failuresCountToInject = conf.getInt(Testing.CONF_TEST_FAILURE_COUNT, 0);
+ // Get number of times we have already injected failure based on attempt number of this
+ // task.
+ testing.injectedFailureCount = context.getTaskAttemptID().getId();
+ }
+ }
+
+ @Override
+ protected void cleanup(Context context) {
+ IOUtils.closeStream(inputFs);
+ IOUtils.closeStream(outputFs);
+ }
+
+ @Override
+ public void map(BytesWritable key, NullWritable value, Context context)
+ throws InterruptedException, IOException {
+ SnapshotFileInfo inputInfo = SnapshotFileInfo.parseFrom(key.copyBytes());
+ Path outputPath = getOutputPath(inputInfo);
+
+ copyFile(context, inputInfo, outputPath);
+ }
+
+ /**
+ * Returns the location where the inputPath will be copied.
+ */
+ private Path getOutputPath(final SnapshotFileInfo inputInfo) throws IOException {
+ Path path = null;
+ switch (inputInfo.getType()) {
+ case HFILE:
+ Path inputPath = new Path(inputInfo.getHfile());
+ String family = inputPath.getParent().getName();
+ TableName table = HFileLink.getReferencedTableName(inputPath.getName());
+ String region = HFileLink.getReferencedRegionName(inputPath.getName());
+ String hfile = HFileLink.getReferencedHFileName(inputPath.getName());
+ path = new Path(FSUtils.getTableDir(new Path("./"), table),
+ new Path(region, new Path(family, hfile)));
+ break;
+ case WAL:
+ LOG.warn("snapshot does not keeps WALs: " + inputInfo);
+ break;
+ default:
+ throw new IOException("Invalid File Type: " + inputInfo.getType().toString());
+ }
+ return new Path(outputArchive, path);
+ }
+
+ /**
+ * Used by TestExportSnapshot to test for retries when failures happen.
+ * Failure is injected in {@link #copyFile(Context, SnapshotFileInfo, Path)}.
+ */
+ private void injectTestFailure(final Context context, final SnapshotFileInfo inputInfo)
+ throws IOException {
+ if (!context.getConfiguration().getBoolean(Testing.CONF_TEST_FAILURE, false)) return;
+ if (testing.injectedFailureCount >= testing.failuresCountToInject) return;
+ testing.injectedFailureCount++;
+ context.getCounter(Counter.COPY_FAILED).increment(1);
+ LOG.debug("Injecting failure. Count: " + testing.injectedFailureCount);
+ throw new IOException(String.format("TEST FAILURE (%d of max %d): Unable to copy input=%s",
+ testing.injectedFailureCount, testing.failuresCountToInject, inputInfo));
+ }
+
+ private void copyFile(final Context context, final SnapshotFileInfo inputInfo,
+ final Path outputPath) throws IOException {
+ // Get the file information
+ FileStatus inputStat = getSourceFileStatus(context, inputInfo);
+
+ // Verify if the output file exists and is the same that we want to copy
+ if (outputFs.exists(outputPath)) {
+ FileStatus outputStat = outputFs.getFileStatus(outputPath);
+ if (outputStat != null && sameFile(inputStat, outputStat)) {
+ LOG.info("Skip copy " + inputStat.getPath() + " to " + outputPath + ", same file.");
+ context.getCounter(Counter.FILES_SKIPPED).increment(1);
+ context.getCounter(Counter.BYTES_SKIPPED).increment(inputStat.getLen());
+ return;
+ }
+ }
+
+ InputStream in = openSourceFile(context, inputInfo);
+ int bandwidthMB = context.getConfiguration().getInt(CONF_BANDWIDTH_MB, 100);
+ if (Integer.MAX_VALUE != bandwidthMB) {
+ in = new ThrottledInputStream(new BufferedInputStream(in), bandwidthMB * 1024 * 1024L);
+ }
+
+ try {
+ context.getCounter(Counter.BYTES_EXPECTED).increment(inputStat.getLen());
+
+ // Ensure that the output folder is there and copy the file
+ createOutputPath(outputPath.getParent());
+ FSDataOutputStream out = outputFs.create(outputPath, true);
+ try {
+ copyData(context, inputStat.getPath(), in, outputPath, out, inputStat.getLen());
+ } finally {
+ out.close();
+ }
+
+ // Try to preserve attributes
+ if (!preserveAttributes(outputPath, inputStat)) {
+ LOG.warn("You may have to run manually chown on: " + outputPath);
+ }
+ } finally {
+ in.close();
+ injectTestFailure(context, inputInfo);
+ }
+ }
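The bandwidth cap above is enforced by wrapping the source stream in a ThrottledInputStream. As a rough, self-contained illustration of the same idea (a simplified sketch, not the ThrottledInputStream implementation the job actually uses), a copy loop can pace itself by sleeping whenever it gets ahead of the allowed rate:

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;

/** Simplified sketch of a bandwidth-capped copy loop; not the ThrottledInputStream the job uses. */
public class ThrottledCopySketch {

  /** Read everything from {@code in}, sleeping so the average rate stays under maxBytesPerSec. */
  static long copyWithCap(InputStream in, long maxBytesPerSec, byte[] buffer) throws IOException {
    long start = System.currentTimeMillis();
    long total = 0;
    int n;
    while ((n = in.read(buffer)) > 0) {
      total += n;
      long elapsedMs = Math.max(1, System.currentTimeMillis() - start);
      long allowedMs = (total * 1000L) / maxBytesPerSec;   // time the cap allows for `total` bytes
      if (allowedMs > elapsedMs) {
        try {
          Thread.sleep(allowedMs - elapsedMs);             // fall back to the permitted rate
        } catch (InterruptedException e) {
          Thread.currentThread().interrupt();
          throw new IOException("interrupted while throttling", e);
        }
      }
      // A real copy would write buffer[0..n) to the destination stream here.
    }
    return total;
  }

  public static void main(String[] args) throws IOException {
    byte[] data = new byte[256 * 1024];
    long copied = copyWithCap(new ByteArrayInputStream(data), 128 * 1024, new byte[64 * 1024]);
    System.out.println("copied " + copied + " bytes");     // takes roughly two seconds at 128 KB/s
  }
}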
+
+ /**
+ * Create the output folder and optionally set ownership.
+ */
+ private void createOutputPath(final Path path) throws IOException {
+ if (filesUser == null && filesGroup == null) {
+ outputFs.mkdirs(path);
+ } else {
+ Path parent = path.getParent();
+ if (!outputFs.exists(parent) && !parent.isRoot()) {
+ createOutputPath(parent);
+ }
+ outputFs.mkdirs(path);
+ if (filesUser != null || filesGroup != null) {
+ // override the owner when non-null user/group is specified
+ outputFs.setOwner(path, filesUser, filesGroup);
+ }
+ if (filesMode > 0) {
+ outputFs.setPermission(path, new FsPermission(filesMode));
+ }
+ }
+ }
+
+ /**
+ * Try to preserve the file attributes selected by the user, copying them from the source file.
+ * This is only required when exporting as a user other than "hbase", or on a system that
+ * doesn't have the "hbase" user.
+ *
+ * This is not considered a blocking failure, since the user can later force a chown/chmod
+ * using an owner and mode that are known to exist on the destination system.
+ */
+ private boolean preserveAttributes(final Path path, final FileStatus refStat) {
+ FileStatus stat;
+ try {
+ stat = outputFs.getFileStatus(path);
+ } catch (IOException e) {
+ LOG.warn("Unable to get the status for file=" + path);
+ return false;
+ }
+
+ try {
+ if (filesMode > 0 && stat.getPermission().toShort() != filesMode) {
+ outputFs.setPermission(path, new FsPermission(filesMode));
+ } else if (refStat != null && !stat.getPermission().equals(refStat.getPermission())) {
+ outputFs.setPermission(path, refStat.getPermission());
+ }
+ } catch (IOException e) {
+ LOG.warn("Unable to set the permission for file="+ stat.getPath() +": "+ e.getMessage());
+ return false;
+ }
+
+ boolean hasRefStat = (refStat != null);
+ String user = stringIsNotEmpty(filesUser) || !hasRefStat ? filesUser : refStat.getOwner();
+ String group = stringIsNotEmpty(filesGroup) || !hasRefStat ? filesGroup : refStat.getGroup();
+ if (stringIsNotEmpty(user) || stringIsNotEmpty(group)) {
+ try {
+ if (!(user.equals(stat.getOwner()) && group.equals(stat.getGroup()))) {
+ outputFs.setOwner(path, user, group);
+ }
+ } catch (IOException e) {
+ LOG.warn("Unable to set the owner/group for file="+ stat.getPath() +": "+ e.getMessage());
+ LOG.warn("The user/group may not exist on the destination cluster: user=" +
+ user + " group=" + group);
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ private boolean stringIsNotEmpty(final String str) {
+ return str != null && str.length() > 0;
+ }
+
+ private void copyData(final Context context,
+ final Path inputPath, final InputStream in,
+ final Path outputPath, final FSDataOutputStream out,
+ final long inputFileSize)
+ throws IOException {
+ final String statusMessage = "copied %s/" + StringUtils.humanReadableInt(inputFileSize) +
+ " (%.1f%%)";
+
+ try {
+ byte[] buffer = new byte[bufferSize];
+ long totalBytesWritten = 0;
+ int reportBytes = 0;
+ int bytesRead;
+
+ long stime = System.currentTimeMillis();
+ while ((bytesRead = in.read(buffer)) > 0) {
+ out.write(buffer, 0, bytesRead);
+ totalBytesWritten += bytesRead;
+ reportBytes += bytesRead;
+
+ if (reportBytes >= REPORT_SIZE) {
+ context.getCounter(Counter.BYTES_COPIED).increment(reportBytes);
+ context.setStatus(String.format(statusMessage,
+ StringUtils.humanReadableInt(totalBytesWritten),
+ (totalBytesWritten/(float)inputFileSize) * 100.0f) +
+ " from " + inputPath + " to " + outputPath);
+ reportBytes = 0;
+ }
+ }
+ long etime = System.currentTimeMillis();
+
+ context.getCounter(Counter.BYTES_COPIED).increment(reportBytes);
+ context.setStatus(String.format(statusMessage,
+ StringUtils.humanReadableInt(totalBytesWritten),
+ (totalBytesWritten/(float)inputFileSize) * 100.0f) +
+ " from " + inputPath + " to " + outputPath);
+
+ // Verify that the written size matches
+ if (totalBytesWritten != inputFileSize) {
+ String msg = "number of bytes copied not matching copied=" + totalBytesWritten +
+ " expected=" + inputFileSize + " for file=" + inputPath;
+ throw new IOException(msg);
+ }
+
+ LOG.info("copy completed for input=" + inputPath + " output=" + outputPath);
+ LOG.info("size=" + totalBytesWritten +
+ " (" + StringUtils.humanReadableInt(totalBytesWritten) + ")" +
+ " time=" + StringUtils.formatTimeDiff(etime, stime) +
+ String.format(" %.3fM/sec", (totalBytesWritten / ((etime - stime)/1000.0))/1048576.0));
+ context.getCounter(Counter.FILES_COPIED).increment(1);
+ } catch (IOException e) {
+ LOG.error("Error copying " + inputPath + " to " + outputPath, e);
+ context.getCounter(Counter.COPY_FAILED).increment(1);
+ throw e;
+ }
+ }
+
+ /**
+ * Try to open the "source" file.
+ * Throws an IOException if the communication with the inputFs fails or
+ * if the file is not found.
+ */
+ private FSDataInputStream openSourceFile(Context context, final SnapshotFileInfo fileInfo)
+ throws IOException {
+ try {
+ Configuration conf = context.getConfiguration();
+ FileLink link = null;
+ switch (fileInfo.getType()) {
+ case HFILE:
+ Path inputPath = new Path(fileInfo.getHfile());
+ link = getFileLink(inputPath, conf);
+ break;
+ case WAL:
+ String serverName = fileInfo.getWalServer();
+ String logName = fileInfo.getWalName();
+ link = new WALLink(inputRoot, serverName, logName);
+ break;
+ default:
+ throw new IOException("Invalid File Type: " + fileInfo.getType().toString());
+ }
+ return link.open(inputFs);
+ } catch (IOException e) {
+ context.getCounter(Counter.MISSING_FILES).increment(1);
+ LOG.error("Unable to open source file=" + fileInfo.toString(), e);
+ throw e;
+ }
+ }
+
+ private FileStatus getSourceFileStatus(Context context, final SnapshotFileInfo fileInfo)
+ throws IOException {
+ try {
+ Configuration conf = context.getConfiguration();
+ FileLink link = null;
+ switch (fileInfo.getType()) {
+ case HFILE:
+ Path inputPath = new Path(fileInfo.getHfile());
+ link = getFileLink(inputPath, conf);
+ break;
+ case WAL:
+ link = new WALLink(inputRoot, fileInfo.getWalServer(), fileInfo.getWalName());
+ break;
+ default:
+ throw new IOException("Invalid File Type: " + fileInfo.getType().toString());
+ }
+ return link.getFileStatus(inputFs);
+ } catch (FileNotFoundException e) {
+ context.getCounter(Counter.MISSING_FILES).increment(1);
+ LOG.error("Unable to get the status for source file=" + fileInfo.toString(), e);
+ throw e;
+ } catch (IOException e) {
+ LOG.error("Unable to get the status for source file=" + fileInfo.toString(), e);
+ throw e;
+ }
+ }
+
+ private FileLink getFileLink(Path path, Configuration conf) throws IOException {
+ String regionName = HFileLink.getReferencedRegionName(path.getName());
+ TableName tableName = HFileLink.getReferencedTableName(path.getName());
+ if (MobUtils.getMobRegionInfo(tableName).getEncodedName().equals(regionName)) {
+ return HFileLink.buildFromHFileLinkPattern(MobUtils.getQualifiedMobRootDir(conf),
+ HFileArchiveUtil.getArchivePath(conf), path);
+ }
+ return HFileLink.buildFromHFileLinkPattern(inputRoot, inputArchive, path);
+ }
+
+ private FileChecksum getFileChecksum(final FileSystem fs, final Path path) {
+ try {
+ return fs.getFileChecksum(path);
+ } catch (IOException e) {
+ LOG.warn("Unable to get checksum for file=" + path, e);
+ return null;
+ }
+ }
+
+ /**
+ * Check if the two files are equal by looking at the file length,
+ * and at the checksum (if the user has specified the verifyChecksum flag).
+ */
+ private boolean sameFile(final FileStatus inputStat, final FileStatus outputStat) {
+ // Not matching length
+ if (inputStat.getLen() != outputStat.getLen()) return false;
+
+ // Mark files as equal, since the user asked for no checksum verification
+ if (!verifyChecksum) return true;
+
+ // If checksums are not available, files are not the same.
+ FileChecksum inChecksum = getFileChecksum(inputFs, inputStat.getPath());
+ if (inChecksum == null) return false;
+
+ FileChecksum outChecksum = getFileChecksum(outputFs, outputStat.getPath());
+ if (outChecksum == null) return false;
+
+ return inChecksum.equals(outChecksum);
+ }
+ }
+
+ // ==========================================================================
+ // Input Format
+ // ==========================================================================
+
+ /**
+ * Extract the list of files (HFiles/WALs) to copy using Map-Reduce.
+ * @return list of files referenced by the snapshot (pair of path and size)
+ */
+ private static List<Pair<SnapshotFileInfo, Long>> getSnapshotFiles(final Configuration conf,
+ final FileSystem fs, final Path snapshotDir) throws IOException {
+ SnapshotDescription snapshotDesc = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
+
+ final List<Pair<SnapshotFileInfo, Long>> files = new ArrayList<>();
+ final TableName table = TableName.valueOf(snapshotDesc.getTable());
+
+ // Get snapshot files
+ LOG.info("Loading Snapshot '" + snapshotDesc.getName() + "' hfile list");
+ SnapshotReferenceUtil.visitReferencedFiles(conf, fs, snapshotDir, snapshotDesc,
+ new SnapshotReferenceUtil.SnapshotVisitor() {
+ @Override
+ public void storeFile(final HRegionInfo regionInfo, final String family,
+ final SnapshotRegionManifest.StoreFile storeFile) throws IOException {
+ // for storeFile.hasReference() case, copied as part of the manifest
+ if (!storeFile.hasReference()) {
+ String region = regionInfo.getEncodedName();
+ String hfile = storeFile.getName();
+ Path path = HFileLink.createPath(table, region, family, hfile);
+
+ SnapshotFileInfo fileInfo = SnapshotFileInfo.newBuilder()
+ .setType(SnapshotFileInfo.Type.HFILE)
+ .setHfile(path.toString())
+ .build();
+
+ long size;
+ if (storeFile.hasFileSize()) {
+ size = storeFile.getFileSize();
+ } else {
+ size = HFileLink.buildFromHFileLinkPattern(conf, path).getFileStatus(fs).getLen();
+ }
+ files.add(new Pair<>(fileInfo, size));
+ }
+ }
+ });
+
+ return files;
+ }
+
+ /**
+ * Given a list of file paths and sizes, create around ngroups splits that are as balanced as
+ * possible. The groups created will have similar amounts of bytes.
+ * <p>
+ * The algorithm used is pretty straightforward: the file list is sorted by size, and then each
+ * group in turn takes the biggest file still available, iterating through the groups and
+ * alternating the direction.
+ */
+ static List<List<Pair<SnapshotFileInfo, Long>>> getBalancedSplits(
+ final List<Pair<SnapshotFileInfo, Long>> files, final int ngroups) {
+ // Sort files by size, from small to big
+ Collections.sort(files, new Comparator<Pair<SnapshotFileInfo, Long>>() {
+ public int compare(Pair<SnapshotFileInfo, Long> a, Pair<SnapshotFileInfo, Long> b) {
+ long r = a.getSecond() - b.getSecond();
+ return (r < 0) ? -1 : ((r > 0) ? 1 : 0);
+ }
+ });
+
+ // create balanced groups
+ List<List<Pair<SnapshotFileInfo, Long>>> fileGroups = new LinkedList<>();
+ long[] sizeGroups = new long[ngroups];
+ int hi = files.size() - 1;
+ int lo = 0;
+
+ List<Pair<SnapshotFileInfo, Long>> group;
+ int dir = 1;
+ int g = 0;
+
+ while (hi >= lo) {
+ if (g == fileGroups.size()) {
+ group = new LinkedList<>();
+ fileGroups.add(group);
+ } else {
+ group = fileGroups.get(g);
+ }
+
+ Pair<SnapshotFileInfo, Long> fileInfo = files.get(hi--);
+
+ // add the hi one
+ sizeGroups[g] += fileInfo.getSecond();
+ group.add(fileInfo);
+
+ // change direction when at the end or the beginning
+ g += dir;
+ if (g == ngroups) {
+ dir = -1;
+ g = ngroups - 1;
+ } else if (g < 0) {
+ dir = 1;
+ g = 0;
+ }
+ }
+
+ if (LOG.isDebugEnabled()) {
+ for (int i = 0; i < sizeGroups.length; ++i) {
+ LOG.debug("export split=" + i + " size=" + StringUtils.humanReadableInt(sizeGroups[i]));
+ }
+ }
+
+ return fileGroups;
+ }
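To make the snake ordering above concrete, here is a simplified standalone sketch of the same balancing idea (it works on plain long sizes instead of SnapshotFileInfo pairs and creates all groups up front; illustration only, not part of the patch):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

/** Simplified sketch of the snake-order balancing in getBalancedSplits(); illustration only. */
public class BalancedSplitSketch {

  static List<List<Long>> balance(List<Long> sizes, int ngroups) {
    List<Long> sorted = new ArrayList<>(sizes);
    Collections.sort(sorted);                              // smallest first, as in the real code
    List<List<Long>> groups = new ArrayList<>(ngroups);
    for (int i = 0; i < ngroups; ++i) {
      groups.add(new ArrayList<Long>());                   // the real code creates groups lazily
    }
    int g = 0;
    int dir = 1;
    for (int hi = sorted.size() - 1; hi >= 0; --hi) {      // hand out the biggest remaining file
      groups.get(g).add(sorted.get(hi));
      g += dir;
      if (g == ngroups) { dir = -1; g = ngroups - 1; }     // bounce back at the last group
      else if (g < 0)   { dir = 1;  g = 0; }               // bounce back at the first group
    }
    return groups;
  }

  public static void main(String[] args) {
    // Sizes 9,7,5,3,2,1 over two groups follow the snake order 0,1,1,0,0,1 and end up as
    // {9,3,2} (sum 14) and {7,5,1} (sum 13).
    System.out.println(balance(Arrays.asList(9L, 7L, 5L, 3L, 2L, 1L), 2));
  }
}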
+
+ private static class ExportSnapshotInputFormat extends InputFormat<BytesWritable, NullWritable> {
+ @Override
+ public RecordReader<BytesWritable, NullWritable> createRecordReader(InputSplit split,
+ TaskAttemptContext tac) throws IOException, InterruptedException {
+ return new ExportSnapshotRecordReader(((ExportSnapshotInputSplit)split).getSplitKeys());
+ }
+
+ @Override
+ public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
+ Configuration conf = context.getConfiguration();
+ Path snapshotDir = new Path(conf.get(CONF_SNAPSHOT_DIR));
+ FileSystem fs = FileSystem.get(snapshotDir.toUri(), conf);
+
+ List<Pair<SnapshotFileInfo, Long>> snapshotFiles = getSnapshotFiles(conf, fs, snapshotDir);
+ int mappers = conf.getInt(CONF_NUM_SPLITS, 0);
+ if (mappers == 0 && snapshotFiles.size() > 0) {
+ mappers = 1 + (snapshotFiles.size() / conf.getInt(CONF_MAP_GROUP, 10));
+ mappers = Math.min(mappers, snapshotFiles.size());
+ conf.setInt(CONF_NUM_SPLITS, mappers);
+ conf.setInt(MR_NUM_MAPS, mappers);
+ }
+
+ List<List<Pair<SnapshotFileInfo, Long>>> groups = getBalancedSplits(snapshotFiles, mappers);
+ List<InputSplit> splits = new ArrayList<>(groups.size());
+ for (List<Pair<SnapshotFileInfo, Long>> files: groups) {
+ splits.add(new ExportSnapshotInputSplit(files));
+ }
+ return splits;
+ }
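As a worked example of the mapper count above: with the CONF_MAP_GROUP default of 10, a snapshot with 45 files and no explicit split count gets 1 + 45/10 = 5 mappers, and the count is never allowed to exceed the number of files.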
+
+ private static class ExportSnapshotInputSplit extends InputSplit implements Writable {
+ private List<Pair<BytesWritable, Long>> files;
+ private long length;
+
+ public ExportSnapshotInputSplit() {
+ this.files = null;
+ }
+
+ public ExportSnapshotInputSplit(final List<Pair<SnapshotFileInfo, Long>> snapshotFiles) {
+ this.files = new ArrayList<>(snapshotFiles.size());
+ for (Pair<SnapshotFileInfo, Long> fileInfo: snapshotFiles) {
+ this.files.add(new Pair<>(
+ new BytesWritable(fileInfo.getFirst().toByteArray()), fileInfo.getSecond()));
+ this.length += fileInfo.getSecond();
+ }
+ }
+
+ private List<Pair<BytesWritable, Long>> getSplitKeys() {
+ return files;
+ }
+
+ @Override
+ public long getLength() throws IOException, InterruptedException {
+ return length;
+ }
+
+ @Override
+ public String[] getLocations() throws IOException, InterruptedException {
+ return new String[] {};
+ }
+
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ int count = in.readInt();
+ files = new ArrayList<>(count);
+ length = 0;
+ for (int i = 0; i < count; ++i) {
+ BytesWritable fileInfo = new BytesWritable();
+ fileInfo.readFields(in);
+ long size = in.readLong();
+ files.add(new Pair<>(fileInfo, size));
+ length += size;
+ }
+ }
+
+ @Override
+ public void write(DataOutput out) throws IOException {
+ out.writeInt(files.size());
+ for (final Pair<BytesWritable, Long> fileInfo: files) {
+ fileInfo.getFirst().write(out);
+ out.writeLong(fileInfo.getSecond());
+ }
+ }
+ }
+
+ private static class ExportSnapshotRecordReader
+ extends RecordReader<BytesWritable, NullWritable> {
+ private final List<Pair<BytesWritable, Long>> files;
+ private long totalSize = 0;
+ private long procSize = 0;
+ private int index = -1;
+
+ ExportSnapshotRecordReader(final List<Pair<BytesWritable, Long>> files) {
+ this.files = files;
+ for (Pair<BytesWritable, Long> fileInfo: files) {
+ totalSize += fileInfo.getSecond();
+ }
+ }
+
+ @Override
+ public void close() { }
+
+ @Override
+ public BytesWritable getCurrentKey() { return files.get(index).getFirst(); }
+
+ @Override
+ public NullWritable getCurrentValue() { return NullWritable.get(); }
+
+ @Override
+ public float getProgress() { return (float)procSize / totalSize; }
+
+ @Override
+ public void initialize(InputSplit split, TaskAttemptContext tac) { }
+
+ @Override
+ public boolean nextKeyValue() {
+ if (index >= 0) {
+ procSize += files.get(index).getSecond();
+ }
+ return(++index < files.size());
+ }
+ }
+ }
+
+ // ==========================================================================
+ // Tool
+ // ==========================================================================
+
+ /**
+ * Run Map-Reduce Job to perform the files copy.
+ */
+ private void runCopyJob(final Path inputRoot, final Path outputRoot,
+ final String snapshotName, final Path snapshotDir, final boolean verifyChecksum,
+ final String filesUser, final String filesGroup, final int filesMode,
+ final int mappers, final int bandwidthMB)
+ throws IOException, InterruptedException, ClassNotFoundException {
+ Configuration conf = getConf();
+ if (filesGroup != null) conf.set(CONF_FILES_GROUP, filesGroup);
+ if (filesUser != null) conf.set(CONF_FILES_USER, filesUser);
+ if (mappers > 0) {
+ conf.setInt(CONF_NUM_SPLITS, mappers);
+ conf.setInt(MR_NUM_MAPS, mappers);
+ }
+ conf.setInt(CONF_FILES_MODE, filesMode);
+ conf.setBoolean(CONF_CHECKSUM_VERIFY, verifyChecksum);
+ conf.set(CONF_OUTPUT_ROOT, outputRoot.toString());
+ conf.set(CONF_INPUT_ROOT, inputRoot.toString());
+ conf.setInt(CONF_BANDWIDTH_MB, bandwidthMB);
+ conf.set(CONF_SNAPSHOT_NAME, snapshotName);
+ conf.set(CONF_SNAPSHOT_DIR, snapshotDir.toString());
+
+ Job job = new Job(conf);
+ job.setJobName("ExportSnapshot-" + snapshotName);
+ job.setJarByClass(ExportSnapshot.class);
+ TableMapReduceUtil.addDependencyJars(job);
+ job.setMapperClass(ExportMapper.class);
+ job.setInputFormatClass(ExportSnapshotInputFormat.class);
+ job.setOutputFormatClass(NullOutputFormat.class);
+ job.setMapSpeculativeExecution(false);
+ job.setNumReduceTasks(0);
+
+ // Acquire the delegation Tokens
+ Configuration srcConf = HBaseConfiguration.createClusterConf(conf, null, CONF_SOURCE_PREFIX);
+ TokenCache.obtainTokensForNamenodes(job.getCredentials(),
+ new Path[] { inputRoot }, srcConf);
+ Configuration destConf = HBaseConfiguration.createClusterConf(conf, null, CONF_DEST_PREFIX);
+ TokenCache.obtainTokensForNamenodes(job.getCredentials(),
+ new Path[] { outputRoot }, destConf);
+
+ // Run the MR Job
+ if (!job.waitForCompletion(true)) {
+ // TODO: Replace the fixed string with job.getStatus().getFailureInfo()
+ // once it is available on all the supported versions.
+ throw new ExportSnapshotException("Copy Files Map-Reduce Job failed");
+ }
+ }
+
+ private void verifySnapshot(final Configuration baseConf,
+ final FileSystem fs, final Path rootDir, final Path snapshotDir) throws IOException {
+ // Update the conf with the current root dir, since it may be a different cluster
+ Configuration conf = new Configuration(baseConf);
+ FSUtils.setRootDir(conf, rootDir);
+ FSUtils.setFsDefault(conf, FSUtils.getRootDir(conf));
+ SnapshotDescription snapshotDesc = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
+ SnapshotReferenceUtil.verifySnapshot(conf, fs, snapshotDir, snapshotDesc);
+ }
+
+ /**
+ * Set path ownership.
+ */
+ private void setOwner(final FileSystem fs, final Path path, final String user,
+ final String group, final boolean recursive) throws IOException {
+ if (user != null || group != null) {
+ if (recursive && fs.isDirectory(path)) {
+ for (FileStatus child : fs.listStatus(path)) {
+ setOwner(fs, child.getPath(), user, group, recursive);
+ }
+ }
+ fs.setOwner(path, user, group);
+ }
+ }
+
+ /**
+ * Set path permission.
+ */
+ private void setPermission(final FileSystem fs, final Path path, final short filesMode,
+ final boolean recursive) throws IOException {
+ if (filesMode > 0) {
+ FsPermission perm = new FsPermission(filesMode);
+ if (recursive && fs.isDirectory(path)) {
+ for (FileStatus child : fs.listStatus(path)) {
+ setPermission(fs, child.getPath(), filesMode, recursive);
+ }
+ }
+ fs.setPermission(path, perm);
+ }
+ }
+
+ private boolean verifyTarget = true;
+ private boolean verifyChecksum = true;
+ private String snapshotName = null;
+ private String targetName = null;
+ private boolean overwrite = false;
+ private String filesGroup = null;
+ private String filesUser = null;
+ private Path outputRoot = null;
+ private Path inputRoot = null;
+ private int bandwidthMB = Integer.MAX_VALUE;
+ private int filesMode = 0;
+ private int mappers = 0;
+
+ @Override
+ protected void processOptions(CommandLine cmd) {
+ snapshotName = cmd.getOptionValue(Options.SNAPSHOT.getLongOpt(), snapshotName);
+ targetName = cmd.getOptionValue(Options.TARGET_NAME.getLongOpt(), targetName);
+ if (cmd.hasOption(Options.COPY_TO.getLongOpt())) {
+ outputRoot = new Path(cmd.getOptionValue(Options.COPY_TO.getLongOpt()));
+ }
+ if (cmd.hasOption(Options.COPY_FROM.getLongOpt())) {
+ inputRoot = new Path(cmd.getOptionValue(Options.COPY_FROM.getLongOpt()));
+ }
+ mappers = getOptionAsInt(cmd, Options.MAPPERS.getLongOpt(), mappers);
+ filesUser = cmd.getOptionValue(Options.CHUSER.getLongOpt(), filesUser);
+ filesGroup = cmd.getOptionValue(Options.CHGROUP.getLongOpt(), filesGroup);
+ filesMode = getOptionAsInt(cmd, Options.CHMOD.getLongOpt(), filesMode);
+ bandwidthMB = getOptionAsInt(cmd, Options.BANDWIDTH.getLongOpt(), bandwidthMB);
+ overwrite = cmd.hasOption(Options.OVERWRITE.getLongOpt());
+ // verifyChecksum and verifyTarget also take values read from old args in processOldArgs(...).
+ verifyChecksum = !cmd.hasOption(Options.NO_CHECKSUM_VERIFY.getLongOpt());
+ verifyTarget = !cmd.hasOption(Options.NO_TARGET_VERIFY.getLongOpt());
+ }
+
+ /**
+ * Execute the export snapshot by copying the snapshot metadata, hfiles and wals.
+ * @return 0 on success, and != 0 upon failure.
+ */
+ @Override
+ public int doWork() throws IOException {
+ Configuration conf = getConf();
+
+ // Check user options
+ if (snapshotName == null) {
+ System.err.println("Snapshot name not provided.");
+ LOG.error("Use -h or --help for usage instructions.");
+ return 0;
+ }
+
+ if (outputRoot == null) {
+ System.err.println("Destination file-system (--" + Options.COPY_TO.getLongOpt()
+ + ") not provided.");
+ LOG.error("Use -h or --help for usage instructions.");
+ return 0;
+ }
+
+ if (targetName == null) {
+ targetName = snapshotName;
+ }
+ if (inputRoot == null) {
+ inputRoot = FSUtils.getRootDir(conf);
+ } else {
+ FSUtils.setRootDir(conf, inputRoot);
+ }
+
+ Configuration srcConf = HBaseConfiguration.createClusterConf(conf, null, CONF_SOURCE_PREFIX);
+ srcConf.setBoolean("fs." + inputRoot.toUri().getScheme() + ".impl.disable.cache", true);
+ FileSystem inputFs = FileSystem.get(inputRoot.toUri(), srcConf);
+ LOG.debug("inputFs=" + inputFs.getUri().toString() + " inputRoot=" + inputRoot);
+ Configuration destConf = HBaseConfiguration.createClusterConf(conf, null, CONF_DEST_PREFIX);
+ destConf.setBoolean("fs." + outputRoot.toUri().getScheme() + ".impl.disable.cache", true);
+ FileSystem outputFs = FileSystem.get(outputRoot.toUri(), destConf);
+ LOG.debug("outputFs=" + outputFs.getUri().toString() + " outputRoot=" + outputRoot.toString());
+
+ boolean skipTmp = conf.getBoolean(CONF_SKIP_TMP, false);
+
+ Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, inputRoot);
+ Path snapshotTmpDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(targetName, outputRoot);
+ Path outputSnapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(targetName, outputRoot);
+ Path initialOutputSnapshotDir = skipTmp ? outputSnapshotDir : snapshotTmpDir;
+
+ // Find the necessary directory which need to change owner and group
+ Path needSetOwnerDir = SnapshotDescriptionUtils.getSnapshotRootDir(outputRoot);
+ if (outputFs.exists(needSetOwnerDir)) {
+ if (skipTmp) {
+ needSetOwnerDir = outputSnapshotDir;
+ } else {
+ needSetOwnerDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(outputRoot);
+ if (outputFs.exists(needSetOwnerDir)) {
+ needSetOwnerDir = snapshotTmpDir;
+ }
+ }
+ }
+
+ // Check if the snapshot already exists
+ if (outputFs.exists(outputSnapshotDir)) {
+ if (overwrite) {
+ if (!outputFs.delete(outputSnapshotDir, true)) {
+ System.err.println("Unable to remove existing snapshot directory: " + outputSnapshotDir);
+ return 1;
+ }
+ } else {
+ System.err.println("The snapshot '" + targetName +
+ "' already exists in the destination: " + outputSnapshotDir);
+ return 1;
+ }
+ }
+
+ if (!skipTmp) {
+ // Check if the snapshot already in-progress
+ if (outputFs.exists(snapshotTmpDir)) {
+ if (overwrite) {
+ if (!outputFs.delete(snapshotTmpDir, true)) {
+ System.err.println("Unable to remove existing snapshot tmp directory: "+snapshotTmpDir);
+ return 1;
+ }
+ } else {
+ System.err.println("A snapshot with the same name '"+ targetName +"' may be in-progress");
+ System.err.println("Please check "+snapshotTmpDir+". If the snapshot has completed, ");
+ System.err.println("consider removing "+snapshotTmpDir+" by using the -overwrite option");
+ return 1;
+ }
+ }
+ }
+
+ // Step 1 - Copy fs1:/.snapshot/<snapshot> to fs2:/.snapshot/.tmp/<snapshot>
+ // The snapshot references must be copied before the hfiles, otherwise the cleaner
+ // will remove them because they are unreferenced.
+ try {
+ LOG.info("Copy Snapshot Manifest");
+ FileUtil.copy(inputFs, snapshotDir, outputFs, initialOutputSnapshotDir, false, false, conf);
+ } catch (IOException e) {
+ throw new ExportSnapshotException("Failed to copy the snapshot directory: from=" +
+ snapshotDir + " to=" + initialOutputSnapshotDir, e);
+ } finally {
+ if (filesUser != null || filesGroup != null) {
+ LOG.warn((filesUser == null ? "" : "Change the owner of " + needSetOwnerDir + " to "
+ + filesUser)
+ + (filesGroup == null ? "" : ", Change the group of " + needSetOwnerDir + " to "
+ + filesGroup));
+ setOwner(outputFs, needSetOwnerDir, filesUser, filesGroup, true);
+ }
+ if (filesMode > 0) {
+ LOG.warn("Change the permission of " + needSetOwnerDir + " to " + filesMode);
+ setPermission(outputFs, needSetOwnerDir, (short)filesMode, true);
+ }
+ }
+
+ // Write a new .snapshotinfo if the target name is different from the source name
+ if (!targetName.equals(snapshotName)) {
+ SnapshotDescription snapshotDesc =
+ SnapshotDescriptionUtils.readSnapshotInfo(inputFs, snapshotDir)
+ .toBuilder()
+ .setName(targetName)
+ .build();
+ SnapshotDescriptionUtils.writeSnapshotInfo(snapshotDesc, initialOutputSnapshotDir, outputFs);
+ if (filesUser != null || filesGroup != null) {
+ outputFs.setOwner(new Path(initialOutputSnapshotDir,
+ SnapshotDescriptionUtils.SNAPSHOTINFO_FILE), filesUser, filesGroup);
+ }
+ if (filesMode > 0) {
+ outputFs.setPermission(new Path(initialOutputSnapshotDir,
+ SnapshotDescriptionUtils.SNAPSHOTINFO_FILE), new FsPermission((short)filesMode));
+ }
+ }
+
+ // Step 2 - Start MR Job to copy files
+ // The snapshot references must be copied before the files, otherwise the files get removed
+ // by the HFileArchiver, since they have no references.
+ try {
+ runCopyJob(inputRoot, outputRoot, snapshotName, snapshotDir, verifyChecksum,
+ filesUser, filesGroup, filesMode, mappers, bandwidthMB);
+
+ LOG.info("Finalize the Snapshot Export");
+ if (!skipTmp) {
+ // Step 3 - Rename fs2:/.snapshot/.tmp/<snapshot> fs2:/.snapshot/<snapshot>
+ if (!outputFs.rename(snapshotTmpDir, outputSnapshotDir)) {
+ throw new ExportSnapshotException("Unable to rename snapshot directory from=" +
+ snapshotTmpDir + " to=" + outputSnapshotDir);
+ }
+ }
+
+ // Step 4 - Verify snapshot integrity
+ if (verifyTarget) {
+ LOG.info("Verify snapshot integrity");
+ verifySnapshot(destConf, outputFs, outputRoot, outputSnapshotDir);
+ }
+
+ LOG.info("Export Completed: " + targetName);
+ return 0;
+ } catch (Exception e) {
+ LOG.error("Snapshot export failed", e);
+ if (!skipTmp) {
+ outputFs.delete(snapshotTmpDir, true);
+ }
+ outputFs.delete(outputSnapshotDir, true);
+ return 1;
+ } finally {
+ IOUtils.closeStream(inputFs);
+ IOUtils.closeStream(outputFs);
+ }
+ }
+
+ @Override
+ protected void printUsage() {
+ super.printUsage();
+ System.out.println("\n"
+ + "Examples:\n"
+ + " hbase snapshot export \\\n"
+ + " --snapshot MySnapshot --copy-to hdfs://srv2:8082/hbase \\\n"
+ + " --chuser MyUser --chgroup MyGroup --chmod 700 --mappers 16\n"
+ + "\n"
+ + " hbase snapshot export \\\n"
+ + " --snapshot MySnapshot --copy-from hdfs://srv2:8082/hbase \\\n"
+ + " --copy-to hdfs://srv1:50070/hbase");
+ }
+
+ @Override protected void addOptions() {
+ addRequiredOption(Options.SNAPSHOT);
+ addOption(Options.COPY_TO);
+ addOption(Options.COPY_FROM);
+ addOption(Options.TARGET_NAME);
+ addOption(Options.NO_CHECKSUM_VERIFY);
+ addOption(Options.NO_TARGET_VERIFY);
+ addOption(Options.OVERWRITE);
+ addOption(Options.CHUSER);
+ addOption(Options.CHGROUP);
+ addOption(Options.CHMOD);
+ addOption(Options.MAPPERS);
+ addOption(Options.BANDWIDTH);
+ }
+
+ public static void main(String[] args) {
+ new ExportSnapshot().doStaticMain(args);
+ }
+}
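Since ExportSnapshot is driven through doStaticMain above and follows Hadoop's Tool conventions, an export can also be launched from code. A minimal sketch, assuming the class can be run through ToolRunner; the snapshot name, destination root and mapper count are placeholders that mirror the usage examples printed by the tool:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.snapshot.ExportSnapshot;
import org.apache.hadoop.util.ToolRunner;

public class ExportSnapshotLauncher {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // Snapshot name, destination root and mapper count are placeholders; they mirror the
    // "hbase snapshot export" usage examples printed by the tool itself.
    String[] toolArgs = new String[] {
        "--snapshot", "MySnapshot",
        "--copy-to", "hdfs://srv2:8082/hbase",
        "--mappers", "16"
    };
    System.exit(ToolRunner.run(conf, new ExportSnapshot(), toolArgs));
  }
}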
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/util/MapreduceDependencyClasspathTool.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/util/MapreduceDependencyClasspathTool.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/util/MapreduceDependencyClasspathTool.java
new file mode 100644
index 0000000..e8f073d
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/util/MapreduceDependencyClasspathTool.java
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.util;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HBaseInterfaceAudience;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.log4j.Level;
+import org.apache.log4j.Logger;
+
+/**
+ * Generate a classpath string containing any jars required by mapreduce jobs. Specify
+ * additional values by providing a comma-separated list of paths via -Dtmpjars.
+ */
+@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
+public class MapreduceDependencyClasspathTool implements Tool {
+
+ private Configuration conf;
+
+ @Override
+ public void setConf(Configuration conf) {
+ this.conf = conf;
+ }
+
+ @Override
+ public Configuration getConf() {
+ return conf;
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+ if (args.length > 0) {
+ System.err.println("Usage: hbase mapredcp [-Dtmpjars=...]");
+ System.err.println(" Construct a CLASSPATH containing dependency jars required to run a mapreduce");
+ System.err.println(" job. By default, includes any jars detected by TableMapReduceUtils. Provide");
+ System.err.println(" additional entries by specifying a comma-separated list in tmpjars.");
+ return 0;
+ }
+
+ TableMapReduceUtil.addHBaseDependencyJars(getConf());
+ System.out.println(TableMapReduceUtil.buildDependencyClasspath(getConf()));
+ return 0;
+ }
+
+ public static void main(String[] argv) throws Exception {
+ // Silence the usual noise. This is probably fragile...
+ Logger logger = Logger.getLogger("org.apache.hadoop.hbase");
+ if (logger != null) {
+ logger.setLevel(Level.WARN);
+ }
+ System.exit(ToolRunner.run(
+ HBaseConfiguration.create(), new MapreduceDependencyClasspathTool(), argv));
+ }
+}
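A brief usage sketch for the tool above, assuming it is driven through ToolRunner as in its own main(); the extra jar paths are placeholders, and tmpjars is the property named in the class comment:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.util.MapreduceDependencyClasspathTool;
import org.apache.hadoop.util.ToolRunner;

public class MapredcpExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // Optional extra jars to append to the classpath; these paths are placeholders.
    conf.set("tmpjars", "file:///opt/libs/extra-one.jar,file:///opt/libs/extra-two.jar");
    // Prints the dependency classpath on stdout, like running `hbase mapredcp`.
    System.exit(ToolRunner.run(conf, new MapreduceDependencyClasspathTool(), new String[0]));
  }
}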
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCreator.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCreator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCreator.java
deleted file mode 100644
index 1d4d37b..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCreator.java
+++ /dev/null
@@ -1,134 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-import java.util.List;
-
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.Tag;
-import org.apache.hadoop.util.ReflectionUtils;
-
-/**
- * Facade to create Cells for HFileOutputFormat. The created Cells are of <code>Put</code> type.
- */
-@InterfaceAudience.Public
-public class CellCreator {
-
- public static final String VISIBILITY_EXP_RESOLVER_CLASS =
- "hbase.mapreduce.visibility.expression.resolver.class";
-
- private VisibilityExpressionResolver visExpResolver;
-
- public CellCreator(Configuration conf) {
- Class<? extends VisibilityExpressionResolver> clazz = conf.getClass(
- VISIBILITY_EXP_RESOLVER_CLASS, DefaultVisibilityExpressionResolver.class,
- VisibilityExpressionResolver.class);
- this.visExpResolver = ReflectionUtils.newInstance(clazz, conf);
- this.visExpResolver.init();
- }
-
- /**
- * @param row row key
- * @param roffset row offset
- * @param rlength row length
- * @param family family name
- * @param foffset family offset
- * @param flength family length
- * @param qualifier column qualifier
- * @param qoffset qualifier offset
- * @param qlength qualifier length
- * @param timestamp version timestamp
- * @param value column value
- * @param voffset value offset
- * @param vlength value length
- * @return created Cell
- * @throws IOException
- */
- public Cell create(byte[] row, int roffset, int rlength, byte[] family, int foffset, int flength,
- byte[] qualifier, int qoffset, int qlength, long timestamp, byte[] value, int voffset,
- int vlength) throws IOException {
- return create(row, roffset, rlength, family, foffset, flength, qualifier, qoffset, qlength,
- timestamp, value, voffset, vlength, (List<Tag>)null);
- }
-
- /**
- * @param row row key
- * @param roffset row offset
- * @param rlength row length
- * @param family family name
- * @param foffset family offset
- * @param flength family length
- * @param qualifier column qualifier
- * @param qoffset qualifier offset
- * @param qlength qualifier length
- * @param timestamp version timestamp
- * @param value column value
- * @param voffset value offset
- * @param vlength value length
- * @param visExpression visibility expression to be associated with cell
- * @return created Cell
- * @throws IOException
- */
- @Deprecated
- public Cell create(byte[] row, int roffset, int rlength, byte[] family, int foffset, int flength,
- byte[] qualifier, int qoffset, int qlength, long timestamp, byte[] value, int voffset,
- int vlength, String visExpression) throws IOException {
- List<Tag> visTags = null;
- if (visExpression != null) {
- visTags = this.visExpResolver.createVisibilityExpTags(visExpression);
- }
- return new KeyValue(row, roffset, rlength, family, foffset, flength, qualifier, qoffset,
- qlength, timestamp, KeyValue.Type.Put, value, voffset, vlength, visTags);
- }
-
- /**
- * @param row row key
- * @param roffset row offset
- * @param rlength row length
- * @param family family name
- * @param foffset family offset
- * @param flength family length
- * @param qualifier column qualifier
- * @param qoffset qualifier offset
- * @param qlength qualifier length
- * @param timestamp version timestamp
- * @param value column value
- * @param voffset value offset
- * @param vlength value length
- * @param tags
- * @return created Cell
- * @throws IOException
- */
- public Cell create(byte[] row, int roffset, int rlength, byte[] family, int foffset, int flength,
- byte[] qualifier, int qoffset, int qlength, long timestamp, byte[] value, int voffset,
- int vlength, List<Tag> tags) throws IOException {
- return new KeyValue(row, roffset, rlength, family, foffset, flength, qualifier, qoffset,
- qlength, timestamp, KeyValue.Type.Put, value, voffset, vlength, tags);
- }
-
- /**
- * @return Visibility expression resolver
- */
- public VisibilityExpressionResolver getVisibilityExpressionResolver() {
- return this.visExpResolver;
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java
deleted file mode 100644
index 21b8556..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java
+++ /dev/null
@@ -1,386 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.Random;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Admin;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-/**
- * Tool used to copy a table to another one which can be on a different setup.
- * It is also configurable with a start and time as well as a specification
- * of the region server implementation if different from the local cluster.
- */
-@InterfaceAudience.Public
-public class CopyTable extends Configured implements Tool {
- private static final Log LOG = LogFactory.getLog(CopyTable.class);
-
- final static String NAME = "copytable";
- long startTime = 0;
- long endTime = HConstants.LATEST_TIMESTAMP;
- int batch = Integer.MAX_VALUE;
- int cacheRow = -1;
- int versions = -1;
- String tableName = null;
- String startRow = null;
- String stopRow = null;
- String dstTableName = null;
- String peerAddress = null;
- String families = null;
- boolean allCells = false;
- static boolean shuffle = false;
-
- boolean bulkload = false;
- Path bulkloadDir = null;
-
- private final static String JOB_NAME_CONF_KEY = "mapreduce.job.name";
-
- /**
- * Sets up the actual job.
- *
- * @param args The command line parameters.
- * @return The newly created job.
- * @throws IOException When setting up the job fails.
- */
- public Job createSubmittableJob(String[] args)
- throws IOException {
- if (!doCommandLine(args)) {
- return null;
- }
-
- Job job = Job.getInstance(getConf(), getConf().get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
- job.setJarByClass(CopyTable.class);
- Scan scan = new Scan();
-
- scan.setBatch(batch);
- scan.setCacheBlocks(false);
-
- if (cacheRow > 0) {
- scan.setCaching(cacheRow);
- } else {
- scan.setCaching(getConf().getInt(HConstants.HBASE_CLIENT_SCANNER_CACHING, 100));
- }
-
- scan.setTimeRange(startTime, endTime);
-
- if (allCells) {
- scan.setRaw(true);
- }
- if (shuffle) {
- job.getConfiguration().set(TableInputFormat.SHUFFLE_MAPS, "true");
- }
- if (versions >= 0) {
- scan.setMaxVersions(versions);
- }
-
- if (startRow != null) {
- scan.setStartRow(Bytes.toBytesBinary(startRow));
- }
-
- if (stopRow != null) {
- scan.setStopRow(Bytes.toBytesBinary(stopRow));
- }
-
- if(families != null) {
- String[] fams = families.split(",");
- Map<String,String> cfRenameMap = new HashMap<>();
- for(String fam : fams) {
- String sourceCf;
- if(fam.contains(":")) {
- // fam looks like "sourceCfName:destCfName"
- String[] srcAndDest = fam.split(":", 2);
- sourceCf = srcAndDest[0];
- String destCf = srcAndDest[1];
- cfRenameMap.put(sourceCf, destCf);
- } else {
- // fam is just "sourceCf"
- sourceCf = fam;
- }
- scan.addFamily(Bytes.toBytes(sourceCf));
- }
- Import.configureCfRenaming(job.getConfiguration(), cfRenameMap);
- }
- job.setNumReduceTasks(0);
-
- if (bulkload) {
- TableMapReduceUtil.initTableMapperJob(tableName, scan, Import.KeyValueImporter.class, null,
- null, job);
-
- // We need to split the inputs by destination tables so that output of Map can be bulk-loaded.
- TableInputFormat.configureSplitTable(job, TableName.valueOf(dstTableName));
-
- FileSystem fs = FileSystem.get(getConf());
- Random rand = new Random();
- Path root = new Path(fs.getWorkingDirectory(), "copytable");
- fs.mkdirs(root);
- while (true) {
- bulkloadDir = new Path(root, "" + rand.nextLong());
- if (!fs.exists(bulkloadDir)) {
- break;
- }
- }
-
- System.out.println("HFiles will be stored at " + this.bulkloadDir);
- HFileOutputFormat2.setOutputPath(job, bulkloadDir);
- try (Connection conn = ConnectionFactory.createConnection(getConf());
- Admin admin = conn.getAdmin()) {
- HFileOutputFormat2.configureIncrementalLoadMap(job,
- admin.listTableDescriptor((TableName.valueOf(dstTableName))));
- }
- } else {
- TableMapReduceUtil.initTableMapperJob(tableName, scan,
- Import.Importer.class, null, null, job);
-
- TableMapReduceUtil.initTableReducerJob(dstTableName, null, job, null, peerAddress, null,
- null);
- }
-
- return job;
- }
-
- /*
- * @param errorMsg Error message. Can be null.
- */
- private static void printUsage(final String errorMsg) {
- if (errorMsg != null && errorMsg.length() > 0) {
- System.err.println("ERROR: " + errorMsg);
- }
- System.err.println("Usage: CopyTable [general options] [--starttime=X] [--endtime=Y] " +
- "[--new.name=NEW] [--peer.adr=ADR] <tablename>");
- System.err.println();
- System.err.println("Options:");
- System.err.println(" rs.class hbase.regionserver.class of the peer cluster");
- System.err.println(" specify if different from current cluster");
- System.err.println(" rs.impl hbase.regionserver.impl of the peer cluster");
- System.err.println(" startrow the start row");
- System.err.println(" stoprow the stop row");
- System.err.println(" starttime beginning of the time range (unixtime in millis)");
- System.err.println(" without endtime means from starttime to forever");
- System.err.println(" endtime end of the time range. Ignored if no starttime specified.");
- System.err.println(" versions number of cell versions to copy");
- System.err.println(" new.name new table's name");
- System.err.println(" peer.adr Address of the peer cluster given in the format");
- System.err.println(" hbase.zookeeper.quorum:hbase.zookeeper.client"
- + ".port:zookeeper.znode.parent");
- System.err.println(" families comma-separated list of families to copy");
- System.err.println(" To copy from cf1 to cf2, give sourceCfName:destCfName. ");
- System.err.println(" To keep the same name, just give \"cfName\"");
- System.err.println(" all.cells also copy delete markers and deleted cells");
- System.err.println(" bulkload Write input into HFiles and bulk load to the destination "
- + "table");
- System.err.println();
- System.err.println("Args:");
- System.err.println(" tablename Name of the table to copy");
- System.err.println();
- System.err.println("Examples:");
- System.err.println(" To copy 'TestTable' to a cluster that uses replication for a 1 hour window:");
- System.err.println(" $ hbase " +
- "org.apache.hadoop.hbase.mapreduce.CopyTable --starttime=1265875194289 --endtime=1265878794289 " +
- "--peer.adr=server1,server2,server3:2181:/hbase --families=myOldCf:myNewCf,cf2,cf3 TestTable ");
- System.err.println("For performance consider the following general option:\n"
- + " It is recommended that you set the following to >=100. A higher value uses more memory but\n"
- + " decreases the round trip time to the server and may increase performance.\n"
- + " -Dhbase.client.scanner.caching=100\n"
- + " The following should always be set to false, to prevent writing data twice, which may produce \n"
- + " inaccurate results.\n"
- + " -Dmapreduce.map.speculative=false");
- }
-
- private boolean doCommandLine(final String[] args) {
- // Process command-line args. TODO: Better cmd-line processing
- // (but hopefully something not as painful as cli options).
- if (args.length < 1) {
- printUsage(null);
- return false;
- }
- try {
- for (int i = 0; i < args.length; i++) {
- String cmd = args[i];
- if (cmd.equals("-h") || cmd.startsWith("--h")) {
- printUsage(null);
- return false;
- }
-
- final String startRowArgKey = "--startrow=";
- if (cmd.startsWith(startRowArgKey)) {
- startRow = cmd.substring(startRowArgKey.length());
- continue;
- }
-
- final String stopRowArgKey = "--stoprow=";
- if (cmd.startsWith(stopRowArgKey)) {
- stopRow = cmd.substring(stopRowArgKey.length());
- continue;
- }
-
- final String startTimeArgKey = "--starttime=";
- if (cmd.startsWith(startTimeArgKey)) {
- startTime = Long.parseLong(cmd.substring(startTimeArgKey.length()));
- continue;
- }
-
- final String endTimeArgKey = "--endtime=";
- if (cmd.startsWith(endTimeArgKey)) {
- endTime = Long.parseLong(cmd.substring(endTimeArgKey.length()));
- continue;
- }
-
- final String batchArgKey = "--batch=";
- if (cmd.startsWith(batchArgKey)) {
- batch = Integer.parseInt(cmd.substring(batchArgKey.length()));
- continue;
- }
-
- final String cacheRowArgKey = "--cacheRow=";
- if (cmd.startsWith(cacheRowArgKey)) {
- cacheRow = Integer.parseInt(cmd.substring(cacheRowArgKey.length()));
- continue;
- }
-
- final String versionsArgKey = "--versions=";
- if (cmd.startsWith(versionsArgKey)) {
- versions = Integer.parseInt(cmd.substring(versionsArgKey.length()));
- continue;
- }
-
- final String newNameArgKey = "--new.name=";
- if (cmd.startsWith(newNameArgKey)) {
- dstTableName = cmd.substring(newNameArgKey.length());
- continue;
- }
-
- final String peerAdrArgKey = "--peer.adr=";
- if (cmd.startsWith(peerAdrArgKey)) {
- peerAddress = cmd.substring(peerAdrArgKey.length());
- continue;
- }
-
- final String familiesArgKey = "--families=";
- if (cmd.startsWith(familiesArgKey)) {
- families = cmd.substring(familiesArgKey.length());
- continue;
- }
-
- if (cmd.startsWith("--all.cells")) {
- allCells = true;
- continue;
- }
-
- if (cmd.startsWith("--bulkload")) {
- bulkload = true;
- continue;
- }
-
- if (cmd.startsWith("--shuffle")) {
- shuffle = true;
- continue;
- }
-
- if (i == args.length-1) {
- tableName = cmd;
- } else {
- printUsage("Invalid argument '" + cmd + "'");
- return false;
- }
- }
- if (dstTableName == null && peerAddress == null) {
- printUsage("At least a new table name or a " +
- "peer address must be specified");
- return false;
- }
- if ((endTime != 0) && (startTime > endTime)) {
- printUsage("Invalid time range filter: starttime=" + startTime + " > endtime=" + endTime);
- return false;
- }
-
- if (bulkload && peerAddress != null) {
- printUsage("Remote bulkload is not supported!");
- return false;
- }
-
- // set dstTableName if necessary
- if (dstTableName == null) {
- dstTableName = tableName;
- }
- } catch (Exception e) {
- e.printStackTrace();
- printUsage("Can't start because " + e.getMessage());
- return false;
- }
- return true;
- }
-
- /**
- * Main entry point.
- *
- * @param args The command line parameters.
- * @throws Exception When running the job fails.
- */
- public static void main(String[] args) throws Exception {
- int ret = ToolRunner.run(HBaseConfiguration.create(), new CopyTable(), args);
- System.exit(ret);
- }
-
- @Override
- public int run(String[] args) throws Exception {
- Job job = createSubmittableJob(args);
- if (job == null) return 1;
- if (!job.waitForCompletion(true)) {
- LOG.info("Map-reduce job failed!");
- if (bulkload) {
- LOG.info("Files are not bulkloaded!");
- }
- return 1;
- }
- int code = 0;
- if (bulkload) {
- code = new LoadIncrementalHFiles(this.getConf()).run(new String[]{this.bulkloadDir.toString(),
- this.dstTableName});
- if (code == 0) {
- // bulkloadDir is deleted only LoadIncrementalHFiles was successful so that one can rerun
- // LoadIncrementalHFiles.
- FileSystem fs = FileSystem.get(this.getConf());
- if (!fs.delete(this.bulkloadDir, true)) {
- LOG.error("Deleting folder " + bulkloadDir + " failed!");
- code = 1;
- }
- }
- }
- return code;
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/DefaultVisibilityExpressionResolver.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/DefaultVisibilityExpressionResolver.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/DefaultVisibilityExpressionResolver.java
deleted file mode 100644
index 004ee5c..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/DefaultVisibilityExpressionResolver.java
+++ /dev/null
@@ -1,144 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.apache.hadoop.hbase.security.visibility.VisibilityConstants.LABELS_TABLE_FAMILY;
-import static org.apache.hadoop.hbase.security.visibility.VisibilityConstants.LABELS_TABLE_NAME;
-import static org.apache.hadoop.hbase.security.visibility.VisibilityConstants.LABEL_QUALIFIER;
-
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.TableNotFoundException;
-import org.apache.hadoop.hbase.Tag;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.security.visibility.Authorizations;
-import org.apache.hadoop.hbase.security.visibility.VisibilityConstants;
-import org.apache.hadoop.hbase.security.visibility.VisibilityLabelOrdinalProvider;
-import org.apache.hadoop.hbase.security.visibility.VisibilityUtils;
-import org.apache.hadoop.hbase.util.Bytes;
-
-/**
- * This implementation creates tags by expanding expression using label ordinal. Labels will be
- * serialized in sorted order of it's ordinal.
- */
-@InterfaceAudience.Private
-public class DefaultVisibilityExpressionResolver implements VisibilityExpressionResolver {
- private static final Log LOG = LogFactory.getLog(DefaultVisibilityExpressionResolver.class);
-
- private Configuration conf;
- private final Map<String, Integer> labels = new HashMap<>();
-
- @Override
- public Configuration getConf() {
- return this.conf;
- }
-
- @Override
- public void setConf(Configuration conf) {
- this.conf = conf;
- }
-
- @Override
- public void init() {
- // Reading all the labels and ordinal.
- // This scan should be done by user with global_admin privileges.. Ensure that it works
- Table labelsTable = null;
- Connection connection = null;
- try {
- connection = ConnectionFactory.createConnection(conf);
- try {
- labelsTable = connection.getTable(LABELS_TABLE_NAME);
- } catch (IOException e) {
- LOG.error("Error opening 'labels' table", e);
- return;
- }
- Scan scan = new Scan();
- scan.setAuthorizations(new Authorizations(VisibilityUtils.SYSTEM_LABEL));
- scan.addColumn(LABELS_TABLE_FAMILY, LABEL_QUALIFIER);
- ResultScanner scanner = null;
- try {
- scanner = labelsTable.getScanner(scan);
- Result next = null;
- while ((next = scanner.next()) != null) {
- byte[] row = next.getRow();
- byte[] value = next.getValue(LABELS_TABLE_FAMILY, LABEL_QUALIFIER);
- labels.put(Bytes.toString(value), Bytes.toInt(row));
- }
- } catch (TableNotFoundException e) {
- // Table not found. So just return
- return;
- } catch (IOException e) {
- LOG.error("Error scanning 'labels' table", e);
- } finally {
- if (scanner != null) scanner.close();
- }
- } catch (IOException ioe) {
- LOG.error("Failed reading 'labels' tags", ioe);
- return;
- } finally {
- if (labelsTable != null) {
- try {
- labelsTable.close();
- } catch (IOException ioe) {
- LOG.warn("Error closing 'labels' table", ioe);
- }
- }
- if (connection != null)
- try {
- connection.close();
- } catch (IOException ioe) {
- LOG.warn("Failed close of temporary connection", ioe);
- }
- }
- }
-
- @Override
- public List<Tag> createVisibilityExpTags(String visExpression) throws IOException {
- VisibilityLabelOrdinalProvider provider = new VisibilityLabelOrdinalProvider() {
- @Override
- public int getLabelOrdinal(String label) {
- Integer ordinal = null;
- ordinal = labels.get(label);
- if (ordinal != null) {
- return ordinal.intValue();
- }
- return VisibilityConstants.NON_EXIST_LABEL_ORDINAL;
- }
-
- @Override
- public String getLabel(int ordinal) {
- // Unused
- throw new UnsupportedOperationException(
- "getLabel should not be used in VisibilityExpressionResolver");
- }
- };
- return VisibilityUtils.createVisibilityExpTags(visExpression, true, false, null, provider);
- }
-}
\ No newline at end of file
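For reference, a minimal sketch of how this resolver is driven by its callers, assuming a
cluster with visibility labels enabled; the labels in the expression are hypothetical:

    import java.util.List;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.Tag;
    import org.apache.hadoop.hbase.mapreduce.DefaultVisibilityExpressionResolver;
    import org.apache.hadoop.hbase.mapreduce.VisibilityExpressionResolver;

    public class VisibilityResolverSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        VisibilityExpressionResolver resolver = new DefaultVisibilityExpressionResolver();
        resolver.setConf(conf);
        resolver.init(); // scans the labels table once and caches label -> ordinal
        // "secret" and "public" are hypothetical labels that must already exist on the cluster
        List<Tag> tags = resolver.createVisibilityExpTags("secret&!public");
        System.out.println("Serialized " + tags.size() + " visibility tag(s)");
      }
    }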
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Driver.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Driver.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Driver.java
deleted file mode 100644
index 9737b55..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Driver.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.hadoop.hbase.HBaseInterfaceAudience;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.classification.InterfaceStability;
-import org.apache.hadoop.hbase.mapreduce.replication.VerifyReplication;
-import org.apache.hadoop.hbase.snapshot.ExportSnapshot;
-import org.apache.hadoop.util.ProgramDriver;
-
-/**
- * Driver for hbase mapreduce jobs. Select which job to run by passing
- * its name as the first argument to this main class.
- */
-@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
-@InterfaceStability.Stable
-public class Driver {
- /**
- * @param args
- * @throws Throwable
- */
- public static void main(String[] args) throws Throwable {
- ProgramDriver pgd = new ProgramDriver();
-
- pgd.addClass(RowCounter.NAME, RowCounter.class,
- "Count rows in HBase table.");
- pgd.addClass(CellCounter.NAME, CellCounter.class,
- "Count cells in HBase table.");
- pgd.addClass(Export.NAME, Export.class, "Write table data to HDFS.");
- pgd.addClass(Import.NAME, Import.class, "Import data written by Export.");
- pgd.addClass(ImportTsv.NAME, ImportTsv.class, "Import data in TSV format.");
- pgd.addClass(LoadIncrementalHFiles.NAME, LoadIncrementalHFiles.class,
- "Complete a bulk data load.");
- pgd.addClass(CopyTable.NAME, CopyTable.class,
- "Export a table from local cluster to peer cluster.");
- pgd.addClass(VerifyReplication.NAME, VerifyReplication.class, "Compare" +
- " the data from tables in two different clusters. WARNING: It" +
- " doesn't work for incrementColumnValues'd cells since the" +
- " timestamp is changed after being appended to the log.");
- pgd.addClass(WALPlayer.NAME, WALPlayer.class, "Replay WAL files.");
- pgd.addClass(ExportSnapshot.NAME, ExportSnapshot.class, "Export" +
- " the specific snapshot to a given FileSystem.");
-
- ProgramDriver.class.getMethod("driver", new Class [] {String[].class}).
- invoke(pgd, new Object[]{args});
- }
-}
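For reference, a minimal sketch of selecting a program through this driver; the table name and
output path are hypothetical:

    public class DriverSketch {
      public static void main(String[] unused) throws Throwable {
        // The first argument selects the registered program ("export" here); the rest are
        // passed through to that tool, i.e. <tablename> <outputdir> for Export.
        org.apache.hadoop.hbase.mapreduce.Driver.main(
            new String[] { "export", "myTable", "/tmp/export-out" });
      }
    }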
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java
deleted file mode 100644
index 4c01528..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java
+++ /dev/null
@@ -1,197 +0,0 @@
-/**
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements. See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership. The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
-import org.apache.hadoop.hbase.filter.Filter;
-import org.apache.hadoop.hbase.filter.IncompatibleFilterException;
-import org.apache.hadoop.hbase.filter.PrefixFilter;
-import org.apache.hadoop.hbase.filter.RegexStringComparator;
-import org.apache.hadoop.hbase.filter.RowFilter;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-/**
-* Export an HBase table.
-* Writes content to sequence files up in HDFS. Use {@link Import} to read it
-* back in again.
-*/
-@InterfaceAudience.Public
-public class Export extends Configured implements Tool {
- private static final Log LOG = LogFactory.getLog(Export.class);
- final static String NAME = "export";
- final static String RAW_SCAN = "hbase.mapreduce.include.deleted.rows";
- final static String EXPORT_BATCHING = "hbase.export.scanner.batch";
-
- private final static String JOB_NAME_CONF_KEY = "mapreduce.job.name";
-
- /**
- * Sets up the actual job.
- *
- * @param conf The current configuration.
- * @param args The command line parameters.
- * @return The newly created job.
- * @throws IOException When setting up the job fails.
- */
- public static Job createSubmittableJob(Configuration conf, String[] args)
- throws IOException {
- String tableName = args[0];
- Path outputDir = new Path(args[1]);
- Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
- job.setJobName(NAME + "_" + tableName);
- job.setJarByClass(Export.class);
- // Set optional scan parameters
- Scan s = getConfiguredScanForJob(conf, args);
- IdentityTableMapper.initJob(tableName, s, IdentityTableMapper.class, job);
- // No reducers. Just write straight to output files.
- job.setNumReduceTasks(0);
- job.setOutputFormatClass(SequenceFileOutputFormat.class);
- job.setOutputKeyClass(ImmutableBytesWritable.class);
- job.setOutputValueClass(Result.class);
- FileOutputFormat.setOutputPath(job, outputDir); // job conf doesn't contain the conf so doesn't have a default fs.
- return job;
- }
-
- private static Scan getConfiguredScanForJob(Configuration conf, String[] args) throws IOException {
- Scan s = new Scan();
- // Optional arguments.
- // Set Scan Versions
- int versions = args.length > 2? Integer.parseInt(args[2]): 1;
- s.setMaxVersions(versions);
- // Set Scan Range
- long startTime = args.length > 3? Long.parseLong(args[3]): 0L;
- long endTime = args.length > 4? Long.parseLong(args[4]): Long.MAX_VALUE;
- s.setTimeRange(startTime, endTime);
- // Set cache blocks
- s.setCacheBlocks(false);
- // set Start and Stop row
- if (conf.get(TableInputFormat.SCAN_ROW_START) != null) {
- s.setStartRow(Bytes.toBytesBinary(conf.get(TableInputFormat.SCAN_ROW_START)));
- }
- if (conf.get(TableInputFormat.SCAN_ROW_STOP) != null) {
- s.setStopRow(Bytes.toBytesBinary(conf.get(TableInputFormat.SCAN_ROW_STOP)));
- }
- // Set Scan Column Family
- boolean raw = Boolean.parseBoolean(conf.get(RAW_SCAN));
- if (raw) {
- s.setRaw(raw);
- }
- for (String columnFamily : conf.getTrimmedStrings(TableInputFormat.SCAN_COLUMN_FAMILY)) {
- s.addFamily(Bytes.toBytes(columnFamily));
- }
- // Set RowFilter or Prefix Filter if applicable.
- Filter exportFilter = getExportFilter(args);
- if (exportFilter!= null) {
- LOG.info("Setting Scan Filter for Export.");
- s.setFilter(exportFilter);
- }
-
- int batching = conf.getInt(EXPORT_BATCHING, -1);
- if (batching != -1){
- try {
- s.setBatch(batching);
- } catch (IncompatibleFilterException e) {
- LOG.error("Batching could not be set", e);
- }
- }
- LOG.info("versions=" + versions + ", starttime=" + startTime +
- ", endtime=" + endTime + ", keepDeletedCells=" + raw);
- return s;
- }
-
- private static Filter getExportFilter(String[] args) {
- Filter exportFilter = null;
- String filterCriteria = (args.length > 5) ? args[5]: null;
- if (filterCriteria == null) return null;
- if (filterCriteria.startsWith("^")) {
- String regexPattern = filterCriteria.substring(1, filterCriteria.length());
- exportFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator(regexPattern));
- } else {
- exportFilter = new PrefixFilter(Bytes.toBytesBinary(filterCriteria));
- }
- return exportFilter;
- }
-
- /*
- * @param errorMsg Error message. Can be null.
- */
- private static void usage(final String errorMsg) {
- if (errorMsg != null && errorMsg.length() > 0) {
- System.err.println("ERROR: " + errorMsg);
- }
- System.err.println("Usage: Export [-D <property=value>]* <tablename> <outputdir> [<versions> " +
- "[<starttime> [<endtime>]] [^[regex pattern] or [Prefix] to filter]]\n");
- System.err.println(" Note: -D properties will be applied to the conf used. ");
- System.err.println(" For example: ");
- System.err.println(" -D mapreduce.output.fileoutputformat.compress=true");
- System.err.println(" -D mapreduce.output.fileoutputformat.compress.codec=org.apache.hadoop.io.compress.GzipCodec");
- System.err.println(" -D mapreduce.output.fileoutputformat.compress.type=BLOCK");
- System.err.println(" Additionally, the following SCAN properties can be specified");
- System.err.println(" to control/limit what is exported..");
- System.err.println(" -D " + TableInputFormat.SCAN_COLUMN_FAMILY + "=<family1>,<family2>, ...");
- System.err.println(" -D " + RAW_SCAN + "=true");
- System.err.println(" -D " + TableInputFormat.SCAN_ROW_START + "=<ROWSTART>");
- System.err.println(" -D " + TableInputFormat.SCAN_ROW_STOP + "=<ROWSTOP>");
- System.err.println(" -D " + JOB_NAME_CONF_KEY
- + "=jobName - use the specified mapreduce job name for the export");
- System.err.println("For performance consider the following properties:\n"
- + " -Dhbase.client.scanner.caching=100\n"
- + " -Dmapreduce.map.speculative=false\n"
- + " -Dmapreduce.reduce.speculative=false");
- System.err.println("For tables with very wide rows consider setting the batch size as below:\n"
- + " -D" + EXPORT_BATCHING + "=10");
- }
-
-
- @Override
- public int run(String[] args) throws Exception {
- if (args.length < 2) {
- usage("Wrong number of arguments: " + args.length);
- return -1;
- }
- Job job = createSubmittableJob(getConf(), args);
- return (job.waitForCompletion(true) ? 0 : 1);
- }
-
- /**
- * Main entry point.
- * @param args The command line parameters.
- * @throws Exception When running the job fails.
- */
- public static void main(String[] args) throws Exception {
- int errCode = ToolRunner.run(HBaseConfiguration.create(), new Export(), args);
- System.exit(errCode);
- }
-}
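For reference, a minimal sketch of invoking the export tool programmatically; the table name,
output path and tuning values are hypothetical:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.mapreduce.Export;
    import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
    import org.apache.hadoop.util.ToolRunner;

    public class ExportSketch {
      public static void main(String[] unused) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // Restrict the export to a single column family (same key printed in usage() above).
        conf.set(TableInputFormat.SCAN_COLUMN_FAMILY, "cf");
        // Cap the scanner batch for very wide rows (the EXPORT_BATCHING key defined above).
        conf.setInt("hbase.export.scanner.batch", 10);
        // Positional args: <tablename> <outputdir> [<versions>]
        int exit = ToolRunner.run(conf, new Export(),
            new String[] { "myTable", "/tmp/export-out", "3" });
        System.exit(exit);
      }
    }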
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/GroupingTableMapper.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/GroupingTableMapper.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/GroupingTableMapper.java
deleted file mode 100644
index dc30c6e..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/GroupingTableMapper.java
+++ /dev/null
@@ -1,177 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-import java.util.ArrayList;
-
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapreduce.Job;
-
-/**
- * Extract grouping columns from input record.
- */
-@InterfaceAudience.Public
-public class GroupingTableMapper
-extends TableMapper<ImmutableBytesWritable,Result> implements Configurable {
-
- /**
- * JobConf parameter to specify the columns used to produce the key passed to
- * collect from the map phase.
- */
- public static final String GROUP_COLUMNS =
- "hbase.mapred.groupingtablemap.columns";
-
- /** The grouping columns. */
- protected byte [][] columns;
- /** The current configuration. */
- private Configuration conf = null;
-
- /**
- * Use this before submitting a TableMap job. It will appropriately set up
- * the job.
- *
- * @param table The table to be processed.
- * @param scan The scan with the columns etc.
- * @param groupColumns A space separated list of columns used to form the
- * key used in collect.
- * @param mapper The mapper class.
- * @param job The current job.
- * @throws IOException When setting up the job fails.
- */
- @SuppressWarnings("unchecked")
- public static void initJob(String table, Scan scan, String groupColumns,
- Class<? extends TableMapper> mapper, Job job) throws IOException {
- TableMapReduceUtil.initTableMapperJob(table, scan, mapper,
- ImmutableBytesWritable.class, Result.class, job);
- job.getConfiguration().set(GROUP_COLUMNS, groupColumns);
- }
-
- /**
- * Extract the grouping columns from value to construct a new key. Pass the
- * new key and value to reduce. If any of the grouping columns are not found
- * in the value, the record is skipped.
- *
- * @param key The current key.
- * @param value The current value.
- * @param context The current context.
- * @throws IOException When writing the record fails.
- * @throws InterruptedException When the job is aborted.
- */
- @Override
- public void map(ImmutableBytesWritable key, Result value, Context context)
- throws IOException, InterruptedException {
- byte[][] keyVals = extractKeyValues(value);
- if(keyVals != null) {
- ImmutableBytesWritable tKey = createGroupKey(keyVals);
- context.write(tKey, value);
- }
- }
-
- /**
- * Extract column values from the current record. This method returns
- * null if any of the columns are not found.
- * <p>
- * Override this method if you want to deal with nulls differently.
- *
- * @param r The current values.
- * @return Array of byte values.
- */
- protected byte[][] extractKeyValues(Result r) {
- byte[][] keyVals = null;
- ArrayList<byte[]> foundList = new ArrayList<>();
- int numCols = columns.length;
- if (numCols > 0) {
- for (Cell value: r.listCells()) {
- byte [] column = KeyValue.makeColumn(CellUtil.cloneFamily(value),
- CellUtil.cloneQualifier(value));
- for (int i = 0; i < numCols; i++) {
- if (Bytes.equals(column, columns[i])) {
- foundList.add(CellUtil.cloneValue(value));
- break;
- }
- }
- }
- if(foundList.size() == numCols) {
- keyVals = foundList.toArray(new byte[numCols][]);
- }
- }
- return keyVals;
- }
-
- /**
- * Create a key by concatenating multiple column values.
- * <p>
- * Override this function in order to produce different types of keys.
- *
- * @param vals The current key/values.
- * @return A key generated by concatenating multiple column values.
- */
- protected ImmutableBytesWritable createGroupKey(byte[][] vals) {
- if(vals == null) {
- return null;
- }
- StringBuilder sb = new StringBuilder();
- for(int i = 0; i < vals.length; i++) {
- if(i > 0) {
- sb.append(" ");
- }
- sb.append(Bytes.toString(vals[i]));
- }
- return new ImmutableBytesWritable(Bytes.toBytesBinary(sb.toString()));
- }
-
- /**
- * Returns the current configuration.
- *
- * @return The current configuration.
- * @see org.apache.hadoop.conf.Configurable#getConf()
- */
- @Override
- public Configuration getConf() {
- return conf;
- }
-
- /**
- * Sets the configuration. This is used to set up the grouping details.
- *
- * @param configuration The configuration to set.
- * @see org.apache.hadoop.conf.Configurable#setConf(
- * org.apache.hadoop.conf.Configuration)
- */
- @Override
- public void setConf(Configuration configuration) {
- this.conf = configuration;
- String[] cols = conf.get(GROUP_COLUMNS, "").split(" ");
- columns = new byte[cols.length][];
- for(int i = 0; i < cols.length; i++) {
- columns[i] = Bytes.toBytes(cols[i]);
- }
- }
-
-}
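For reference, a minimal sketch of wiring this mapper into a job; the table, family and
qualifiers are hypothetical, and the grouping columns are the space-separated family:qualifier
list described in the javadoc above:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.Scan;
    import org.apache.hadoop.hbase.mapreduce.GroupingTableMapper;
    import org.apache.hadoop.hbase.util.Bytes;
    import org.apache.hadoop.mapreduce.Job;

    public class GroupingSketch {
      public static void main(String[] unused) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        Job job = Job.getInstance(conf, "group-by-columns");
        Scan scan = new Scan();
        scan.addFamily(Bytes.toBytes("cf"));
        // Map output is keyed by the concatenated values of cf:host and cf:day;
        // rows missing either column are skipped by extractKeyValues().
        GroupingTableMapper.initJob("myTable", scan, "cf:host cf:day",
            GroupingTableMapper.class, job);
        // A reducer consuming (ImmutableBytesWritable, Result) pairs would be configured next.
      }
    }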
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileInputFormat.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileInputFormat.java
deleted file mode 100644
index e90d5c1..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileInputFormat.java
+++ /dev/null
@@ -1,174 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.PathFilter;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.io.hfile.HFile;
-import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
-import org.apache.hadoop.hbase.io.hfile.HFileScanner;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.input.FileSplit;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * Simple MR input format for HFiles.
- * This code was borrowed from the Apache Crunch project.
- * It has been updated for recent versions of HBase.
- */
-public class HFileInputFormat extends FileInputFormat<NullWritable, Cell> {
-
- private static final Logger LOG = LoggerFactory.getLogger(HFileInputFormat.class);
-
- /**
- * File filter that removes all "hidden" files. This might be something worth removing from
- * a more general purpose utility; it accounts for the presence of metadata files created
- * in the way we're doing exports.
- */
- static final PathFilter HIDDEN_FILE_FILTER = new PathFilter() {
- @Override
- public boolean accept(Path p) {
- String name = p.getName();
- return !name.startsWith("_") && !name.startsWith(".");
- }
- };
-
- /**
- * Record reader for HFiles.
- */
- private static class HFileRecordReader extends RecordReader<NullWritable, Cell> {
-
- private Reader in;
- protected Configuration conf;
- private HFileScanner scanner;
-
- /**
- * A private cache of the key value so it doesn't need to be loaded twice from the scanner.
- */
- private Cell value = null;
- private long count;
- private boolean seeked = false;
-
- @Override
- public void initialize(InputSplit split, TaskAttemptContext context)
- throws IOException, InterruptedException {
- FileSplit fileSplit = (FileSplit) split;
- conf = context.getConfiguration();
- Path path = fileSplit.getPath();
- FileSystem fs = path.getFileSystem(conf);
- LOG.info("Initialize HFileRecordReader for {}", path);
- this.in = HFile.createReader(fs, path, conf);
-
- // The file info must be loaded before the scanner can be used.
- // This seems like a bug in HBase, but it's easily worked around.
- this.in.loadFileInfo();
- this.scanner = in.getScanner(false, false);
-
- }
-
-
- @Override
- public boolean nextKeyValue() throws IOException, InterruptedException {
- boolean hasNext;
- if (!seeked) {
- LOG.info("Seeking to start");
- hasNext = scanner.seekTo();
- seeked = true;
- } else {
- hasNext = scanner.next();
- }
- if (!hasNext) {
- return false;
- }
- value = scanner.getCell();
- count++;
- return true;
- }
-
- @Override
- public NullWritable getCurrentKey() throws IOException, InterruptedException {
- return NullWritable.get();
- }
-
- @Override
- public Cell getCurrentValue() throws IOException, InterruptedException {
- return value;
- }
-
- @Override
- public float getProgress() throws IOException, InterruptedException {
- // This would be inaccurate if KVs are not uniformly-sized or we have performed a seek to
- // the start row, but better than nothing anyway.
- return 1.0f * count / in.getEntries();
- }
-
- @Override
- public void close() throws IOException {
- if (in != null) {
- in.close();
- in = null;
- }
- }
- }
-
- @Override
- protected List<FileStatus> listStatus(JobContext job) throws IOException {
- List<FileStatus> result = new ArrayList<FileStatus>();
-
- // Explode out directories that match the original FileInputFormat filters
- // since HFiles are written to directories where the
- // directory name is the column name
- for (FileStatus status : super.listStatus(job)) {
- if (status.isDirectory()) {
- FileSystem fs = status.getPath().getFileSystem(job.getConfiguration());
- for (FileStatus match : fs.listStatus(status.getPath(), HIDDEN_FILE_FILTER)) {
- result.add(match);
- }
- } else {
- result.add(status);
- }
- }
- return result;
- }
-
- @Override
- public RecordReader<NullWritable, Cell> createRecordReader(InputSplit split, TaskAttemptContext context)
- throws IOException, InterruptedException {
- return new HFileRecordReader();
- }
-
- @Override
- protected boolean isSplitable(JobContext context, Path filename) {
- // This file isn't splittable.
- return false;
- }
-}
\ No newline at end of file
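For reference, a minimal sketch of a job that reads HFiles with this input format; the paths
and the mapper are hypothetical (keys are NullWritable and values are Cells, as defined above):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hbase.Cell;
    import org.apache.hadoop.hbase.CellUtil;
    import org.apache.hadoop.hbase.mapreduce.HFileInputFormat;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

    public class HFileReadSketch {
      // Hypothetical mapper: emits each cell's row key as text.
      public static class RowKeyMapper extends Mapper<NullWritable, Cell, Text, NullWritable> {
        @Override
        protected void map(NullWritable key, Cell cell, Context context)
            throws java.io.IOException, InterruptedException {
          context.write(new Text(CellUtil.cloneRow(cell)), NullWritable.get());
        }
      }

      public static void main(String[] unused) throws Exception {
        Job job = Job.getInstance(new Configuration(), "read-hfiles");
        job.setJarByClass(HFileReadSketch.class);
        job.setInputFormatClass(HFileInputFormat.class);
        FileInputFormat.addInputPath(job, new Path("/hbase-export/hfiles"));
        job.setMapperClass(RowKeyMapper.class);
        job.setNumReduceTasks(0);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job, new Path("/tmp/rowkeys"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
      }
    }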
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Import.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Import.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Import.java
deleted file mode 100644
index b5bb2ec..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Import.java
+++ /dev/null
@@ -1,780 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.ByteArrayInputStream;
-import java.io.DataInput;
-import java.io.DataInputStream;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.lang.reflect.InvocationTargetException;
-import java.lang.reflect.Method;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.TreeMap;
-import java.util.UUID;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellComparator;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.KeyValueUtil;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.ZooKeeperConnectionException;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Admin;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Delete;
-import org.apache.hadoop.hbase.client.Durability;
-import org.apache.hadoop.hbase.client.Mutation;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.RegionLocator;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.filter.Filter;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.zookeeper.ZKClusterId;
-import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
-import org.apache.hadoop.io.RawComparator;
-import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.io.WritableComparator;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Partitioner;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.TaskCounter;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-import org.apache.zookeeper.KeeperException;
-
-
-/**
- * Import data written by {@link Export}.
- */
-@InterfaceAudience.Public
-public class Import extends Configured implements Tool {
- private static final Log LOG = LogFactory.getLog(Import.class);
- final static String NAME = "import";
- public final static String CF_RENAME_PROP = "HBASE_IMPORTER_RENAME_CFS";
- public final static String BULK_OUTPUT_CONF_KEY = "import.bulk.output";
- public final static String FILTER_CLASS_CONF_KEY = "import.filter.class";
- public final static String FILTER_ARGS_CONF_KEY = "import.filter.args";
- public final static String TABLE_NAME = "import.table.name";
- public final static String WAL_DURABILITY = "import.wal.durability";
- public final static String HAS_LARGE_RESULT= "import.bulk.hasLargeResult";
-
- private final static String JOB_NAME_CONF_KEY = "mapreduce.job.name";
-
- public static class KeyValueWritableComparablePartitioner
- extends Partitioner<KeyValueWritableComparable, KeyValue> {
- private static KeyValueWritableComparable[] START_KEYS = null;
- @Override
- public int getPartition(KeyValueWritableComparable key, KeyValue value,
- int numPartitions) {
- for (int i = 0; i < START_KEYS.length; ++i) {
- if (key.compareTo(START_KEYS[i]) <= 0) {
- return i;
- }
- }
- return START_KEYS.length;
- }
-
- }
-
- public static class KeyValueWritableComparable
- implements WritableComparable<KeyValueWritableComparable> {
-
- private KeyValue kv = null;
-
- static {
- // register this comparator
- WritableComparator.define(KeyValueWritableComparable.class,
- new KeyValueWritableComparator());
- }
-
- public KeyValueWritableComparable() {
- }
-
- public KeyValueWritableComparable(KeyValue kv) {
- this.kv = kv;
- }
-
- @Override
- public void write(DataOutput out) throws IOException {
- KeyValue.write(kv, out);
- }
-
- @Override
- public void readFields(DataInput in) throws IOException {
- kv = KeyValue.create(in);
- }
-
- @Override
- @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="EQ_COMPARETO_USE_OBJECT_EQUALS",
- justification="This is wrong, yes, but we should be purging Writables, not fixing them")
- public int compareTo(KeyValueWritableComparable o) {
- return CellComparator.COMPARATOR.compare(this.kv, ((KeyValueWritableComparable)o).kv);
- }
-
- public static class KeyValueWritableComparator extends WritableComparator {
-
- @Override
- public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
- try {
- KeyValueWritableComparable kv1 = new KeyValueWritableComparable();
- kv1.readFields(new DataInputStream(new ByteArrayInputStream(b1, s1, l1)));
- KeyValueWritableComparable kv2 = new KeyValueWritableComparable();
- kv2.readFields(new DataInputStream(new ByteArrayInputStream(b2, s2, l2)));
- return compare(kv1, kv2);
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
- }
-
- }
-
- }
-
- public static class KeyValueReducer
- extends
- Reducer<KeyValueWritableComparable, KeyValue, ImmutableBytesWritable, KeyValue> {
- protected void reduce(
- KeyValueWritableComparable row,
- Iterable<KeyValue> kvs,
- Reducer<KeyValueWritableComparable,
- KeyValue, ImmutableBytesWritable, KeyValue>.Context context)
- throws java.io.IOException, InterruptedException {
- int index = 0;
- for (KeyValue kv : kvs) {
- context.write(new ImmutableBytesWritable(kv.getRowArray()), kv);
- if (++index % 100 == 0)
- context.setStatus("Wrote " + index + " KeyValues, "
- + "and the rowkey whose is being wrote is " + Bytes.toString(kv.getRowArray()));
- }
- }
- }
-
- public static class KeyValueSortImporter
- extends TableMapper<KeyValueWritableComparable, KeyValue> {
- private Map<byte[], byte[]> cfRenameMap;
- private Filter filter;
- private static final Log LOG = LogFactory.getLog(KeyValueImporter.class);
-
- /**
- * @param row The current table row key.
- * @param value The columns.
- * @param context The current context.
- * @throws IOException When something is broken with the data.
- */
- @Override
- public void map(ImmutableBytesWritable row, Result value,
- Context context)
- throws IOException {
- try {
- if (LOG.isTraceEnabled()) {
- LOG.trace("Considering the row."
- + Bytes.toString(row.get(), row.getOffset(), row.getLength()));
- }
- if (filter == null
- || !filter.filterRowKey(CellUtil.createFirstOnRow(row.get(), row.getOffset(),
- (short) row.getLength()))) {
- for (Cell kv : value.rawCells()) {
- kv = filterKv(filter, kv);
- // skip if we filtered it out
- if (kv == null) continue;
- // TODO get rid of ensureKeyValue
- KeyValue ret = KeyValueUtil.ensureKeyValue(convertKv(kv, cfRenameMap));
- context.write(new KeyValueWritableComparable(ret.createKeyOnly(false)), ret);
- }
- }
- } catch (InterruptedException e) {
- e.printStackTrace();
- }
- }
-
- @Override
- public void setup(Context context) throws IOException {
- cfRenameMap = createCfRenameMap(context.getConfiguration());
- filter = instantiateFilter(context.getConfiguration());
- int reduceNum = context.getNumReduceTasks();
- Configuration conf = context.getConfiguration();
- TableName tableName = TableName.valueOf(context.getConfiguration().get(TABLE_NAME));
- try (Connection conn = ConnectionFactory.createConnection(conf);
- RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
- byte[][] startKeys = regionLocator.getStartKeys();
- if (startKeys.length != reduceNum) {
- throw new IOException("Region split after job initialization");
- }
- KeyValueWritableComparable[] startKeyWraps =
- new KeyValueWritableComparable[startKeys.length - 1];
- for (int i = 1; i < startKeys.length; ++i) {
- startKeyWraps[i - 1] =
- new KeyValueWritableComparable(KeyValueUtil.createFirstOnRow(startKeys[i]));
- }
- KeyValueWritableComparablePartitioner.START_KEYS = startKeyWraps;
- }
- }
- }
-
- /**
- * A mapper that just writes out KeyValues.
- */
- @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="EQ_COMPARETO_USE_OBJECT_EQUALS",
- justification="Writables are going away and this has been this way forever")
- public static class KeyValueImporter extends TableMapper<ImmutableBytesWritable, KeyValue> {
- private Map<byte[], byte[]> cfRenameMap;
- private Filter filter;
- private static final Log LOG = LogFactory.getLog(KeyValueImporter.class);
-
- /**
- * @param row The current table row key.
- * @param value The columns.
- * @param context The current context.
- * @throws IOException When something is broken with the data.
- */
- @Override
- public void map(ImmutableBytesWritable row, Result value,
- Context context)
- throws IOException {
- try {
- if (LOG.isTraceEnabled()) {
- LOG.trace("Considering the row."
- + Bytes.toString(row.get(), row.getOffset(), row.getLength()));
- }
- if (filter == null
- || !filter.filterRowKey(CellUtil.createFirstOnRow(row.get(), row.getOffset(),
- (short) row.getLength()))) {
- for (Cell kv : value.rawCells()) {
- kv = filterKv(filter, kv);
- // skip if we filtered it out
- if (kv == null) continue;
- // TODO get rid of ensureKeyValue
- context.write(row, KeyValueUtil.ensureKeyValue(convertKv(kv, cfRenameMap)));
- }
- }
- } catch (InterruptedException e) {
- e.printStackTrace();
- }
- }
-
- @Override
- public void setup(Context context) {
- cfRenameMap = createCfRenameMap(context.getConfiguration());
- filter = instantiateFilter(context.getConfiguration());
- }
- }
-
- /**
- * Write the imported data back into an HBase table as Put and Delete mutations.
- */
- public static class Importer extends TableMapper<ImmutableBytesWritable, Mutation> {
- private Map<byte[], byte[]> cfRenameMap;
- private List<UUID> clusterIds;
- private Filter filter;
- private Durability durability;
-
- /**
- * @param row The current table row key.
- * @param value The columns.
- * @param context The current context.
- * @throws IOException When something is broken with the data.
- */
- @Override
- public void map(ImmutableBytesWritable row, Result value,
- Context context)
- throws IOException {
- try {
- writeResult(row, value, context);
- } catch (InterruptedException e) {
- e.printStackTrace();
- }
- }
-
- private void writeResult(ImmutableBytesWritable key, Result result, Context context)
- throws IOException, InterruptedException {
- Put put = null;
- Delete delete = null;
- if (LOG.isTraceEnabled()) {
- LOG.trace("Considering the row."
- + Bytes.toString(key.get(), key.getOffset(), key.getLength()));
- }
- if (filter == null
- || !filter.filterRowKey(CellUtil.createFirstOnRow(key.get(), key.getOffset(),
- (short) key.getLength()))) {
- processKV(key, result, context, put, delete);
- }
- }
-
- protected void processKV(ImmutableBytesWritable key, Result result, Context context, Put put,
- Delete delete) throws IOException, InterruptedException {
- for (Cell kv : result.rawCells()) {
- kv = filterKv(filter, kv);
- // skip if we filter it out
- if (kv == null) continue;
-
- kv = convertKv(kv, cfRenameMap);
- // Deletes and Puts are gathered and written when finished
- /*
- * If an Export contains a sequence of mutations and tombstones, the Import should restore that
- * sequence exactly as it was. If we combined all Delete tombstones into a single request, some
- * DeleteFamily tombstones could be dropped: when multiple DeleteFamily tombstones are submitted
- * in one Delete request, HBase keeps only the newest one and ignores the others.
- * Check HBASE-12065.
- */
- if (CellUtil.isDeleteFamily(kv)) {
- Delete deleteFamily = new Delete(key.get());
- deleteFamily.add(kv);
- if (durability != null) {
- deleteFamily.setDurability(durability);
- }
- deleteFamily.setClusterIds(clusterIds);
- context.write(key, deleteFamily);
- } else if (CellUtil.isDelete(kv)) {
- if (delete == null) {
- delete = new Delete(key.get());
- }
- delete.add(kv);
- } else {
- if (put == null) {
- put = new Put(key.get());
- }
- addPutToKv(put, kv);
- }
- }
- if (put != null) {
- if (durability != null) {
- put.setDurability(durability);
- }
- put.setClusterIds(clusterIds);
- context.write(key, put);
- }
- if (delete != null) {
- if (durability != null) {
- delete.setDurability(durability);
- }
- delete.setClusterIds(clusterIds);
- context.write(key, delete);
- }
- }
-
- protected void addPutToKv(Put put, Cell kv) throws IOException {
- put.add(kv);
- }
-
- @Override
- public void setup(Context context) {
- LOG.info("Setting up " + getClass() + " mapper.");
- Configuration conf = context.getConfiguration();
- cfRenameMap = createCfRenameMap(conf);
- filter = instantiateFilter(conf);
- String durabilityStr = conf.get(WAL_DURABILITY);
- if(durabilityStr != null){
- durability = Durability.valueOf(durabilityStr.toUpperCase(Locale.ROOT));
- LOG.info("setting WAL durability to " + durability);
- } else {
- LOG.info("setting WAL durability to default.");
- }
- // TODO: This is kind of ugly doing setup of ZKW just to read the clusterid.
- ZooKeeperWatcher zkw = null;
- Exception ex = null;
- try {
- zkw = new ZooKeeperWatcher(conf, context.getTaskAttemptID().toString(), null);
- clusterIds = Collections.singletonList(ZKClusterId.getUUIDForCluster(zkw));
- } catch (ZooKeeperConnectionException e) {
- ex = e;
- LOG.error("Problem connecting to ZooKeper during task setup", e);
- } catch (KeeperException e) {
- ex = e;
- LOG.error("Problem reading ZooKeeper data during task setup", e);
- } catch (IOException e) {
- ex = e;
- LOG.error("Problem setting up task", e);
- } finally {
- if (zkw != null) zkw.close();
- }
- if (clusterIds == null) {
- // exit early if setup fails
- throw new RuntimeException(ex);
- }
- }
- }
-
- /**
- * Create a {@link Filter} to apply to all incoming keys ({@link KeyValue KeyValues}) so that
- * some of them can optionally be excluded from the job output
- * @param conf {@link Configuration} from which to load the filter
- * @return the filter to use for the task, or <tt>null</tt> if no filter should be used
- * @throws IllegalArgumentException if the filter is misconfigured
- */
- public static Filter instantiateFilter(Configuration conf) {
- // get the filter, if it was configured
- Class<? extends Filter> filterClass = conf.getClass(FILTER_CLASS_CONF_KEY, null, Filter.class);
- if (filterClass == null) {
- LOG.debug("No configured filter class, accepting all keyvalues.");
- return null;
- }
- LOG.debug("Attempting to create filter:" + filterClass);
- String[] filterArgs = conf.getStrings(FILTER_ARGS_CONF_KEY);
- ArrayList<byte[]> quotedArgs = toQuotedByteArrays(filterArgs);
- try {
- Method m = filterClass.getMethod("createFilterFromArguments", ArrayList.class);
- return (Filter) m.invoke(null, quotedArgs);
- } catch (IllegalAccessException e) {
- LOG.error("Couldn't instantiate filter!", e);
- throw new RuntimeException(e);
- } catch (SecurityException e) {
- LOG.error("Couldn't instantiate filter!", e);
- throw new RuntimeException(e);
- } catch (NoSuchMethodException e) {
- LOG.error("Couldn't instantiate filter!", e);
- throw new RuntimeException(e);
- } catch (IllegalArgumentException e) {
- LOG.error("Couldn't instantiate filter!", e);
- throw new RuntimeException(e);
- } catch (InvocationTargetException e) {
- LOG.error("Couldn't instantiate filter!", e);
- throw new RuntimeException(e);
- }
- }
-
- private static ArrayList<byte[]> toQuotedByteArrays(String... stringArgs) {
- ArrayList<byte[]> quotedArgs = new ArrayList<>();
- for (String stringArg : stringArgs) {
- // all the filters' instantiation methods expect quoted args since they normally come from
- // the shell, so add the quotes here, though it shouldn't really be needed :-/
- quotedArgs.add(Bytes.toBytes("'" + stringArg + "'"));
- }
- return quotedArgs;
- }
-
- /**
- * Attempt to filter out the keyvalue
- * @param kv {@link KeyValue} on which to apply the filter
- * @return <tt>null</tt> if the key should not be written, otherwise returns the original
- * {@link KeyValue}
- */
- public static Cell filterKv(Filter filter, Cell kv) throws IOException {
- // apply the filter and skip this kv if the filter doesn't apply
- if (filter != null) {
- Filter.ReturnCode code = filter.filterKeyValue(kv);
- if (LOG.isTraceEnabled()) {
- LOG.trace("Filter returned:" + code + " for the key value:" + kv);
- }
- // if its not an accept type, then skip this kv
- if (!(code.equals(Filter.ReturnCode.INCLUDE) || code
- .equals(Filter.ReturnCode.INCLUDE_AND_NEXT_COL))) {
- return null;
- }
- }
- return kv;
- }
-
- // helper: create a new KeyValue based on CF rename map
- private static Cell convertKv(Cell kv, Map<byte[], byte[]> cfRenameMap) {
- if(cfRenameMap != null) {
- // If there's a rename mapping for this CF, create a new KeyValue
- byte[] newCfName = cfRenameMap.get(CellUtil.cloneFamily(kv));
- if(newCfName != null) {
- kv = new KeyValue(kv.getRowArray(), // row buffer
- kv.getRowOffset(), // row offset
- kv.getRowLength(), // row length
- newCfName, // CF buffer
- 0, // CF offset
- newCfName.length, // CF length
- kv.getQualifierArray(), // qualifier buffer
- kv.getQualifierOffset(), // qualifier offset
- kv.getQualifierLength(), // qualifier length
- kv.getTimestamp(), // timestamp
- KeyValue.Type.codeToType(kv.getTypeByte()), // KV Type
- kv.getValueArray(), // value buffer
- kv.getValueOffset(), // value offset
- kv.getValueLength()); // value length
- }
- }
- return kv;
- }
-
- // helper: make a map from sourceCfName to destCfName by parsing a config key
- private static Map<byte[], byte[]> createCfRenameMap(Configuration conf) {
- Map<byte[], byte[]> cfRenameMap = null;
- String allMappingsPropVal = conf.get(CF_RENAME_PROP);
- if(allMappingsPropVal != null) {
- // The conf value format should be sourceCf1:destCf1,sourceCf2:destCf2,...
- String[] allMappings = allMappingsPropVal.split(",");
- for (String mapping: allMappings) {
- if(cfRenameMap == null) {
- cfRenameMap = new TreeMap<>(Bytes.BYTES_COMPARATOR);
- }
- String [] srcAndDest = mapping.split(":");
- if(srcAndDest.length != 2) {
- continue;
- }
- cfRenameMap.put(srcAndDest[0].getBytes(), srcAndDest[1].getBytes());
- }
- }
- return cfRenameMap;
- }
-
- /**
- * <p>Sets a configuration property with key {@link #CF_RENAME_PROP} in conf that tells
- * the mapper how to rename column families.
- *
- * <p>Alternately, instead of calling this function, you could set the configuration key
- * {@link #CF_RENAME_PROP} yourself. The value should look like
- * <pre>srcCf1:destCf1,srcCf2:destCf2,....</pre>. This would have the same effect on
- * the mapper behavior.
- *
- * @param conf the Configuration in which the {@link #CF_RENAME_PROP} key will be
- * set
- * @param renameMap a mapping from source CF names to destination CF names
- */
- static public void configureCfRenaming(Configuration conf,
- Map<String, String> renameMap) {
- StringBuilder sb = new StringBuilder();
- for(Map.Entry<String,String> entry: renameMap.entrySet()) {
- String sourceCf = entry.getKey();
- String destCf = entry.getValue();
-
- if(sourceCf.contains(":") || sourceCf.contains(",") ||
- destCf.contains(":") || destCf.contains(",")) {
- throw new IllegalArgumentException("Illegal character in CF names: "
- + sourceCf + ", " + destCf);
- }
-
- if(sb.length() != 0) {
- sb.append(",");
- }
- sb.append(sourceCf + ":" + destCf);
- }
- conf.set(CF_RENAME_PROP, sb.toString());
- }
-
- /**
- * Add a Filter to be instantiated on import
- * @param conf Configuration to update (will be passed to the job)
- * @param clazz {@link Filter} subclass to instantiate on the server.
- * @param filterArgs List of arguments to pass to the filter on instantiation
- */
- public static void addFilterAndArguments(Configuration conf, Class<? extends Filter> clazz,
- List<String> filterArgs) throws IOException {
- conf.set(Import.FILTER_CLASS_CONF_KEY, clazz.getName());
- conf.setStrings(Import.FILTER_ARGS_CONF_KEY, filterArgs.toArray(new String[filterArgs.size()]));
- }
-
- /**
- * Sets up the actual job.
- * @param conf The current configuration.
- * @param args The command line parameters.
- * @return The newly created job.
- * @throws IOException When setting up the job fails.
- */
- public static Job createSubmittableJob(Configuration conf, String[] args)
- throws IOException {
- TableName tableName = TableName.valueOf(args[0]);
- conf.set(TABLE_NAME, tableName.getNameAsString());
- Path inputDir = new Path(args[1]);
- Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
- job.setJarByClass(Importer.class);
- FileInputFormat.setInputPaths(job, inputDir);
- job.setInputFormatClass(SequenceFileInputFormat.class);
- String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
-
- // make sure we get the filter in the jars
- try {
- Class<? extends Filter> filter = conf.getClass(FILTER_CLASS_CONF_KEY, null, Filter.class);
- if (filter != null) {
- TableMapReduceUtil.addDependencyJarsForClasses(conf, filter);
- }
- } catch (Exception e) {
- throw new IOException(e);
- }
-
- if (hfileOutPath != null && conf.getBoolean(HAS_LARGE_RESULT, false)) {
- LOG.info("Use Large Result!!");
- try (Connection conn = ConnectionFactory.createConnection(conf);
- Table table = conn.getTable(tableName);
- RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
- HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(), regionLocator);
- job.setMapperClass(KeyValueSortImporter.class);
- job.setReducerClass(KeyValueReducer.class);
- Path outputDir = new Path(hfileOutPath);
- FileOutputFormat.setOutputPath(job, outputDir);
- job.setMapOutputKeyClass(KeyValueWritableComparable.class);
- job.setMapOutputValueClass(KeyValue.class);
- job.getConfiguration().setClass("mapreduce.job.output.key.comparator.class",
- KeyValueWritableComparable.KeyValueWritableComparator.class,
- RawComparator.class);
- Path partitionsPath =
- new Path(TotalOrderPartitioner.getPartitionFile(job.getConfiguration()));
- FileSystem fs = FileSystem.get(job.getConfiguration());
- fs.deleteOnExit(partitionsPath);
- job.setPartitionerClass(KeyValueWritableComparablePartitioner.class);
- job.setNumReduceTasks(regionLocator.getStartKeys().length);
- TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
- org.apache.hadoop.hbase.shaded.com.google.common.base.Preconditions.class);
- }
- } else if (hfileOutPath != null) {
- LOG.info("writing to hfiles for bulk load.");
- job.setMapperClass(KeyValueImporter.class);
- try (Connection conn = ConnectionFactory.createConnection(conf);
- Table table = conn.getTable(tableName);
- RegionLocator regionLocator = conn.getRegionLocator(tableName)){
- job.setReducerClass(KeyValueSortReducer.class);
- Path outputDir = new Path(hfileOutPath);
- FileOutputFormat.setOutputPath(job, outputDir);
- job.setMapOutputKeyClass(ImmutableBytesWritable.class);
- job.setMapOutputValueClass(KeyValue.class);
- HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(), regionLocator);
- TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
- org.apache.hadoop.hbase.shaded.com.google.common.base.Preconditions.class);
- }
- } else {
- LOG.info("writing directly to table from Mapper.");
- // No reducers. Just write straight to table. Call initTableReducerJob
- // because it sets up the TableOutputFormat.
- job.setMapperClass(Importer.class);
- TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job);
- job.setNumReduceTasks(0);
- }
- return job;
- }
-
- /*
- * @param errorMsg Error message. Can be null.
- */
- private static void usage(final String errorMsg) {
- if (errorMsg != null && errorMsg.length() > 0) {
- System.err.println("ERROR: " + errorMsg);
- }
- System.err.println("Usage: Import [options] <tablename> <inputdir>");
- System.err.println("By default Import will load data directly into HBase. To instead generate");
- System.err.println("HFiles of data to prepare for a bulk data load, pass the option:");
- System.err.println(" -D" + BULK_OUTPUT_CONF_KEY + "=/path/for/output");
- System.err.println("If there is a large result that includes too much KeyValue "
- + "whitch can occur OOME caused by the memery sort in reducer, pass the option:");
- System.err.println(" -D" + HAS_LARGE_RESULT + "=true");
- System.err
- .println(" To apply a generic org.apache.hadoop.hbase.filter.Filter to the input, use");
- System.err.println(" -D" + FILTER_CLASS_CONF_KEY + "=<name of filter class>");
- System.err.println(" -D" + FILTER_ARGS_CONF_KEY + "=<comma separated list of args for filter");
- System.err.println(" NOTE: The filter will be applied BEFORE doing key renames via the "
- + CF_RENAME_PROP + " property. Further, filters will only use the"
- + " Filter#filterRowKey(byte[] buffer, int offset, int length) method to identify "
- + " whether the current row needs to be ignored completely for processing and "
- + " Filter#filterKeyValue(KeyValue) method to determine if the KeyValue should be added;"
- + " Filter.ReturnCode#INCLUDE and #INCLUDE_AND_NEXT_COL will be considered as including"
- + " the KeyValue.");
- System.err.println("To import data exported from HBase 0.94, use");
- System.err.println(" -Dhbase.import.version=0.94");
- System.err.println(" -D " + JOB_NAME_CONF_KEY
- + "=jobName - use the specified mapreduce job name for the import");
- System.err.println("For performance consider the following options:\n"
- + " -Dmapreduce.map.speculative=false\n"
- + " -Dmapreduce.reduce.speculative=false\n"
- + " -D" + WAL_DURABILITY + "=<Used while writing data to hbase."
- +" Allowed values are the supported durability values"
- +" like SKIP_WAL/ASYNC_WAL/SYNC_WAL/...>");
- }
-
- /**
- * If the durability is set to {@link Durability#SKIP_WAL} and the data is imported into hbase,
- * we need to flush all the regions of the table, because the data is held only in memory and is
- * not present in the Write Ahead Log to replay after a crash. This method flushes all regions of
- * the table when data is imported with {@link Durability#SKIP_WAL}
- */
- public static void flushRegionsIfNecessary(Configuration conf) throws IOException,
- InterruptedException {
- String tableName = conf.get(TABLE_NAME);
- Admin hAdmin = null;
- Connection connection = null;
- String durability = conf.get(WAL_DURABILITY);
- // Need to flush if the data is written to hbase and skip wal is enabled.
- if (conf.get(BULK_OUTPUT_CONF_KEY) == null && durability != null
- && Durability.SKIP_WAL.name().equalsIgnoreCase(durability)) {
- LOG.info("Flushing all data that skipped the WAL.");
- try {
- connection = ConnectionFactory.createConnection(conf);
- hAdmin = connection.getAdmin();
- hAdmin.flush(TableName.valueOf(tableName));
- } finally {
- if (hAdmin != null) {
- hAdmin.close();
- }
- if (connection != null) {
- connection.close();
- }
- }
- }
- }
-
- @Override
- public int run(String[] args) throws Exception {
- if (args.length < 2) {
- usage("Wrong number of arguments: " + args.length);
- return -1;
- }
- String inputVersionString = System.getProperty(ResultSerialization.IMPORT_FORMAT_VER);
- if (inputVersionString != null) {
- getConf().set(ResultSerialization.IMPORT_FORMAT_VER, inputVersionString);
- }
- Job job = createSubmittableJob(getConf(), args);
- boolean isJobSuccessful = job.waitForCompletion(true);
- if(isJobSuccessful){
- // Flush all the regions of the table
- flushRegionsIfNecessary(getConf());
- }
- long inputRecords = job.getCounters().findCounter(TaskCounter.MAP_INPUT_RECORDS).getValue();
- long outputRecords = job.getCounters().findCounter(TaskCounter.MAP_OUTPUT_RECORDS).getValue();
- if (outputRecords < inputRecords) {
- System.err.println("Warning, not all records were imported (maybe filtered out).");
- if (outputRecords == 0) {
- System.err.println("If the data was exported from HBase 0.94 "+
- "consider using -Dhbase.import.version=0.94.");
- }
- }
-
- return (isJobSuccessful ? 0 : 1);
- }
-
- /**
- * Main entry point.
- * @param args The command line parameters.
- * @throws Exception When running the job fails.
- */
- public static void main(String[] args) throws Exception {
- int errCode = ToolRunner.run(HBaseConfiguration.create(), new Import(), args);
- System.exit(errCode);
- }
-
-}
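For reference, a minimal sketch of invoking the import tool with a filter and bulk-load output;
the table, paths and prefix value are hypothetical:

    import java.util.Arrays;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.filter.PrefixFilter;
    import org.apache.hadoop.hbase.mapreduce.Import;
    import org.apache.hadoop.util.ToolRunner;

    public class ImportSketch {
      public static void main(String[] unused) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // Filter the exported cells before they are written; this sets FILTER_CLASS_CONF_KEY
        // and FILTER_ARGS_CONF_KEY for us.
        Import.addFilterAndArguments(conf, PrefixFilter.class, Arrays.asList("row-42"));
        // Generate HFiles for bulk load instead of writing through the table.
        conf.set(Import.BULK_OUTPUT_CONF_KEY, "/tmp/import-hfiles");
        // Positional args: <tablename> <inputdir>, where <inputdir> holds the SequenceFiles
        // written by Export.
        int exit = ToolRunner.run(conf, new Import(),
            new String[] { "myTable", "/tmp/export-out" });
        System.exit(exit);
      }
    }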
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java
deleted file mode 100644
index b64271e..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java
+++ /dev/null
@@ -1,793 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import static java.lang.String.format;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.Set;
-
-import org.apache.commons.lang.StringUtils;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.HColumnDescriptor;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.TableNotEnabledException;
-import org.apache.hadoop.hbase.TableNotFoundException;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Admin;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.RegionLocator;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Base64;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.Pair;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
-import org.apache.hadoop.security.Credentials;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
-import org.apache.hadoop.hbase.client.TableDescriptor;
-import org.apache.hadoop.hbase.shaded.com.google.common.base.Preconditions;
-import org.apache.hadoop.hbase.shaded.com.google.common.base.Splitter;
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
-
-/**
- * Tool to import data from a TSV file.
- *
- * This tool is rather simplistic - it doesn't do any quoting or
- * escaping, but is useful for many data loads.
- *
- * @see ImportTsv#usage(String)
- */
-@InterfaceAudience.Public
-public class ImportTsv extends Configured implements Tool {
-
- protected static final Log LOG = LogFactory.getLog(ImportTsv.class);
-
- final static String NAME = "importtsv";
-
- public final static String MAPPER_CONF_KEY = "importtsv.mapper.class";
- public final static String BULK_OUTPUT_CONF_KEY = "importtsv.bulk.output";
- public final static String TIMESTAMP_CONF_KEY = "importtsv.timestamp";
- public final static String JOB_NAME_CONF_KEY = "mapreduce.job.name";
- // TODO: the rest of these configs are used exclusively by TsvImporterMapper.
- // Move them out of the tool and let the mapper handle its own validation.
- public final static String DRY_RUN_CONF_KEY = "importtsv.dry.run";
- // If true, bad lines are logged to stderr. Default: false.
- public final static String LOG_BAD_LINES_CONF_KEY = "importtsv.log.bad.lines";
- public final static String SKIP_LINES_CONF_KEY = "importtsv.skip.bad.lines";
- public final static String SKIP_EMPTY_COLUMNS = "importtsv.skip.empty.columns";
- public final static String COLUMNS_CONF_KEY = "importtsv.columns";
- public final static String SEPARATOR_CONF_KEY = "importtsv.separator";
- public final static String ATTRIBUTE_SEPERATOR_CONF_KEY = "attributes.seperator";
-  // This config is used to propagate credentials from parent MR jobs which launch
-  // ImportTSV jobs. See IntegrationTestImportTsv.
- public final static String CREDENTIALS_LOCATION = "credentials_location";
- final static String DEFAULT_SEPARATOR = "\t";
- final static String DEFAULT_ATTRIBUTES_SEPERATOR = "=>";
- final static String DEFAULT_MULTIPLE_ATTRIBUTES_SEPERATOR = ",";
- final static Class DEFAULT_MAPPER = TsvImporterMapper.class;
- public final static String CREATE_TABLE_CONF_KEY = "create.table";
- public final static String NO_STRICT_COL_FAMILY = "no.strict";
- /**
- * If table didn't exist and was created in dry-run mode, this flag is
- * flipped to delete it when MR ends.
- */
- private static boolean DRY_RUN_TABLE_CREATED;
-
- public static class TsvParser {
- /**
- * Column families and qualifiers mapped to the TSV columns
- */
- private final byte[][] families;
- private final byte[][] qualifiers;
-
- private final byte separatorByte;
-
- private int rowKeyColumnIndex;
-
- private int maxColumnCount;
-
- // Default value must be negative
- public static final int DEFAULT_TIMESTAMP_COLUMN_INDEX = -1;
-
- private int timestampKeyColumnIndex = DEFAULT_TIMESTAMP_COLUMN_INDEX;
-
- public static final String ROWKEY_COLUMN_SPEC = "HBASE_ROW_KEY";
-
- public static final String TIMESTAMPKEY_COLUMN_SPEC = "HBASE_TS_KEY";
-
- public static final String ATTRIBUTES_COLUMN_SPEC = "HBASE_ATTRIBUTES_KEY";
-
- public static final String CELL_VISIBILITY_COLUMN_SPEC = "HBASE_CELL_VISIBILITY";
-
- public static final String CELL_TTL_COLUMN_SPEC = "HBASE_CELL_TTL";
-
- private int attrKeyColumnIndex = DEFAULT_ATTRIBUTES_COLUMN_INDEX;
-
- public static final int DEFAULT_ATTRIBUTES_COLUMN_INDEX = -1;
-
- public static final int DEFAULT_CELL_VISIBILITY_COLUMN_INDEX = -1;
-
- public static final int DEFAULT_CELL_TTL_COLUMN_INDEX = -1;
-
- private int cellVisibilityColumnIndex = DEFAULT_CELL_VISIBILITY_COLUMN_INDEX;
-
- private int cellTTLColumnIndex = DEFAULT_CELL_TTL_COLUMN_INDEX;
-
- /**
-     * @param columnsSpecification the list of columns to parse out, comma separated.
-     * The row key should be the special token TsvParser.ROWKEY_COLUMN_SPEC
-     * @param separatorStr the single-character column separator
- */
- public TsvParser(String columnsSpecification, String separatorStr) {
- // Configure separator
- byte[] separator = Bytes.toBytes(separatorStr);
- Preconditions.checkArgument(separator.length == 1,
- "TsvParser only supports single-byte separators");
- separatorByte = separator[0];
-
- // Configure columns
- ArrayList<String> columnStrings = Lists.newArrayList(
- Splitter.on(',').trimResults().split(columnsSpecification));
-
- maxColumnCount = columnStrings.size();
- families = new byte[maxColumnCount][];
- qualifiers = new byte[maxColumnCount][];
-
- for (int i = 0; i < columnStrings.size(); i++) {
- String str = columnStrings.get(i);
- if (ROWKEY_COLUMN_SPEC.equals(str)) {
- rowKeyColumnIndex = i;
- continue;
- }
- if (TIMESTAMPKEY_COLUMN_SPEC.equals(str)) {
- timestampKeyColumnIndex = i;
- continue;
- }
- if (ATTRIBUTES_COLUMN_SPEC.equals(str)) {
- attrKeyColumnIndex = i;
- continue;
- }
- if (CELL_VISIBILITY_COLUMN_SPEC.equals(str)) {
- cellVisibilityColumnIndex = i;
- continue;
- }
- if (CELL_TTL_COLUMN_SPEC.equals(str)) {
- cellTTLColumnIndex = i;
- continue;
- }
- String[] parts = str.split(":", 2);
- if (parts.length == 1) {
- families[i] = str.getBytes();
- qualifiers[i] = HConstants.EMPTY_BYTE_ARRAY;
- } else {
- families[i] = parts[0].getBytes();
- qualifiers[i] = parts[1].getBytes();
- }
- }
- }
-
- public boolean hasTimestamp() {
- return timestampKeyColumnIndex != DEFAULT_TIMESTAMP_COLUMN_INDEX;
- }
-
- public int getTimestampKeyColumnIndex() {
- return timestampKeyColumnIndex;
- }
-
- public boolean hasAttributes() {
- return attrKeyColumnIndex != DEFAULT_ATTRIBUTES_COLUMN_INDEX;
- }
-
- public boolean hasCellVisibility() {
- return cellVisibilityColumnIndex != DEFAULT_CELL_VISIBILITY_COLUMN_INDEX;
- }
-
- public boolean hasCellTTL() {
-      return cellTTLColumnIndex != DEFAULT_CELL_TTL_COLUMN_INDEX;
- }
-
- public int getAttributesKeyColumnIndex() {
- return attrKeyColumnIndex;
- }
-
- public int getCellVisibilityColumnIndex() {
- return cellVisibilityColumnIndex;
- }
-
- public int getCellTTLColumnIndex() {
- return cellTTLColumnIndex;
- }
-
- public int getRowKeyColumnIndex() {
- return rowKeyColumnIndex;
- }
-
- public byte[] getFamily(int idx) {
- return families[idx];
- }
- public byte[] getQualifier(int idx) {
- return qualifiers[idx];
- }
-
- public ParsedLine parse(byte[] lineBytes, int length)
- throws BadTsvLineException {
- // Enumerate separator offsets
- ArrayList<Integer> tabOffsets = new ArrayList<>(maxColumnCount);
- for (int i = 0; i < length; i++) {
- if (lineBytes[i] == separatorByte) {
- tabOffsets.add(i);
- }
- }
- if (tabOffsets.isEmpty()) {
- throw new BadTsvLineException("No delimiter");
- }
-
- tabOffsets.add(length);
-
- if (tabOffsets.size() > maxColumnCount) {
- throw new BadTsvLineException("Excessive columns");
- } else if (tabOffsets.size() <= getRowKeyColumnIndex()) {
- throw new BadTsvLineException("No row key");
- } else if (hasTimestamp()
- && tabOffsets.size() <= getTimestampKeyColumnIndex()) {
- throw new BadTsvLineException("No timestamp");
- } else if (hasAttributes() && tabOffsets.size() <= getAttributesKeyColumnIndex()) {
- throw new BadTsvLineException("No attributes specified");
- } else if (hasCellVisibility() && tabOffsets.size() <= getCellVisibilityColumnIndex()) {
- throw new BadTsvLineException("No cell visibility specified");
- } else if (hasCellTTL() && tabOffsets.size() <= getCellTTLColumnIndex()) {
- throw new BadTsvLineException("No cell TTL specified");
- }
- return new ParsedLine(tabOffsets, lineBytes);
- }
-
- class ParsedLine {
- private final ArrayList<Integer> tabOffsets;
- private byte[] lineBytes;
-
- ParsedLine(ArrayList<Integer> tabOffsets, byte[] lineBytes) {
- this.tabOffsets = tabOffsets;
- this.lineBytes = lineBytes;
- }
-
- public int getRowKeyOffset() {
- return getColumnOffset(rowKeyColumnIndex);
- }
- public int getRowKeyLength() {
- return getColumnLength(rowKeyColumnIndex);
- }
-
- public long getTimestamp(long ts) throws BadTsvLineException {
- // Return ts if HBASE_TS_KEY is not configured in column spec
- if (!hasTimestamp()) {
- return ts;
- }
-
- String timeStampStr = Bytes.toString(lineBytes,
- getColumnOffset(timestampKeyColumnIndex),
- getColumnLength(timestampKeyColumnIndex));
- try {
- return Long.parseLong(timeStampStr);
- } catch (NumberFormatException nfe) {
- // treat this record as bad record
- throw new BadTsvLineException("Invalid timestamp " + timeStampStr);
- }
- }
-
- private String getAttributes() {
- if (!hasAttributes()) {
- return null;
- } else {
- return Bytes.toString(lineBytes, getColumnOffset(attrKeyColumnIndex),
- getColumnLength(attrKeyColumnIndex));
- }
- }
-
- public String[] getIndividualAttributes() {
- String attributes = getAttributes();
- if (attributes != null) {
- return attributes.split(DEFAULT_MULTIPLE_ATTRIBUTES_SEPERATOR);
- } else {
- return null;
- }
- }
-
- public int getAttributeKeyOffset() {
- if (hasAttributes()) {
- return getColumnOffset(attrKeyColumnIndex);
- } else {
- return DEFAULT_ATTRIBUTES_COLUMN_INDEX;
- }
- }
-
- public int getAttributeKeyLength() {
- if (hasAttributes()) {
- return getColumnLength(attrKeyColumnIndex);
- } else {
- return DEFAULT_ATTRIBUTES_COLUMN_INDEX;
- }
- }
-
- public int getCellVisibilityColumnOffset() {
- if (hasCellVisibility()) {
- return getColumnOffset(cellVisibilityColumnIndex);
- } else {
- return DEFAULT_CELL_VISIBILITY_COLUMN_INDEX;
- }
- }
-
- public int getCellVisibilityColumnLength() {
- if (hasCellVisibility()) {
- return getColumnLength(cellVisibilityColumnIndex);
- } else {
- return DEFAULT_CELL_VISIBILITY_COLUMN_INDEX;
- }
- }
-
- public String getCellVisibility() {
- if (!hasCellVisibility()) {
- return null;
- } else {
- return Bytes.toString(lineBytes, getColumnOffset(cellVisibilityColumnIndex),
- getColumnLength(cellVisibilityColumnIndex));
- }
- }
-
- public int getCellTTLColumnOffset() {
- if (hasCellTTL()) {
- return getColumnOffset(cellTTLColumnIndex);
- } else {
- return DEFAULT_CELL_TTL_COLUMN_INDEX;
- }
- }
-
- public int getCellTTLColumnLength() {
- if (hasCellTTL()) {
- return getColumnLength(cellTTLColumnIndex);
- } else {
- return DEFAULT_CELL_TTL_COLUMN_INDEX;
- }
- }
-
- public long getCellTTL() {
- if (!hasCellTTL()) {
- return 0;
- } else {
- return Bytes.toLong(lineBytes, getColumnOffset(cellTTLColumnIndex),
- getColumnLength(cellTTLColumnIndex));
- }
- }
-
- public int getColumnOffset(int idx) {
- if (idx > 0)
- return tabOffsets.get(idx - 1) + 1;
- else
- return 0;
- }
- public int getColumnLength(int idx) {
- return tabOffsets.get(idx) - getColumnOffset(idx);
- }
- public int getColumnCount() {
- return tabOffsets.size();
- }
- public byte[] getLineBytes() {
- return lineBytes;
- }
- }
-
- public static class BadTsvLineException extends Exception {
- public BadTsvLineException(String err) {
- super(err);
- }
- private static final long serialVersionUID = 1L;
- }
-
- /**
- * Return starting position and length of row key from the specified line bytes.
-     * @param lineBytes the raw bytes of the line to examine
-     * @param length the number of valid bytes in {@code lineBytes}
- * @return Pair of row key offset and length.
- * @throws BadTsvLineException
- */
- public Pair<Integer, Integer> parseRowKey(byte[] lineBytes, int length)
- throws BadTsvLineException {
- int rkColumnIndex = 0;
- int startPos = 0, endPos = 0;
- for (int i = 0; i <= length; i++) {
- if (i == length || lineBytes[i] == separatorByte) {
- endPos = i - 1;
- if (rkColumnIndex++ == getRowKeyColumnIndex()) {
- if ((endPos + 1) == startPos) {
- throw new BadTsvLineException("Empty value for ROW KEY.");
- }
- break;
- } else {
- startPos = endPos + 2;
- }
- }
- if (i == length) {
- throw new BadTsvLineException(
- "Row key does not exist as number of columns in the line"
- + " are less than row key position.");
- }
- }
- return new Pair<>(startPos, endPos - startPos + 1);
- }
- }
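As a rough illustration of how the TsvParser above is driven (a sketch only, not part of this patch; it is placed in the org.apache.hadoop.hbase.mapreduce package so that the package-private ParsedLine type is visible, and the column spec and data are made up):

package org.apache.hadoop.hbase.mapreduce;

import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser;
import org.apache.hadoop.hbase.util.Bytes;

public class TsvParserSketch {
  public static void main(String[] args) throws TsvParser.BadTsvLineException {
    // Column spec and separator mirror -Dimporttsv.columns / -Dimporttsv.separator.
    TsvParser parser = new TsvParser("HBASE_ROW_KEY,d:c1,d:c2", "\t");
    byte[] line = Bytes.toBytes("row1\tvalue1\tvalue2");
    TsvParser.ParsedLine parsed = parser.parse(line, line.length);
    String rowKey = Bytes.toString(line, parsed.getRowKeyOffset(), parsed.getRowKeyLength());
    System.out.println("row key = " + rowKey);                   // prints: row1
    System.out.println("columns = " + parsed.getColumnCount());  // prints: 3
  }
}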
-
- /**
- * Sets up the actual job.
- *
- * @param conf The current configuration.
- * @param args The command line parameters.
- * @return The newly created job.
- * @throws IOException When setting up the job fails.
- */
- protected static Job createSubmittableJob(Configuration conf, String[] args)
- throws IOException, ClassNotFoundException {
- Job job = null;
- boolean isDryRun = conf.getBoolean(DRY_RUN_CONF_KEY, false);
- try (Connection connection = ConnectionFactory.createConnection(conf)) {
- try (Admin admin = connection.getAdmin()) {
-        // Support separator characters that are not valid in XML configuration
- // by re-encoding the passed separator as a Base64 string.
- String actualSeparator = conf.get(SEPARATOR_CONF_KEY);
- if (actualSeparator != null) {
- conf.set(SEPARATOR_CONF_KEY,
- Base64.encodeBytes(actualSeparator.getBytes()));
- }
-
- // See if a non-default Mapper was set
- String mapperClassName = conf.get(MAPPER_CONF_KEY);
- Class mapperClass = mapperClassName != null? Class.forName(mapperClassName): DEFAULT_MAPPER;
-
- TableName tableName = TableName.valueOf(args[0]);
- Path inputDir = new Path(args[1]);
- String jobName = conf.get(JOB_NAME_CONF_KEY,NAME + "_" + tableName.getNameAsString());
- job = Job.getInstance(conf, jobName);
- job.setJarByClass(mapperClass);
- FileInputFormat.setInputPaths(job, inputDir);
- job.setInputFormatClass(TextInputFormat.class);
- job.setMapperClass(mapperClass);
- job.setMapOutputKeyClass(ImmutableBytesWritable.class);
- String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
- String[] columns = conf.getStrings(COLUMNS_CONF_KEY);
- if(StringUtils.isNotEmpty(conf.get(CREDENTIALS_LOCATION))) {
- String fileLoc = conf.get(CREDENTIALS_LOCATION);
- Credentials cred = Credentials.readTokenStorageFile(new File(fileLoc), conf);
- job.getCredentials().addAll(cred);
- }
-
- if (hfileOutPath != null) {
- if (!admin.tableExists(tableName)) {
- LOG.warn(format("Table '%s' does not exist.", tableName));
- if ("yes".equalsIgnoreCase(conf.get(CREATE_TABLE_CONF_KEY, "yes"))) {
- // TODO: this is backwards. Instead of depending on the existence of a table,
- // create a sane splits file for HFileOutputFormat based on data sampling.
- createTable(admin, tableName, columns);
- if (isDryRun) {
- LOG.warn("Dry run: Table will be deleted at end of dry run.");
- synchronized (ImportTsv.class) {
- DRY_RUN_TABLE_CREATED = true;
- }
- }
- } else {
- String errorMsg =
- format("Table '%s' does not exist and '%s' is set to no.", tableName,
- CREATE_TABLE_CONF_KEY);
- LOG.error(errorMsg);
- throw new TableNotFoundException(errorMsg);
- }
- }
- try (Table table = connection.getTable(tableName);
- RegionLocator regionLocator = connection.getRegionLocator(tableName)) {
- boolean noStrict = conf.getBoolean(NO_STRICT_COL_FAMILY, false);
- // if no.strict is false then check column family
- if(!noStrict) {
- ArrayList<String> unmatchedFamilies = new ArrayList<>();
- Set<String> cfSet = getColumnFamilies(columns);
- TableDescriptor tDesc = table.getDescriptor();
- for (String cf : cfSet) {
- if(!tDesc.hasColumnFamily(Bytes.toBytes(cf))) {
- unmatchedFamilies.add(cf);
- }
- }
- if(unmatchedFamilies.size() > 0) {
- ArrayList<String> familyNames = new ArrayList<>();
- for (ColumnFamilyDescriptor family : table.getDescriptor().getColumnFamilies()) {
- familyNames.add(family.getNameAsString());
- }
- String msg =
- "Column Families " + unmatchedFamilies + " specified in " + COLUMNS_CONF_KEY
- + " does not match with any of the table " + tableName
- + " column families " + familyNames + ".\n"
- + "To disable column family check, use -D" + NO_STRICT_COL_FAMILY
- + "=true.\n";
- usage(msg);
- System.exit(-1);
- }
- }
- if (mapperClass.equals(TsvImporterTextMapper.class)) {
- job.setMapOutputValueClass(Text.class);
- job.setReducerClass(TextSortReducer.class);
- } else {
- job.setMapOutputValueClass(Put.class);
- job.setCombinerClass(PutCombiner.class);
- job.setReducerClass(PutSortReducer.class);
- }
- if (!isDryRun) {
- Path outputDir = new Path(hfileOutPath);
- FileOutputFormat.setOutputPath(job, outputDir);
- HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(),
- regionLocator);
- }
- }
- } else {
- if (!admin.tableExists(tableName)) {
- String errorMsg = format("Table '%s' does not exist.", tableName);
- LOG.error(errorMsg);
- throw new TableNotFoundException(errorMsg);
- }
- if (mapperClass.equals(TsvImporterTextMapper.class)) {
- usage(TsvImporterTextMapper.class.toString()
- + " should not be used for non bulkloading case. use "
- + TsvImporterMapper.class.toString()
- + " or custom mapper whose value type is Put.");
- System.exit(-1);
- }
- if (!isDryRun) {
- // No reducers. Just write straight to table. Call initTableReducerJob
- // to set up the TableOutputFormat.
- TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job);
- }
- job.setNumReduceTasks(0);
- }
- if (isDryRun) {
- job.setOutputFormatClass(NullOutputFormat.class);
- job.getConfiguration().setStrings("io.serializations",
- job.getConfiguration().get("io.serializations"),
- MutationSerialization.class.getName(), ResultSerialization.class.getName(),
- KeyValueSerialization.class.getName());
- }
- TableMapReduceUtil.addDependencyJars(job);
- TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
- org.apache.hadoop.hbase.shaded.com.google.common.base.Function.class /* Guava used by TsvParser */);
- }
- }
- return job;
- }
-
- private static void createTable(Admin admin, TableName tableName, String[] columns)
- throws IOException {
- HTableDescriptor htd = new HTableDescriptor(tableName);
- Set<String> cfSet = getColumnFamilies(columns);
- for (String cf : cfSet) {
- HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toBytes(cf));
- htd.addFamily(hcd);
- }
- LOG.warn(format("Creating table '%s' with '%s' columns and default descriptors.",
- tableName, cfSet));
- admin.createTable(htd);
- }
-
- private static void deleteTable(Configuration conf, String[] args) {
- TableName tableName = TableName.valueOf(args[0]);
- try (Connection connection = ConnectionFactory.createConnection(conf);
- Admin admin = connection.getAdmin()) {
- try {
- admin.disableTable(tableName);
- } catch (TableNotEnabledException e) {
- LOG.debug("Dry mode: Table: " + tableName + " already disabled, so just deleting it.");
- }
- admin.deleteTable(tableName);
- } catch (IOException e) {
- LOG.error(format("***Dry run: Failed to delete table '%s'.***%n%s", tableName,
- e.toString()));
- return;
- }
- LOG.info(format("Dry run: Deleted table '%s'.", tableName));
- }
-
- private static Set<String> getColumnFamilies(String[] columns) {
- Set<String> cfSet = new HashSet<>();
- for (String aColumn : columns) {
- if (TsvParser.ROWKEY_COLUMN_SPEC.equals(aColumn)
- || TsvParser.TIMESTAMPKEY_COLUMN_SPEC.equals(aColumn)
- || TsvParser.CELL_VISIBILITY_COLUMN_SPEC.equals(aColumn)
- || TsvParser.CELL_TTL_COLUMN_SPEC.equals(aColumn)
- || TsvParser.ATTRIBUTES_COLUMN_SPEC.equals(aColumn))
- continue;
- // we are only concerned with the first one (in case this is a cf:cq)
- cfSet.add(aColumn.split(":", 2)[0]);
- }
- return cfSet;
- }
-
- /*
- * @param errorMsg Error message. Can be null.
- */
- private static void usage(final String errorMsg) {
- if (errorMsg != null && errorMsg.length() > 0) {
- System.err.println("ERROR: " + errorMsg);
- }
- String usage =
- "Usage: " + NAME + " -D"+ COLUMNS_CONF_KEY + "=a,b,c <tablename> <inputdir>\n" +
- "\n" +
- "Imports the given input directory of TSV data into the specified table.\n" +
- "\n" +
- "The column names of the TSV data must be specified using the -D" + COLUMNS_CONF_KEY + "\n" +
- "option. This option takes the form of comma-separated column names, where each\n" +
- "column name is either a simple column family, or a columnfamily:qualifier. The special\n" +
- "column name " + TsvParser.ROWKEY_COLUMN_SPEC + " is used to designate that this column should be used\n" +
- "as the row key for each imported record. You must specify exactly one column\n" +
- "to be the row key, and you must specify a column name for every column that exists in the\n" +
- "input data. Another special column" + TsvParser.TIMESTAMPKEY_COLUMN_SPEC +
- " designates that this column should be\n" +
- "used as timestamp for each record. Unlike " + TsvParser.ROWKEY_COLUMN_SPEC + ", " +
- TsvParser.TIMESTAMPKEY_COLUMN_SPEC + " is optional." + "\n" +
- "You must specify at most one column as timestamp key for each imported record.\n" +
- "Record with invalid timestamps (blank, non-numeric) will be treated as bad record.\n" +
- "Note: if you use this option, then '" + TIMESTAMP_CONF_KEY + "' option will be ignored.\n" +
- "\n" +
- "Other special columns that can be specified are " + TsvParser.CELL_TTL_COLUMN_SPEC +
- " and " + TsvParser.CELL_VISIBILITY_COLUMN_SPEC + ".\n" +
- TsvParser.CELL_TTL_COLUMN_SPEC + " designates that this column will be used " +
- "as a Cell's Time To Live (TTL) attribute.\n" +
- TsvParser.CELL_VISIBILITY_COLUMN_SPEC + " designates that this column contains the " +
- "visibility label expression.\n" +
- "\n" +
-      TsvParser.ATTRIBUTES_COLUMN_SPEC + " can be used to specify Operation Attributes per record.\n" +
-      " Should be specified as key=>value where " + DEFAULT_ATTRIBUTES_SEPERATOR + " is used\n" +
-      " as the separator. Note that more than one operation attribute can be specified.\n" +
- "By default importtsv will load data directly into HBase. To instead generate\n" +
- "HFiles of data to prepare for a bulk data load, pass the option:\n" +
- " -D" + BULK_OUTPUT_CONF_KEY + "=/path/for/output\n" +
- " Note: if you do not use this option, then the target table must already exist in HBase\n" +
- "\n" +
- "Other options that may be specified with -D include:\n" +
- " -D" + DRY_RUN_CONF_KEY + "=true - Dry run mode. Data is not actually populated into" +
- " table. If table does not exist, it is created but deleted in the end.\n" +
- " -D" + SKIP_LINES_CONF_KEY + "=false - fail if encountering an invalid line\n" +
- " -D" + LOG_BAD_LINES_CONF_KEY + "=true - logs invalid lines to stderr\n" +
- " -D" + SKIP_EMPTY_COLUMNS + "=false - If true then skip empty columns in bulk import\n" +
- " '-D" + SEPARATOR_CONF_KEY + "=|' - eg separate on pipes instead of tabs\n" +
- " -D" + TIMESTAMP_CONF_KEY + "=currentTimeAsLong - use the specified timestamp for the import\n" +
- " -D" + MAPPER_CONF_KEY + "=my.Mapper - A user-defined Mapper to use instead of " +
- DEFAULT_MAPPER.getName() + "\n" +
- " -D" + JOB_NAME_CONF_KEY + "=jobName - use the specified mapreduce job name for the import\n" +
- " -D" + CREATE_TABLE_CONF_KEY + "=no - can be used to avoid creation of table by this tool\n" +
- " Note: if you set this to 'no', then the target table must already exist in HBase\n" +
- " -D" + NO_STRICT_COL_FAMILY + "=true - ignore column family check in hbase table. " +
- "Default is false\n\n" +
- "For performance consider the following options:\n" +
- " -Dmapreduce.map.speculative=false\n" +
- " -Dmapreduce.reduce.speculative=false";
-
- System.err.println(usage);
- }
-
- @Override
- public int run(String[] args) throws Exception {
- if (args.length < 2) {
- usage("Wrong number of arguments: " + args.length);
- return -1;
- }
-
- // When MAPPER_CONF_KEY is null, the user wants to use the provided TsvImporterMapper, so
- // perform validation on these additional args. When it's not null, user has provided their
-    // own mapper, so these validations are not relevant.
- // TODO: validation for TsvImporterMapper, not this tool. Move elsewhere.
- if (null == getConf().get(MAPPER_CONF_KEY)) {
- // Make sure columns are specified
- String[] columns = getConf().getStrings(COLUMNS_CONF_KEY);
- if (columns == null) {
- usage("No columns specified. Please specify with -D" +
- COLUMNS_CONF_KEY+"=...");
- return -1;
- }
-
- // Make sure they specify exactly one column as the row key
- int rowkeysFound = 0;
- for (String col : columns) {
- if (col.equals(TsvParser.ROWKEY_COLUMN_SPEC)) rowkeysFound++;
- }
- if (rowkeysFound != 1) {
- usage("Must specify exactly one column as " + TsvParser.ROWKEY_COLUMN_SPEC);
- return -1;
- }
-
- // Make sure we have at most one column as the timestamp key
- int tskeysFound = 0;
- for (String col : columns) {
- if (col.equals(TsvParser.TIMESTAMPKEY_COLUMN_SPEC))
- tskeysFound++;
- }
- if (tskeysFound > 1) {
- usage("Must specify at most one column as "
- + TsvParser.TIMESTAMPKEY_COLUMN_SPEC);
- return -1;
- }
-
- int attrKeysFound = 0;
- for (String col : columns) {
- if (col.equals(TsvParser.ATTRIBUTES_COLUMN_SPEC))
- attrKeysFound++;
- }
- if (attrKeysFound > 1) {
- usage("Must specify at most one column as "
- + TsvParser.ATTRIBUTES_COLUMN_SPEC);
- return -1;
- }
-
- // Make sure one or more columns are specified excluding rowkey and
- // timestamp key
- if (columns.length - (rowkeysFound + tskeysFound + attrKeysFound) < 1) {
- usage("One or more columns in addition to the row key and timestamp(optional) are required");
- return -1;
- }
- }
-
- // If timestamp option is not specified, use current system time.
-    long timestamp = getConf().getLong(TIMESTAMP_CONF_KEY, System.currentTimeMillis());
-
-    // Set it back to replace invalid timestamp (non-numeric) with current
-    // system time
-    getConf().setLong(TIMESTAMP_CONF_KEY, timestamp);
-
- synchronized (ImportTsv.class) {
- DRY_RUN_TABLE_CREATED = false;
- }
- Job job = createSubmittableJob(getConf(), args);
- boolean success = job.waitForCompletion(true);
- boolean delete = false;
- synchronized (ImportTsv.class) {
- delete = DRY_RUN_TABLE_CREATED;
- }
- if (delete) {
- deleteTable(getConf(), args);
- }
- return success ? 0 : 1;
- }
-
- public static void main(String[] args) throws Exception {
- int status = ToolRunner.run(HBaseConfiguration.create(), new ImportTsv(), args);
- System.exit(status);
- }
-}
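A minimal sketch (not part of this patch) of launching the ImportTsv tool above programmatically, the equivalent of the command-line usage it prints. The table name and paths are placeholders, and the bulk-output switch can be dropped to write Puts directly to the table:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.ImportTsv;
import org.apache.hadoop.util.ToolRunner;

public class ImportTsvLauncherSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // Same switches as the usage text: the column spec is mandatory,
    // importtsv.bulk.output is optional (omit it to write Puts directly).
    String[] toolArgs = {
        "-Dimporttsv.columns=HBASE_ROW_KEY,d:c1,d:c2",
        "-Dimporttsv.bulk.output=/tmp/hfiles",
        "mytable",
        "/data/input.tsv"
    };
    System.exit(ToolRunner.run(conf, new ImportTsv(), toolArgs));
  }
}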
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/JarFinder.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/JarFinder.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/JarFinder.java
deleted file mode 100644
index 953df62..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/JarFinder.java
+++ /dev/null
@@ -1,186 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.base.Preconditions;
-
-import java.io.BufferedOutputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.net.URL;
-import java.net.URLDecoder;
-import java.text.MessageFormat;
-import java.util.Enumeration;
-import java.util.jar.JarFile;
-import java.util.jar.JarOutputStream;
-import java.util.jar.Manifest;
-import java.util.zip.ZipEntry;
-import java.util.zip.ZipOutputStream;
-
-/**
- * Finds the Jar for a class. If the class is in a directory in the
- * classpath, it creates a Jar on the fly with the contents of the directory
- * and returns the path to that Jar. If a Jar is created, it is created under the
- * directory named by the "test.build.dir" system property (default "target/test-dir").
- *
- * This file was forked from hadoop/common/branches/branch-2@1377176.
- */
-public class JarFinder {
-
- private static void copyToZipStream(File file, ZipEntry entry,
- ZipOutputStream zos) throws IOException {
- InputStream is = new FileInputStream(file);
- try {
- zos.putNextEntry(entry);
- byte[] arr = new byte[4096];
- int read = is.read(arr);
- while (read > -1) {
- zos.write(arr, 0, read);
- read = is.read(arr);
- }
- } finally {
- try {
- is.close();
- } finally {
- zos.closeEntry();
- }
- }
- }
-
- public static void jarDir(File dir, String relativePath, ZipOutputStream zos)
- throws IOException {
- Preconditions.checkNotNull(relativePath, "relativePath");
- Preconditions.checkNotNull(zos, "zos");
-
- // by JAR spec, if there is a manifest, it must be the first entry in the
- // ZIP.
- File manifestFile = new File(dir, JarFile.MANIFEST_NAME);
- ZipEntry manifestEntry = new ZipEntry(JarFile.MANIFEST_NAME);
- if (!manifestFile.exists()) {
- zos.putNextEntry(manifestEntry);
- new Manifest().write(new BufferedOutputStream(zos));
- zos.closeEntry();
- } else {
- copyToZipStream(manifestFile, manifestEntry, zos);
- }
- zos.closeEntry();
- zipDir(dir, relativePath, zos, true);
- zos.close();
- }
-
- private static void zipDir(File dir, String relativePath, ZipOutputStream zos,
- boolean start) throws IOException {
- String[] dirList = dir.list();
- if (dirList == null) {
- return;
- }
- for (String aDirList : dirList) {
- File f = new File(dir, aDirList);
- if (!f.isHidden()) {
- if (f.isDirectory()) {
- if (!start) {
- ZipEntry dirEntry = new ZipEntry(relativePath + f.getName() + "/");
- zos.putNextEntry(dirEntry);
- zos.closeEntry();
- }
- String filePath = f.getPath();
- File file = new File(filePath);
- zipDir(file, relativePath + f.getName() + "/", zos, false);
- }
- else {
- String path = relativePath + f.getName();
- if (!path.equals(JarFile.MANIFEST_NAME)) {
- ZipEntry anEntry = new ZipEntry(path);
- copyToZipStream(f, anEntry, zos);
- }
- }
- }
- }
- }
-
- private static void createJar(File dir, File jarFile) throws IOException {
- Preconditions.checkNotNull(dir, "dir");
- Preconditions.checkNotNull(jarFile, "jarFile");
- File jarDir = jarFile.getParentFile();
- if (!jarDir.exists()) {
- if (!jarDir.mkdirs()) {
- throw new IOException(MessageFormat.format("could not create dir [{0}]",
- jarDir));
- }
- }
- try (FileOutputStream fos = new FileOutputStream(jarFile);
- JarOutputStream jos = new JarOutputStream(fos)) {
- jarDir(dir, "", jos);
- }
- }
-
- /**
-   * Returns the full path to the Jar containing the class. It always returns a
- * JAR.
- *
- * @param klass class.
- *
- * @return path to the Jar containing the class.
- */
- public static String getJar(Class klass) {
- Preconditions.checkNotNull(klass, "klass");
- ClassLoader loader = klass.getClassLoader();
- if (loader != null) {
- String class_file = klass.getName().replaceAll("\\.", "/") + ".class";
- try {
- for (Enumeration itr = loader.getResources(class_file);
- itr.hasMoreElements(); ) {
- URL url = (URL) itr.nextElement();
- String path = url.getPath();
- if (path.startsWith("file:")) {
- path = path.substring("file:".length());
- }
- path = URLDecoder.decode(path, "UTF-8");
- if ("jar".equals(url.getProtocol())) {
- path = URLDecoder.decode(path, "UTF-8");
- return path.replaceAll("!.*$", "");
- }
- else if ("file".equals(url.getProtocol())) {
- String klassName = klass.getName();
- klassName = klassName.replace(".", "/") + ".class";
- path = path.substring(0, path.length() - klassName.length());
- File baseDir = new File(path);
- File testDir = new File(System.getProperty("test.build.dir", "target/test-dir"));
- testDir = testDir.getAbsoluteFile();
- if (!testDir.exists()) {
- testDir.mkdirs();
- }
- File tempJar = File.createTempFile("hadoop-", "", testDir);
- tempJar = new File(tempJar.getAbsolutePath() + ".jar");
- tempJar.deleteOnExit();
- createJar(baseDir, tempJar);
- return tempJar.getAbsolutePath();
- }
- }
- }
- catch (IOException e) {
- throw new RuntimeException(e);
- }
- }
- return null;
- }
-}
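A minimal sketch (not part of this patch) of the JarFinder.getJar() contract described above; the class used is illustrative only:

import org.apache.hadoop.hbase.mapreduce.JarFinder;

public class JarFinderSketch {
  public static void main(String[] args) {
    // If this class was loaded from a jar, the path of that jar is returned.
    // If it was loaded from a plain directory, the directory is packed into a
    // temporary jar under test.build.dir (default target/test-dir) first.
    String jarPath = JarFinder.getJar(JarFinderSketch.class);
    System.out.println("jar for class: " + jarPath);
  }
}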
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/KeyValueSerialization.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/KeyValueSerialization.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/KeyValueSerialization.java
deleted file mode 100644
index 241608b..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/KeyValueSerialization.java
+++ /dev/null
@@ -1,88 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.KeyValueUtil;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.io.serializer.Deserializer;
-import org.apache.hadoop.io.serializer.Serialization;
-import org.apache.hadoop.io.serializer.Serializer;
-
-@InterfaceAudience.Public
-public class KeyValueSerialization implements Serialization<KeyValue> {
- @Override
- public boolean accept(Class<?> c) {
- return KeyValue.class.isAssignableFrom(c);
- }
-
- @Override
- public KeyValueDeserializer getDeserializer(Class<KeyValue> t) {
- return new KeyValueDeserializer();
- }
-
- @Override
- public KeyValueSerializer getSerializer(Class<KeyValue> c) {
- return new KeyValueSerializer();
- }
-
- public static class KeyValueDeserializer implements Deserializer<KeyValue> {
- private DataInputStream dis;
-
- @Override
- public void close() throws IOException {
- this.dis.close();
- }
-
- @Override
- public KeyValue deserialize(KeyValue ignore) throws IOException {
- // I can't overwrite the passed in KV, not from a proto kv, not just yet. TODO
- return KeyValueUtil.create(this.dis);
- }
-
- @Override
- public void open(InputStream is) throws IOException {
- this.dis = new DataInputStream(is);
- }
- }
-
- public static class KeyValueSerializer implements Serializer<KeyValue> {
- private DataOutputStream dos;
-
- @Override
- public void close() throws IOException {
- this.dos.close();
- }
-
- @Override
- public void open(OutputStream os) throws IOException {
- this.dos = new DataOutputStream(os);
- }
-
- @Override
- public void serialize(KeyValue kv) throws IOException {
- KeyValueUtil.write(kv, this.dos);
- }
- }
-}
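A minimal sketch (not part of this patch) of registering KeyValueSerialization on a job, in the same way the dry-run branch of ImportTsv above appends it to io.serializations; the job name is a placeholder:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.KeyValueSerialization;
import org.apache.hadoop.mapreduce.Job;

public class SerializationSetupSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    Job job = Job.getInstance(conf, "kv-serialization-demo");
    // Append the HBase serialization so the framework can (de)serialize KeyValue
    // map output values alongside the default Writable serialization.
    job.getConfiguration().setStrings("io.serializations",
        job.getConfiguration().get("io.serializations"),
        KeyValueSerialization.class.getName());
  }
}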
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/KeyValueSortReducer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/KeyValueSortReducer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/KeyValueSortReducer.java
deleted file mode 100644
index 5c7ace2..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/KeyValueSortReducer.java
+++ /dev/null
@@ -1,56 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.util.TreeSet;
-
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.CellComparator;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.mapreduce.Reducer;
-
-/**
- * Emits sorted KeyValues.
- * Reads in all KeyValues from passed Iterator, sorts them, then emits
- * KeyValues in sorted order. If lots of columns per row, it will use lots of
- * memory sorting.
- * @see HFileOutputFormat2
- */
-@InterfaceAudience.Public
-public class KeyValueSortReducer extends Reducer<ImmutableBytesWritable, KeyValue, ImmutableBytesWritable, KeyValue> {
- protected void reduce(ImmutableBytesWritable row, java.lang.Iterable<KeyValue> kvs,
- org.apache.hadoop.mapreduce.Reducer<ImmutableBytesWritable, KeyValue, ImmutableBytesWritable, KeyValue>.Context context)
- throws java.io.IOException, InterruptedException {
- TreeSet<KeyValue> map = new TreeSet<>(CellComparator.COMPARATOR);
- for (KeyValue kv: kvs) {
- try {
- map.add(kv.clone());
- } catch (CloneNotSupportedException e) {
- throw new java.io.IOException(e);
- }
- }
- context.setStatus("Read " + map.getClass());
- int index = 0;
- for (KeyValue kv: map) {
- context.write(row, kv);
- if (++index % 100 == 0) context.setStatus("Wrote " + index);
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableHFileOutputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableHFileOutputFormat.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableHFileOutputFormat.java
deleted file mode 100644
index d7c7cc0..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableHFileOutputFormat.java
+++ /dev/null
@@ -1,122 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- * <p>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.TableDescriptor;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapreduce.Job;
-
-import java.io.IOException;
-import java.nio.charset.Charset;
-import java.util.List;
-import org.apache.hadoop.hbase.shaded.com.google.common.annotations.VisibleForTesting;
-
-/**
- * Creates a three-level directory tree: the first level uses the table name as the parent
- * directory, the second level uses the column family name as the child directory, and all
- * HFiles for a given family are placed under that child directory:
- * -tableName1
- *   -columnFamilyName1
- *   -columnFamilyName2
- *     -HFiles
- * -tableName2
- *   -columnFamilyName1
- *     -HFiles
- *   -columnFamilyName2
- */
-@InterfaceAudience.Public
-@VisibleForTesting
-public class MultiTableHFileOutputFormat extends HFileOutputFormat2 {
- private static final Log LOG = LogFactory.getLog(MultiTableHFileOutputFormat.class);
-
- /**
-   * Creates a composite key to use as a mapper output key when using
-   * MultiTableHFileOutputFormat.configureIncrementalLoad to set up a bulk ingest job.
-   *
-   * @param tableName Name of the table, e.g. TableName.getNameAsString()
-   * @param suffix    Usually a row key (when creating a mapper output key) or a column family
-   * @return          byte[] representation of the composite key
- */
- public static byte[] createCompositeKey(byte[] tableName,
- byte[] suffix) {
- return combineTableNameSuffix(tableName, suffix);
- }
-
- /**
- * Alternate api which accepts an ImmutableBytesWritable for the suffix
- * @see MultiTableHFileOutputFormat#createCompositeKey(byte[], byte[])
- */
- public static byte[] createCompositeKey(byte[] tableName,
- ImmutableBytesWritable suffix) {
- return combineTableNameSuffix(tableName, suffix.get());
- }
-
- /**
- * Alternate api which accepts a String for the tableName and ImmutableBytesWritable for the
- * suffix
- * @see MultiTableHFileOutputFormat#createCompositeKey(byte[], byte[])
- */
- public static byte[] createCompositeKey(String tableName,
- ImmutableBytesWritable suffix) {
- return combineTableNameSuffix(tableName.getBytes(Charset.forName("UTF-8")), suffix.get());
- }
-
- /**
- * Analogous to
- * {@link HFileOutputFormat2#configureIncrementalLoad(Job, TableDescriptor, RegionLocator)},
-   * this function configures the requisite number of reducers to write HFiles for multiple
-   * tables simultaneously.
- *
- * @param job See {@link org.apache.hadoop.mapreduce.Job}
- * @param multiTableDescriptors Table descriptor and region locator pairs
- * @throws IOException
- */
- public static void configureIncrementalLoad(Job job, List<TableInfo>
- multiTableDescriptors)
- throws IOException {
- MultiTableHFileOutputFormat.configureIncrementalLoad(job, multiTableDescriptors,
- MultiTableHFileOutputFormat.class);
- }
-
- final private static int validateCompositeKey(byte[] keyBytes) {
-
- int separatorIdx = Bytes.indexOf(keyBytes, HFileOutputFormat2.tableSeparator);
-
- // Either the separator was not found or a tablename wasn't present or a key wasn't present
- if (separatorIdx == -1) {
- throw new IllegalArgumentException("Invalid format for composite key [" + Bytes
- .toStringBinary(keyBytes) + "]. Cannot extract tablename and suffix from key");
- }
- return separatorIdx;
- }
-
- protected static byte[] getTableName(byte[] keyBytes) {
- int separatorIdx = validateCompositeKey(keyBytes);
- return Bytes.copy(keyBytes, 0, separatorIdx);
- }
-
- protected static byte[] getSuffix(byte[] keyBytes) {
- int separatorIdx = validateCompositeKey(keyBytes);
- return Bytes.copy(keyBytes, separatorIdx+1, keyBytes.length - separatorIdx - 1);
- }
-}
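A minimal sketch (not part of this patch) of producing the composite mapper output key that MultiTableHFileOutputFormat expects, so it can route cells to the right per-table directory; the table and row below are placeholders:

import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.MultiTableHFileOutputFormat;
import org.apache.hadoop.hbase.util.Bytes;

public class CompositeKeySketch {
  // Combine the table name and row key into the composite key used as the mapper output key.
  static ImmutableBytesWritable compositeKeyFor(TableName table, byte[] rowKey) {
    return new ImmutableBytesWritable(
        MultiTableHFileOutputFormat.createCompositeKey(table.getName(), rowKey));
  }

  public static void main(String[] args) {
    ImmutableBytesWritable key =
        compositeKeyFor(TableName.valueOf("table1"), Bytes.toBytes("row-0001"));
    System.out.println(Bytes.toStringBinary(key.get()));
  }
}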
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormat.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormat.java
deleted file mode 100644
index a8e6837..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormat.java
+++ /dev/null
@@ -1,104 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.client.Scan;
-
-/**
- * Convert HBase tabular data from multiple scanners into a format that
- * is consumable by Map/Reduce.
- *
- * <p>
- * Usage example
- * </p>
- *
- * <pre>
- * List<Scan> scans = new ArrayList<Scan>();
- *
- * Scan scan1 = new Scan();
- * scan1.setStartRow(firstRow1);
- * scan1.setStopRow(lastRow1);
- * scan1.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, table1);
- * scans.add(scan1);
- *
- * Scan scan2 = new Scan();
- * scan2.setStartRow(firstRow2);
- * scan2.setStopRow(lastRow2);
- * scan2.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, table2);
- * scans.add(scan2);
- *
- * TableMapReduceUtil.initTableMapperJob(scans, TableMapper.class, Text.class,
- * IntWritable.class, job);
- * </pre>
- */
-@InterfaceAudience.Public
-public class MultiTableInputFormat extends MultiTableInputFormatBase implements
- Configurable {
-
- /** Job parameter that specifies the scan list. */
- public static final String SCANS = "hbase.mapreduce.scans";
-
- /** The configuration. */
- private Configuration conf = null;
-
- /**
- * Returns the current configuration.
- *
- * @return The current configuration.
- * @see org.apache.hadoop.conf.Configurable#getConf()
- */
- @Override
- public Configuration getConf() {
- return conf;
- }
-
- /**
- * Sets the configuration. This is used to set the details for the tables to
- * be scanned.
- *
- * @param configuration The configuration to set.
- * @see org.apache.hadoop.conf.Configurable#setConf(
- * org.apache.hadoop.conf.Configuration)
- */
- @Override
- public void setConf(Configuration configuration) {
- this.conf = configuration;
- String[] rawScans = conf.getStrings(SCANS);
- if (rawScans.length <= 0) {
- throw new IllegalArgumentException("There must be at least 1 scan configuration set to : "
- + SCANS);
- }
- List<Scan> scans = new ArrayList<>();
-
- for (int i = 0; i < rawScans.length; i++) {
- try {
- scans.add(TableMapReduceUtil.convertStringToScan(rawScans[i]));
- } catch (IOException e) {
- throw new RuntimeException("Failed to convert Scan : " + rawScans[i] + " to string", e);
- }
- }
- this.setScans(scans);
- }
-}
[24/41] hbase git commit: HBASE-18640 Move mapreduce out of hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultithreadedTableMapper.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultithreadedTableMapper.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultithreadedTableMapper.java
new file mode 100644
index 0000000..694a359
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultithreadedTableMapper.java
@@ -0,0 +1,264 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.NavigableMap;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+/**
+ * Test Map/Reduce job over HBase tables. The map/reduce process we're testing
+ * on our tables is simple - take every row in the table, reverse the value of
+ * a particular cell, and write it back to the table.
+ */
+@Category({MapReduceTests.class, LargeTests.class})
+public class TestMultithreadedTableMapper {
+ private static final Log LOG = LogFactory.getLog(TestMultithreadedTableMapper.class);
+ private static final HBaseTestingUtility UTIL =
+ new HBaseTestingUtility();
+ static final TableName MULTI_REGION_TABLE_NAME = TableName.valueOf("mrtest");
+ static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
+ static final byte[] OUTPUT_FAMILY = Bytes.toBytes("text");
+ static final int NUMBER_OF_THREADS = 10;
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ // Up the handlers; this test needs more than usual.
+ UTIL.getConfiguration().setInt(HConstants.REGION_SERVER_HIGH_PRIORITY_HANDLER_COUNT, 10);
+ UTIL.startMiniCluster();
+ Table table =
+ UTIL.createMultiRegionTable(MULTI_REGION_TABLE_NAME, new byte[][] { INPUT_FAMILY,
+ OUTPUT_FAMILY });
+ UTIL.loadTable(table, INPUT_FAMILY, false);
+ UTIL.waitUntilAllRegionsAssigned(MULTI_REGION_TABLE_NAME);
+ }
+
+ @AfterClass
+ public static void afterClass() throws Exception {
+ UTIL.shutdownMiniCluster();
+ }
+
+ /**
+   * Pass the given key and the processed record to the reducer.
+ */
+ public static class ProcessContentsMapper
+ extends TableMapper<ImmutableBytesWritable, Put> {
+
+ /**
+     * Pass the key and the reversed value to the reducer.
+     *
+     * @param key the row key
+     * @param value the row's cells; expected to contain exactly one input column
+     * @param context the map context used to emit the reversed value
+ * @throws IOException
+ */
+ @Override
+ public void map(ImmutableBytesWritable key, Result value,
+ Context context)
+ throws IOException, InterruptedException {
+ if (value.size() != 1) {
+ throw new IOException("There should only be one input column");
+ }
+ Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>>
+ cf = value.getMap();
+ if(!cf.containsKey(INPUT_FAMILY)) {
+ throw new IOException("Wrong input columns. Missing: '" +
+ Bytes.toString(INPUT_FAMILY) + "'.");
+ }
+ // Get the original value and reverse it
+ String originalValue = Bytes.toString(value.getValue(INPUT_FAMILY, INPUT_FAMILY));
+ StringBuilder newValue = new StringBuilder(originalValue);
+ newValue.reverse();
+ // Now set the value to be collected
+ Put outval = new Put(key.get());
+ outval.addColumn(OUTPUT_FAMILY, null, Bytes.toBytes(newValue.toString()));
+ context.write(key, outval);
+ }
+ }
+
+ /**
+   * Test MultithreadedTableMapper map/reduce against a multi-region table
+ * @throws IOException
+ * @throws ClassNotFoundException
+ * @throws InterruptedException
+ */
+ @Test
+ public void testMultithreadedTableMapper()
+ throws IOException, InterruptedException, ClassNotFoundException {
+ runTestOnTable(UTIL.getConnection().getTable(MULTI_REGION_TABLE_NAME));
+ }
+
+ private void runTestOnTable(Table table)
+ throws IOException, InterruptedException, ClassNotFoundException {
+ Job job = null;
+ try {
+ LOG.info("Before map/reduce startup");
+ job = new Job(table.getConfiguration(), "process column contents");
+ job.setNumReduceTasks(1);
+ Scan scan = new Scan();
+ scan.addFamily(INPUT_FAMILY);
+ TableMapReduceUtil.initTableMapperJob(
+ table.getName(), scan,
+ MultithreadedTableMapper.class, ImmutableBytesWritable.class,
+ Put.class, job);
+ MultithreadedTableMapper.setMapperClass(job, ProcessContentsMapper.class);
+ MultithreadedTableMapper.setNumberOfThreads(job, NUMBER_OF_THREADS);
+ TableMapReduceUtil.initTableReducerJob(
+ table.getName().getNameAsString(),
+ IdentityTableReducer.class, job);
+ FileOutputFormat.setOutputPath(job, new Path("test"));
+ LOG.info("Started " + table.getName());
+ assertTrue(job.waitForCompletion(true));
+ LOG.info("After map/reduce completion");
+ // verify map-reduce results
+ verify(table.getName());
+ } finally {
+ table.close();
+ if (job != null) {
+ FileUtil.fullyDelete(
+ new File(job.getConfiguration().get("hadoop.tmp.dir")));
+ }
+ }
+ }
+
+ private void verify(TableName tableName) throws IOException {
+ Table table = UTIL.getConnection().getTable(tableName);
+ boolean verified = false;
+ long pause = UTIL.getConfiguration().getLong("hbase.client.pause", 5 * 1000);
+ int numRetries = UTIL.getConfiguration().getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 5);
+ for (int i = 0; i < numRetries; i++) {
+ try {
+ LOG.info("Verification attempt #" + i);
+ verifyAttempt(table);
+ verified = true;
+ break;
+ } catch (NullPointerException e) {
+        // If here, a cell was empty. Presume it's because updates came in
+ // after the scanner had been opened. Wait a while and retry.
+ LOG.debug("Verification attempt failed: " + e.getMessage());
+ }
+ try {
+ Thread.sleep(pause);
+ } catch (InterruptedException e) {
+ // continue
+ }
+ }
+ assertTrue(verified);
+ table.close();
+ }
+
+ /**
+ * Looks at every value of the mapreduce output and verifies that indeed
+ * the values have been reversed.
+ *
+ * @param table Table to scan.
+ * @throws IOException
+ * @throws NullPointerException if we failed to find a cell value
+ */
+ private void verifyAttempt(final Table table)
+ throws IOException, NullPointerException {
+ Scan scan = new Scan();
+ scan.addFamily(INPUT_FAMILY);
+ scan.addFamily(OUTPUT_FAMILY);
+ ResultScanner scanner = table.getScanner(scan);
+ try {
+ Iterator<Result> itr = scanner.iterator();
+ assertTrue(itr.hasNext());
+ while(itr.hasNext()) {
+ Result r = itr.next();
+ if (LOG.isDebugEnabled()) {
+ if (r.size() > 2 ) {
+ throw new IOException("Too many results, expected 2 got " +
+ r.size());
+ }
+ }
+ byte[] firstValue = null;
+ byte[] secondValue = null;
+ int count = 0;
+ for(Cell kv : r.listCells()) {
+ if (count == 0) {
+ firstValue = CellUtil.cloneValue(kv);
+ }else if (count == 1) {
+ secondValue = CellUtil.cloneValue(kv);
+ }else if (count == 2) {
+ break;
+ }
+ count++;
+ }
+ String first = "";
+ if (firstValue == null) {
+ throw new NullPointerException(Bytes.toString(r.getRow()) +
+ ": first value is null");
+ }
+ first = Bytes.toString(firstValue);
+ String second = "";
+ if (secondValue == null) {
+ throw new NullPointerException(Bytes.toString(r.getRow()) +
+ ": second value is null");
+ }
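+ // Reverse the bytes of the second value; after reversal it should match the first value.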
+ byte[] secondReversed = new byte[secondValue.length];
+ for (int i = 0, j = secondValue.length - 1; j >= 0; j--, i++) {
+ secondReversed[i] = secondValue[j];
+ }
+ second = Bytes.toString(secondReversed);
+ if (first.compareTo(second) != 0) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("second key is not the reverse of first. row=" +
+ Bytes.toStringBinary(r.getRow()) + ", first value=" + first +
+ ", second value=" + second);
+ }
+ fail();
+ }
+ }
+ } finally {
+ scanner.close();
+ }
+ }
+
+}
+
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRegionSizeCalculator.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRegionSizeCalculator.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRegionSizeCalculator.java
new file mode 100644
index 0000000..301cfef
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRegionSizeCalculator.java
@@ -0,0 +1,160 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HRegionLocation;
+import org.apache.hadoop.hbase.RegionLoad;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.testclassification.MiscTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.mockito.Mockito;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+
+import static org.apache.hadoop.hbase.HConstants.DEFAULT_REGIONSERVER_PORT;
+import static org.junit.Assert.assertEquals;
+import static org.mockito.Mockito.when;
+
+@Category({MiscTests.class, SmallTests.class})
+public class TestRegionSizeCalculator {
+
+ private Configuration configuration = new Configuration();
+ private final long megabyte = 1024L * 1024L;
+ private final ServerName sn = ServerName.valueOf("local-rs", DEFAULT_REGIONSERVER_PORT,
+ ServerName.NON_STARTCODE);
+
+ @Test
+ public void testSimpleTestCase() throws Exception {
+
+ RegionLocator regionLocator = mockRegionLocator("region1", "region2", "region3");
+
+ Admin admin = mockAdmin(
+ mockRegion("region1", 123),
+ mockRegion("region3", 1232),
+ mockRegion("region2", 54321)
+ );
+
+ RegionSizeCalculator calculator = new RegionSizeCalculator(regionLocator, admin);
+
+ assertEquals(123 * megabyte, calculator.getRegionSize("region1".getBytes()));
+ assertEquals(54321 * megabyte, calculator.getRegionSize("region2".getBytes()));
+ assertEquals(1232 * megabyte, calculator.getRegionSize("region3".getBytes()));
+ // if regionCalculator does not know about a region, it should return 0
+ assertEquals(0 * megabyte, calculator.getRegionSize("otherTableRegion".getBytes()));
+
+ assertEquals(3, calculator.getRegionSizeMap().size());
+ }
+
+
+ /**
+ * When the size of a region in megabytes is larger than the largest possible integer,
+ * a loss of precision could cause an error.
+ * */
+ @Test
+ public void testLargeRegion() throws Exception {
+
+ RegionLocator regionLocator = mockRegionLocator("largeRegion");
+
+ Admin admin = mockAdmin(
+ mockRegion("largeRegion", Integer.MAX_VALUE)
+ );
+
+ RegionSizeCalculator calculator = new RegionSizeCalculator(regionLocator, admin);
+
+ assertEquals(((long) Integer.MAX_VALUE) * megabyte, calculator.getRegionSize("largeRegion".getBytes()));
+ }
+
+ /** When calculator is disabled, it should return 0 for each request.*/
+ @Test
+ public void testDisabled() throws Exception {
+ String regionName = "cz.goout:/index.html";
+ RegionLocator table = mockRegionLocator(regionName);
+
+ Admin admin = mockAdmin(
+ mockRegion(regionName, 999)
+ );
+
+ //first request on enabled calculator
+ RegionSizeCalculator calculator = new RegionSizeCalculator(table, admin);
+ assertEquals(999 * megabyte, calculator.getRegionSize(regionName.getBytes()));
+
+ //then disabled calculator.
+ configuration.setBoolean(RegionSizeCalculator.ENABLE_REGIONSIZECALCULATOR, false);
+ RegionSizeCalculator disabledCalculator = new RegionSizeCalculator(table, admin);
+ assertEquals(0 * megabyte, disabledCalculator.getRegionSize(regionName.getBytes()));
+
+ assertEquals(0, disabledCalculator.getRegionSizeMap().size());
+ }
+
+ /**
+ * Creates a mock RegionLocator for a table with the given region names.
+ * */
+ private RegionLocator mockRegionLocator(String... regionNames) throws IOException {
+ RegionLocator mockedTable = Mockito.mock(RegionLocator.class);
+ when(mockedTable.getName()).thenReturn(TableName.valueOf("sizeTestTable"));
+ List<HRegionLocation> regionLocations = new ArrayList<>(regionNames.length);
+ when(mockedTable.getAllRegionLocations()).thenReturn(regionLocations);
+
+ for (String regionName : regionNames) {
+ HRegionInfo info = Mockito.mock(HRegionInfo.class);
+ when(info.getRegionName()).thenReturn(regionName.getBytes());
+ regionLocations.add(new HRegionLocation(info, sn));
+ }
+
+ return mockedTable;
+ }
+
+ /**
+ * Creates a mock Admin that returns RegionLoad info for the given regions.
+ */
+ private Admin mockAdmin(RegionLoad... regionLoadArray) throws Exception {
+ Admin mockAdmin = Mockito.mock(Admin.class);
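+ // Region loads are keyed by region name bytes, so use a TreeMap with BYTES_COMPARATOR.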
+ Map<byte[], RegionLoad> regionLoads = new TreeMap<>(Bytes.BYTES_COMPARATOR);
+ for (RegionLoad regionLoad : regionLoadArray) {
+ regionLoads.put(regionLoad.getName(), regionLoad);
+ }
+ when(mockAdmin.getConfiguration()).thenReturn(configuration);
+ when(mockAdmin.getRegionLoad(sn, TableName.valueOf("sizeTestTable"))).thenReturn(regionLoads);
+ return mockAdmin;
+ }
+
+ /**
+ * Creates mock of region with given name and size.
+ *
+ * @param fileSizeMb number of megabytes occupied by the region in the file store
+ * */
+ private RegionLoad mockRegion(String regionName, int fileSizeMb) {
+ RegionLoad region = Mockito.mock(RegionLoad.class);
+ when(region.getName()).thenReturn(regionName.getBytes());
+ when(region.getNameAsString()).thenReturn(regionName);
+ when(region.getStorefileSizeMB()).thenReturn(fileSizeMb);
+ return region;
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java
new file mode 100644
index 0000000..3b84e2d
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java
@@ -0,0 +1,400 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.ArrayList;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.CategoryBasedTimeout;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.LauncherSecurityManager;
+import org.apache.hadoop.mapreduce.Counter;
+import org.apache.hadoop.mapreduce.Job;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestRule;
+
+/**
+ * Test the rowcounter map reduce job.
+ */
+@Category({MapReduceTests.class, LargeTests.class})
+public class TestRowCounter {
+ @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
+ withTimeout(this.getClass()).withLookingForStuckThread(true).build();
+ private static final Log LOG = LogFactory.getLog(TestRowCounter.class);
+ private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+ private final static String TABLE_NAME = "testRowCounter";
+ private final static String TABLE_NAME_TS_RANGE = "testRowCounter_ts_range";
+ private final static String COL_FAM = "col_fam";
+ private final static String COL1 = "c1";
+ private final static String COL2 = "c2";
+ private final static String COMPOSITE_COLUMN = "C:A:A";
+ private final static int TOTAL_ROWS = 10;
+ private final static int ROWS_WITH_ONE_COL = 2;
+
+ /**
+ * @throws java.lang.Exception
+ */
+ @BeforeClass
+ public static void setUpBeforeClass() throws Exception {
+ TEST_UTIL.startMiniCluster();
+ Table table = TEST_UTIL.createTable(TableName.valueOf(TABLE_NAME), Bytes.toBytes(COL_FAM));
+ writeRows(table, TOTAL_ROWS, ROWS_WITH_ONE_COL);
+ table.close();
+ }
+
+ /**
+ * @throws java.lang.Exception
+ */
+ @AfterClass
+ public static void tearDownAfterClass() throws Exception {
+ TEST_UTIL.shutdownMiniCluster();
+ }
+
+ /**
+ * Test a case when no column was specified in command line arguments.
+ *
+ * @throws Exception
+ */
+ @Test
+ public void testRowCounterNoColumn() throws Exception {
+ String[] args = new String[] {
+ TABLE_NAME
+ };
+ runRowCount(args, 10);
+ }
+
+ /**
+ * Test a case when the column specified in the command line arguments is
+ * present in only some of the rows.
+ *
+ * @throws Exception
+ */
+ @Test
+ public void testRowCounterExclusiveColumn() throws Exception {
+ String[] args = new String[] {
+ TABLE_NAME, COL_FAM + ":" + COL1
+ };
+ runRowCount(args, 8);
+ }
+
+ /**
+ * Test a case when the column specified in command line arguments is
+ * one for which the qualifier contains colons.
+ *
+ * @throws Exception
+ */
+ @Test
+ public void testRowCounterColumnWithColonInQualifier() throws Exception {
+ String[] args = new String[] {
+ TABLE_NAME, COL_FAM + ":" + COMPOSITE_COLUMN
+ };
+ runRowCount(args, 8);
+ }
+
+ /**
+ * Test a case when the column specified in the command line arguments is not part
+ * of the first KV for a row.
+ *
+ * @throws Exception
+ */
+ @Test
+ public void testRowCounterHiddenColumn() throws Exception {
+ String[] args = new String[] {
+ TABLE_NAME, COL_FAM + ":" + COL2
+ };
+ runRowCount(args, 10);
+ }
+
+
+ /**
+ * Test a case when the column specified in the command line arguments is
+ * present in only some of the rows and a row range filter is also specified
+ *
+ * @throws Exception
+ */
+ @Test
+ public void testRowCounterColumnAndRowRange() throws Exception {
+ String[] args = new String[] {
+ TABLE_NAME, "--range=\\x00rov,\\x00rox", COL_FAM + ":" + COL1
+ };
+ runRowCount(args, 8);
+ }
+
+ /**
+ * Test a case when a single range of start and end keys is specified
+ * @throws Exception
+ */
+ @Test
+ public void testRowCounterRowSingleRange() throws Exception {
+ String[] args = new String[] {
+ TABLE_NAME, "--range=\\x00row1,\\x00row3"
+ };
+ runRowCount(args, 2);
+ }
+
+ /**
+ * Test a case when a single range with only an end key is specified
+ * @throws Exception
+ */
+ @Test
+ public void testRowCounterRowSingleRangeUpperBound() throws Exception {
+ String[] args = new String[] {
+ TABLE_NAME, "--range=,\\x00row3"
+ };
+ runRowCount(args, 3);
+ }
+
+ /**
+ * Test a case when two ranges are specified, one of which has only an end key
+ * @throws Exception
+ */
+ @Test
+ public void testRowCounterRowMultiRangeUpperBound() throws Exception {
+ String[] args = new String[] {
+ TABLE_NAME, "--range=,\\x00row3;\\x00row5,\\x00row7"
+ };
+ runRowCount(args, 5);
+ }
+
+ /**
+ * Test a case when a range is specified with multiple ranges of start-end keys
+ * @throws Exception
+ */
+ @Test
+ public void testRowCounterRowMultiRange() throws Exception {
+ String[] args = new String[] {
+ TABLE_NAME, "--range=\\x00row1,\\x00row3;\\x00row5,\\x00row8"
+ };
+ runRowCount(args, 5);
+ }
+
+ /**
+ * Test a case when multiple ranges of start-end keys are specified;
+ * one range is populated and the other two are empty
+ * @throws Exception
+ */
+ @Test
+ public void testRowCounterRowMultiEmptyRange() throws Exception {
+ String[] args = new String[] {
+ TABLE_NAME, "--range=\\x00row1,\\x00row3;;"
+ };
+ runRowCount(args, 2);
+ }
+
+ @Test
+ public void testRowCounter10kRowRange() throws Exception {
+ String tableName = TABLE_NAME + "10k";
+
+ try (Table table = TEST_UTIL.createTable(
+ TableName.valueOf(tableName), Bytes.toBytes(COL_FAM))) {
+ writeRows(table, 10000, 0);
+ }
+ String[] args = new String[] {
+ tableName, "--range=\\x00row9872,\\x00row9875"
+ };
+ runRowCount(args, 3);
+ }
+
+ /**
+ * Test a case when the timerange is specified with --starttime and --endtime options
+ *
+ * @throws Exception
+ */
+ @Test
+ public void testRowCounterTimeRange() throws Exception {
+ final byte[] family = Bytes.toBytes(COL_FAM);
+ final byte[] col1 = Bytes.toBytes(COL1);
+ Put put1 = new Put(Bytes.toBytes("row_timerange_" + 1));
+ Put put2 = new Put(Bytes.toBytes("row_timerange_" + 2));
+ Put put3 = new Put(Bytes.toBytes("row_timerange_" + 3));
+
+ long ts;
+
+ // use a separate table for the timerange test
+ Table table = TEST_UTIL.createTable(TableName.valueOf(TABLE_NAME_TS_RANGE), Bytes.toBytes(COL_FAM));
+
+ ts = System.currentTimeMillis();
+ put1.addColumn(family, col1, ts, Bytes.toBytes("val1"));
+ table.put(put1);
+ Thread.sleep(100);
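+ // The sleep ensures put1 gets a strictly earlier timestamp than put2 and put3.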
+
+ ts = System.currentTimeMillis();
+ put2.addColumn(family, col1, ts, Bytes.toBytes("val2"));
+ put3.addColumn(family, col1, ts, Bytes.toBytes("val3"));
+ table.put(put2);
+ table.put(put3);
+ table.close();
+
+ String[] args = new String[] {
+ TABLE_NAME_TS_RANGE, COL_FAM + ":" + COL1,
+ "--starttime=" + 0,
+ "--endtime=" + ts
+ };
+ runRowCount(args, 1);
+
+ args = new String[] {
+ TABLE_NAME_TS_RANGE, COL_FAM + ":" + COL1,
+ "--starttime=" + 0,
+ "--endtime=" + (ts - 10)
+ };
+ runRowCount(args, 1);
+
+ args = new String[] {
+ TABLE_NAME_TS_RANGE, COL_FAM + ":" + COL1,
+ "--starttime=" + ts,
+ "--endtime=" + (ts + 1000)
+ };
+ runRowCount(args, 2);
+
+ args = new String[] {
+ TABLE_NAME_TS_RANGE, COL_FAM + ":" + COL1,
+ "--starttime=" + (ts - 30 * 1000),
+ "--endtime=" + (ts + 30 * 1000),
+ };
+ runRowCount(args, 3);
+ }
+
+ /**
+ * Run the RowCounter map reduce job and verify the row count.
+ *
+ * @param args the command line arguments to be used for rowcounter job.
+ * @param expectedCount the expected row count (result of map reduce job).
+ * @throws Exception
+ */
+ private void runRowCount(String[] args, int expectedCount) throws Exception {
+ Job job = RowCounter.createSubmittableJob(TEST_UTIL.getConfiguration(), args);
+ long start = System.currentTimeMillis();
+ job.waitForCompletion(true);
+ long duration = System.currentTimeMillis() - start;
+ LOG.debug("row count duration (ms): " + duration);
+ assertTrue(job.isSuccessful());
+ Counter counter = job.getCounters().findCounter(RowCounter.RowCounterMapper.Counters.ROWS);
+ assertEquals(expectedCount, counter.getValue());
+ }
+
+ /**
+ * Writes the given number of distinct rows into the table. Most rows have
+ * several columns; the last rowsWithOneCol rows have only one.
+ *
+ * @param table
+ * @throws IOException
+ */
+ private static void writeRows(Table table, int totalRows, int rowsWithOneCol) throws IOException {
+ final byte[] family = Bytes.toBytes(COL_FAM);
+ final byte[] value = Bytes.toBytes("abcd");
+ final byte[] col1 = Bytes.toBytes(COL1);
+ final byte[] col2 = Bytes.toBytes(COL2);
+ final byte[] col3 = Bytes.toBytes(COMPOSITE_COLUMN);
+ ArrayList<Put> rowsUpdate = new ArrayList<>();
+ // write few rows with two columns
+ int i = 0;
+ for (; i < totalRows - rowsWithOneCol; i++) {
+ // Use binary rows values to test for HBASE-15287.
+ byte[] row = Bytes.toBytesBinary("\\x00row" + i);
+ Put put = new Put(row);
+ put.addColumn(family, col1, value);
+ put.addColumn(family, col2, value);
+ put.addColumn(family, col3, value);
+ rowsUpdate.add(put);
+ }
+
+ // write few rows with only one column
+ for (; i < totalRows; i++) {
+ byte[] row = Bytes.toBytes("row" + i);
+ Put put = new Put(row);
+ put.addColumn(family, col2, value);
+ rowsUpdate.add(put);
+ }
+ table.put(rowsUpdate);
+ }
+
+ /**
+ * Test the main method. RowCounter should print usage help and call System.exit on bad arguments.
+ */
+ @Test
+ public void testImportMain() throws Exception {
+ PrintStream oldPrintStream = System.err;
+ SecurityManager SECURITY_MANAGER = System.getSecurityManager();
+ LauncherSecurityManager newSecurityManager= new LauncherSecurityManager();
+ System.setSecurityManager(newSecurityManager);
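+ // LauncherSecurityManager intercepts System.exit(), recording the exit code and throwing SecurityException.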
+ ByteArrayOutputStream data = new ByteArrayOutputStream();
+ String[] args = {};
+ System.setErr(new PrintStream(data));
+ try {
+ System.setErr(new PrintStream(data));
+
+ try {
+ RowCounter.main(args);
+ fail("should be SecurityException");
+ } catch (SecurityException e) {
+ assertEquals(-1, newSecurityManager.getExitCode());
+ assertTrue(data.toString().contains("Wrong number of parameters:"));
+ assertTrue(data.toString().contains(
+ "Usage: RowCounter [options] <tablename> " +
+ "[--starttime=[start] --endtime=[end] " +
+ "[--range=[startKey],[endKey][;[startKey],[endKey]...]] " +
+ "[<column1> <column2>...]"));
+ assertTrue(data.toString().contains("-Dhbase.client.scanner.caching=100"));
+ assertTrue(data.toString().contains("-Dmapreduce.map.speculative=false"));
+ }
+ data.reset();
+ try {
+ args = new String[2];
+ args[0] = "table";
+ args[1] = "--range=1";
+ RowCounter.main(args);
+ fail("should be SecurityException");
+ } catch (SecurityException e) {
+ assertEquals(-1, newSecurityManager.getExitCode());
+ assertTrue(data.toString().contains(
+ "Please specify range in such format as \"--range=a,b\" or, with only one boundary," +
+ " \"--range=,b\" or \"--range=a,\""));
+ assertTrue(data.toString().contains(
+ "Usage: RowCounter [options] <tablename> " +
+ "[--starttime=[start] --endtime=[end] " +
+ "[--range=[startKey],[endKey][;[startKey],[endKey]...]] " +
+ "[<column1> <column2>...]"));
+ }
+
+ } finally {
+ System.setErr(oldPrintStream);
+ System.setSecurityManager(SECURITY_MANAGER);
+ }
+
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSecureLoadIncrementalHFiles.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSecureLoadIncrementalHFiles.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSecureLoadIncrementalHFiles.java
new file mode 100644
index 0000000..78fddbc
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSecureLoadIncrementalHFiles.java
@@ -0,0 +1,70 @@
+/**
+ * Copyright The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.codec.KeyValueCodecWithTags;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.security.UserProvider;
+import org.apache.hadoop.hbase.security.access.AccessControlLists;
+import org.apache.hadoop.hbase.security.access.SecureTestUtil;
+
+import org.junit.BeforeClass;
+import org.junit.experimental.categories.Category;
+
+/**
+ * Reruns TestLoadIncrementalHFiles using LoadIncrementalHFiles in secure mode.
+ * This suite is unable to verify the security handoff/turnover
+ * as miniCluster is running as system user thus has root privileges
+ * and delegation tokens don't seem to work on miniDFS.
+ *
+ * Thus SecureBulkload can only be completely verified by running
+ * integration tests against a secure cluster. This suite is still
+ * invaluable as it verifies the other mechanisms that need to be
+ * supported as part of a LoadIncrementalHFiles call.
+ */
+@Category({MapReduceTests.class, LargeTests.class})
+public class TestSecureLoadIncrementalHFiles extends TestLoadIncrementalHFiles{
+
+ @BeforeClass
+ public static void setUpBeforeClass() throws Exception {
+ // set the always on security provider
+ UserProvider.setUserProviderForTesting(util.getConfiguration(),
+ HadoopSecurityEnabledUserProviderForTesting.class);
+ // setup configuration
+ SecureTestUtil.enableSecurity(util.getConfiguration());
+ util.getConfiguration().setInt(
+ LoadIncrementalHFiles.MAX_FILES_PER_REGION_PER_FAMILY,
+ MAX_FILES_PER_REGION_PER_FAMILY);
+ // change default behavior so that tag values are returned with normal rpcs
+ util.getConfiguration().set(HConstants.RPC_CODEC_CONF_KEY,
+ KeyValueCodecWithTags.class.getCanonicalName());
+
+ util.startMiniCluster();
+
+ // Wait for the ACL table to become available
+ util.waitTableEnabled(AccessControlLists.ACL_TABLE_NAME);
+
+ setupNamespace();
+ }
+
+}
+
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSecureLoadIncrementalHFilesSplitRecovery.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSecureLoadIncrementalHFilesSplitRecovery.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSecureLoadIncrementalHFilesSplitRecovery.java
new file mode 100644
index 0000000..0e877ad
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSecureLoadIncrementalHFilesSplitRecovery.java
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.security.UserProvider;
+import org.apache.hadoop.hbase.security.access.AccessControlLists;
+import org.apache.hadoop.hbase.security.access.SecureTestUtil;
+
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+
+/**
+ * Reruns TestLoadIncrementalHFilesSplitRecovery
+ * using LoadIncrementalHFiles in secure mode.
+ * This suite is unable to verify the security handoff/turnover
+ * as miniCluster is running as system user thus has root privileges
+ * and delegation tokens don't seem to work on miniDFS.
+ *
+ * Thus SecureBulkload can only be completely verified by running
+ * integration tests against a secure cluster. This suite is still
+ * invaluable as it verifies the other mechanisms that need to be
+ * supported as part of a LoadIncrementalHFiles call.
+ */
+@Category({MapReduceTests.class, LargeTests.class})
+public class TestSecureLoadIncrementalHFilesSplitRecovery extends TestLoadIncrementalHFilesSplitRecovery {
+
+ //This "overrides" the parent static method
+ //make sure they are in sync
+ @BeforeClass
+ public static void setupCluster() throws Exception {
+ util = new HBaseTestingUtility();
+ // set the always on security provider
+ UserProvider.setUserProviderForTesting(util.getConfiguration(),
+ HadoopSecurityEnabledUserProviderForTesting.class);
+ // setup configuration
+ SecureTestUtil.enableSecurity(util.getConfiguration());
+
+ util.startMiniCluster();
+
+ // Wait for the ACL table to become available
+ util.waitTableEnabled(AccessControlLists.ACL_TABLE_NAME);
+ }
+
+ // Override with an empty body to disable this test; it does not work in secure mode.
+ @Test (timeout=180000)
+ @Override
+ public void testBulkLoadPhaseFailure() {
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSimpleTotalOrderPartitioner.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSimpleTotalOrderPartitioner.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSimpleTotalOrderPartitioner.java
new file mode 100644
index 0000000..5629cb4
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSimpleTotalOrderPartitioner.java
@@ -0,0 +1,81 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.*;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.experimental.categories.Category;
+
+import org.junit.Test;
+
+/**
+ * Test of simple partitioner.
+ */
+@Category({MapReduceTests.class, SmallTests.class})
+public class TestSimpleTotalOrderPartitioner {
+ protected final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+ Configuration conf = TEST_UTIL.getConfiguration();
+
+ @Test
+ public void testSplit() throws Exception {
+ String start = "a";
+ String end = "{";
+ SimpleTotalOrderPartitioner<byte []> p = new SimpleTotalOrderPartitioner<>();
+
+ this.conf.set(SimpleTotalOrderPartitioner.START, start);
+ this.conf.set(SimpleTotalOrderPartitioner.END, end);
+ p.setConf(this.conf);
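+ // Keys should be bucketed by where they fall between START ('a') and END ('{').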
+ ImmutableBytesWritable c = new ImmutableBytesWritable(Bytes.toBytes("c"));
+ // If one reduce, partition should be 0.
+ int partition = p.getPartition(c, HConstants.EMPTY_BYTE_ARRAY, 1);
+ assertEquals(0, partition);
+ // If two reduces, partition should be 0.
+ partition = p.getPartition(c, HConstants.EMPTY_BYTE_ARRAY, 2);
+ assertEquals(0, partition);
+ // Divide in 3.
+ partition = p.getPartition(c, HConstants.EMPTY_BYTE_ARRAY, 3);
+ assertEquals(0, partition);
+ ImmutableBytesWritable q = new ImmutableBytesWritable(Bytes.toBytes("q"));
+ partition = p.getPartition(q, HConstants.EMPTY_BYTE_ARRAY, 2);
+ assertEquals(1, partition);
+ partition = p.getPartition(q, HConstants.EMPTY_BYTE_ARRAY, 3);
+ assertEquals(2, partition);
+ // What about end and start keys.
+ ImmutableBytesWritable startBytes =
+ new ImmutableBytesWritable(Bytes.toBytes(start));
+ partition = p.getPartition(startBytes, HConstants.EMPTY_BYTE_ARRAY, 2);
+ assertEquals(0, partition);
+ partition = p.getPartition(startBytes, HConstants.EMPTY_BYTE_ARRAY, 3);
+ assertEquals(0, partition);
+ ImmutableBytesWritable endBytes =
+ new ImmutableBytesWritable(Bytes.toBytes("z"));
+ partition = p.getPartition(endBytes, HConstants.EMPTY_BYTE_ARRAY, 2);
+ assertEquals(1, partition);
+ partition = p.getPartition(endBytes, HConstants.EMPTY_BYTE_ARRAY, 3);
+ assertEquals(2, partition);
+ }
+
+}
+
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSyncTable.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSyncTable.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSyncTable.java
new file mode 100644
index 0000000..9a0c160
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSyncTable.java
@@ -0,0 +1,339 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.CategoryBasedTimeout;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.mapreduce.SyncTable.SyncMapper.Counter;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapreduce.Counters;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+import org.junit.rules.TestRule;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.base.Throwables;
+
+/**
+ * Basic test for the SyncTable M/R tool
+ */
+@Category(LargeTests.class)
+public class TestSyncTable {
+ @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
+ withTimeout(this.getClass()).withLookingForStuckThread(true).build();
+ private static final Log LOG = LogFactory.getLog(TestSyncTable.class);
+
+ private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+
+ @Rule
+ public TestName name = new TestName();
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ TEST_UTIL.startMiniCluster(3);
+ }
+
+ @AfterClass
+ public static void afterClass() throws Exception {
+ TEST_UTIL.shutdownMiniCluster();
+ }
+
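+ // Split keys are the byte encodings of evenly spaced int row keys,
+ // matching the Bytes.toBytes(int) row keys written by writeTestData.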
+ private static byte[][] generateSplits(int numRows, int numRegions) {
+ byte[][] splitRows = new byte[numRegions-1][];
+ for (int i = 1; i < numRegions; i++) {
+ splitRows[i-1] = Bytes.toBytes(numRows * i / numRegions);
+ }
+ return splitRows;
+ }
+
+ @Test
+ public void testSyncTable() throws Exception {
+ final TableName sourceTableName = TableName.valueOf(name.getMethodName() + "_source");
+ final TableName targetTableName = TableName.valueOf(name.getMethodName() + "_target");
+ Path testDir = TEST_UTIL.getDataTestDirOnTestFS("testSyncTable");
+
+ writeTestData(sourceTableName, targetTableName);
+ hashSourceTable(sourceTableName, testDir);
+ Counters syncCounters = syncTables(sourceTableName, targetTableName, testDir);
+ assertEqualTables(90, sourceTableName, targetTableName);
+
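+ // The expected counter values follow from the row ranges written in writeTestData below.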
+ assertEquals(60, syncCounters.findCounter(Counter.ROWSWITHDIFFS).getValue());
+ assertEquals(10, syncCounters.findCounter(Counter.SOURCEMISSINGROWS).getValue());
+ assertEquals(10, syncCounters.findCounter(Counter.TARGETMISSINGROWS).getValue());
+ assertEquals(50, syncCounters.findCounter(Counter.SOURCEMISSINGCELLS).getValue());
+ assertEquals(50, syncCounters.findCounter(Counter.TARGETMISSINGCELLS).getValue());
+ assertEquals(20, syncCounters.findCounter(Counter.DIFFERENTCELLVALUES).getValue());
+
+ TEST_UTIL.deleteTable(sourceTableName);
+ TEST_UTIL.deleteTable(targetTableName);
+ TEST_UTIL.cleanupDataTestDirOnTestFS();
+ }
+
+ private void assertEqualTables(int expectedRows, TableName sourceTableName,
+ TableName targetTableName) throws Exception {
+ Table sourceTable = TEST_UTIL.getConnection().getTable(sourceTableName);
+ Table targetTable = TEST_UTIL.getConnection().getTable(targetTableName);
+
+ ResultScanner sourceScanner = sourceTable.getScanner(new Scan());
+ ResultScanner targetScanner = targetTable.getScanner(new Scan());
+
+ for (int i = 0; i < expectedRows; i++) {
+ Result sourceRow = sourceScanner.next();
+ Result targetRow = targetScanner.next();
+
+ LOG.debug("SOURCE row: " + (sourceRow == null ? "null" : Bytes.toInt(sourceRow.getRow()))
+ + " cells:" + sourceRow);
+ LOG.debug("TARGET row: " + (targetRow == null ? "null" : Bytes.toInt(targetRow.getRow()))
+ + " cells:" + targetRow);
+
+ if (sourceRow == null) {
+ Assert.fail("Expected " + expectedRows
+ + " source rows but only found " + i);
+ }
+ if (targetRow == null) {
+ Assert.fail("Expected " + expectedRows
+ + " target rows but only found " + i);
+ }
+ Cell[] sourceCells = sourceRow.rawCells();
+ Cell[] targetCells = targetRow.rawCells();
+ if (sourceCells.length != targetCells.length) {
+ LOG.debug("Source cells: " + Arrays.toString(sourceCells));
+ LOG.debug("Target cells: " + Arrays.toString(targetCells));
+ Assert.fail("Row " + Bytes.toInt(sourceRow.getRow())
+ + " has " + sourceCells.length
+ + " cells in source table but " + targetCells.length
+ + " cells in target table");
+ }
+ for (int j = 0; j < sourceCells.length; j++) {
+ Cell sourceCell = sourceCells[j];
+ Cell targetCell = targetCells[j];
+ try {
+ if (!CellUtil.matchingRow(sourceCell, targetCell)) {
+ Assert.fail("Rows don't match");
+ }
+ if (!CellUtil.matchingFamily(sourceCell, targetCell)) {
+ Assert.fail("Families don't match");
+ }
+ if (!CellUtil.matchingQualifier(sourceCell, targetCell)) {
+ Assert.fail("Qualifiers don't match");
+ }
+ if (!CellUtil.matchingTimestamp(sourceCell, targetCell)) {
+ Assert.fail("Timestamps don't match");
+ }
+ if (!CellUtil.matchingValue(sourceCell, targetCell)) {
+ Assert.fail("Values don't match");
+ }
+ } catch (Throwable t) {
+ LOG.debug("Source cell: " + sourceCell + " target cell: " + targetCell);
+ Throwables.propagate(t);
+ }
+ }
+ }
+ Result sourceRow = sourceScanner.next();
+ if (sourceRow != null) {
+ Assert.fail("Source table has more than " + expectedRows
+ + " rows. Next row: " + Bytes.toInt(sourceRow.getRow()));
+ }
+ Result targetRow = targetScanner.next();
+ if (targetRow != null) {
+ Assert.fail("Target table has more than " + expectedRows
+ + " rows. Next row: " + Bytes.toInt(targetRow.getRow()));
+ }
+ sourceScanner.close();
+ targetScanner.close();
+ sourceTable.close();
+ targetTable.close();
+ }
+
+ private Counters syncTables(TableName sourceTableName, TableName targetTableName,
+ Path testDir) throws Exception {
+ SyncTable syncTable = new SyncTable(TEST_UTIL.getConfiguration());
+ int code = syncTable.run(new String[] {
+ testDir.toString(),
+ sourceTableName.getNameAsString(),
+ targetTableName.getNameAsString()
+ });
+ assertEquals("sync table job failed", 0, code);
+
+ LOG.info("Sync tables completed");
+ return syncTable.counters;
+ }
+
+ private void hashSourceTable(TableName sourceTableName, Path testDir)
+ throws Exception, IOException {
+ int numHashFiles = 3;
+ long batchSize = 100; // should be 2 batches per region
+ int scanBatch = 1;
+ HashTable hashTable = new HashTable(TEST_UTIL.getConfiguration());
+ int code = hashTable.run(new String[] {
+ "--batchsize=" + batchSize,
+ "--numhashfiles=" + numHashFiles,
+ "--scanbatch=" + scanBatch,
+ sourceTableName.getNameAsString(),
+ testDir.toString()});
+ assertEquals("hash table job failed", 0, code);
+
+ FileSystem fs = TEST_UTIL.getTestFileSystem();
+
+ HashTable.TableHash tableHash = HashTable.TableHash.read(fs.getConf(), testDir);
+ assertEquals(sourceTableName.getNameAsString(), tableHash.tableName);
+ assertEquals(batchSize, tableHash.batchSize);
+ assertEquals(numHashFiles, tableHash.numHashFiles);
+ assertEquals(numHashFiles - 1, tableHash.partitions.size());
+
+ LOG.info("Hash table completed");
+ }
+
+ private void writeTestData(TableName sourceTableName, TableName targetTableName)
+ throws Exception {
+ final byte[] family = Bytes.toBytes("family");
+ final byte[] column1 = Bytes.toBytes("c1");
+ final byte[] column2 = Bytes.toBytes("c2");
+ final byte[] value1 = Bytes.toBytes("val1");
+ final byte[] value2 = Bytes.toBytes("val2");
+ final byte[] value3 = Bytes.toBytes("val3");
+
+ int numRows = 100;
+ int sourceRegions = 10;
+ int targetRegions = 6;
+
+ Table sourceTable = TEST_UTIL.createTable(sourceTableName,
+ family, generateSplits(numRows, sourceRegions));
+
+ Table targetTable = TEST_UTIL.createTable(targetTableName,
+ family, generateSplits(numRows, targetRegions));
+
+ long timestamp = 1430764183454L;
+
+ int rowIndex = 0;
+ // a bunch of identical rows
+ for (; rowIndex < 40; rowIndex++) {
+ Put sourcePut = new Put(Bytes.toBytes(rowIndex));
+ sourcePut.addColumn(family, column1, timestamp, value1);
+ sourcePut.addColumn(family, column2, timestamp, value2);
+ sourceTable.put(sourcePut);
+
+ Put targetPut = new Put(Bytes.toBytes(rowIndex));
+ targetPut.addColumn(family, column1, timestamp, value1);
+ targetPut.addColumn(family, column2, timestamp, value2);
+ targetTable.put(targetPut);
+ }
+ // some rows only in the source table
+ // ROWSWITHDIFFS: 10
+ // TARGETMISSINGROWS: 10
+ // TARGETMISSINGCELLS: 20
+ for (; rowIndex < 50; rowIndex++) {
+ Put put = new Put(Bytes.toBytes(rowIndex));
+ put.addColumn(family, column1, timestamp, value1);
+ put.addColumn(family, column2, timestamp, value2);
+ sourceTable.put(put);
+ }
+ // some rows only in the target table
+ // ROWSWITHDIFFS: 10
+ // SOURCEMISSINGROWS: 10
+ // SOURCEMISSINGCELLS: 20
+ for (; rowIndex < 60; rowIndex++) {
+ Put put = new Put(Bytes.toBytes(rowIndex));
+ put.addColumn(family, column1, timestamp, value1);
+ put.addColumn(family, column2, timestamp, value2);
+ targetTable.put(put);
+ }
+ // some rows with 1 missing cell in target table
+ // ROWSWITHDIFFS: 10
+ // TARGETMISSINGCELLS: 10
+ for (; rowIndex < 70; rowIndex++) {
+ Put sourcePut = new Put(Bytes.toBytes(rowIndex));
+ sourcePut.addColumn(family, column1, timestamp, value1);
+ sourcePut.addColumn(family, column2, timestamp, value2);
+ sourceTable.put(sourcePut);
+
+ Put targetPut = new Put(Bytes.toBytes(rowIndex));
+ targetPut.addColumn(family, column1, timestamp, value1);
+ targetTable.put(targetPut);
+ }
+ // some rows with 1 missing cell in source table
+ // ROWSWITHDIFFS: 10
+ // SOURCEMISSINGCELLS: 10
+ for (; rowIndex < 80; rowIndex++) {
+ Put sourcePut = new Put(Bytes.toBytes(rowIndex));
+ sourcePut.addColumn(family, column1, timestamp, value1);
+ sourceTable.put(sourcePut);
+
+ Put targetPut = new Put(Bytes.toBytes(rowIndex));
+ targetPut.addColumn(family, column1, timestamp, value1);
+ targetPut.addColumn(family, column2, timestamp, value2);
+ targetTable.put(targetPut);
+ }
+ // some rows differing only in timestamp
+ // ROWSWITHDIFFS: 10
+ // SOURCEMISSINGCELLS: 20
+ // TARGETMISSINGCELLS: 20
+ for (; rowIndex < 90; rowIndex++) {
+ Put sourcePut = new Put(Bytes.toBytes(rowIndex));
+ sourcePut.addColumn(family, column1, timestamp, column1);
+ sourcePut.addColumn(family, column2, timestamp, value2);
+ sourceTable.put(sourcePut);
+
+ Put targetPut = new Put(Bytes.toBytes(rowIndex));
+ targetPut.addColumn(family, column1, timestamp+1, column1);
+ targetPut.addColumn(family, column2, timestamp-1, value2);
+ targetTable.put(targetPut);
+ }
+ // some rows with different values
+ // ROWSWITHDIFFS: 10
+ // DIFFERENTCELLVALUES: 20
+ for (; rowIndex < numRows; rowIndex++) {
+ Put sourcePut = new Put(Bytes.toBytes(rowIndex));
+ sourcePut.addColumn(family, column1, timestamp, value1);
+ sourcePut.addColumn(family, column2, timestamp, value2);
+ sourceTable.put(sourcePut);
+
+ Put targetPut = new Put(Bytes.toBytes(rowIndex));
+ targetPut.addColumn(family, column1, timestamp, value3);
+ targetPut.addColumn(family, column2, timestamp, value3);
+ targetTable.put(targetPut);
+ }
+
+ sourceTable.close();
+ targetTable.close();
+ }
+
+
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormat.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormat.java
new file mode 100644
index 0000000..b4c6ab9
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormat.java
@@ -0,0 +1,481 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Matchers.anyObject;
+import static org.mockito.Mockito.doAnswer;
+import static org.mockito.Mockito.doReturn;
+import static org.mockito.Mockito.doThrow;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.spy;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.*;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
+import org.apache.hadoop.hbase.filter.Filter;
+import org.apache.hadoop.hbase.filter.RegexStringComparator;
+import org.apache.hadoop.hbase.filter.RowFilter;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.JobConfigurable;
+import org.apache.hadoop.mapred.MiniMRCluster;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.stubbing.Answer;
+
+/**
+ * This tests the TableInputFormat and its recovery semantics
+ *
+ */
+@Category(LargeTests.class)
+public class TestTableInputFormat {
+
+ private static final Log LOG = LogFactory.getLog(TestTableInputFormat.class);
+
+ private final static HBaseTestingUtility UTIL = new HBaseTestingUtility();
+ private static MiniMRCluster mrCluster;
+ static final byte[] FAMILY = Bytes.toBytes("family");
+
+ private static final byte[][] columns = new byte[][] { FAMILY };
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ UTIL.startMiniCluster();
+ }
+
+ @AfterClass
+ public static void afterClass() throws Exception {
+ UTIL.shutdownMiniCluster();
+ }
+
+ @Before
+ public void before() throws IOException {
+ LOG.info("before");
+ UTIL.ensureSomeRegionServersAvailable(1);
+ LOG.info("before done");
+ }
+
+ /**
+ * Set up a table with two rows and values.
+ *
+ * @param tableName
+ * @return
+ * @throws IOException
+ */
+ public static Table createTable(byte[] tableName) throws IOException {
+ return createTable(tableName, new byte[][] { FAMILY });
+ }
+
+ /**
+ * Set up a table with two rows and values per column family.
+ *
+ * @param tableName
+ * @return
+ * @throws IOException
+ */
+ public static Table createTable(byte[] tableName, byte[][] families) throws IOException {
+ Table table = UTIL.createTable(TableName.valueOf(tableName), families);
+ Put p = new Put("aaa".getBytes());
+ for (byte[] family : families) {
+ p.addColumn(family, null, "value aaa".getBytes());
+ }
+ table.put(p);
+ p = new Put("bbb".getBytes());
+ for (byte[] family : families) {
+ p.addColumn(family, null, "value bbb".getBytes());
+ }
+ table.put(p);
+ return table;
+ }
+
+ /**
+ * Verify that the result and key have expected values.
+ *
+ * @param r
+ * @param key
+ * @param expectedKey
+ * @param expectedValue
+ * @return
+ */
+ static boolean checkResult(Result r, ImmutableBytesWritable key,
+ byte[] expectedKey, byte[] expectedValue) {
+ assertEquals(0, key.compareTo(expectedKey));
+ Map<byte[], byte[]> vals = r.getFamilyMap(FAMILY);
+ byte[] value = vals.values().iterator().next();
+ assertTrue(Arrays.equals(value, expectedValue));
+ return true; // if succeed
+ }
+
+ /**
+ * Create table data and run tests on specified htable using the
+ * o.a.h.hbase.mapreduce API.
+ *
+ * @param table
+ * @throws IOException
+ * @throws InterruptedException
+ */
+ static void runTestMapreduce(Table table) throws IOException,
+ InterruptedException {
+ org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl trr =
+ new org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl();
+ Scan s = new Scan();
+ s.setStartRow("aaa".getBytes());
+ s.setStopRow("zzz".getBytes());
+ s.addFamily(FAMILY);
+ trr.setScan(s);
+ trr.setHTable(table);
+
+ trr.initialize(null, null);
+ Result r = new Result();
+ ImmutableBytesWritable key = new ImmutableBytesWritable();
+
+ boolean more = trr.nextKeyValue();
+ assertTrue(more);
+ key = trr.getCurrentKey();
+ r = trr.getCurrentValue();
+ checkResult(r, key, "aaa".getBytes(), "value aaa".getBytes());
+
+ more = trr.nextKeyValue();
+ assertTrue(more);
+ key = trr.getCurrentKey();
+ r = trr.getCurrentValue();
+ checkResult(r, key, "bbb".getBytes(), "value bbb".getBytes());
+
+ // no more data
+ more = trr.nextKeyValue();
+ assertFalse(more);
+ }
+
+ /**
+ * Create a table that throws an IOException on the first scanner next() call
+ *
+ * @throws IOException
+ */
+ static Table createIOEScannerTable(byte[] name, final int failCnt)
+ throws IOException {
+ // build up a mock scanner stuff to fail the first time
+ Answer<ResultScanner> a = new Answer<ResultScanner>() {
+ int cnt = 0;
+
+ @Override
+ public ResultScanner answer(InvocationOnMock invocation) throws Throwable {
+ // first invocation return the busted mock scanner
+ if (cnt++ < failCnt) {
+ // create mock ResultScanner that always fails.
+ Scan scan = mock(Scan.class);
+ doReturn("bogus".getBytes()).when(scan).getStartRow(); // avoid npe
+ ResultScanner scanner = mock(ResultScanner.class);
+ // simulate TimeoutException / IOException
+ doThrow(new IOException("Injected exception")).when(scanner).next();
+ return scanner;
+ }
+
+ // otherwise return the real scanner.
+ return (ResultScanner) invocation.callRealMethod();
+ }
+ };
+
+ Table htable = spy(createTable(name));
+ doAnswer(a).when(htable).getScanner((Scan) anyObject());
+ return htable;
+ }
+
+ /**
+ * Create a table that throws a NotServingRegionException on first scanner
+ * next call
+ *
+ * @throws IOException
+ */
+ static Table createDNRIOEScannerTable(byte[] name, final int failCnt)
+ throws IOException {
+ // build up a mock scanner stuff to fail the first time
+ Answer<ResultScanner> a = new Answer<ResultScanner>() {
+ int cnt = 0;
+
+ @Override
+ public ResultScanner answer(InvocationOnMock invocation) throws Throwable {
+ // first invocation return the busted mock scanner
+ if (cnt++ < failCnt) {
+ // create mock ResultScanner that always fails.
+ Scan scan = mock(Scan.class);
+ doReturn("bogus".getBytes()).when(scan).getStartRow(); // avoid npe
+ ResultScanner scanner = mock(ResultScanner.class);
+
+ invocation.callRealMethod(); // simulate NotServingRegionException
+ doThrow(
+ new NotServingRegionException("Injected simulated TimeoutException"))
+ .when(scanner).next();
+ return scanner;
+ }
+
+ // otherwise return the real scanner.
+ return (ResultScanner) invocation.callRealMethod();
+ }
+ };
+
+ Table htable = spy(createTable(name));
+ doAnswer(a).when(htable).getScanner((Scan) anyObject());
+ return htable;
+ }
+
+ /**
+ * Run test assuming no errors using newer mapreduce api
+ *
+ * @throws IOException
+ * @throws InterruptedException
+ */
+ @Test
+ public void testTableRecordReaderMapreduce() throws IOException,
+ InterruptedException {
+ Table table = createTable("table1-mr".getBytes());
+ runTestMapreduce(table);
+ }
+
+ /**
+ * Run test assuming Scanner IOException failure using newer mapreduce api
+ *
+ * @throws IOException
+ * @throws InterruptedException
+ */
+ @Test
+ public void testTableRecordReaderScannerFailMapreduce() throws IOException,
+ InterruptedException {
+ Table htable = createIOEScannerTable("table2-mr".getBytes(), 1);
+ runTestMapreduce(htable);
+ }
+
+ /**
+ * Run test assuming Scanner IOException failure using newer mapreduce api
+ *
+ * @throws IOException
+ * @throws InterruptedException
+ */
+ @Test(expected = IOException.class)
+ public void testTableRecordReaderScannerFailMapreduceTwice() throws IOException,
+ InterruptedException {
+ Table htable = createIOEScannerTable("table3-mr".getBytes(), 2);
+ runTestMapreduce(htable);
+ }
+
+ /**
+ * Run test assuming NotServingRegionException using newer mapreduce api
+ *
+ * @throws InterruptedException
+ * @throws org.apache.hadoop.hbase.DoNotRetryIOException
+ */
+ @Test
+ public void testTableRecordReaderScannerTimeoutMapreduce()
+ throws IOException, InterruptedException {
+ Table htable = createDNRIOEScannerTable("table4-mr".getBytes(), 1);
+ runTestMapreduce(htable);
+ }
+
+ /**
+ * Run test assuming NotServingRegionException using newer mapreduce api
+ *
+ * @throws InterruptedException
+ * @throws org.apache.hadoop.hbase.NotServingRegionException
+ */
+ @Test(expected = org.apache.hadoop.hbase.NotServingRegionException.class)
+ public void testTableRecordReaderScannerTimeoutMapreduceTwice()
+ throws IOException, InterruptedException {
+ Table htable = createDNRIOEScannerTable("table5-mr".getBytes(), 2);
+ runTestMapreduce(htable);
+ }
+
+ /**
+ * Verify the example we present in javadocs on TableInputFormatBase
+ */
+ @Test
+ public void testExtensionOfTableInputFormatBase()
+ throws IOException, InterruptedException, ClassNotFoundException {
+ LOG.info("testing use of an InputFormat taht extends InputFormatBase");
+ final Table htable = createTable(Bytes.toBytes("exampleTable"),
+ new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") });
+ testInputFormat(ExampleTIF.class);
+ }
+
+ @Test
+ public void testJobConfigurableExtensionOfTableInputFormatBase()
+ throws IOException, InterruptedException, ClassNotFoundException {
+ LOG.info("testing use of an InputFormat taht extends InputFormatBase, " +
+ "using JobConfigurable.");
+ final Table htable = createTable(Bytes.toBytes("exampleJobConfigurableTable"),
+ new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") });
+ testInputFormat(ExampleJobConfigurableTIF.class);
+ }
+
+ @Test
+ public void testDeprecatedExtensionOfTableInputFormatBase()
+ throws IOException, InterruptedException, ClassNotFoundException {
+ LOG.info("testing use of an InputFormat taht extends InputFormatBase, " +
+ "using the approach documented in 0.98.");
+ final Table htable = createTable(Bytes.toBytes("exampleDeprecatedTable"),
+ new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") });
+ testInputFormat(ExampleDeprecatedTIF.class);
+ }
+
+ void testInputFormat(Class<? extends InputFormat> clazz)
+ throws IOException, InterruptedException, ClassNotFoundException {
+ final Job job = MapreduceTestingShim.createJob(UTIL.getConfiguration());
+ job.setInputFormatClass(clazz);
+ job.setOutputFormatClass(NullOutputFormat.class);
+ job.setMapperClass(ExampleVerifier.class);
+ job.setNumReduceTasks(0);
+
+ LOG.debug("submitting job.");
+ assertTrue("job failed!", job.waitForCompletion(true));
+ assertEquals("Saw the wrong number of instances of the filtered-for row.", 2, job.getCounters()
+ .findCounter(TestTableInputFormat.class.getName() + ":row", "aaa").getValue());
+ assertEquals("Saw any instances of the filtered out row.", 0, job.getCounters()
+ .findCounter(TestTableInputFormat.class.getName() + ":row", "bbb").getValue());
+ assertEquals("Saw the wrong number of instances of columnA.", 1, job.getCounters()
+ .findCounter(TestTableInputFormat.class.getName() + ":family", "columnA").getValue());
+ assertEquals("Saw the wrong number of instances of columnB.", 1, job.getCounters()
+ .findCounter(TestTableInputFormat.class.getName() + ":family", "columnB").getValue());
+ assertEquals("Saw the wrong count of values for the filtered-for row.", 2, job.getCounters()
+ .findCounter(TestTableInputFormat.class.getName() + ":value", "value aaa").getValue());
+ assertEquals("Saw the wrong count of values for the filtered-out row.", 0, job.getCounters()
+ .findCounter(TestTableInputFormat.class.getName() + ":value", "value bbb").getValue());
+ }
+
+ public static class ExampleVerifier extends TableMapper<NullWritable, NullWritable> {
+
+ @Override
+ public void map(ImmutableBytesWritable key, Result value, Context context)
+ throws IOException {
+ for (Cell cell : value.listCells()) {
+ context.getCounter(TestTableInputFormat.class.getName() + ":row",
+ Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()))
+ .increment(1L);
+ context.getCounter(TestTableInputFormat.class.getName() + ":family",
+ Bytes.toString(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength()))
+ .increment(1L);
+ context.getCounter(TestTableInputFormat.class.getName() + ":value",
+ Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()))
+ .increment(1L);
+ }
+ }
+
+ }
+
+ public static class ExampleDeprecatedTIF extends TableInputFormatBase implements JobConfigurable {
+
+ @Override
+ public void configure(JobConf job) {
+ try {
+ Connection connection = ConnectionFactory.createConnection(job);
+ Table exampleTable = connection.getTable(TableName.valueOf(("exampleDeprecatedTable")));
+ // mandatory
+ initializeTable(connection, exampleTable.getName());
+ byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
+ Bytes.toBytes("columnB") };
+ // optional
+ Scan scan = new Scan();
+ for (byte[] family : inputColumns) {
+ scan.addFamily(family);
+ }
+ Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*"));
+ scan.setFilter(exampleFilter);
+ setScan(scan);
+ } catch (IOException exception) {
+ throw new RuntimeException("Failed to configure for job.", exception);
+ }
+ }
+
+ }
+
+
+ public static class ExampleJobConfigurableTIF extends TableInputFormatBase
+ implements JobConfigurable {
+
+ @Override
+ public void configure(JobConf job) {
+ try {
+ Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create(job));
+ TableName tableName = TableName.valueOf("exampleJobConfigurableTable");
+ // mandatory
+ initializeTable(connection, tableName);
+ byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
+ Bytes.toBytes("columnB") };
+ //optional
+ Scan scan = new Scan();
+ for (byte[] family : inputColumns) {
+ scan.addFamily(family);
+ }
+ Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*"));
+ scan.setFilter(exampleFilter);
+ setScan(scan);
+ } catch (IOException exception) {
+ throw new RuntimeException("Failed to initialize.", exception);
+ }
+ }
+ }
+
+
+ public static class ExampleTIF extends TableInputFormatBase {
+
+ @Override
+ protected void initialize(JobContext job) throws IOException {
+ Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create(
+ job.getConfiguration()));
+ TableName tableName = TableName.valueOf("exampleTable");
+ // mandatory
+ initializeTable(connection, tableName);
+ byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
+ Bytes.toBytes("columnB") };
+ //optional
+ Scan scan = new Scan();
+ for (byte[] family : inputColumns) {
+ scan.addFamily(family);
+ }
+ Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*"));
+ scan.setFilter(exampleFilter);
+ setScan(scan);
+ }
+
+ }
+}
+
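
For reference, the ExampleTIF / ExampleVerifier classes above are exercised through testInputFormat(). Outside the test harness the same wiring looks roughly like the sketch below. This is a minimal, illustrative driver only: it assumes ExampleTIF and ExampleVerifier are available as top-level classes in the driver's package, and that the exampleTable table (with the columnA/columnB families the test creates) already exists; the job name is illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;

public class ExampleTIFDriver {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    Job job = Job.getInstance(conf, "example-table-input-format"); // illustrative job name
    job.setJarByClass(ExampleTIFDriver.class);

    // ExampleTIF supplies both the table ("exampleTable") and the scan from its
    // initialize(JobContext) method, so no further input configuration is needed here.
    job.setInputFormatClass(ExampleTIF.class);
    job.setMapperClass(ExampleVerifier.class);

    // Map-only job; ExampleVerifier only increments counters, so the output is discarded.
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(NullOutputFormat.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}
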
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatBase.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatBase.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatBase.java
new file mode 100644
index 0000000..699e773
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatBase.java
@@ -0,0 +1,53 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.*;
+
+import java.net.Inet6Address;
+import java.net.InetAddress;
+import java.net.UnknownHostException;
+
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@Category({SmallTests.class})
+public class TestTableInputFormatBase {
+ @Test
+ public void testTableInputFormatBaseReverseDNSForIPv6()
+ throws UnknownHostException {
+ String address = "ipv6.google.com";
+ String localhost = null;
+ InetAddress addr = null;
+ TableInputFormat inputFormat = new TableInputFormat();
+ try {
+ localhost = InetAddress.getByName(address).getCanonicalHostName();
+ addr = Inet6Address.getByName(address);
+ } catch (UnknownHostException e) {
+ // google.com is down, we can probably forgive this test.
+ return;
+ }
+ System.out.println("Should retrun the hostname for this host " +
+ localhost + " addr : " + addr);
+ String actualHostName = inputFormat.reverseDNS(addr);
+ assertEquals("Should retrun the hostname for this host. Expected : " +
+ localhost + " Actual : " + actualHostName, localhost, actualHostName);
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScan1.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScan1.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScan1.java
new file mode 100644
index 0000000..99b40b9
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScan1.java
@@ -0,0 +1,200 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+/**
+ * TestTableInputFormatScan part 1.
+ * @see TestTableInputFormatScanBase
+ */
+@Category({VerySlowMapReduceTests.class, LargeTests.class})
+public class TestTableInputFormatScan1 extends TestTableInputFormatScanBase {
+
+ /**
+ * Tests a MR scan using specific start and stop rows.
+ *
+ * @throws IOException
+ * @throws ClassNotFoundException
+ * @throws InterruptedException
+ */
+ @Test
+ public void testScanEmptyToEmpty()
+ throws IOException, InterruptedException, ClassNotFoundException {
+ testScan(null, null, null);
+ }
+
+ /**
+ * Tests a MR scan using specific start and stop rows.
+ *
+ * @throws IOException
+ * @throws ClassNotFoundException
+ * @throws InterruptedException
+ */
+ @Test
+ public void testScanEmptyToAPP()
+ throws IOException, InterruptedException, ClassNotFoundException {
+ testScan(null, "app", "apo");
+ }
+
+ /**
+ * Tests a MR scan using specific start and stop rows.
+ *
+ * @throws IOException
+ * @throws ClassNotFoundException
+ * @throws InterruptedException
+ */
+ @Test
+ public void testScanEmptyToBBA()
+ throws IOException, InterruptedException, ClassNotFoundException {
+ testScan(null, "bba", "baz");
+ }
+
+ /**
+ * Tests a MR scan using specific start and stop rows.
+ *
+ * @throws IOException
+ * @throws ClassNotFoundException
+ * @throws InterruptedException
+ */
+ @Test
+ public void testScanEmptyToBBB()
+ throws IOException, InterruptedException, ClassNotFoundException {
+ testScan(null, "bbb", "bba");
+ }
+
+ /**
+ * Tests a MR scan using specific start and stop rows.
+ *
+ * @throws IOException
+ * @throws ClassNotFoundException
+ * @throws InterruptedException
+ */
+ @Test
+ public void testScanEmptyToOPP()
+ throws IOException, InterruptedException, ClassNotFoundException {
+ testScan(null, "opp", "opo");
+ }
+
+ /**
+ * Tests a MR scan using a specific number of mappers. The test table has 25 regions,
+ * and all region sizes default to 0; the average region size is 1 (the smallest
+ * positive value). When hbase.mapreduce.input.ratio is set to -1, every region is cut into
+ * two MapReduce input splits, so the number of MR input splits should be 50; when
+ * hbase.mapreduce.input.ratio is set to 100, the sum of all region sizes is less than the
+ * average region size, so all regions are combined into one MapReduce input split.
+ *
+ * @throws IOException
+ * @throws ClassNotFoundException
+ * @throws InterruptedException
+ */
+ @Test
+ public void testGetSplits() throws IOException, InterruptedException, ClassNotFoundException {
+ testNumOfSplits("-1", 52);
+ testNumOfSplits("100", 1);
+ }
+
+ /**
+ * Tests the getSplitKey() method in TableInputFormatBase.java
+ *
+ * @throws IOException
+ * @throws ClassNotFoundException
+ * @throws InterruptedException
+ */
+ @Test
+ public void testGetSplitsPoint() throws IOException, InterruptedException,
+ ClassNotFoundException {
+ byte[] start1 = { 'a', 'a', 'a', 'b', 'c', 'd', 'e', 'f' };
+ byte[] end1 = { 'a', 'a', 'a', 'f', 'f' };
+ byte[] splitPoint1 = { 'a', 'a', 'a', 'd', 'd', -78, 50, -77 };
+ testGetSplitKey(start1, end1, splitPoint1, true);
+
+ byte[] start2 = { '1', '1', '1', '0', '0', '0' };
+ byte[] end2 = { '1', '1', '2', '5', '7', '9', '0' };
+ byte[] splitPoint2 = { '1', '1', '1', -78, -77, -76, -104 };
+ testGetSplitKey(start2, end2, splitPoint2, true);
+
+ byte[] start3 = { 'a', 'a', 'a', 'a', 'a', 'a' };
+ byte[] end3 = { 'a', 'a', 'b' };
+ byte[] splitPoint3 = { 'a', 'a', 'a', -80, -80, -80 };
+ testGetSplitKey(start3, end3, splitPoint3, true);
+
+ byte[] start4 = { 'a', 'a', 'a' };
+ byte[] end4 = { 'a', 'a', 'a', 'z' };
+ byte[] splitPoint4 = { 'a', 'a', 'a', '=' };
+ testGetSplitKey(start4, end4, splitPoint4, true);
+
+ byte[] start5 = { 'a', 'a', 'a' };
+ byte[] end5 = { 'a', 'a', 'b', 'a' };
+ byte[] splitPoint5 = { 'a', 'a', 'a', -80 };
+ testGetSplitKey(start5, end5, splitPoint5, true);
+
+ // Test Case 6: empty start key and end key "hhhqqqww", split point is "h"
+ byte[] start6 = {};
+ byte[] end6 = { 'h', 'h', 'h', 'q', 'q', 'q', 'w', 'w' };
+ byte[] splitPointText6 = { 'h' };
+ byte[] splitPointBinary6 = { 104 };
+ testGetSplitKey(start6, end6, splitPointText6, true);
+ testGetSplitKey(start6, end6, splitPointBinary6, false);
+
+ // Test Case 7: "ffffaaa" and empty key, split point depends on the mode we choose(text key or
+ // binary key).
+ byte[] start7 = { 'f', 'f', 'f', 'f', 'a', 'a', 'a' };
+ byte[] end7 = {};
+ byte[] splitPointText7 = { 'f', '~', '~', '~', '~', '~', '~' };
+ byte[] splitPointBinary7 = { 'f', -1, -1, -1, -1, -1, -1 };
+ testGetSplitKey(start7, end7, splitPointText7, true);
+ testGetSplitKey(start7, end7, splitPointBinary7, false);
+
+ // Test Case 8: both start key and end key are empty. Split point depends on the mode we
+ // choose (text key or binary key).
+ byte[] start8 = {};
+ byte[] end8 = {};
+ byte[] splitPointText8 = { 'O' };
+ byte[] splitPointBinary8 = { 0 };
+ testGetSplitKey(start8, end8, splitPointText8, true);
+ testGetSplitKey(start8, end8, splitPointBinary8, false);
+
+ // Test Case 9: Binary Key example
+ byte[] start9 = { 13, -19, 126, 127 };
+ byte[] end9 = { 13, -19, 127, 0 };
+ byte[] splitPoint9 = { 13, -19, 126, -65 };
+ testGetSplitKey(start9, end9, splitPoint9, false);
+
+ // Test Case 10: Binary key split when the start key is an unsigned byte and the end key is a
+ // signed byte
+ byte[] start10 = { 'x' };
+ byte[] end10 = { -128 };
+ byte[] splitPoint10 = { '|' };
+ testGetSplitKey(start10, end10, splitPoint10, false);
+
+ // Test Case 11: Binary key split when the start key is a signed byte and the end key is a
+ // signed byte
+ byte[] start11 = { -100 };
+ byte[] end11 = { -90 };
+ byte[] splitPoint11 = { -95 };
+ testGetSplitKey(start11, end11, splitPoint11, false);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScan2.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScan2.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScan2.java
new file mode 100644
index 0000000..02f893f
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScan2.java
@@ -0,0 +1,118 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+/**
+ * TestTableInputFormatScan part 2.
+ * @see TestTableInputFormatScanBase
+ */
+@Category({VerySlowMapReduceTests.class, LargeTests.class})
+public class TestTableInputFormatScan2 extends TestTableInputFormatScanBase {
+
+ /**
+ * Tests a MR scan using specific start and stop rows.
+ *
+ * @throws IOException
+ * @throws ClassNotFoundException
+ * @throws InterruptedException
+ */
+ @Test
+ public void testScanOBBToOPP()
+ throws IOException, InterruptedException, ClassNotFoundException {
+ testScan("obb", "opp", "opo");
+ }
+
+ /**
+ * Tests a MR scan using specific start and stop rows.
+ *
+ * @throws IOException
+ * @throws ClassNotFoundException
+ * @throws InterruptedException
+ */
+ @Test
+ public void testScanOBBToQPP()
+ throws IOException, InterruptedException, ClassNotFoundException {
+ testScan("obb", "qpp", "qpo");
+ }
+
+ /**
+ * Tests a MR scan using specific start and stop rows.
+ *
+ * @throws IOException
+ * @throws ClassNotFoundException
+ * @throws InterruptedException
+ */
+ @Test
+ public void testScanOPPToEmpty()
+ throws IOException, InterruptedException, ClassNotFoundException {
+ testScan("opp", null, "zzz");
+ }
+
+ /**
+ * Tests a MR scan using specific start and stop rows.
+ *
+ * @throws IOException
+ * @throws ClassNotFoundException
+ * @throws InterruptedException
+ */
+ @Test
+ public void testScanYYXToEmpty()
+ throws IOException, InterruptedException, ClassNotFoundException {
+ testScan("yyx", null, "zzz");
+ }
+
+ /**
+ * Tests a MR scan using specific start and stop rows.
+ *
+ * @throws IOException
+ * @throws ClassNotFoundException
+ * @throws InterruptedException
+ */
+ @Test
+ public void testScanYYYToEmpty()
+ throws IOException, InterruptedException, ClassNotFoundException {
+ testScan("yyy", null, "zzz");
+ }
+
+ /**
+ * Tests a MR scan using specific start and stop rows.
+ *
+ * @throws IOException
+ * @throws ClassNotFoundException
+ * @throws InterruptedException
+ */
+ @Test
+ public void testScanYZYToEmpty()
+ throws IOException, InterruptedException, ClassNotFoundException {
+ testScan("yzy", null, "zzz");
+ }
+
+ @Test
+ public void testScanFromConfiguration()
+ throws IOException, InterruptedException, ClassNotFoundException {
+ testScanFromConfiguration("bba", "bbd", "bbc");
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatBase.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatBase.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatBase.java
deleted file mode 100644
index e18b3aa..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatBase.java
+++ /dev/null
@@ -1,297 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-import java.text.MessageFormat;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.HRegionLocation;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.RegionLocator;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.Pair;
-import org.apache.hadoop.hbase.util.RegionSizeCalculator;
-import org.apache.hadoop.mapreduce.InputFormat;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-
-import java.util.Map;
-import java.util.HashMap;
-import java.util.Iterator;
-/**
- * A base for {@link MultiTableInputFormat}s. Receives a list of
- * {@link Scan} instances that define the input tables and
- * filters etc. Subclasses may use other TableRecordReader implementations.
- */
-@InterfaceAudience.Public
-public abstract class MultiTableInputFormatBase extends
- InputFormat<ImmutableBytesWritable, Result> {
-
- private static final Log LOG = LogFactory.getLog(MultiTableInputFormatBase.class);
-
- /** Holds the set of scans used to define the input. */
- private List<Scan> scans;
-
- /** The reader scanning the table, can be a custom one. */
- private TableRecordReader tableRecordReader = null;
-
- /**
- * Builds a TableRecordReader. If no TableRecordReader was provided, uses the
- * default.
- *
- * @param split The split to work with.
- * @param context The current context.
- * @return The newly created record reader.
- * @throws IOException When creating the reader fails.
- * @throws InterruptedException when record reader initialization fails
- * @see org.apache.hadoop.mapreduce.InputFormat#createRecordReader(
- * org.apache.hadoop.mapreduce.InputSplit,
- * org.apache.hadoop.mapreduce.TaskAttemptContext)
- */
- @Override
- public RecordReader<ImmutableBytesWritable, Result> createRecordReader(
- InputSplit split, TaskAttemptContext context)
- throws IOException, InterruptedException {
- TableSplit tSplit = (TableSplit) split;
- LOG.info(MessageFormat.format("Input split length: {0} bytes.", tSplit.getLength()));
-
- if (tSplit.getTable() == null) {
- throw new IOException("Cannot create a record reader because of a"
- + " previous error. Please look at the previous logs lines from"
- + " the task's full log for more details.");
- }
- final Connection connection = ConnectionFactory.createConnection(context.getConfiguration());
- Table table = connection.getTable(tSplit.getTable());
-
- if (this.tableRecordReader == null) {
- this.tableRecordReader = new TableRecordReader();
- }
- final TableRecordReader trr = this.tableRecordReader;
-
- try {
- Scan sc = tSplit.getScan();
- sc.setStartRow(tSplit.getStartRow());
- sc.setStopRow(tSplit.getEndRow());
- trr.setScan(sc);
- trr.setTable(table);
- return new RecordReader<ImmutableBytesWritable, Result>() {
-
- @Override
- public void close() throws IOException {
- trr.close();
- connection.close();
- }
-
- @Override
- public ImmutableBytesWritable getCurrentKey() throws IOException, InterruptedException {
- return trr.getCurrentKey();
- }
-
- @Override
- public Result getCurrentValue() throws IOException, InterruptedException {
- return trr.getCurrentValue();
- }
-
- @Override
- public float getProgress() throws IOException, InterruptedException {
- return trr.getProgress();
- }
-
- @Override
- public void initialize(InputSplit inputsplit, TaskAttemptContext context)
- throws IOException, InterruptedException {
- trr.initialize(inputsplit, context);
- }
-
- @Override
- public boolean nextKeyValue() throws IOException, InterruptedException {
- return trr.nextKeyValue();
- }
- };
- } catch (IOException ioe) {
- // If there is an exception make sure that all
- // resources are closed and released.
- trr.close();
- connection.close();
- throw ioe;
- }
- }
-
- /**
- * Calculates the splits that will serve as input for the map tasks. The
- * number of splits matches the number of regions in a table.
- *
- * @param context The current job context.
- * @return The list of input splits.
- * @throws IOException When creating the list of splits fails.
- * @see org.apache.hadoop.mapreduce.InputFormat#getSplits(org.apache.hadoop.mapreduce.JobContext)
- */
- @Override
- public List<InputSplit> getSplits(JobContext context) throws IOException {
- if (scans.isEmpty()) {
- throw new IOException("No scans were provided.");
- }
-
- Map<TableName, List<Scan>> tableMaps = new HashMap<>();
- for (Scan scan : scans) {
- byte[] tableNameBytes = scan.getAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME);
- if (tableNameBytes == null)
- throw new IOException("A scan object did not have a table name");
-
- TableName tableName = TableName.valueOf(tableNameBytes);
-
- List<Scan> scanList = tableMaps.get(tableName);
- if (scanList == null) {
- scanList = new ArrayList<>();
- tableMaps.put(tableName, scanList);
- }
- scanList.add(scan);
- }
-
- List<InputSplit> splits = new ArrayList<>();
- Iterator iter = tableMaps.entrySet().iterator();
- while (iter.hasNext()) {
- Map.Entry<TableName, List<Scan>> entry = (Map.Entry<TableName, List<Scan>>) iter.next();
- TableName tableName = entry.getKey();
- List<Scan> scanList = entry.getValue();
-
- try (Connection conn = ConnectionFactory.createConnection(context.getConfiguration());
- Table table = conn.getTable(tableName);
- RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
- RegionSizeCalculator sizeCalculator = new RegionSizeCalculator(
- regionLocator, conn.getAdmin());
- Pair<byte[][], byte[][]> keys = regionLocator.getStartEndKeys();
- for (Scan scan : scanList) {
- if (keys == null || keys.getFirst() == null || keys.getFirst().length == 0) {
- throw new IOException("Expecting at least one region for table : "
- + tableName.getNameAsString());
- }
- int count = 0;
-
- byte[] startRow = scan.getStartRow();
- byte[] stopRow = scan.getStopRow();
-
- for (int i = 0; i < keys.getFirst().length; i++) {
- if (!includeRegionInSplit(keys.getFirst()[i], keys.getSecond()[i])) {
- continue;
- }
-
- if ((startRow.length == 0 || keys.getSecond()[i].length == 0 ||
- Bytes.compareTo(startRow, keys.getSecond()[i]) < 0) &&
- (stopRow.length == 0 || Bytes.compareTo(stopRow,
- keys.getFirst()[i]) > 0)) {
- byte[] splitStart = startRow.length == 0 ||
- Bytes.compareTo(keys.getFirst()[i], startRow) >= 0 ?
- keys.getFirst()[i] : startRow;
- byte[] splitStop = (stopRow.length == 0 ||
- Bytes.compareTo(keys.getSecond()[i], stopRow) <= 0) &&
- keys.getSecond()[i].length > 0 ?
- keys.getSecond()[i] : stopRow;
-
- HRegionLocation hregionLocation = regionLocator.getRegionLocation(
- keys.getFirst()[i], false);
- String regionHostname = hregionLocation.getHostname();
- HRegionInfo regionInfo = hregionLocation.getRegionInfo();
- String encodedRegionName = regionInfo.getEncodedName();
- long regionSize = sizeCalculator.getRegionSize(
- regionInfo.getRegionName());
-
- TableSplit split = new TableSplit(table.getName(),
- scan, splitStart, splitStop, regionHostname,
- encodedRegionName, regionSize);
-
- splits.add(split);
-
- if (LOG.isDebugEnabled())
- LOG.debug("getSplits: split -> " + (count++) + " -> " + split);
- }
- }
- }
- }
- }
-
- return splits;
- }
-
- /**
- * Test if the given region is to be included in the InputSplit while
- * splitting the regions of a table.
- * <p>
- * This optimization is effective when there is a specific reasoning to
- * exclude an entire region from the M-R job, (and hence, not contributing to
- * the InputSplit), given the start and end keys of the same. <br>
- * Useful when we need to remember the last-processed top record and revisit
- * the [last, current) interval for M-R processing, continuously. In addition
- * to reducing InputSplits, reduces the load on the region server as well, due
- * to the ordering of the keys. <br>
- * <br>
- * Note: It is possible that <code>endKey.length() == 0 </code> , for the last
- * (recent) region. <br>
- * Override this method, if you want to bulk exclude regions altogether from
- * M-R. By default, no region is excluded (i.e. all regions are included).
- *
- * @param startKey Start key of the region
- * @param endKey End key of the region
- * @return true, if this region needs to be included as part of the input
- * (default).
- */
- protected boolean includeRegionInSplit(final byte[] startKey,
- final byte[] endKey) {
- return true;
- }
-
- /**
- * Allows subclasses to get the list of {@link Scan} objects.
- */
- protected List<Scan> getScans() {
- return this.scans;
- }
-
- /**
- * Allows subclasses to set the list of {@link Scan} objects.
- *
- * @param scans The list of {@link Scan} used to define the input
- */
- protected void setScans(List<Scan> scans) {
- this.scans = scans;
- }
-
- /**
- * Allows subclasses to set the {@link TableRecordReader}.
- *
- * @param tableRecordReader A different {@link TableRecordReader}
- * implementation.
- */
- protected void setTableRecordReader(TableRecordReader tableRecordReader) {
- this.tableRecordReader = tableRecordReader;
- }
-}
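
The includeRegionInSplit() hook documented above is the intended extension point for dropping whole regions from a multi-table job, and getSplits() resolves each scan's target table from the Scan.SCAN_ATTRIBUTES_TABLE_NAME attribute. A minimal sketch of a subclass that does both follows; the table names and cutoff key are illustrative only, and real code would normally derive its scans from the job Configuration rather than hard-coding them.

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.MultiTableInputFormatBase;
import org.apache.hadoop.hbase.util.Bytes;

// Hypothetical subclass, not part of this patch.
public class CutoffMultiTableInputFormat extends MultiTableInputFormatBase
    implements Configurable {

  private static final byte[] CUTOFF = Bytes.toBytes("m"); // illustrative cutoff key
  private Configuration conf;

  @Override
  public void setConf(Configuration configuration) {
    this.conf = configuration;
    List<Scan> scans = new ArrayList<>();
    for (String tableName : new String[] { "tableA", "tableB" }) { // illustrative names
      Scan scan = new Scan();
      // getSplits() resolves the target table from this attribute, so it is mandatory.
      scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(tableName));
      scans.add(scan);
    }
    setScans(scans);
  }

  @Override
  public Configuration getConf() {
    return conf;
  }

  @Override
  protected boolean includeRegionInSplit(byte[] startKey, byte[] endKey) {
    // Keep only regions that extend past the cutoff; the last region of a table has an
    // empty end key and is always kept.
    return endKey.length == 0 || Bytes.compareTo(endKey, CUTOFF) > 0;
  }
}
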
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableOutputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableOutputFormat.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableOutputFormat.java
deleted file mode 100644
index 4cc784f..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableOutputFormat.java
+++ /dev/null
@@ -1,176 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.BufferedMutator;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Delete;
-import org.apache.hadoop.hbase.client.Mutation;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Durability;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.OutputCommitter;
-import org.apache.hadoop.mapreduce.OutputFormat;
-import org.apache.hadoop.mapreduce.RecordWriter;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-
-/**
- * <p>
- * Hadoop output format that writes to one or more HBase tables. The key is
- * taken to be the table name while the output value <em>must</em> be either a
- * {@link Put} or a {@link Delete} instance. All tables must already exist, and
- * all Puts and Deletes must reference only valid column families.
- * </p>
- *
- * <p>
- * Write-ahead logging (WAL) for Puts can be disabled by setting
- * {@link #WAL_PROPERTY} to {@link #WAL_OFF}. Default value is {@link #WAL_ON}.
- * Note that disabling write-ahead logging is only appropriate for jobs where
- * loss of data due to region server failure can be tolerated (for example,
- * because it is easy to rerun a bulk import).
- * </p>
- */
-@InterfaceAudience.Public
-public class MultiTableOutputFormat extends OutputFormat<ImmutableBytesWritable, Mutation> {
- /** Set this to {@link #WAL_OFF} to turn off write-ahead logging (WAL) */
- public static final String WAL_PROPERTY = "hbase.mapreduce.multitableoutputformat.wal";
- /** Property value to use write-ahead logging */
- public static final boolean WAL_ON = true;
- /** Property value to disable write-ahead logging */
- public static final boolean WAL_OFF = false;
- /**
- * Record writer for outputting to multiple HTables.
- */
- protected static class MultiTableRecordWriter extends
- RecordWriter<ImmutableBytesWritable, Mutation> {
- private static final Log LOG = LogFactory.getLog(MultiTableRecordWriter.class);
- Connection connection;
- Map<ImmutableBytesWritable, BufferedMutator> mutatorMap = new HashMap<>();
- Configuration conf;
- boolean useWriteAheadLogging;
-
- /**
- * @param conf
- * HBaseConfiguration to use
- * @param useWriteAheadLogging
- * whether to use write ahead logging. This can be turned off (
- * <tt>false</tt>) to improve performance when bulk loading data.
- */
- public MultiTableRecordWriter(Configuration conf,
- boolean useWriteAheadLogging) throws IOException {
- LOG.debug("Created new MultiTableRecordReader with WAL "
- + (useWriteAheadLogging ? "on" : "off"));
- this.conf = conf;
- this.useWriteAheadLogging = useWriteAheadLogging;
- }
-
- /**
- * @param tableName
- * the name of the table, as a string
- * @return the named mutator
- * @throws IOException
- * if there is a problem opening a table
- */
- BufferedMutator getBufferedMutator(ImmutableBytesWritable tableName) throws IOException {
- if(this.connection == null){
- this.connection = ConnectionFactory.createConnection(conf);
- }
- if (!mutatorMap.containsKey(tableName)) {
- LOG.debug("Opening HTable \"" + Bytes.toString(tableName.get())+ "\" for writing");
-
- BufferedMutator mutator =
- connection.getBufferedMutator(TableName.valueOf(tableName.get()));
- mutatorMap.put(tableName, mutator);
- }
- return mutatorMap.get(tableName);
- }
-
- @Override
- public void close(TaskAttemptContext context) throws IOException {
- for (BufferedMutator mutator : mutatorMap.values()) {
- mutator.close();
- }
- if (connection != null) {
- connection.close();
- }
- }
-
- /**
- * Writes an action (Put or Delete) to the specified table.
- *
- * @param tableName
- * the table being updated.
- * @param action
- * the update, either a put or a delete.
- * @throws IllegalArgumentException
- * if the action is not a put or a delete.
- */
- @Override
- public void write(ImmutableBytesWritable tableName, Mutation action) throws IOException {
- BufferedMutator mutator = getBufferedMutator(tableName);
- // The actions are not immutable, so we defensively copy them
- if (action instanceof Put) {
- Put put = new Put((Put) action);
- put.setDurability(useWriteAheadLogging ? Durability.SYNC_WAL
- : Durability.SKIP_WAL);
- mutator.mutate(put);
- } else if (action instanceof Delete) {
- Delete delete = new Delete((Delete) action);
- mutator.mutate(delete);
- } else
- throw new IllegalArgumentException(
- "action must be either Delete or Put");
- }
- }
-
- @Override
- public void checkOutputSpecs(JobContext context) throws IOException,
- InterruptedException {
- // we can't know ahead of time if it's going to blow up when the user
- // passes a table name that doesn't exist, so nothing useful here.
- }
-
- @Override
- public OutputCommitter getOutputCommitter(TaskAttemptContext context)
- throws IOException, InterruptedException {
- return new TableOutputCommitter();
- }
-
- @Override
- public RecordWriter<ImmutableBytesWritable, Mutation> getRecordWriter(TaskAttemptContext context)
- throws IOException, InterruptedException {
- Configuration conf = context.getConfiguration();
- return new MultiTableRecordWriter(HBaseConfiguration.create(conf),
- conf.getBoolean(WAL_PROPERTY, WAL_ON));
- }
-
-}
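
As the class javadoc above says, the output key names the destination table and the value must be a Put or Delete, and WAL_OFF is only appropriate when a failed load can simply be rerun. A hedged sketch of the job-side wiring with an illustrative routing mapper follows; the table names, column family/qualifier, and text-file input handling are examples only, not part of this patch.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.MultiTableOutputFormat;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class MultiTableWriteExample {

  /** Illustrative mapper: routes each text line to one of two tables by its first byte. */
  public static class RoutingMapper
      extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
    private static final ImmutableBytesWritable TABLE_A =
        new ImmutableBytesWritable(Bytes.toBytes("tableA")); // illustrative table names
    private static final ImmutableBytesWritable TABLE_B =
        new ImmutableBytesWritable(Bytes.toBytes("tableB"));

    @Override
    protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      byte[] row = Bytes.toBytes(value.toString());
      if (row.length == 0) {
        return; // skip blank lines
      }
      Put put = new Put(row);
      put.addColumn(Bytes.toBytes("f"), Bytes.toBytes("q"), row); // illustrative family/qualifier
      // The output key is the destination table name, as MultiTableOutputFormat expects.
      context.write(row[0] < 'm' ? TABLE_A : TABLE_B, put);
    }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // Optional, per the class javadoc: turn the WAL off only if a failed job can be rerun.
    conf.setBoolean(MultiTableOutputFormat.WAL_PROPERTY, MultiTableOutputFormat.WAL_OFF);

    Job job = Job.getInstance(conf, "multi-table-write"); // illustrative job name
    job.setJarByClass(MultiTableWriteExample.class);
    job.setMapperClass(RoutingMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(MultiTableOutputFormat.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Put.class);
    FileInputFormat.addInputPath(job, new Path(args[0])); // plain text input to route
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}
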
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableSnapshotInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableSnapshotInputFormat.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableSnapshotInputFormat.java
deleted file mode 100644
index 0f07a58..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableSnapshotInputFormat.java
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.JobContext;
-
-import java.io.IOException;
-import java.util.Collection;
-import java.util.List;
-import java.util.Map;
-
-/**
- * MultiTableSnapshotInputFormat generalizes
- * {@link org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat}
- * allowing a MapReduce job to run over one or more table snapshots, with one or more scans
- * configured for each.
- * Internally, the input format delegates to
- * {@link org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat}
- * and thus has the same performance advantages;
- * see {@link org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat} for
- * more details.
- * Usage is similar to TableSnapshotInputFormat, with the following exception:
- * initMultiTableSnapshotMapperJob takes in a map
- * from snapshot name to a collection of scans. For each snapshot in the map, each corresponding
- * scan will be applied;
- * the overall dataset for the job is defined by the concatenation of the regions and tables
- * included in each snapshot/scan
- * pair.
- * {@link org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil#initMultiTableSnapshotMapperJob
- * (java.util.Map, Class, Class, Class, org.apache.hadoop.mapreduce.Job, boolean, org.apache
- * .hadoop.fs.Path)}
- * can be used to configure the job.
- * <pre>{@code
- * Job job = new Job(conf);
- * Map<String, Collection<Scan>> snapshotScans = ImmutableMap.of(
- * "snapshot1", ImmutableList.of(new Scan(Bytes.toBytes("a"), Bytes.toBytes("b"))),
- * "snapshot2", ImmutableList.of(new Scan(Bytes.toBytes("1"), Bytes.toBytes("2")))
- * );
- * Path restoreDir = new Path("/tmp/snapshot_restore_dir");
- * TableMapReduceUtil.initMultiTableSnapshotMapperJob(
- * snapshotScans, MyTableMapper.class, MyMapKeyOutput.class,
- * MyMapOutputValueWritable.class, job, true, restoreDir);
- * }
- * </pre>
- * Internally, this input format restores each snapshot into a subdirectory of the given tmp
- * directory. Input splits and
- * record readers are created as described in {@link org.apache.hadoop.hbase.mapreduce
- * .TableSnapshotInputFormat}
- * (one per region).
- * See {@link org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat} for more notes on
- * permissioning; the
- * same caveats apply here.
- *
- * @see org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat
- * @see org.apache.hadoop.hbase.client.TableSnapshotScanner
- */
-@InterfaceAudience.Public
-public class MultiTableSnapshotInputFormat extends TableSnapshotInputFormat {
-
- private final MultiTableSnapshotInputFormatImpl delegate;
-
- public MultiTableSnapshotInputFormat() {
- this.delegate = new MultiTableSnapshotInputFormatImpl();
- }
-
- @Override
- public List<InputSplit> getSplits(JobContext jobContext)
- throws IOException, InterruptedException {
- List<TableSnapshotInputFormatImpl.InputSplit> splits =
- delegate.getSplits(jobContext.getConfiguration());
- List<InputSplit> rtn = Lists.newArrayListWithCapacity(splits.size());
-
- for (TableSnapshotInputFormatImpl.InputSplit split : splits) {
- rtn.add(new TableSnapshotInputFormat.TableSnapshotRegionSplit(split));
- }
-
- return rtn;
- }
-
- public static void setInput(Configuration configuration,
- Map<String, Collection<Scan>> snapshotScans, Path tmpRestoreDir) throws IOException {
- new MultiTableSnapshotInputFormatImpl().setInput(configuration, snapshotScans, tmpRestoreDir);
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableSnapshotInputFormatImpl.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableSnapshotInputFormatImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableSnapshotInputFormatImpl.java
deleted file mode 100644
index 4331c0f..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableSnapshotInputFormatImpl.java
+++ /dev/null
@@ -1,252 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.Maps;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.classification.InterfaceStability;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper;
-import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
-import org.apache.hadoop.hbase.util.ConfigurationUtil;
-import org.apache.hadoop.hbase.util.FSUtils;
-
-import java.io.IOException;
-import java.util.AbstractMap;
-import java.util.Collection;
-import java.util.List;
-import java.util.Map;
-import java.util.UUID;
-
-/**
- * Shared implementation of mapreduce code over multiple table snapshots.
- * Utilized by both mapreduce ({@link org.apache.hadoop.hbase.mapreduce
- * .MultiTableSnapshotInputFormat}) and mapred
- * ({@link org.apache.hadoop.hbase.mapred.MultiTableSnapshotInputFormat}) implementations.
- */
-@InterfaceAudience.LimitedPrivate({ "HBase" })
-@InterfaceStability.Evolving
-public class MultiTableSnapshotInputFormatImpl {
-
- private static final Log LOG = LogFactory.getLog(MultiTableSnapshotInputFormatImpl.class);
-
- public static final String RESTORE_DIRS_KEY =
- "hbase.MultiTableSnapshotInputFormat.restore.snapshotDirMapping";
- public static final String SNAPSHOT_TO_SCANS_KEY =
- "hbase.MultiTableSnapshotInputFormat.snapshotsToScans";
-
- /**
- * Configure conf to read from snapshotScans, with snapshots restored to a subdirectory of
- * restoreDir.
- * Sets: {@link #RESTORE_DIRS_KEY}, {@link #SNAPSHOT_TO_SCANS_KEY}
- *
- * @param conf
- * @param snapshotScans
- * @param restoreDir
- * @throws IOException
- */
- public void setInput(Configuration conf, Map<String, Collection<Scan>> snapshotScans,
- Path restoreDir) throws IOException {
- Path rootDir = FSUtils.getRootDir(conf);
- FileSystem fs = rootDir.getFileSystem(conf);
-
- setSnapshotToScans(conf, snapshotScans);
- Map<String, Path> restoreDirs =
- generateSnapshotToRestoreDirMapping(snapshotScans.keySet(), restoreDir);
- setSnapshotDirs(conf, restoreDirs);
- restoreSnapshots(conf, restoreDirs, fs);
- }
-
- /**
- * Return the list of splits extracted from the scans/snapshots pushed to conf by
- * {@link
- * #setInput(org.apache.hadoop.conf.Configuration, java.util.Map, org.apache.hadoop.fs.Path)}
- *
- * @param conf Configuration to determine splits from
- * @return Return the list of splits extracted from the scans/snapshots pushed to conf
- * @throws IOException
- */
- public List<TableSnapshotInputFormatImpl.InputSplit> getSplits(Configuration conf)
- throws IOException {
- Path rootDir = FSUtils.getRootDir(conf);
- FileSystem fs = rootDir.getFileSystem(conf);
-
- List<TableSnapshotInputFormatImpl.InputSplit> rtn = Lists.newArrayList();
-
- Map<String, Collection<Scan>> snapshotsToScans = getSnapshotsToScans(conf);
- Map<String, Path> snapshotsToRestoreDirs = getSnapshotDirs(conf);
- for (Map.Entry<String, Collection<Scan>> entry : snapshotsToScans.entrySet()) {
- String snapshotName = entry.getKey();
-
- Path restoreDir = snapshotsToRestoreDirs.get(snapshotName);
-
- SnapshotManifest manifest =
- TableSnapshotInputFormatImpl.getSnapshotManifest(conf, snapshotName, rootDir, fs);
- List<HRegionInfo> regionInfos =
- TableSnapshotInputFormatImpl.getRegionInfosFromManifest(manifest);
-
- for (Scan scan : entry.getValue()) {
- List<TableSnapshotInputFormatImpl.InputSplit> splits =
- TableSnapshotInputFormatImpl.getSplits(scan, manifest, regionInfos, restoreDir, conf);
- rtn.addAll(splits);
- }
- }
- return rtn;
- }
-
- /**
- * Retrieve the snapshot name -> list<scan> mapping pushed to configuration by
- * {@link #setSnapshotToScans(org.apache.hadoop.conf.Configuration, java.util.Map)}
- *
- * @param conf Configuration to extract name -> list<scan> mappings from.
- * @return the snapshot name -> list<scan> mapping pushed to configuration
- * @throws IOException
- */
- public Map<String, Collection<Scan>> getSnapshotsToScans(Configuration conf) throws IOException {
-
- Map<String, Collection<Scan>> rtn = Maps.newHashMap();
-
- for (Map.Entry<String, String> entry : ConfigurationUtil
- .getKeyValues(conf, SNAPSHOT_TO_SCANS_KEY)) {
- String snapshotName = entry.getKey();
- String scan = entry.getValue();
-
- Collection<Scan> snapshotScans = rtn.get(snapshotName);
- if (snapshotScans == null) {
- snapshotScans = Lists.newArrayList();
- rtn.put(snapshotName, snapshotScans);
- }
-
- snapshotScans.add(TableMapReduceUtil.convertStringToScan(scan));
- }
-
- return rtn;
- }
-
- /**
- * Push snapshotScans to conf (under the key {@link #SNAPSHOT_TO_SCANS_KEY})
- *
- * @param conf
- * @param snapshotScans
- * @throws IOException
- */
- public void setSnapshotToScans(Configuration conf, Map<String, Collection<Scan>> snapshotScans)
- throws IOException {
- // flatten out snapshotScans for serialization to the job conf
- List<Map.Entry<String, String>> snapshotToSerializedScans = Lists.newArrayList();
-
- for (Map.Entry<String, Collection<Scan>> entry : snapshotScans.entrySet()) {
- String snapshotName = entry.getKey();
- Collection<Scan> scans = entry.getValue();
-
- // serialize all scans and map them to the appropriate snapshot
- for (Scan scan : scans) {
- snapshotToSerializedScans.add(new AbstractMap.SimpleImmutableEntry<>(snapshotName,
- TableMapReduceUtil.convertScanToString(scan)));
- }
- }
-
- ConfigurationUtil.setKeyValues(conf, SNAPSHOT_TO_SCANS_KEY, snapshotToSerializedScans);
- }
-
- /**
- * Retrieve the directories into which snapshots have been restored from
- * ({@link #RESTORE_DIRS_KEY})
- *
- * @param conf Configuration to extract restore directories from
- * @return the directories into which snapshots have been restored from
- * @throws IOException
- */
- public Map<String, Path> getSnapshotDirs(Configuration conf) throws IOException {
- List<Map.Entry<String, String>> kvps = ConfigurationUtil.getKeyValues(conf, RESTORE_DIRS_KEY);
- Map<String, Path> rtn = Maps.newHashMapWithExpectedSize(kvps.size());
-
- for (Map.Entry<String, String> kvp : kvps) {
- rtn.put(kvp.getKey(), new Path(kvp.getValue()));
- }
-
- return rtn;
- }
-
- public void setSnapshotDirs(Configuration conf, Map<String, Path> snapshotDirs) {
- Map<String, String> toSet = Maps.newHashMap();
-
- for (Map.Entry<String, Path> entry : snapshotDirs.entrySet()) {
- toSet.put(entry.getKey(), entry.getValue().toString());
- }
-
- ConfigurationUtil.setKeyValues(conf, RESTORE_DIRS_KEY, toSet.entrySet());
- }
-
- /**
- * Generate a random path underneath baseRestoreDir for each snapshot in snapshots and
- * return a map from the snapshot to the restore directory.
- *
- * @param snapshots collection of snapshot names to restore
- * @param baseRestoreDir base directory under which all snapshots in snapshots will be restored
- * @return a mapping from snapshot name to the directory in which that snapshot has been restored
- */
- private Map<String, Path> generateSnapshotToRestoreDirMapping(Collection<String> snapshots,
- Path baseRestoreDir) {
- Map<String, Path> rtn = Maps.newHashMap();
-
- for (String snapshotName : snapshots) {
- Path restoreSnapshotDir =
- new Path(baseRestoreDir, snapshotName + "__" + UUID.randomUUID().toString());
- rtn.put(snapshotName, restoreSnapshotDir);
- }
-
- return rtn;
- }
-
- /**
- * Restore each (snapshot name, restore directory) pair in snapshotToDir
- *
- * @param conf configuration to restore with
- * @param snapshotToDir mapping from snapshot names to restore directories
- * @param fs filesystem to do snapshot restoration on
- * @throws IOException
- */
- public void restoreSnapshots(Configuration conf, Map<String, Path> snapshotToDir, FileSystem fs)
- throws IOException {
- // TODO: restore from record readers to parallelize.
- Path rootDir = FSUtils.getRootDir(conf);
-
- for (Map.Entry<String, Path> entry : snapshotToDir.entrySet()) {
- String snapshotName = entry.getKey();
- Path restoreDir = entry.getValue();
- LOG.info("Restoring snapshot " + snapshotName + " into " + restoreDir
- + " for MultiTableSnapshotInputFormat");
- restoreSnapshot(conf, snapshotName, rootDir, restoreDir, fs);
- }
- }
-
- void restoreSnapshot(Configuration conf, String snapshotName, Path rootDir, Path restoreDir,
- FileSystem fs) throws IOException {
- RestoreSnapshotHelper.copySnapshotForScanner(conf, fs, rootDir, restoreDir, snapshotName);
- }
-
-}
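
The setSnapshotToScans()/getSnapshotsToScans() pair above is just a flattening of a snapshot-name to scans map into Configuration key/value pairs, with each scan serialized via TableMapReduceUtil.convertScanToString. A small round-trip sketch follows; the snapshot names and row ranges are illustrative (mirroring the MultiTableSnapshotInputFormat javadoc example), and a real job would normally go through setInput() or TableMapReduceUtil.initMultiTableSnapshotMapperJob rather than calling these methods directly.

import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.MultiTableSnapshotInputFormatImpl;
import org.apache.hadoop.hbase.util.Bytes;

public class SnapshotScanConfigRoundTrip {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    MultiTableSnapshotInputFormatImpl impl = new MultiTableSnapshotInputFormatImpl();

    // Illustrative snapshot names and row ranges.
    Map<String, Collection<Scan>> snapshotScans = new HashMap<>();
    snapshotScans.put("snapshot1",
        Arrays.asList(new Scan(Bytes.toBytes("a"), Bytes.toBytes("b"))));
    snapshotScans.put("snapshot2",
        Arrays.asList(new Scan(Bytes.toBytes("1"), Bytes.toBytes("2"))));

    // Flatten the map into the conf under SNAPSHOT_TO_SCANS_KEY ...
    impl.setSnapshotToScans(conf, snapshotScans);
    // ... and rebuild it from the stored (snapshot name, serialized scan) pairs.
    Map<String, Collection<Scan>> restored = impl.getSnapshotsToScans(conf);
    System.out.println("Snapshots found in the configuration: " + restored.keySet());
  }
}
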
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultithreadedTableMapper.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultithreadedTableMapper.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultithreadedTableMapper.java
deleted file mode 100644
index d1dba1d..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultithreadedTableMapper.java
+++ /dev/null
@@ -1,301 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-import java.lang.reflect.Constructor;
-import java.lang.reflect.Method;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.mapreduce.Counter;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.MapContext;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.OutputCommitter;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.RecordWriter;
-import org.apache.hadoop.mapreduce.StatusReporter;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.TaskAttemptID;
-import org.apache.hadoop.util.ReflectionUtils;
-
-
-/**
- * Multithreaded implementation for {@link org.apache.hadoop.hbase.mapreduce.TableMapper}.
- * <p>
- * It can be used in place of a regular single-threaded mapper when the Map operation is not
- * CPU bound, in order to improve throughput.
- * <p>
- * Mapper implementations using this MapRunnable must be thread-safe.
- * <p>
- * The Map-Reduce job has to be configured with the mapper to use via
- * {@link #setMapperClass} and the number of threads the thread pool can use with the
- * {@link #setNumberOfThreads} method. The default value is 10 threads.
- * <p>
- */
-
-public class MultithreadedTableMapper<K2, V2> extends TableMapper<K2, V2> {
- private static final Log LOG = LogFactory.getLog(MultithreadedTableMapper.class);
- private Class<? extends Mapper<ImmutableBytesWritable, Result,K2,V2>> mapClass;
- private Context outer;
- private ExecutorService executor;
- public static final String NUMBER_OF_THREADS = "hbase.mapreduce.multithreadedmapper.threads";
- public static final String MAPPER_CLASS = "hbase.mapreduce.multithreadedmapper.mapclass";
-
- /**
- * The number of threads in the thread pool that will run the map function.
- * @param job the job
- * @return the number of threads
- */
- public static int getNumberOfThreads(JobContext job) {
- return job.getConfiguration().
- getInt(NUMBER_OF_THREADS, 10);
- }
-
- /**
- * Set the number of threads in the pool for running maps.
- * @param job the job to modify
- * @param threads the new number of threads
- */
- public static void setNumberOfThreads(Job job, int threads) {
- job.getConfiguration().setInt(NUMBER_OF_THREADS,
- threads);
- }
-
- /**
- * Get the application's mapper class.
- * @param <K2> the map's output key type
- * @param <V2> the map's output value type
- * @param job the job
- * @return the mapper class to run
- */
- @SuppressWarnings("unchecked")
- public static <K2,V2>
- Class<Mapper<ImmutableBytesWritable, Result,K2,V2>> getMapperClass(JobContext job) {
- return (Class<Mapper<ImmutableBytesWritable, Result,K2,V2>>)
- job.getConfiguration().getClass( MAPPER_CLASS,
- Mapper.class);
- }
-
- /**
- * Set the application's mapper class.
- * @param <K2> the map output key type
- * @param <V2> the map output value type
- * @param job the job to modify
- * @param cls the class to use as the mapper
- */
- public static <K2,V2>
- void setMapperClass(Job job,
- Class<? extends Mapper<ImmutableBytesWritable, Result,K2,V2>> cls) {
- if (MultithreadedTableMapper.class.isAssignableFrom(cls)) {
- throw new IllegalArgumentException("Can't have recursive " +
- "MultithreadedTableMapper instances.");
- }
- job.getConfiguration().setClass(MAPPER_CLASS,
- cls, Mapper.class);
- }
-
- /**
- * Run the application's maps using a thread pool.
- */
- @Override
- public void run(Context context) throws IOException, InterruptedException {
- outer = context;
- int numberOfThreads = getNumberOfThreads(context);
- mapClass = getMapperClass(context);
- if (LOG.isDebugEnabled()) {
- LOG.debug("Configuring multithread runner to use " + numberOfThreads +
- " threads");
- }
- executor = Executors.newFixedThreadPool(numberOfThreads);
- for(int i=0; i < numberOfThreads; ++i) {
- MapRunner thread = new MapRunner(context);
- executor.execute(thread);
- }
- executor.shutdown();
- while (!executor.isTerminated()) {
- // wait till all the threads are done
- Thread.sleep(1000);
- }
- }
-
- private class SubMapRecordReader
- extends RecordReader<ImmutableBytesWritable, Result> {
- private ImmutableBytesWritable key;
- private Result value;
- private Configuration conf;
-
- @Override
- public void close() throws IOException {
- }
-
- @Override
- public float getProgress() throws IOException, InterruptedException {
- return 0;
- }
-
- @Override
- public void initialize(InputSplit split,
- TaskAttemptContext context
- ) throws IOException, InterruptedException {
- conf = context.getConfiguration();
- }
-
- @Override
- public boolean nextKeyValue() throws IOException, InterruptedException {
- synchronized (outer) {
- if (!outer.nextKeyValue()) {
- return false;
- }
- key = ReflectionUtils.copy(outer.getConfiguration(),
- outer.getCurrentKey(), key);
- value = ReflectionUtils.copy(conf, outer.getCurrentValue(), value);
- return true;
- }
- }
-
- public ImmutableBytesWritable getCurrentKey() {
- return key;
- }
-
- @Override
- public Result getCurrentValue() {
- return value;
- }
- }
-
- private class SubMapRecordWriter extends RecordWriter<K2,V2> {
-
- @Override
- public void close(TaskAttemptContext context) throws IOException,
- InterruptedException {
- }
-
- @Override
- public void write(K2 key, V2 value) throws IOException,
- InterruptedException {
- synchronized (outer) {
- outer.write(key, value);
- }
- }
- }
-
- private class SubMapStatusReporter extends StatusReporter {
-
- @Override
- public Counter getCounter(Enum<?> name) {
- return outer.getCounter(name);
- }
-
- @Override
- public Counter getCounter(String group, String name) {
- return outer.getCounter(group, name);
- }
-
- @Override
- public void progress() {
- outer.progress();
- }
-
- @Override
- public void setStatus(String status) {
- outer.setStatus(status);
- }
-
- public float getProgress() {
- return 0;
- }
- }
-
- @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="REC_CATCH_EXCEPTION",
- justification="Don't understand why FB is complaining about this one. We do throw exception")
- private class MapRunner implements Runnable {
- private Mapper<ImmutableBytesWritable, Result, K2,V2> mapper;
- private Context subcontext;
-
- @SuppressWarnings({ "rawtypes", "unchecked" })
- MapRunner(Context context) throws IOException, InterruptedException {
- mapper = ReflectionUtils.newInstance(mapClass,
- context.getConfiguration());
- try {
- Constructor c = context.getClass().getConstructor(
- Mapper.class,
- Configuration.class,
- TaskAttemptID.class,
- RecordReader.class,
- RecordWriter.class,
- OutputCommitter.class,
- StatusReporter.class,
- InputSplit.class);
- c.setAccessible(true);
- subcontext = (Context) c.newInstance(
- mapper,
- outer.getConfiguration(),
- outer.getTaskAttemptID(),
- new SubMapRecordReader(),
- new SubMapRecordWriter(),
- context.getOutputCommitter(),
- new SubMapStatusReporter(),
- outer.getInputSplit());
- } catch (Exception e) {
- try {
- Constructor c = Class.forName("org.apache.hadoop.mapreduce.task.MapContextImpl").getConstructor(
- Configuration.class,
- TaskAttemptID.class,
- RecordReader.class,
- RecordWriter.class,
- OutputCommitter.class,
- StatusReporter.class,
- InputSplit.class);
- c.setAccessible(true);
- MapContext mc = (MapContext) c.newInstance(
- outer.getConfiguration(),
- outer.getTaskAttemptID(),
- new SubMapRecordReader(),
- new SubMapRecordWriter(),
- context.getOutputCommitter(),
- new SubMapStatusReporter(),
- outer.getInputSplit());
- Class<?> wrappedMapperClass = Class.forName("org.apache.hadoop.mapreduce.lib.map.WrappedMapper");
- Method getMapContext = wrappedMapperClass.getMethod("getMapContext", MapContext.class);
- subcontext = (Context) getMapContext.invoke(wrappedMapperClass.newInstance(), mc);
- } catch (Exception ee) { // FindBugs: REC_CATCH_EXCEPTION
- // rethrow as IOE
- throw new IOException(e);
- }
- }
- }
-
- @Override
- public void run() {
- try {
- mapper.run(subcontext);
- } catch (Throwable ie) {
- LOG.error("Problem in running map.", ie);
- }
- }
- }
-}
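
For reference, a minimal job-setup sketch for the mapper deleted above. It mirrors the wiring used by TestMultithreadedTableMapper later in this patch; "mytable", the column family, and MyMapper (a thread-safe TableMapper emitting ImmutableBytesWritable/Put) are placeholders, not part of this change, and imports and exception handling are elided:

    Job job = Job.getInstance(conf, "multithreaded scan");
    Scan scan = new Scan();
    scan.addFamily(Bytes.toBytes("contents"));
    // Run the placeholder MyMapper inside the multithreaded wrapper.
    TableMapReduceUtil.initTableMapperJob("mytable", scan,
        MultithreadedTableMapper.class, ImmutableBytesWritable.class, Put.class, job);
    MultithreadedTableMapper.setMapperClass(job, MyMapper.class);
    MultithreadedTableMapper.setNumberOfThreads(job, 16);  // default is 10
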
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MutationSerialization.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MutationSerialization.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MutationSerialization.java
deleted file mode 100644
index 8997da9..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MutationSerialization.java
+++ /dev/null
@@ -1,98 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Delete;
-import org.apache.hadoop.hbase.client.Mutation;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.MutationProto;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.MutationProto.MutationType;
-import org.apache.hadoop.io.serializer.Deserializer;
-import org.apache.hadoop.io.serializer.Serialization;
-import org.apache.hadoop.io.serializer.Serializer;
-
-@InterfaceAudience.Public
-public class MutationSerialization implements Serialization<Mutation> {
- @Override
- public boolean accept(Class<?> c) {
- return Mutation.class.isAssignableFrom(c);
- }
-
- @Override
- public Deserializer<Mutation> getDeserializer(Class<Mutation> c) {
- return new MutationDeserializer();
- }
-
- @Override
- public Serializer<Mutation> getSerializer(Class<Mutation> c) {
- return new MutationSerializer();
- }
-
- private static class MutationDeserializer implements Deserializer<Mutation> {
- private InputStream in;
-
- @Override
- public void close() throws IOException {
- in.close();
- }
-
- @Override
- public Mutation deserialize(Mutation mutation) throws IOException {
- MutationProto proto = MutationProto.parseDelimitedFrom(in);
- return ProtobufUtil.toMutation(proto);
- }
-
- @Override
- public void open(InputStream in) throws IOException {
- this.in = in;
- }
-
- }
- private static class MutationSerializer implements Serializer<Mutation> {
- private OutputStream out;
-
- @Override
- public void close() throws IOException {
- out.close();
- }
-
- @Override
- public void open(OutputStream out) throws IOException {
- this.out = out;
- }
-
- @Override
- public void serialize(Mutation mutation) throws IOException {
- MutationType type;
- if (mutation instanceof Put) {
- type = MutationType.PUT;
- } else if (mutation instanceof Delete) {
- type = MutationType.DELETE;
- } else {
- throw new IllegalArgumentException("Only Put and Delete are supported");
- }
- ProtobufUtil.toMutation(type, mutation).writeDelimitedTo(out);
- }
- }
-}
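
A small round-trip sketch using only the API of the class deleted above (in a real job the class is registered through Hadoop's serialization factory rather than called directly; java.io and org.apache.hadoop.io.serializer imports are elided):

    MutationSerialization serialization = new MutationSerialization();
    Put put = new Put(Bytes.toBytes("row1"));
    put.addColumn(Bytes.toBytes("f"), Bytes.toBytes("q"), Bytes.toBytes("v"));

    // Write the Put as a delimited MutationProto...
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    Serializer<Mutation> serializer = serialization.getSerializer(Mutation.class);
    serializer.open(bytes);
    serializer.serialize(put);
    serializer.close();

    // ...and read it back as a Mutation.
    Deserializer<Mutation> deserializer = serialization.getDeserializer(Mutation.class);
    deserializer.open(new ByteArrayInputStream(bytes.toByteArray()));
    Mutation copy = deserializer.deserialize(null);
    deserializer.close();
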
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/PutCombiner.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/PutCombiner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/PutCombiner.java
deleted file mode 100644
index f01e84f..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/PutCombiner.java
+++ /dev/null
@@ -1,98 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.Map.Entry;
-import java.util.Map;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.KeyValueUtil;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.mapreduce.Reducer;
-
-/**
- * Combine Puts. Merges Put instances grouped by <code>K</code> into a single
- * instance.
- * @see TableMapReduceUtil
- */
-@InterfaceAudience.Public
-public class PutCombiner<K> extends Reducer<K, Put, K, Put> {
- private static final Log LOG = LogFactory.getLog(PutCombiner.class);
-
- @Override
- protected void reduce(K row, Iterable<Put> vals, Context context)
- throws IOException, InterruptedException {
- // Using HeapSize to create an upper bound on the memory size of
- // the puts and flush some portion of the content while looping. This
- // flush could result in multiple Puts for a single rowkey. That is
- // acceptable because Combiner is run as an optimization and it's not
- // critical that all Puts are grouped perfectly.
- long threshold = context.getConfiguration().getLong(
- "putcombiner.row.threshold", 1L * (1<<30));
- int cnt = 0;
- long curSize = 0;
- Put put = null;
- Map<byte[], List<Cell>> familyMap = null;
- for (Put p : vals) {
- cnt++;
- if (put == null) {
- put = p;
- familyMap = put.getFamilyCellMap();
- } else {
- for (Entry<byte[], List<Cell>> entry : p.getFamilyCellMap()
- .entrySet()) {
- List<Cell> cells = familyMap.get(entry.getKey());
- List<Cell> kvs = (cells != null) ? (List<Cell>) cells : null;
- for (Cell cell : entry.getValue()) {
- KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
- curSize += kv.heapSize();
- if (kvs != null) {
- kvs.add(kv);
- }
- }
- if (cells == null) {
- familyMap.put(entry.getKey(), entry.getValue());
- }
- }
- if (cnt % 10 == 0) context.setStatus("Combine " + cnt);
- if (curSize > threshold) {
- if (LOG.isDebugEnabled()) {
- LOG.debug(String.format("Combined %d Put(s) into %d.", cnt, 1));
- }
- context.write(row, put);
- put = null;
- curSize = 0;
- cnt = 0;
- }
- }
- }
- if (put != null) {
- if (LOG.isDebugEnabled()) {
- LOG.debug(String.format("Combined %d Put(s) into %d.", cnt, 1));
- }
- context.write(row, put);
- }
- }
-}
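
As a minimal sketch, assuming a Job whose map output is K/Put pairs, the combiner is attached with the standard Hadoop API; the flush threshold is the same property read in reduce() above:

    // Flush combined Puts once their estimated heap size passes ~256 MB (default is 1 GB).
    job.getConfiguration().setLong("putcombiner.row.threshold", 256L * 1024 * 1024);
    job.setCombinerClass(PutCombiner.class);
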
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/PutSortReducer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/PutSortReducer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/PutSortReducer.java
deleted file mode 100644
index 17ab9cb..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/PutSortReducer.java
+++ /dev/null
@@ -1,147 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-import java.util.TreeSet;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.ArrayBackedTag;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellComparator;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.KeyValueUtil;
-import org.apache.hadoop.hbase.Tag;
-import org.apache.hadoop.hbase.TagType;
-import org.apache.hadoop.hbase.TagUtil;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.exceptions.DeserializationException;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.security.visibility.CellVisibility;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.util.StringUtils;
-
-/**
- * Emits sorted Puts.
- * Reads in all Puts from the passed Iterator, sorts them, then emits
- * them in sorted order. If a row has many columns, sorting can use a
- * lot of memory.
- * @see HFileOutputFormat2
- * @see KeyValueSortReducer
- */
-@InterfaceAudience.Public
-public class PutSortReducer extends
- Reducer<ImmutableBytesWritable, Put, ImmutableBytesWritable, KeyValue> {
- // the cell creator
- private CellCreator kvCreator;
-
- @Override
- protected void
- setup(Reducer<ImmutableBytesWritable, Put, ImmutableBytesWritable, KeyValue>.Context context)
- throws IOException, InterruptedException {
- Configuration conf = context.getConfiguration();
- this.kvCreator = new CellCreator(conf);
- }
-
- @Override
- protected void reduce(
- ImmutableBytesWritable row,
- java.lang.Iterable<Put> puts,
- Reducer<ImmutableBytesWritable, Put,
- ImmutableBytesWritable, KeyValue>.Context context)
- throws java.io.IOException, InterruptedException
- {
- // although reduce() is called per-row, handle pathological case
- long threshold = context.getConfiguration().getLong(
- "putsortreducer.row.threshold", 1L * (1<<30));
- Iterator<Put> iter = puts.iterator();
- while (iter.hasNext()) {
- TreeSet<KeyValue> map = new TreeSet<>(CellComparator.COMPARATOR);
- long curSize = 0;
- // stop at the end or the RAM threshold
- List<Tag> tags = new ArrayList<>();
- while (iter.hasNext() && curSize < threshold) {
- // clear the tags
- tags.clear();
- Put p = iter.next();
- long t = p.getTTL();
- if (t != Long.MAX_VALUE) {
- // add TTL tag if found
- tags.add(new ArrayBackedTag(TagType.TTL_TAG_TYPE, Bytes.toBytes(t)));
- }
- byte[] acl = p.getACL();
- if (acl != null) {
- // add ACL tag if found
- tags.add(new ArrayBackedTag(TagType.ACL_TAG_TYPE, acl));
- }
- try {
- CellVisibility cellVisibility = p.getCellVisibility();
- if (cellVisibility != null) {
- // add the visibility labels if any
- tags.addAll(kvCreator.getVisibilityExpressionResolver()
- .createVisibilityExpTags(cellVisibility.getExpression()));
- }
- } catch (DeserializationException e) {
- // We just throw exception here. Should we allow other mutations to proceed by
- // just ignoring the bad one?
- throw new IOException("Invalid visibility expression found in mutation " + p, e);
- }
- for (List<Cell> cells: p.getFamilyCellMap().values()) {
- for (Cell cell: cells) {
- // Creating the KV which needs to be directly written to HFiles. Using the Facade
- // KVCreator for creation of kvs.
- KeyValue kv = null;
- TagUtil.carryForwardTags(tags, cell);
- if (!tags.isEmpty()) {
- kv = (KeyValue) kvCreator.create(cell.getRowArray(), cell.getRowOffset(),
- cell.getRowLength(), cell.getFamilyArray(), cell.getFamilyOffset(),
- cell.getFamilyLength(), cell.getQualifierArray(), cell.getQualifierOffset(),
- cell.getQualifierLength(), cell.getTimestamp(), cell.getValueArray(),
- cell.getValueOffset(), cell.getValueLength(), tags);
- } else {
- kv = KeyValueUtil.ensureKeyValue(cell);
- }
- if (map.add(kv)) {// don't count duplicated kv into size
- curSize += kv.heapSize();
- }
- }
- }
- }
- context.setStatus("Read " + map.size() + " entries of " + map.getClass()
- + "(" + StringUtils.humanReadableInt(curSize) + ")");
- int index = 0;
- for (KeyValue kv : map) {
- context.write(row, kv);
- if (++index % 100 == 0)
- context.setStatus("Wrote " + index);
- }
-
- // if we have more entries to process
- if (iter.hasNext()) {
- // force flush because we cannot guarantee intra-row sorted order
- context.write(null, null);
- }
- }
- }
-}
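
A sketch of how this reducer fits into an HFile-writing job; HFileOutputFormat2.configureIncrementalLoad normally does this wiring for you, the explicit calls below only make the moving parts visible, and the threshold property is the one read in reduce() above:

    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(Put.class);
    job.setReducerClass(PutSortReducer.class);
    // Shrink the per-call sort buffer from the 1 GB default if rows are very wide.
    job.getConfiguration().setLong("putsortreducer.row.threshold", 512L * 1024 * 1024);
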
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/ResultSerialization.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/ResultSerialization.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/ResultSerialization.java
deleted file mode 100644
index dff04b6..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/ResultSerialization.java
+++ /dev/null
@@ -1,158 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.DataInput;
-import java.io.DataInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.serializer.Deserializer;
-import org.apache.hadoop.io.serializer.Serialization;
-import org.apache.hadoop.io.serializer.Serializer;
-
-@InterfaceAudience.Public
-public class ResultSerialization extends Configured implements Serialization<Result> {
- private static final Log LOG = LogFactory.getLog(ResultSerialization.class);
- // The following configuration property indicates import file format version.
- public static final String IMPORT_FORMAT_VER = "hbase.import.version";
-
- @Override
- public boolean accept(Class<?> c) {
- return Result.class.isAssignableFrom(c);
- }
-
- @Override
- public Deserializer<Result> getDeserializer(Class<Result> c) {
- // check input format version
- Configuration conf = getConf();
- if (conf != null) {
- String inputVersion = conf.get(IMPORT_FORMAT_VER);
- if (inputVersion != null && inputVersion.equals("0.94")) {
- LOG.info("Load exported file using deserializer for HBase 0.94 format");
- return new Result94Deserializer();
- }
- }
-
- return new ResultDeserializer();
- }
-
- @Override
- public Serializer<Result> getSerializer(Class<Result> c) {
- return new ResultSerializer();
- }
-
- /**
- * The following deserializer class is used to load exported file of 0.94
- */
- private static class Result94Deserializer implements Deserializer<Result> {
- private DataInputStream in;
-
- @Override
- public void close() throws IOException {
- in.close();
- }
-
- @Override
- public Result deserialize(Result mutation) throws IOException {
- int totalBuffer = in.readInt();
- if (totalBuffer == 0) {
- return Result.EMPTY_RESULT;
- }
- byte[] buf = new byte[totalBuffer];
- readChunked(in, buf, 0, totalBuffer);
- List<Cell> kvs = new ArrayList<>();
- int offset = 0;
- while (offset < totalBuffer) {
- int keyLength = Bytes.toInt(buf, offset);
- offset += Bytes.SIZEOF_INT;
- kvs.add(new KeyValue(buf, offset, keyLength));
- offset += keyLength;
- }
- return Result.create(kvs);
- }
-
- @Override
- public void open(InputStream in) throws IOException {
- if (!(in instanceof DataInputStream)) {
- throw new IOException("Wrong input stream instance passed in");
- }
- this.in = (DataInputStream) in;
- }
-
- private void readChunked(final DataInput in, byte[] dest, int ofs, int len) throws IOException {
- int maxRead = 8192;
-
- for (; ofs < len; ofs += maxRead)
- in.readFully(dest, ofs, Math.min(len - ofs, maxRead));
- }
- }
-
- private static class ResultDeserializer implements Deserializer<Result> {
- private InputStream in;
-
- @Override
- public void close() throws IOException {
- in.close();
- }
-
- @Override
- public Result deserialize(Result mutation) throws IOException {
- ClientProtos.Result proto = ClientProtos.Result.parseDelimitedFrom(in);
- return ProtobufUtil.toResult(proto);
- }
-
- @Override
- public void open(InputStream in) throws IOException {
- this.in = in;
- }
- }
-
- private static class ResultSerializer implements Serializer<Result> {
- private OutputStream out;
-
- @Override
- public void close() throws IOException {
- out.close();
- }
-
- @Override
- public void open(OutputStream out) throws IOException {
- this.out = out;
- }
-
- @Override
- public void serialize(Result result) throws IOException {
- ProtobufUtil.toResult(result).writeDelimitedTo(out);
- }
- }
-}
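
The only non-default knob in the class deleted above is the import format version; a sketch, assuming an existing job, of selecting the 0.94 deserializer when importing files exported by HBase 0.94:

    Configuration conf = job.getConfiguration();
    // Parse the pre-protobuf 0.94 on-disk format instead of the current one.
    conf.set(ResultSerialization.IMPORT_FORMAT_VER, "0.94");   // "hbase.import.version"
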
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java
deleted file mode 100644
index 2e0591e..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java
+++ /dev/null
@@ -1,265 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.ArrayList;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.commons.lang.StringUtils;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.filter.FilterBase;
-import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
-import org.apache.hadoop.hbase.filter.MultiRowRangeFilter;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapreduce.Counter;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-/**
- * A job with just a map phase to count rows. Map outputs table rows IF the
- * input row has columns that have content.
- */
-@InterfaceAudience.Public
-public class RowCounter extends Configured implements Tool {
-
- private static final Log LOG = LogFactory.getLog(RowCounter.class);
-
- /** Name of this 'program'. */
- static final String NAME = "rowcounter";
-
- private final static String JOB_NAME_CONF_KEY = "mapreduce.job.name";
- private final static String EXPECTED_COUNT_KEY = RowCounter.class.getName() + ".expected_count";
-
- /**
- * Mapper that runs the count.
- */
- static class RowCounterMapper
- extends TableMapper<ImmutableBytesWritable, Result> {
-
- /** Counter enumeration to count the actual rows. */
- public static enum Counters {ROWS}
-
- /**
- * Maps the data.
- *
- * @param row The current table row key.
- * @param values The columns.
- * @param context The current context.
- * @throws IOException When something is broken with the data.
- * @see org.apache.hadoop.mapreduce.Mapper#map(Object, Object, Context)
- */
- @Override
- public void map(ImmutableBytesWritable row, Result values,
- Context context)
- throws IOException {
- // Count every row containing data, whether it's in qualifiers or values
- context.getCounter(Counters.ROWS).increment(1);
- }
- }
-
- /**
- * Sets up the actual job.
- *
- * @param conf The current configuration.
- * @param args The command line parameters.
- * @return The newly created job.
- * @throws IOException When setting up the job fails.
- */
- public static Job createSubmittableJob(Configuration conf, String[] args)
- throws IOException {
- String tableName = args[0];
- List<MultiRowRangeFilter.RowRange> rowRangeList = null;
- long startTime = 0;
- long endTime = 0;
-
- StringBuilder sb = new StringBuilder();
-
- final String rangeSwitch = "--range=";
- final String startTimeArgKey = "--starttime=";
- final String endTimeArgKey = "--endtime=";
- final String expectedCountArg = "--expected-count=";
-
- // First argument is the table name; options start from the second
- for (int i = 1; i < args.length; i++) {
- if (args[i].startsWith(rangeSwitch)) {
- try {
- rowRangeList = parseRowRangeParameter(args[i], rangeSwitch);
- } catch (IllegalArgumentException e) {
- return null;
- }
- continue;
- }
- if (args[i].startsWith(startTimeArgKey)) {
- startTime = Long.parseLong(args[i].substring(startTimeArgKey.length()));
- continue;
- }
- if (args[i].startsWith(endTimeArgKey)) {
- endTime = Long.parseLong(args[i].substring(endTimeArgKey.length()));
- continue;
- }
- if (args[i].startsWith(expectedCountArg)) {
- conf.setLong(EXPECTED_COUNT_KEY,
- Long.parseLong(args[i].substring(expectedCountArg.length())));
- continue;
- }
- // if no switch, assume column names
- sb.append(args[i]);
- sb.append(" ");
- }
- if (endTime < startTime) {
- printUsage("--endtime=" + endTime + " needs to be greater than --starttime=" + startTime);
- return null;
- }
-
- Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
- job.setJarByClass(RowCounter.class);
- Scan scan = new Scan();
- scan.setCacheBlocks(false);
- setScanFilter(scan, rowRangeList);
- if (sb.length() > 0) {
- for (String columnName : sb.toString().trim().split(" ")) {
- String family = StringUtils.substringBefore(columnName, ":");
- String qualifier = StringUtils.substringAfter(columnName, ":");
-
- if (StringUtils.isBlank(qualifier)) {
- scan.addFamily(Bytes.toBytes(family));
- }
- else {
- scan.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier));
- }
- }
- }
- scan.setTimeRange(startTime, endTime == 0 ? HConstants.LATEST_TIMESTAMP : endTime);
- job.setOutputFormatClass(NullOutputFormat.class);
- TableMapReduceUtil.initTableMapperJob(tableName, scan,
- RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);
- job.setNumReduceTasks(0);
- return job;
- }
-
- private static List<MultiRowRangeFilter.RowRange> parseRowRangeParameter(
- String arg, String rangeSwitch) {
- final String[] ranges = arg.substring(rangeSwitch.length()).split(";");
- final List<MultiRowRangeFilter.RowRange> rangeList = new ArrayList<>();
- for (String range : ranges) {
- String[] startEnd = range.split(",", 2);
- if (startEnd.length != 2 || startEnd[1].contains(",")) {
- printUsage("Please specify range in such format as \"--range=a,b\" " +
- "or, with only one boundary, \"--range=,b\" or \"--range=a,\"");
- throw new IllegalArgumentException("Wrong range specification: " + range);
- }
- String startKey = startEnd[0];
- String endKey = startEnd[1];
- rangeList.add(new MultiRowRangeFilter.RowRange(
- Bytes.toBytesBinary(startKey), true,
- Bytes.toBytesBinary(endKey), false));
- }
- return rangeList;
- }
-
- /**
- * Sets filter {@link FilterBase} to the {@link Scan} instance.
- * If provided rowRangeList contains more than one element,
- * method sets filter which is instance of {@link MultiRowRangeFilter}.
- * Otherwise, method sets filter which is instance of {@link FirstKeyOnlyFilter}.
- * If rowRangeList contains exactly one element, startRow and stopRow are set to the scan.
- * @param scan
- * @param rowRangeList
- */
- private static void setScanFilter(Scan scan, List<MultiRowRangeFilter.RowRange> rowRangeList) {
- final int size = rowRangeList == null ? 0 : rowRangeList.size();
- if (size <= 1) {
- scan.setFilter(new FirstKeyOnlyFilter());
- }
- if (size == 1) {
- MultiRowRangeFilter.RowRange range = rowRangeList.get(0);
- scan.setStartRow(range.getStartRow()); //inclusive
- scan.setStopRow(range.getStopRow()); //exclusive
- } else if (size > 1) {
- scan.setFilter(new MultiRowRangeFilter(rowRangeList));
- }
- }
-
- /*
- * @param errorMessage Message to print before the usage when an error occurs.
- */
- private static void printUsage(String errorMessage) {
- System.err.println("ERROR: " + errorMessage);
- printUsage();
- }
-
- /**
- * Prints usage without error message.
- * Note that we don't document --expected-count, because it's intended for tests.
- */
- private static void printUsage() {
- System.err.println("Usage: RowCounter [options] <tablename> " +
- "[--starttime=[start] --endtime=[end] " +
- "[--range=[startKey],[endKey][;[startKey],[endKey]...]] [<column1> <column2>...]");
- System.err.println("For performance consider the following options:\n"
- + "-Dhbase.client.scanner.caching=100\n"
- + "-Dmapreduce.map.speculative=false");
- }
-
- @Override
- public int run(String[] args) throws Exception {
- if (args.length < 1) {
- printUsage("Wrong number of parameters: " + args.length);
- return -1;
- }
- Job job = createSubmittableJob(getConf(), args);
- if (job == null) {
- return -1;
- }
- boolean success = job.waitForCompletion(true);
- final long expectedCount = getConf().getLong(EXPECTED_COUNT_KEY, -1);
- if (success && expectedCount != -1) {
- final Counter counter = job.getCounters().findCounter(RowCounterMapper.Counters.ROWS);
- success = expectedCount == counter.getValue();
- if (!success) {
- LOG.error("Failing job because count of '" + counter.getValue() +
- "' does not match expected count of '" + expectedCount + "'");
- }
- }
- return (success ? 0 : 1);
- }
-
- /**
- * Main entry point.
- * @param args The command line parameters.
- * @throws Exception When running the job fails.
- */
- public static void main(String[] args) throws Exception {
- int errCode = ToolRunner.run(HBaseConfiguration.create(), new RowCounter(), args);
- System.exit(errCode);
- }
-
-}
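
For reference, a programmatic invocation sketch equivalent to the usage printed above (table name, range and column are placeholders; this is what main() does with command-line arguments):

    String[] args = new String[] {
        "mytable",
        "--range=rowA,rowM",                    // optional row range, end key exclusive
        "--starttime=0",
        "--endtime=" + System.currentTimeMillis(),
        "cf:qual"                               // optional column to restrict the scan to
    };
    int exitCode = ToolRunner.run(HBaseConfiguration.create(), new RowCounter(), args);
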
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/SimpleTotalOrderPartitioner.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/SimpleTotalOrderPartitioner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/SimpleTotalOrderPartitioner.java
deleted file mode 100644
index 4ba1088..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/SimpleTotalOrderPartitioner.java
+++ /dev/null
@@ -1,143 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Base64;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapreduce.Partitioner;
-
-/**
- * A partitioner that takes start and end keys and uses bigdecimal to figure
- * out which reducer a key belongs to. Pass the start and end
- * keys in the Configuration using <code>hbase.simpletotalorder.start</code>
- * and <code>hbase.simpletotalorder.end</code>. The end key needs to be
- * exclusive; i.e. one larger than the biggest key in your key space.
- * You may be surprised at how this class partitions the space; it may not
- * align with preconceptions; e.g. a start key of zero and an end key of 100
- * divided into ten will not make regions whose range is 0-10, 10-20, and so on.
- * Make your own partitioner if you need the region spacing to come out a
- * particular way.
- * @param <VALUE>
- * @see #START
- * @see #END
- */
-@InterfaceAudience.Public
-public class SimpleTotalOrderPartitioner<VALUE> extends Partitioner<ImmutableBytesWritable, VALUE>
-implements Configurable {
- private final static Log LOG = LogFactory.getLog(SimpleTotalOrderPartitioner.class);
-
- @Deprecated
- public static final String START = "hbase.simpletotalorder.start";
- @Deprecated
- public static final String END = "hbase.simpletotalorder.end";
-
- static final String START_BASE64 = "hbase.simpletotalorder.start.base64";
- static final String END_BASE64 = "hbase.simpletotalorder.end.base64";
-
- private Configuration c;
- private byte [] startkey;
- private byte [] endkey;
- private byte [][] splits;
- private int lastReduces = -1;
-
- public static void setStartKey(Configuration conf, byte[] startKey) {
- conf.set(START_BASE64, Base64.encodeBytes(startKey));
- }
-
- public static void setEndKey(Configuration conf, byte[] endKey) {
- conf.set(END_BASE64, Base64.encodeBytes(endKey));
- }
-
- @SuppressWarnings("deprecation")
- static byte[] getStartKey(Configuration conf) {
- return getKeyFromConf(conf, START_BASE64, START);
- }
-
- @SuppressWarnings("deprecation")
- static byte[] getEndKey(Configuration conf) {
- return getKeyFromConf(conf, END_BASE64, END);
- }
-
- private static byte[] getKeyFromConf(Configuration conf,
- String base64Key, String deprecatedKey) {
- String encoded = conf.get(base64Key);
- if (encoded != null) {
- return Base64.decode(encoded);
- }
- String oldStyleVal = conf.get(deprecatedKey);
- if (oldStyleVal == null) {
- return null;
- }
- LOG.warn("Using deprecated configuration " + deprecatedKey +
- " - please use static accessor methods instead.");
- return Bytes.toBytesBinary(oldStyleVal);
- }
-
- @Override
- public int getPartition(final ImmutableBytesWritable key, final VALUE value,
- final int reduces) {
- if (reduces == 1) return 0;
- if (this.lastReduces != reduces) {
- this.splits = Bytes.split(this.startkey, this.endkey, reduces - 1);
- for (int i = 0; i < splits.length; i++) {
- LOG.info(Bytes.toStringBinary(splits[i]));
- }
- this.lastReduces = reduces;
- }
- int pos = Bytes.binarySearch(this.splits, key.get(), key.getOffset(),
- key.getLength());
- // Below code is from hfile index search.
- if (pos < 0) {
- pos++;
- pos *= -1;
- if (pos == 0) {
- // falls before the beginning of the file.
- throw new RuntimeException("Key outside start/stop range: " +
- key.toString());
- }
- pos--;
- }
- return pos;
- }
-
- @Override
- public Configuration getConf() {
- return this.c;
- }
-
- @Override
- public void setConf(Configuration conf) {
- this.c = conf;
- this.startkey = getStartKey(conf);
- this.endkey = getEndKey(conf);
- if (startkey == null || endkey == null) {
- throw new RuntimeException(this.getClass() + " not configured");
- }
- LOG.info("startkey=" + Bytes.toStringBinary(startkey) +
- ", endkey=" + Bytes.toStringBinary(endkey));
- // Reset last reduces count on change of Start / End key
- this.lastReduces = -1;
- }
-}
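
A minimal configuration sketch for the partitioner deleted above, assuming a job whose row keys fall between two known boundaries (the keys here are placeholders; the end key is exclusive):

    Configuration conf = job.getConfiguration();
    SimpleTotalOrderPartitioner.setStartKey(conf, Bytes.toBytes("a"));
    SimpleTotalOrderPartitioner.setEndKey(conf, Bytes.toBytes("z"));   // exclusive upper bound
    job.setPartitionerClass(SimpleTotalOrderPartitioner.class);
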
[04/41] hbase git commit: HBASE-18640 Move mapreduce out of
hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultithreadedTableMapper.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultithreadedTableMapper.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultithreadedTableMapper.java
deleted file mode 100644
index 694a359..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultithreadedTableMapper.java
+++ /dev/null
@@ -1,264 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.NavigableMap;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.fs.FileUtil;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-/**
- * Test Map/Reduce job over HBase tables. The map/reduce process we're testing
- * on our tables is simple - take every row in the table, reverse the value of
- * a particular cell, and write it back to the table.
- */
-@Category({MapReduceTests.class, LargeTests.class})
-public class TestMultithreadedTableMapper {
- private static final Log LOG = LogFactory.getLog(TestMultithreadedTableMapper.class);
- private static final HBaseTestingUtility UTIL =
- new HBaseTestingUtility();
- static final TableName MULTI_REGION_TABLE_NAME = TableName.valueOf("mrtest");
- static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
- static final byte[] OUTPUT_FAMILY = Bytes.toBytes("text");
- static final int NUMBER_OF_THREADS = 10;
-
- @BeforeClass
- public static void beforeClass() throws Exception {
- // Up the handlers; this test needs more than usual.
- UTIL.getConfiguration().setInt(HConstants.REGION_SERVER_HIGH_PRIORITY_HANDLER_COUNT, 10);
- UTIL.startMiniCluster();
- Table table =
- UTIL.createMultiRegionTable(MULTI_REGION_TABLE_NAME, new byte[][] { INPUT_FAMILY,
- OUTPUT_FAMILY });
- UTIL.loadTable(table, INPUT_FAMILY, false);
- UTIL.waitUntilAllRegionsAssigned(MULTI_REGION_TABLE_NAME);
- }
-
- @AfterClass
- public static void afterClass() throws Exception {
- UTIL.shutdownMiniCluster();
- }
-
- /**
- * Pass the given key and processed record to reduce
- */
- public static class ProcessContentsMapper
- extends TableMapper<ImmutableBytesWritable, Put> {
-
- /**
- * Pass the key and the reversed value to reduce
- *
- * @param key
- * @param value
- * @param context
- * @throws IOException
- */
- @Override
- public void map(ImmutableBytesWritable key, Result value,
- Context context)
- throws IOException, InterruptedException {
- if (value.size() != 1) {
- throw new IOException("There should only be one input column");
- }
- Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>>
- cf = value.getMap();
- if(!cf.containsKey(INPUT_FAMILY)) {
- throw new IOException("Wrong input columns. Missing: '" +
- Bytes.toString(INPUT_FAMILY) + "'.");
- }
- // Get the original value and reverse it
- String originalValue = Bytes.toString(value.getValue(INPUT_FAMILY, INPUT_FAMILY));
- StringBuilder newValue = new StringBuilder(originalValue);
- newValue.reverse();
- // Now set the value to be collected
- Put outval = new Put(key.get());
- outval.addColumn(OUTPUT_FAMILY, null, Bytes.toBytes(newValue.toString()));
- context.write(key, outval);
- }
- }
-
- /**
- * Test MultithreadedTableMapper map/reduce against a multi-region table
- * @throws IOException
- * @throws ClassNotFoundException
- * @throws InterruptedException
- */
- @Test
- public void testMultithreadedTableMapper()
- throws IOException, InterruptedException, ClassNotFoundException {
- runTestOnTable(UTIL.getConnection().getTable(MULTI_REGION_TABLE_NAME));
- }
-
- private void runTestOnTable(Table table)
- throws IOException, InterruptedException, ClassNotFoundException {
- Job job = null;
- try {
- LOG.info("Before map/reduce startup");
- job = new Job(table.getConfiguration(), "process column contents");
- job.setNumReduceTasks(1);
- Scan scan = new Scan();
- scan.addFamily(INPUT_FAMILY);
- TableMapReduceUtil.initTableMapperJob(
- table.getName(), scan,
- MultithreadedTableMapper.class, ImmutableBytesWritable.class,
- Put.class, job);
- MultithreadedTableMapper.setMapperClass(job, ProcessContentsMapper.class);
- MultithreadedTableMapper.setNumberOfThreads(job, NUMBER_OF_THREADS);
- TableMapReduceUtil.initTableReducerJob(
- table.getName().getNameAsString(),
- IdentityTableReducer.class, job);
- FileOutputFormat.setOutputPath(job, new Path("test"));
- LOG.info("Started " + table.getName());
- assertTrue(job.waitForCompletion(true));
- LOG.info("After map/reduce completion");
- // verify map-reduce results
- verify(table.getName());
- } finally {
- table.close();
- if (job != null) {
- FileUtil.fullyDelete(
- new File(job.getConfiguration().get("hadoop.tmp.dir")));
- }
- }
- }
-
- private void verify(TableName tableName) throws IOException {
- Table table = UTIL.getConnection().getTable(tableName);
- boolean verified = false;
- long pause = UTIL.getConfiguration().getLong("hbase.client.pause", 5 * 1000);
- int numRetries = UTIL.getConfiguration().getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 5);
- for (int i = 0; i < numRetries; i++) {
- try {
- LOG.info("Verification attempt #" + i);
- verifyAttempt(table);
- verified = true;
- break;
- } catch (NullPointerException e) {
- // If here, a cell was empty. Presume it's because updates came in
- // after the scanner had been opened. Wait a while and retry.
- LOG.debug("Verification attempt failed: " + e.getMessage());
- }
- try {
- Thread.sleep(pause);
- } catch (InterruptedException e) {
- // continue
- }
- }
- assertTrue(verified);
- table.close();
- }
-
- /**
- * Looks at every value of the mapreduce output and verifies that indeed
- * the values have been reversed.
- *
- * @param table Table to scan.
- * @throws IOException
- * @throws NullPointerException if we failed to find a cell value
- */
- private void verifyAttempt(final Table table)
- throws IOException, NullPointerException {
- Scan scan = new Scan();
- scan.addFamily(INPUT_FAMILY);
- scan.addFamily(OUTPUT_FAMILY);
- ResultScanner scanner = table.getScanner(scan);
- try {
- Iterator<Result> itr = scanner.iterator();
- assertTrue(itr.hasNext());
- while(itr.hasNext()) {
- Result r = itr.next();
- if (LOG.isDebugEnabled()) {
- if (r.size() > 2 ) {
- throw new IOException("Too many results, expected 2 got " +
- r.size());
- }
- }
- byte[] firstValue = null;
- byte[] secondValue = null;
- int count = 0;
- for(Cell kv : r.listCells()) {
- if (count == 0) {
- firstValue = CellUtil.cloneValue(kv);
- }else if (count == 1) {
- secondValue = CellUtil.cloneValue(kv);
- }else if (count == 2) {
- break;
- }
- count++;
- }
- String first = "";
- if (firstValue == null) {
- throw new NullPointerException(Bytes.toString(r.getRow()) +
- ": first value is null");
- }
- first = Bytes.toString(firstValue);
- String second = "";
- if (secondValue == null) {
- throw new NullPointerException(Bytes.toString(r.getRow()) +
- ": second value is null");
- }
- byte[] secondReversed = new byte[secondValue.length];
- for (int i = 0, j = secondValue.length - 1; j >= 0; j--, i++) {
- secondReversed[i] = secondValue[j];
- }
- second = Bytes.toString(secondReversed);
- if (first.compareTo(second) != 0) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("second key is not the reverse of first. row=" +
- Bytes.toStringBinary(r.getRow()) + ", first value=" + first +
- ", second value=" + second);
- }
- fail();
- }
- }
- } finally {
- scanner.close();
- }
- }
-
-}
-
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java
deleted file mode 100644
index 3b84e2d..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java
+++ /dev/null
@@ -1,400 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.ArrayList;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.CategoryBasedTimeout;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.LauncherSecurityManager;
-import org.apache.hadoop.mapreduce.Counter;
-import org.apache.hadoop.mapreduce.Job;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestRule;
-
-/**
- * Test the rowcounter map reduce job.
- */
-@Category({MapReduceTests.class, LargeTests.class})
-public class TestRowCounter {
- @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
- withTimeout(this.getClass()).withLookingForStuckThread(true).build();
- private static final Log LOG = LogFactory.getLog(TestRowCounter.class);
- private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
- private final static String TABLE_NAME = "testRowCounter";
- private final static String TABLE_NAME_TS_RANGE = "testRowCounter_ts_range";
- private final static String COL_FAM = "col_fam";
- private final static String COL1 = "c1";
- private final static String COL2 = "c2";
- private final static String COMPOSITE_COLUMN = "C:A:A";
- private final static int TOTAL_ROWS = 10;
- private final static int ROWS_WITH_ONE_COL = 2;
-
- /**
- * @throws java.lang.Exception
- */
- @BeforeClass
- public static void setUpBeforeClass() throws Exception {
- TEST_UTIL.startMiniCluster();
- Table table = TEST_UTIL.createTable(TableName.valueOf(TABLE_NAME), Bytes.toBytes(COL_FAM));
- writeRows(table, TOTAL_ROWS, ROWS_WITH_ONE_COL);
- table.close();
- }
-
- /**
- * @throws java.lang.Exception
- */
- @AfterClass
- public static void tearDownAfterClass() throws Exception {
- TEST_UTIL.shutdownMiniCluster();
- }
-
- /**
- * Test a case when no column was specified in command line arguments.
- *
- * @throws Exception
- */
- @Test
- public void testRowCounterNoColumn() throws Exception {
- String[] args = new String[] {
- TABLE_NAME
- };
- runRowCount(args, 10);
- }
-
- /**
- * Test a case when the column specified in command line arguments is
- * exclusive for few rows.
- *
- * @throws Exception
- */
- @Test
- public void testRowCounterExclusiveColumn() throws Exception {
- String[] args = new String[] {
- TABLE_NAME, COL_FAM + ":" + COL1
- };
- runRowCount(args, 8);
- }
-
- /**
- * Test a case when the column specified in command line arguments is
- * one for which the qualifier contains colons.
- *
- * @throws Exception
- */
- @Test
- public void testRowCounterColumnWithColonInQualifier() throws Exception {
- String[] args = new String[] {
- TABLE_NAME, COL_FAM + ":" + COMPOSITE_COLUMN
- };
- runRowCount(args, 8);
- }
-
- /**
- * Test a case when the column specified in command line arguments is not part
- * of first KV for a row.
- *
- * @throws Exception
- */
- @Test
- public void testRowCounterHiddenColumn() throws Exception {
- String[] args = new String[] {
- TABLE_NAME, COL_FAM + ":" + COL2
- };
- runRowCount(args, 10);
- }
-
-
- /**
- * Test a case when the column specified in command line arguments is
- * exclusive for few rows and also a row range filter is specified
- *
- * @throws Exception
- */
- @Test
- public void testRowCounterColumnAndRowRange() throws Exception {
- String[] args = new String[] {
- TABLE_NAME, "--range=\\x00rov,\\x00rox", COL_FAM + ":" + COL1
- };
- runRowCount(args, 8);
- }
-
- /**
- * Test a case when a range is specified with single range of start-end keys
- * @throws Exception
- */
- @Test
- public void testRowCounterRowSingleRange() throws Exception {
- String[] args = new String[] {
- TABLE_NAME, "--range=\\x00row1,\\x00row3"
- };
- runRowCount(args, 2);
- }
-
- /**
- * Test a case when a range is specified with single range with end key only
- * @throws Exception
- */
- @Test
- public void testRowCounterRowSingleRangeUpperBound() throws Exception {
- String[] args = new String[] {
- TABLE_NAME, "--range=,\\x00row3"
- };
- runRowCount(args, 3);
- }
-
- /**
- * Test a case when a range is specified with two ranges where one range is with end key only
- * @throws Exception
- */
- @Test
- public void testRowCounterRowMultiRangeUpperBound() throws Exception {
- String[] args = new String[] {
- TABLE_NAME, "--range=,\\x00row3;\\x00row5,\\x00row7"
- };
- runRowCount(args, 5);
- }
-
- /**
- * Test a case when a range is specified with multiple ranges of start-end keys
- * @throws Exception
- */
- @Test
- public void testRowCounterRowMultiRange() throws Exception {
- String[] args = new String[] {
- TABLE_NAME, "--range=\\x00row1,\\x00row3;\\x00row5,\\x00row8"
- };
- runRowCount(args, 5);
- }
-
- /**
- * Test a case when a range is specified with multiple ranges of start-end keys;
- * one range is filled, another two are not
- * @throws Exception
- */
- @Test
- public void testRowCounterRowMultiEmptyRange() throws Exception {
- String[] args = new String[] {
- TABLE_NAME, "--range=\\x00row1,\\x00row3;;"
- };
- runRowCount(args, 2);
- }
-
- @Test
- public void testRowCounter10kRowRange() throws Exception {
- String tableName = TABLE_NAME + "10k";
-
- try (Table table = TEST_UTIL.createTable(
- TableName.valueOf(tableName), Bytes.toBytes(COL_FAM))) {
- writeRows(table, 10000, 0);
- }
- String[] args = new String[] {
- tableName, "--range=\\x00row9872,\\x00row9875"
- };
- runRowCount(args, 3);
- }
-
- /**
- * Test a case when the timerange is specified with --starttime and --endtime options
- *
- * @throws Exception
- */
- @Test
- public void testRowCounterTimeRange() throws Exception {
- final byte[] family = Bytes.toBytes(COL_FAM);
- final byte[] col1 = Bytes.toBytes(COL1);
- Put put1 = new Put(Bytes.toBytes("row_timerange_" + 1));
- Put put2 = new Put(Bytes.toBytes("row_timerange_" + 2));
- Put put3 = new Put(Bytes.toBytes("row_timerange_" + 3));
-
- long ts;
-
- // create a separate table for the timerange test
- Table table = TEST_UTIL.createTable(TableName.valueOf(TABLE_NAME_TS_RANGE), Bytes.toBytes(COL_FAM));
-
- ts = System.currentTimeMillis();
- put1.addColumn(family, col1, ts, Bytes.toBytes("val1"));
- table.put(put1);
- Thread.sleep(100);
-
- ts = System.currentTimeMillis();
- put2.addColumn(family, col1, ts, Bytes.toBytes("val2"));
- put3.addColumn(family, col1, ts, Bytes.toBytes("val3"));
- table.put(put2);
- table.put(put3);
- table.close();
-
- String[] args = new String[] {
- TABLE_NAME_TS_RANGE, COL_FAM + ":" + COL1,
- "--starttime=" + 0,
- "--endtime=" + ts
- };
- runRowCount(args, 1);
-
- args = new String[] {
- TABLE_NAME_TS_RANGE, COL_FAM + ":" + COL1,
- "--starttime=" + 0,
- "--endtime=" + (ts - 10)
- };
- runRowCount(args, 1);
-
- args = new String[] {
- TABLE_NAME_TS_RANGE, COL_FAM + ":" + COL1,
- "--starttime=" + ts,
- "--endtime=" + (ts + 1000)
- };
- runRowCount(args, 2);
-
- args = new String[] {
- TABLE_NAME_TS_RANGE, COL_FAM + ":" + COL1,
- "--starttime=" + (ts - 30 * 1000),
- "--endtime=" + (ts + 30 * 1000),
- };
- runRowCount(args, 3);
- }
-
- /**
- * Run the RowCounter map reduce job and verify the row count.
- *
- * @param args the command line arguments to be used for rowcounter job.
- * @param expectedCount the expected row count (result of map reduce job).
- * @throws Exception
- */
- private void runRowCount(String[] args, int expectedCount) throws Exception {
- Job job = RowCounter.createSubmittableJob(TEST_UTIL.getConfiguration(), args);
- long start = System.currentTimeMillis();
- job.waitForCompletion(true);
- long duration = System.currentTimeMillis() - start;
- LOG.debug("row count duration (ms): " + duration);
- assertTrue(job.isSuccessful());
- Counter counter = job.getCounters().findCounter(RowCounter.RowCounterMapper.Counters.ROWS);
- assertEquals(expectedCount, counter.getValue());
- }
-
- /**
- * Writes TOTAL_ROWS distinct rows into the table. Some rows have
- * two columns, some have only one.
- *
- * @param table
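- * @param totalRows total number of rows to write
- * @param rowsWithOneCol number of those rows written with only a single column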
- * @throws IOException
- */
- private static void writeRows(Table table, int totalRows, int rowsWithOneCol) throws IOException {
- final byte[] family = Bytes.toBytes(COL_FAM);
- final byte[] value = Bytes.toBytes("abcd");
- final byte[] col1 = Bytes.toBytes(COL1);
- final byte[] col2 = Bytes.toBytes(COL2);
- final byte[] col3 = Bytes.toBytes(COMPOSITE_COLUMN);
- ArrayList<Put> rowsUpdate = new ArrayList<>();
- // write some rows with two columns
- int i = 0;
- for (; i < totalRows - rowsWithOneCol; i++) {
- // Use binary rows values to test for HBASE-15287.
- byte[] row = Bytes.toBytesBinary("\\x00row" + i);
- Put put = new Put(row);
- put.addColumn(family, col1, value);
- put.addColumn(family, col2, value);
- put.addColumn(family, col3, value);
- rowsUpdate.add(put);
- }
-
- // write the remaining rows with only one column
- for (; i < totalRows; i++) {
- byte[] row = Bytes.toBytes("row" + i);
- Put put = new Put(row);
- put.addColumn(family, col2, value);
- rowsUpdate.add(put);
- }
- table.put(rowsUpdate);
- }
-
- /**
- * Test the main method. RowCounter should print usage help and call System.exit when given invalid arguments.
- */
- @Test
- public void testImportMain() throws Exception {
- PrintStream oldPrintStream = System.err;
- SecurityManager SECURITY_MANAGER = System.getSecurityManager();
- LauncherSecurityManager newSecurityManager= new LauncherSecurityManager();
- System.setSecurityManager(newSecurityManager);
- ByteArrayOutputStream data = new ByteArrayOutputStream();
- String[] args = {};
- System.setErr(new PrintStream(data));
- try {
- System.setErr(new PrintStream(data));
-
- try {
- RowCounter.main(args);
- fail("should be SecurityException");
- } catch (SecurityException e) {
- assertEquals(-1, newSecurityManager.getExitCode());
- assertTrue(data.toString().contains("Wrong number of parameters:"));
- assertTrue(data.toString().contains(
- "Usage: RowCounter [options] <tablename> " +
- "[--starttime=[start] --endtime=[end] " +
- "[--range=[startKey],[endKey][;[startKey],[endKey]...]] " +
- "[<column1> <column2>...]"));
- assertTrue(data.toString().contains("-Dhbase.client.scanner.caching=100"));
- assertTrue(data.toString().contains("-Dmapreduce.map.speculative=false"));
- }
- data.reset();
- try {
- args = new String[2];
- args[0] = "table";
- args[1] = "--range=1";
- RowCounter.main(args);
- fail("should be SecurityException");
- } catch (SecurityException e) {
- assertEquals(-1, newSecurityManager.getExitCode());
- assertTrue(data.toString().contains(
- "Please specify range in such format as \"--range=a,b\" or, with only one boundary," +
- " \"--range=,b\" or \"--range=a,\""));
- assertTrue(data.toString().contains(
- "Usage: RowCounter [options] <tablename> " +
- "[--starttime=[start] --endtime=[end] " +
- "[--range=[startKey],[endKey][;[startKey],[endKey]...]] " +
- "[<column1> <column2>...]"));
- }
-
- } finally {
- System.setErr(oldPrintStream);
- System.setSecurityManager(SECURITY_MANAGER);
- }
-
- }
-
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSecureLoadIncrementalHFiles.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSecureLoadIncrementalHFiles.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSecureLoadIncrementalHFiles.java
deleted file mode 100644
index 78fddbc..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSecureLoadIncrementalHFiles.java
+++ /dev/null
@@ -1,70 +0,0 @@
-/**
- * Copyright The Apache Software Foundation
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.codec.KeyValueCodecWithTags;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.security.UserProvider;
-import org.apache.hadoop.hbase.security.access.AccessControlLists;
-import org.apache.hadoop.hbase.security.access.SecureTestUtil;
-
-import org.junit.BeforeClass;
-import org.junit.experimental.categories.Category;
-
-/**
- * Reruns TestLoadIncrementalHFiles using LoadIncrementalHFiles in secure mode.
- * This suite is unable to verify the security handoff/turnover
- * as miniCluster is running as system user thus has root privileges
- * and delegation tokens don't seem to work on miniDFS.
- *
- * Thus SecureBulkload can only be completely verified by running
- * integration tests against a secure cluster. This suite is still
- * invaluable as it verifies the other mechanisms that need to be
- * supported as part of a LoadIncrementalFiles call.
- */
-@Category({MapReduceTests.class, LargeTests.class})
-public class TestSecureLoadIncrementalHFiles extends TestLoadIncrementalHFiles{
-
- @BeforeClass
- public static void setUpBeforeClass() throws Exception {
- // set the always on security provider
- UserProvider.setUserProviderForTesting(util.getConfiguration(),
- HadoopSecurityEnabledUserProviderForTesting.class);
- // setup configuration
- SecureTestUtil.enableSecurity(util.getConfiguration());
- util.getConfiguration().setInt(
- LoadIncrementalHFiles.MAX_FILES_PER_REGION_PER_FAMILY,
- MAX_FILES_PER_REGION_PER_FAMILY);
- // change default behavior so that tag values are returned with normal rpcs
- util.getConfiguration().set(HConstants.RPC_CODEC_CONF_KEY,
- KeyValueCodecWithTags.class.getCanonicalName());
-
- util.startMiniCluster();
-
- // Wait for the ACL table to become available
- util.waitTableEnabled(AccessControlLists.ACL_TABLE_NAME);
-
- setupNamespace();
- }
-
-}
-
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSecureLoadIncrementalHFilesSplitRecovery.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSecureLoadIncrementalHFilesSplitRecovery.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSecureLoadIncrementalHFilesSplitRecovery.java
deleted file mode 100644
index 0e877ad..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSecureLoadIncrementalHFilesSplitRecovery.java
+++ /dev/null
@@ -1,69 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.security.UserProvider;
-import org.apache.hadoop.hbase.security.access.AccessControlLists;
-import org.apache.hadoop.hbase.security.access.SecureTestUtil;
-
-import org.junit.BeforeClass;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-
-/**
- * Reruns TestLoadIncrementalHFilesSplitRecovery
- * using LoadIncrementalHFiles in secure mode.
- * This suite is unable to verify the security handoff/turnover
- * as miniCluster is running as system user thus has root privileges
- * and delegation tokens don't seem to work on miniDFS.
- *
- * Thus SecureBulkload can only be completely verified by running
- * integration tests against a secure cluster. This suite is still
- * invaluable as it verifies the other mechanisms that need to be
- * supported as part of a LoadIncrementalFiles call.
- */
-@Category({MapReduceTests.class, LargeTests.class})
-public class TestSecureLoadIncrementalHFilesSplitRecovery extends TestLoadIncrementalHFilesSplitRecovery {
-
- //This "overrides" the parent static method
- //make sure they are in sync
- @BeforeClass
- public static void setupCluster() throws Exception {
- util = new HBaseTestingUtility();
- // set the always on security provider
- UserProvider.setUserProviderForTesting(util.getConfiguration(),
- HadoopSecurityEnabledUserProviderForTesting.class);
- // setup configuration
- SecureTestUtil.enableSecurity(util.getConfiguration());
-
- util.startMiniCluster();
-
- // Wait for the ACL table to become available
- util.waitTableEnabled(AccessControlLists.ACL_TABLE_NAME);
- }
-
- //Disabling this test as it does not work in secure mode
- @Test (timeout=180000)
- @Override
- public void testBulkLoadPhaseFailure() {
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSimpleTotalOrderPartitioner.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSimpleTotalOrderPartitioner.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSimpleTotalOrderPartitioner.java
deleted file mode 100644
index 0f41f33..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSimpleTotalOrderPartitioner.java
+++ /dev/null
@@ -1,81 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.*;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.testclassification.SmallTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.junit.experimental.categories.Category;
-
-import org.junit.Test;
-
-/**
- * Test of simple partitioner.
- */
-@Category({MapReduceTests.class, SmallTests.class})
-public class TestSimpleTotalOrderPartitioner {
- protected final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
- Configuration conf = TEST_UTIL.getConfiguration();
-
- @Test
- public void testSplit() throws Exception {
- String start = "a";
- String end = "{";
- SimpleTotalOrderPartitioner<byte []> p = new SimpleTotalOrderPartitioner<>();
-
- this.conf.set(SimpleTotalOrderPartitioner.START, start);
- this.conf.set(SimpleTotalOrderPartitioner.END, end);
- p.setConf(this.conf);
- ImmutableBytesWritable c = new ImmutableBytesWritable(Bytes.toBytes("c"));
- // If one reduce, partition should be 0.
- int partition = p.getPartition(c, HConstants.EMPTY_BYTE_ARRAY, 1);
- assertEquals(0, partition);
- // If two reduces, partition should be 0.
- partition = p.getPartition(c, HConstants.EMPTY_BYTE_ARRAY, 2);
- assertEquals(0, partition);
- // Divide in 3.
- partition = p.getPartition(c, HConstants.EMPTY_BYTE_ARRAY, 3);
- assertEquals(0, partition);
- ImmutableBytesWritable q = new ImmutableBytesWritable(Bytes.toBytes("q"));
- partition = p.getPartition(q, HConstants.EMPTY_BYTE_ARRAY, 2);
- assertEquals(1, partition);
- partition = p.getPartition(q, HConstants.EMPTY_BYTE_ARRAY, 3);
- assertEquals(2, partition);
- // What about end and start keys.
- ImmutableBytesWritable startBytes =
- new ImmutableBytesWritable(Bytes.toBytes(start));
- partition = p.getPartition(startBytes, HConstants.EMPTY_BYTE_ARRAY, 2);
- assertEquals(0, partition);
- partition = p.getPartition(startBytes, HConstants.EMPTY_BYTE_ARRAY, 3);
- assertEquals(0, partition);
- ImmutableBytesWritable endBytes =
- new ImmutableBytesWritable(Bytes.toBytes("z"));
- partition = p.getPartition(endBytes, HConstants.EMPTY_BYTE_ARRAY, 2);
- assertEquals(1, partition);
- partition = p.getPartition(endBytes, HConstants.EMPTY_BYTE_ARRAY, 3);
- assertEquals(2, partition);
- }
-
-}
-
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSyncTable.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSyncTable.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSyncTable.java
deleted file mode 100644
index 79b2cf0..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestSyncTable.java
+++ /dev/null
@@ -1,339 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.IOException;
-import java.util.Arrays;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.CategoryBasedTimeout;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.mapreduce.SyncTable.SyncMapper.Counter;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapreduce.Counters;
-import org.junit.AfterClass;
-import org.junit.Assert;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
-import org.junit.rules.TestRule;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.base.Throwables;
-
-/**
- * Basic test for the SyncTable M/R tool
- */
-@Category(LargeTests.class)
-public class TestSyncTable {
- @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
- withTimeout(this.getClass()).withLookingForStuckThread(true).build();
- private static final Log LOG = LogFactory.getLog(TestSyncTable.class);
-
- private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
-
- @Rule
- public TestName name = new TestName();
-
- @BeforeClass
- public static void beforeClass() throws Exception {
- TEST_UTIL.startMiniCluster(3);
- }
-
- @AfterClass
- public static void afterClass() throws Exception {
- TEST_UTIL.shutdownMiniCluster();
- }
-
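- /** Generates numRegions - 1 split points spread evenly across the integer row key space. */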
- private static byte[][] generateSplits(int numRows, int numRegions) {
- byte[][] splitRows = new byte[numRegions-1][];
- for (int i = 1; i < numRegions; i++) {
- splitRows[i-1] = Bytes.toBytes(numRows * i / numRegions);
- }
- return splitRows;
- }
-
- @Test
- public void testSyncTable() throws Exception {
- final TableName sourceTableName = TableName.valueOf(name.getMethodName() + "_source");
- final TableName targetTableName = TableName.valueOf(name.getMethodName() + "_target");
- Path testDir = TEST_UTIL.getDataTestDirOnTestFS("testSyncTable");
-
- writeTestData(sourceTableName, targetTableName);
- hashSourceTable(sourceTableName, testDir);
- Counters syncCounters = syncTables(sourceTableName, targetTableName, testDir);
- assertEqualTables(90, sourceTableName, targetTableName);
-
- assertEquals(60, syncCounters.findCounter(Counter.ROWSWITHDIFFS).getValue());
- assertEquals(10, syncCounters.findCounter(Counter.SOURCEMISSINGROWS).getValue());
- assertEquals(10, syncCounters.findCounter(Counter.TARGETMISSINGROWS).getValue());
- assertEquals(50, syncCounters.findCounter(Counter.SOURCEMISSINGCELLS).getValue());
- assertEquals(50, syncCounters.findCounter(Counter.TARGETMISSINGCELLS).getValue());
- assertEquals(20, syncCounters.findCounter(Counter.DIFFERENTCELLVALUES).getValue());
-
- TEST_UTIL.deleteTable(sourceTableName);
- TEST_UTIL.deleteTable(targetTableName);
- TEST_UTIL.cleanupDataTestDirOnTestFS();
- }
-
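- /** Scans source and target in parallel and asserts both contain exactly expectedRows identical rows. */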
- private void assertEqualTables(int expectedRows, TableName sourceTableName,
- TableName targetTableName) throws Exception {
- Table sourceTable = TEST_UTIL.getConnection().getTable(sourceTableName);
- Table targetTable = TEST_UTIL.getConnection().getTable(targetTableName);
-
- ResultScanner sourceScanner = sourceTable.getScanner(new Scan());
- ResultScanner targetScanner = targetTable.getScanner(new Scan());
-
- for (int i = 0; i < expectedRows; i++) {
- Result sourceRow = sourceScanner.next();
- Result targetRow = targetScanner.next();
-
- LOG.debug("SOURCE row: " + (sourceRow == null ? "null" : Bytes.toInt(sourceRow.getRow()))
- + " cells:" + sourceRow);
- LOG.debug("TARGET row: " + (targetRow == null ? "null" : Bytes.toInt(targetRow.getRow()))
- + " cells:" + targetRow);
-
- if (sourceRow == null) {
- Assert.fail("Expected " + expectedRows
- + " source rows but only found " + i);
- }
- if (targetRow == null) {
- Assert.fail("Expected " + expectedRows
- + " target rows but only found " + i);
- }
- Cell[] sourceCells = sourceRow.rawCells();
- Cell[] targetCells = targetRow.rawCells();
- if (sourceCells.length != targetCells.length) {
- LOG.debug("Source cells: " + Arrays.toString(sourceCells));
- LOG.debug("Target cells: " + Arrays.toString(targetCells));
- Assert.fail("Row " + Bytes.toInt(sourceRow.getRow())
- + " has " + sourceCells.length
- + " cells in source table but " + targetCells.length
- + " cells in target table");
- }
- for (int j = 0; j < sourceCells.length; j++) {
- Cell sourceCell = sourceCells[j];
- Cell targetCell = targetCells[j];
- try {
- if (!CellUtil.matchingRow(sourceCell, targetCell)) {
- Assert.fail("Rows don't match");
- }
- if (!CellUtil.matchingFamily(sourceCell, targetCell)) {
- Assert.fail("Families don't match");
- }
- if (!CellUtil.matchingQualifier(sourceCell, targetCell)) {
- Assert.fail("Qualifiers don't match");
- }
- if (!CellUtil.matchingTimestamp(sourceCell, targetCell)) {
- Assert.fail("Timestamps don't match");
- }
- if (!CellUtil.matchingValue(sourceCell, targetCell)) {
- Assert.fail("Values don't match");
- }
- } catch (Throwable t) {
- LOG.debug("Source cell: " + sourceCell + " target cell: " + targetCell);
- Throwables.propagate(t);
- }
- }
- }
- Result sourceRow = sourceScanner.next();
- if (sourceRow != null) {
- Assert.fail("Source table has more than " + expectedRows
- + " rows. Next row: " + Bytes.toInt(sourceRow.getRow()));
- }
- Result targetRow = targetScanner.next();
- if (targetRow != null) {
- Assert.fail("Target table has more than " + expectedRows
- + " rows. Next row: " + Bytes.toInt(targetRow.getRow()));
- }
- sourceScanner.close();
- targetScanner.close();
- sourceTable.close();
- targetTable.close();
- }
-
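- /** Runs the SyncTable tool from source to target using the hash output in testDir and returns its job counters. */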
- private Counters syncTables(TableName sourceTableName, TableName targetTableName,
- Path testDir) throws Exception {
- SyncTable syncTable = new SyncTable(TEST_UTIL.getConfiguration());
- int code = syncTable.run(new String[] {
- testDir.toString(),
- sourceTableName.getNameAsString(),
- targetTableName.getNameAsString()
- });
- assertEquals("sync table job failed", 0, code);
-
- LOG.info("Sync tables completed");
- return syncTable.counters;
- }
-
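- /** Runs the HashTable tool over the source table and verifies the hash metadata written to testDir. */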
- private void hashSourceTable(TableName sourceTableName, Path testDir)
- throws Exception, IOException {
- int numHashFiles = 3;
- long batchSize = 100; // should be 2 batches per region
- int scanBatch = 1;
- HashTable hashTable = new HashTable(TEST_UTIL.getConfiguration());
- int code = hashTable.run(new String[] {
- "--batchsize=" + batchSize,
- "--numhashfiles=" + numHashFiles,
- "--scanbatch=" + scanBatch,
- sourceTableName.getNameAsString(),
- testDir.toString()});
- assertEquals("hash table job failed", 0, code);
-
- FileSystem fs = TEST_UTIL.getTestFileSystem();
-
- HashTable.TableHash tableHash = HashTable.TableHash.read(fs.getConf(), testDir);
- assertEquals(sourceTableName.getNameAsString(), tableHash.tableName);
- assertEquals(batchSize, tableHash.batchSize);
- assertEquals(numHashFiles, tableHash.numHashFiles);
- assertEquals(numHashFiles - 1, tableHash.partitions.size());
-
- LOG.info("Hash table completed");
- }
-
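- /** Populates source and target with 100 rows crafted to produce the per-counter differences noted inline. */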
- private void writeTestData(TableName sourceTableName, TableName targetTableName)
- throws Exception {
- final byte[] family = Bytes.toBytes("family");
- final byte[] column1 = Bytes.toBytes("c1");
- final byte[] column2 = Bytes.toBytes("c2");
- final byte[] value1 = Bytes.toBytes("val1");
- final byte[] value2 = Bytes.toBytes("val2");
- final byte[] value3 = Bytes.toBytes("val3");
-
- int numRows = 100;
- int sourceRegions = 10;
- int targetRegions = 6;
-
- Table sourceTable = TEST_UTIL.createTable(sourceTableName,
- family, generateSplits(numRows, sourceRegions));
-
- Table targetTable = TEST_UTIL.createTable(targetTableName,
- family, generateSplits(numRows, targetRegions));
-
- long timestamp = 1430764183454L;
-
- int rowIndex = 0;
- // a bunch of identical rows
- for (; rowIndex < 40; rowIndex++) {
- Put sourcePut = new Put(Bytes.toBytes(rowIndex));
- sourcePut.addColumn(family, column1, timestamp, value1);
- sourcePut.addColumn(family, column2, timestamp, value2);
- sourceTable.put(sourcePut);
-
- Put targetPut = new Put(Bytes.toBytes(rowIndex));
- targetPut.addColumn(family, column1, timestamp, value1);
- targetPut.addColumn(family, column2, timestamp, value2);
- targetTable.put(targetPut);
- }
- // some rows only in the source table
- // ROWSWITHDIFFS: 10
- // TARGETMISSINGROWS: 10
- // TARGETMISSINGCELLS: 20
- for (; rowIndex < 50; rowIndex++) {
- Put put = new Put(Bytes.toBytes(rowIndex));
- put.addColumn(family, column1, timestamp, value1);
- put.addColumn(family, column2, timestamp, value2);
- sourceTable.put(put);
- }
- // some rows only in the target table
- // ROWSWITHDIFFS: 10
- // SOURCEMISSINGROWS: 10
- // SOURCEMISSINGCELLS: 20
- for (; rowIndex < 60; rowIndex++) {
- Put put = new Put(Bytes.toBytes(rowIndex));
- put.addColumn(family, column1, timestamp, value1);
- put.addColumn(family, column2, timestamp, value2);
- targetTable.put(put);
- }
- // some rows with 1 missing cell in target table
- // ROWSWITHDIFFS: 10
- // TARGETMISSINGCELLS: 10
- for (; rowIndex < 70; rowIndex++) {
- Put sourcePut = new Put(Bytes.toBytes(rowIndex));
- sourcePut.addColumn(family, column1, timestamp, value1);
- sourcePut.addColumn(family, column2, timestamp, value2);
- sourceTable.put(sourcePut);
-
- Put targetPut = new Put(Bytes.toBytes(rowIndex));
- targetPut.addColumn(family, column1, timestamp, value1);
- targetTable.put(targetPut);
- }
- // some rows with 1 missing cell in source table
- // ROWSWITHDIFFS: 10
- // SOURCEMISSINGCELLS: 10
- for (; rowIndex < 80; rowIndex++) {
- Put sourcePut = new Put(Bytes.toBytes(rowIndex));
- sourcePut.addColumn(family, column1, timestamp, value1);
- sourceTable.put(sourcePut);
-
- Put targetPut = new Put(Bytes.toBytes(rowIndex));
- targetPut.addColumn(family, column1, timestamp, value1);
- targetPut.addColumn(family, column2, timestamp, value2);
- targetTable.put(targetPut);
- }
- // some rows differing only in timestamp
- // ROWSWITHDIFFS: 10
- // SOURCEMISSINGCELLS: 20
- // TARGETMISSINGCELLS: 20
- for (; rowIndex < 90; rowIndex++) {
- Put sourcePut = new Put(Bytes.toBytes(rowIndex));
- sourcePut.addColumn(family, column1, timestamp, column1);
- sourcePut.addColumn(family, column2, timestamp, value2);
- sourceTable.put(sourcePut);
-
- Put targetPut = new Put(Bytes.toBytes(rowIndex));
- targetPut.addColumn(family, column1, timestamp+1, column1);
- targetPut.addColumn(family, column2, timestamp-1, value2);
- targetTable.put(targetPut);
- }
- // some rows with different values
- // ROWSWITHDIFFS: 10
- // DIFFERENTCELLVALUES: 20
- for (; rowIndex < numRows; rowIndex++) {
- Put sourcePut = new Put(Bytes.toBytes(rowIndex));
- sourcePut.addColumn(family, column1, timestamp, value1);
- sourcePut.addColumn(family, column2, timestamp, value2);
- sourceTable.put(sourcePut);
-
- Put targetPut = new Put(Bytes.toBytes(rowIndex));
- targetPut.addColumn(family, column1, timestamp, value3);
- targetPut.addColumn(family, column2, timestamp, value3);
- targetTable.put(targetPut);
- }
-
- sourceTable.close();
- targetTable.close();
- }
-
-
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormat.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormat.java
deleted file mode 100644
index 4693519..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormat.java
+++ /dev/null
@@ -1,481 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-import static org.mockito.Matchers.anyObject;
-import static org.mockito.Mockito.doAnswer;
-import static org.mockito.Mockito.doReturn;
-import static org.mockito.Mockito.doThrow;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.spy;
-
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.Map;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.*;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
-import org.apache.hadoop.hbase.filter.Filter;
-import org.apache.hadoop.hbase.filter.RegexStringComparator;
-import org.apache.hadoop.hbase.filter.RowFilter;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.JobConfigurable;
-import org.apache.hadoop.mapred.MiniMRCluster;
-import org.apache.hadoop.mapreduce.InputFormat;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
-import org.junit.AfterClass;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.mockito.invocation.InvocationOnMock;
-import org.mockito.stubbing.Answer;
-
-/**
- * This tests the TableInputFormat and its recovery semantics
- *
- */
-@Category(LargeTests.class)
-public class TestTableInputFormat {
-
- private static final Log LOG = LogFactory.getLog(TestTableInputFormat.class);
-
- private final static HBaseTestingUtility UTIL = new HBaseTestingUtility();
- private static MiniMRCluster mrCluster;
- static final byte[] FAMILY = Bytes.toBytes("family");
-
- private static final byte[][] columns = new byte[][] { FAMILY };
-
- @BeforeClass
- public static void beforeClass() throws Exception {
- UTIL.startMiniCluster();
- }
-
- @AfterClass
- public static void afterClass() throws Exception {
- UTIL.shutdownMiniCluster();
- }
-
- @Before
- public void before() throws IOException {
- LOG.info("before");
- UTIL.ensureSomeRegionServersAvailable(1);
- LOG.info("before done");
- }
-
- /**
- * Setup a table with two rows and values.
- *
- * @param tableName
- * @return
- * @throws IOException
- */
- public static Table createTable(byte[] tableName) throws IOException {
- return createTable(tableName, new byte[][] { FAMILY });
- }
-
- /**
- * Setup a table with two rows and values per column family.
- *
- * @param tableName
- * @return
- * @throws IOException
- */
- public static Table createTable(byte[] tableName, byte[][] families) throws IOException {
- Table table = UTIL.createTable(TableName.valueOf(tableName), families);
- Put p = new Put("aaa".getBytes());
- for (byte[] family : families) {
- p.addColumn(family, null, "value aaa".getBytes());
- }
- table.put(p);
- p = new Put("bbb".getBytes());
- for (byte[] family : families) {
- p.addColumn(family, null, "value bbb".getBytes());
- }
- table.put(p);
- return table;
- }
-
- /**
- * Verify that the result and key have expected values.
- *
- * @param r
- * @param key
- * @param expectedKey
- * @param expectedValue
- * @return
- */
- static boolean checkResult(Result r, ImmutableBytesWritable key,
- byte[] expectedKey, byte[] expectedValue) {
- assertEquals(0, key.compareTo(expectedKey));
- Map<byte[], byte[]> vals = r.getFamilyMap(FAMILY);
- byte[] value = vals.values().iterator().next();
- assertTrue(Arrays.equals(value, expectedValue));
- return true; // if succeed
- }
-
- /**
- * Create table data and run tests on specified htable using the
- * o.a.h.hbase.mapreduce API.
- *
- * @param table
- * @throws IOException
- * @throws InterruptedException
- */
- static void runTestMapreduce(Table table) throws IOException,
- InterruptedException {
- org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl trr =
- new org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl();
- Scan s = new Scan();
- s.setStartRow("aaa".getBytes());
- s.setStopRow("zzz".getBytes());
- s.addFamily(FAMILY);
- trr.setScan(s);
- trr.setHTable(table);
-
- trr.initialize(null, null);
- Result r = new Result();
- ImmutableBytesWritable key = new ImmutableBytesWritable();
-
- boolean more = trr.nextKeyValue();
- assertTrue(more);
- key = trr.getCurrentKey();
- r = trr.getCurrentValue();
- checkResult(r, key, "aaa".getBytes(), "value aaa".getBytes());
-
- more = trr.nextKeyValue();
- assertTrue(more);
- key = trr.getCurrentKey();
- r = trr.getCurrentValue();
- checkResult(r, key, "bbb".getBytes(), "value bbb".getBytes());
-
- // no more data
- more = trr.nextKeyValue();
- assertFalse(more);
- }
-
- /**
- * Create a table that IOE's on first scanner next call
- *
- * @throws IOException
- */
- static Table createIOEScannerTable(byte[] name, final int failCnt)
- throws IOException {
- // build up a mock scanner stuff to fail the first time
- Answer<ResultScanner> a = new Answer<ResultScanner>() {
- int cnt = 0;
-
- @Override
- public ResultScanner answer(InvocationOnMock invocation) throws Throwable {
- // first invocation return the busted mock scanner
- if (cnt++ < failCnt) {
- // create mock ResultScanner that always fails.
- Scan scan = mock(Scan.class);
- doReturn("bogus".getBytes()).when(scan).getStartRow(); // avoid npe
- ResultScanner scanner = mock(ResultScanner.class);
- // simulate TimeoutException / IOException
- doThrow(new IOException("Injected exception")).when(scanner).next();
- return scanner;
- }
-
- // otherwise return the real scanner.
- return (ResultScanner) invocation.callRealMethod();
- }
- };
-
- Table htable = spy(createTable(name));
- doAnswer(a).when(htable).getScanner((Scan) anyObject());
- return htable;
- }
-
- /**
- * Create a table that throws a NotServingRegionException on first scanner
- * next call
- *
- * @throws IOException
- */
- static Table createDNRIOEScannerTable(byte[] name, final int failCnt)
- throws IOException {
- // build up a mock scanner stuff to fail the first time
- Answer<ResultScanner> a = new Answer<ResultScanner>() {
- int cnt = 0;
-
- @Override
- public ResultScanner answer(InvocationOnMock invocation) throws Throwable {
- // first invocation return the busted mock scanner
- if (cnt++ < failCnt) {
- // create mock ResultScanner that always fails.
- Scan scan = mock(Scan.class);
- doReturn("bogus".getBytes()).when(scan).getStartRow(); // avoid npe
- ResultScanner scanner = mock(ResultScanner.class);
-
- invocation.callRealMethod(); // simulate NotServingRegionException
- doThrow(
- new NotServingRegionException("Injected simulated TimeoutException"))
- .when(scanner).next();
- return scanner;
- }
-
- // otherwise return the real scanner.
- return (ResultScanner) invocation.callRealMethod();
- }
- };
-
- Table htable = spy(createTable(name));
- doAnswer(a).when(htable).getScanner((Scan) anyObject());
- return htable;
- }
-
- /**
- * Run test assuming no errors using newer mapreduce api
- *
- * @throws IOException
- * @throws InterruptedException
- */
- @Test
- public void testTableRecordReaderMapreduce() throws IOException,
- InterruptedException {
- Table table = createTable("table1-mr".getBytes());
- runTestMapreduce(table);
- }
-
- /**
- * Run test assuming Scanner IOException failure using newer mapreduce api
- *
- * @throws IOException
- * @throws InterruptedException
- */
- @Test
- public void testTableRecordReaderScannerFailMapreduce() throws IOException,
- InterruptedException {
- Table htable = createIOEScannerTable("table2-mr".getBytes(), 1);
- runTestMapreduce(htable);
- }
-
- /**
- * Run test assuming Scanner IOException failure using newer mapreduce api
- *
- * @throws IOException
- * @throws InterruptedException
- */
- @Test(expected = IOException.class)
- public void testTableRecordReaderScannerFailMapreduceTwice() throws IOException,
- InterruptedException {
- Table htable = createIOEScannerTable("table3-mr".getBytes(), 2);
- runTestMapreduce(htable);
- }
-
- /**
- * Run test assuming NotServingRegionException using newer mapreduce api
- *
- * @throws InterruptedException
- * @throws org.apache.hadoop.hbase.DoNotRetryIOException
- */
- @Test
- public void testTableRecordReaderScannerTimeoutMapreduce()
- throws IOException, InterruptedException {
- Table htable = createDNRIOEScannerTable("table4-mr".getBytes(), 1);
- runTestMapreduce(htable);
- }
-
- /**
- * Run test assuming NotServingRegionException using newer mapreduce api
- *
- * @throws InterruptedException
- * @throws org.apache.hadoop.hbase.NotServingRegionException
- */
- @Test(expected = org.apache.hadoop.hbase.NotServingRegionException.class)
- public void testTableRecordReaderScannerTimeoutMapreduceTwice()
- throws IOException, InterruptedException {
- Table htable = createDNRIOEScannerTable("table5-mr".getBytes(), 2);
- runTestMapreduce(htable);
- }
-
- /**
- * Verify the example we present in javadocs on TableInputFormatBase
- */
- @Test
- public void testExtensionOfTableInputFormatBase()
- throws IOException, InterruptedException, ClassNotFoundException {
- LOG.info("testing use of an InputFormat taht extends InputFormatBase");
- final Table htable = createTable(Bytes.toBytes("exampleTable"),
- new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") });
- testInputFormat(ExampleTIF.class);
- }
-
- @Test
- public void testJobConfigurableExtensionOfTableInputFormatBase()
- throws IOException, InterruptedException, ClassNotFoundException {
- LOG.info("testing use of an InputFormat taht extends InputFormatBase, " +
- "using JobConfigurable.");
- final Table htable = createTable(Bytes.toBytes("exampleJobConfigurableTable"),
- new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") });
- testInputFormat(ExampleJobConfigurableTIF.class);
- }
-
- @Test
- public void testDeprecatedExtensionOfTableInputFormatBase()
- throws IOException, InterruptedException, ClassNotFoundException {
- LOG.info("testing use of an InputFormat taht extends InputFormatBase, " +
- "using the approach documented in 0.98.");
- final Table htable = createTable(Bytes.toBytes("exampleDeprecatedTable"),
- new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") });
- testInputFormat(ExampleDeprecatedTIF.class);
- }
-
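- /** Submits a job with the given InputFormat and verifies the counters recorded by ExampleVerifier. */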
- void testInputFormat(Class<? extends InputFormat> clazz)
- throws IOException, InterruptedException, ClassNotFoundException {
- final Job job = MapreduceTestingShim.createJob(UTIL.getConfiguration());
- job.setInputFormatClass(clazz);
- job.setOutputFormatClass(NullOutputFormat.class);
- job.setMapperClass(ExampleVerifier.class);
- job.setNumReduceTasks(0);
-
- LOG.debug("submitting job.");
- assertTrue("job failed!", job.waitForCompletion(true));
- assertEquals("Saw the wrong number of instances of the filtered-for row.", 2, job.getCounters()
- .findCounter(TestTableInputFormat.class.getName() + ":row", "aaa").getValue());
- assertEquals("Saw any instances of the filtered out row.", 0, job.getCounters()
- .findCounter(TestTableInputFormat.class.getName() + ":row", "bbb").getValue());
- assertEquals("Saw the wrong number of instances of columnA.", 1, job.getCounters()
- .findCounter(TestTableInputFormat.class.getName() + ":family", "columnA").getValue());
- assertEquals("Saw the wrong number of instances of columnB.", 1, job.getCounters()
- .findCounter(TestTableInputFormat.class.getName() + ":family", "columnB").getValue());
- assertEquals("Saw the wrong count of values for the filtered-for row.", 2, job.getCounters()
- .findCounter(TestTableInputFormat.class.getName() + ":value", "value aaa").getValue());
- assertEquals("Saw the wrong count of values for the filtered-out row.", 0, job.getCounters()
- .findCounter(TestTableInputFormat.class.getName() + ":value", "value bbb").getValue());
- }
-
- public static class ExampleVerifier extends TableMapper<NullWritable, NullWritable> {
-
- @Override
- public void map(ImmutableBytesWritable key, Result value, Context context)
- throws IOException {
- for (Cell cell : value.listCells()) {
- context.getCounter(TestTableInputFormat.class.getName() + ":row",
- Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()))
- .increment(1l);
- context.getCounter(TestTableInputFormat.class.getName() + ":family",
- Bytes.toString(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength()))
- .increment(1l);
- context.getCounter(TestTableInputFormat.class.getName() + ":value",
- Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()))
- .increment(1l);
- }
- }
-
- }
-
- public static class ExampleDeprecatedTIF extends TableInputFormatBase implements JobConfigurable {
-
- @Override
- public void configure(JobConf job) {
- try {
- Connection connection = ConnectionFactory.createConnection(job);
- Table exampleTable = connection.getTable(TableName.valueOf(("exampleDeprecatedTable")));
- // mandatory
- initializeTable(connection, exampleTable.getName());
- byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
- Bytes.toBytes("columnB") };
- // optional
- Scan scan = new Scan();
- for (byte[] family : inputColumns) {
- scan.addFamily(family);
- }
- Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*"));
- scan.setFilter(exampleFilter);
- setScan(scan);
- } catch (IOException exception) {
- throw new RuntimeException("Failed to configure for job.", exception);
- }
- }
-
- }
-
-
- public static class ExampleJobConfigurableTIF extends TableInputFormatBase
- implements JobConfigurable {
-
- @Override
- public void configure(JobConf job) {
- try {
- Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create(job));
- TableName tableName = TableName.valueOf("exampleJobConfigurableTable");
- // mandatory
- initializeTable(connection, tableName);
- byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
- Bytes.toBytes("columnB") };
- //optional
- Scan scan = new Scan();
- for (byte[] family : inputColumns) {
- scan.addFamily(family);
- }
- Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*"));
- scan.setFilter(exampleFilter);
- setScan(scan);
- } catch (IOException exception) {
- throw new RuntimeException("Failed to initialize.", exception);
- }
- }
- }
-
-
- public static class ExampleTIF extends TableInputFormatBase {
-
- @Override
- protected void initialize(JobContext job) throws IOException {
- Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create(
- job.getConfiguration()));
- TableName tableName = TableName.valueOf("exampleTable");
- // mandatory
- initializeTable(connection, tableName);
- byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
- Bytes.toBytes("columnB") };
- //optional
- Scan scan = new Scan();
- for (byte[] family : inputColumns) {
- scan.addFamily(family);
- }
- Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*"));
- scan.setFilter(exampleFilter);
- setScan(scan);
- }
-
- }
-}
-
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatBase.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatBase.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatBase.java
deleted file mode 100644
index 699e773..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatBase.java
+++ /dev/null
@@ -1,53 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.*;
-
-import java.net.Inet6Address;
-import java.net.InetAddress;
-import java.net.UnknownHostException;
-
-import org.apache.hadoop.hbase.testclassification.SmallTests;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-@Category({SmallTests.class})
-public class TestTableInputFormatBase {
- @Test
- public void testTableInputFormatBaseReverseDNSForIPv6()
- throws UnknownHostException {
- String address = "ipv6.google.com";
- String localhost = null;
- InetAddress addr = null;
- TableInputFormat inputFormat = new TableInputFormat();
- try {
- localhost = InetAddress.getByName(address).getCanonicalHostName();
- addr = Inet6Address.getByName(address);
- } catch (UnknownHostException e) {
- // google.com is down, we can probably forgive this test.
- return;
- }
- System.out.println("Should retrun the hostname for this host " +
- localhost + " addr : " + addr);
- String actualHostName = inputFormat.reverseDNS(addr);
- assertEquals("Should retrun the hostname for this host. Expected : " +
- localhost + " Actual : " + actualHostName, localhost, actualHostName);
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScan1.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScan1.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScan1.java
deleted file mode 100644
index 99b40b9..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScan1.java
+++ /dev/null
@@ -1,200 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-/**
- * TestTableInputFormatScan part 1.
- * @see TestTableInputFormatScanBase
- */
-@Category({VerySlowMapReduceTests.class, LargeTests.class})
-public class TestTableInputFormatScan1 extends TestTableInputFormatScanBase {
-
- /**
- * Tests a MR scan using specific start and stop rows.
- *
- * @throws IOException
- * @throws ClassNotFoundException
- * @throws InterruptedException
- */
- @Test
- public void testScanEmptyToEmpty()
- throws IOException, InterruptedException, ClassNotFoundException {
- testScan(null, null, null);
- }
-
- /**
- * Tests a MR scan using specific start and stop rows.
- *
- * @throws IOException
- * @throws ClassNotFoundException
- * @throws InterruptedException
- */
- @Test
- public void testScanEmptyToAPP()
- throws IOException, InterruptedException, ClassNotFoundException {
- testScan(null, "app", "apo");
- }
-
- /**
- * Tests a MR scan using specific start and stop rows.
- *
- * @throws IOException
- * @throws ClassNotFoundException
- * @throws InterruptedException
- */
- @Test
- public void testScanEmptyToBBA()
- throws IOException, InterruptedException, ClassNotFoundException {
- testScan(null, "bba", "baz");
- }
-
- /**
- * Tests a MR scan using specific start and stop rows.
- *
- * @throws IOException
- * @throws ClassNotFoundException
- * @throws InterruptedException
- */
- @Test
- public void testScanEmptyToBBB()
- throws IOException, InterruptedException, ClassNotFoundException {
- testScan(null, "bbb", "bba");
- }
-
- /**
- * Tests a MR scan using specific start and stop rows.
- *
- * @throws IOException
- * @throws ClassNotFoundException
- * @throws InterruptedException
- */
- @Test
- public void testScanEmptyToOPP()
- throws IOException, InterruptedException, ClassNotFoundException {
- testScan(null, "opp", "opo");
- }
-
- /**
- * Tests a MR scan using a specific number of mappers. The test table has 25 regions,
- * and all region sizes default to 0. The average region size is 1 (the smallest
- * positive value). When we set hbase.mapreduce.input.ratio to -1, every region is cut into two
- * MapReduce input splits, so the number of MR input splits should be 50; when we set hbase
- * .mapreduce.input.ratio to 100, the sum of all region sizes is less than the average region
- * size, so all regions are combined into a single MapReduce input split.
- *
- * @throws IOException
- * @throws ClassNotFoundException
- * @throws InterruptedException
- */
- @Test
- public void testGetSplits() throws IOException, InterruptedException, ClassNotFoundException {
- testNumOfSplits("-1", 52);
- testNumOfSplits("100", 1);
- }
-
- /**
- * Tests the getSplitKey() method in TableInputFormatBase.java
- *
- * @throws IOException
- * @throws ClassNotFoundException
- * @throws InterruptedException
- */
- @Test
- public void testGetSplitsPoint() throws IOException, InterruptedException,
- ClassNotFoundException {
- byte[] start1 = { 'a', 'a', 'a', 'b', 'c', 'd', 'e', 'f' };
- byte[] end1 = { 'a', 'a', 'a', 'f', 'f' };
- byte[] splitPoint1 = { 'a', 'a', 'a', 'd', 'd', -78, 50, -77 };
- testGetSplitKey(start1, end1, splitPoint1, true);
-
- byte[] start2 = { '1', '1', '1', '0', '0', '0' };
- byte[] end2 = { '1', '1', '2', '5', '7', '9', '0' };
- byte[] splitPoint2 = { '1', '1', '1', -78, -77, -76, -104 };
- testGetSplitKey(start2, end2, splitPoint2, true);
-
- byte[] start3 = { 'a', 'a', 'a', 'a', 'a', 'a' };
- byte[] end3 = { 'a', 'a', 'b' };
- byte[] splitPoint3 = { 'a', 'a', 'a', -80, -80, -80 };
- testGetSplitKey(start3, end3, splitPoint3, true);
-
- byte[] start4 = { 'a', 'a', 'a' };
- byte[] end4 = { 'a', 'a', 'a', 'z' };
- byte[] splitPoint4 = { 'a', 'a', 'a', '=' };
- testGetSplitKey(start4, end4, splitPoint4, true);
-
- byte[] start5 = { 'a', 'a', 'a' };
- byte[] end5 = { 'a', 'a', 'b', 'a' };
- byte[] splitPoint5 = { 'a', 'a', 'a', -80 };
- testGetSplitKey(start5, end5, splitPoint5, true);
-
- // Test Case 6: empty key and "hhhqqqwww", split point is "h"
- byte[] start6 = {};
- byte[] end6 = { 'h', 'h', 'h', 'q', 'q', 'q', 'w', 'w' };
- byte[] splitPointText6 = { 'h' };
- byte[] splitPointBinary6 = { 104 };
- testGetSplitKey(start6, end6, splitPointText6, true);
- testGetSplitKey(start6, end6, splitPointBinary6, false);
-
- // Test Case 7: "ffffaaa" and empty key, split point depends on the mode we choose(text key or
- // binary key).
- byte[] start7 = { 'f', 'f', 'f', 'f', 'a', 'a', 'a' };
- byte[] end7 = {};
- byte[] splitPointText7 = { 'f', '~', '~', '~', '~', '~', '~' };
- byte[] splitPointBinary7 = { 'f', -1, -1, -1, -1, -1, -1 };
- testGetSplitKey(start7, end7, splitPointText7, true);
- testGetSplitKey(start7, end7, splitPointBinary7, false);
-
- // Test Case 8: both start key and end key are empty. Split point depends on the mode we
- // choose (text key or binary key).
- byte[] start8 = {};
- byte[] end8 = {};
- byte[] splitPointText8 = { 'O' };
- byte[] splitPointBinary8 = { 0 };
- testGetSplitKey(start8, end8, splitPointText8, true);
- testGetSplitKey(start8, end8, splitPointBinary8, false);
-
- // Test Case 9: Binary Key example
- byte[] start9 = { 13, -19, 126, 127 };
- byte[] end9 = { 13, -19, 127, 0 };
- byte[] splitPoint9 = { 13, -19, 126, -65 };
- testGetSplitKey(start9, end9, splitPoint9, false);
-
- // Test Case 10: Binary key split when the start key is an unsigned byte and the end byte is a
- // signed byte
- byte[] start10 = { 'x' };
- byte[] end10 = { -128 };
- byte[] splitPoint10 = { '|' };
- testGetSplitKey(start10, end10, splitPoint10, false);
-
- // Test Case 11: Binary key split when the start key is a signed byte and the end byte is a
- // signed byte
- byte[] start11 = { -100 };
- byte[] end11 = { -90 };
- byte[] splitPoint11 = { -95 };
- testGetSplitKey(start11, end11, splitPoint11, false);
- }
-
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScan2.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScan2.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScan2.java
deleted file mode 100644
index 02f893f..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScan2.java
+++ /dev/null
@@ -1,118 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-/**
- * TestTableInputFormatScan part 2.
- * @see TestTableInputFormatScanBase
- */
-@Category({VerySlowMapReduceTests.class, LargeTests.class})
-public class TestTableInputFormatScan2 extends TestTableInputFormatScanBase {
-
- /**
- * Tests a MR scan using specific start and stop rows.
- *
- * @throws IOException
- * @throws ClassNotFoundException
- * @throws InterruptedException
- */
- @Test
- public void testScanOBBToOPP()
- throws IOException, InterruptedException, ClassNotFoundException {
- testScan("obb", "opp", "opo");
- }
-
- /**
- * Tests a MR scan using specific start and stop rows.
- *
- * @throws IOException
- * @throws ClassNotFoundException
- * @throws InterruptedException
- */
- @Test
- public void testScanOBBToQPP()
- throws IOException, InterruptedException, ClassNotFoundException {
- testScan("obb", "qpp", "qpo");
- }
-
- /**
- * Tests a MR scan using specific start and stop rows.
- *
- * @throws IOException
- * @throws ClassNotFoundException
- * @throws InterruptedException
- */
- @Test
- public void testScanOPPToEmpty()
- throws IOException, InterruptedException, ClassNotFoundException {
- testScan("opp", null, "zzz");
- }
-
- /**
- * Tests a MR scan using specific start and stop rows.
- *
- * @throws IOException
- * @throws ClassNotFoundException
- * @throws InterruptedException
- */
- @Test
- public void testScanYYXToEmpty()
- throws IOException, InterruptedException, ClassNotFoundException {
- testScan("yyx", null, "zzz");
- }
-
- /**
- * Tests a MR scan using specific start and stop rows.
- *
- * @throws IOException
- * @throws ClassNotFoundException
- * @throws InterruptedException
- */
- @Test
- public void testScanYYYToEmpty()
- throws IOException, InterruptedException, ClassNotFoundException {
- testScan("yyy", null, "zzz");
- }
-
- /**
- * Tests a MR scan using specific start and stop rows.
- *
- * @throws IOException
- * @throws ClassNotFoundException
- * @throws InterruptedException
- */
- @Test
- public void testScanYZYToEmpty()
- throws IOException, InterruptedException, ClassNotFoundException {
- testScan("yzy", null, "zzz");
- }
-
- @Test
- public void testScanFromConfiguration()
- throws IOException, InterruptedException, ClassNotFoundException {
- testScanFromConfiguration("bba", "bbd", "bbc");
- }
-}
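The scan-range tests in this file drive TableInputFormat through a Scan bounded by start and stop rows. A minimal sketch of wiring such a bounded scan into a MapReduce job follows; the table name "scantest" and the choice of IdentityTableMapper are illustrative assumptions, not details taken from TestTableInputFormatScanBase.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.IdentityTableMapper;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Job;

public class BoundedScanJobSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    Job job = Job.getInstance(conf, "scan-obb-to-opp");
    // Scan only rows in ["obb", "opp"), mirroring testScanOBBToOPP above.
    Scan scan = new Scan();
    scan.setStartRow(Bytes.toBytes("obb"));
    scan.setStopRow(Bytes.toBytes("opp"));
    // IdentityTableMapper forwards each (row key, Result) pair unchanged.
    TableMapReduceUtil.initTableMapperJob("scantest", scan, IdentityTableMapper.class,
        ImmutableBytesWritable.class, Result.class, job);
    job.setNumReduceTasks(0);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}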
[21/41] hbase git commit: HBASE-18640 Move mapreduce out of hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/LoadTestTool.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/LoadTestTool.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/LoadTestTool.java
new file mode 100644
index 0000000..6b5cbe2
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/LoadTestTool.java
@@ -0,0 +1,915 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hadoop.hbase.util;
+
+import java.io.IOException;
+import java.io.InterruptedIOException;
+import java.lang.reflect.Constructor;
+import java.security.SecureRandom;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Properties;
+import java.util.Random;
+import java.util.concurrent.atomic.AtomicReference;
+
+import javax.crypto.spec.SecretKeySpec;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HBaseInterfaceAudience;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Durability;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.io.compress.Compression;
+import org.apache.hadoop.hbase.io.crypto.Cipher;
+import org.apache.hadoop.hbase.io.crypto.Encryption;
+import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
+import org.apache.hadoop.hbase.regionserver.BloomType;
+import org.apache.hadoop.hbase.security.EncryptionUtil;
+import org.apache.hadoop.hbase.security.HBaseKerberosUtils;
+import org.apache.hadoop.hbase.security.User;
+import org.apache.hadoop.hbase.security.access.AccessControlClient;
+import org.apache.hadoop.hbase.security.access.Permission;
+import org.apache.hadoop.hbase.util.test.LoadTestDataGenerator;
+import org.apache.hadoop.hbase.util.test.LoadTestDataGeneratorWithACL;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * A command-line utility that reads, writes, and verifies data. Unlike
+ * {@link org.apache.hadoop.hbase.PerformanceEvaluation}, this tool validates the data written,
+ * and supports simultaneously writing and reading the same set of keys.
+ */
+@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
+public class LoadTestTool extends AbstractHBaseTool {
+
+ private static final Log LOG = LogFactory.getLog(LoadTestTool.class);
+ private static final String COLON = ":";
+
+ /** Table name for the test */
+ private TableName tableName;
+
+ /** Column families for the test */
+ private byte[][] families;
+
+ /** Table name to use if not overridden on the command line */
+ protected static final String DEFAULT_TABLE_NAME = "cluster_test";
+
+ /** The default data size if not specified */
+ protected static final int DEFAULT_DATA_SIZE = 64;
+
+ /** The number of reader/writer threads if not specified */
+ protected static final int DEFAULT_NUM_THREADS = 20;
+
+ /** Usage string for the load option */
+ protected static final String OPT_USAGE_LOAD =
+ "<avg_cols_per_key>:<avg_data_size>" +
+ "[:<#threads=" + DEFAULT_NUM_THREADS + ">]";
+
+ /** Usage string for the read option */
+ protected static final String OPT_USAGE_READ =
+ "<verify_percent>[:<#threads=" + DEFAULT_NUM_THREADS + ">]";
+
+ /** Usage string for the update option */
+ protected static final String OPT_USAGE_UPDATE =
+ "<update_percent>[:<#threads=" + DEFAULT_NUM_THREADS
+ + ">][:<#whether to ignore nonce collisions=0>]";
+
+ protected static final String OPT_USAGE_BLOOM = "Bloom filter type, one of " +
+ Arrays.toString(BloomType.values());
+
+ protected static final String OPT_USAGE_COMPRESSION = "Compression type, " +
+ "one of " + Arrays.toString(Compression.Algorithm.values());
+
+ public static final String OPT_BLOOM = "bloom";
+ public static final String OPT_COMPRESSION = "compression";
+ public static final String OPT_DEFERRED_LOG_FLUSH = "deferredlogflush";
+ public static final String OPT_DEFERRED_LOG_FLUSH_USAGE = "Enable deferred log flush.";
+
+ public static final String OPT_INMEMORY = "in_memory";
+ public static final String OPT_USAGE_IN_MEMORY = "Tries to keep the HFiles of the CF " +
+ "inmemory as far as possible. Not guaranteed that reads are always served from inmemory";
+
+ public static final String OPT_GENERATOR = "generator";
+ public static final String OPT_GENERATOR_USAGE = "The class which generates load for the tool."
+ + " Any args for this class can be passed as colon separated after class name";
+
+ public static final String OPT_WRITER = "writer";
+ public static final String OPT_WRITER_USAGE = "The class for executing the write requests";
+
+ public static final String OPT_UPDATER = "updater";
+ public static final String OPT_UPDATER_USAGE = "The class for executing the update requests";
+
+ public static final String OPT_READER = "reader";
+ public static final String OPT_READER_USAGE = "The class for executing the read requests";
+
+ protected static final String OPT_KEY_WINDOW = "key_window";
+ protected static final String OPT_WRITE = "write";
+ protected static final String OPT_MAX_READ_ERRORS = "max_read_errors";
+ public static final String OPT_MULTIPUT = "multiput";
+ public static final String OPT_MULTIGET = "multiget_batchsize";
+ protected static final String OPT_NUM_KEYS = "num_keys";
+ protected static final String OPT_READ = "read";
+ protected static final String OPT_START_KEY = "start_key";
+ public static final String OPT_TABLE_NAME = "tn";
+ public static final String OPT_COLUMN_FAMILIES = "families";
+ protected static final String OPT_ZK_QUORUM = "zk";
+ protected static final String OPT_ZK_PARENT_NODE = "zk_root";
+ protected static final String OPT_SKIP_INIT = "skip_init";
+ protected static final String OPT_INIT_ONLY = "init_only";
+ protected static final String NUM_TABLES = "num_tables";
+ protected static final String OPT_REGIONS_PER_SERVER = "regions_per_server";
+ protected static final String OPT_BATCHUPDATE = "batchupdate";
+ protected static final String OPT_UPDATE = "update";
+
+ public static final String OPT_ENCRYPTION = "encryption";
+ protected static final String OPT_ENCRYPTION_USAGE =
+ "Enables transparent encryption on the test table, one of " +
+ Arrays.toString(Encryption.getSupportedCiphers());
+
+ public static final String OPT_NUM_REGIONS_PER_SERVER = "num_regions_per_server";
+ protected static final String OPT_NUM_REGIONS_PER_SERVER_USAGE
+ = "Desired number of regions per region server. Defaults to 5.";
+ public static int DEFAULT_NUM_REGIONS_PER_SERVER = 5;
+
+ public static final String OPT_REGION_REPLICATION = "region_replication";
+ protected static final String OPT_REGION_REPLICATION_USAGE =
+ "Desired number of replicas per region";
+
+ public static final String OPT_REGION_REPLICA_ID = "region_replica_id";
+ protected static final String OPT_REGION_REPLICA_ID_USAGE =
+ "Region replica id to do the reads from";
+
+ public static final String OPT_MOB_THRESHOLD = "mob_threshold";
+ protected static final String OPT_MOB_THRESHOLD_USAGE =
+ "Desired cell size to exceed in bytes that will use the MOB write path";
+
+ protected static final long DEFAULT_START_KEY = 0;
+
+ /** This will be removed as we factor out the dependency on command line */
+ protected CommandLine cmd;
+
+ protected MultiThreadedWriter writerThreads = null;
+ protected MultiThreadedReader readerThreads = null;
+ protected MultiThreadedUpdater updaterThreads = null;
+
+ protected long startKey, endKey;
+
+ protected boolean isWrite, isRead, isUpdate;
+ protected boolean deferredLogFlush;
+
+ // Column family options
+ protected DataBlockEncoding dataBlockEncodingAlgo;
+ protected Compression.Algorithm compressAlgo;
+ protected BloomType bloomType;
+ private boolean inMemoryCF;
+
+ private User userOwner;
+ // Writer options
+ protected int numWriterThreads = DEFAULT_NUM_THREADS;
+ protected int minColsPerKey, maxColsPerKey;
+ protected int minColDataSize = DEFAULT_DATA_SIZE, maxColDataSize = DEFAULT_DATA_SIZE;
+ protected boolean isMultiPut;
+
+ // Updater options
+ protected int numUpdaterThreads = DEFAULT_NUM_THREADS;
+ protected int updatePercent;
+ protected boolean ignoreConflicts = false;
+ protected boolean isBatchUpdate;
+
+ // Reader options
+ private int numReaderThreads = DEFAULT_NUM_THREADS;
+ private int keyWindow = MultiThreadedReader.DEFAULT_KEY_WINDOW;
+ private int multiGetBatchSize = MultiThreadedReader.DEFAULT_BATCH_SIZE;
+ private int maxReadErrors = MultiThreadedReader.DEFAULT_MAX_ERRORS;
+ private int verifyPercent;
+
+ private int numTables = 1;
+
+ private String superUser;
+
+ private String userNames;
+ //This file is used to read authentication information in secure clusters.
+ private String authnFileName;
+
+ private int numRegionsPerServer = DEFAULT_NUM_REGIONS_PER_SERVER;
+ private int regionReplication = -1; // not set
+ private int regionReplicaId = -1; // not set
+
+ private int mobThreshold = -1; // not set
+
+ // TODO: refactor LoadTestToolImpl somewhere to make the usage from tests less bad,
+ // console tool itself should only be used from console.
+ protected boolean isSkipInit = false;
+ protected boolean isInitOnly = false;
+
+ protected Cipher cipher = null;
+
+ protected String[] splitColonSeparated(String option,
+ int minNumCols, int maxNumCols) {
+ String optVal = cmd.getOptionValue(option);
+ String[] cols = optVal.split(COLON);
+ if (cols.length < minNumCols || cols.length > maxNumCols) {
+ throw new IllegalArgumentException("Expected at least "
+ + minNumCols + " columns but no more than " + maxNumCols +
+ " in the colon-separated value '" + optVal + "' of the " +
+ "-" + option + " option");
+ }
+ return cols;
+ }
+
+ protected int getNumThreads(String numThreadsStr) {
+ return parseInt(numThreadsStr, 1, Short.MAX_VALUE);
+ }
+
+ public byte[][] getColumnFamilies() {
+ return families;
+ }
+
+ /**
+ * Apply column family options such as Bloom filters, compression, and data
+ * block encoding.
+ */
+ protected void applyColumnFamilyOptions(TableName tableName,
+ byte[][] columnFamilies) throws IOException {
+ try (Connection conn = ConnectionFactory.createConnection(conf);
+ Admin admin = conn.getAdmin()) {
+ TableDescriptor tableDesc = admin.getTableDescriptor(tableName);
+ LOG.info("Disabling table " + tableName);
+ admin.disableTable(tableName);
+ for (byte[] cf : columnFamilies) {
+ HColumnDescriptor columnDesc = (HColumnDescriptor) tableDesc.getColumnFamily(cf);
+ boolean isNewCf = columnDesc == null;
+ if (isNewCf) {
+ columnDesc = new HColumnDescriptor(cf);
+ }
+ if (bloomType != null) {
+ columnDesc.setBloomFilterType(bloomType);
+ }
+ if (compressAlgo != null) {
+ columnDesc.setCompressionType(compressAlgo);
+ }
+ if (dataBlockEncodingAlgo != null) {
+ columnDesc.setDataBlockEncoding(dataBlockEncodingAlgo);
+ }
+ if (inMemoryCF) {
+ columnDesc.setInMemory(inMemoryCF);
+ }
+ if (cipher != null) {
+ byte[] keyBytes = new byte[cipher.getKeyLength()];
+ new SecureRandom().nextBytes(keyBytes);
+ columnDesc.setEncryptionType(cipher.getName());
+ columnDesc.setEncryptionKey(
+ EncryptionUtil.wrapKey(conf,
+ User.getCurrent().getShortName(),
+ new SecretKeySpec(keyBytes,
+ cipher.getName())));
+ }
+ if (mobThreshold >= 0) {
+ columnDesc.setMobEnabled(true);
+ columnDesc.setMobThreshold(mobThreshold);
+ }
+
+ if (isNewCf) {
+ admin.addColumnFamily(tableName, columnDesc);
+ } else {
+ admin.modifyColumnFamily(tableName, columnDesc);
+ }
+ }
+ LOG.info("Enabling table " + tableName);
+ admin.enableTable(tableName);
+ }
+ }
+
+ @Override
+ protected void addOptions() {
+ addOptWithArg(OPT_ZK_QUORUM, "ZK quorum as comma-separated host names " +
+ "without port numbers");
+ addOptWithArg(OPT_ZK_PARENT_NODE, "name of parent znode in zookeeper");
+ addOptWithArg(OPT_TABLE_NAME, "The name of the table to read or write");
+ addOptWithArg(OPT_COLUMN_FAMILIES, "The names of the column families to use, separated by commas");
+ addOptWithArg(OPT_WRITE, OPT_USAGE_LOAD);
+ addOptWithArg(OPT_READ, OPT_USAGE_READ);
+ addOptWithArg(OPT_UPDATE, OPT_USAGE_UPDATE);
+ addOptNoArg(OPT_INIT_ONLY, "Initialize the test table only, don't do any loading");
+ addOptWithArg(OPT_BLOOM, OPT_USAGE_BLOOM);
+ addOptWithArg(OPT_COMPRESSION, OPT_USAGE_COMPRESSION);
+ addOptWithArg(HFileTestUtil.OPT_DATA_BLOCK_ENCODING, HFileTestUtil.OPT_DATA_BLOCK_ENCODING_USAGE);
+ addOptWithArg(OPT_MAX_READ_ERRORS, "The maximum number of read errors " +
+ "to tolerate before terminating all reader threads. The default is " +
+ MultiThreadedReader.DEFAULT_MAX_ERRORS + ".");
+ addOptWithArg(OPT_MULTIGET, "Whether to use multi-gets as opposed to " +
+ "separate gets for every column in a row");
+ addOptWithArg(OPT_KEY_WINDOW, "The 'key window' to maintain between " +
+ "reads and writes for concurrent write/read workload. The default " +
+ "is " + MultiThreadedReader.DEFAULT_KEY_WINDOW + ".");
+
+ addOptNoArg(OPT_MULTIPUT, "Whether to use multi-puts as opposed to " +
+ "separate puts for every column in a row");
+ addOptNoArg(OPT_BATCHUPDATE, "Whether to use batch as opposed to " +
+ "separate updates for every column in a row");
+ addOptNoArg(OPT_INMEMORY, OPT_USAGE_IN_MEMORY);
+ addOptWithArg(OPT_GENERATOR, OPT_GENERATOR_USAGE);
+ addOptWithArg(OPT_WRITER, OPT_WRITER_USAGE);
+ addOptWithArg(OPT_UPDATER, OPT_UPDATER_USAGE);
+ addOptWithArg(OPT_READER, OPT_READER_USAGE);
+
+ addOptWithArg(OPT_NUM_KEYS, "The number of keys to read/write");
+ addOptWithArg(OPT_START_KEY, "The first key to read/write " +
+ "(a 0-based index). The default value is " +
+ DEFAULT_START_KEY + ".");
+ addOptNoArg(OPT_SKIP_INIT, "Skip the initialization; assume test table "
+ + "already exists");
+
+ addOptWithArg(NUM_TABLES,
+ "A positive integer number. When a number n is speicfied, load test "
+ + "tool will load n table parallely. -tn parameter value becomes "
+ + "table name prefix. Each table name is in format <tn>_1...<tn>_n");
+
+ addOptWithArg(OPT_REGIONS_PER_SERVER,
+ "A positive integer number. When a number n is specified, load test "
+ + "tool will create the test table with n regions per server");
+
+ addOptWithArg(OPT_ENCRYPTION, OPT_ENCRYPTION_USAGE);
+ addOptNoArg(OPT_DEFERRED_LOG_FLUSH, OPT_DEFERRED_LOG_FLUSH_USAGE);
+ addOptWithArg(OPT_NUM_REGIONS_PER_SERVER, OPT_NUM_REGIONS_PER_SERVER_USAGE);
+ addOptWithArg(OPT_REGION_REPLICATION, OPT_REGION_REPLICATION_USAGE);
+ addOptWithArg(OPT_REGION_REPLICA_ID, OPT_REGION_REPLICA_ID_USAGE);
+ addOptWithArg(OPT_MOB_THRESHOLD, OPT_MOB_THRESHOLD_USAGE);
+ }
+
+ @Override
+ protected void processOptions(CommandLine cmd) {
+ this.cmd = cmd;
+
+ tableName = TableName.valueOf(cmd.getOptionValue(OPT_TABLE_NAME,
+ DEFAULT_TABLE_NAME));
+
+ if (cmd.hasOption(OPT_COLUMN_FAMILIES)) {
+ String[] list = cmd.getOptionValue(OPT_COLUMN_FAMILIES).split(",");
+ families = new byte[list.length][];
+ for (int i = 0; i < list.length; i++) {
+ families[i] = Bytes.toBytes(list[i]);
+ }
+ } else {
+ families = HFileTestUtil.DEFAULT_COLUMN_FAMILIES;
+ }
+
+ isWrite = cmd.hasOption(OPT_WRITE);
+ isRead = cmd.hasOption(OPT_READ);
+ isUpdate = cmd.hasOption(OPT_UPDATE);
+ isInitOnly = cmd.hasOption(OPT_INIT_ONLY);
+ deferredLogFlush = cmd.hasOption(OPT_DEFERRED_LOG_FLUSH);
+
+ if (!isWrite && !isRead && !isUpdate && !isInitOnly) {
+ throw new IllegalArgumentException("Either -" + OPT_WRITE + " or " +
+ "-" + OPT_UPDATE + " or -" + OPT_READ + " has to be specified");
+ }
+
+ if (isInitOnly && (isRead || isWrite || isUpdate)) {
+ throw new IllegalArgumentException(OPT_INIT_ONLY + " cannot be specified with"
+ + " either -" + OPT_WRITE + " or -" + OPT_UPDATE + " or -" + OPT_READ);
+ }
+
+ if (!isInitOnly) {
+ if (!cmd.hasOption(OPT_NUM_KEYS)) {
+ throw new IllegalArgumentException(OPT_NUM_KEYS + " must be specified in "
+ + "read or write mode");
+ }
+ startKey = parseLong(cmd.getOptionValue(OPT_START_KEY,
+ String.valueOf(DEFAULT_START_KEY)), 0, Long.MAX_VALUE);
+ long numKeys = parseLong(cmd.getOptionValue(OPT_NUM_KEYS), 1,
+ Long.MAX_VALUE - startKey);
+ endKey = startKey + numKeys;
+ isSkipInit = cmd.hasOption(OPT_SKIP_INIT);
+ System.out.println("Key range: [" + startKey + ".." + (endKey - 1) + "]");
+ }
+
+ parseColumnFamilyOptions(cmd);
+
+ if (isWrite) {
+ String[] writeOpts = splitColonSeparated(OPT_WRITE, 2, 3);
+
+ int colIndex = 0;
+ minColsPerKey = 1;
+ maxColsPerKey = 2 * Integer.parseInt(writeOpts[colIndex++]);
+ int avgColDataSize =
+ parseInt(writeOpts[colIndex++], 1, Integer.MAX_VALUE);
+ minColDataSize = avgColDataSize / 2;
+ maxColDataSize = avgColDataSize * 3 / 2;
+
+ if (colIndex < writeOpts.length) {
+ numWriterThreads = getNumThreads(writeOpts[colIndex++]);
+ }
+
+ isMultiPut = cmd.hasOption(OPT_MULTIPUT);
+
+ mobThreshold = -1;
+ if (cmd.hasOption(OPT_MOB_THRESHOLD)) {
+ mobThreshold = Integer.parseInt(cmd.getOptionValue(OPT_MOB_THRESHOLD));
+ }
+
+ System.out.println("Multi-puts: " + isMultiPut);
+ System.out.println("Columns per key: " + minColsPerKey + ".."
+ + maxColsPerKey);
+ System.out.println("Data size per column: " + minColDataSize + ".."
+ + maxColDataSize);
+ }
+
+ if (isUpdate) {
+ String[] mutateOpts = splitColonSeparated(OPT_UPDATE, 1, 3);
+ int colIndex = 0;
+ updatePercent = parseInt(mutateOpts[colIndex++], 0, 100);
+ if (colIndex < mutateOpts.length) {
+ numUpdaterThreads = getNumThreads(mutateOpts[colIndex++]);
+ }
+ if (colIndex < mutateOpts.length) {
+ ignoreConflicts = parseInt(mutateOpts[colIndex++], 0, 1) == 1;
+ }
+
+ isBatchUpdate = cmd.hasOption(OPT_BATCHUPDATE);
+
+ System.out.println("Batch updates: " + isBatchUpdate);
+ System.out.println("Percent of keys to update: " + updatePercent);
+ System.out.println("Updater threads: " + numUpdaterThreads);
+ System.out.println("Ignore nonce conflicts: " + ignoreConflicts);
+ }
+
+ if (isRead) {
+ String[] readOpts = splitColonSeparated(OPT_READ, 1, 2);
+ int colIndex = 0;
+ verifyPercent = parseInt(readOpts[colIndex++], 0, 100);
+ if (colIndex < readOpts.length) {
+ numReaderThreads = getNumThreads(readOpts[colIndex++]);
+ }
+
+ if (cmd.hasOption(OPT_MAX_READ_ERRORS)) {
+ maxReadErrors = parseInt(cmd.getOptionValue(OPT_MAX_READ_ERRORS),
+ 0, Integer.MAX_VALUE);
+ }
+
+ if (cmd.hasOption(OPT_KEY_WINDOW)) {
+ keyWindow = parseInt(cmd.getOptionValue(OPT_KEY_WINDOW),
+ 0, Integer.MAX_VALUE);
+ }
+
+ if (cmd.hasOption(OPT_MULTIGET)) {
+ multiGetBatchSize = parseInt(cmd.getOptionValue(OPT_MULTIGET),
+ 0, Integer.MAX_VALUE);
+ }
+
+ System.out.println("Multi-gets (value of 1 means no multigets): " + multiGetBatchSize);
+ System.out.println("Percent of keys to verify: " + verifyPercent);
+ System.out.println("Reader threads: " + numReaderThreads);
+ }
+
+ numTables = 1;
+ if (cmd.hasOption(NUM_TABLES)) {
+ numTables = parseInt(cmd.getOptionValue(NUM_TABLES), 1, Short.MAX_VALUE);
+ }
+
+ numRegionsPerServer = DEFAULT_NUM_REGIONS_PER_SERVER;
+ if (cmd.hasOption(OPT_NUM_REGIONS_PER_SERVER)) {
+ numRegionsPerServer = Integer.parseInt(cmd.getOptionValue(OPT_NUM_REGIONS_PER_SERVER));
+ }
+
+ regionReplication = 1;
+ if (cmd.hasOption(OPT_REGION_REPLICATION)) {
+ regionReplication = Integer.parseInt(cmd.getOptionValue(OPT_REGION_REPLICATION));
+ }
+
+ regionReplicaId = -1;
+ if (cmd.hasOption(OPT_REGION_REPLICA_ID)) {
+ regionReplicaId = Integer.parseInt(cmd.getOptionValue(OPT_REGION_REPLICA_ID));
+ }
+ }
+
+ private void parseColumnFamilyOptions(CommandLine cmd) {
+ String dataBlockEncodingStr = cmd.getOptionValue(HFileTestUtil.OPT_DATA_BLOCK_ENCODING);
+ dataBlockEncodingAlgo = dataBlockEncodingStr == null ? null :
+ DataBlockEncoding.valueOf(dataBlockEncodingStr);
+
+ String compressStr = cmd.getOptionValue(OPT_COMPRESSION);
+ compressAlgo = compressStr == null ? Compression.Algorithm.NONE :
+ Compression.Algorithm.valueOf(compressStr);
+
+ String bloomStr = cmd.getOptionValue(OPT_BLOOM);
+ bloomType = bloomStr == null ? BloomType.ROW :
+ BloomType.valueOf(bloomStr);
+
+ inMemoryCF = cmd.hasOption(OPT_INMEMORY);
+ if (cmd.hasOption(OPT_ENCRYPTION)) {
+ cipher = Encryption.getCipher(conf, cmd.getOptionValue(OPT_ENCRYPTION));
+ }
+
+ }
+
+ public void initTestTable() throws IOException {
+ Durability durability = Durability.USE_DEFAULT;
+ if (deferredLogFlush) {
+ durability = Durability.ASYNC_WAL;
+ }
+
+ HBaseTestingUtility.createPreSplitLoadTestTable(conf, tableName,
+ getColumnFamilies(), compressAlgo, dataBlockEncodingAlgo, numRegionsPerServer,
+ regionReplication, durability);
+ applyColumnFamilyOptions(tableName, getColumnFamilies());
+ }
+
+ @Override
+ protected int doWork() throws IOException {
+ if (numTables > 1) {
+ return parallelLoadTables();
+ } else {
+ return loadTable();
+ }
+ }
+
+ protected int loadTable() throws IOException {
+ if (cmd.hasOption(OPT_ZK_QUORUM)) {
+ conf.set(HConstants.ZOOKEEPER_QUORUM, cmd.getOptionValue(OPT_ZK_QUORUM));
+ }
+ if (cmd.hasOption(OPT_ZK_PARENT_NODE)) {
+ conf.set(HConstants.ZOOKEEPER_ZNODE_PARENT, cmd.getOptionValue(OPT_ZK_PARENT_NODE));
+ }
+
+ if (isInitOnly) {
+ LOG.info("Initializing only; no reads or writes");
+ initTestTable();
+ return 0;
+ }
+
+ if (!isSkipInit) {
+ initTestTable();
+ }
+ LoadTestDataGenerator dataGen = null;
+ if (cmd.hasOption(OPT_GENERATOR)) {
+ String[] clazzAndArgs = cmd.getOptionValue(OPT_GENERATOR).split(COLON);
+ dataGen = getLoadGeneratorInstance(clazzAndArgs[0]);
+ String[] args;
+ if (dataGen instanceof LoadTestDataGeneratorWithACL) {
+ LOG.info("Using LoadTestDataGeneratorWithACL");
+ if (User.isHBaseSecurityEnabled(conf)) {
+ LOG.info("Security is enabled");
+ authnFileName = clazzAndArgs[1];
+ superUser = clazzAndArgs[2];
+ userNames = clazzAndArgs[3];
+ args = Arrays.copyOfRange(clazzAndArgs, 2, clazzAndArgs.length);
+ Properties authConfig = new Properties();
+ authConfig.load(this.getClass().getClassLoader().getResourceAsStream(authnFileName));
+ try {
+ addAuthInfoToConf(authConfig, conf, superUser, userNames);
+ } catch (IOException exp) {
+ LOG.error(exp);
+ return EXIT_FAILURE;
+ }
+ userOwner = User.create(HBaseKerberosUtils.loginAndReturnUGI(conf, superUser));
+ } else {
+ superUser = clazzAndArgs[1];
+ userNames = clazzAndArgs[2];
+ args = Arrays.copyOfRange(clazzAndArgs, 1, clazzAndArgs.length);
+ userOwner = User.createUserForTesting(conf, superUser, new String[0]);
+ }
+ } else {
+ args = clazzAndArgs.length == 1 ? new String[0] : Arrays.copyOfRange(clazzAndArgs, 1,
+ clazzAndArgs.length);
+ }
+ dataGen.initialize(args);
+ } else {
+ // Default DataGenerator is MultiThreadedAction.DefaultDataGenerator
+ dataGen = new MultiThreadedAction.DefaultDataGenerator(minColDataSize, maxColDataSize,
+ minColsPerKey, maxColsPerKey, families);
+ }
+
+ if (userOwner != null) {
+ LOG.info("Granting permissions for user " + userOwner.getShortName());
+ Permission.Action[] actions = {
+ Permission.Action.ADMIN, Permission.Action.CREATE,
+ Permission.Action.READ, Permission.Action.WRITE };
+ try {
+ AccessControlClient.grant(ConnectionFactory.createConnection(conf),
+ tableName, userOwner.getShortName(), null, null, actions);
+ } catch (Throwable e) {
+ LOG.fatal("Error in granting permission for the user " + userOwner.getShortName(), e);
+ return EXIT_FAILURE;
+ }
+ }
+
+ if (userNames != null) {
+ // This will be comma separated list of expressions.
+ String users[] = userNames.split(",");
+ User user = null;
+ for (String userStr : users) {
+ if (User.isHBaseSecurityEnabled(conf)) {
+ user = User.create(HBaseKerberosUtils.loginAndReturnUGI(conf, userStr));
+ } else {
+ user = User.createUserForTesting(conf, userStr, new String[0]);
+ }
+ }
+ }
+
+ if (isWrite) {
+ if (userOwner != null) {
+ writerThreads = new MultiThreadedWriterWithACL(dataGen, conf, tableName, userOwner);
+ } else {
+ String writerClass = null;
+ if (cmd.hasOption(OPT_WRITER)) {
+ writerClass = cmd.getOptionValue(OPT_WRITER);
+ } else {
+ writerClass = MultiThreadedWriter.class.getCanonicalName();
+ }
+
+ writerThreads = getMultiThreadedWriterInstance(writerClass, dataGen);
+ }
+ writerThreads.setMultiPut(isMultiPut);
+ }
+
+ if (isUpdate) {
+ if (userOwner != null) {
+ updaterThreads = new MultiThreadedUpdaterWithACL(dataGen, conf, tableName, updatePercent,
+ userOwner, userNames);
+ } else {
+ String updaterClass = null;
+ if (cmd.hasOption(OPT_UPDATER)) {
+ updaterClass = cmd.getOptionValue(OPT_UPDATER);
+ } else {
+ updaterClass = MultiThreadedUpdater.class.getCanonicalName();
+ }
+ updaterThreads = getMultiThreadedUpdaterInstance(updaterClass, dataGen);
+ }
+ updaterThreads.setBatchUpdate(isBatchUpdate);
+ updaterThreads.setIgnoreNonceConflicts(ignoreConflicts);
+ }
+
+ if (isRead) {
+ if (userOwner != null) {
+ readerThreads = new MultiThreadedReaderWithACL(dataGen, conf, tableName, verifyPercent,
+ userNames);
+ } else {
+ String readerClass = null;
+ if (cmd.hasOption(OPT_READER)) {
+ readerClass = cmd.getOptionValue(OPT_READER);
+ } else {
+ readerClass = MultiThreadedReader.class.getCanonicalName();
+ }
+ readerThreads = getMultiThreadedReaderInstance(readerClass, dataGen);
+ }
+ readerThreads.setMaxErrors(maxReadErrors);
+ readerThreads.setKeyWindow(keyWindow);
+ readerThreads.setMultiGetBatchSize(multiGetBatchSize);
+ readerThreads.setRegionReplicaId(regionReplicaId);
+ }
+
+ if (isUpdate && isWrite) {
+ LOG.info("Concurrent write/update workload: making updaters aware of the " +
+ "write point");
+ updaterThreads.linkToWriter(writerThreads);
+ }
+
+ if (isRead && (isUpdate || isWrite)) {
+ LOG.info("Concurrent write/read workload: making readers aware of the " +
+ "write point");
+ readerThreads.linkToWriter(isUpdate ? updaterThreads : writerThreads);
+ }
+
+ if (isWrite) {
+ System.out.println("Starting to write data...");
+ writerThreads.start(startKey, endKey, numWriterThreads);
+ }
+
+ if (isUpdate) {
+ LOG.info("Starting to mutate data...");
+ System.out.println("Starting to mutate data...");
+ // TODO: currently append and increment operations are not tested with tags.
+ // Will update this after it is done.
+ updaterThreads.start(startKey, endKey, numUpdaterThreads);
+ }
+
+ if (isRead) {
+ System.out.println("Starting to read data...");
+ readerThreads.start(startKey, endKey, numReaderThreads);
+ }
+
+ if (isWrite) {
+ writerThreads.waitForFinish();
+ }
+
+ if (isUpdate) {
+ updaterThreads.waitForFinish();
+ }
+
+ if (isRead) {
+ readerThreads.waitForFinish();
+ }
+
+ boolean success = true;
+ if (isWrite) {
+ success = success && writerThreads.getNumWriteFailures() == 0;
+ }
+ if (isUpdate) {
+ success = success && updaterThreads.getNumWriteFailures() == 0;
+ }
+ if (isRead) {
+ success = success && readerThreads.getNumReadErrors() == 0
+ && readerThreads.getNumReadFailures() == 0;
+ }
+ return success ? EXIT_SUCCESS : EXIT_FAILURE;
+ }
+
+ private LoadTestDataGenerator getLoadGeneratorInstance(String clazzName) throws IOException {
+ try {
+ Class<?> clazz = Class.forName(clazzName);
+ Constructor<?> constructor = clazz.getConstructor(int.class, int.class, int.class, int.class,
+ byte[][].class);
+ return (LoadTestDataGenerator) constructor.newInstance(minColDataSize, maxColDataSize,
+ minColsPerKey, maxColsPerKey, families);
+ } catch (Exception e) {
+ throw new IOException(e);
+ }
+ }
+
+ private MultiThreadedWriter getMultiThreadedWriterInstance(String clazzName
+ , LoadTestDataGenerator dataGen) throws IOException {
+ try {
+ Class<?> clazz = Class.forName(clazzName);
+ Constructor<?> constructor = clazz.getConstructor(
+ LoadTestDataGenerator.class, Configuration.class, TableName.class);
+ return (MultiThreadedWriter) constructor.newInstance(dataGen, conf, tableName);
+ } catch (Exception e) {
+ throw new IOException(e);
+ }
+ }
+
+ private MultiThreadedUpdater getMultiThreadedUpdaterInstance(String clazzName
+ , LoadTestDataGenerator dataGen) throws IOException {
+ try {
+ Class<?> clazz = Class.forName(clazzName);
+ Constructor<?> constructor = clazz.getConstructor(
+ LoadTestDataGenerator.class, Configuration.class, TableName.class, double.class);
+ return (MultiThreadedUpdater) constructor.newInstance(
+ dataGen, conf, tableName, updatePercent);
+ } catch (Exception e) {
+ throw new IOException(e);
+ }
+ }
+
+ private MultiThreadedReader getMultiThreadedReaderInstance(String clazzName
+ , LoadTestDataGenerator dataGen) throws IOException {
+ try {
+ Class<?> clazz = Class.forName(clazzName);
+ Constructor<?> constructor = clazz.getConstructor(
+ LoadTestDataGenerator.class, Configuration.class, TableName.class, double.class);
+ return (MultiThreadedReader) constructor.newInstance(dataGen, conf, tableName, verifyPercent);
+ } catch (Exception e) {
+ throw new IOException(e);
+ }
+ }
+
+ public static void main(String[] args) {
+ new LoadTestTool().doStaticMain(args);
+ }
+
+ /**
+ * When NUM_TABLES is specified, this method starts multiple worker threads,
+ * each of which runs a LoadTestTool instance to load one table. Each
+ * table name is in the format <tn>_<index>. For example, with "-tn test -num_tables 2",
+ * the table names will be "test_1" and "test_2".
+ *
+ * @throws IOException
+ */
+ private int parallelLoadTables()
+ throws IOException {
+ // create new command args
+ String tableName = cmd.getOptionValue(OPT_TABLE_NAME, DEFAULT_TABLE_NAME);
+ String[] newArgs = null;
+ if (!cmd.hasOption(LoadTestTool.OPT_TABLE_NAME)) {
+ newArgs = new String[cmdLineArgs.length + 2];
+ newArgs[0] = "-" + LoadTestTool.OPT_TABLE_NAME;
+ newArgs[1] = LoadTestTool.DEFAULT_TABLE_NAME;
+ System.arraycopy(cmdLineArgs, 0, newArgs, 2, cmdLineArgs.length);
+ } else {
+ newArgs = cmdLineArgs;
+ }
+
+ int tableNameValueIndex = -1;
+ for (int j = 0; j < newArgs.length; j++) {
+ if (newArgs[j].endsWith(OPT_TABLE_NAME)) {
+ tableNameValueIndex = j + 1;
+ } else if (newArgs[j].endsWith(NUM_TABLES)) {
+ // change NUM_TABLES to 1 so that each worker loads one table
+ newArgs[j + 1] = "1";
+ }
+ }
+
+ // starting to load multiple tables
+ List<WorkerThread> workers = new ArrayList<>();
+ for (int i = 0; i < numTables; i++) {
+ String[] workerArgs = newArgs.clone();
+ workerArgs[tableNameValueIndex] = tableName + "_" + (i+1);
+ WorkerThread worker = new WorkerThread(i, workerArgs);
+ workers.add(worker);
+ LOG.info(worker + " starting");
+ worker.start();
+ }
+
+ // wait for all workers finish
+ LOG.info("Waiting for worker threads to finish");
+ for (WorkerThread t : workers) {
+ try {
+ t.join();
+ } catch (InterruptedException ie) {
+ IOException iie = new InterruptedIOException();
+ iie.initCause(ie);
+ throw iie;
+ }
+ checkForErrors();
+ }
+
+ return EXIT_SUCCESS;
+ }
+
+ // If an exception is thrown by one of worker threads, it will be
+ // stored here.
+ protected AtomicReference<Throwable> thrown = new AtomicReference<>();
+
+ private void workerThreadError(Throwable t) {
+ thrown.compareAndSet(null, t);
+ }
+
+ /**
+ * Check for errors in the writer threads. If any is found, rethrow it.
+ */
+ private void checkForErrors() throws IOException {
+ Throwable thrown = this.thrown.get();
+ if (thrown == null) return;
+ if (thrown instanceof IOException) {
+ throw (IOException) thrown;
+ } else {
+ throw new RuntimeException(thrown);
+ }
+ }
+
+ class WorkerThread extends Thread {
+ private String[] workerArgs;
+
+ WorkerThread(int i, String[] args) {
+ super("WorkerThread-" + i);
+ workerArgs = args;
+ }
+
+ @Override
+ public void run() {
+ try {
+ int ret = ToolRunner.run(HBaseConfiguration.create(), new LoadTestTool(), workerArgs);
+ if (ret != 0) {
+ throw new RuntimeException("LoadTestTool exit with non-zero return code.");
+ }
+ } catch (Exception ex) {
+ LOG.error("Error in worker thread", ex);
+ workerThreadError(ex);
+ }
+ }
+ }
+
+ private void addAuthInfoToConf(Properties authConfig, Configuration conf, String owner,
+ String userList) throws IOException {
+ List<String> users = new ArrayList<>(Arrays.asList(userList.split(",")));
+ users.add(owner);
+ for (String user : users) {
+ String keyTabFileConfKey = "hbase." + user + ".keytab.file";
+ String principalConfKey = "hbase." + user + ".kerberos.principal";
+ if (!authConfig.containsKey(keyTabFileConfKey) || !authConfig.containsKey(principalConfKey)) {
+ throw new IOException("Authentication configs missing for user : " + user);
+ }
+ }
+ for (String key : authConfig.stringPropertyNames()) {
+ conf.set(key, authConfig.getProperty(key));
+ }
+ LOG.debug("Added authentication properties to config successfully.");
+ }
+
+}
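Putting the option usage strings above together, a minimal illustrative invocation of the tool could look like the sketch below; all argument values are made-up examples rather than defaults taken from the tool. Adding "-num_tables", "2" would instead load cluster_test_1 and cluster_test_2 in parallel, as described in parallelLoadTables().

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.util.LoadTestTool;
import org.apache.hadoop.util.ToolRunner;

public class LoadTestToolExample {
  public static void main(String[] args) throws Exception {
    // Roughly equivalent command line (illustrative values):
    //   -tn cluster_test -write 5:1024:10 -read 100:20 -num_keys 100000 -start_key 0
    String[] toolArgs = {
        "-tn", "cluster_test",
        "-write", "5:1024:10",   // avg 5 columns per key, avg 1024 bytes per column, 10 writer threads
        "-read", "100:20",       // verify 100% of the keys read, 20 reader threads
        "-num_keys", "100000",
        "-start_key", "0"
    };
    int exitCode = ToolRunner.run(HBaseConfiguration.create(), new LoadTestTool(), toolArgs);
    System.exit(exitCode);
  }
}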
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/resources/hbase-site.xml
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/resources/hbase-site.xml b/hbase-mapreduce/src/test/resources/hbase-site.xml
new file mode 100644
index 0000000..64a1964
--- /dev/null
+++ b/hbase-mapreduce/src/test/resources/hbase-site.xml
@@ -0,0 +1,161 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+-->
+<configuration>
+ <property>
+ <name>hbase.regionserver.msginterval</name>
+ <value>1000</value>
+ <description>Interval between messages from the RegionServer to HMaster
+ in milliseconds. Default is 15. Set this value low if you want unit
+ tests to be responsive.
+ </description>
+ </property>
+ <property>
+ <name>hbase.defaults.for.version.skip</name>
+ <value>true</value>
+ </property>
+ <property>
+ <name>hbase.server.thread.wakefrequency</name>
+ <value>1000</value>
+ <description>Time to sleep in between searches for work (in milliseconds).
+ Used as sleep interval by service threads such as hbase:meta scanner and log roller.
+ </description>
+ </property>
+ <property>
+ <name>hbase.master.event.waiting.time</name>
+ <value>50</value>
+ <description>Time to sleep between checks to see if a table event took place.
+ </description>
+ </property>
+ <property>
+ <name>hbase.regionserver.handler.count</name>
+ <value>5</value>
+ </property>
+ <property>
+ <name>hbase.regionserver.metahandler.count</name>
+ <value>6</value>
+ </property>
+ <property>
+ <name>hbase.ipc.server.read.threadpool.size</name>
+ <value>3</value>
+ </property>
+ <property>
+ <name>hbase.master.info.port</name>
+ <value>-1</value>
+ <description>The port for the hbase master web UI
+ Set to -1 if you do not want the info server to run.
+ </description>
+ </property>
+ <property>
+ <name>hbase.master.port</name>
+ <value>0</value>
+ <description>Always have masters and regionservers come up on port '0' so we don't clash over
+ default ports.
+ </description>
+ </property>
+ <property>
+ <name>hbase.regionserver.port</name>
+ <value>0</value>
+ <description>Always have masters and regionservers come up on port '0' so we don't clash over
+ default ports.
+ </description>
+ </property>
+ <property>
+ <name>hbase.ipc.client.fallback-to-simple-auth-allowed</name>
+ <value>true</value>
+ </property>
+
+ <property>
+ <name>hbase.regionserver.info.port</name>
+ <value>-1</value>
+ <description>The port for the hbase regionserver web UI
+ Set to -1 if you do not want the info server to run.
+ </description>
+ </property>
+ <property>
+ <name>hbase.regionserver.info.port.auto</name>
+ <value>true</value>
+ <description>Info server auto port bind. Enables automatic port
+ search if hbase.regionserver.info.port is already in use.
+ Enabled for testing to run multiple tests on one machine.
+ </description>
+ </property>
+ <property>
+ <name>hbase.regionserver.safemode</name>
+ <value>false</value>
+ <description>
+ Turn on/off safe mode in region server. Always on for production, always off
+ for tests.
+ </description>
+ </property>
+ <property>
+ <name>hbase.hregion.max.filesize</name>
+ <value>67108864</value>
+ <description>
+ Maximum desired file size for an HRegion. If filesize exceeds
+ value + (value / 2), the HRegion is split in two. Default: 256M.
+
+ Keep the maximum filesize small so we split more often in tests.
+ </description>
+ </property>
+ <property>
+ <name>hadoop.log.dir</name>
+ <value>${user.dir}/../logs</value>
+ </property>
+ <property>
+ <name>hbase.zookeeper.property.clientPort</name>
+ <value>21818</value>
+ <description>Property from ZooKeeper's config zoo.cfg.
+ The port at which the clients will connect.
+ </description>
+ </property>
+ <property>
+ <name>hbase.defaults.for.version.skip</name>
+ <value>true</value>
+ <description>
+ Set to true to skip the 'hbase.defaults.for.version'.
+ Setting this to true can be useful in contexts other than
+ the other side of a maven generation; i.e. running in an
+ ide. You'll want to set this boolean to true to avoid
+ seeing the RuntimeException complaint: "hbase-default.xml file
+ seems to be for and old version of HBase (@@@VERSION@@@), this
+ version is X.X.X-SNAPSHOT"
+ </description>
+ </property>
+ <property>
+ <name>hbase.table.sanity.checks</name>
+ <value>false</value>
+ <description>Skip sanity checks in tests
+ </description>
+ </property>
+ <property>
+ <name>hbase.procedure.fail.on.corruption</name>
+ <value>true</value>
+ <description>
+ Enable replay sanity checks on procedure tests.
+ </description>
+ </property>
+ <property>
+ <name>hbase.hconnection.threads.keepalivetime</name>
+ <value>3</value>
+ </property>
+</configuration>
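Tests in this module pick up the overrides above simply by creating an HBaseConfiguration, since this hbase-site.xml sits on the test classpath; a small sketch (the property read below is just an example) is:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;

public class TestSiteConfigSketch {
  public static void main(String[] args) {
    // hbase-site.xml on the test classpath overrides hbase-default.xml values.
    Configuration conf = HBaseConfiguration.create();
    // Should print 5 (the handler count set above) when this test hbase-site.xml is on the classpath.
    System.out.println(conf.getInt("hbase.regionserver.handler.count", 30));
  }
}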
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/resources/hbase-site2.xml
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/resources/hbase-site2.xml b/hbase-mapreduce/src/test/resources/hbase-site2.xml
new file mode 100644
index 0000000..8bef31a
--- /dev/null
+++ b/hbase-mapreduce/src/test/resources/hbase-site2.xml
@@ -0,0 +1,146 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+-->
+<configuration>
+ <property>
+ <name>hbase.custom.config</name>
+ <value>1000</value>
+ </property>
+ <property>
+ <name>hbase.regionserver.msginterval</name>
+ <value>1000</value>
+ <description>Interval between messages from the RegionServer to HMaster
+ in milliseconds. Default is 15. Set this value low if you want unit
+ tests to be responsive.
+ </description>
+ </property>
+ <property>
+ <name>hbase.defaults.for.version.skip</name>
+ <value>true</value>
+ </property>
+ <property>
+ <name>hbase.server.thread.wakefrequency</name>
+ <value>1000</value>
+ <description>Time to sleep in between searches for work (in milliseconds).
+ Used as sleep interval by service threads such as hbase:meta scanner and log roller.
+ </description>
+ </property>
+ <property>
+ <name>hbase.master.event.waiting.time</name>
+ <value>50</value>
+ <description>Time to sleep between checks to see if a table event took place.
+ </description>
+ </property>
+ <property>
+ <name>hbase.regionserver.handler.count</name>
+ <value>5</value>
+ </property>
+ <property>
+ <name>hbase.master.info.port</name>
+ <value>-1</value>
+ <description>The port for the hbase master web UI
+ Set to -1 if you do not want the info server to run.
+ </description>
+ </property>
+ <property>
+ <name>hbase.master.port</name>
+ <value>0</value>
+ <description>Always have masters and regionservers come up on port '0' so we don't clash over
+ default ports.
+ </description>
+ </property>
+ <property>
+ <name>hbase.regionserver.port</name>
+ <value>0</value>
+ <description>Always have masters and regionservers come up on port '0' so we don't clash over
+ default ports.
+ </description>
+ </property>
+ <property>
+ <name>hbase.ipc.client.fallback-to-simple-auth-allowed</name>
+ <value>true</value>
+ </property>
+
+ <property>
+ <name>hbase.regionserver.info.port</name>
+ <value>-1</value>
+ <description>The port for the hbase regionserver web UI
+ Set to -1 if you do not want the info server to run.
+ </description>
+ </property>
+ <property>
+ <name>hbase.regionserver.info.port.auto</name>
+ <value>true</value>
+ <description>Info server auto port bind. Enables automatic port
+ search if hbase.regionserver.info.port is already in use.
+ Enabled for testing to run multiple tests on one machine.
+ </description>
+ </property>
+ <property>
+ <name>hbase.regionserver.safemode</name>
+ <value>false</value>
+ <description>
+ Turn on/off safe mode in region server. Always on for production, always off
+ for tests.
+ </description>
+ </property>
+ <property>
+ <name>hbase.hregion.max.filesize</name>
+ <value>67108864</value>
+ <description>
+ Maximum desired file size for an HRegion. If filesize exceeds
+ value + (value / 2), the HRegion is split in two. Default: 256M.
+
+ Keep the maximum filesize small so we split more often in tests.
+ </description>
+ </property>
+ <property>
+ <name>hadoop.log.dir</name>
+ <value>${user.dir}/../logs</value>
+ </property>
+ <property>
+ <name>hbase.zookeeper.property.clientPort</name>
+ <value>21818</value>
+ <description>Property from ZooKeeper's config zoo.cfg.
+ The port at which the clients will connect.
+ </description>
+ </property>
+ <property>
+ <name>hbase.defaults.for.version.skip</name>
+ <value>true</value>
+ <description>
+ Set to true to skip the 'hbase.defaults.for.version'.
+ Setting this to true can be useful in contexts other than
+ the other side of a maven generation; i.e. running in an
+ ide. You'll want to set this boolean to true to avoid
+ seeing the RuntimeException complaint: "hbase-default.xml file
+ seems to be for and old version of HBase (@@@VERSION@@@), this
+ version is X.X.X-SNAPSHOT"
+ </description>
+ </property>
+ <property>
+ <name>hbase.table.sanity.checks</name>
+ <value>false</value>
+ <description>Skip sanity checks in tests
+ </description>
+ </property>
+</configuration>
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/resources/hdfs-site.xml
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/resources/hdfs-site.xml b/hbase-mapreduce/src/test/resources/hdfs-site.xml
new file mode 100644
index 0000000..03be0c7
--- /dev/null
+++ b/hbase-mapreduce/src/test/resources/hdfs-site.xml
@@ -0,0 +1,32 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+-->
+<configuration>
+
+ <!-- hadoop-2.0.5+'s HDFS-4305 by default enforces a min block size
+ of 1024*1024. Many unit tests that use the hlog use smaller
+ blocks. Set this config to 0 so the tests pass. -->
+ <property>
+ <name>dfs.namenode.fs-limits.min-block-size</name>
+ <value>0</value>
+ </property>
+</configuration>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/resources/log4j.properties b/hbase-mapreduce/src/test/resources/log4j.properties
new file mode 100644
index 0000000..c322699
--- /dev/null
+++ b/hbase-mapreduce/src/test/resources/log4j.properties
@@ -0,0 +1,68 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Define some default values that can be overridden by system properties
+hbase.root.logger=INFO,console
+hbase.log.dir=.
+hbase.log.file=hbase.log
+
+# Define the root logger to the system property "hbase.root.logger".
+log4j.rootLogger=${hbase.root.logger}
+
+# Logging Threshold
+log4j.threshold=ALL
+
+#
+# Daily Rolling File Appender
+#
+log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.DRFA.File=${hbase.log.dir}/${hbase.log.file}
+
+# Rollover at midnight
+log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
+
+# 30-day backup
+#log4j.appender.DRFA.MaxBackupIndex=30
+log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
+# Debugging Pattern format
+log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %C{2}(%L): %m%n
+
+
+#
+# console
+# Add "console" to rootlogger above if you want to use this
+#
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %C{2}(%L): %m%n
+
+# Custom Logging levels
+
+#log4j.logger.org.apache.hadoop.fs.FSNamesystem=DEBUG
+
+log4j.logger.org.apache.hadoop=WARN
+log4j.logger.org.apache.zookeeper=ERROR
+log4j.logger.org.apache.hadoop.hbase=DEBUG
+
+#These settings are workarounds against spurious logs from the minicluster.
+#See HBASE-4709
+log4j.logger.org.apache.hadoop.metrics2.impl.MetricsConfig=WARN
+log4j.logger.org.apache.hadoop.metrics2.impl.MetricsSinkAdapter=WARN
+log4j.logger.org.apache.hadoop.metrics2.impl.MetricsSystemImpl=WARN
+log4j.logger.org.apache.hadoop.metrics2.util.MBeans=WARN
+# Enable this to get detailed connection error/retry logging.
+# log4j.logger.org.apache.hadoop.hbase.client.ConnectionImplementation=TRACE
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/resources/mapred-queues.xml
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/resources/mapred-queues.xml b/hbase-mapreduce/src/test/resources/mapred-queues.xml
new file mode 100644
index 0000000..43f3e2a
--- /dev/null
+++ b/hbase-mapreduce/src/test/resources/mapred-queues.xml
@@ -0,0 +1,75 @@
+<?xml version="1.0"?>
+<!--
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+-->
+<!-- This is the template for queue configuration. The format supports nesting of
+ queues within queues - a feature called hierarchical queues. All queues are
+ defined within the 'queues' tag which is the top level element for this
+ XML document.
+ The 'aclsEnabled' attribute should be set to true, if ACLs should be checked
+ on queue operations such as submitting jobs, killing jobs etc. -->
+<queues aclsEnabled="false">
+
+ <!-- Configuration for a queue is specified by defining a 'queue' element. -->
+ <queue>
+
+ <!-- Name of a queue. Queue name cannot contain a ':' -->
+ <name>default</name>
+
+ <!-- properties for a queue, typically used by schedulers,
+ can be defined here -->
+ <properties>
+ </properties>
+
+ <!-- State of the queue. If running, the queue will accept new jobs.
+ If stopped, the queue will not accept new jobs. -->
+ <state>running</state>
+
+ <!-- Specifies the ACLs to check for submitting jobs to this queue.
+ If set to '*', it allows all users to submit jobs to the queue.
+ For specifying a list of users and groups the format to use is
+ user1,user2 group1,group2 -->
+ <acl-submit-job>*</acl-submit-job>
+
+ <!-- Specifies the ACLs to check for modifying jobs in this queue.
+ Modifications include killing jobs, tasks of jobs or changing
+ priorities.
+ If set to '*', it allows all users to submit jobs to the queue.
+ For specifying a list of users and groups the format to use is
+ user1,user2 group1,group2 -->
+ <acl-administer-jobs>*</acl-administer-jobs>
+ </queue>
+
+ <!-- Here is a sample of a hierarchical queue configuration
+ where q2 is a child of q1. In this example, q2 is a leaf level
+ queue as it has no queues configured within it. Currently, ACLs
+ and state are only supported for the leaf level queues.
+ Note also the usage of properties for the queue q2.
+ <queue>
+ <name>q1</name>
+ <queue>
+ <name>q2</name>
+ <properties>
+ <property key="capacity" value="20"/>
+ <property key="user-limit" value="30"/>
+ </properties>
+ </queue>
+ </queue>
+ -->
+</queues>
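
The queue definitions above are picked up by the MapReduce framework at job submission time. As a rough usage sketch (not part of this patch), a job can be pointed at the 'default' queue defined above through the standard Hadoop configuration key; the job name below is arbitrary:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class QueueSelectionSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Route the job to the "default" queue declared in mapred-queues.xml.
    conf.set("mapreduce.job.queuename", "default");
    Job job = Job.getInstance(conf, "queue-selection-sketch");
    // ... mapper, reducer, input and output setup as usual, then job.waitForCompletion(true) ...
  }
}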
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/resources/mapred-site.xml
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/resources/mapred-site.xml b/hbase-mapreduce/src/test/resources/mapred-site.xml
new file mode 100644
index 0000000..787ffb7
--- /dev/null
+++ b/hbase-mapreduce/src/test/resources/mapred-site.xml
@@ -0,0 +1,34 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+-->
+<configuration>
+<property>
+ <name>mapred.map.child.java.opts</name>
+ <value>-Djava.awt.headless=true</value>
+</property>
+
+<property>
+ <name>mapred.reduce.child.java.opts</name>
+ <value>-Djava.awt.headless=true</value>
+</property>
+</configuration>
+
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/resources/org/apache/hadoop/hbase/PerformanceEvaluation_Counter.properties
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/resources/org/apache/hadoop/hbase/PerformanceEvaluation_Counter.properties b/hbase-mapreduce/src/test/resources/org/apache/hadoop/hbase/PerformanceEvaluation_Counter.properties
new file mode 100644
index 0000000..6fca96a
--- /dev/null
+++ b/hbase-mapreduce/src/test/resources/org/apache/hadoop/hbase/PerformanceEvaluation_Counter.properties
@@ -0,0 +1,28 @@
+# ResourceBundle properties file for Map-Reduce counters
+
+#/**
+# * Licensed to the Apache Software Foundation (ASF) under one
+# * or more contributor license agreements. See the NOTICE file
+# * distributed with this work for additional information
+# * regarding copyright ownership. The ASF licenses this file
+# * to you under the Apache License, Version 2.0 (the
+# * "License"); you may not use this file except in compliance
+# * with the License. You may obtain a copy of the License at
+# *
+# * http://www.apache.org/licenses/LICENSE-2.0
+# *
+# * Unless required by applicable law or agreed to in writing, software
+# * distributed under the License is distributed on an "AS IS" BASIS,
+# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# * See the License for the specific language governing permissions and
+# * limitations under the License.
+# */
+
+CounterGroupName= HBase Performance Evaluation
+ELAPSED_TIME.name= Elapsed time in milliseconds
+ROWS.name= Row count
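
The MapReduce counter framework resolves display names from a ResourceBundle named after the counter enum's enclosing class (PerformanceEvaluation$Counter becomes PerformanceEvaluation_Counter). A small sketch, assuming this properties file is on the classpath, of reading those display names directly:

import java.util.ResourceBundle;

public class CounterBundleSketch {
  public static void main(String[] args) {
    ResourceBundle bundle =
        ResourceBundle.getBundle("org.apache.hadoop.hbase.PerformanceEvaluation_Counter");
    // Group name plus the display names used for the ELAPSED_TIME and ROWS counters.
    System.out.println(bundle.getString("CounterGroupName"));
    System.out.println(bundle.getString("ELAPSED_TIME.name"));
    System.out.println(bundle.getString("ROWS.name"));
  }
}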
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/resources/org/apache/hadoop/hbase/mapreduce/exportedTableIn94Format
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/resources/org/apache/hadoop/hbase/mapreduce/exportedTableIn94Format b/hbase-mapreduce/src/test/resources/org/apache/hadoop/hbase/mapreduce/exportedTableIn94Format
new file mode 100755
index 0000000..762ddd7
Binary files /dev/null and b/hbase-mapreduce/src/test/resources/org/apache/hadoop/hbase/mapreduce/exportedTableIn94Format differ
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-rest/pom.xml
----------------------------------------------------------------------
diff --git a/hbase-rest/pom.xml b/hbase-rest/pom.xml
index 3af9829..639c0c2 100644
--- a/hbase-rest/pom.xml
+++ b/hbase-rest/pom.xml
@@ -212,6 +212,16 @@
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-mapreduce</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-mapreduce</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
<artifactId>hbase-hadoop-compat</artifactId>
</dependency>
<dependency>
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-rest/src/test/java/org/apache/hadoop/hbase/rest/PerformanceEvaluation.java
----------------------------------------------------------------------
diff --git a/hbase-rest/src/test/java/org/apache/hadoop/hbase/rest/PerformanceEvaluation.java b/hbase-rest/src/test/java/org/apache/hadoop/hbase/rest/PerformanceEvaluation.java
index 3559ee0..6ed170e 100644
--- a/hbase-rest/src/test/java/org/apache/hadoop/hbase/rest/PerformanceEvaluation.java
+++ b/hbase-rest/src/test/java/org/apache/hadoop/hbase/rest/PerformanceEvaluation.java
@@ -220,8 +220,8 @@ public class PerformanceEvaluation extends Configured implements Tool {
/**
* This class works as the InputSplit of Performance Evaluation
- * MapReduce InputFormat, and the Record Value of RecordReader.
- * Each map task will only read one record from a PeInputSplit,
+ * MapReduce InputFormat, and the Record Value of RecordReader.
+ * Each map task will only read one record from a PeInputSplit,
* the record value is the PeInputSplit itself.
*/
public static class PeInputSplit extends InputSplit implements Writable {
@@ -950,7 +950,7 @@ public class PerformanceEvaluation extends Configured implements Tool {
static abstract class TableTest extends Test {
protected Table table;
-
+
public TableTest(Configuration conf, TestOptions options, Status status) {
super(conf, options, status);
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/client/TableSnapshotScanner.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/client/TableSnapshotScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/client/TableSnapshotScanner.java
index bcd433c..d520113 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/client/TableSnapshotScanner.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/client/TableSnapshotScanner.java
@@ -43,7 +43,7 @@ import org.apache.hadoop.hbase.util.FSUtils;
* <p>
* This also allows one to run the scan from an
* online or offline hbase cluster. The snapshot files can be exported by using the
- * {@link org.apache.hadoop.hbase.snapshot.ExportSnapshot} tool,
+ * org.apache.hadoop.hbase.snapshot.ExportSnapshot tool,
* to a pure-hdfs cluster, and this scanner can be used to
* run the scan directly over the snapshot files. The snapshot should not be deleted while there
* are open scanners reading from snapshot files.
@@ -60,7 +60,7 @@ import org.apache.hadoop.hbase.util.FSUtils;
* snapshot files, the job has to be run as the HBase user or the user must have group or other
 * privileges in the filesystem (See HBASE-8369). Note that giving other users access to read from
* snapshot/data files will completely circumvent the access control enforced by HBase.
- * @see org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat
+ * See org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat.
*/
@InterfaceAudience.Public
public class TableSnapshotScanner extends AbstractClientScanner {
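
For context on the javadoc above: a minimal sketch (not from the patch itself) of driving TableSnapshotScanner over an exported snapshot. The snapshot name and restore directory below are hypothetical; the restore directory should live on the same filesystem as hbase.rootdir.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.TableSnapshotScanner;

public class SnapshotScanSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // Hypothetical snapshot name and scratch directory for the restored snapshot files.
    Path restoreDir = new Path("/tmp/snapshot_restore_dir");
    try (TableSnapshotScanner scanner =
        new TableSnapshotScanner(conf, restoreDir, "mySnapshot", new Scan())) {
      for (Result result : scanner) {
        // process each row read straight from the snapshot's HFiles
      }
    }
  }
}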
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/Driver.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/Driver.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/Driver.java
deleted file mode 100644
index 618c14a..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/Driver.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import org.apache.hadoop.hbase.HBaseInterfaceAudience;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.classification.InterfaceStability;
-import org.apache.hadoop.util.ProgramDriver;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.annotations.VisibleForTesting;
-
-/**
- * Driver for hbase mapreduce jobs. Select which to run by passing name of job
- * to this main.
- */
-@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
-@InterfaceStability.Stable
-public class Driver {
-
- private static ProgramDriver pgd = new ProgramDriver();
-
- @VisibleForTesting
- static void setProgramDriver(ProgramDriver pgd0) {
- pgd = pgd0;
- }
-
- /**
- * @param args
- * @throws Throwable
- */
- public static void main(String[] args) throws Throwable {
- pgd.addClass(RowCounter.NAME, RowCounter.class, "Count rows in HBase table");
- ProgramDriver.class.getMethod("driver", new Class[] { String[].class })
- .invoke(pgd, new Object[] { args });
- }
-}
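
Driver dispatches on the program name given as the first argument; the only program registered here is the mapred RowCounter. A hedged sketch of an invocation, assuming the registered name is "rowcounter" and using a hypothetical table:

public class DriverSketch {
  public static void main(String[] args) throws Throwable {
    // Equivalent to running the driver from the command line with: rowcounter myTable
    // "myTable" is a hypothetical table name.
    org.apache.hadoop.hbase.mapred.Driver.main(new String[] { "rowcounter", "myTable" });
  }
}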
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/GroupingTableMap.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/GroupingTableMap.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/GroupingTableMap.java
deleted file mode 100644
index a534224..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/GroupingTableMap.java
+++ /dev/null
@@ -1,157 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import java.io.IOException;
-import java.util.ArrayList;
-
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-
-
-/**
- * Extract grouping columns from input record
- */
-@InterfaceAudience.Public
-public class GroupingTableMap
-extends MapReduceBase
-implements TableMap<ImmutableBytesWritable,Result> {
-
- /**
- * JobConf parameter to specify the columns used to produce the key passed to
- * collect from the map phase
- */
- public static final String GROUP_COLUMNS =
- "hbase.mapred.groupingtablemap.columns";
-
- protected byte [][] columns;
-
- /**
- * Use this before submitting a TableMap job. It will appropriately set up the
- * JobConf.
- *
- * @param table table to be processed
- * @param columns space separated list of columns to fetch
- * @param groupColumns space separated list of columns used to form the key
- * used in collect
- * @param mapper map class
- * @param job job configuration object
- */
- @SuppressWarnings("unchecked")
- public static void initJob(String table, String columns, String groupColumns,
- Class<? extends TableMap> mapper, JobConf job) {
-
- TableMapReduceUtil.initTableMapJob(table, columns, mapper,
- ImmutableBytesWritable.class, Result.class, job);
- job.set(GROUP_COLUMNS, groupColumns);
- }
-
- @Override
- public void configure(JobConf job) {
- super.configure(job);
- String[] cols = job.get(GROUP_COLUMNS, "").split(" ");
- columns = new byte[cols.length][];
- for(int i = 0; i < cols.length; i++) {
- columns[i] = Bytes.toBytes(cols[i]);
- }
- }
-
- /**
- * Extract the grouping columns from value to construct a new key.
- *
- * Pass the new key and value to reduce.
- * If any of the grouping columns are not found in the value, the record is skipped.
- * @param key
- * @param value
- * @param output
- * @param reporter
- * @throws IOException
- */
- public void map(ImmutableBytesWritable key, Result value,
- OutputCollector<ImmutableBytesWritable,Result> output,
- Reporter reporter) throws IOException {
-
- byte[][] keyVals = extractKeyValues(value);
- if(keyVals != null) {
- ImmutableBytesWritable tKey = createGroupKey(keyVals);
- output.collect(tKey, value);
- }
- }
-
- /**
- * Extract column values from the current record. This method returns
- * null if any of the columns are not found.
- *
- * Override this method if you want to deal with nulls differently.
- *
- * @param r
- * @return array of byte values
- */
- protected byte[][] extractKeyValues(Result r) {
- byte[][] keyVals = null;
- ArrayList<byte[]> foundList = new ArrayList<>();
- int numCols = columns.length;
- if (numCols > 0) {
- for (Cell value: r.listCells()) {
- byte [] column = KeyValue.makeColumn(CellUtil.cloneFamily(value),
- CellUtil.cloneQualifier(value));
- for (int i = 0; i < numCols; i++) {
- if (Bytes.equals(column, columns[i])) {
- foundList.add(CellUtil.cloneValue(value));
- break;
- }
- }
- }
- if(foundList.size() == numCols) {
- keyVals = foundList.toArray(new byte[numCols][]);
- }
- }
- return keyVals;
- }
-
- /**
- * Create a key by concatenating multiple column values.
- * Override this function in order to produce different types of keys.
- *
- * @param vals
- * @return key generated by concatenating multiple column values
- */
- protected ImmutableBytesWritable createGroupKey(byte[][] vals) {
- if(vals == null) {
- return null;
- }
- StringBuilder sb = new StringBuilder();
- for(int i = 0; i < vals.length; i++) {
- if(i > 0) {
- sb.append(" ");
- }
- sb.append(Bytes.toString(vals[i]));
- }
- return new ImmutableBytesWritable(Bytes.toBytesBinary(sb.toString()));
- }
-}
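
A sketch of wiring the class above into an old-API job. The table and column names ("myTable", "info:a", "info:b") are hypothetical; grouping columns are given space-separated, as described in the initJob javadoc:

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapred.GroupingTableMap;
import org.apache.hadoop.mapred.JobConf;

public class GroupingTableMapSketch {
  public static void main(String[] args) {
    JobConf job = new JobConf(HBaseConfiguration.create(), GroupingTableMapSketch.class);
    // Scan two columns and build the map output key from those same two columns.
    GroupingTableMap.initJob("myTable", "info:a info:b", "info:a info:b",
        GroupingTableMap.class, job);
    // ... add a reducer (or zero reduces plus an output format), then JobClient.runJob(job) ...
  }
}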
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/HRegionPartitioner.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/HRegionPartitioner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/HRegionPartitioner.java
deleted file mode 100644
index 0011a60..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/HRegionPartitioner.java
+++ /dev/null
@@ -1,96 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import java.io.IOException;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.RegionLocator;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.Partitioner;
-
-
-/**
- * This is used to partition the output keys into groups of keys.
- * Keys are grouped according to the regions that currently exist
- * so that each reducer fills a single region and load is distributed.
- *
- * @param <K2>
- * @param <V2>
- */
-@InterfaceAudience.Public
-public class HRegionPartitioner<K2,V2>
-implements Partitioner<ImmutableBytesWritable, V2> {
- private static final Log LOG = LogFactory.getLog(HRegionPartitioner.class);
- // Connection and locator are not cleaned up; they just die when partitioner is done.
- private Connection connection;
- private RegionLocator locator;
- private byte[][] startKeys;
-
- public void configure(JobConf job) {
- try {
- this.connection = ConnectionFactory.createConnection(HBaseConfiguration.create(job));
- TableName tableName = TableName.valueOf(job.get(TableOutputFormat.OUTPUT_TABLE));
- this.locator = this.connection.getRegionLocator(tableName);
- } catch (IOException e) {
- LOG.error(e);
- }
-
- try {
- this.startKeys = this.locator.getStartKeys();
- } catch (IOException e) {
- LOG.error(e);
- }
- }
-
- public int getPartition(ImmutableBytesWritable key, V2 value, int numPartitions) {
- byte[] region = null;
- // Only one region return 0
- if (this.startKeys.length == 1){
- return 0;
- }
- try {
- // Not sure if this is cached after a split so we could have problems
- // here if a region splits while mapping
- region = locator.getRegionLocation(key.get()).getRegionInfo().getStartKey();
- } catch (IOException e) {
- LOG.error(e);
- }
- for (int i = 0; i < this.startKeys.length; i++){
- if (Bytes.compareTo(region, this.startKeys[i]) == 0 ){
- if (i >= numPartitions-1){
- // cover the case where we have fewer reducers than regions.
- return (Integer.toString(i).hashCode()
- & Integer.MAX_VALUE) % numPartitions;
- }
- return i;
- }
- }
- // if above fails to find start key that match we need to return something
- return 0;
- }
-}
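
A sketch of how this partitioner is typically hooked into an old-API job so that each reducer writes into one region; the partitioner reads the target table from the TableOutputFormat key, and the table name below is hypothetical:

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapred.HRegionPartitioner;
import org.apache.hadoop.hbase.mapred.TableOutputFormat;
import org.apache.hadoop.mapred.JobConf;

public class RegionPartitionerSketch {
  public static void main(String[] args) {
    JobConf job = new JobConf(HBaseConfiguration.create(), RegionPartitionerSketch.class);
    job.set(TableOutputFormat.OUTPUT_TABLE, "myTable"); // hypothetical destination table
    job.setOutputFormat(TableOutputFormat.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Put.class);
    // Route each output key to the reducer owning the region that covers it.
    job.setPartitionerClass(HRegionPartitioner.class);
    // ... mapper/reducer configuration and submission omitted ...
  }
}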
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/IdentityTableMap.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/IdentityTableMap.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/IdentityTableMap.java
deleted file mode 100644
index dfacff9..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/IdentityTableMap.java
+++ /dev/null
@@ -1,76 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import java.io.IOException;
-
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-
-/**
- * Pass the given key and record as-is to reduce
- */
-@InterfaceAudience.Public
-public class IdentityTableMap
-extends MapReduceBase
-implements TableMap<ImmutableBytesWritable, Result> {
-
- /** constructor */
- public IdentityTableMap() {
- super();
- }
-
- /**
- * Use this before submitting a TableMap job. It will
- * appropriately set up the JobConf.
- *
- * @param table table name
- * @param columns columns to scan
- * @param mapper mapper class
- * @param job job configuration
- */
- @SuppressWarnings("unchecked")
- public static void initJob(String table, String columns,
- Class<? extends TableMap> mapper, JobConf job) {
- TableMapReduceUtil.initTableMapJob(table, columns, mapper,
- ImmutableBytesWritable.class,
- Result.class, job);
- }
-
- /**
- * Pass the key, value to reduce
- * @param key
- * @param value
- * @param output
- * @param reporter
- * @throws IOException
- */
- public void map(ImmutableBytesWritable key, Result value,
- OutputCollector<ImmutableBytesWritable,Result> output,
- Reporter reporter) throws IOException {
-
- // convert
- output.collect(key, value);
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/IdentityTableReduce.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/IdentityTableReduce.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/IdentityTableReduce.java
deleted file mode 100644
index 9c2e604..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/IdentityTableReduce.java
+++ /dev/null
@@ -1,61 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import java.io.IOException;
-import java.util.Iterator;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-
-/**
- * Write to table each key, record pair
- */
-@InterfaceAudience.Public
-public class IdentityTableReduce
-extends MapReduceBase
-implements TableReduce<ImmutableBytesWritable, Put> {
- @SuppressWarnings("unused")
- private static final Log LOG =
- LogFactory.getLog(IdentityTableReduce.class.getName());
-
- /**
- * No aggregation, output pairs of (key, record)
- * @param key
- * @param values
- * @param output
- * @param reporter
- * @throws IOException
- */
- public void reduce(ImmutableBytesWritable key, Iterator<Put> values,
- OutputCollector<ImmutableBytesWritable, Put> output,
- Reporter reporter)
- throws IOException {
-
- while(values.hasNext()) {
- output.collect(key, values.next());
- }
- }
-}
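
Note that IdentityTableReduce consumes (ImmutableBytesWritable, Put) pairs, so the map phase must emit Puts; IdentityTableMap emits Results and cannot be paired with it directly. A sketch of the reduce-side wiring, with hypothetical table names and a hypothetical Put-emitting mapper:

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapred.IdentityTableReduce;
import org.apache.hadoop.hbase.mapred.TableMapReduceUtil;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

public class IdentityReduceSketch {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf(HBaseConfiguration.create(), IdentityReduceSketch.class);
    job.setJobName("identity-reduce-sketch");
    // MyPutEmittingMap is hypothetical: a TableMap that turns each scanned Result into a Put.
    TableMapReduceUtil.initTableMapJob("sourceTable", "info:a", MyPutEmittingMap.class,
        ImmutableBytesWritable.class, Put.class, job);
    // Writes each incoming (key, Put) pair unchanged into the destination table.
    TableMapReduceUtil.initTableReduceJob("destTable", IdentityTableReduce.class, job);
    JobClient.runJob(job);
  }
}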
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/MultiTableSnapshotInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/MultiTableSnapshotInputFormat.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/MultiTableSnapshotInputFormat.java
deleted file mode 100644
index 3e121fe..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/MultiTableSnapshotInputFormat.java
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.mapred;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.mapreduce.MultiTableSnapshotInputFormatImpl;
-import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormatImpl;
-import org.apache.hadoop.mapred.InputFormat;
-import org.apache.hadoop.mapred.InputSplit;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.RecordReader;
-import org.apache.hadoop.mapred.Reporter;
-
-import java.io.IOException;
-import java.util.Collection;
-import java.util.List;
-import java.util.Map;
-
-/**
- * MultiTableSnapshotInputFormat generalizes {@link org.apache.hadoop.hbase.mapred
- * .TableSnapshotInputFormat}
- * allowing a MapReduce job to run over one or more table snapshots, with one or more scans
- * configured for each.
- * Internally, the input format delegates to {@link org.apache.hadoop.hbase.mapreduce
- * .TableSnapshotInputFormat}
- * and thus has the same performance advantages; see {@link org.apache.hadoop.hbase.mapreduce
- * .TableSnapshotInputFormat} for
- * more details.
- * Usage is similar to TableSnapshotInputFormat, with the following exception:
- * initMultiTableSnapshotMapperJob takes in a map
- * from snapshot name to a collection of scans. For each snapshot in the map, each corresponding
- * scan will be applied;
- * the overall dataset for the job is defined by the concatenation of the regions and tables
- * included in each snapshot/scan
- * pair.
- * {@link org.apache.hadoop.hbase.mapred.TableMapReduceUtil#initMultiTableSnapshotMapperJob(Map,
- * Class, Class, Class, JobConf, boolean, Path)}
- * can be used to configure the job.
- * <pre>{@code
- * Job job = new Job(conf);
- * Map<String, Collection<Scan>> snapshotScans = ImmutableMap.of(
- * "snapshot1", ImmutableList.of(new Scan(Bytes.toBytes("a"), Bytes.toBytes("b"))),
- * "snapshot2", ImmutableList.of(new Scan(Bytes.toBytes("1"), Bytes.toBytes("2")))
- * );
- * Path restoreDir = new Path("/tmp/snapshot_restore_dir");
- * TableMapReduceUtil.initMultiTableSnapshotMapperJob(
- * snapshotScans, MyTableMapper.class, MyMapKeyOutput.class,
- * MyMapOutputValueWritable.class, job, true, restoreDir);
- * }
- * </pre>
- * Internally, this input format restores each snapshot into a subdirectory of the given tmp
- * directory. Input splits and
- * record readers are created as described in {@link org.apache.hadoop.hbase.mapreduce
- * .TableSnapshotInputFormat}
- * (one per region).
- * See {@link org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat} for more notes on
- * permissioning; the
- * same caveats apply here.
- *
- * @see org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat
- * @see org.apache.hadoop.hbase.client.TableSnapshotScanner
- */
-@InterfaceAudience.Public
-public class MultiTableSnapshotInputFormat extends TableSnapshotInputFormat
- implements InputFormat<ImmutableBytesWritable, Result> {
-
- private final MultiTableSnapshotInputFormatImpl delegate;
-
- public MultiTableSnapshotInputFormat() {
- this.delegate = new MultiTableSnapshotInputFormatImpl();
- }
-
- @Override
- public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
- List<TableSnapshotInputFormatImpl.InputSplit> splits = delegate.getSplits(job);
- InputSplit[] results = new InputSplit[splits.size()];
- for (int i = 0; i < splits.size(); i++) {
- results[i] = new TableSnapshotRegionSplit(splits.get(i));
- }
- return results;
- }
-
- @Override
- public RecordReader<ImmutableBytesWritable, Result> getRecordReader(InputSplit split, JobConf job,
- Reporter reporter) throws IOException {
- return new TableSnapshotRecordReader((TableSnapshotRegionSplit) split, job);
- }
-
- /**
- * Configure conf to read from snapshotScans, with snapshots restored to a subdirectory of
- * restoreDir.
- * Sets: {@link org.apache.hadoop.hbase.mapreduce
- * .MultiTableSnapshotInputFormatImpl#RESTORE_DIRS_KEY},
- * {@link org.apache.hadoop.hbase.mapreduce
- * .MultiTableSnapshotInputFormatImpl#SNAPSHOT_TO_SCANS_KEY}
- *
- * @param conf
- * @param snapshotScans
- * @param restoreDir
- * @throws IOException
- */
- public static void setInput(Configuration conf, Map<String, Collection<Scan>> snapshotScans,
- Path restoreDir) throws IOException {
- new MultiTableSnapshotInputFormatImpl().setInput(conf, snapshotScans, restoreDir);
- }
-
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java
new file mode 100644
index 0000000..23a70a9
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java
@@ -0,0 +1,2627 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase;
+
+import static org.codehaus.jackson.map.SerializationConfig.Feature.SORT_PROPERTIES_ALPHABETICALLY;
+
+import java.io.IOException;
+import java.io.PrintStream;
+import java.lang.reflect.Constructor;
+import java.math.BigDecimal;
+import java.math.MathContext;
+import java.text.DecimalFormat;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Date;
+import java.util.LinkedList;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Queue;
+import java.util.Random;
+import java.util.TreeMap;
+import java.util.NoSuchElementException;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Append;
+import org.apache.hadoop.hbase.client.AsyncConnection;
+import org.apache.hadoop.hbase.client.AsyncTable;
+import org.apache.hadoop.hbase.client.BufferedMutator;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Consistency;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Durability;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.Increment;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.RawAsyncTable;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.RowMutations;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.filter.BinaryComparator;
+import org.apache.hadoop.hbase.filter.CompareFilter;
+import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
+import org.apache.hadoop.hbase.filter.Filter;
+import org.apache.hadoop.hbase.filter.FilterAllFilter;
+import org.apache.hadoop.hbase.filter.FilterList;
+import org.apache.hadoop.hbase.filter.PageFilter;
+import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
+import org.apache.hadoop.hbase.filter.WhileMatchFilter;
+import org.apache.hadoop.hbase.io.compress.Compression;
+import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
+import org.apache.hadoop.hbase.io.hfile.RandomDistribution;
+import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
+import org.apache.hadoop.hbase.regionserver.BloomType;
+import org.apache.hadoop.hbase.regionserver.CompactingMemStore;
+import org.apache.hadoop.hbase.regionserver.TestHRegionFileSystem;
+import org.apache.hadoop.hbase.trace.HBaseHTraceConfiguration;
+import org.apache.hadoop.hbase.trace.SpanReceiverHost;
+import org.apache.hadoop.hbase.util.*;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.codehaus.jackson.map.ObjectMapper;
+import org.apache.htrace.Sampler;
+import org.apache.htrace.Trace;
+import org.apache.htrace.TraceScope;
+import org.apache.htrace.impl.ProbabilitySampler;
+import org.apache.hadoop.hbase.shaded.com.google.common.base.MoreObjects;
+import org.apache.hadoop.hbase.shaded.com.google.common.util.concurrent.ThreadFactoryBuilder;
+
+import com.codahale.metrics.Histogram;
+import com.codahale.metrics.UniformReservoir;
+
+/**
+ * Script used to evaluate HBase performance and scalability. Runs an HBase
+ * client that steps through one of a set of hardcoded tests or 'experiments'
+ * (e.g. a random reads test, a random writes test, etc.). Pass on the
+ * command-line which test to run and how many clients are participating in
+ * this experiment. Run {@code PerformanceEvaluation --help} to obtain usage.
+ *
+ * <p>This class sets up and runs the evaluation programs described in
+ * Section 7, <i>Performance Evaluation</i>, of the <a
+ * href="http://labs.google.com/papers/bigtable.html">Bigtable</a>
+ * paper, pages 8-10.
+ *
+ * <p>By default, runs as a mapreduce job where each mapper runs a single test
+ * client. Can also run as a non-mapreduce, multithreaded application by
+ * specifying {@code --nomapred}. Each client does about 1GB of data, unless
+ * specified otherwise.
+ */
+@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
+public class PerformanceEvaluation extends Configured implements Tool {
+ static final String RANDOM_SEEK_SCAN = "randomSeekScan";
+ static final String RANDOM_READ = "randomRead";
+ private static final Log LOG = LogFactory.getLog(PerformanceEvaluation.class.getName());
+ private static final ObjectMapper MAPPER = new ObjectMapper();
+ static {
+ MAPPER.configure(SORT_PROPERTIES_ALPHABETICALLY, true);
+ }
+
+ public static final String TABLE_NAME = "TestTable";
+ public static final byte[] FAMILY_NAME = Bytes.toBytes("info");
+ public static final byte [] COLUMN_ZERO = Bytes.toBytes("" + 0);
+ public static final byte [] QUALIFIER_NAME = COLUMN_ZERO;
+ public static final int DEFAULT_VALUE_LENGTH = 1000;
+ public static final int ROW_LENGTH = 26;
+
+ private static final int ONE_GB = 1024 * 1024 * 1000;
+ private static final int DEFAULT_ROWS_PER_GB = ONE_GB / DEFAULT_VALUE_LENGTH;
+ // TODO : should we make this configurable
+ private static final int TAG_LENGTH = 256;
+ private static final DecimalFormat FMT = new DecimalFormat("0.##");
+ private static final MathContext CXT = MathContext.DECIMAL64;
+ private static final BigDecimal MS_PER_SEC = BigDecimal.valueOf(1000);
+ private static final BigDecimal BYTES_PER_MB = BigDecimal.valueOf(1024 * 1024);
+ private static final TestOptions DEFAULT_OPTS = new TestOptions();
+
+ private static Map<String, CmdDescriptor> COMMANDS = new TreeMap<>();
+ private static final Path PERF_EVAL_DIR = new Path("performance_evaluation");
+
+ static {
+ addCommandDescriptor(AsyncRandomReadTest.class, "asyncRandomRead",
+ "Run async random read test");
+ addCommandDescriptor(AsyncRandomWriteTest.class, "asyncRandomWrite",
+ "Run async random write test");
+ addCommandDescriptor(AsyncSequentialReadTest.class, "asyncSequentialRead",
+ "Run async sequential read test");
+ addCommandDescriptor(AsyncSequentialWriteTest.class, "asyncSequentialWrite",
+ "Run async sequential write test");
+ addCommandDescriptor(AsyncScanTest.class, "asyncScan",
+ "Run async scan test (read every row)");
+ addCommandDescriptor(RandomReadTest.class, RANDOM_READ,
+ "Run random read test");
+ addCommandDescriptor(RandomSeekScanTest.class, RANDOM_SEEK_SCAN,
+ "Run random seek and scan 100 test");
+ addCommandDescriptor(RandomScanWithRange10Test.class, "scanRange10",
+ "Run random seek scan with both start and stop row (max 10 rows)");
+ addCommandDescriptor(RandomScanWithRange100Test.class, "scanRange100",
+ "Run random seek scan with both start and stop row (max 100 rows)");
+ addCommandDescriptor(RandomScanWithRange1000Test.class, "scanRange1000",
+ "Run random seek scan with both start and stop row (max 1000 rows)");
+ addCommandDescriptor(RandomScanWithRange10000Test.class, "scanRange10000",
+ "Run random seek scan with both start and stop row (max 10000 rows)");
+ addCommandDescriptor(RandomWriteTest.class, "randomWrite",
+ "Run random write test");
+ addCommandDescriptor(SequentialReadTest.class, "sequentialRead",
+ "Run sequential read test");
+ addCommandDescriptor(SequentialWriteTest.class, "sequentialWrite",
+ "Run sequential write test");
+ addCommandDescriptor(ScanTest.class, "scan",
+ "Run scan test (read every row)");
+ addCommandDescriptor(FilteredScanTest.class, "filterScan",
+ "Run scan test using a filter to find a specific row based on it's value " +
+ "(make sure to use --rows=20)");
+ addCommandDescriptor(IncrementTest.class, "increment",
+ "Increment on each row; clients overlap on keyspace so some concurrent operations");
+ addCommandDescriptor(AppendTest.class, "append",
+ "Append on each row; clients overlap on keyspace so some concurrent operations");
+ addCommandDescriptor(CheckAndMutateTest.class, "checkAndMutate",
+ "CheckAndMutate on each row; clients overlap on keyspace so some concurrent operations");
+ addCommandDescriptor(CheckAndPutTest.class, "checkAndPut",
+ "CheckAndPut on each row; clients overlap on keyspace so some concurrent operations");
+ addCommandDescriptor(CheckAndDeleteTest.class, "checkAndDelete",
+ "CheckAndDelete on each row; clients overlap on keyspace so some concurrent operations");
+ }
+
+ /**
+ * Enum for map metrics. Keep it out here rather than inside the Map
+ * inner-class so we can find associated properties.
+ */
+ protected static enum Counter {
+ /** elapsed time */
+ ELAPSED_TIME,
+ /** number of rows */
+ ROWS
+ }
+
+ protected static class RunResult implements Comparable<RunResult> {
+ public RunResult(long duration, Histogram hist) {
+ this.duration = duration;
+ this.hist = hist;
+ }
+
+ public final long duration;
+ public final Histogram hist;
+
+ @Override
+ public String toString() {
+ return Long.toString(duration);
+ }
+
+ @Override public int compareTo(RunResult o) {
+ return Long.compare(this.duration, o.duration);
+ }
+ }
+
+ /**
+ * Constructor
+ * @param conf Configuration object
+ */
+ public PerformanceEvaluation(final Configuration conf) {
+ super(conf);
+ }
+
+ protected static void addCommandDescriptor(Class<? extends TestBase> cmdClass,
+ String name, String description) {
+ CmdDescriptor cmdDescriptor = new CmdDescriptor(cmdClass, name, description);
+ COMMANDS.put(name, cmdDescriptor);
+ }
+
+ /**
+ * Implementations can have their status set.
+ */
+ interface Status {
+ /**
+ * Sets status
+ * @param msg status message
+ * @throws IOException
+ */
+ void setStatus(final String msg) throws IOException;
+ }
+
+ /**
+ * MapReduce job that runs a performance evaluation client in each map task.
+ */
+ public static class EvaluationMapTask
+ extends Mapper<LongWritable, Text, LongWritable, LongWritable> {
+
+ /** configuration parameter name that contains the command */
+ public final static String CMD_KEY = "EvaluationMapTask.command";
+ /** configuration parameter name that contains the PE impl */
+ public static final String PE_KEY = "EvaluationMapTask.performanceEvalImpl";
+
+ private Class<? extends Test> cmd;
+
+ @Override
+ protected void setup(Context context) throws IOException, InterruptedException {
+ this.cmd = forName(context.getConfiguration().get(CMD_KEY), Test.class);
+
+ // this is required so that extensions of PE are instantiated within the
+ // map reduce task...
+ Class<? extends PerformanceEvaluation> peClass =
+ forName(context.getConfiguration().get(PE_KEY), PerformanceEvaluation.class);
+ try {
+ peClass.getConstructor(Configuration.class).newInstance(context.getConfiguration());
+ } catch (Exception e) {
+ throw new IllegalStateException("Could not instantiate PE instance", e);
+ }
+ }
+
+ private <Type> Class<? extends Type> forName(String className, Class<Type> type) {
+ try {
+ return Class.forName(className).asSubclass(type);
+ } catch (ClassNotFoundException e) {
+ throw new IllegalStateException("Could not find class for name: " + className, e);
+ }
+ }
+
+ @Override
+ protected void map(LongWritable key, Text value, final Context context)
+ throws IOException, InterruptedException {
+
+ Status status = new Status() {
+ @Override
+ public void setStatus(String msg) {
+ context.setStatus(msg);
+ }
+ };
+
+ ObjectMapper mapper = new ObjectMapper();
+ TestOptions opts = mapper.readValue(value.toString(), TestOptions.class);
+ Configuration conf = HBaseConfiguration.create(context.getConfiguration());
+ final Connection con = ConnectionFactory.createConnection(conf);
+ AsyncConnection asyncCon = null;
+ try {
+ asyncCon = ConnectionFactory.createAsyncConnection(conf).get();
+ } catch (ExecutionException e) {
+ throw new IOException(e);
+ }
+
+ // Evaluation task
+ RunResult result = PerformanceEvaluation.runOneClient(this.cmd, conf, con, asyncCon, opts, status);
+ // Collect how much time the thing took. Report as map output and
+ // to the ELAPSED_TIME counter.
+ context.getCounter(Counter.ELAPSED_TIME).increment(result.duration);
+ context.getCounter(Counter.ROWS).increment(opts.perClientRunRows);
+ context.write(new LongWritable(opts.startRow), new LongWritable(result.duration));
+ context.progress();
+ }
+ }
+
+ /*
+ * If table does not already exist, create. Also create a table when
+ * {@code opts.presplitRegions} is specified or when the existing table's
+ * region replica count doesn't match {@code opts.replicas}.
+ */
+ static boolean checkTable(Admin admin, TestOptions opts) throws IOException {
+ TableName tableName = TableName.valueOf(opts.tableName);
+ boolean needsDelete = false, exists = admin.tableExists(tableName);
+ boolean isReadCmd = opts.cmdName.toLowerCase(Locale.ROOT).contains("read")
+ || opts.cmdName.toLowerCase(Locale.ROOT).contains("scan");
+ if (!exists && isReadCmd) {
+ throw new IllegalStateException(
+ "Must specify an existing table for read commands. Run a write command first.");
+ }
+ HTableDescriptor desc =
+ exists ? admin.getTableDescriptor(TableName.valueOf(opts.tableName)) : null;
+ byte[][] splits = getSplits(opts);
+
+ // recreate the table when user has requested presplit or when existing
+ // {RegionSplitPolicy,replica count} does not match requested.
+ if ((exists && opts.presplitRegions != DEFAULT_OPTS.presplitRegions)
+ || (!isReadCmd && desc != null &&
+ !StringUtils.equals(desc.getRegionSplitPolicyClassName(), opts.splitPolicy))
+ || (!isReadCmd && desc != null && desc.getRegionReplication() != opts.replicas)) {
+ needsDelete = true;
+ // wait, why did it delete my table?!?
+ LOG.debug(MoreObjects.toStringHelper("needsDelete")
+ .add("needsDelete", needsDelete)
+ .add("isReadCmd", isReadCmd)
+ .add("exists", exists)
+ .add("desc", desc)
+ .add("presplit", opts.presplitRegions)
+ .add("splitPolicy", opts.splitPolicy)
+ .add("replicas", opts.replicas));
+ }
+
+ // remove an existing table
+ if (needsDelete) {
+ if (admin.isTableEnabled(tableName)) {
+ admin.disableTable(tableName);
+ }
+ admin.deleteTable(tableName);
+ }
+
+ // table creation is necessary
+ if (!exists || needsDelete) {
+ desc = getTableDescriptor(opts);
+ if (splits != null) {
+ if (LOG.isDebugEnabled()) {
+ for (int i = 0; i < splits.length; i++) {
+ LOG.debug(" split " + i + ": " + Bytes.toStringBinary(splits[i]));
+ }
+ }
+ }
+ admin.createTable(desc, splits);
+ LOG.info("Table " + desc + " created");
+ }
+ return admin.tableExists(tableName);
+ }
+
+ /**
+ * Create an HTableDescriptor from provided TestOptions.
+ */
+ protected static HTableDescriptor getTableDescriptor(TestOptions opts) {
+ HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(opts.tableName));
+ HColumnDescriptor family = new HColumnDescriptor(FAMILY_NAME);
+ family.setDataBlockEncoding(opts.blockEncoding);
+ family.setCompressionType(opts.compression);
+ family.setBloomFilterType(opts.bloomType);
+ family.setBlocksize(opts.blockSize);
+ if (opts.inMemoryCF) {
+ family.setInMemory(true);
+ }
+ family.setInMemoryCompaction(opts.inMemoryCompaction);
+ desc.addFamily(family);
+ if (opts.replicas != DEFAULT_OPTS.replicas) {
+ desc.setRegionReplication(opts.replicas);
+ }
+ if (opts.splitPolicy != DEFAULT_OPTS.splitPolicy) {
+ desc.setRegionSplitPolicyClassName(opts.splitPolicy);
+ }
+ return desc;
+ }
+
+ /**
+ * generates splits based on total number of rows and specified split regions
+ */
+ protected static byte[][] getSplits(TestOptions opts) {
+ if (opts.presplitRegions == DEFAULT_OPTS.presplitRegions)
+ return null;
+
+ int numSplitPoints = opts.presplitRegions - 1;
+ byte[][] splits = new byte[numSplitPoints][];
+ int jump = opts.totalRows / opts.presplitRegions;
+ for (int i = 0; i < numSplitPoints; i++) {
+ int rowkey = jump * (1 + i);
+ splits[i] = format(rowkey);
+ }
+ return splits;
+ }
+
+ /*
+ * Run all clients in this vm each to its own thread.
+ */
+ static RunResult[] doLocalClients(final TestOptions opts, final Configuration conf)
+ throws IOException, InterruptedException, ExecutionException {
+ final Class<? extends TestBase> cmd = determineCommandClass(opts.cmdName);
+ assert cmd != null;
+ @SuppressWarnings("unchecked")
+ Future<RunResult>[] threads = new Future[opts.numClientThreads];
+ RunResult[] results = new RunResult[opts.numClientThreads];
+ ExecutorService pool = Executors.newFixedThreadPool(opts.numClientThreads,
+ new ThreadFactoryBuilder().setNameFormat("TestClient-%s").build());
+ final Connection con = ConnectionFactory.createConnection(conf);
+ final AsyncConnection asyncCon = ConnectionFactory.createAsyncConnection(conf).get();
+ for (int i = 0; i < threads.length; i++) {
+ final int index = i;
+ threads[i] = pool.submit(new Callable<RunResult>() {
+ @Override
+ public RunResult call() throws Exception {
+ TestOptions threadOpts = new TestOptions(opts);
+ if (threadOpts.startRow == 0) threadOpts.startRow = index * threadOpts.perClientRunRows;
+ RunResult run = runOneClient(cmd, conf, con, asyncCon, threadOpts, new Status() {
+ @Override
+ public void setStatus(final String msg) throws IOException {
+ LOG.info(msg);
+ }
+ });
+ LOG.info("Finished " + Thread.currentThread().getName() + " in " + run.duration +
+ "ms over " + threadOpts.perClientRunRows + " rows");
+ return run;
+ }
+ });
+ }
+ pool.shutdown();
+
+ for (int i = 0; i < threads.length; i++) {
+ try {
+ results[i] = threads[i].get();
+ } catch (ExecutionException e) {
+ throw new IOException(e.getCause());
+ }
+ }
+ final String test = cmd.getSimpleName();
+ LOG.info("[" + test + "] Summary of timings (ms): "
+ + Arrays.toString(results));
+ Arrays.sort(results);
+ long total = 0;
+ for (RunResult result : results) {
+ total += result.duration;
+ }
+ LOG.info("[" + test + "]"
+ + "\tMin: " + results[0] + "ms"
+ + "\tMax: " + results[results.length - 1] + "ms"
+ + "\tAvg: " + (total / results.length) + "ms");
+
+ con.close();
+ asyncCon.close();
+
+ return results;
+ }
+
+ /*
+ * Run a mapreduce job. Run as many maps as asked-for clients.
+ * Before we start up the job, write out an input file with an instruction
+ * per client regarding which row they are to start on.
+ * @param cmd Command to run.
+ * @throws IOException
+ */
+ static Job doMapReduce(TestOptions opts, final Configuration conf)
+ throws IOException, InterruptedException, ClassNotFoundException {
+ final Class<? extends TestBase> cmd = determineCommandClass(opts.cmdName);
+ assert cmd != null;
+ Path inputDir = writeInputFile(conf, opts);
+ conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
+ conf.set(EvaluationMapTask.PE_KEY, PerformanceEvaluation.class.getName());
+ Job job = Job.getInstance(conf);
+ job.setJarByClass(PerformanceEvaluation.class);
+ job.setJobName("HBase Performance Evaluation - " + opts.cmdName);
+
+ job.setInputFormatClass(NLineInputFormat.class);
+ NLineInputFormat.setInputPaths(job, inputDir);
+ // this is default, but be explicit about it just in case.
+ NLineInputFormat.setNumLinesPerSplit(job, 1);
+
+ job.setOutputKeyClass(LongWritable.class);
+ job.setOutputValueClass(LongWritable.class);
+
+ job.setMapperClass(EvaluationMapTask.class);
+ job.setReducerClass(LongSumReducer.class);
+
+ job.setNumReduceTasks(1);
+
+ job.setOutputFormatClass(TextOutputFormat.class);
+ TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));
+
+ TableMapReduceUtil.addDependencyJars(job);
+ TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
+ Histogram.class, // yammer metrics
+ ObjectMapper.class); // jackson-mapper-asl
+
+ TableMapReduceUtil.initCredentials(job);
+
+ job.waitForCompletion(true);
+ return job;
+ }
+
+ /**
+ * Each client has one mapper to do the work, and each client does the resulting count in its map task.
+ */
+
+ static String JOB_INPUT_FILENAME = "input.txt";
+
+ /*
+ * Write input file of offsets-per-client for the mapreduce job.
+ * @param c Configuration
+ * @return Directory that contains file written whose name is JOB_INPUT_FILENAME
+ * @throws IOException
+ */
+ static Path writeInputFile(final Configuration c, final TestOptions opts) throws IOException {
+ return writeInputFile(c, opts, new Path("."));
+ }
+
+ static Path writeInputFile(final Configuration c, final TestOptions opts, final Path basedir)
+ throws IOException {
+ SimpleDateFormat formatter = new SimpleDateFormat("yyyyMMddHHmmss");
+ Path jobdir = new Path(new Path(basedir, PERF_EVAL_DIR), formatter.format(new Date()));
+ Path inputDir = new Path(jobdir, "inputs");
+
+ FileSystem fs = FileSystem.get(c);
+ fs.mkdirs(inputDir);
+
+ Path inputFile = new Path(inputDir, JOB_INPUT_FILENAME);
+ PrintStream out = new PrintStream(fs.create(inputFile));
+ // Make input random.
+ Map<Integer, String> m = new TreeMap<>();
+ Hash h = MurmurHash.getInstance();
+ int perClientRows = (opts.totalRows / opts.numClientThreads);
+ try {
+ for (int j = 0; j < opts.numClientThreads; j++) {
+ TestOptions next = new TestOptions(opts);
+ next.startRow = j * perClientRows;
+ next.perClientRunRows = perClientRows;
+ String s = MAPPER.writeValueAsString(next);
+ LOG.info("Client=" + j + ", input=" + s);
+ byte[] b = Bytes.toBytes(s);
+ int hash = h.hash(new ByteArrayHashKey(b, 0, b.length), -1);
+ m.put(hash, s);
+ }
+ for (Map.Entry<Integer, String> e: m.entrySet()) {
+ out.println(e.getValue());
+ }
+ } finally {
+ out.close();
+ }
+ return inputDir;
+ }
+
+ /**
+ * Describes a command.
+ */
+ static class CmdDescriptor {
+ private Class<? extends TestBase> cmdClass;
+ private String name;
+ private String description;
+
+ CmdDescriptor(Class<? extends TestBase> cmdClass, String name, String description) {
+ this.cmdClass = cmdClass;
+ this.name = name;
+ this.description = description;
+ }
+
+ public Class<? extends TestBase> getCmdClass() {
+ return cmdClass;
+ }
+
+ public String getName() {
+ return name;
+ }
+
+ public String getDescription() {
+ return description;
+ }
+ }
+
+ /**
+ * Wraps up options passed to {@link org.apache.hadoop.hbase.PerformanceEvaluation}.
+ * This makes tracking all these arguments a little easier.
+ * NOTE: ADDING AN OPTION, you need to add a data member, a getter/setter (to make JSON
+ * serialization of this TestOptions class behave), and you need to add to the clone constructor
+ * below copying your new option from the 'that' to the 'this'. Look for 'clone' below.
+ */
+ static class TestOptions {
+ String cmdName = null;
+ boolean nomapred = false;
+ boolean filterAll = false;
+ int startRow = 0;
+ float size = 1.0f;
+ int perClientRunRows = DEFAULT_ROWS_PER_GB;
+ int numClientThreads = 1;
+ int totalRows = DEFAULT_ROWS_PER_GB;
+ int measureAfter = 0;
+ float sampleRate = 1.0f;
+ double traceRate = 0.0;
+ String tableName = TABLE_NAME;
+ boolean flushCommits = true;
+ boolean writeToWAL = true;
+ boolean autoFlush = false;
+ boolean oneCon = false;
+ boolean useTags = false;
+ int noOfTags = 1;
+ boolean reportLatency = false;
+ int multiGet = 0;
+ int randomSleep = 0;
+ boolean inMemoryCF = false;
+ int presplitRegions = 0;
+ int replicas = HTableDescriptor.DEFAULT_REGION_REPLICATION;
+ String splitPolicy = null;
+ Compression.Algorithm compression = Compression.Algorithm.NONE;
+ BloomType bloomType = BloomType.ROW;
+ int blockSize = HConstants.DEFAULT_BLOCKSIZE;
+ DataBlockEncoding blockEncoding = DataBlockEncoding.NONE;
+ boolean valueRandom = false;
+ boolean valueZipf = false;
+ int valueSize = DEFAULT_VALUE_LENGTH;
+ int period = (this.perClientRunRows / 10) == 0? perClientRunRows: perClientRunRows / 10;
+ int cycles = 1;
+ int columns = 1;
+ int caching = 30;
+ boolean addColumns = true;
+ MemoryCompactionPolicy inMemoryCompaction =
+ MemoryCompactionPolicy.valueOf(
+ CompactingMemStore.COMPACTING_MEMSTORE_TYPE_DEFAULT);
+ boolean asyncPrefetch = false;
+ boolean cacheBlocks = true;
+ Scan.ReadType scanReadType = Scan.ReadType.DEFAULT;
+
+ public TestOptions() {}
+
+ /**
+ * Clone constructor.
+ * @param that Object to copy from.
+ */
+ public TestOptions(TestOptions that) {
+ this.cmdName = that.cmdName;
+ this.cycles = that.cycles;
+ this.nomapred = that.nomapred;
+ this.startRow = that.startRow;
+ this.size = that.size;
+ this.perClientRunRows = that.perClientRunRows;
+ this.numClientThreads = that.numClientThreads;
+ this.totalRows = that.totalRows;
+ this.sampleRate = that.sampleRate;
+ this.traceRate = that.traceRate;
+ this.tableName = that.tableName;
+ this.flushCommits = that.flushCommits;
+ this.writeToWAL = that.writeToWAL;
+ this.autoFlush = that.autoFlush;
+ this.oneCon = that.oneCon;
+ this.useTags = that.useTags;
+ this.noOfTags = that.noOfTags;
+ this.reportLatency = that.reportLatency;
+ this.multiGet = that.multiGet;
+ this.inMemoryCF = that.inMemoryCF;
+ this.presplitRegions = that.presplitRegions;
+ this.replicas = that.replicas;
+ this.splitPolicy = that.splitPolicy;
+ this.compression = that.compression;
+ this.blockEncoding = that.blockEncoding;
+ this.filterAll = that.filterAll;
+ this.bloomType = that.bloomType;
+ this.blockSize = that.blockSize;
+ this.valueRandom = that.valueRandom;
+ this.valueZipf = that.valueZipf;
+ this.valueSize = that.valueSize;
+ this.period = that.period;
+ this.randomSleep = that.randomSleep;
+ this.measureAfter = that.measureAfter;
+ this.addColumns = that.addColumns;
+ this.columns = that.columns;
+ this.caching = that.caching;
+ this.inMemoryCompaction = that.inMemoryCompaction;
+ this.asyncPrefetch = that.asyncPrefetch;
+ this.cacheBlocks = that.cacheBlocks;
+ this.scanReadType = that.scanReadType;
+ }
+
+ public int getCaching() {
+ return this.caching;
+ }
+
+ public void setCaching(final int caching) {
+ this.caching = caching;
+ }
+
+ public int getColumns() {
+ return this.columns;
+ }
+
+ public void setColumns(final int columns) {
+ this.columns = columns;
+ }
+
+ public int getCycles() {
+ return this.cycles;
+ }
+
+ public void setCycles(final int cycles) {
+ this.cycles = cycles;
+ }
+
+ public boolean isValueZipf() {
+ return valueZipf;
+ }
+
+ public void setValueZipf(boolean valueZipf) {
+ this.valueZipf = valueZipf;
+ }
+
+ public String getCmdName() {
+ return cmdName;
+ }
+
+ public void setCmdName(String cmdName) {
+ this.cmdName = cmdName;
+ }
+
+ public int getRandomSleep() {
+ return randomSleep;
+ }
+
+ public void setRandomSleep(int randomSleep) {
+ this.randomSleep = randomSleep;
+ }
+
+ public int getReplicas() {
+ return replicas;
+ }
+
+ public void setReplicas(int replicas) {
+ this.replicas = replicas;
+ }
+
+ public String getSplitPolicy() {
+ return splitPolicy;
+ }
+
+ public void setSplitPolicy(String splitPolicy) {
+ this.splitPolicy = splitPolicy;
+ }
+
+ public void setNomapred(boolean nomapred) {
+ this.nomapred = nomapred;
+ }
+
+ public void setFilterAll(boolean filterAll) {
+ this.filterAll = filterAll;
+ }
+
+ public void setStartRow(int startRow) {
+ this.startRow = startRow;
+ }
+
+ public void setSize(float size) {
+ this.size = size;
+ }
+
+ public void setPerClientRunRows(int perClientRunRows) {
+ this.perClientRunRows = perClientRunRows;
+ }
+
+ public void setNumClientThreads(int numClientThreads) {
+ this.numClientThreads = numClientThreads;
+ }
+
+ public void setTotalRows(int totalRows) {
+ this.totalRows = totalRows;
+ }
+
+ public void setSampleRate(float sampleRate) {
+ this.sampleRate = sampleRate;
+ }
+
+ public void setTraceRate(double traceRate) {
+ this.traceRate = traceRate;
+ }
+
+ public void setTableName(String tableName) {
+ this.tableName = tableName;
+ }
+
+ public void setFlushCommits(boolean flushCommits) {
+ this.flushCommits = flushCommits;
+ }
+
+ public void setWriteToWAL(boolean writeToWAL) {
+ this.writeToWAL = writeToWAL;
+ }
+
+ public void setAutoFlush(boolean autoFlush) {
+ this.autoFlush = autoFlush;
+ }
+
+ public void setOneCon(boolean oneCon) {
+ this.oneCon = oneCon;
+ }
+
+ public void setUseTags(boolean useTags) {
+ this.useTags = useTags;
+ }
+
+ public void setNoOfTags(int noOfTags) {
+ this.noOfTags = noOfTags;
+ }
+
+ public void setReportLatency(boolean reportLatency) {
+ this.reportLatency = reportLatency;
+ }
+
+ public void setMultiGet(int multiGet) {
+ this.multiGet = multiGet;
+ }
+
+ public void setInMemoryCF(boolean inMemoryCF) {
+ this.inMemoryCF = inMemoryCF;
+ }
+
+ public void setPresplitRegions(int presplitRegions) {
+ this.presplitRegions = presplitRegions;
+ }
+
+ public void setCompression(Compression.Algorithm compression) {
+ this.compression = compression;
+ }
+
+ public void setBloomType(BloomType bloomType) {
+ this.bloomType = bloomType;
+ }
+
+ public void setBlockSize(int blockSize) {
+ this.blockSize = blockSize;
+ }
+
+ public void setBlockEncoding(DataBlockEncoding blockEncoding) {
+ this.blockEncoding = blockEncoding;
+ }
+
+ public void setValueRandom(boolean valueRandom) {
+ this.valueRandom = valueRandom;
+ }
+
+ public void setValueSize(int valueSize) {
+ this.valueSize = valueSize;
+ }
+
+ public void setPeriod(int period) {
+ this.period = period;
+ }
+
+ public boolean isNomapred() {
+ return nomapred;
+ }
+
+ public boolean isFilterAll() {
+ return filterAll;
+ }
+
+ public int getStartRow() {
+ return startRow;
+ }
+
+ public float getSize() {
+ return size;
+ }
+
+ public int getPerClientRunRows() {
+ return perClientRunRows;
+ }
+
+ public int getNumClientThreads() {
+ return numClientThreads;
+ }
+
+ public int getTotalRows() {
+ return totalRows;
+ }
+
+ public float getSampleRate() {
+ return sampleRate;
+ }
+
+ public double getTraceRate() {
+ return traceRate;
+ }
+
+ public String getTableName() {
+ return tableName;
+ }
+
+ public boolean isFlushCommits() {
+ return flushCommits;
+ }
+
+ public boolean isWriteToWAL() {
+ return writeToWAL;
+ }
+
+ public boolean isAutoFlush() {
+ return autoFlush;
+ }
+
+ public boolean isUseTags() {
+ return useTags;
+ }
+
+ public int getNoOfTags() {
+ return noOfTags;
+ }
+
+ public boolean isReportLatency() {
+ return reportLatency;
+ }
+
+ public int getMultiGet() {
+ return multiGet;
+ }
+
+ public boolean isInMemoryCF() {
+ return inMemoryCF;
+ }
+
+ public int getPresplitRegions() {
+ return presplitRegions;
+ }
+
+ public Compression.Algorithm getCompression() {
+ return compression;
+ }
+
+ public DataBlockEncoding getBlockEncoding() {
+ return blockEncoding;
+ }
+
+ public boolean isValueRandom() {
+ return valueRandom;
+ }
+
+ public int getValueSize() {
+ return valueSize;
+ }
+
+ public int getPeriod() {
+ return period;
+ }
+
+ public BloomType getBloomType() {
+ return bloomType;
+ }
+
+ public int getBlockSize() {
+ return blockSize;
+ }
+
+ public boolean isOneCon() {
+ return oneCon;
+ }
+
+ public int getMeasureAfter() {
+ return measureAfter;
+ }
+
+ public void setMeasureAfter(int measureAfter) {
+ this.measureAfter = measureAfter;
+ }
+
+ public boolean getAddColumns() {
+ return addColumns;
+ }
+
+ public void setAddColumns(boolean addColumns) {
+ this.addColumns = addColumns;
+ }
+
+ public void setInMemoryCompaction(MemoryCompactionPolicy inMemoryCompaction) {
+ this.inMemoryCompaction = inMemoryCompaction;
+ }
+
+ public MemoryCompactionPolicy getInMemoryCompaction() {
+ return this.inMemoryCompaction;
+ }
+ }
+
+ /*
+ * A test.
+ * Subclass to particularize what happens per row.
+ */
+ static abstract class TestBase {
+ // The below makes it so that when Tests are all running in the one
+ // JVM, they each have a differently seeded Random.
+ private static final Random randomSeed = new Random(System.currentTimeMillis());
+
+ private static long nextRandomSeed() {
+ return randomSeed.nextLong();
+ }
+ private final int everyN;
+
+ protected final Random rand = new Random(nextRandomSeed());
+ protected final Configuration conf;
+ protected final TestOptions opts;
+
+ private final Status status;
+ private final Sampler<?> traceSampler;
+ private final SpanReceiverHost receiverHost;
+
+ private String testName;
+ private Histogram latencyHistogram;
+ private Histogram valueSizeHistogram;
+ private RandomDistribution.Zipf zipf;
+
+ /**
+ * Note that all subclasses of this class must provide a public constructor
+ * that has the exact same list of arguments.
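+ * (These constructors are looked up reflectively in runOneClient, so a missing or mismatched
+ * constructor is reported from there as an "Invalid command class".)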
+ */
+ TestBase(final Configuration conf, final TestOptions options, final Status status) {
+ this.conf = conf;
+ this.receiverHost = this.conf == null? null: SpanReceiverHost.getInstance(conf);
+ this.opts = options;
+ this.status = status;
+ this.testName = this.getClass().getSimpleName();
+ if (options.traceRate >= 1.0) {
+ this.traceSampler = Sampler.ALWAYS;
+ } else if (options.traceRate > 0.0) {
+ conf.setDouble("hbase.sampler.fraction", options.traceRate);
+ this.traceSampler = new ProbabilitySampler(new HBaseHTraceConfiguration(conf));
+ } else {
+ this.traceSampler = Sampler.NEVER;
+ }
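+ // everyN reduces to (int) (1 / sampleRate): e.g. a sampleRate of 0.25 gives everyN = 4, so
+ // testTimed() below only exercises every 4th row index.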
+ everyN = (int) (opts.totalRows / (opts.totalRows * opts.sampleRate));
+ if (options.isValueZipf()) {
+ this.zipf = new RandomDistribution.Zipf(this.rand, 1, options.getValueSize(), 1.2);
+ }
+ LOG.info("Sampling 1 every " + everyN + " out of " + opts.perClientRunRows + " total rows.");
+ }
+
+ int getValueLength(final Random r) {
+ if (this.opts.isValueRandom()) return Math.abs(r.nextInt() % opts.valueSize);
+ else if (this.opts.isValueZipf()) return Math.abs(this.zipf.nextInt());
+ else return opts.valueSize;
+ }
+
+ void updateValueSize(final Result [] rs) throws IOException {
+ if (rs == null || !isRandomValueSize()) return;
+ for (Result r: rs) updateValueSize(r);
+ }
+
+ void updateValueSize(final Result r) throws IOException {
+ if (r == null || !isRandomValueSize()) return;
+ int size = 0;
+ for (CellScanner scanner = r.cellScanner(); scanner.advance();) {
+ size += scanner.current().getValueLength();
+ }
+ updateValueSize(size);
+ }
+
+ void updateValueSize(final int valueSize) {
+ if (!isRandomValueSize()) return;
+ this.valueSizeHistogram.update(valueSize);
+ }
+
+ String generateStatus(final int sr, final int i, final int lr) {
+ return sr + "/" + i + "/" + lr + ", latency " + getShortLatencyReport() +
+ (!isRandomValueSize()? "": ", value size " + getShortValueSizeReport());
+ }
+
+ boolean isRandomValueSize() {
+ return opts.valueRandom;
+ }
+
+ protected int getReportingPeriod() {
+ return opts.period;
+ }
+
+ /**
+ * Populated by testTakedown. Only implemented by RandomReadTest at the moment.
+ */
+ public Histogram getLatencyHistogram() {
+ return latencyHistogram;
+ }
+
+ void testSetup() throws IOException {
+ createConnection();
+ onStartup();
+ latencyHistogram = YammerHistogramUtils.newHistogram(new UniformReservoir(1024 * 500));
+ valueSizeHistogram = YammerHistogramUtils.newHistogram(new UniformReservoir(1024 * 500));
+ }
+
+ abstract void createConnection() throws IOException;
+
+ abstract void onStartup() throws IOException;
+
+ void testTakedown() throws IOException {
+ onTakedown();
+ // Print all stats for this thread continuously.
+ // Synchronize on Test.class so different threads don't intermingle the
+ // output. We can't use 'this' here because each thread has its own instance of Test class.
+ synchronized (Test.class) {
+ status.setStatus("Test : " + testName + ", Thread : " + Thread.currentThread().getName());
+ status.setStatus("Latency (us) : " + YammerHistogramUtils.getHistogramReport(
+ latencyHistogram));
+ status.setStatus("Num measures (latency) : " + latencyHistogram.getCount());
+ status.setStatus(YammerHistogramUtils.getPrettyHistogramReport(latencyHistogram));
+ status.setStatus("ValueSize (bytes) : "
+ + YammerHistogramUtils.getHistogramReport(valueSizeHistogram));
+ status.setStatus("Num measures (ValueSize): " + valueSizeHistogram.getCount());
+ status.setStatus(YammerHistogramUtils.getPrettyHistogramReport(valueSizeHistogram));
+ }
+ closeConnection();
+ receiverHost.closeReceivers();
+ }
+
+ abstract void onTakedown() throws IOException;
+
+ abstract void closeConnection() throws IOException;
+
+ /*
+ * Run test.
+ * @return Elapsed time in milliseconds.
+ * @throws IOException
+ */
+ long test() throws IOException, InterruptedException {
+ testSetup();
+ LOG.info("Timed test starting in thread " + Thread.currentThread().getName());
+ final long startTime = System.nanoTime();
+ try {
+ testTimed();
+ } finally {
+ testTakedown();
+ }
+ return (System.nanoTime() - startTime) / 1000000;
+ }
+
+ int getStartRow() {
+ return opts.startRow;
+ }
+
+ int getLastRow() {
+ return getStartRow() + opts.perClientRunRows;
+ }
+
+ /**
+ * Provides an extension point for tests that don't want a per row invocation.
+ */
+ void testTimed() throws IOException, InterruptedException {
+ int startRow = getStartRow();
+ int lastRow = getLastRow();
+ // Report on completion of 1/10th of total.
+ for (int ii = 0; ii < opts.cycles; ii++) {
+ if (opts.cycles > 1) LOG.info("Cycle=" + ii + " of " + opts.cycles);
+ for (int i = startRow; i < lastRow; i++) {
+ if (i % everyN != 0) continue;
+ long startTime = System.nanoTime();
+ TraceScope scope = Trace.startSpan("test row", traceSampler);
+ try {
+ testRow(i);
+ } finally {
+ scope.close();
+ }
+ if ( (i - startRow) > opts.measureAfter) {
+ // If multiget is enabled, say set to 10, testRow() returns immediately first 9 times
+ // and sends the actual get request in the 10th iteration. We should only set latency
+ // when actual request is sent because otherwise it turns out to be 0.
+ if (opts.multiGet == 0 || (i - startRow + 1) % opts.multiGet == 0) {
+ latencyHistogram.update((System.nanoTime() - startTime) / 1000);
+ }
+ if (status != null && i > 0 && (i % getReportingPeriod()) == 0) {
+ status.setStatus(generateStatus(startRow, i, lastRow));
+ }
+ }
+ }
+ }
+ }
+
+ /**
+ * @return Subset of the histograms' calculation.
+ */
+ public String getShortLatencyReport() {
+ return YammerHistogramUtils.getShortHistogramReport(this.latencyHistogram);
+ }
+
+ /**
+ * @return Subset of the histograms' calculation.
+ */
+ public String getShortValueSizeReport() {
+ return YammerHistogramUtils.getShortHistogramReport(this.valueSizeHistogram);
+ }
+
+ /*
+ * Test for individual row.
+ * @param i Row index.
+ */
+ abstract void testRow(final int i) throws IOException, InterruptedException;
+ }
+
+ static abstract class Test extends TestBase {
+ protected Connection connection;
+
+ Test(final Connection con, final TestOptions options, final Status status) {
+ super(con == null ? HBaseConfiguration.create() : con.getConfiguration(), options, status);
+ this.connection = con;
+ }
+
+ @Override
+ void createConnection() throws IOException {
+ if (!opts.isOneCon()) {
+ this.connection = ConnectionFactory.createConnection(conf);
+ }
+ }
+
+ @Override
+ void closeConnection() throws IOException {
+ if (!opts.isOneCon()) {
+ this.connection.close();
+ }
+ }
+ }
+
+ static abstract class AsyncTest extends TestBase {
+ protected AsyncConnection connection;
+
+ AsyncTest(final AsyncConnection con, final TestOptions options, final Status status) {
+ super(con == null ? HBaseConfiguration.create() : con.getConfiguration(), options, status);
+ this.connection = con;
+ }
+
+ @Override
+ void createConnection() {
+ if (!opts.isOneCon()) {
+ try {
+ this.connection = ConnectionFactory.createAsyncConnection(conf).get();
+ } catch (InterruptedException | ExecutionException e) {
+ LOG.error("Failed to create async connection", e);
+ }
+ }
+ }
+
+ @Override
+ void closeConnection() throws IOException {
+ if (!opts.isOneCon()) {
+ this.connection.close();
+ }
+ }
+ }
+
+ static abstract class TableTest extends Test {
+ protected Table table;
+
+ TableTest(Connection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ void onStartup() throws IOException {
+ this.table = connection.getTable(TableName.valueOf(opts.tableName));
+ }
+
+ @Override
+ void onTakedown() throws IOException {
+ table.close();
+ }
+ }
+
+ static abstract class AsyncTableTest extends AsyncTest {
+ protected RawAsyncTable table;
+
+ AsyncTableTest(AsyncConnection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ void onStartup() throws IOException {
+ this.table = connection.getRawTable(TableName.valueOf(opts.tableName));
+ }
+
+ @Override
+ void onTakedown() throws IOException {
+ }
+ }
+
+ static class AsyncRandomReadTest extends AsyncTableTest {
+ private final Consistency consistency;
+ private ArrayList<Get> gets;
+ private Random rd = new Random();
+
+ AsyncRandomReadTest(AsyncConnection con, TestOptions options, Status status) {
+ super(con, options, status);
+ consistency = options.replicas == DEFAULT_OPTS.replicas ? null : Consistency.TIMELINE;
+ if (opts.multiGet > 0) {
+ LOG.info("MultiGet enabled. Sending GETs in batches of " + opts.multiGet + ".");
+ this.gets = new ArrayList<>(opts.multiGet);
+ }
+ }
+
+ @Override
+ void testRow(final int i) throws IOException, InterruptedException {
+ if (opts.randomSleep > 0) {
+ Thread.sleep(rd.nextInt(opts.randomSleep));
+ }
+ Get get = new Get(getRandomRow(this.rand, opts.totalRows));
+ if (opts.addColumns) {
+ get.addColumn(FAMILY_NAME, QUALIFIER_NAME);
+ } else {
+ get.addFamily(FAMILY_NAME);
+ }
+ if (opts.filterAll) {
+ get.setFilter(new FilterAllFilter());
+ }
+ get.setConsistency(consistency);
+ if (LOG.isTraceEnabled()) LOG.trace(get.toString());
+ try {
+ if (opts.multiGet > 0) {
+ this.gets.add(get);
+ if (this.gets.size() == opts.multiGet) {
+ Result[] rs =
+ this.table.get(this.gets).stream().map(f -> propagate(f::get)).toArray(Result[]::new);
+ updateValueSize(rs);
+ this.gets.clear();
+ }
+ } else {
+ updateValueSize(this.table.get(get).get());
+ }
+ } catch (ExecutionException e) {
+ throw new IOException(e);
+ }
+ }
+
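+ // runtime()/propagate() below rewrap checked exceptions as RuntimeExceptions so that the
+ // lambda passed to Stream.map in testRow above can call the blocking Future.get.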
+ public static RuntimeException runtime(Throwable e) {
+ if (e instanceof RuntimeException) {
+ return (RuntimeException) e;
+ }
+ return new RuntimeException(e);
+ }
+
+ public static <V> V propagate(Callable<V> callable) {
+ try {
+ return callable.call();
+ } catch (Exception e) {
+ throw runtime(e);
+ }
+ }
+
+ @Override
+ protected int getReportingPeriod() {
+ int period = opts.perClientRunRows / 10;
+ return period == 0 ? opts.perClientRunRows : period;
+ }
+
+ @Override
+ protected void testTakedown() throws IOException {
+ if (this.gets != null && this.gets.size() > 0) {
+ this.table.get(gets);
+ this.gets.clear();
+ }
+ super.testTakedown();
+ }
+ }
+
+ static class AsyncRandomWriteTest extends AsyncTableTest {
+ AsyncRandomWriteTest(AsyncConnection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ void testRow(final int i) throws IOException, InterruptedException {
+ byte[] row = getRandomRow(this.rand, opts.totalRows);
+ Put put = new Put(row);
+ for (int column = 0; column < opts.columns; column++) {
+ byte[] qualifier = column == 0 ? COLUMN_ZERO : Bytes.toBytes("" + column);
+ byte[] value = generateData(this.rand, getValueLength(this.rand));
+ if (opts.useTags) {
+ byte[] tag = generateData(this.rand, TAG_LENGTH);
+ Tag[] tags = new Tag[opts.noOfTags];
+ for (int n = 0; n < opts.noOfTags; n++) {
+ Tag t = new ArrayBackedTag((byte) n, tag);
+ tags[n] = t;
+ }
+ KeyValue kv =
+ new KeyValue(row, FAMILY_NAME, qualifier, HConstants.LATEST_TIMESTAMP, value, tags);
+ put.add(kv);
+ updateValueSize(kv.getValueLength());
+ } else {
+ put.addColumn(FAMILY_NAME, qualifier, value);
+ updateValueSize(value.length);
+ }
+ }
+ put.setDurability(opts.writeToWAL ? Durability.SYNC_WAL : Durability.SKIP_WAL);
+ try {
+ table.put(put).get();
+ } catch (ExecutionException e) {
+ throw new IOException(e);
+ }
+ }
+ }
+
+ static class AsyncScanTest extends AsyncTableTest {
+ private ResultScanner testScanner;
+ private AsyncTable asyncTable;
+
+ AsyncScanTest(AsyncConnection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ void onStartup() throws IOException {
+ this.asyncTable =
+ connection.getTable(TableName.valueOf(opts.tableName),
+ Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()));
+ }
+
+ @Override
+ void testTakedown() throws IOException {
+ if (this.testScanner != null) {
+ this.testScanner.close();
+ }
+ super.testTakedown();
+ }
+
+ @Override
+ void testRow(final int i) throws IOException {
+ if (this.testScanner == null) {
+ Scan scan =
+ new Scan().withStartRow(format(opts.startRow)).setCaching(opts.caching)
+ .setCacheBlocks(opts.cacheBlocks).setAsyncPrefetch(opts.asyncPrefetch)
+ .setReadType(opts.scanReadType);
+ if (opts.addColumns) {
+ scan.addColumn(FAMILY_NAME, QUALIFIER_NAME);
+ } else {
+ scan.addFamily(FAMILY_NAME);
+ }
+ if (opts.filterAll) {
+ scan.setFilter(new FilterAllFilter());
+ }
+ this.testScanner = asyncTable.getScanner(scan);
+ }
+ Result r = testScanner.next();
+ updateValueSize(r);
+ }
+ }
+
+ static class AsyncSequentialReadTest extends AsyncTableTest {
+ AsyncSequentialReadTest(AsyncConnection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ void testRow(final int i) throws IOException, InterruptedException {
+ Get get = new Get(format(i));
+ if (opts.addColumns) {
+ get.addColumn(FAMILY_NAME, QUALIFIER_NAME);
+ }
+ if (opts.filterAll) {
+ get.setFilter(new FilterAllFilter());
+ }
+ try {
+ updateValueSize(table.get(get).get());
+ } catch (ExecutionException e) {
+ throw new IOException(e);
+ }
+ }
+ }
+
+ static class AsyncSequentialWriteTest extends AsyncTableTest {
+ AsyncSequentialWriteTest(AsyncConnection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ void testRow(final int i) throws IOException, InterruptedException {
+ byte[] row = format(i);
+ Put put = new Put(row);
+ for (int column = 0; column < opts.columns; column++) {
+ byte [] qualifier = column == 0? COLUMN_ZERO: Bytes.toBytes("" + column);
+ byte[] value = generateData(this.rand, getValueLength(this.rand));
+ if (opts.useTags) {
+ byte[] tag = generateData(this.rand, TAG_LENGTH);
+ Tag[] tags = new Tag[opts.noOfTags];
+ for (int n = 0; n < opts.noOfTags; n++) {
+ Tag t = new ArrayBackedTag((byte) n, tag);
+ tags[n] = t;
+ }
+ KeyValue kv = new KeyValue(row, FAMILY_NAME, qualifier, HConstants.LATEST_TIMESTAMP,
+ value, tags);
+ put.add(kv);
+ updateValueSize(kv.getValueLength());
+ } else {
+ put.addColumn(FAMILY_NAME, qualifier, value);
+ updateValueSize(value.length);
+ }
+ }
+ put.setDurability(opts.writeToWAL ? Durability.SYNC_WAL : Durability.SKIP_WAL);
+ try {
+ table.put(put).get();
+ } catch (ExecutionException e) {
+ throw new IOException(e);
+ }
+ }
+ }
+
+ static abstract class BufferedMutatorTest extends Test {
+ protected BufferedMutator mutator;
+ protected Table table;
+
+ BufferedMutatorTest(Connection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ void onStartup() throws IOException {
+ this.mutator = connection.getBufferedMutator(TableName.valueOf(opts.tableName));
+ this.table = connection.getTable(TableName.valueOf(opts.tableName));
+ }
+
+ @Override
+ void onTakedown() throws IOException {
+ mutator.close();
+ table.close();
+ }
+ }
+
+ static class RandomSeekScanTest extends TableTest {
+ RandomSeekScanTest(Connection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ void testRow(final int i) throws IOException {
+ Scan scan = new Scan().withStartRow(getRandomRow(this.rand, opts.totalRows))
+ .setCaching(opts.caching).setCacheBlocks(opts.cacheBlocks)
+ .setAsyncPrefetch(opts.asyncPrefetch).setReadType(opts.scanReadType);
+ FilterList list = new FilterList();
+ if (opts.addColumns) {
+ scan.addColumn(FAMILY_NAME, QUALIFIER_NAME);
+ } else {
+ scan.addFamily(FAMILY_NAME);
+ }
+ if (opts.filterAll) {
+ list.addFilter(new FilterAllFilter());
+ }
+ list.addFilter(new WhileMatchFilter(new PageFilter(120)));
+ scan.setFilter(list);
+ ResultScanner s = this.table.getScanner(scan);
+ for (Result rr; (rr = s.next()) != null;) {
+ updateValueSize(rr);
+ }
+ s.close();
+ }
+
+ @Override
+ protected int getReportingPeriod() {
+ int period = opts.perClientRunRows / 100;
+ return period == 0 ? opts.perClientRunRows : period;
+ }
+
+ }
+
+ static abstract class RandomScanWithRangeTest extends TableTest {
+ RandomScanWithRangeTest(Connection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ void testRow(final int i) throws IOException {
+ Pair<byte[], byte[]> startAndStopRow = getStartAndStopRow();
+ Scan scan = new Scan().withStartRow(startAndStopRow.getFirst())
+ .withStopRow(startAndStopRow.getSecond()).setCaching(opts.caching)
+ .setCacheBlocks(opts.cacheBlocks).setAsyncPrefetch(opts.asyncPrefetch)
+ .setReadType(opts.scanReadType);
+ if (opts.filterAll) {
+ scan.setFilter(new FilterAllFilter());
+ }
+ if (opts.addColumns) {
+ scan.addColumn(FAMILY_NAME, QUALIFIER_NAME);
+ } else {
+ scan.addFamily(FAMILY_NAME);
+ }
+ Result r = null;
+ int count = 0;
+ ResultScanner s = this.table.getScanner(scan);
+ for (; (r = s.next()) != null;) {
+ updateValueSize(r);
+ count++;
+ }
+ if (i % 100 == 0) {
+ LOG.info(String.format("Scan for key range %s - %s returned %s rows",
+ Bytes.toString(startAndStopRow.getFirst()),
+ Bytes.toString(startAndStopRow.getSecond()), count));
+ }
+
+ s.close();
+ }
+
+ protected abstract Pair<byte[],byte[]> getStartAndStopRow();
+
+ protected Pair<byte[], byte[]> generateStartAndStopRows(int maxRange) {
+ int start = this.rand.nextInt(Integer.MAX_VALUE) % opts.totalRows;
+ int stop = start + maxRange;
+ return new Pair<>(format(start), format(stop));
+ }
+
+ @Override
+ protected int getReportingPeriod() {
+ int period = opts.perClientRunRows / 100;
+ return period == 0? opts.perClientRunRows: period;
+ }
+ }
+
+ static class RandomScanWithRange10Test extends RandomScanWithRangeTest {
+ RandomScanWithRange10Test(Connection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ protected Pair<byte[], byte[]> getStartAndStopRow() {
+ return generateStartAndStopRows(10);
+ }
+ }
+
+ static class RandomScanWithRange100Test extends RandomScanWithRangeTest {
+ RandomScanWithRange100Test(Connection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ protected Pair<byte[], byte[]> getStartAndStopRow() {
+ return generateStartAndStopRows(100);
+ }
+ }
+
+ static class RandomScanWithRange1000Test extends RandomScanWithRangeTest {
+ RandomScanWithRange1000Test(Connection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ protected Pair<byte[], byte[]> getStartAndStopRow() {
+ return generateStartAndStopRows(1000);
+ }
+ }
+
+ static class RandomScanWithRange10000Test extends RandomScanWithRangeTest {
+ RandomScanWithRange10000Test(Connection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ protected Pair<byte[], byte[]> getStartAndStopRow() {
+ return generateStartAndStopRows(10000);
+ }
+ }
+
+ static class RandomReadTest extends TableTest {
+ private final Consistency consistency;
+ private ArrayList<Get> gets;
+ private Random rd = new Random();
+
+ RandomReadTest(Connection con, TestOptions options, Status status) {
+ super(con, options, status);
+ consistency = options.replicas == DEFAULT_OPTS.replicas ? null : Consistency.TIMELINE;
+ if (opts.multiGet > 0) {
+ LOG.info("MultiGet enabled. Sending GETs in batches of " + opts.multiGet + ".");
+ this.gets = new ArrayList<>(opts.multiGet);
+ }
+ }
+
+ @Override
+ void testRow(final int i) throws IOException, InterruptedException {
+ if (opts.randomSleep > 0) {
+ Thread.sleep(rd.nextInt(opts.randomSleep));
+ }
+ Get get = new Get(getRandomRow(this.rand, opts.totalRows));
+ if (opts.addColumns) {
+ get.addColumn(FAMILY_NAME, QUALIFIER_NAME);
+ } else {
+ get.addFamily(FAMILY_NAME);
+ }
+ if (opts.filterAll) {
+ get.setFilter(new FilterAllFilter());
+ }
+ get.setConsistency(consistency);
+ if (LOG.isTraceEnabled()) LOG.trace(get.toString());
+ if (opts.multiGet > 0) {
+ this.gets.add(get);
+ if (this.gets.size() == opts.multiGet) {
+ Result [] rs = this.table.get(this.gets);
+ updateValueSize(rs);
+ this.gets.clear();
+ }
+ } else {
+ updateValueSize(this.table.get(get));
+ }
+ }
+
+ @Override
+ protected int getReportingPeriod() {
+ int period = opts.perClientRunRows / 10;
+ return period == 0 ? opts.perClientRunRows : period;
+ }
+
+ @Override
+ protected void testTakedown() throws IOException {
+ if (this.gets != null && this.gets.size() > 0) {
+ this.table.get(gets);
+ this.gets.clear();
+ }
+ super.testTakedown();
+ }
+ }
+
+ static class RandomWriteTest extends BufferedMutatorTest {
+ RandomWriteTest(Connection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ void testRow(final int i) throws IOException {
+ byte[] row = getRandomRow(this.rand, opts.totalRows);
+ Put put = new Put(row);
+ for (int column = 0; column < opts.columns; column++) {
+ byte [] qualifier = column == 0? COLUMN_ZERO: Bytes.toBytes("" + column);
+ byte[] value = generateData(this.rand, getValueLength(this.rand));
+ if (opts.useTags) {
+ byte[] tag = generateData(this.rand, TAG_LENGTH);
+ Tag[] tags = new Tag[opts.noOfTags];
+ for (int n = 0; n < opts.noOfTags; n++) {
+ Tag t = new ArrayBackedTag((byte) n, tag);
+ tags[n] = t;
+ }
+ KeyValue kv = new KeyValue(row, FAMILY_NAME, qualifier, HConstants.LATEST_TIMESTAMP,
+ value, tags);
+ put.add(kv);
+ updateValueSize(kv.getValueLength());
+ } else {
+ put.addColumn(FAMILY_NAME, qualifier, value);
+ updateValueSize(value.length);
+ }
+ }
+ put.setDurability(opts.writeToWAL ? Durability.SYNC_WAL : Durability.SKIP_WAL);
+ if (opts.autoFlush) {
+ table.put(put);
+ } else {
+ mutator.mutate(put);
+ }
+ }
+ }
+
+ static class ScanTest extends TableTest {
+ private ResultScanner testScanner;
+
+ ScanTest(Connection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ void testTakedown() throws IOException {
+ if (this.testScanner != null) {
+ this.testScanner.close();
+ }
+ super.testTakedown();
+ }
+
+ @Override
+ void testRow(final int i) throws IOException {
+ if (this.testScanner == null) {
+ Scan scan = new Scan().withStartRow(format(opts.startRow)).setCaching(opts.caching)
+ .setCacheBlocks(opts.cacheBlocks).setAsyncPrefetch(opts.asyncPrefetch)
+ .setReadType(opts.scanReadType);
+ if (opts.addColumns) {
+ scan.addColumn(FAMILY_NAME, QUALIFIER_NAME);
+ } else {
+ scan.addFamily(FAMILY_NAME);
+ }
+ if (opts.filterAll) {
+ scan.setFilter(new FilterAllFilter());
+ }
+ this.testScanner = table.getScanner(scan);
+ }
+ Result r = testScanner.next();
+ updateValueSize(r);
+ }
+ }
+
+ /**
+ * Base class for operations that are CAS-like; that read a value and then set it based off what
+ * they read. In this category is increment, append, checkAndPut, etc.
+ *
+ * <p>These operations also want some concurrency going on. Usually when these tests run, they
+ * operate in their own part of the key range. In CASTest, we will have them all overlap on the
+ * same key space. We do this with our getStartRow and getLastRow overrides.
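+ * For example, with two clients each running 1000 rows, both work rows 0..999 rather than two
+ * disjoint 1000-row ranges, so the CAS-style operations genuinely contend on the same cells.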
+ */
+ static abstract class CASTableTest extends TableTest {
+ private final byte [] qualifier;
+ CASTableTest(Connection con, TestOptions options, Status status) {
+ super(con, options, status);
+ qualifier = Bytes.toBytes(this.getClass().getSimpleName());
+ }
+
+ byte [] getQualifier() {
+ return this.qualifier;
+ }
+
+ @Override
+ int getStartRow() {
+ return 0;
+ }
+
+ @Override
+ int getLastRow() {
+ return opts.perClientRunRows;
+ }
+ }
+
+ static class IncrementTest extends CASTableTest {
+ IncrementTest(Connection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ void testRow(final int i) throws IOException {
+ Increment increment = new Increment(format(i));
+ increment.addColumn(FAMILY_NAME, getQualifier(), 1L);
+ updateValueSize(this.table.increment(increment));
+ }
+ }
+
+ static class AppendTest extends CASTableTest {
+ AppendTest(Connection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ void testRow(final int i) throws IOException {
+ byte [] bytes = format(i);
+ Append append = new Append(bytes);
+ append.addColumn(FAMILY_NAME, getQualifier(), bytes);
+ updateValueSize(this.table.append(append));
+ }
+ }
+
+ static class CheckAndMutateTest extends CASTableTest {
+ CheckAndMutateTest(Connection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ void testRow(final int i) throws IOException {
+ byte [] bytes = format(i);
+ // Put a known value so when we go to check it, it is there.
+ Put put = new Put(bytes);
+ put.addColumn(FAMILY_NAME, getQualifier(), bytes);
+ this.table.put(put);
+ RowMutations mutations = new RowMutations(bytes);
+ mutations.add(put);
+ this.table.checkAndMutate(bytes, FAMILY_NAME, getQualifier(), CompareOp.EQUAL, bytes,
+ mutations);
+ }
+ }
+
+ static class CheckAndPutTest extends CASTableTest {
+ CheckAndPutTest(Connection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ void testRow(final int i) throws IOException {
+ byte [] bytes = format(i);
+ // Put a known value so when we go to check it, it is there.
+ Put put = new Put(bytes);
+ put.addColumn(FAMILY_NAME, getQualifier(), bytes);
+ this.table.put(put);
+ this.table.checkAndPut(bytes, FAMILY_NAME, getQualifier(), CompareOp.EQUAL, bytes, put);
+ }
+ }
+
+ static class CheckAndDeleteTest extends CASTableTest {
+ CheckAndDeleteTest(Connection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ void testRow(final int i) throws IOException {
+ byte [] bytes = format(i);
+ // Put a known value so when we go to check it, it is there.
+ Put put = new Put(bytes);
+ put.addColumn(FAMILY_NAME, getQualifier(), bytes);
+ this.table.put(put);
+ Delete delete = new Delete(put.getRow());
+ delete.addColumn(FAMILY_NAME, getQualifier());
+ this.table.checkAndDelete(bytes, FAMILY_NAME, getQualifier(), CompareOp.EQUAL, bytes, delete);
+ }
+ }
+
+ static class SequentialReadTest extends TableTest {
+ SequentialReadTest(Connection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ void testRow(final int i) throws IOException {
+ Get get = new Get(format(i));
+ if (opts.addColumns) {
+ get.addColumn(FAMILY_NAME, QUALIFIER_NAME);
+ }
+ if (opts.filterAll) {
+ get.setFilter(new FilterAllFilter());
+ }
+ updateValueSize(table.get(get));
+ }
+ }
+
+ static class SequentialWriteTest extends BufferedMutatorTest {
+ SequentialWriteTest(Connection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ void testRow(final int i) throws IOException {
+ byte[] row = format(i);
+ Put put = new Put(row);
+ for (int column = 0; column < opts.columns; column++) {
+ byte [] qualifier = column == 0? COLUMN_ZERO: Bytes.toBytes("" + column);
+ byte[] value = generateData(this.rand, getValueLength(this.rand));
+ if (opts.useTags) {
+ byte[] tag = generateData(this.rand, TAG_LENGTH);
+ Tag[] tags = new Tag[opts.noOfTags];
+ for (int n = 0; n < opts.noOfTags; n++) {
+ Tag t = new ArrayBackedTag((byte) n, tag);
+ tags[n] = t;
+ }
+ KeyValue kv = new KeyValue(row, FAMILY_NAME, qualifier, HConstants.LATEST_TIMESTAMP,
+ value, tags);
+ put.add(kv);
+ updateValueSize(kv.getValueLength());
+ } else {
+ put.addColumn(FAMILY_NAME, qualifier, value);
+ updateValueSize(value.length);
+ }
+ }
+ put.setDurability(opts.writeToWAL ? Durability.SYNC_WAL : Durability.SKIP_WAL);
+ if (opts.autoFlush) {
+ table.put(put);
+ } else {
+ mutator.mutate(put);
+ }
+ }
+ }
+
+ static class FilteredScanTest extends TableTest {
+ protected static final Log LOG = LogFactory.getLog(FilteredScanTest.class.getName());
+
+ FilteredScanTest(Connection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ void testRow(int i) throws IOException {
+ byte[] value = generateData(this.rand, getValueLength(this.rand));
+ Scan scan = constructScan(value);
+ ResultScanner scanner = null;
+ try {
+ scanner = this.table.getScanner(scan);
+ for (Result r = null; (r = scanner.next()) != null;) {
+ updateValueSize(r);
+ }
+ } finally {
+ if (scanner != null) scanner.close();
+ }
+ }
+
+ protected Scan constructScan(byte[] valuePrefix) throws IOException {
+ FilterList list = new FilterList();
+ Filter filter = new SingleColumnValueFilter(
+ FAMILY_NAME, COLUMN_ZERO, CompareFilter.CompareOp.EQUAL,
+ new BinaryComparator(valuePrefix)
+ );
+ list.addFilter(filter);
+ if(opts.filterAll) {
+ list.addFilter(new FilterAllFilter());
+ }
+ Scan scan = new Scan().setCaching(opts.caching).setCacheBlocks(opts.cacheBlocks)
+ .setAsyncPrefetch(opts.asyncPrefetch).setReadType(opts.scanReadType);
+ if (opts.addColumns) {
+ scan.addColumn(FAMILY_NAME, QUALIFIER_NAME);
+ } else {
+ scan.addFamily(FAMILY_NAME);
+ }
+ scan.setFilter(list);
+ return scan;
+ }
+ }
+
+ /**
+ * Compute a throughput rate in MB/s.
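+ * For example (hypothetical numbers): 1,000,000 single-column rows carrying 1,000-byte values
+ * plus roughly 30 bytes of key/family/qualifier overhead, written in 60,000 ms, come to about
+ * 1.03e9 bytes over 60 seconds, i.e. roughly 16 MB/s.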
+ * @param rows Number of records consumed.
+ * @param timeMs Time taken in milliseconds.
+ * @return String value with label, ie '123.76 MB/s'
+ */
+ private static String calculateMbps(int rows, long timeMs, final int valueSize, int columns) {
+ BigDecimal rowSize = BigDecimal.valueOf(ROW_LENGTH +
+ ((valueSize + FAMILY_NAME.length + COLUMN_ZERO.length) * columns));
+ BigDecimal mbps = BigDecimal.valueOf(rows).multiply(rowSize, CXT)
+ .divide(BigDecimal.valueOf(timeMs), CXT).multiply(MS_PER_SEC, CXT)
+ .divide(BYTES_PER_MB, CXT);
+ return FMT.format(mbps) + " MB/s";
+ }
+
+ /*
+ * Format passed integer.
+ * @param number
+ * @return Zero-prefixed, ROW_LENGTH-byte wide decimal version of the passed
+ * number (takes the absolute value in case the number is negative).
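+ * For example, assuming ROW_LENGTH is 26, format(123) yields a 26-byte array of ASCII digits:
+ * twenty-three '0' characters followed by "123".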
+ */
+ public static byte [] format(final int number) {
+ byte [] b = new byte[ROW_LENGTH];
+ int d = Math.abs(number);
+ for (int i = b.length - 1; i >= 0; i--) {
+ b[i] = (byte)((d % 10) + '0');
+ d /= 10;
+ }
+ return b;
+ }
+
+ /*
+ * This method takes some time and is run inline while uploading data. For
+ * example, in the mapfile test, generation of the key and value
+ * consumes about 30% of the CPU time.
+ * @return Generated random value to insert into a table cell.
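+ * The output is built from 8-byte runs of a single random uppercase letter (A-Z); only one
+ * random draw is made per 8-byte run.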
+ */
+ public static byte[] generateData(final Random r, int length) {
+ byte [] b = new byte [length];
+ int i;
+
+ for(i = 0; i < (length-8); i += 8) {
+ b[i] = (byte) (65 + r.nextInt(26));
+ b[i+1] = b[i];
+ b[i+2] = b[i];
+ b[i+3] = b[i];
+ b[i+4] = b[i];
+ b[i+5] = b[i];
+ b[i+6] = b[i];
+ b[i+7] = b[i];
+ }
+
+ byte a = (byte) (65 + r.nextInt(26));
+ for(; i < length; i++) {
+ b[i] = a;
+ }
+ return b;
+ }
+
+ static byte [] getRandomRow(final Random random, final int totalRows) {
+ return format(generateRandomRow(random, totalRows));
+ }
+
+ static int generateRandomRow(final Random random, final int totalRows) {
+ return random.nextInt(Integer.MAX_VALUE) % totalRows;
+ }
+
+ static RunResult runOneClient(final Class<? extends TestBase> cmd, Configuration conf,
+ Connection con, AsyncConnection asyncCon, TestOptions opts, final Status status)
+ throws IOException, InterruptedException {
+ status.setStatus("Start " + cmd + " at offset " + opts.startRow + " for "
+ + opts.perClientRunRows + " rows");
+ long totalElapsedTime;
+
+ final TestBase t;
+ try {
+ if (AsyncTest.class.isAssignableFrom(cmd)) {
+ Class<? extends AsyncTest> newCmd = (Class<? extends AsyncTest>) cmd;
+ Constructor<? extends AsyncTest> constructor =
+ newCmd.getDeclaredConstructor(AsyncConnection.class, TestOptions.class, Status.class);
+ t = constructor.newInstance(asyncCon, opts, status);
+ } else {
+ Class<? extends Test> newCmd = (Class<? extends Test>) cmd;
+ Constructor<? extends Test> constructor =
+ newCmd.getDeclaredConstructor(Connection.class, TestOptions.class, Status.class);
+ t = constructor.newInstance(con, opts, status);
+ }
+ } catch (NoSuchMethodException e) {
+ throw new IllegalArgumentException("Invalid command class: " + cmd.getName()
+ + ". It does not provide a constructor as described by "
+ + "the javadoc comment. Available constructors are: "
+ + Arrays.toString(cmd.getConstructors()));
+ } catch (Exception e) {
+ throw new IllegalStateException("Failed to construct command class", e);
+ }
+ totalElapsedTime = t.test();
+
+ status.setStatus("Finished " + cmd + " in " + totalElapsedTime +
+ "ms at offset " + opts.startRow + " for " + opts.perClientRunRows + " rows" +
+ " (" + calculateMbps((int)(opts.perClientRunRows * opts.sampleRate), totalElapsedTime,
+ getAverageValueLength(opts), opts.columns) + ")");
+
+ return new RunResult(totalElapsedTime, t.getLatencyHistogram());
+ }
+
+ private static int getAverageValueLength(final TestOptions opts) {
+ return opts.valueRandom? opts.valueSize/2: opts.valueSize;
+ }
+
+ private void runTest(final Class<? extends TestBase> cmd, TestOptions opts) throws IOException,
+ InterruptedException, ClassNotFoundException, ExecutionException {
+ // Log the configuration we're going to run with. Uses JSON mapper because lazy. It'll do
+ // the TestOptions introspection for us and dump the output in a readable format.
+ LOG.info(cmd.getSimpleName() + " test run options=" + MAPPER.writeValueAsString(opts));
+ Admin admin = null;
+ Connection connection = null;
+ try {
+ connection = ConnectionFactory.createConnection(getConf());
+ admin = connection.getAdmin();
+ checkTable(admin, opts);
+ } finally {
+ if (admin != null) admin.close();
+ if (connection != null) connection.close();
+ }
+ if (opts.nomapred) {
+ doLocalClients(opts, getConf());
+ } else {
+ doMapReduce(opts, getConf());
+ }
+ }
+
+ protected void printUsage() {
+ printUsage(this.getClass().getName(), null);
+ }
+
+ protected static void printUsage(final String message) {
+ printUsage(PerformanceEvaluation.class.getName(), message);
+ }
+
+ protected static void printUsageAndExit(final String message, final int exitCode) {
+ printUsage(message);
+ System.exit(exitCode);
+ }
+
+ protected static void printUsage(final String className, final String message) {
+ if (message != null && message.length() > 0) {
+ System.err.println(message);
+ }
+ System.err.println("Usage: java " + className + " \\");
+ System.err.println(" <OPTIONS> [-D<property=value>]* <command> <nclients>");
+ System.err.println();
+ System.err.println("General Options:");
+ System.err.println(" nomapred Run multiple clients using threads " +
+ "(rather than use mapreduce)");
+ System.err.println(" oneCon all the threads share the same connection. Default: False");
+ System.err.println(" sampleRate Execute test on a sample of total " +
+ "rows. Only supported by randomRead. Default: 1.0");
+ System.err.println(" period Report every 'period' rows: " +
+ "Default: opts.perClientRunRows / 10 = " + DEFAULT_OPTS.getPerClientRunRows()/10);
+ System.err.println(" cycles How many times to cycle the test. Defaults: 1.");
+ System.err.println(" traceRate Enable HTrace spans. Initiate tracing every N rows. " +
+ "Default: 0");
+ System.err.println(" latency Set to report operation latencies. Default: False");
+ System.err.println(" measureAfter Start to measure the latency once 'measureAfter'" +
+ " rows have been treated. Default: 0");
+ System.err.println(" valueSize Pass value size to use: Default: "
+ + DEFAULT_OPTS.getValueSize());
+ System.err.println(" valueRandom Set if we should vary value size between 0 and " +
+ "'valueSize'; set on read for stats on size: Default: Not set.");
+ System.err.println(" blockEncoding Block encoding to use. Value should be one of "
+ + Arrays.toString(DataBlockEncoding.values()) + ". Default: NONE");
+ System.err.println();
+ System.err.println("Table Creation / Write Tests:");
+ System.err.println(" table Alternate table name. Default: 'TestTable'");
+ System.err.println(" rows Rows each client runs. Default: "
+ + DEFAULT_OPTS.getPerClientRunRows()
+ + ". In case of randomReads and randomSeekScans this could"
+ + " be specified along with --size to specify the number of rows to be scanned within"
+ + " the total range specified by the size.");
+ System.err.println(
+ " size Total size in GiB. Mutually exclusive with --rows for writes and scans"
+ + ". But for randomReads and randomSeekScans when you use size with --rows you could"
+ + " use size to specify the end range and --rows"
+ + " specifies the number of rows within that range. " + "Default: 1.0.");
+ System.err.println(" compress Compression type to use (GZ, LZO, ...). Default: 'NONE'");
+ System.err.println(" flushCommits Used to determine if the test should flush the table. " +
+ "Default: false");
+ System.err.println(" valueZipf Set if we should vary value size between 0 and " +
+ "'valueSize' in zipf form: Default: Not set.");
+ System.err.println(" writeToWAL Set writeToWAL on puts. Default: True");
+ System.err.println(" autoFlush Set autoFlush on htable. Default: False");
+ System.err.println(" presplit Create presplit table. If a table with same name exists,"
+ + " it'll be deleted and recreated (instead of verifying count of its existing regions). "
+ + "Recommended for accurate perf analysis (see guide). Default: disabled");
+ System.err.println(" usetags Writes tags along with KVs. Use with HFile V3. " +
+ "Default: false");
+ System.err.println(" numoftags Specify the no of tags that would be needed. " +
+ "This works only if usetags is true. Default: " + DEFAULT_OPTS.noOfTags);
+ System.err.println(" splitPolicy Specify a custom RegionSplitPolicy for the table.");
+ System.err.println(" columns Columns to write per row. Default: 1");
+ System.err.println();
+ System.err.println("Read Tests:");
+ System.err.println(" filterAll Helps to filter out all the rows on the server side"
+ + " there by not returning any thing back to the client. Helps to check the server side"
+ + " performance. Uses FilterAllFilter internally. ");
+ System.err.println(" multiGet Batch gets together into groups of N. Only supported " +
+ "by randomRead. Default: disabled");
+ System.err.println(" inmemory Tries to keep the HFiles of the CF " +
+ "inmemory as far as possible. Not guaranteed that reads are always served " +
+ "from memory. Default: false");
+ System.err.println(" bloomFilter Bloom filter type, one of "
+ + Arrays.toString(BloomType.values()));
+ System.err.println(" blockSize Blocksize to use when writing out hfiles. ");
+ System.err.println(" inmemoryCompaction Makes the column family to do inmemory flushes/compactions. "
+ + "Uses the CompactingMemstore");
+ System.err.println(" addColumns Adds columns to scans/gets explicitly. Default: true");
+ System.err.println(" replicas Enable region replica testing. Defaults: 1.");
+ System.err.println(" randomSleep Do a random sleep before each get between 0 and entered value. Defaults: 0");
+ System.err.println(" caching Scan caching to use. Default: 30");
+ System.err.println(" asyncPrefetch Enable asyncPrefetch for scan");
+ System.err.println(" cacheBlocks Set the cacheBlocks option for scan. Default: true");
+ System.err.println(" scanReadType Set the readType option for scan, stream/pread/default. Default: default");
+ System.err.println();
+ System.err.println(" Note: -D properties will be applied to the conf used. ");
+ System.err.println(" For example: ");
+ System.err.println(" -Dmapreduce.output.fileoutputformat.compress=true");
+ System.err.println(" -Dmapreduce.task.timeout=60000");
+ System.err.println();
+ System.err.println("Command:");
+ for (CmdDescriptor command : COMMANDS.values()) {
+ System.err.println(String.format(" %-20s %s", command.getName(), command.getDescription()));
+ }
+ System.err.println();
+ System.err.println("Args:");
+ System.err.println(" nclients Integer. Required. Total number of clients "
+ + "(and HRegionServers) running. 1 <= value <= 500");
+ System.err.println("Examples:");
+ System.err.println(" To run a single client doing the default 1M sequentialWrites:");
+ System.err.println(" $ hbase " + className + " sequentialWrite 1");
+ System.err.println(" To run 10 clients doing increments over ten rows:");
+ System.err.println(" $ hbase " + className + " --rows=10 --nomapred increment 10");
+ }
+
+ /**
+ * Parse options passed in via an arguments array. Assumes that array has been split
+ * on white-space and placed into a {@code Queue}. Any unknown arguments will remain
+ * in the queue at the conclusion of this method call. It's up to the caller to deal
+ * with these unrecognized arguments.
+ */
+ static TestOptions parseOpts(Queue<String> args) {
+ TestOptions opts = new TestOptions();
+
+ String cmd = null;
+ while ((cmd = args.poll()) != null) {
+ if (cmd.equals("-h") || cmd.startsWith("--h")) {
+ // place item back onto queue so that caller knows parsing was incomplete
+ args.add(cmd);
+ break;
+ }
+
+ final String nmr = "--nomapred";
+ if (cmd.startsWith(nmr)) {
+ opts.nomapred = true;
+ continue;
+ }
+
+ final String rows = "--rows=";
+ if (cmd.startsWith(rows)) {
+ opts.perClientRunRows = Integer.parseInt(cmd.substring(rows.length()));
+ continue;
+ }
+
+ final String cycles = "--cycles=";
+ if (cmd.startsWith(cycles)) {
+ opts.cycles = Integer.parseInt(cmd.substring(cycles.length()));
+ continue;
+ }
+
+ final String sampleRate = "--sampleRate=";
+ if (cmd.startsWith(sampleRate)) {
+ opts.sampleRate = Float.parseFloat(cmd.substring(sampleRate.length()));
+ continue;
+ }
+
+ final String table = "--table=";
+ if (cmd.startsWith(table)) {
+ opts.tableName = cmd.substring(table.length());
+ continue;
+ }
+
+ final String startRow = "--startRow=";
+ if (cmd.startsWith(startRow)) {
+ opts.startRow = Integer.parseInt(cmd.substring(startRow.length()));
+ continue;
+ }
+
+ final String compress = "--compress=";
+ if (cmd.startsWith(compress)) {
+ opts.compression = Compression.Algorithm.valueOf(cmd.substring(compress.length()));
+ continue;
+ }
+
+ final String traceRate = "--traceRate=";
+ if (cmd.startsWith(traceRate)) {
+ opts.traceRate = Double.parseDouble(cmd.substring(traceRate.length()));
+ continue;
+ }
+
+ final String blockEncoding = "--blockEncoding=";
+ if (cmd.startsWith(blockEncoding)) {
+ opts.blockEncoding = DataBlockEncoding.valueOf(cmd.substring(blockEncoding.length()));
+ continue;
+ }
+
+ final String flushCommits = "--flushCommits=";
+ if (cmd.startsWith(flushCommits)) {
+ opts.flushCommits = Boolean.parseBoolean(cmd.substring(flushCommits.length()));
+ continue;
+ }
+
+ final String writeToWAL = "--writeToWAL=";
+ if (cmd.startsWith(writeToWAL)) {
+ opts.writeToWAL = Boolean.parseBoolean(cmd.substring(writeToWAL.length()));
+ continue;
+ }
+
+ final String presplit = "--presplit=";
+ if (cmd.startsWith(presplit)) {
+ opts.presplitRegions = Integer.parseInt(cmd.substring(presplit.length()));
+ continue;
+ }
+
+ final String inMemory = "--inmemory=";
+ if (cmd.startsWith(inMemory)) {
+ opts.inMemoryCF = Boolean.parseBoolean(cmd.substring(inMemory.length()));
+ continue;
+ }
+
+ final String autoFlush = "--autoFlush=";
+ if (cmd.startsWith(autoFlush)) {
+ opts.autoFlush = Boolean.parseBoolean(cmd.substring(autoFlush.length()));
+ continue;
+ }
+
+ final String onceCon = "--oneCon=";
+ if (cmd.startsWith(onceCon)) {
+ opts.oneCon = Boolean.parseBoolean(cmd.substring(onceCon.length()));
+ continue;
+ }
+
+ final String latency = "--latency";
+ if (cmd.startsWith(latency)) {
+ opts.reportLatency = true;
+ continue;
+ }
+
+ final String multiGet = "--multiGet=";
+ if (cmd.startsWith(multiGet)) {
+ opts.multiGet = Integer.parseInt(cmd.substring(multiGet.length()));
+ continue;
+ }
+
+ final String useTags = "--usetags=";
+ if (cmd.startsWith(useTags)) {
+ opts.useTags = Boolean.parseBoolean(cmd.substring(useTags.length()));
+ continue;
+ }
+
+ final String noOfTags = "--numoftags=";
+ if (cmd.startsWith(noOfTags)) {
+ opts.noOfTags = Integer.parseInt(cmd.substring(noOfTags.length()));
+ continue;
+ }
+
+ final String replicas = "--replicas=";
+ if (cmd.startsWith(replicas)) {
+ opts.replicas = Integer.parseInt(cmd.substring(replicas.length()));
+ continue;
+ }
+
+ final String filterOutAll = "--filterAll";
+ if (cmd.startsWith(filterOutAll)) {
+ opts.filterAll = true;
+ continue;
+ }
+
+ final String size = "--size=";
+ if (cmd.startsWith(size)) {
+ opts.size = Float.parseFloat(cmd.substring(size.length()));
+ if (opts.size <= 1.0f) throw new IllegalStateException("Size must be > 1; i.e. 1GB");
+ continue;
+ }
+
+ final String splitPolicy = "--splitPolicy=";
+ if (cmd.startsWith(splitPolicy)) {
+ opts.splitPolicy = cmd.substring(splitPolicy.length());
+ continue;
+ }
+
+ final String randomSleep = "--randomSleep=";
+ if (cmd.startsWith(randomSleep)) {
+ opts.randomSleep = Integer.parseInt(cmd.substring(randomSleep.length()));
+ continue;
+ }
+
+ final String measureAfter = "--measureAfter=";
+ if (cmd.startsWith(measureAfter)) {
+ opts.measureAfter = Integer.parseInt(cmd.substring(measureAfter.length()));
+ continue;
+ }
+
+ final String bloomFilter = "--bloomFilter=";
+ if (cmd.startsWith(bloomFilter)) {
+ opts.bloomType = BloomType.valueOf(cmd.substring(bloomFilter.length()));
+ continue;
+ }
+
+ final String blockSize = "--blockSize=";
+ if (cmd.startsWith(blockSize)) {
+ opts.blockSize = Integer.parseInt(cmd.substring(blockSize.length()));
+ continue;
+ }
+
+ final String valueSize = "--valueSize=";
+ if (cmd.startsWith(valueSize)) {
+ opts.valueSize = Integer.parseInt(cmd.substring(valueSize.length()));
+ continue;
+ }
+
+ final String valueRandom = "--valueRandom";
+ if (cmd.startsWith(valueRandom)) {
+ opts.valueRandom = true;
+ if (opts.valueZipf) {
+ throw new IllegalStateException("Either valueZipf or valueRandom but not both");
+ }
+ continue;
+ }
+
+ final String valueZipf = "--valueZipf";
+ if (cmd.startsWith(valueZipf)) {
+ opts.valueZipf = true;
+ if (opts.valueRandom) {
+ throw new IllegalStateException("Either valueZipf or valueRandom but not both");
+ }
+ continue;
+ }
+
+ final String period = "--period=";
+ if (cmd.startsWith(period)) {
+ opts.period = Integer.parseInt(cmd.substring(period.length()));
+ continue;
+ }
+
+ final String addColumns = "--addColumns=";
+ if (cmd.startsWith(addColumns)) {
+
<TRUNCATED>
[12/41] hbase git commit: HBASE-18640 Move mapreduce out of
hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/replication/VerifyReplication.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/replication/VerifyReplication.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/replication/VerifyReplication.java
deleted file mode 100644
index 8bb266e..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/replication/VerifyReplication.java
+++ /dev/null
@@ -1,700 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce.replication;
-
-import java.io.IOException;
-import java.util.Arrays;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.Abortable;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Get;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.client.TableSnapshotScanner;
-import org.apache.hadoop.hbase.filter.Filter;
-import org.apache.hadoop.hbase.filter.FilterList;
-import org.apache.hadoop.hbase.filter.PrefixFilter;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
-import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
-import org.apache.hadoop.hbase.mapreduce.TableMapper;
-import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat;
-import org.apache.hadoop.hbase.mapreduce.TableSplit;
-import org.apache.hadoop.hbase.replication.ReplicationException;
-import org.apache.hadoop.hbase.replication.ReplicationFactory;
-import org.apache.hadoop.hbase.replication.ReplicationPeerConfig;
-import org.apache.hadoop.hbase.replication.ReplicationPeerZKImpl;
-import org.apache.hadoop.hbase.replication.ReplicationPeers;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.FSUtils;
-import org.apache.hadoop.hbase.util.Pair;
-import org.apache.hadoop.hbase.util.Threads;
-import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.MRJobConfig;
-import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.annotations.VisibleForTesting;
-
-/**
- * This map-only job compares the data from a local table with a remote one.
- * Every cell is compared and must have exactly the same keys (including timestamp)
- * as well as the same value. It is possible to restrict the job by time range and
- * families. The peer id that's provided must match the one given when the
- * replication stream was set up.
- * <p>
- * Two counters are provided, Verifier.Counters.GOODROWS and BADROWS. The reason
- * why a row is different is shown in the map's log.
- */
-public class VerifyReplication extends Configured implements Tool {
-
- private static final Log LOG =
- LogFactory.getLog(VerifyReplication.class);
-
- public final static String NAME = "verifyrep";
- private final static String PEER_CONFIG_PREFIX = NAME + ".peer.";
- long startTime = 0;
- long endTime = Long.MAX_VALUE;
- int batch = -1;
- int versions = -1;
- String tableName = null;
- String families = null;
- String delimiter = "";
- String peerId = null;
- String rowPrefixes = null;
- int sleepMsBeforeReCompare = 0;
- boolean verbose = false;
- boolean includeDeletedCells = false;
- //Source table snapshot name
- String sourceSnapshotName = null;
- //Temp location in source cluster to restore source snapshot
- String sourceSnapshotTmpDir = null;
- //Peer table snapshot name
- String peerSnapshotName = null;
- //Temp location in peer cluster to restore peer snapshot
- String peerSnapshotTmpDir = null;
- //Peer cluster Hadoop FS address
- String peerFSAddress = null;
- //Peer cluster HBase root dir location
- String peerHBaseRootAddress = null;
-
-
- private final static String JOB_NAME_CONF_KEY = "mapreduce.job.name";
-
- /**
- * Map-only comparator for 2 tables
- */
- public static class Verifier
- extends TableMapper<ImmutableBytesWritable, Put> {
-
-
-
- public static enum Counters {
- GOODROWS, BADROWS, ONLY_IN_SOURCE_TABLE_ROWS, ONLY_IN_PEER_TABLE_ROWS, CONTENT_DIFFERENT_ROWS}
-
- private Connection sourceConnection;
- private Table sourceTable;
- private Connection replicatedConnection;
- private Table replicatedTable;
- private ResultScanner replicatedScanner;
- private Result currentCompareRowInPeerTable;
- private int sleepMsBeforeReCompare;
- private String delimiter = "";
- private boolean verbose = false;
- private int batch = -1;
-
- /**
- * Map method that compares every scanned row with the equivalent from
- * a distant cluster.
- * @param row The current table row key.
- * @param value The columns.
- * @param context The current context.
- * @throws IOException When something is broken with the data.
- */
- @Override
- public void map(ImmutableBytesWritable row, final Result value,
- Context context)
- throws IOException {
- if (replicatedScanner == null) {
- Configuration conf = context.getConfiguration();
- sleepMsBeforeReCompare = conf.getInt(NAME +".sleepMsBeforeReCompare", 0);
- delimiter = conf.get(NAME + ".delimiter", "");
- verbose = conf.getBoolean(NAME +".verbose", false);
- batch = conf.getInt(NAME + ".batch", -1);
- final Scan scan = new Scan();
- if (batch > 0) {
- scan.setBatch(batch);
- }
- scan.setCacheBlocks(false);
- scan.setCaching(conf.getInt(TableInputFormat.SCAN_CACHEDROWS, 1));
- long startTime = conf.getLong(NAME + ".startTime", 0);
- long endTime = conf.getLong(NAME + ".endTime", Long.MAX_VALUE);
- String families = conf.get(NAME + ".families", null);
- if(families != null) {
- String[] fams = families.split(",");
- for(String fam : fams) {
- scan.addFamily(Bytes.toBytes(fam));
- }
- }
- boolean includeDeletedCells = conf.getBoolean(NAME + ".includeDeletedCells", false);
- scan.setRaw(includeDeletedCells);
- String rowPrefixes = conf.get(NAME + ".rowPrefixes", null);
- setRowPrefixFilter(scan, rowPrefixes);
- scan.setTimeRange(startTime, endTime);
- int versions = conf.getInt(NAME+".versions", -1);
- LOG.info("Setting number of version inside map as: " + versions);
- if (versions >= 0) {
- scan.setMaxVersions(versions);
- }
- TableName tableName = TableName.valueOf(conf.get(NAME + ".tableName"));
- sourceConnection = ConnectionFactory.createConnection(conf);
- sourceTable = sourceConnection.getTable(tableName);
-
- final InputSplit tableSplit = context.getInputSplit();
-
- String zkClusterKey = conf.get(NAME + ".peerQuorumAddress");
- Configuration peerConf = HBaseConfiguration.createClusterConf(conf,
- zkClusterKey, PEER_CONFIG_PREFIX);
-
- replicatedConnection = ConnectionFactory.createConnection(peerConf);
- replicatedTable = replicatedConnection.getTable(tableName);
- scan.setStartRow(value.getRow());
-
- byte[] endRow = null;
- if (tableSplit instanceof TableSnapshotInputFormat.TableSnapshotRegionSplit) {
- endRow = ((TableSnapshotInputFormat.TableSnapshotRegionSplit) tableSplit).getRegionInfo()
- .getEndKey();
- } else {
- endRow = ((TableSplit) tableSplit).getEndRow();
- }
-
- scan.setStopRow(endRow);
-
- String peerSnapshotName = conf.get(NAME + ".peerSnapshotName", null);
- if (peerSnapshotName != null) {
- String peerSnapshotTmpDir = conf.get(NAME + ".peerSnapshotTmpDir", null);
- String peerFSAddress = conf.get(NAME + ".peerFSAddress", null);
- String peerHBaseRootAddress = conf.get(NAME + ".peerHBaseRootAddress", null);
- FileSystem.setDefaultUri(peerConf, peerFSAddress);
- FSUtils.setRootDir(peerConf, new Path(peerHBaseRootAddress));
- LOG.info("Using peer snapshot:" + peerSnapshotName + " with temp dir:"
- + peerSnapshotTmpDir + " peer root uri:" + FSUtils.getRootDir(peerConf)
- + " peerFSAddress:" + peerFSAddress);
-
- replicatedScanner = new TableSnapshotScanner(peerConf,
- new Path(peerFSAddress, peerSnapshotTmpDir), peerSnapshotName, scan);
- } else {
- replicatedScanner = replicatedTable.getScanner(scan);
- }
- currentCompareRowInPeerTable = replicatedScanner.next();
- }
- while (true) {
- if (currentCompareRowInPeerTable == null) {
- // reach the region end of peer table, row only in source table
- logFailRowAndIncreaseCounter(context, Counters.ONLY_IN_SOURCE_TABLE_ROWS, value);
- break;
- }
- int rowCmpRet = Bytes.compareTo(value.getRow(), currentCompareRowInPeerTable.getRow());
- if (rowCmpRet == 0) {
- // rowkey is same, need to compare the content of the row
- try {
- Result.compareResults(value, currentCompareRowInPeerTable);
- context.getCounter(Counters.GOODROWS).increment(1);
- if (verbose) {
- LOG.info("Good row key: " + delimiter
- + Bytes.toStringBinary(value.getRow()) + delimiter);
- }
- } catch (Exception e) {
- logFailRowAndIncreaseCounter(context, Counters.CONTENT_DIFFERENT_ROWS, value);
- }
- currentCompareRowInPeerTable = replicatedScanner.next();
- break;
- } else if (rowCmpRet < 0) {
- // row only exists in source table
- logFailRowAndIncreaseCounter(context, Counters.ONLY_IN_SOURCE_TABLE_ROWS, value);
- break;
- } else {
- // row only exists in peer table
- logFailRowAndIncreaseCounter(context, Counters.ONLY_IN_PEER_TABLE_ROWS,
- currentCompareRowInPeerTable);
- currentCompareRowInPeerTable = replicatedScanner.next();
- }
- }
- }
-
- private void logFailRowAndIncreaseCounter(Context context, Counters counter, Result row) {
- if (sleepMsBeforeReCompare > 0) {
- Threads.sleep(sleepMsBeforeReCompare);
- try {
- Result sourceResult = sourceTable.get(new Get(row.getRow()));
- Result replicatedResult = replicatedTable.get(new Get(row.getRow()));
- Result.compareResults(sourceResult, replicatedResult);
- if (!sourceResult.isEmpty()) {
- context.getCounter(Counters.GOODROWS).increment(1);
- if (verbose) {
- LOG.info("Good row key (with recompare): " + delimiter + Bytes.toStringBinary(row.getRow())
- + delimiter);
- }
- }
- return;
- } catch (Exception e) {
- LOG.error("recompare fail after sleep, rowkey=" + delimiter +
- Bytes.toStringBinary(row.getRow()) + delimiter);
- }
- }
- context.getCounter(counter).increment(1);
- context.getCounter(Counters.BADROWS).increment(1);
- LOG.error(counter.toString() + ", rowkey=" + delimiter + Bytes.toStringBinary(row.getRow()) +
- delimiter);
- }
-
- @Override
- protected void cleanup(Context context) {
- if (replicatedScanner != null) {
- try {
- while (currentCompareRowInPeerTable != null) {
- logFailRowAndIncreaseCounter(context, Counters.ONLY_IN_PEER_TABLE_ROWS,
- currentCompareRowInPeerTable);
- currentCompareRowInPeerTable = replicatedScanner.next();
- }
- } catch (Exception e) {
- LOG.error("fail to scan peer table in cleanup", e);
- } finally {
- replicatedScanner.close();
- replicatedScanner = null;
- }
- }
-
- if (sourceTable != null) {
- try {
- sourceTable.close();
- } catch (IOException e) {
- LOG.error("fail to close source table in cleanup", e);
- }
- }
- if(sourceConnection != null){
- try {
- sourceConnection.close();
- } catch (Exception e) {
- LOG.error("fail to close source connection in cleanup", e);
- }
- }
-
- if(replicatedTable != null){
- try{
- replicatedTable.close();
- } catch (Exception e) {
- LOG.error("fail to close replicated table in cleanup", e);
- }
- }
- if(replicatedConnection != null){
- try {
- replicatedConnection.close();
- } catch (Exception e) {
- LOG.error("fail to close replicated connection in cleanup", e);
- }
- }
- }
- }
-
- private static Pair<ReplicationPeerConfig, Configuration> getPeerQuorumConfig(
- final Configuration conf, String peerId) throws IOException {
- ZooKeeperWatcher localZKW = null;
- ReplicationPeerZKImpl peer = null;
- try {
- localZKW = new ZooKeeperWatcher(conf, "VerifyReplication",
- new Abortable() {
- @Override public void abort(String why, Throwable e) {}
- @Override public boolean isAborted() {return false;}
- });
-
- ReplicationPeers rp = ReplicationFactory.getReplicationPeers(localZKW, conf, localZKW);
- rp.init();
-
- Pair<ReplicationPeerConfig, Configuration> pair = rp.getPeerConf(peerId);
- if (pair == null) {
- throw new IOException("Couldn't get peer conf!");
- }
-
- return pair;
- } catch (ReplicationException e) {
- throw new IOException(
- "An error occurred while trying to connect to the remove peer cluster", e);
- } finally {
- if (peer != null) {
- peer.close();
- }
- if (localZKW != null) {
- localZKW.close();
- }
- }
- }
-
- /**
- * Sets up the actual job.
- *
- * @param conf The current configuration.
- * @param args The command line parameters.
- * @return The newly created job.
- * @throws java.io.IOException When setting up the job fails.
- */
- public Job createSubmittableJob(Configuration conf, String[] args)
- throws IOException {
- if (!doCommandLine(args)) {
- return null;
- }
- conf.set(NAME+".peerId", peerId);
- conf.set(NAME+".tableName", tableName);
- conf.setLong(NAME+".startTime", startTime);
- conf.setLong(NAME+".endTime", endTime);
- conf.setInt(NAME +".sleepMsBeforeReCompare", sleepMsBeforeReCompare);
- conf.set(NAME + ".delimiter", delimiter);
- conf.setInt(NAME + ".batch", batch);
- conf.setBoolean(NAME +".verbose", verbose);
- conf.setBoolean(NAME +".includeDeletedCells", includeDeletedCells);
- if (families != null) {
- conf.set(NAME+".families", families);
- }
- if (rowPrefixes != null){
- conf.set(NAME+".rowPrefixes", rowPrefixes);
- }
-
- Pair<ReplicationPeerConfig, Configuration> peerConfigPair = getPeerQuorumConfig(conf, peerId);
- ReplicationPeerConfig peerConfig = peerConfigPair.getFirst();
- String peerQuorumAddress = peerConfig.getClusterKey();
- LOG.info("Peer Quorum Address: " + peerQuorumAddress + ", Peer Configuration: " +
- peerConfig.getConfiguration());
- conf.set(NAME + ".peerQuorumAddress", peerQuorumAddress);
- HBaseConfiguration.setWithPrefix(conf, PEER_CONFIG_PREFIX,
- peerConfig.getConfiguration().entrySet());
-
- conf.setInt(NAME + ".versions", versions);
- LOG.info("Number of version: " + versions);
-
- //Set Snapshot specific parameters
- if (peerSnapshotName != null) {
- conf.set(NAME + ".peerSnapshotName", peerSnapshotName);
- conf.set(NAME + ".peerSnapshotTmpDir", peerSnapshotTmpDir);
- conf.set(NAME + ".peerFSAddress", peerFSAddress);
- conf.set(NAME + ".peerHBaseRootAddress", peerHBaseRootAddress);
-
- // This is to create HDFS delegation token for peer cluster in case of secured
- conf.setStrings(MRJobConfig.JOB_NAMENODES, peerFSAddress);
- }
-
- Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
- job.setJarByClass(VerifyReplication.class);
-
- Scan scan = new Scan();
- scan.setTimeRange(startTime, endTime);
- scan.setRaw(includeDeletedCells);
- scan.setCacheBlocks(false);
- if (batch > 0) {
- scan.setBatch(batch);
- }
- if (versions >= 0) {
- scan.setMaxVersions(versions);
- LOG.info("Number of versions set to " + versions);
- }
- if(families != null) {
- String[] fams = families.split(",");
- for(String fam : fams) {
- scan.addFamily(Bytes.toBytes(fam));
- }
- }
-
- setRowPrefixFilter(scan, rowPrefixes);
-
- if (sourceSnapshotName != null) {
- Path snapshotTempPath = new Path(sourceSnapshotTmpDir);
- LOG.info(
- "Using source snapshot-" + sourceSnapshotName + " with temp dir:" + sourceSnapshotTmpDir);
- TableMapReduceUtil.initTableSnapshotMapperJob(sourceSnapshotName, scan, Verifier.class, null,
- null, job, true, snapshotTempPath);
- } else {
- TableMapReduceUtil.initTableMapperJob(tableName, scan, Verifier.class, null, null, job);
- }
- Configuration peerClusterConf = peerConfigPair.getSecond();
- // Obtain the auth token from peer cluster
- TableMapReduceUtil.initCredentialsForCluster(job, peerClusterConf);
-
- job.setOutputFormatClass(NullOutputFormat.class);
- job.setNumReduceTasks(0);
- return job;
- }
-
- private static void setRowPrefixFilter(Scan scan, String rowPrefixes) {
- if (rowPrefixes != null && !rowPrefixes.isEmpty()) {
- String[] rowPrefixArray = rowPrefixes.split(",");
- Arrays.sort(rowPrefixArray);
- FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ONE);
- for (String prefix : rowPrefixArray) {
- Filter filter = new PrefixFilter(Bytes.toBytes(prefix));
- filterList.addFilter(filter);
- }
- scan.setFilter(filterList);
- byte[] startPrefixRow = Bytes.toBytes(rowPrefixArray[0]);
- byte[] lastPrefixRow = Bytes.toBytes(rowPrefixArray[rowPrefixArray.length -1]);
- setStartAndStopRows(scan, startPrefixRow, lastPrefixRow);
- }
- }
-
- private static void setStartAndStopRows(Scan scan, byte[] startPrefixRow, byte[] lastPrefixRow) {
- scan.setStartRow(startPrefixRow);
- byte[] stopRow = Bytes.add(Bytes.head(lastPrefixRow, lastPrefixRow.length - 1),
- new byte[]{(byte) (lastPrefixRow[lastPrefixRow.length - 1] + 1)});
- scan.setStopRow(stopRow);
- }
-
- @VisibleForTesting
- public boolean doCommandLine(final String[] args) {
- if (args.length < 2) {
- printUsage(null);
- return false;
- }
- try {
- for (int i = 0; i < args.length; i++) {
- String cmd = args[i];
- if (cmd.equals("-h") || cmd.startsWith("--h")) {
- printUsage(null);
- return false;
- }
-
- final String startTimeArgKey = "--starttime=";
- if (cmd.startsWith(startTimeArgKey)) {
- startTime = Long.parseLong(cmd.substring(startTimeArgKey.length()));
- continue;
- }
-
- final String endTimeArgKey = "--endtime=";
- if (cmd.startsWith(endTimeArgKey)) {
- endTime = Long.parseLong(cmd.substring(endTimeArgKey.length()));
- continue;
- }
-
- final String includeDeletedCellsArgKey = "--raw";
- if (cmd.equals(includeDeletedCellsArgKey)) {
- includeDeletedCells = true;
- continue;
- }
-
- final String versionsArgKey = "--versions=";
- if (cmd.startsWith(versionsArgKey)) {
- versions = Integer.parseInt(cmd.substring(versionsArgKey.length()));
- continue;
- }
-
- final String batchArgKey = "--batch=";
- if (cmd.startsWith(batchArgKey)) {
- batch = Integer.parseInt(cmd.substring(batchArgKey.length()));
- continue;
- }
-
- final String familiesArgKey = "--families=";
- if (cmd.startsWith(familiesArgKey)) {
- families = cmd.substring(familiesArgKey.length());
- continue;
- }
-
- final String rowPrefixesKey = "--row-prefixes=";
- if (cmd.startsWith(rowPrefixesKey)){
- rowPrefixes = cmd.substring(rowPrefixesKey.length());
- continue;
- }
-
- final String delimiterArgKey = "--delimiter=";
- if (cmd.startsWith(delimiterArgKey)) {
- delimiter = cmd.substring(delimiterArgKey.length());
- continue;
- }
-
- final String sleepToReCompareKey = "--recomparesleep=";
- if (cmd.startsWith(sleepToReCompareKey)) {
- sleepMsBeforeReCompare = Integer.parseInt(cmd.substring(sleepToReCompareKey.length()));
- continue;
- }
- final String verboseKey = "--verbose";
- if (cmd.startsWith(verboseKey)) {
- verbose = true;
- continue;
- }
-
- final String sourceSnapshotNameArgKey = "--sourceSnapshotName=";
- if (cmd.startsWith(sourceSnapshotNameArgKey)) {
- sourceSnapshotName = cmd.substring(sourceSnapshotNameArgKey.length());
- continue;
- }
-
- final String sourceSnapshotTmpDirArgKey = "--sourceSnapshotTmpDir=";
- if (cmd.startsWith(sourceSnapshotTmpDirArgKey)) {
- sourceSnapshotTmpDir = cmd.substring(sourceSnapshotTmpDirArgKey.length());
- continue;
- }
-
- final String peerSnapshotNameArgKey = "--peerSnapshotName=";
- if (cmd.startsWith(peerSnapshotNameArgKey)) {
- peerSnapshotName = cmd.substring(peerSnapshotNameArgKey.length());
- continue;
- }
-
- final String peerSnapshotTmpDirArgKey = "--peerSnapshotTmpDir=";
- if (cmd.startsWith(peerSnapshotTmpDirArgKey)) {
- peerSnapshotTmpDir = cmd.substring(peerSnapshotTmpDirArgKey.length());
- continue;
- }
-
- final String peerFSAddressArgKey = "--peerFSAddress=";
- if (cmd.startsWith(peerFSAddressArgKey)) {
- peerFSAddress = cmd.substring(peerFSAddressArgKey.length());
- continue;
- }
-
- final String peerHBaseRootAddressArgKey = "--peerHBaseRootAddress=";
- if (cmd.startsWith(peerHBaseRootAddressArgKey)) {
- peerHBaseRootAddress = cmd.substring(peerHBaseRootAddressArgKey.length());
- continue;
- }
-
- if (cmd.startsWith("--")) {
- printUsage("Invalid argument '" + cmd + "'");
- return false;
- }
-
- if (i == args.length-2) {
- peerId = cmd;
- }
-
- if (i == args.length-1) {
- tableName = cmd;
- }
- }
-
- if ((sourceSnapshotName != null && sourceSnapshotTmpDir == null)
- || (sourceSnapshotName == null && sourceSnapshotTmpDir != null)) {
- printUsage("Source snapshot name and snapshot temp location should be provided"
- + " to use snapshots in source cluster");
- return false;
- }
-
- if (peerSnapshotName != null || peerSnapshotTmpDir != null || peerFSAddress != null
- || peerHBaseRootAddress != null) {
- if (peerSnapshotName == null || peerSnapshotTmpDir == null || peerFSAddress == null
- || peerHBaseRootAddress == null) {
- printUsage(
- "Peer snapshot name, peer snapshot temp location, Peer HBase root address and "
- + "peer FSAddress should be provided to use snapshots in peer cluster");
- return false;
- }
- }
-
- // This is to avoid making recompare calls to source/peer tables when snapshots are used
- if ((sourceSnapshotName != null || peerSnapshotName != null) && sleepMsBeforeReCompare > 0) {
- printUsage(
- "Using sleepMsBeforeReCompare along with snapshots is not allowed as snapshots are immutable");
- return false;
- }
-
- } catch (Exception e) {
- e.printStackTrace();
- printUsage("Can't start because " + e.getMessage());
- return false;
- }
- return true;
- }
-
- /*
- * @param errorMsg Error message. Can be null.
- */
- private static void printUsage(final String errorMsg) {
- if (errorMsg != null && errorMsg.length() > 0) {
- System.err.println("ERROR: " + errorMsg);
- }
- System.err.println("Usage: verifyrep [--starttime=X]" +
- " [--endtime=Y] [--families=A] [--row-prefixes=B] [--delimiter=] [--recomparesleep=] " +
- "[--batch=] [--verbose] [--sourceSnapshotName=P] [--sourceSnapshotTmpDir=Q] [--peerSnapshotName=R] "
- + "[--peerSnapshotTmpDir=S] [--peerFSAddress=T] [--peerHBaseRootAddress=U] <peerid> <tablename>");
- System.err.println();
- System.err.println("Options:");
- System.err.println(" starttime beginning of the time range");
- System.err.println(" without endtime means from starttime to forever");
- System.err.println(" endtime end of the time range");
- System.err.println(" versions number of cell versions to verify");
- System.err.println(" batch batch count for scan, " +
- "note that result row counts will no longer be actual number of rows when you use this option");
- System.err.println(" raw includes raw scan if given in options");
- System.err.println(" families comma-separated list of families to copy");
- System.err.println(" row-prefixes comma-separated list of row key prefixes to filter on ");
- System.err.println(" delimiter the delimiter used in display around rowkey");
- System.err.println(" recomparesleep milliseconds to sleep before recompare row, " +
- "default value is 0 which disables the recompare.");
- System.err.println(" verbose logs row keys of good rows");
- System.err.println(" sourceSnapshotName Source Snapshot Name");
- System.err.println(" sourceSnapshotTmpDir Tmp location to restore source table snapshot");
- System.err.println(" peerSnapshotName Peer Snapshot Name");
- System.err.println(" peerSnapshotTmpDir Tmp location to restore peer table snapshot");
- System.err.println(" peerFSAddress Peer cluster Hadoop FS address");
- System.err.println(" peerHBaseRootAddress Peer cluster HBase root location");
- System.err.println();
- System.err.println("Args:");
- System.err.println(" peerid Id of the peer used for verification, must match the one given for replication");
- System.err.println(" tablename Name of the table to verify");
- System.err.println();
- System.err.println("Examples:");
- System.err.println(" To verify the data replicated from TestTable for a 1 hour window with peer #5 ");
- System.err.println(" $ hbase " +
- "org.apache.hadoop.hbase.mapreduce.replication.VerifyReplication" +
- " --starttime=1265875194289 --endtime=1265878794289 5 TestTable ");
- }
-
- @Override
- public int run(String[] args) throws Exception {
- Configuration conf = this.getConf();
- Job job = createSubmittableJob(conf, args);
- if (job != null) {
- return job.waitForCompletion(true) ? 0 : 1;
- }
- return 1;
- }
-
- /**
- * Main entry point.
- *
- * @param args The command line parameters.
- * @throws Exception When running the job fails.
- */
- public static void main(String[] args) throws Exception {
- int res = ToolRunner.run(HBaseConfiguration.create(), new VerifyReplication(), args);
- System.exit(res);
- }
-}
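
For reference while this class changes modules, here is a minimal sketch of launching VerifyReplication programmatically through ToolRunner, mirroring the tool's own main() and the CLI example in printUsage(). The peer id "5", the table name "TestTable", and the timestamps are placeholder values, and the import path is the package shown in the removed file:

  import org.apache.hadoop.hbase.HBaseConfiguration;
  import org.apache.hadoop.hbase.mapreduce.replication.VerifyReplication;
  import org.apache.hadoop.util.ToolRunner;

  public class VerifyReplicationLauncher {
    public static void main(String[] args) throws Exception {
      // Verify a one hour window of TestTable against replication peer 5,
      // exactly as the usage example above does from the command line.
      int exitCode = ToolRunner.run(HBaseConfiguration.create(), new VerifyReplication(),
          new String[] { "--starttime=1265875194289", "--endtime=1265878794289", "5", "TestTable" });
      System.exit(exitCode);
    }
  }
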
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CompactionTool.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CompactionTool.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CompactionTool.java
deleted file mode 100644
index eb9a5f7..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CompactionTool.java
+++ /dev/null
@@ -1,470 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.regionserver;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Set;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.HBaseInterfaceAudience;
-import org.apache.hadoop.hbase.HDFSBlocksDistribution;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.TableDescriptor;
-import org.apache.hadoop.hbase.mapreduce.JobUtil;
-import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
-import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
-import org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
-import org.apache.hadoop.hbase.util.FSTableDescriptors;
-import org.apache.hadoop.hbase.util.FSUtils;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.lib.input.FileSplit;
-import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
-import org.apache.hadoop.util.LineReader;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-/*
- * The CompactionTool allows executing a compaction by specifying a:
- * <ul>
- * <li>table folder (all regions and families will be compacted)
- * <li>region folder (all families in the region will be compacted)
- * <li>family folder (the store files will be compacted)
- * </ul>
- */
-@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
-public class CompactionTool extends Configured implements Tool {
- private static final Log LOG = LogFactory.getLog(CompactionTool.class);
-
- private final static String CONF_TMP_DIR = "hbase.tmp.dir";
- private final static String CONF_COMPACT_ONCE = "hbase.compactiontool.compact.once";
- private final static String CONF_COMPACT_MAJOR = "hbase.compactiontool.compact.major";
- private final static String CONF_DELETE_COMPACTED = "hbase.compactiontool.delete";
- private final static String CONF_COMPLETE_COMPACTION = "hbase.hstore.compaction.complete";
-
- /**
- * Class responsible for executing the compaction on the specified path.
- * The path can be a table, region or family directory.
- */
- private static class CompactionWorker {
- private final boolean keepCompactedFiles;
- private final boolean deleteCompacted;
- private final Configuration conf;
- private final FileSystem fs;
- private final Path tmpDir;
-
- public CompactionWorker(final FileSystem fs, final Configuration conf) {
- this.conf = conf;
- this.keepCompactedFiles = !conf.getBoolean(CONF_COMPLETE_COMPACTION, true);
- this.deleteCompacted = conf.getBoolean(CONF_DELETE_COMPACTED, false);
- this.tmpDir = new Path(conf.get(CONF_TMP_DIR));
- this.fs = fs;
- }
-
- /**
- * Execute the compaction on the specified path.
- *
- * @param path Directory path on which to run compaction.
- * @param compactOnce Execute just a single step of compaction.
- * @param major Request major compaction.
- */
- public void compact(final Path path, final boolean compactOnce, final boolean major) throws IOException {
- if (isFamilyDir(fs, path)) {
- Path regionDir = path.getParent();
- Path tableDir = regionDir.getParent();
- TableDescriptor htd = FSTableDescriptors.getTableDescriptorFromFs(fs, tableDir);
- HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
- compactStoreFiles(tableDir, htd, hri,
- path.getName(), compactOnce, major);
- } else if (isRegionDir(fs, path)) {
- Path tableDir = path.getParent();
- TableDescriptor htd = FSTableDescriptors.getTableDescriptorFromFs(fs, tableDir);
- compactRegion(tableDir, htd, path, compactOnce, major);
- } else if (isTableDir(fs, path)) {
- compactTable(path, compactOnce, major);
- } else {
- throw new IOException(
- "Specified path is not a table, region or family directory. path=" + path);
- }
- }
-
- private void compactTable(final Path tableDir, final boolean compactOnce, final boolean major)
- throws IOException {
- TableDescriptor htd = FSTableDescriptors.getTableDescriptorFromFs(fs, tableDir);
- for (Path regionDir: FSUtils.getRegionDirs(fs, tableDir)) {
- compactRegion(tableDir, htd, regionDir, compactOnce, major);
- }
- }
-
- private void compactRegion(final Path tableDir, final TableDescriptor htd,
- final Path regionDir, final boolean compactOnce, final boolean major)
- throws IOException {
- HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
- for (Path familyDir: FSUtils.getFamilyDirs(fs, regionDir)) {
- compactStoreFiles(tableDir, htd, hri, familyDir.getName(), compactOnce, major);
- }
- }
-
- /**
- * Execute the actual compaction job.
- * If the compact once flag is not specified, execute the compaction until
- * no more compactions are needed. Uses the Configuration settings provided.
- */
- private void compactStoreFiles(final Path tableDir, final TableDescriptor htd,
- final HRegionInfo hri, final String familyName, final boolean compactOnce,
- final boolean major) throws IOException {
- HStore store = getStore(conf, fs, tableDir, htd, hri, familyName, tmpDir);
- LOG.info("Compact table=" + htd.getTableName() +
- " region=" + hri.getRegionNameAsString() +
- " family=" + familyName);
- if (major) {
- store.triggerMajorCompaction();
- }
- do {
- CompactionContext compaction = store.requestCompaction(Store.PRIORITY_USER, null);
- if (compaction == null) break;
- List<StoreFile> storeFiles =
- store.compact(compaction, NoLimitThroughputController.INSTANCE);
- if (storeFiles != null && !storeFiles.isEmpty()) {
- if (keepCompactedFiles && deleteCompacted) {
- for (StoreFile storeFile: storeFiles) {
- fs.delete(storeFile.getPath(), false);
- }
- }
- }
- } while (store.needsCompaction() && !compactOnce);
- }
-
- /**
- * Create a "mock" HStore that uses the tmpDir specified by the user and
- * the store dir to compact as source.
- */
- private static HStore getStore(final Configuration conf, final FileSystem fs,
- final Path tableDir, final TableDescriptor htd, final HRegionInfo hri,
- final String familyName, final Path tempDir) throws IOException {
- HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, hri) {
- @Override
- public Path getTempDir() {
- return tempDir;
- }
- };
- HRegion region = new HRegion(regionFs, null, conf, htd, null);
- return new HStore(region, htd.getColumnFamily(Bytes.toBytes(familyName)), conf);
- }
- }
-
- private static boolean isRegionDir(final FileSystem fs, final Path path) throws IOException {
- Path regionInfo = new Path(path, HRegionFileSystem.REGION_INFO_FILE);
- return fs.exists(regionInfo);
- }
-
- private static boolean isTableDir(final FileSystem fs, final Path path) throws IOException {
- return FSTableDescriptors.getTableInfoPath(fs, path) != null;
- }
-
- private static boolean isFamilyDir(final FileSystem fs, final Path path) throws IOException {
- return isRegionDir(fs, path.getParent());
- }
-
- private static class CompactionMapper
- extends Mapper<LongWritable, Text, NullWritable, NullWritable> {
- private CompactionWorker compactor = null;
- private boolean compactOnce = false;
- private boolean major = false;
-
- @Override
- public void setup(Context context) {
- Configuration conf = context.getConfiguration();
- compactOnce = conf.getBoolean(CONF_COMPACT_ONCE, false);
- major = conf.getBoolean(CONF_COMPACT_MAJOR, false);
-
- try {
- FileSystem fs = FileSystem.get(conf);
- this.compactor = new CompactionWorker(fs, conf);
- } catch (IOException e) {
- throw new RuntimeException("Could not get the input FileSystem", e);
- }
- }
-
- @Override
- public void map(LongWritable key, Text value, Context context)
- throws InterruptedException, IOException {
- Path path = new Path(value.toString());
- this.compactor.compact(path, compactOnce, major);
- }
- }
-
- /**
- * Input format that uses store files block location as input split locality.
- */
- private static class CompactionInputFormat extends TextInputFormat {
- @Override
- protected boolean isSplitable(JobContext context, Path file) {
- return true;
- }
-
- /**
- * Returns a split for each store files directory using the block location
- * of each file as locality reference.
- */
- @Override
- public List<InputSplit> getSplits(JobContext job) throws IOException {
- List<InputSplit> splits = new ArrayList<>();
- List<FileStatus> files = listStatus(job);
-
- Text key = new Text();
- for (FileStatus file: files) {
- Path path = file.getPath();
- FileSystem fs = path.getFileSystem(job.getConfiguration());
- LineReader reader = new LineReader(fs.open(path));
- long pos = 0;
- int n;
- try {
- while ((n = reader.readLine(key)) > 0) {
- String[] hosts = getStoreDirHosts(fs, path);
- splits.add(new FileSplit(path, pos, n, hosts));
- pos += n;
- }
- } finally {
- reader.close();
- }
- }
-
- return splits;
- }
-
- /**
- * Returns the top hosts of the store files, used by the split.
- */
- private static String[] getStoreDirHosts(final FileSystem fs, final Path path)
- throws IOException {
- FileStatus[] files = FSUtils.listStatus(fs, path);
- if (files == null) {
- return new String[] {};
- }
-
- HDFSBlocksDistribution hdfsBlocksDistribution = new HDFSBlocksDistribution();
- for (FileStatus hfileStatus: files) {
- HDFSBlocksDistribution storeFileBlocksDistribution =
- FSUtils.computeHDFSBlocksDistribution(fs, hfileStatus, 0, hfileStatus.getLen());
- hdfsBlocksDistribution.add(storeFileBlocksDistribution);
- }
-
- List<String> hosts = hdfsBlocksDistribution.getTopHosts();
- return hosts.toArray(new String[hosts.size()]);
- }
-
- /**
- * Create the input file for the given directories to compact.
- * The file is a TextFile with each line corresponding to a
- * store files directory to compact.
- */
- public static void createInputFile(final FileSystem fs, final Path path,
- final Set<Path> toCompactDirs) throws IOException {
- // Extract the list of store dirs
- List<Path> storeDirs = new LinkedList<>();
- for (Path compactDir: toCompactDirs) {
- if (isFamilyDir(fs, compactDir)) {
- storeDirs.add(compactDir);
- } else if (isRegionDir(fs, compactDir)) {
- for (Path familyDir: FSUtils.getFamilyDirs(fs, compactDir)) {
- storeDirs.add(familyDir);
- }
- } else if (isTableDir(fs, compactDir)) {
- // Lookup regions
- for (Path regionDir: FSUtils.getRegionDirs(fs, compactDir)) {
- for (Path familyDir: FSUtils.getFamilyDirs(fs, regionDir)) {
- storeDirs.add(familyDir);
- }
- }
- } else {
- throw new IOException(
- "Specified path is not a table, region or family directory. path=" + compactDir);
- }
- }
-
- // Write Input File
- FSDataOutputStream stream = fs.create(path);
- LOG.info("Create input file=" + path + " with " + storeDirs.size() + " dirs to compact.");
- try {
- final byte[] newLine = Bytes.toBytes("\n");
- for (Path storeDir: storeDirs) {
- stream.write(Bytes.toBytes(storeDir.toString()));
- stream.write(newLine);
- }
- } finally {
- stream.close();
- }
- }
- }
-
- /**
- * Execute compaction, using a Map-Reduce job.
- */
- private int doMapReduce(final FileSystem fs, final Set<Path> toCompactDirs,
- final boolean compactOnce, final boolean major) throws Exception {
- Configuration conf = getConf();
- conf.setBoolean(CONF_COMPACT_ONCE, compactOnce);
- conf.setBoolean(CONF_COMPACT_MAJOR, major);
-
- Job job = new Job(conf);
- job.setJobName("CompactionTool");
- job.setJarByClass(CompactionTool.class);
- job.setMapperClass(CompactionMapper.class);
- job.setInputFormatClass(CompactionInputFormat.class);
- job.setOutputFormatClass(NullOutputFormat.class);
- job.setMapSpeculativeExecution(false);
- job.setNumReduceTasks(0);
-
- // add dependencies (including HBase ones)
- TableMapReduceUtil.addDependencyJars(job);
-
- Path stagingDir = JobUtil.getStagingDir(conf);
- try {
- // Create input file with the store dirs
- Path inputPath = new Path(stagingDir, "compact-"+ EnvironmentEdgeManager.currentTime());
- CompactionInputFormat.createInputFile(fs, inputPath, toCompactDirs);
- CompactionInputFormat.addInputPath(job, inputPath);
-
- // Initialize credential for secure cluster
- TableMapReduceUtil.initCredentials(job);
-
- // Start the MR Job and wait
- return job.waitForCompletion(true) ? 0 : 1;
- } finally {
- fs.delete(stagingDir, true);
- }
- }
-
- /**
- * Execute compaction, from this client, one path at the time.
- */
- private int doClient(final FileSystem fs, final Set<Path> toCompactDirs,
- final boolean compactOnce, final boolean major) throws IOException {
- CompactionWorker worker = new CompactionWorker(fs, getConf());
- for (Path path: toCompactDirs) {
- worker.compact(path, compactOnce, major);
- }
- return 0;
- }
-
- @Override
- public int run(String[] args) throws Exception {
- Set<Path> toCompactDirs = new HashSet<>();
- boolean compactOnce = false;
- boolean major = false;
- boolean mapred = false;
-
- Configuration conf = getConf();
- FileSystem fs = FileSystem.get(conf);
-
- try {
- for (int i = 0; i < args.length; ++i) {
- String opt = args[i];
- if (opt.equals("-compactOnce")) {
- compactOnce = true;
- } else if (opt.equals("-major")) {
- major = true;
- } else if (opt.equals("-mapred")) {
- mapred = true;
- } else if (!opt.startsWith("-")) {
- Path path = new Path(opt);
- FileStatus status = fs.getFileStatus(path);
- if (!status.isDirectory()) {
- printUsage("Specified path is not a directory. path=" + path);
- return 1;
- }
- toCompactDirs.add(path);
- } else {
- printUsage();
- }
- }
- } catch (Exception e) {
- printUsage(e.getMessage());
- return 1;
- }
-
- if (toCompactDirs.isEmpty()) {
- printUsage("No directories to compact specified.");
- return 1;
- }
-
- // Execute compaction!
- if (mapred) {
- return doMapReduce(fs, toCompactDirs, compactOnce, major);
- } else {
- return doClient(fs, toCompactDirs, compactOnce, major);
- }
- }
-
- private void printUsage() {
- printUsage(null);
- }
-
- private void printUsage(final String message) {
- if (message != null && message.length() > 0) {
- System.err.println(message);
- }
- System.err.println("Usage: java " + this.getClass().getName() + " \\");
- System.err.println(" [-compactOnce] [-major] [-mapred] [-D<property=value>]* files...");
- System.err.println();
- System.err.println("Options:");
- System.err.println(" mapred Use MapReduce to run compaction.");
- System.err.println(" compactOnce Execute just one compaction step. (default: while needed)");
- System.err.println(" major Trigger major compaction.");
- System.err.println();
- System.err.println("Note: -D properties will be applied to the conf used. ");
- System.err.println("For example: ");
- System.err.println(" To preserve input files, pass -D"+CONF_COMPLETE_COMPACTION+"=false");
- System.err.println(" To stop delete of compacted file, pass -D"+CONF_DELETE_COMPACTED+"=false");
- System.err.println(" To set tmp dir, pass -D"+CONF_TMP_DIR+"=ALTERNATE_DIR");
- System.err.println();
- System.err.println("Examples:");
- System.err.println(" To compact the full 'TestTable' using MapReduce:");
- System.err.println(" $ hbase " + this.getClass().getName() + " -mapred hdfs:///hbase/data/default/TestTable");
- System.err.println();
- System.err.println(" To compact column family 'x' of the table 'TestTable' region 'abc':");
- System.err.println(" $ hbase " + this.getClass().getName() + " hdfs:///hbase/data/default/TestTable/abc/x");
- }
-
- public static void main(String[] args) throws Exception {
- System.exit(ToolRunner.run(HBaseConfiguration.create(), new CompactionTool(), args));
- }
-}
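
Similarly, a minimal sketch of driving CompactionTool through ToolRunner, equivalent to the MapReduce example in its printUsage(). The HDFS path is a placeholder, the -D property is the one documented above for preserving input files, and the import path is the package shown in the removed file:

  import org.apache.hadoop.hbase.HBaseConfiguration;
  import org.apache.hadoop.hbase.regionserver.CompactionTool;
  import org.apache.hadoop.util.ToolRunner;

  public class CompactionToolLauncher {
    public static void main(String[] args) throws Exception {
      // Run a major compaction of TestTable as a MapReduce job, keeping the
      // original store files (hbase.hstore.compaction.complete=false).
      int exitCode = ToolRunner.run(HBaseConfiguration.create(), new CompactionTool(),
          new String[] { "-Dhbase.hstore.compaction.complete=false", "-mapred", "-major",
              "hdfs:///hbase/data/default/TestTable" });
      System.exit(exitCode);
    }
  }
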
[26/41] hbase git commit: HBASE-18640 Move mapreduce out of
hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportExport.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportExport.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportExport.java
new file mode 100644
index 0000000..91d2696
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportExport.java
@@ -0,0 +1,726 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.doAnswer;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.KeepDeletedCells;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Durability;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.filter.Filter;
+import org.apache.hadoop.hbase.filter.FilterBase;
+import org.apache.hadoop.hbase.filter.PrefixFilter;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.Import.KeyValueImporter;
+import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener;
+import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
+import org.apache.hadoop.hbase.wal.WAL;
+import org.apache.hadoop.hbase.wal.WALKey;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.LauncherSecurityManager;
+import org.apache.hadoop.mapreduce.Mapper.Context;
+import org.apache.hadoop.util.ToolRunner;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.stubbing.Answer;
+
+/**
+ * Tests the table import and table export MR job functionality
+ */
+@Category({VerySlowMapReduceTests.class, MediumTests.class})
+public class TestImportExport {
+ private static final Log LOG = LogFactory.getLog(TestImportExport.class);
+ private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
+ private static final byte[] ROW1 = Bytes.toBytesBinary("\\x32row1");
+ private static final byte[] ROW2 = Bytes.toBytesBinary("\\x32row2");
+ private static final byte[] ROW3 = Bytes.toBytesBinary("\\x32row3");
+ private static final String FAMILYA_STRING = "a";
+ private static final String FAMILYB_STRING = "b";
+ private static final byte[] FAMILYA = Bytes.toBytes(FAMILYA_STRING);
+ private static final byte[] FAMILYB = Bytes.toBytes(FAMILYB_STRING);
+ private static final byte[] QUAL = Bytes.toBytes("q");
+ private static final String OUTPUT_DIR = "outputdir";
+ private static String FQ_OUTPUT_DIR;
+ private static final String EXPORT_BATCH_SIZE = "100";
+
+ private static long now = System.currentTimeMillis();
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ // Up the handlers; this test needs more than usual.
+ UTIL.getConfiguration().setInt(HConstants.REGION_SERVER_HIGH_PRIORITY_HANDLER_COUNT, 10);
+ UTIL.startMiniCluster();
+ FQ_OUTPUT_DIR =
+ new Path(OUTPUT_DIR).makeQualified(FileSystem.get(UTIL.getConfiguration())).toString();
+ }
+
+ @AfterClass
+ public static void afterClass() throws Exception {
+ UTIL.shutdownMiniCluster();
+ }
+
+ @Rule
+ public final TestName name = new TestName();
+
+ @Before
+ public void announce() {
+ LOG.info("Running " + name.getMethodName());
+ }
+
+ @Before
+ @After
+ public void cleanup() throws Exception {
+ FileSystem fs = FileSystem.get(UTIL.getConfiguration());
+ fs.delete(new Path(OUTPUT_DIR), true);
+ }
+
+ /**
+ * Runs an export job with the specified command line args
+ * @param args
+ * @return true if job completed successfully
+ * @throws IOException
+ * @throws InterruptedException
+ * @throws ClassNotFoundException
+ */
+ boolean runExport(String[] args) throws Exception {
+ // need to make a copy of the configuration to make sure different temp dirs are used.
+ int status = ToolRunner.run(new Configuration(UTIL.getConfiguration()), new Export(), args);
+ return status == 0;
+ }
+
+ /**
+ * Runs an import job with the specified command line args
+ * @param args
+ * @return true if job completed successfully
+ * @throws IOException
+ * @throws InterruptedException
+ * @throws ClassNotFoundException
+ */
+ boolean runImport(String[] args) throws Exception {
+ // need to make a copy of the configuration to make sure different temp dirs are used.
+ int status = ToolRunner.run(new Configuration(UTIL.getConfiguration()), new Import(), args);
+ return status == 0;
+ }
+
+ /**
+ * Test a simple export/import round trip with column family renaming
+ * @throws Exception
+ */
+ @Test
+ public void testSimpleCase() throws Exception {
+ try (Table t = UTIL.createTable(TableName.valueOf(name.getMethodName()), FAMILYA, 3);) {
+ Put p = new Put(ROW1);
+ p.addColumn(FAMILYA, QUAL, now, QUAL);
+ p.addColumn(FAMILYA, QUAL, now + 1, QUAL);
+ p.addColumn(FAMILYA, QUAL, now + 2, QUAL);
+ t.put(p);
+ p = new Put(ROW2);
+ p.addColumn(FAMILYA, QUAL, now, QUAL);
+ p.addColumn(FAMILYA, QUAL, now + 1, QUAL);
+ p.addColumn(FAMILYA, QUAL, now + 2, QUAL);
+ t.put(p);
+ p = new Put(ROW3);
+ p.addColumn(FAMILYA, QUAL, now, QUAL);
+ p.addColumn(FAMILYA, QUAL, now + 1, QUAL);
+ p.addColumn(FAMILYA, QUAL, now + 2, QUAL);
+ t.put(p);
+ }
+
+ String[] args = new String[] {
+ // Only export row1 & row2.
+ "-D" + TableInputFormat.SCAN_ROW_START + "=\\x32row1",
+ "-D" + TableInputFormat.SCAN_ROW_STOP + "=\\x32row3",
+ name.getMethodName(),
+ FQ_OUTPUT_DIR,
+ "1000", // max number of key versions per key to export
+ };
+ assertTrue(runExport(args));
+
+ final String IMPORT_TABLE = name.getMethodName() + "import";
+ try (Table t = UTIL.createTable(TableName.valueOf(IMPORT_TABLE), FAMILYB, 3);) {
+ args = new String[] {
+ "-D" + Import.CF_RENAME_PROP + "="+FAMILYA_STRING+":"+FAMILYB_STRING,
+ IMPORT_TABLE,
+ FQ_OUTPUT_DIR
+ };
+ assertTrue(runImport(args));
+
+ Get g = new Get(ROW1);
+ g.setMaxVersions();
+ Result r = t.get(g);
+ assertEquals(3, r.size());
+ g = new Get(ROW2);
+ g.setMaxVersions();
+ r = t.get(g);
+ assertEquals(3, r.size());
+ g = new Get(ROW3);
+ r = t.get(g);
+ assertEquals(0, r.size());
+ }
+ }
+
+ /**
+ * Test export hbase:meta table
+ *
+ * @throws Exception
+ */
+ @Test
+ public void testMetaExport() throws Exception {
+ String EXPORT_TABLE = TableName.META_TABLE_NAME.getNameAsString();
+ String[] args = new String[] { EXPORT_TABLE, FQ_OUTPUT_DIR, "1", "0", "0" };
+ assertTrue(runExport(args));
+ }
+
+ /**
+ * Test import data from 0.94 exported file
+ * @throws Exception
+ */
+ @Test
+ public void testImport94Table() throws Exception {
+ final String name = "exportedTableIn94Format";
+ URL url = TestImportExport.class.getResource(name);
+ File f = new File(url.toURI());
+ if (!f.exists()) {
+ LOG.warn("FAILED TO FIND " + f + "; skipping out on test");
+ return;
+ }
+ assertTrue(f.exists());
+ LOG.info("FILE=" + f);
+ Path importPath = new Path(f.toURI());
+ FileSystem fs = FileSystem.get(UTIL.getConfiguration());
+ fs.copyFromLocalFile(importPath, new Path(FQ_OUTPUT_DIR + Path.SEPARATOR + name));
+ String IMPORT_TABLE = name;
+ try (Table t = UTIL.createTable(TableName.valueOf(IMPORT_TABLE), Bytes.toBytes("f1"), 3);) {
+ String[] args = new String[] {
+ "-Dhbase.import.version=0.94" ,
+ IMPORT_TABLE, FQ_OUTPUT_DIR
+ };
+ assertTrue(runImport(args));
+ /* exportedTableIn94Format contains 5 rows
+ ROW COLUMN+CELL
+ r1 column=f1:c1, timestamp=1383766761171, value=val1
+ r2 column=f1:c1, timestamp=1383766771642, value=val2
+ r3 column=f1:c1, timestamp=1383766777615, value=val3
+ r4 column=f1:c1, timestamp=1383766785146, value=val4
+ r5 column=f1:c1, timestamp=1383766791506, value=val5
+ */
+ assertEquals(5, UTIL.countRows(t));
+ }
+ }
+
+ /**
+ * Test export scanner batching
+ */
+ @Test
+ public void testExportScannerBatching() throws Exception {
+ HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(name.getMethodName()));
+ desc.addFamily(new HColumnDescriptor(FAMILYA)
+ .setMaxVersions(1)
+ );
+ UTIL.getAdmin().createTable(desc);
+ try (Table t = UTIL.getConnection().getTable(desc.getTableName());) {
+
+ Put p = new Put(ROW1);
+ p.addColumn(FAMILYA, QUAL, now, QUAL);
+ p.addColumn(FAMILYA, QUAL, now + 1, QUAL);
+ p.addColumn(FAMILYA, QUAL, now + 2, QUAL);
+ p.addColumn(FAMILYA, QUAL, now + 3, QUAL);
+ p.addColumn(FAMILYA, QUAL, now + 4, QUAL);
+ t.put(p);
+
+ String[] args = new String[] {
+ "-D" + Export.EXPORT_BATCHING + "=" + EXPORT_BATCH_SIZE, // added scanner batching arg.
+ name.getMethodName(),
+ FQ_OUTPUT_DIR
+ };
+ assertTrue(runExport(args));
+
+ FileSystem fs = FileSystem.get(UTIL.getConfiguration());
+ fs.delete(new Path(FQ_OUTPUT_DIR), true);
+ }
+ }
+
+ @Test
+ public void testWithDeletes() throws Exception {
+ HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(name.getMethodName()));
+ desc.addFamily(new HColumnDescriptor(FAMILYA)
+ .setMaxVersions(5)
+ .setKeepDeletedCells(KeepDeletedCells.TRUE)
+ );
+ UTIL.getAdmin().createTable(desc);
+ try (Table t = UTIL.getConnection().getTable(desc.getTableName());) {
+
+ Put p = new Put(ROW1);
+ p.addColumn(FAMILYA, QUAL, now, QUAL);
+ p.addColumn(FAMILYA, QUAL, now + 1, QUAL);
+ p.addColumn(FAMILYA, QUAL, now + 2, QUAL);
+ p.addColumn(FAMILYA, QUAL, now + 3, QUAL);
+ p.addColumn(FAMILYA, QUAL, now + 4, QUAL);
+ t.put(p);
+
+ Delete d = new Delete(ROW1, now+3);
+ t.delete(d);
+ d = new Delete(ROW1);
+ d.addColumns(FAMILYA, QUAL, now+2);
+ t.delete(d);
+ }
+
+ String[] args = new String[] {
+ "-D" + Export.RAW_SCAN + "=true",
+ name.getMethodName(),
+ FQ_OUTPUT_DIR,
+ "1000", // max number of key versions per key to export
+ };
+ assertTrue(runExport(args));
+
+ final String IMPORT_TABLE = name.getMethodName() + "import";
+ desc = new HTableDescriptor(TableName.valueOf(IMPORT_TABLE));
+ desc.addFamily(new HColumnDescriptor(FAMILYA)
+ .setMaxVersions(5)
+ .setKeepDeletedCells(KeepDeletedCells.TRUE)
+ );
+ UTIL.getAdmin().createTable(desc);
+ try (Table t = UTIL.getConnection().getTable(desc.getTableName());) {
+ args = new String[] {
+ IMPORT_TABLE,
+ FQ_OUTPUT_DIR
+ };
+ assertTrue(runImport(args));
+
+ Scan s = new Scan();
+ s.setMaxVersions();
+ s.setRaw(true);
+ ResultScanner scanner = t.getScanner(s);
+ Result r = scanner.next();
+ Cell[] res = r.rawCells();
+ assertTrue(CellUtil.isDeleteFamily(res[0]));
+ assertEquals(now+4, res[1].getTimestamp());
+ assertEquals(now+3, res[2].getTimestamp());
+ assertTrue(CellUtil.isDelete(res[3]));
+ assertEquals(now+2, res[4].getTimestamp());
+ assertEquals(now+1, res[5].getTimestamp());
+ assertEquals(now, res[6].getTimestamp());
+ }
+ }
+
+
+ @Test
+ public void testWithMultipleDeleteFamilyMarkersOfSameRowSameFamily() throws Exception {
+ final TableName exportTable = TableName.valueOf(name.getMethodName());
+ HTableDescriptor desc = new HTableDescriptor(exportTable);
+ desc.addFamily(new HColumnDescriptor(FAMILYA)
+ .setMaxVersions(5)
+ .setKeepDeletedCells(KeepDeletedCells.TRUE)
+ );
+ UTIL.getAdmin().createTable(desc);
+
+ Table exportT = UTIL.getConnection().getTable(exportTable);
+
+ //Add first version of QUAL
+ Put p = new Put(ROW1);
+ p.addColumn(FAMILYA, QUAL, now, QUAL);
+ exportT.put(p);
+
+ //Add Delete family marker
+ Delete d = new Delete(ROW1, now+3);
+ exportT.delete(d);
+
+ //Add second version of QUAL
+ p = new Put(ROW1);
+ p.addColumn(FAMILYA, QUAL, now + 5, "s".getBytes());
+ exportT.put(p);
+
+ //Add second Delete family marker
+ d = new Delete(ROW1, now+7);
+ exportT.delete(d);
+
+
+ String[] args = new String[] {
+ "-D" + Export.RAW_SCAN + "=true", exportTable.getNameAsString(),
+ FQ_OUTPUT_DIR,
+ "1000", // max number of key versions per key to export
+ };
+ assertTrue(runExport(args));
+
+ final String importTable = name.getMethodName() + "import";
+ desc = new HTableDescriptor(TableName.valueOf(importTable));
+ desc.addFamily(new HColumnDescriptor(FAMILYA)
+ .setMaxVersions(5)
+ .setKeepDeletedCells(KeepDeletedCells.TRUE)
+ );
+ UTIL.getAdmin().createTable(desc);
+
+ Table importT = UTIL.getConnection().getTable(TableName.valueOf(importTable));
+ args = new String[] {
+ importTable,
+ FQ_OUTPUT_DIR
+ };
+ assertTrue(runImport(args));
+
+ Scan s = new Scan();
+ s.setMaxVersions();
+ s.setRaw(true);
+
+ ResultScanner importedTScanner = importT.getScanner(s);
+ Result importedTResult = importedTScanner.next();
+
+ ResultScanner exportedTScanner = exportT.getScanner(s);
+ Result exportedTResult = exportedTScanner.next();
+ try {
+ Result.compareResults(exportedTResult, importedTResult);
+ } catch (Exception e) {
+ fail("Original and imported tables data comparision failed with error:"+e.getMessage());
+ } finally {
+ exportT.close();
+ importT.close();
+ }
+ }
+
+ /**
+ * Create a simple table, run an Export job on it, Import with filtering on, verify counts,
+ * then attempt an Import with invalid filter values.
+ */
+ @Test
+ public void testWithFilter() throws Exception {
+ // Create simple table to export
+ HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(name.getMethodName()));
+ desc.addFamily(new HColumnDescriptor(FAMILYA).setMaxVersions(5));
+ UTIL.getAdmin().createTable(desc);
+ Table exportTable = UTIL.getConnection().getTable(desc.getTableName());
+
+ Put p1 = new Put(ROW1);
+ p1.addColumn(FAMILYA, QUAL, now, QUAL);
+ p1.addColumn(FAMILYA, QUAL, now + 1, QUAL);
+ p1.addColumn(FAMILYA, QUAL, now + 2, QUAL);
+ p1.addColumn(FAMILYA, QUAL, now + 3, QUAL);
+ p1.addColumn(FAMILYA, QUAL, now + 4, QUAL);
+
+ // Add a second row so the filter actually has something to exclude.
+ Put p2 = new Put(ROW2);
+ p2.addColumn(FAMILYA, QUAL, now, QUAL);
+
+ exportTable.put(Arrays.asList(p1, p2));
+
+ // Export the simple table
+ String[] args = new String[] { name.getMethodName(), FQ_OUTPUT_DIR, "1000" };
+ assertTrue(runExport(args));
+
+ // Import to a new table
+ final String IMPORT_TABLE = name.getMethodName() + "import";
+ desc = new HTableDescriptor(TableName.valueOf(IMPORT_TABLE));
+ desc.addFamily(new HColumnDescriptor(FAMILYA).setMaxVersions(5));
+ UTIL.getAdmin().createTable(desc);
+
+ Table importTable = UTIL.getConnection().getTable(desc.getTableName());
+ args = new String[] { "-D" + Import.FILTER_CLASS_CONF_KEY + "=" + PrefixFilter.class.getName(),
+ "-D" + Import.FILTER_ARGS_CONF_KEY + "=" + Bytes.toString(ROW1), IMPORT_TABLE,
+ FQ_OUTPUT_DIR,
+ "1000" };
+ assertTrue(runImport(args));
+
+ // get the count of the source table for that time range
+ PrefixFilter filter = new PrefixFilter(ROW1);
+ int count = getCount(exportTable, filter);
+
+ Assert.assertEquals("Unexpected row count between export and import tables", count,
+ getCount(importTable, null));
+
+ // then verify that an invalid filter class doesn't break everything - easier here because we
+ // don't need to re-run the export job
+
+ args = new String[] { "-D" + Import.FILTER_CLASS_CONF_KEY + "=" + Filter.class.getName(),
+ "-D" + Import.FILTER_ARGS_CONF_KEY + "=" + Bytes.toString(ROW1) + "", name.getMethodName(),
+ FQ_OUTPUT_DIR, "1000" };
+ assertFalse(runImport(args));
+
+ // cleanup
+ exportTable.close();
+ importTable.close();
+ }
+
+ /**
+ * Count the number of KeyValues in the specified table that match the given filter.
+ * @param table the table to scan
+ * @param filter the filter to apply, or null to count all cells
+ * @return the number of matching KeyValues
+ * @throws IOException if the scan fails
+ */
+ private int getCount(Table table, Filter filter) throws IOException {
+ Scan scan = new Scan();
+ scan.setFilter(filter);
+ ResultScanner results = table.getScanner(scan);
+ int count = 0;
+ for (Result res : results) {
+ count += res.size();
+ }
+ results.close();
+ return count;
+ }
+
+ /**
+ * Test the main method. With no arguments, Import should print help and call System.exit.
+ */
+ @Test
+ public void testImportMain() throws Exception {
+ PrintStream oldPrintStream = System.err;
+ SecurityManager SECURITY_MANAGER = System.getSecurityManager();
+ LauncherSecurityManager newSecurityManager= new LauncherSecurityManager();
+ System.setSecurityManager(newSecurityManager);
+ ByteArrayOutputStream data = new ByteArrayOutputStream();
+ String[] args = {};
+ System.setErr(new PrintStream(data));
+ try {
+ System.setErr(new PrintStream(data));
+ Import.main(args);
+ fail("should be SecurityException");
+ } catch (SecurityException e) {
+ assertEquals(-1, newSecurityManager.getExitCode());
+ assertTrue(data.toString().contains("Wrong number of arguments:"));
+ assertTrue(data.toString().contains("-Dimport.bulk.output=/path/for/output"));
+ assertTrue(data.toString().contains("-Dimport.filter.class=<name of filter class>"));
+ assertTrue(data.toString().contains("-Dimport.bulk.output=/path/for/output"));
+ assertTrue(data.toString().contains("-Dmapreduce.reduce.speculative=false"));
+ } finally {
+ System.setErr(oldPrintStream);
+ System.setSecurityManager(SECURITY_MANAGER);
+ }
+ }
+
+ /**
+ * Test the main method. With no arguments, Export should print help and call System.exit.
+ */
+ @Test
+ public void testExportMain() throws Exception {
+ PrintStream oldPrintStream = System.err;
+ SecurityManager SECURITY_MANAGER = System.getSecurityManager();
+ LauncherSecurityManager newSecurityManager= new LauncherSecurityManager();
+ System.setSecurityManager(newSecurityManager);
+ ByteArrayOutputStream data = new ByteArrayOutputStream();
+ String[] args = {};
+ System.setErr(new PrintStream(data));
+ try {
+ System.setErr(new PrintStream(data));
+ Export.main(args);
+ fail("should be SecurityException");
+ } catch (SecurityException e) {
+ assertEquals(-1, newSecurityManager.getExitCode());
+ String errMsg = data.toString();
+ assertTrue(errMsg.contains("Wrong number of arguments:"));
+ assertTrue(errMsg.contains(
+ "Usage: Export [-D <property=value>]* <tablename> <outputdir> [<versions> " +
+ "[<starttime> [<endtime>]] [^[regex pattern] or [Prefix] to filter]]"));
+ assertTrue(
+ errMsg.contains("-D hbase.mapreduce.scan.column.family=<family1>,<family2>, ..."));
+ assertTrue(errMsg.contains("-D hbase.mapreduce.include.deleted.rows=true"));
+ assertTrue(errMsg.contains("-Dhbase.client.scanner.caching=100"));
+ assertTrue(errMsg.contains("-Dmapreduce.map.speculative=false"));
+ assertTrue(errMsg.contains("-Dmapreduce.reduce.speculative=false"));
+ assertTrue(errMsg.contains("-Dhbase.export.scanner.batch=10"));
+ } finally {
+ System.setErr(oldPrintStream);
+ System.setSecurityManager(SECURITY_MANAGER);
+ }
+ }
+
+ /**
+ * Test the map method of KeyValueImporter.
+ */
+ @SuppressWarnings({ "unchecked", "rawtypes" })
+ @Test
+ public void testKeyValueImporter() throws Exception {
+ KeyValueImporter importer = new KeyValueImporter();
+ Configuration configuration = new Configuration();
+ Context ctx = mock(Context.class);
+ when(ctx.getConfiguration()).thenReturn(configuration);
+
+ doAnswer(new Answer<Void>() {
+
+ @Override
+ public Void answer(InvocationOnMock invocation) throws Throwable {
+ ImmutableBytesWritable writer = (ImmutableBytesWritable) invocation.getArguments()[0];
+ KeyValue key = (KeyValue) invocation.getArguments()[1];
+ assertEquals("Key", Bytes.toString(writer.get()));
+ assertEquals("row", Bytes.toString(CellUtil.cloneRow(key)));
+ return null;
+ }
+ }).when(ctx).write(any(ImmutableBytesWritable.class), any(KeyValue.class));
+
+ importer.setup(ctx);
+ Result value = mock(Result.class);
+ KeyValue[] keys = {
+ new KeyValue(Bytes.toBytes("row"), Bytes.toBytes("family"), Bytes.toBytes("qualifier"),
+ Bytes.toBytes("value")),
+ new KeyValue(Bytes.toBytes("row"), Bytes.toBytes("family"), Bytes.toBytes("qualifier"),
+ Bytes.toBytes("value1")) };
+ when(value.rawCells()).thenReturn(keys);
+ importer.map(new ImmutableBytesWritable(Bytes.toBytes("Key")), value, ctx);
+
+ }
+
+ /**
+ * Test the addFilterAndArguments method of Import. This method sets a couple of
+ * parameters in the Configuration.
+ */
+ @Test
+ public void testAddFilterAndArguments() throws IOException {
+ Configuration configuration = new Configuration();
+
+ List<String> args = new ArrayList<>();
+ args.add("param1");
+ args.add("param2");
+
+ Import.addFilterAndArguments(configuration, FilterBase.class, args);
+ assertEquals("org.apache.hadoop.hbase.filter.FilterBase",
+ configuration.get(Import.FILTER_CLASS_CONF_KEY));
+ assertEquals("param1,param2", configuration.get(Import.FILTER_ARGS_CONF_KEY));
+ }
+
+ @Test
+ public void testDurability() throws Exception {
+ // Create an export table.
+ String exportTableName = name.getMethodName() + "export";
+ try (Table exportTable = UTIL.createTable(TableName.valueOf(exportTableName), FAMILYA, 3);) {
+
+ // Insert some data
+ Put put = new Put(ROW1);
+ put.addColumn(FAMILYA, QUAL, now, QUAL);
+ put.addColumn(FAMILYA, QUAL, now + 1, QUAL);
+ put.addColumn(FAMILYA, QUAL, now + 2, QUAL);
+ exportTable.put(put);
+
+ put = new Put(ROW2);
+ put.addColumn(FAMILYA, QUAL, now, QUAL);
+ put.addColumn(FAMILYA, QUAL, now + 1, QUAL);
+ put.addColumn(FAMILYA, QUAL, now + 2, QUAL);
+ exportTable.put(put);
+
+ // Run the export
+ String[] args = new String[] { exportTableName, FQ_OUTPUT_DIR, "1000"};
+ assertTrue(runExport(args));
+
+ // Create the table for import
+ String importTableName = name.getMethodName() + "import1";
+ Table importTable = UTIL.createTable(TableName.valueOf(importTableName), FAMILYA, 3);
+
+ // Register the wal listener for the import table
+ HRegionInfo region = UTIL.getHBaseCluster().getRegionServerThreads().get(0).getRegionServer()
+ .getOnlineRegions(importTable.getName()).get(0).getRegionInfo();
+ TableWALActionListener walListener = new TableWALActionListener(region);
+ WAL wal = UTIL.getMiniHBaseCluster().getRegionServer(0).getWAL(region);
+ wal.registerWALActionsListener(walListener);
+
+ // Run the import with SKIP_WAL
+ args =
+ new String[] { "-D" + Import.WAL_DURABILITY + "=" + Durability.SKIP_WAL.name(),
+ importTableName, FQ_OUTPUT_DIR };
+ assertTrue(runImport(args));
+ // Assert that the WAL is not visited
+ assertTrue(!walListener.isWALVisited());
+ // Ensure that the count is 2 (only one version of each key value is obtained)
+ assertTrue(getCount(importTable, null) == 2);
+
+ // Run the import with the default durability option
+ importTableName = name.getMethodName() + "import2";
+ importTable = UTIL.createTable(TableName.valueOf(importTableName), FAMILYA, 3);
+ region = UTIL.getHBaseCluster().getRegionServerThreads().get(0).getRegionServer()
+ .getOnlineRegions(importTable.getName()).get(0).getRegionInfo();
+ wal = UTIL.getMiniHBaseCluster().getRegionServer(0).getWAL(region);
+ walListener = new TableWALActionListener(region);
+ wal.registerWALActionsListener(walListener);
+ args = new String[] { importTableName, FQ_OUTPUT_DIR };
+ assertTrue(runImport(args));
+ // Assert that the WAL is visited
+ assertTrue(walListener.isWALVisited());
+ // Ensure that the count is 2 (only one version of each key value is obtained)
+ assertTrue(getCount(importTable, null) == 2);
+ }
+ }
+
+ /**
+ * Listens via {@link #visitLogEntryBeforeWrite(WALKey, WALEdit)} to detect whether an entry
+ * is written to the Write Ahead Log for the given table.
+ */
+ private static class TableWALActionListener extends WALActionsListener.Base {
+
+ private HRegionInfo regionInfo;
+ private boolean isVisited = false;
+
+ public TableWALActionListener(HRegionInfo region) {
+ this.regionInfo = region;
+ }
+
+ @Override
+ public void visitLogEntryBeforeWrite(WALKey logKey, WALEdit logEdit) {
+ if (logKey.getTablename().getNameAsString().equalsIgnoreCase(
+ this.regionInfo.getTable().getNameAsString()) && (!logEdit.isMetaEdit())) {
+ isVisited = true;
+ }
+ }
+
+ public boolean isWALVisited() {
+ return isVisited;
+ }
+ }
+}
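As context for testDurability above, here is a minimal sketch (not part of this patch) of
launching Import with WAL writes skipped outside the test harness. The table name and output
directory are placeholders, and it assumes the export output already exists under that path
and that Import.WAL_DURABILITY is visible to the caller.

    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.Durability;
    import org.apache.hadoop.hbase.mapreduce.Import;
    import org.apache.hadoop.util.ToolRunner;

    public class ImportSkipWalSketch {
      public static void main(String[] ignored) throws Exception {
        String[] args = new String[] {
            "-D" + Import.WAL_DURABILITY + "=" + Durability.SKIP_WAL.name(),
            "myImportTable",        // destination table (placeholder)
            "/tmp/export-output"    // directory written by a prior Export run (placeholder)
        };
        // Import implements Tool, so ToolRunner applies the -D override before the job runs.
        int exitCode = ToolRunner.run(HBaseConfiguration.create(), new Import(), args);
        System.exit(exitCode);
      }
    }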
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithOperationAttributes.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithOperationAttributes.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithOperationAttributes.java
new file mode 100644
index 0000000..7d6d74f
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithOperationAttributes.java
@@ -0,0 +1,266 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.UUID;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.CategoryBasedTimeout;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.coprocessor.RegionObserver;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.client.Durability;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.coprocessor.ObserverContext;
+import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
+import org.apache.hadoop.hbase.regionserver.Region;
+import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+import org.junit.rules.TestRule;
+
+@Category({MapReduceTests.class, LargeTests.class})
+public class TestImportTSVWithOperationAttributes implements Configurable {
+ @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
+ withTimeout(this.getClass()).withLookingForStuckThread(true).build();
+ private static final Log LOG = LogFactory.getLog(TestImportTSVWithOperationAttributes.class);
+ protected static final String NAME = TestImportTsv.class.getSimpleName();
+ protected static HBaseTestingUtility util = new HBaseTestingUtility();
+
+ /**
+ * Delete the tmp directory after running doMROnTableTest. Boolean. Default is
+ * true.
+ */
+ protected static final String DELETE_AFTER_LOAD_CONF = NAME + ".deleteAfterLoad";
+
+ /**
+ * Force use of combiner in doMROnTableTest. Boolean. Default is true.
+ */
+ protected static final String FORCE_COMBINER_CONF = NAME + ".forceCombiner";
+
+ private static Configuration conf;
+
+ private static final String TEST_ATR_KEY = "test";
+
+ private final String FAMILY = "FAM";
+
+ @Rule
+ public TestName name = new TestName();
+
+ public Configuration getConf() {
+ return util.getConfiguration();
+ }
+
+ public void setConf(Configuration conf) {
+ throw new IllegalArgumentException("setConf not supported");
+ }
+
+ @BeforeClass
+ public static void provisionCluster() throws Exception {
+ conf = util.getConfiguration();
+ conf.set("hbase.coprocessor.master.classes", OperationAttributesTestController.class.getName());
+ conf.set("hbase.coprocessor.region.classes", OperationAttributesTestController.class.getName());
+ util.startMiniCluster();
+ }
+
+ @AfterClass
+ public static void releaseCluster() throws Exception {
+ util.shutdownMiniCluster();
+ }
+
+ @Test
+ public void testMROnTable() throws Exception {
+ final TableName tableName = TableName.valueOf(name.getMethodName() + UUID.randomUUID());
+
+ // Prepare the arguments required for the test.
+ String[] args = new String[] {
+ "-D" + ImportTsv.MAPPER_CONF_KEY
+ + "=org.apache.hadoop.hbase.mapreduce.TsvImporterCustomTestMapperForOprAttr",
+ "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_ATTRIBUTES_KEY",
+ "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b", tableName.getNameAsString() };
+ String data = "KEY\u001bVALUE1\u001bVALUE2\u001btest=>myvalue\n";
+ util.createTable(tableName, FAMILY);
+ doMROnTableTest(util, FAMILY, data, args, 1, true);
+ util.deleteTable(tableName);
+ }
+
+ @Test
+ public void testMROnTableWithInvalidOperationAttr() throws Exception {
+ final TableName tableName = TableName.valueOf(name.getMethodName() + UUID.randomUUID());
+
+ // Prepare the arguments required for the test.
+ String[] args = new String[] {
+ "-D" + ImportTsv.MAPPER_CONF_KEY
+ + "=org.apache.hadoop.hbase.mapreduce.TsvImporterCustomTestMapperForOprAttr",
+ "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_ATTRIBUTES_KEY",
+ "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b", tableName.getNameAsString() };
+ String data = "KEY\u001bVALUE1\u001bVALUE2\u001btest1=>myvalue\n";
+ util.createTable(tableName, FAMILY);
+ doMROnTableTest(util, FAMILY, data, args, 1, false);
+ util.deleteTable(tableName);
+ }
+
+ /**
+ * Run an ImportTsv job and perform basic validation on the results. Returns
+ * the ImportTsv <code>Tool</code> instance so that other tests can inspect it
+ * for further validation as necessary.
+ *
+ * @param args
+ * Any arguments to pass BEFORE inputFile path is appended.
+ * @param dataAvailable
+ * whether the imported rows are expected to be present in the table afterwards.
+ * @return The Tool instance used to run the test.
+ */
+ private Tool doMROnTableTest(HBaseTestingUtility util, String family, String data, String[] args,
+ int valueMultiplier, boolean dataAvailable) throws Exception {
+ String table = args[args.length - 1];
+ Configuration conf = new Configuration(util.getConfiguration());
+
+ // populate input file
+ FileSystem fs = FileSystem.get(conf);
+ Path inputPath = fs.makeQualified(new Path(util.getDataTestDirOnTestFS(table), "input.dat"));
+ FSDataOutputStream op = fs.create(inputPath, true);
+ op.write(Bytes.toBytes(data));
+ op.close();
+ LOG.debug(String.format("Wrote test data to file: %s", inputPath));
+
+ if (conf.getBoolean(FORCE_COMBINER_CONF, true)) {
+ LOG.debug("Forcing combiner.");
+ conf.setInt("mapreduce.map.combine.minspills", 1);
+ }
+
+ // run the import
+ List<String> argv = new ArrayList<>(Arrays.asList(args));
+ argv.add(inputPath.toString());
+ Tool tool = new ImportTsv();
+ LOG.debug("Running ImportTsv with arguments: " + argv);
+ assertEquals(0, ToolRunner.run(conf, tool, argv.toArray(args)));
+
+ validateTable(conf, TableName.valueOf(table), family, valueMultiplier, dataAvailable);
+
+ if (conf.getBoolean(DELETE_AFTER_LOAD_CONF, true)) {
+ LOG.debug("Deleting test subdirectory");
+ util.cleanupDataTestDirOnTestFS(table);
+ }
+ return tool;
+ }
+
+ /**
+ * Confirm ImportTsv via data in online table.
+ *
+ * @param dataAvailable
+ * whether rows are expected to be present in the table.
+ */
+ private static void validateTable(Configuration conf, TableName tableName, String family,
+ int valueMultiplier, boolean dataAvailable) throws IOException {
+
+ LOG.debug("Validating table.");
+ Connection connection = ConnectionFactory.createConnection(conf);
+ Table table = connection.getTable(tableName);
+ boolean verified = false;
+ long pause = conf.getLong("hbase.client.pause", 5 * 1000);
+ int numRetries = conf.getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 5);
+ for (int i = 0; i < numRetries; i++) {
+ try {
+ Scan scan = new Scan();
+ // Scan entire family.
+ scan.addFamily(Bytes.toBytes(family));
+ if (dataAvailable) {
+ ResultScanner resScanner = table.getScanner(scan);
+ for (Result res : resScanner) {
+ LOG.debug("Getting results " + res.size());
+ assertTrue(res.size() == 2);
+ List<Cell> kvs = res.listCells();
+ assertTrue(CellUtil.matchingRow(kvs.get(0), Bytes.toBytes("KEY")));
+ assertTrue(CellUtil.matchingRow(kvs.get(1), Bytes.toBytes("KEY")));
+ assertTrue(CellUtil.matchingValue(kvs.get(0), Bytes.toBytes("VALUE" + valueMultiplier)));
+ assertTrue(CellUtil.matchingValue(kvs.get(1),
+ Bytes.toBytes("VALUE" + 2 * valueMultiplier)));
+ // Only one result set is expected, so let it loop.
+ verified = true;
+ }
+ } else {
+ ResultScanner resScanner = table.getScanner(scan);
+ Result[] next = resScanner.next(2);
+ assertEquals(0, next.length);
+ verified = true;
+ }
+
+ break;
+ } catch (NullPointerException e) {
+ // If here, a cell was empty. Presume it's because updates came in
+ // after the scanner had been opened. Wait a while and retry.
+ }
+ try {
+ Thread.sleep(pause);
+ } catch (InterruptedException e) {
+ // continue
+ }
+ }
+ table.close();
+ connection.close();
+ assertTrue(verified);
+ }
+
+ public static class OperationAttributesTestController implements RegionObserver {
+
+ @Override
+ public void prePut(ObserverContext<RegionCoprocessorEnvironment> e, Put put, WALEdit edit,
+ Durability durability) throws IOException {
+ Region region = e.getEnvironment().getRegion();
+ if (!region.getRegionInfo().isMetaTable()
+ && !region.getRegionInfo().getTable().isSystemTable()) {
+ if (put.getAttribute(TEST_ATR_KEY) != null) {
+ LOG.debug("allow any put to happen " + region.getRegionInfo().getRegionNameAsString());
+ } else {
+ e.bypass();
+ }
+ }
+ }
+ }
+}
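The custom mapper named in the arguments above turns the HBASE_ATTRIBUTES_KEY column into an
operation attribute on each Put, which OperationAttributesTestController then checks via
put.getAttribute(TEST_ATR_KEY). A hedged sketch of attaching the same attribute from plain
client code (hypothetical helper; Table, Put and Bytes are the standard client classes already
imported in the test above):

    /** Hypothetical helper: set the "test" operation attribute the observer looks for. */
    static void putWithOperationAttribute(Table table) throws IOException {
      Put p = new Put(Bytes.toBytes("KEY"));
      p.addColumn(Bytes.toBytes("FAM"), Bytes.toBytes("A"), Bytes.toBytes("VALUE1"));
      // Client-side equivalent of the "test=>myvalue" token in the TSV input; without this
      // attribute the coprocessor bypasses the put.
      p.setAttribute("test", Bytes.toBytes("myvalue"));
      table.put(p);
    }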
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithTTLs.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithTTLs.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithTTLs.java
new file mode 100644
index 0000000..4ab3d29
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithTTLs.java
@@ -0,0 +1,175 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.UUID;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.coprocessor.RegionObserver;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.client.Durability;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.coprocessor.ObserverContext;
+import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
+import org.apache.hadoop.hbase.regionserver.Region;
+import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+
+@Category({MapReduceTests.class, LargeTests.class})
+public class TestImportTSVWithTTLs implements Configurable {
+
+ protected static final Log LOG = LogFactory.getLog(TestImportTSVWithTTLs.class);
+ protected static final String NAME = TestImportTsv.class.getSimpleName();
+ protected static HBaseTestingUtility util = new HBaseTestingUtility();
+
+ /**
+ * Delete the tmp directory after running doMROnTableTest. Boolean. Default is
+ * true.
+ */
+ protected static final String DELETE_AFTER_LOAD_CONF = NAME + ".deleteAfterLoad";
+
+ /**
+ * Force use of combiner in doMROnTableTest. Boolean. Default is true.
+ */
+ protected static final String FORCE_COMBINER_CONF = NAME + ".forceCombiner";
+
+ private final String FAMILY = "FAM";
+ private static Configuration conf;
+
+ @Rule
+ public TestName name = new TestName();
+
+ @Override
+ public Configuration getConf() {
+ return util.getConfiguration();
+ }
+
+ @Override
+ public void setConf(Configuration conf) {
+ throw new IllegalArgumentException("setConf not supported");
+ }
+
+ @BeforeClass
+ public static void provisionCluster() throws Exception {
+ conf = util.getConfiguration();
+ // We don't check persistence in HFiles in this test, but if we ever do we will
+ // need this where the default hfile version is not 3 (i.e. 0.98)
+ conf.setInt("hfile.format.version", 3);
+ conf.set("hbase.coprocessor.region.classes", TTLCheckingObserver.class.getName());
+ util.startMiniCluster();
+ }
+
+ @AfterClass
+ public static void releaseCluster() throws Exception {
+ util.shutdownMiniCluster();
+ }
+
+ @Test
+ public void testMROnTable() throws Exception {
+ final TableName tableName = TableName.valueOf(name.getMethodName() + UUID.randomUUID());
+
+ // Prepare the arguments required for the test.
+ String[] args = new String[] {
+ "-D" + ImportTsv.MAPPER_CONF_KEY
+ + "=org.apache.hadoop.hbase.mapreduce.TsvImporterMapper",
+ "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_CELL_TTL",
+ "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b", tableName.getNameAsString() };
+ String data = "KEY\u001bVALUE1\u001bVALUE2\u001b1000000\n";
+ util.createTable(tableName, FAMILY);
+ doMROnTableTest(util, FAMILY, data, args, 1);
+ util.deleteTable(tableName);
+ }
+
+ protected static Tool doMROnTableTest(HBaseTestingUtility util, String family, String data,
+ String[] args, int valueMultiplier) throws Exception {
+ TableName table = TableName.valueOf(args[args.length - 1]);
+ Configuration conf = new Configuration(util.getConfiguration());
+
+ // populate input file
+ FileSystem fs = FileSystem.get(conf);
+ Path inputPath = fs.makeQualified(new Path(util
+ .getDataTestDirOnTestFS(table.getNameAsString()), "input.dat"));
+ FSDataOutputStream op = fs.create(inputPath, true);
+ op.write(Bytes.toBytes(data));
+ op.close();
+ LOG.debug(String.format("Wrote test data to file: %s", inputPath));
+
+ if (conf.getBoolean(FORCE_COMBINER_CONF, true)) {
+ LOG.debug("Forcing combiner.");
+ conf.setInt("mapreduce.map.combine.minspills", 1);
+ }
+
+ // run the import
+ List<String> argv = new ArrayList<>(Arrays.asList(args));
+ argv.add(inputPath.toString());
+ Tool tool = new ImportTsv();
+ LOG.debug("Running ImportTsv with arguments: " + argv);
+ try {
+ // Job will fail if observer rejects entries without TTL
+ assertEquals(0, ToolRunner.run(conf, tool, argv.toArray(args)));
+ } finally {
+ // Clean up
+ if (conf.getBoolean(DELETE_AFTER_LOAD_CONF, true)) {
+ LOG.debug("Deleting test subdirectory");
+ util.cleanupDataTestDirOnTestFS(table.getNameAsString());
+ }
+ }
+
+ return tool;
+ }
+
+ public static class TTLCheckingObserver implements RegionObserver {
+
+ @Override
+ public void prePut(ObserverContext<RegionCoprocessorEnvironment> e, Put put, WALEdit edit,
+ Durability durability) throws IOException {
+ Region region = e.getEnvironment().getRegion();
+ if (!region.getRegionInfo().isMetaTable()
+ && !region.getRegionInfo().getTable().isSystemTable()) {
+ // The put carries the TTL attribute
+ if (put.getTTL() != Long.MAX_VALUE) {
+ return;
+ }
+ throw new IOException("Operation does not have TTL set");
+ }
+ }
+ }
+}
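TTLCheckingObserver above rejects any Put whose TTL is still the Long.MAX_VALUE default, which
is the value ImportTsv overrides from the HBASE_CELL_TTL column. A minimal client-side sketch
that would satisfy the same check (hypothetical helper; assumes Mutation#setTTL is available,
and Table comes from org.apache.hadoop.hbase.client):

    /** Hypothetical helper: write one cell with an explicit per-mutation TTL. */
    static void putWithTtl(Table table) throws IOException {
      Put p = new Put(Bytes.toBytes("KEY"));
      p.addColumn(Bytes.toBytes("FAM"), Bytes.toBytes("A"), Bytes.toBytes("VALUE1"));
      // Same TTL value as the "1000000" field in the TSV test data above (milliseconds).
      p.setTTL(1000000L);
      table.put(p);
    }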
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithVisibilityLabels.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithVisibilityLabels.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithVisibilityLabels.java
new file mode 100644
index 0000000..8967ac7
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithVisibilityLabels.java
@@ -0,0 +1,495 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.security.PrivilegedExceptionAction;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.UUID;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.hfile.CacheConfig;
+import org.apache.hadoop.hbase.io.hfile.HFile;
+import org.apache.hadoop.hbase.io.hfile.HFileScanner;
+import org.apache.hadoop.hbase.protobuf.generated.VisibilityLabelsProtos.VisibilityLabelsResponse;
+import org.apache.hadoop.hbase.security.User;
+import org.apache.hadoop.hbase.security.visibility.Authorizations;
+import org.apache.hadoop.hbase.security.visibility.CellVisibility;
+import org.apache.hadoop.hbase.security.visibility.ScanLabelGenerator;
+import org.apache.hadoop.hbase.security.visibility.SimpleScanLabelGenerator;
+import org.apache.hadoop.hbase.security.visibility.VisibilityClient;
+import org.apache.hadoop.hbase.security.visibility.VisibilityConstants;
+import org.apache.hadoop.hbase.security.visibility.VisibilityController;
+import org.apache.hadoop.hbase.security.visibility.VisibilityUtils;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapred.Utils.OutputFileUtils.OutputFilesFilter;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+
+@Category({MapReduceTests.class, LargeTests.class})
+public class TestImportTSVWithVisibilityLabels implements Configurable {
+
+ private static final Log LOG = LogFactory.getLog(TestImportTSVWithVisibilityLabels.class);
+ protected static final String NAME = TestImportTsv.class.getSimpleName();
+ protected static HBaseTestingUtility util = new HBaseTestingUtility();
+
+ /**
+ * Delete the tmp directory after running doMROnTableTest. Boolean. Default is
+ * true.
+ */
+ protected static final String DELETE_AFTER_LOAD_CONF = NAME + ".deleteAfterLoad";
+
+ /**
+ * Force use of combiner in doMROnTableTest. Boolean. Default is true.
+ */
+ protected static final String FORCE_COMBINER_CONF = NAME + ".forceCombiner";
+
+ private final String FAMILY = "FAM";
+ private final static String TOPSECRET = "topsecret";
+ private final static String PUBLIC = "public";
+ private final static String PRIVATE = "private";
+ private final static String CONFIDENTIAL = "confidential";
+ private final static String SECRET = "secret";
+ private static User SUPERUSER;
+ private static Configuration conf;
+
+ @Rule
+ public TestName name = new TestName();
+
+ @Override
+ public Configuration getConf() {
+ return util.getConfiguration();
+ }
+
+ @Override
+ public void setConf(Configuration conf) {
+ throw new IllegalArgumentException("setConf not supported");
+ }
+
+ @BeforeClass
+ public static void provisionCluster() throws Exception {
+ conf = util.getConfiguration();
+ SUPERUSER = User.createUserForTesting(conf, "admin", new String[] { "supergroup" });
+ conf.set("hbase.superuser", "admin,"+User.getCurrent().getName());
+ conf.setInt("hfile.format.version", 3);
+ conf.set("hbase.coprocessor.master.classes", VisibilityController.class.getName());
+ conf.set("hbase.coprocessor.region.classes", VisibilityController.class.getName());
+ conf.setClass(VisibilityUtils.VISIBILITY_LABEL_GENERATOR_CLASS, SimpleScanLabelGenerator.class,
+ ScanLabelGenerator.class);
+ util.startMiniCluster();
+ // Wait for the labels table to become available
+ util.waitTableEnabled(VisibilityConstants.LABELS_TABLE_NAME.getName(), 50000);
+ createLabels();
+ }
+
+ private static void createLabels() throws IOException, InterruptedException {
+ PrivilegedExceptionAction<VisibilityLabelsResponse> action =
+ new PrivilegedExceptionAction<VisibilityLabelsResponse>() {
+ @Override
+ public VisibilityLabelsResponse run() throws Exception {
+ String[] labels = { SECRET, TOPSECRET, CONFIDENTIAL, PUBLIC, PRIVATE };
+ try (Connection conn = ConnectionFactory.createConnection(conf)) {
+ VisibilityClient.addLabels(conn, labels);
+ LOG.info("Added labels ");
+ } catch (Throwable t) {
+ LOG.error("Error in adding labels" , t);
+ throw new IOException(t);
+ }
+ return null;
+ }
+ };
+ SUPERUSER.runAs(action);
+ }
+
+ @AfterClass
+ public static void releaseCluster() throws Exception {
+ util.shutdownMiniCluster();
+ }
+
+ @Test
+ public void testMROnTable() throws Exception {
+ final TableName tableName = TableName.valueOf(name.getMethodName() + UUID.randomUUID());
+
+ // Prepare the arguments required for the test.
+ String[] args = new String[] {
+ "-D" + ImportTsv.MAPPER_CONF_KEY
+ + "=org.apache.hadoop.hbase.mapreduce.TsvImporterMapper",
+ "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_CELL_VISIBILITY",
+ "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b", tableName.getNameAsString() };
+ String data = "KEY\u001bVALUE1\u001bVALUE2\u001bsecret&private\n";
+ util.createTable(tableName, FAMILY);
+ doMROnTableTest(util, FAMILY, data, args, 1);
+ util.deleteTable(tableName);
+ }
+
+ @Test
+ public void testMROnTableWithDeletes() throws Exception {
+ final TableName tableName = TableName.valueOf(name.getMethodName() + UUID.randomUUID());
+
+ // Prepare the arguments required for the test.
+ String[] args = new String[] {
+ "-D" + ImportTsv.MAPPER_CONF_KEY + "=org.apache.hadoop.hbase.mapreduce.TsvImporterMapper",
+ "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_CELL_VISIBILITY",
+ "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b", tableName.getNameAsString() };
+ String data = "KEY\u001bVALUE1\u001bVALUE2\u001bsecret&private\n";
+ util.createTable(tableName, FAMILY);
+ doMROnTableTest(util, FAMILY, data, args, 1);
+ issueDeleteAndVerifyData(tableName);
+ util.deleteTable(tableName);
+ }
+
+ private void issueDeleteAndVerifyData(TableName tableName) throws IOException {
+ LOG.debug("Validating table after delete.");
+ Table table = util.getConnection().getTable(tableName);
+ boolean verified = false;
+ long pause = conf.getLong("hbase.client.pause", 5 * 1000);
+ int numRetries = conf.getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 5);
+ for (int i = 0; i < numRetries; i++) {
+ try {
+ Delete d = new Delete(Bytes.toBytes("KEY"));
+ d.addFamily(Bytes.toBytes(FAMILY));
+ d.setCellVisibility(new CellVisibility("private&secret"));
+ table.delete(d);
+
+ Scan scan = new Scan();
+ // Scan entire family.
+ scan.addFamily(Bytes.toBytes(FAMILY));
+ scan.setAuthorizations(new Authorizations("secret", "private"));
+ ResultScanner resScanner = table.getScanner(scan);
+ Result[] next = resScanner.next(5);
+ assertEquals(0, next.length);
+ verified = true;
+ break;
+ } catch (NullPointerException e) {
+ // If here, a cell was empty. Presume it's because updates came in
+ // after the scanner had been opened. Wait a while and retry.
+ }
+ try {
+ Thread.sleep(pause);
+ } catch (InterruptedException e) {
+ // continue
+ }
+ }
+ table.close();
+ assertTrue(verified);
+ }
+
+ @Test
+ public void testMROnTableWithBulkload() throws Exception {
+ final TableName tableName = TableName.valueOf(name.getMethodName() + UUID.randomUUID());
+ Path hfiles = new Path(util.getDataTestDirOnTestFS(tableName.getNameAsString()), "hfiles");
+ // Prepare the arguments required for the test.
+ String[] args = new String[] {
+ "-D" + ImportTsv.BULK_OUTPUT_CONF_KEY + "=" + hfiles.toString(),
+ "-D" + ImportTsv.COLUMNS_CONF_KEY
+ + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_CELL_VISIBILITY",
+ "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b", tableName.getNameAsString() };
+ String data = "KEY\u001bVALUE1\u001bVALUE2\u001bsecret&private\n";
+ util.createTable(tableName, FAMILY);
+ doMROnTableTest(util, FAMILY, data, args, 1);
+ util.deleteTable(tableName);
+ }
+
+ @Test
+ public void testBulkOutputWithTsvImporterTextMapper() throws Exception {
+ final TableName table = TableName.valueOf(name.getMethodName() + UUID.randomUUID());
+ String FAMILY = "FAM";
+ Path bulkOutputPath = new Path(util.getDataTestDirOnTestFS(table.getNameAsString()),"hfiles");
+ // Prepare the arguments required for the test.
+ String[] args =
+ new String[] {
+ "-D" + ImportTsv.MAPPER_CONF_KEY
+ + "=org.apache.hadoop.hbase.mapreduce.TsvImporterTextMapper",
+ "-D" + ImportTsv.COLUMNS_CONF_KEY
+ + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_CELL_VISIBILITY",
+ "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b",
+ "-D" + ImportTsv.BULK_OUTPUT_CONF_KEY + "=" + bulkOutputPath.toString(),
+ table.getNameAsString()
+ };
+ String data = "KEY\u001bVALUE4\u001bVALUE8\u001bsecret&private\n";
+ doMROnTableTest(util, FAMILY, data, args, 4);
+ util.deleteTable(table);
+ }
+
+ @Test
+ public void testMRWithOutputFormat() throws Exception {
+ final TableName tableName = TableName.valueOf(name.getMethodName() + UUID.randomUUID());
+ Path hfiles = new Path(util.getDataTestDirOnTestFS(tableName.getNameAsString()), "hfiles");
+ // Prepare the arguments required for the test.
+ String[] args = new String[] {
+ "-D" + ImportTsv.MAPPER_CONF_KEY
+ + "=org.apache.hadoop.hbase.mapreduce.TsvImporterMapper",
+ "-D" + ImportTsv.BULK_OUTPUT_CONF_KEY + "=" + hfiles.toString(),
+ "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_CELL_VISIBILITY",
+ "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b", tableName.getNameAsString() };
+ String data = "KEY\u001bVALUE4\u001bVALUE8\u001bsecret&private\n";
+ util.createTable(tableName, FAMILY);
+ doMROnTableTest(util, FAMILY, data, args, 1);
+ util.deleteTable(tableName);
+ }
+
+ @Test
+ public void testBulkOutputWithInvalidLabels() throws Exception {
+ final TableName tableName = TableName.valueOf(name.getMethodName() + UUID.randomUUID());
+ Path hfiles = new Path(util.getDataTestDirOnTestFS(tableName.getNameAsString()), "hfiles");
+ // Prepare the arguments required for the test.
+ String[] args =
+ new String[] { "-D" + ImportTsv.BULK_OUTPUT_CONF_KEY + "=" + hfiles.toString(),
+ "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_CELL_VISIBILITY",
+ "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b", tableName.getNameAsString() };
+
+ // 2 Data rows, one with valid label and one with invalid label
+ String data =
+ "KEY\u001bVALUE1\u001bVALUE2\u001bprivate\nKEY1\u001bVALUE1\u001bVALUE2\u001binvalid\n";
+ util.createTable(tableName, FAMILY);
+ doMROnTableTest(util, FAMILY, data, args, 1, 2);
+ util.deleteTable(tableName);
+ }
+
+ @Test
+ public void testBulkOutputWithTsvImporterTextMapperWithInvalidLabels() throws Exception {
+ final TableName tableName = TableName.valueOf(name.getMethodName() + UUID.randomUUID());
+ Path hfiles = new Path(util.getDataTestDirOnTestFS(tableName.getNameAsString()), "hfiles");
+ // Prepare the arguments required for the test.
+ String[] args =
+ new String[] {
+ "-D" + ImportTsv.MAPPER_CONF_KEY
+ + "=org.apache.hadoop.hbase.mapreduce.TsvImporterTextMapper",
+ "-D" + ImportTsv.BULK_OUTPUT_CONF_KEY + "=" + hfiles.toString(),
+ "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_CELL_VISIBILITY",
+ "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b", tableName.getNameAsString() };
+
+ // 2 Data rows, one with valid label and one with invalid label
+ String data =
+ "KEY\u001bVALUE1\u001bVALUE2\u001bprivate\nKEY1\u001bVALUE1\u001bVALUE2\u001binvalid\n";
+ util.createTable(tableName, FAMILY);
+ doMROnTableTest(util, FAMILY, data, args, 1, 2);
+ util.deleteTable(tableName);
+ }
+
+ protected static Tool doMROnTableTest(HBaseTestingUtility util, String family, String data,
+ String[] args, int valueMultiplier) throws Exception {
+ return doMROnTableTest(util, family, data, args, valueMultiplier, -1);
+ }
+
+ /**
+ * Run an ImportTsv job and perform basic validation on the results. Returns
+ * the ImportTsv <code>Tool</code> instance so that other tests can inspect it
+ * for further validation as necessary. This method is static to ensure
+ * non-reliance on the instance's util/conf facilities.
+ *
+ * @param args
+ * Any arguments to pass BEFORE inputFile path is appended.
+ *
+ * @param expectedKVCount Expected KV count. pass -1 to skip the kvcount check
+ *
+ * @return The Tool instance used to run the test.
+ */
+ protected static Tool doMROnTableTest(HBaseTestingUtility util, String family, String data,
+ String[] args, int valueMultiplier, int expectedKVCount) throws Exception {
+ TableName table = TableName.valueOf(args[args.length - 1]);
+ Configuration conf = new Configuration(util.getConfiguration());
+
+ // populate input file
+ FileSystem fs = FileSystem.get(conf);
+ Path inputPath = fs.makeQualified(new Path(util
+ .getDataTestDirOnTestFS(table.getNameAsString()), "input.dat"));
+ FSDataOutputStream op = fs.create(inputPath, true);
+ if (data == null) {
+ data = "KEY\u001bVALUE1\u001bVALUE2\n";
+ }
+ op.write(Bytes.toBytes(data));
+ op.close();
+ LOG.debug(String.format("Wrote test data to file: %s", inputPath));
+
+ if (conf.getBoolean(FORCE_COMBINER_CONF, true)) {
+ LOG.debug("Forcing combiner.");
+ conf.setInt("mapreduce.map.combine.minspills", 1);
+ }
+
+ // run the import
+ List<String> argv = new ArrayList<>(Arrays.asList(args));
+ argv.add(inputPath.toString());
+ Tool tool = new ImportTsv();
+ LOG.debug("Running ImportTsv with arguments: " + argv);
+ assertEquals(0, ToolRunner.run(conf, tool, argv.toArray(args)));
+
+ // Perform basic validation. If the input args did not include
+ // ImportTsv.BULK_OUTPUT_CONF_KEY then validate data in the table.
+ // Otherwise, validate presence of hfiles.
+ boolean createdHFiles = false;
+ String outputPath = null;
+ for (String arg : argv) {
+ if (arg.contains(ImportTsv.BULK_OUTPUT_CONF_KEY)) {
+ createdHFiles = true;
+ // split '-Dfoo=bar' on '=' and keep 'bar'
+ outputPath = arg.split("=")[1];
+ break;
+ }
+ }
+ LOG.debug("validating the table " + createdHFiles);
+ if (createdHFiles)
+ validateHFiles(fs, outputPath, family, expectedKVCount);
+ else
+ validateTable(conf, table, family, valueMultiplier);
+
+ if (conf.getBoolean(DELETE_AFTER_LOAD_CONF, true)) {
+ LOG.debug("Deleting test subdirectory");
+ util.cleanupDataTestDirOnTestFS(table.getNameAsString());
+ }
+ return tool;
+ }
+
+ /**
+ * Confirm ImportTsv via HFiles on fs.
+ */
+ private static void validateHFiles(FileSystem fs, String outputPath, String family,
+ int expectedKVCount) throws IOException {
+
+ // validate number and content of output columns
+ LOG.debug("Validating HFiles.");
+ Set<String> configFamilies = new HashSet<>();
+ configFamilies.add(family);
+ Set<String> foundFamilies = new HashSet<>();
+ int actualKVCount = 0;
+ for (FileStatus cfStatus : fs.listStatus(new Path(outputPath), new OutputFilesFilter())) {
+ LOG.debug("The output path has files");
+ String[] elements = cfStatus.getPath().toString().split(Path.SEPARATOR);
+ String cf = elements[elements.length - 1];
+ foundFamilies.add(cf);
+ assertTrue(String.format(
+ "HFile ouput contains a column family (%s) not present in input families (%s)", cf,
+ configFamilies), configFamilies.contains(cf));
+ for (FileStatus hfile : fs.listStatus(cfStatus.getPath())) {
+ assertTrue(String.format("HFile %s appears to contain no data.", hfile.getPath()),
+ hfile.getLen() > 0);
+ if (expectedKVCount > -1) {
+ actualKVCount += getKVCountFromHfile(fs, hfile.getPath());
+ }
+ }
+ }
+ if (expectedKVCount > -1) {
+ assertTrue(String.format(
+ "KV count in output hfile=<%d> doesn't match with expected KV count=<%d>", actualKVCount,
+ expectedKVCount), actualKVCount == expectedKVCount);
+ }
+ }
+
+ /**
+ * Confirm ImportTsv via data in online table.
+ */
+ private static void validateTable(Configuration conf, TableName tableName, String family,
+ int valueMultiplier) throws IOException {
+
+ LOG.debug("Validating table.");
+ Table table = util.getConnection().getTable(tableName);
+ boolean verified = false;
+ long pause = conf.getLong("hbase.client.pause", 5 * 1000);
+ int numRetries = conf.getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 5);
+ for (int i = 0; i < numRetries; i++) {
+ try {
+ Scan scan = new Scan();
+ // Scan entire family.
+ scan.addFamily(Bytes.toBytes(family));
+ scan.setAuthorizations(new Authorizations("secret","private"));
+ ResultScanner resScanner = table.getScanner(scan);
+ Result[] next = resScanner.next(5);
+ assertEquals(1, next.length);
+ for (Result res : resScanner) {
+ LOG.debug("Getting results " + res.size());
+ assertTrue(res.size() == 2);
+ List<Cell> kvs = res.listCells();
+ assertTrue(CellUtil.matchingRow(kvs.get(0), Bytes.toBytes("KEY")));
+ assertTrue(CellUtil.matchingRow(kvs.get(1), Bytes.toBytes("KEY")));
+ assertTrue(CellUtil.matchingValue(kvs.get(0), Bytes.toBytes("VALUE" + valueMultiplier)));
+ assertTrue(CellUtil.matchingValue(kvs.get(1),
+ Bytes.toBytes("VALUE" + 2 * valueMultiplier)));
+ // Only one result set is expected, so let it loop.
+ }
+ verified = true;
+ break;
+ } catch (NullPointerException e) {
+ // If here, a cell was empty. Presume it's because updates came in
+ // after the scanner had been opened. Wait a while and retry.
+ }
+ try {
+ Thread.sleep(pause);
+ } catch (InterruptedException e) {
+ // continue
+ }
+ }
+ table.close();
+ assertTrue(verified);
+ }
+
+ /**
+ * Returns the total number of KVs in the given HFile.
+ * @param fs File System
+ * @param p HFile path
+ * @return KV count in the given hfile
+ * @throws IOException
+ */
+ private static int getKVCountFromHfile(FileSystem fs, Path p) throws IOException {
+ Configuration conf = util.getConfiguration();
+ HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf);
+ reader.loadFileInfo();
+ HFileScanner scanner = reader.getScanner(false, false);
+ scanner.seekTo();
+ int count = 0;
+ do {
+ count++;
+ } while (scanner.next());
+ reader.close();
+ return count;
+ }
+
+}
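The visibility round trip these tests drive through ImportTsv can also be exercised directly
with the client API. A hedged sketch (hypothetical helper; Put comes from
org.apache.hadoop.hbase.client, the other classes are already imported above, and it assumes
the "secret" and "private" labels were added as in createLabels):

    /** Hypothetical helper: write a labelled cell and read it back with matching auths. */
    static void visibilityRoundTrip(Table table) throws IOException {
      Put p = new Put(Bytes.toBytes("KEY"));
      p.addColumn(Bytes.toBytes("FAM"), Bytes.toBytes("A"), Bytes.toBytes("VALUE1"));
      p.setCellVisibility(new CellVisibility("secret&private"));
      table.put(p);

      Scan scan = new Scan();
      scan.addFamily(Bytes.toBytes("FAM"));
      scan.setAuthorizations(new Authorizations("secret", "private"));
      try (ResultScanner scanner = table.getScanner(scan)) {
        for (Result r : scanner) {
          // only cells visible under both labels show up here
        }
      }
    }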
[13/41] hbase git commit: HBASE-18640 Move mapreduce out of
hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatImpl.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatImpl.java
deleted file mode 100644
index bf11473..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatImpl.java
+++ /dev/null
@@ -1,412 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.hadoop.hbase.client.TableDescriptor;
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.HDFSBlocksDistribution;
-import org.apache.hadoop.hbase.HDFSBlocksDistribution.HostAndWeight;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.classification.InterfaceStability;
-import org.apache.hadoop.hbase.client.ClientSideRegionScanner;
-import org.apache.hadoop.hbase.client.IsolationLevel;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.MapReduceProtos.TableSnapshotRegionSplit;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
-import org.apache.hadoop.hbase.regionserver.HRegion;
-import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper;
-import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
-import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.FSUtils;
-import org.apache.hadoop.io.Writable;
-
-import java.io.ByteArrayOutputStream;
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.UUID;
-
-/**
- * Hadoop MR API-agnostic implementation for mapreduce over table snapshots.
- */
-@InterfaceAudience.Private
-@InterfaceStability.Evolving
-public class TableSnapshotInputFormatImpl {
- // TODO: Snapshots files are owned in fs by the hbase user. There is no
- // easy way to delegate access.
-
- public static final Log LOG = LogFactory.getLog(TableSnapshotInputFormatImpl.class);
-
- private static final String SNAPSHOT_NAME_KEY = "hbase.TableSnapshotInputFormat.snapshot.name";
- // key for specifying the root dir of the restored snapshot
- protected static final String RESTORE_DIR_KEY = "hbase.TableSnapshotInputFormat.restore.dir";
-
- /** See {@link #getBestLocations(Configuration, HDFSBlocksDistribution)} */
- private static final String LOCALITY_CUTOFF_MULTIPLIER =
- "hbase.tablesnapshotinputformat.locality.cutoff.multiplier";
- private static final float DEFAULT_LOCALITY_CUTOFF_MULTIPLIER = 0.8f;
-
- /**
- * Implementation class for InputSplit logic common between mapred and mapreduce.
- */
- public static class InputSplit implements Writable {
-
- private TableDescriptor htd;
- private HRegionInfo regionInfo;
- private String[] locations;
- private String scan;
- private String restoreDir;
-
- // constructor for mapreduce framework / Writable
- public InputSplit() {}
-
- public InputSplit(TableDescriptor htd, HRegionInfo regionInfo, List<String> locations,
- Scan scan, Path restoreDir) {
- this.htd = htd;
- this.regionInfo = regionInfo;
- if (locations == null || locations.isEmpty()) {
- this.locations = new String[0];
- } else {
- this.locations = locations.toArray(new String[locations.size()]);
- }
- try {
- this.scan = scan != null ? TableMapReduceUtil.convertScanToString(scan) : "";
- } catch (IOException e) {
- LOG.warn("Failed to convert Scan to String", e);
- }
-
- this.restoreDir = restoreDir.toString();
- }
-
- public TableDescriptor getHtd() {
- return htd;
- }
-
- public String getScan() {
- return scan;
- }
-
- public String getRestoreDir() {
- return restoreDir;
- }
-
- public long getLength() {
- //TODO: We can obtain the file sizes of the snapshot here.
- return 0;
- }
-
- public String[] getLocations() {
- return locations;
- }
-
- public TableDescriptor getTableDescriptor() {
- return htd;
- }
-
- public HRegionInfo getRegionInfo() {
- return regionInfo;
- }
-
- // TODO: We should have ProtobufSerialization in Hadoop, and directly use PB objects instead of
- // doing this wrapping with Writables.
- @Override
- public void write(DataOutput out) throws IOException {
- TableSnapshotRegionSplit.Builder builder = TableSnapshotRegionSplit.newBuilder()
- .setTable(ProtobufUtil.toTableSchema(htd))
- .setRegion(HRegionInfo.convert(regionInfo));
-
- for (String location : locations) {
- builder.addLocations(location);
- }
-
- TableSnapshotRegionSplit split = builder.build();
-
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- split.writeTo(baos);
- baos.close();
- byte[] buf = baos.toByteArray();
- out.writeInt(buf.length);
- out.write(buf);
-
- Bytes.writeByteArray(out, Bytes.toBytes(scan));
- Bytes.writeByteArray(out, Bytes.toBytes(restoreDir));
-
- }
-
- @Override
- public void readFields(DataInput in) throws IOException {
- int len = in.readInt();
- byte[] buf = new byte[len];
- in.readFully(buf);
- TableSnapshotRegionSplit split = TableSnapshotRegionSplit.PARSER.parseFrom(buf);
- this.htd = ProtobufUtil.toTableDescriptor(split.getTable());
- this.regionInfo = HRegionInfo.convert(split.getRegion());
- List<String> locationsList = split.getLocationsList();
- this.locations = locationsList.toArray(new String[locationsList.size()]);
-
- this.scan = Bytes.toString(Bytes.readByteArray(in));
- this.restoreDir = Bytes.toString(Bytes.readByteArray(in));
- }
- }
-
- /**
- * Implementation class for RecordReader logic common between mapred and mapreduce.
- */
- public static class RecordReader {
- private InputSplit split;
- private Scan scan;
- private Result result = null;
- private ImmutableBytesWritable row = null;
- private ClientSideRegionScanner scanner;
-
- public ClientSideRegionScanner getScanner() {
- return scanner;
- }
-
- public void initialize(InputSplit split, Configuration conf) throws IOException {
- this.scan = TableMapReduceUtil.convertStringToScan(split.getScan());
- this.split = split;
- TableDescriptor htd = split.htd;
- HRegionInfo hri = this.split.getRegionInfo();
- FileSystem fs = FSUtils.getCurrentFileSystem(conf);
-
-
- // region is immutable, this should be fine,
- // otherwise we have to set the thread read point
- scan.setIsolationLevel(IsolationLevel.READ_UNCOMMITTED);
- // disable caching of data blocks
- scan.setCacheBlocks(false);
-
- scanner =
- new ClientSideRegionScanner(conf, fs, new Path(split.restoreDir), htd, hri, scan, null);
- }
-
- public boolean nextKeyValue() throws IOException {
- result = scanner.next();
- if (result == null) {
- //we are done
- return false;
- }
-
- if (this.row == null) {
- this.row = new ImmutableBytesWritable();
- }
- this.row.set(result.getRow());
- return true;
- }
-
- public ImmutableBytesWritable getCurrentKey() {
- return row;
- }
-
- public Result getCurrentValue() {
- return result;
- }
-
- public long getPos() {
- return 0;
- }
-
- public float getProgress() {
- return 0; // TODO: use total bytes to estimate
- }
-
- public void close() {
- if (this.scanner != null) {
- this.scanner.close();
- }
- }
- }
-
- public static List<InputSplit> getSplits(Configuration conf) throws IOException {
- String snapshotName = getSnapshotName(conf);
-
- Path rootDir = FSUtils.getRootDir(conf);
- FileSystem fs = rootDir.getFileSystem(conf);
-
- SnapshotManifest manifest = getSnapshotManifest(conf, snapshotName, rootDir, fs);
-
- List<HRegionInfo> regionInfos = getRegionInfosFromManifest(manifest);
-
- // TODO: mapred does not support scan as input API. Work around for now.
- Scan scan = extractScanFromConf(conf);
- // the temp dir where the snapshot is restored
- Path restoreDir = new Path(conf.get(RESTORE_DIR_KEY));
-
- return getSplits(scan, manifest, regionInfos, restoreDir, conf);
- }
-
- public static List<HRegionInfo> getRegionInfosFromManifest(SnapshotManifest manifest) {
- List<SnapshotRegionManifest> regionManifests = manifest.getRegionManifests();
- if (regionManifests == null) {
- throw new IllegalArgumentException("Snapshot seems empty");
- }
-
- List<HRegionInfo> regionInfos = Lists.newArrayListWithCapacity(regionManifests.size());
-
- for (SnapshotRegionManifest regionManifest : regionManifests) {
- HRegionInfo hri = HRegionInfo.convert(regionManifest.getRegionInfo());
- if (hri.isOffline() && (hri.isSplit() || hri.isSplitParent())) {
- continue;
- }
- regionInfos.add(hri);
- }
- return regionInfos;
- }
-
- public static SnapshotManifest getSnapshotManifest(Configuration conf, String snapshotName,
- Path rootDir, FileSystem fs) throws IOException {
- Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir);
- SnapshotDescription snapshotDesc = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
- return SnapshotManifest.open(conf, fs, snapshotDir, snapshotDesc);
- }
-
- public static Scan extractScanFromConf(Configuration conf) throws IOException {
- Scan scan = null;
- if (conf.get(TableInputFormat.SCAN) != null) {
- scan = TableMapReduceUtil.convertStringToScan(conf.get(TableInputFormat.SCAN));
- } else if (conf.get(org.apache.hadoop.hbase.mapred.TableInputFormat.COLUMN_LIST) != null) {
- String[] columns =
- conf.get(org.apache.hadoop.hbase.mapred.TableInputFormat.COLUMN_LIST).split(" ");
- scan = new Scan();
- for (String col : columns) {
- scan.addFamily(Bytes.toBytes(col));
- }
- } else {
- throw new IllegalArgumentException("Unable to create scan");
- }
- return scan;
- }
-
- public static List<InputSplit> getSplits(Scan scan, SnapshotManifest manifest,
- List<HRegionInfo> regionManifests, Path restoreDir, Configuration conf) throws IOException {
- // load table descriptor
- TableDescriptor htd = manifest.getTableDescriptor();
-
- Path tableDir = FSUtils.getTableDir(restoreDir, htd.getTableName());
-
- List<InputSplit> splits = new ArrayList<>();
- for (HRegionInfo hri : regionManifests) {
- // load region descriptor
-
- if (CellUtil.overlappingKeys(scan.getStartRow(), scan.getStopRow(), hri.getStartKey(),
- hri.getEndKey())) {
- // compute HDFS locations from snapshot files (which will get the locations for
- // referred hfiles)
- List<String> hosts = getBestLocations(conf,
- HRegion.computeHDFSBlocksDistribution(conf, htd, hri, tableDir));
-
- int len = Math.min(3, hosts.size());
- hosts = hosts.subList(0, len);
- splits.add(new InputSplit(htd, hri, hosts, scan, restoreDir));
- }
- }
-
- return splits;
-
- }
-
- /**
- * This computes the locations to be passed from the InputSplit. MR/Yarn schedulers do not take
- * weights into account, so they treat every location passed from the input split as equal. We
- * do not want to blindly pass all the locations, since we are creating one split per region, and
- * the region's blocks are distributed throughout the cluster unless favored node assignment
- * is used. In the expected stable case, only one location will contain most of the blocks as
- * local.
- * On the other hand, in favored node assignment, 3 nodes will contain highly local blocks. Here
- * we are doing a simple heuristic, where we will pass all hosts which have at least 80%
- * (hbase.tablesnapshotinputformat.locality.cutoff.multiplier) as much block locality as the top
- * host with the best locality.
- */
- public static List<String> getBestLocations(
- Configuration conf, HDFSBlocksDistribution blockDistribution) {
- List<String> locations = new ArrayList<>(3);
-
- HostAndWeight[] hostAndWeights = blockDistribution.getTopHostsWithWeights();
-
- if (hostAndWeights.length == 0) {
- return locations;
- }
-
- HostAndWeight topHost = hostAndWeights[0];
- locations.add(topHost.getHost());
-
- // Heuristic: filter all hosts which have at least cutoffMultiplier % of block locality
- double cutoffMultiplier
- = conf.getFloat(LOCALITY_CUTOFF_MULTIPLIER, DEFAULT_LOCALITY_CUTOFF_MULTIPLIER);
-
- double filterWeight = topHost.getWeight() * cutoffMultiplier;
-
- for (int i = 1; i < hostAndWeights.length; i++) {
- if (hostAndWeights[i].getWeight() >= filterWeight) {
- locations.add(hostAndWeights[i].getHost());
- } else {
- break;
- }
- }
-
- return locations;
- }
-
- private static String getSnapshotName(Configuration conf) {
- String snapshotName = conf.get(SNAPSHOT_NAME_KEY);
- if (snapshotName == null) {
- throw new IllegalArgumentException("Snapshot name must be provided");
- }
- return snapshotName;
- }
-
- /**
- * Configures the job to use TableSnapshotInputFormat to read from a snapshot.
- * @param conf the job configuration
- * @param snapshotName the name of the snapshot to read from
- * @param restoreDir a temporary directory to restore the snapshot into. Current user should
- * have write permissions to this directory, and this should not be a subdirectory of rootdir.
- * After the job is finished, restoreDir can be deleted.
- * @throws IOException if an error occurs
- */
- public static void setInput(Configuration conf, String snapshotName, Path restoreDir)
- throws IOException {
- conf.set(SNAPSHOT_NAME_KEY, snapshotName);
-
- Path rootDir = FSUtils.getRootDir(conf);
- FileSystem fs = rootDir.getFileSystem(conf);
-
- restoreDir = new Path(restoreDir, UUID.randomUUID().toString());
-
- // TODO: restore from record readers to parallelize.
- RestoreSnapshotHelper.copySnapshotForScanner(conf, fs, rootDir, restoreDir, snapshotName);
-
- conf.set(RESTORE_DIR_KEY, restoreDir.toString());
- }
-}
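
For orientation, a minimal sketch (not part of this patch) of pointing a MapReduce job at a
snapshot through the public TableMapReduceUtil entry point; the snapshot name, restore path,
and RowCountMapper class are illustrative only:

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.Result;
    import org.apache.hadoop.hbase.client.Scan;
    import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
    import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
    import org.apache.hadoop.hbase.mapreduce.TableMapper;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;

    public class SnapshotScanDriver {
      // Illustrative mapper; it just counts rows read from the restored snapshot.
      public static class RowCountMapper extends TableMapper<Text, LongWritable> {
        @Override
        protected void map(ImmutableBytesWritable row, Result value, Context context)
            throws IOException, InterruptedException {
          context.write(new Text("rows"), new LongWritable(1L));
        }
      }

      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        Job job = Job.getInstance(conf, "snapshot-scan");
        Scan scan = new Scan();
        scan.setCacheBlocks(false); // mirrors what the RecordReader above enforces
        TableMapReduceUtil.initTableSnapshotMapperJob(
            "my_snapshot",                        // snapshot to read (illustrative)
            scan,
            RowCountMapper.class,
            Text.class, LongWritable.class,
            job,
            true,                                 // ship HBase jars with the job
            new Path("/tmp/snapshot-restore"));   // scratch dir outside hbase.rootdir
        System.exit(job.waitForCompletion(true) ? 0 : 1);
      }
    }
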
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSplit.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSplit.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSplit.java
deleted file mode 100644
index 13c7c67..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSplit.java
+++ /dev/null
@@ -1,395 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.Arrays;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.io.WritableUtils;
-import org.apache.hadoop.mapreduce.InputSplit;
-
-/**
- * A table split corresponds to a key range (low, high) and an optional scanner.
- * All references to row below refer to the key of the row.
- */
-@InterfaceAudience.Public
-public class TableSplit extends InputSplit
-implements Writable, Comparable<TableSplit> {
- /** @deprecated LOG variable will be made private. Fix in HBase 3.0. */
- @Deprecated
- public static final Log LOG = LogFactory.getLog(TableSplit.class);
-
- // should be < 0 (@see #readFields(DataInput))
- // version 1 supports Scan data member
- enum Version {
- UNVERSIONED(0),
- // Initial number we put on TableSplit when we introduced versioning.
- INITIAL(-1),
- // Added an encoded region name field for easier identification of split -> region
- WITH_ENCODED_REGION_NAME(-2);
-
- final int code;
- static final Version[] byCode;
- static {
- byCode = Version.values();
- for (int i = 0; i < byCode.length; i++) {
- if (byCode[i].code != -1 * i) {
- throw new AssertionError("Values in this enum should be descending by one");
- }
- }
- }
-
- Version(int code) {
- this.code = code;
- }
-
- boolean atLeast(Version other) {
- return code <= other.code;
- }
-
- static Version fromCode(int code) {
- return byCode[code * -1];
- }
- }
-
- private static final Version VERSION = Version.WITH_ENCODED_REGION_NAME;
- private TableName tableName;
- private byte [] startRow;
- private byte [] endRow;
- private String regionLocation;
- private String encodedRegionName = "";
- private String scan = ""; // stores the serialized form of the Scan
- private long length; // Contains estimation of region size in bytes
-
- /** Default constructor. */
- public TableSplit() {
- this((TableName)null, null, HConstants.EMPTY_BYTE_ARRAY,
- HConstants.EMPTY_BYTE_ARRAY, "");
- }
-
- /**
- * Creates a new instance while assigning all variables.
- * The length of the region is set to 0.
- * The encoded name of the region is set to blank.
- *
- * @param tableName The name of the current table.
- * @param scan The scan associated with this split.
- * @param startRow The start row of the split.
- * @param endRow The end row of the split.
- * @param location The location of the region.
- */
- public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow,
- final String location) {
- this(tableName, scan, startRow, endRow, location, 0L);
- }
-
- /**
- * Creates a new instance while assigning all variables.
- * The encoded name of the region is set to blank.
- *
- * @param tableName The name of the current table.
- * @param scan The scan associated with this split.
- * @param startRow The start row of the split.
- * @param endRow The end row of the split.
- * @param location The location of the region.
- */
- public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow,
- final String location, long length) {
- this(tableName, scan, startRow, endRow, location, "", length);
- }
-
- /**
- * Creates a new instance while assigning all variables.
- *
- * @param tableName The name of the current table.
- * @param scan The scan associated with this split.
- * @param startRow The start row of the split.
- * @param endRow The end row of the split.
- * @param location The location of the region.
- * @param encodedRegionName The encoded name of the region.
- * @param length Size of the region in bytes.
- */
- public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow,
- final String location, final String encodedRegionName, long length) {
- this.tableName = tableName;
- try {
- this.scan =
- (null == scan) ? "" : TableMapReduceUtil.convertScanToString(scan);
- } catch (IOException e) {
- LOG.warn("Failed to convert Scan to String", e);
- }
- this.startRow = startRow;
- this.endRow = endRow;
- this.regionLocation = location;
- this.encodedRegionName = encodedRegionName;
- this.length = length;
- }
-
- /**
- * Creates a new instance without a scanner.
- * The length of the region is set to 0.
- *
- * @param tableName The name of the current table.
- * @param startRow The start row of the split.
- * @param endRow The end row of the split.
- * @param location The location of the region.
- */
- public TableSplit(TableName tableName, byte[] startRow, byte[] endRow,
- final String location) {
- this(tableName, null, startRow, endRow, location);
- }
-
- /**
- * Creates a new instance without a scanner.
- *
- * @param tableName The name of the current table.
- * @param startRow The start row of the split.
- * @param endRow The end row of the split.
- * @param location The location of the region.
- * @param length Size of region in bytes
- */
- public TableSplit(TableName tableName, byte[] startRow, byte[] endRow,
- final String location, long length) {
- this(tableName, null, startRow, endRow, location, length);
- }
-
- /**
- * Returns a Scan object from the stored string representation.
- *
- * @return A Scan object based on the stored scanner.
- * @throws IOException if the stored scan string cannot be deserialized.
- */
- public Scan getScan() throws IOException {
- return TableMapReduceUtil.convertStringToScan(this.scan);
- }
-
- /**
- * Returns the table name converted to a byte array.
- * @see #getTable()
- * @return The table name.
- */
- public byte [] getTableName() {
- return tableName.getName();
- }
-
- /**
- * Returns the table name.
- *
- * @return The table name.
- */
- public TableName getTable() {
- // Ideally this accessor would be called getTableName, but that name is already taken by the
- // pre-existing byte[]-returning method above, so both are kept.
- return tableName;
- }
-
- /**
- * Returns the start row.
- *
- * @return The start row.
- */
- public byte [] getStartRow() {
- return startRow;
- }
-
- /**
- * Returns the end row.
- *
- * @return The end row.
- */
- public byte [] getEndRow() {
- return endRow;
- }
-
- /**
- * Returns the region location.
- *
- * @return The region's location.
- */
- public String getRegionLocation() {
- return regionLocation;
- }
-
- /**
- * Returns the region's location as an array.
- *
- * @return The array containing the region location.
- * @see org.apache.hadoop.mapreduce.InputSplit#getLocations()
- */
- @Override
- public String[] getLocations() {
- return new String[] {regionLocation};
- }
-
- /**
- * Returns the region's encoded name.
- *
- * @return The region's encoded name.
- */
- public String getEncodedRegionName() {
- return encodedRegionName;
- }
-
- /**
- * Returns the length of the split.
- *
- * @return The length of the split.
- * @see org.apache.hadoop.mapreduce.InputSplit#getLength()
- */
- @Override
- public long getLength() {
- return length;
- }
-
- /**
- * Reads the values of each field.
- *
- * @param in The input to read from.
- * @throws IOException When reading the input fails.
- */
- @Override
- public void readFields(DataInput in) throws IOException {
- Version version = Version.UNVERSIONED;
- // TableSplit was not versioned in the beginning.
- // In order to introduce it now, we make use of the fact
- // that tableName was written with Bytes.writeByteArray,
- // which encodes the array length as a vint which is >= 0.
- // Hence if the vint is >= 0 we have an old version and the vint
- // encodes the length of tableName.
- // If < 0 we just read the version and the next vint is the length.
- // @see Bytes#readByteArray(DataInput)
- int len = WritableUtils.readVInt(in);
- if (len < 0) {
- // what we just read was the version
- version = Version.fromCode(len);
- len = WritableUtils.readVInt(in);
- }
- byte[] tableNameBytes = new byte[len];
- in.readFully(tableNameBytes);
- tableName = TableName.valueOf(tableNameBytes);
- startRow = Bytes.readByteArray(in);
- endRow = Bytes.readByteArray(in);
- regionLocation = Bytes.toString(Bytes.readByteArray(in));
- if (version.atLeast(Version.INITIAL)) {
- scan = Bytes.toString(Bytes.readByteArray(in));
- }
- length = WritableUtils.readVLong(in);
- if (version.atLeast(Version.WITH_ENCODED_REGION_NAME)) {
- encodedRegionName = Bytes.toString(Bytes.readByteArray(in));
- }
- }
-
- /**
- * Writes the field values to the output.
- *
- * @param out The output to write to.
- * @throws IOException When writing the values to the output fails.
- */
- @Override
- public void write(DataOutput out) throws IOException {
- WritableUtils.writeVInt(out, VERSION.code);
- Bytes.writeByteArray(out, tableName.getName());
- Bytes.writeByteArray(out, startRow);
- Bytes.writeByteArray(out, endRow);
- Bytes.writeByteArray(out, Bytes.toBytes(regionLocation));
- Bytes.writeByteArray(out, Bytes.toBytes(scan));
- WritableUtils.writeVLong(out, length);
- Bytes.writeByteArray(out, Bytes.toBytes(encodedRegionName));
- }
-
- /**
- * Returns the details about this instance as a string.
- *
- * @return The values of this instance as a string.
- * @see java.lang.Object#toString()
- */
- @Override
- public String toString() {
- StringBuilder sb = new StringBuilder();
- sb.append("HBase table split(");
- sb.append("table name: ").append(tableName);
- // null scan input is represented by ""
- String printScan = "";
- if (!scan.equals("")) {
- try {
- // get the real scan here in toString, not the Base64 string
- printScan = TableMapReduceUtil.convertStringToScan(scan).toString();
- }
- catch (IOException e) {
- printScan = "";
- }
- }
- sb.append(", scan: ").append(printScan);
- sb.append(", start row: ").append(Bytes.toStringBinary(startRow));
- sb.append(", end row: ").append(Bytes.toStringBinary(endRow));
- sb.append(", region location: ").append(regionLocation);
- sb.append(", encoded region name: ").append(encodedRegionName);
- sb.append(")");
- return sb.toString();
- }
-
- /**
- * Compares this split against the given one.
- *
- * @param split The split to compare to.
- * @return The result of the comparison.
- * @see java.lang.Comparable#compareTo(java.lang.Object)
- */
- @Override
- public int compareTo(TableSplit split) {
- // If the table names of the two splits are the same, compare the start rows;
- // otherwise compare the table names.
- int tableNameComparison =
- getTable().compareTo(split.getTable());
- return tableNameComparison != 0 ? tableNameComparison : Bytes.compareTo(
- getStartRow(), split.getStartRow());
- }
-
- @Override
- public boolean equals(Object o) {
- if (o == null || !(o instanceof TableSplit)) {
- return false;
- }
- return tableName.equals(((TableSplit)o).tableName) &&
- Bytes.equals(startRow, ((TableSplit)o).startRow) &&
- Bytes.equals(endRow, ((TableSplit)o).endRow) &&
- regionLocation.equals(((TableSplit)o).regionLocation);
- }
-
- @Override
- public int hashCode() {
- int result = tableName != null ? tableName.hashCode() : 0;
- result = 31 * result + (scan != null ? scan.hashCode() : 0);
- result = 31 * result + (startRow != null ? Arrays.hashCode(startRow) : 0);
- result = 31 * result + (endRow != null ? Arrays.hashCode(endRow) : 0);
- result = 31 * result + (regionLocation != null ? regionLocation.hashCode() : 0);
- result = 31 * result + (encodedRegionName != null ? encodedRegionName.hashCode() : 0);
- return result;
- }
-}
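
To make the negative-vint versioning in readFields() above concrete, here is a small, assumed
round-trip sketch using Hadoop's in-memory data buffers; the table name, rows, and location are
made up:

    import org.apache.hadoop.hbase.TableName;
    import org.apache.hadoop.hbase.mapreduce.TableSplit;
    import org.apache.hadoop.hbase.util.Bytes;
    import org.apache.hadoop.io.DataInputBuffer;
    import org.apache.hadoop.io.DataOutputBuffer;

    public class TableSplitRoundTrip {
      public static void main(String[] args) throws Exception {
        TableSplit split = new TableSplit(TableName.valueOf("t1"),
            Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), "rs1.example.com", 1024L);

        // write() first emits VERSION.code (-2) as a vint, then the table name bytes.
        DataOutputBuffer out = new DataOutputBuffer();
        split.write(out);

        // readFields() sees the negative vint, switches to the versioned layout,
        // and reads the table name length from the next vint.
        DataInputBuffer in = new DataInputBuffer();
        in.reset(out.getData(), out.getLength());
        TableSplit copy = new TableSplit();
        copy.readFields(in);

        System.out.println(copy); // prints table, scan, rows, location, encoded region name
      }
    }
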
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TextSortReducer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TextSortReducer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TextSortReducer.java
deleted file mode 100644
index 84324e2..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TextSortReducer.java
+++ /dev/null
@@ -1,213 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Set;
-import java.util.TreeSet;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.ArrayBackedTag;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellComparator;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.KeyValueUtil;
-import org.apache.hadoop.hbase.Tag;
-import org.apache.hadoop.hbase.TagType;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.security.visibility.InvalidLabelException;
-import org.apache.hadoop.hbase.util.Base64;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Counter;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.util.StringUtils;
-
-/**
- * Emits sorted KeyValues. Parses the passed text, creates KeyValues, and sorts them before emitting.
- * @see HFileOutputFormat2
- * @see KeyValueSortReducer
- * @see PutSortReducer
- */
-@InterfaceAudience.Public
-public class TextSortReducer extends
- Reducer<ImmutableBytesWritable, Text, ImmutableBytesWritable, KeyValue> {
-
- /** Timestamp for all inserted rows */
- private long ts;
-
- /** Column separator */
- private String separator;
-
- /** Should skip bad lines */
- private boolean skipBadLines;
-
- private Counter badLineCount;
-
- private ImportTsv.TsvParser parser;
-
- /** Cell visibility expr **/
- private String cellVisibilityExpr;
-
- /** Cell TTL */
- private long ttl;
-
- private CellCreator kvCreator;
-
- public long getTs() {
- return ts;
- }
-
- public boolean getSkipBadLines() {
- return skipBadLines;
- }
-
- public Counter getBadLineCount() {
- return badLineCount;
- }
-
- public void incrementBadLineCount(int count) {
- this.badLineCount.increment(count);
- }
-
- /**
- * Handles initializing this class with objects specific to it (i.e., the parser).
- * Common initialization that might be leveraged by a subclass is done in
- * <code>doSetup</code>. Hence a subclass may choose to override this method
- * and call <code>doSetup</code> as well before handling its own custom params.
- *
- * @param context
- */
- @Override
- protected void setup(Context context) {
- Configuration conf = context.getConfiguration();
- doSetup(context, conf);
-
- parser = new ImportTsv.TsvParser(conf.get(ImportTsv.COLUMNS_CONF_KEY), separator);
- if (parser.getRowKeyColumnIndex() == -1) {
- throw new RuntimeException("No row key column specified");
- }
- this.kvCreator = new CellCreator(conf);
- }
-
- /**
- * Handles common parameter initialization that a subclass might want to leverage.
- * @param context
- * @param conf
- */
- protected void doSetup(Context context, Configuration conf) {
- // If a custom separator has been used,
- // decode it back from Base64 encoding.
- separator = conf.get(ImportTsv.SEPARATOR_CONF_KEY);
- if (separator == null) {
- separator = ImportTsv.DEFAULT_SEPARATOR;
- } else {
- separator = new String(Base64.decode(separator));
- }
-
- // Should never get 0 as we are setting this to a valid value in job configuration.
- ts = conf.getLong(ImportTsv.TIMESTAMP_CONF_KEY, 0);
-
- skipBadLines = context.getConfiguration().getBoolean(ImportTsv.SKIP_LINES_CONF_KEY, true);
- badLineCount = context.getCounter("ImportTsv", "Bad Lines");
- }
-
- @Override
- protected void reduce(
- ImmutableBytesWritable rowKey,
- java.lang.Iterable<Text> lines,
- Reducer<ImmutableBytesWritable, Text,
- ImmutableBytesWritable, KeyValue>.Context context)
- throws java.io.IOException, InterruptedException
- {
- // although reduce() is called per-row, handle pathological case
- long threshold = context.getConfiguration().getLong(
- "reducer.row.threshold", 1L * (1<<30));
- Iterator<Text> iter = lines.iterator();
- while (iter.hasNext()) {
- Set<KeyValue> kvs = new TreeSet<>(CellComparator.COMPARATOR);
- long curSize = 0;
- // stop at the end or the RAM threshold
- while (iter.hasNext() && curSize < threshold) {
- Text line = iter.next();
- byte[] lineBytes = line.getBytes();
- try {
- ImportTsv.TsvParser.ParsedLine parsed = parser.parse(lineBytes, line.getLength());
- // Retrieve timestamp if exists
- ts = parsed.getTimestamp(ts);
- cellVisibilityExpr = parsed.getCellVisibility();
- ttl = parsed.getCellTTL();
-
- // create tags for the parsed line
- List<Tag> tags = new ArrayList<>();
- if (cellVisibilityExpr != null) {
- tags.addAll(kvCreator.getVisibilityExpressionResolver().createVisibilityExpTags(
- cellVisibilityExpr));
- }
- // Add TTL directly to the KV so we can vary them when packing more than one KV
- // into puts
- if (ttl > 0) {
- tags.add(new ArrayBackedTag(TagType.TTL_TAG_TYPE, Bytes.toBytes(ttl)));
- }
- for (int i = 0; i < parsed.getColumnCount(); i++) {
- if (i == parser.getRowKeyColumnIndex() || i == parser.getTimestampKeyColumnIndex()
- || i == parser.getAttributesKeyColumnIndex() || i == parser.getCellVisibilityColumnIndex()
- || i == parser.getCellTTLColumnIndex()) {
- continue;
- }
- // Creating the KV which needs to be directly written to HFiles. Using the Facade
- // KVCreator for creation of kvs.
- Cell cell = this.kvCreator.create(lineBytes, parsed.getRowKeyOffset(),
- parsed.getRowKeyLength(), parser.getFamily(i), 0, parser.getFamily(i).length,
- parser.getQualifier(i), 0, parser.getQualifier(i).length, ts, lineBytes,
- parsed.getColumnOffset(i), parsed.getColumnLength(i), tags);
- KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
- kvs.add(kv);
- curSize += kv.heapSize();
- }
- } catch (ImportTsv.TsvParser.BadTsvLineException | IllegalArgumentException
- | InvalidLabelException badLine) {
- if (skipBadLines) {
- System.err.println("Bad line: " + badLine.getMessage());
- incrementBadLineCount(1);
- continue;
- }
- throw new IOException(badLine);
- }
- }
- context.setStatus("Read " + kvs.size() + " entries of " + kvs.getClass()
- + "(" + StringUtils.humanReadableInt(curSize) + ")");
- int index = 0;
- for (KeyValue kv : kvs) {
- context.write(rowKey, kv);
- if (++index > 0 && index % 100 == 0)
- context.setStatus("Wrote " + index + " key values.");
- }
-
- // if we have more entries to process
- if (iter.hasNext()) {
- // force flush because we cannot guarantee intra-row sorted order
- context.write(null, null);
- }
- }
- }
-}
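
For context, a hedged sketch of the ImportTsv configuration that drives a bulk-load job through
this reducer; the key constants are assumed to keep their usual ImportTsv values, and the table,
paths, and column spec are illustrative:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.mapreduce.ImportTsv;
    import org.apache.hadoop.hbase.mapreduce.TsvImporterTextMapper;
    import org.apache.hadoop.util.ToolRunner;

    public class BulkImportDriver {
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // Column layout of the TSV input; HBASE_ROW_KEY marks the row-key column.
        conf.set(ImportTsv.COLUMNS_CONF_KEY, "HBASE_ROW_KEY,cf:a,cf:b");
        // Asking for HFile output routes the job through the text path
        // (TsvImporterTextMapper feeding TextSortReducer) instead of direct Puts.
        conf.set(ImportTsv.BULK_OUTPUT_CONF_KEY, "/tmp/bulkload-hfiles");
        conf.set(ImportTsv.MAPPER_CONF_KEY, TsvImporterTextMapper.class.getName());
        conf.setBoolean(ImportTsv.SKIP_LINES_CONF_KEY, true); // tallied in the "Bad Lines" counter

        // Remaining args: target table and input directory (both illustrative).
        int exit = ToolRunner.run(conf, new ImportTsv(), new String[] { "mytable", "/input/tsv" });
        System.exit(exit);
      }
    }
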
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TsvImporterMapper.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TsvImporterMapper.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TsvImporterMapper.java
deleted file mode 100644
index a9d8e03..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TsvImporterMapper.java
+++ /dev/null
@@ -1,232 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.ArrayBackedTag;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.Tag;
-import org.apache.hadoop.hbase.TagType;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser.BadTsvLineException;
-import org.apache.hadoop.hbase.security.visibility.CellVisibility;
-import org.apache.hadoop.hbase.security.visibility.InvalidLabelException;
-import org.apache.hadoop.hbase.util.Base64;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Counter;
-import org.apache.hadoop.mapreduce.Mapper;
-
-/**
- * Write table content out to files in hdfs.
- */
-@InterfaceAudience.Public
-public class TsvImporterMapper
-extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put>
-{
-
- /** Timestamp for all inserted rows */
- protected long ts;
-
- /** Column separator */
- private String separator;
-
- /** Should skip bad lines */
- private boolean skipBadLines;
- /** Should skip empty columns*/
- private boolean skipEmptyColumns;
- private Counter badLineCount;
- private boolean logBadLines;
-
- protected ImportTsv.TsvParser parser;
-
- protected Configuration conf;
-
- protected String cellVisibilityExpr;
-
- protected long ttl;
-
- protected CellCreator kvCreator;
-
- private String hfileOutPath;
-
- /** List of cell tags */
- private List<Tag> tags;
-
- public long getTs() {
- return ts;
- }
-
- public boolean getSkipBadLines() {
- return skipBadLines;
- }
-
- public Counter getBadLineCount() {
- return badLineCount;
- }
-
- public void incrementBadLineCount(int count) {
- this.badLineCount.increment(count);
- }
-
- /**
- * Handles initializing this class with objects specific to it (i.e., the parser).
- * Common initialization that might be leveraged by a subclass is done in
- * <code>doSetup</code>. Hence a subclass may choose to override this method
- * and call <code>doSetup</code> as well before handling its own custom params.
- *
- * @param context
- */
- @Override
- protected void setup(Context context) {
- doSetup(context);
-
- conf = context.getConfiguration();
- parser = new ImportTsv.TsvParser(conf.get(ImportTsv.COLUMNS_CONF_KEY),
- separator);
- if (parser.getRowKeyColumnIndex() == -1) {
- throw new RuntimeException("No row key column specified");
- }
- this.kvCreator = new CellCreator(conf);
- tags = new ArrayList<>();
- }
-
- /**
- * Handles common parameter initialization that a subclass might want to leverage.
- * @param context
- */
- protected void doSetup(Context context) {
- Configuration conf = context.getConfiguration();
-
- // If a custom separator has been used,
- // decode it back from Base64 encoding.
- separator = conf.get(ImportTsv.SEPARATOR_CONF_KEY);
- if (separator == null) {
- separator = ImportTsv.DEFAULT_SEPARATOR;
- } else {
- separator = new String(Base64.decode(separator));
- }
- // Should never get 0 as we are setting this to a valid value in job
- // configuration.
- ts = conf.getLong(ImportTsv.TIMESTAMP_CONF_KEY, 0);
-
- skipEmptyColumns = context.getConfiguration().getBoolean(
- ImportTsv.SKIP_EMPTY_COLUMNS, false);
- skipBadLines = context.getConfiguration().getBoolean(
- ImportTsv.SKIP_LINES_CONF_KEY, true);
- badLineCount = context.getCounter("ImportTsv", "Bad Lines");
- logBadLines = context.getConfiguration().getBoolean(ImportTsv.LOG_BAD_LINES_CONF_KEY, false);
- hfileOutPath = conf.get(ImportTsv.BULK_OUTPUT_CONF_KEY);
- }
-
- /**
- * Convert a line of TSV text into an HBase table row.
- */
- @Override
- public void map(LongWritable offset, Text value,
- Context context)
- throws IOException {
- byte[] lineBytes = value.getBytes();
-
- try {
- ImportTsv.TsvParser.ParsedLine parsed = parser.parse(
- lineBytes, value.getLength());
- ImmutableBytesWritable rowKey =
- new ImmutableBytesWritable(lineBytes,
- parsed.getRowKeyOffset(),
- parsed.getRowKeyLength());
- // Retrieve timestamp if exists
- ts = parsed.getTimestamp(ts);
- cellVisibilityExpr = parsed.getCellVisibility();
- ttl = parsed.getCellTTL();
-
- // create tags for the parsed line
- if (hfileOutPath != null) {
- tags.clear();
- if (cellVisibilityExpr != null) {
- tags.addAll(kvCreator.getVisibilityExpressionResolver().createVisibilityExpTags(
- cellVisibilityExpr));
- }
- // Add TTL directly to the KV so we can vary them when packing more than one KV
- // into puts
- if (ttl > 0) {
- tags.add(new ArrayBackedTag(TagType.TTL_TAG_TYPE, Bytes.toBytes(ttl)));
- }
- }
- Put put = new Put(rowKey.copyBytes());
- for (int i = 0; i < parsed.getColumnCount(); i++) {
- if (i == parser.getRowKeyColumnIndex() || i == parser.getTimestampKeyColumnIndex()
- || i == parser.getAttributesKeyColumnIndex() || i == parser.getCellVisibilityColumnIndex()
- || i == parser.getCellTTLColumnIndex() || (skipEmptyColumns
- && parsed.getColumnLength(i) == 0)) {
- continue;
- }
- populatePut(lineBytes, parsed, put, i);
- }
- context.write(rowKey, put);
- } catch (ImportTsv.TsvParser.BadTsvLineException | IllegalArgumentException
- | InvalidLabelException badLine) {
- if (logBadLines) {
- System.err.println(value);
- }
- System.err.println("Bad line at offset: " + offset.get() + ":\n" + badLine.getMessage());
- if (skipBadLines) {
- incrementBadLineCount(1);
- return;
- }
- throw new IOException(badLine);
- } catch (InterruptedException e) {
- e.printStackTrace();
- }
- }
-
- protected void populatePut(byte[] lineBytes, ImportTsv.TsvParser.ParsedLine parsed, Put put,
- int i) throws BadTsvLineException, IOException {
- Cell cell = null;
- if (hfileOutPath == null) {
- cell = new KeyValue(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength(),
- parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0,
- parser.getQualifier(i).length, ts, KeyValue.Type.Put, lineBytes,
- parsed.getColumnOffset(i), parsed.getColumnLength(i));
- if (cellVisibilityExpr != null) {
- // We won't be validating the expression here. The Visibility CP will do
- // the validation
- put.setCellVisibility(new CellVisibility(cellVisibilityExpr));
- }
- if (ttl > 0) {
- put.setTTL(ttl);
- }
- } else {
- // Creating the KV which needs to be directly written to HFiles. Using the Facade
- // KVCreator for creation of kvs.
- cell = this.kvCreator.create(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength(),
- parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0,
- parser.getQualifier(i).length, ts, lineBytes, parsed.getColumnOffset(i),
- parsed.getColumnLength(i), tags);
- }
- put.add(cell);
- }
-}
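
A small, assumed example of the job-side setup that matches doSetup() above: the separator must
be Base64-encoded on the Configuration (because the mapper decodes it), and visibility/TTL columns
are declared through the special column names the parser is usually configured with; all values
shown are illustrative:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.mapreduce.ImportTsv;
    import org.apache.hadoop.hbase.util.Base64;
    import org.apache.hadoop.hbase.util.Bytes;

    public class TsvMapperConfExample {
      public static void main(String[] args) {
        Configuration conf = HBaseConfiguration.create();
        // doSetup() Base64-decodes the separator, so encode a '|' delimiter here.
        conf.set(ImportTsv.SEPARATOR_CONF_KEY, Base64.encodeBytes(Bytes.toBytes("|")));
        // HBASE_CELL_VISIBILITY and HBASE_CELL_TTL feed cellVisibilityExpr and ttl above.
        conf.set(ImportTsv.COLUMNS_CONF_KEY,
            "HBASE_ROW_KEY,cf:c1,HBASE_CELL_VISIBILITY,HBASE_CELL_TTL");
        // A fixed timestamp for all inserted rows (the mapper's default "ts").
        conf.setLong(ImportTsv.TIMESTAMP_CONF_KEY, System.currentTimeMillis());
      }
    }
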
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TsvImporterTextMapper.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TsvImporterTextMapper.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TsvImporterTextMapper.java
deleted file mode 100644
index 581f0d0..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TsvImporterTextMapper.java
+++ /dev/null
@@ -1,128 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Base64;
-import org.apache.hadoop.hbase.util.Pair;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.Counter;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.conf.Configuration;
-
-import java.io.IOException;
-
-/**
- * Write table content out to map output files.
- */
-@InterfaceAudience.Public
-public class TsvImporterTextMapper
-extends Mapper<LongWritable, Text, ImmutableBytesWritable, Text>
-{
-
- /** Column separator */
- private String separator;
-
- /** Should skip bad lines */
- private boolean skipBadLines;
- private Counter badLineCount;
- private boolean logBadLines;
-
- private ImportTsv.TsvParser parser;
-
- public boolean getSkipBadLines() {
- return skipBadLines;
- }
-
- public Counter getBadLineCount() {
- return badLineCount;
- }
-
- public void incrementBadLineCount(int count) {
- this.badLineCount.increment(count);
- }
-
- /**
- * Handles initializing this class with objects specific to it (i.e., the parser).
- * Common initialization that might be leveraged by a subclass is done in
- * <code>doSetup</code>. Hence a subclass may choose to override this method
- * and call <code>doSetup</code> as well before handling its own custom params.
- *
- * @param context
- */
- @Override
- protected void setup(Context context) {
- doSetup(context);
-
- Configuration conf = context.getConfiguration();
-
- parser = new ImportTsv.TsvParser(conf.get(ImportTsv.COLUMNS_CONF_KEY), separator);
- if (parser.getRowKeyColumnIndex() == -1) {
- throw new RuntimeException("No row key column specified");
- }
- }
-
- /**
- * Handles common parameter initialization that a subclass might want to leverage.
- * @param context
- */
- protected void doSetup(Context context) {
- Configuration conf = context.getConfiguration();
-
- // If a custom separator has been used,
- // decode it back from Base64 encoding.
- separator = conf.get(ImportTsv.SEPARATOR_CONF_KEY);
- if (separator == null) {
- separator = ImportTsv.DEFAULT_SEPARATOR;
- } else {
- separator = new String(Base64.decode(separator));
- }
-
- skipBadLines = context.getConfiguration().getBoolean(ImportTsv.SKIP_LINES_CONF_KEY, true);
- logBadLines = context.getConfiguration().getBoolean(ImportTsv.LOG_BAD_LINES_CONF_KEY, false);
- badLineCount = context.getCounter("ImportTsv", "Bad Lines");
- }
-
- /**
- * Convert a line of TSV text into an HBase table row.
- */
- @Override
- public void map(LongWritable offset, Text value, Context context) throws IOException {
- try {
- Pair<Integer,Integer> rowKeyOffsets = parser.parseRowKey(value.getBytes(), value.getLength());
- ImmutableBytesWritable rowKey = new ImmutableBytesWritable(
- value.getBytes(), rowKeyOffsets.getFirst(), rowKeyOffsets.getSecond());
- context.write(rowKey, value);
- } catch (ImportTsv.TsvParser.BadTsvLineException|IllegalArgumentException badLine) {
- if (logBadLines) {
- System.err.println(value);
- }
- System.err.println("Bad line at offset: " + offset.get() + ":\n" + badLine.getMessage());
- if (skipBadLines) {
- incrementBadLineCount(1);
- return;
- }
- throw new IOException(badLine);
- } catch (InterruptedException e) {
- e.printStackTrace();
- Thread.currentThread().interrupt();
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/VisibilityExpressionResolver.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/VisibilityExpressionResolver.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/VisibilityExpressionResolver.java
deleted file mode 100644
index a83a88f..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/VisibilityExpressionResolver.java
+++ /dev/null
@@ -1,45 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-import java.util.List;
-
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.hbase.Tag;
-
-/**
- * Interface to convert visibility expressions into Tags for storing along with Cells in HFiles.
- */
-@InterfaceAudience.Public
-public interface VisibilityExpressionResolver extends Configurable {
-
- /**
- * Gives the implementation a chance to initialize itself.
- */
- void init();
-
- /**
- * Convert visibility expression into tags to be serialized.
- * @param visExpression the label expression
- * @return The list of tags corresponding to the visibility expression. These tags will be stored
- * along with the Cells.
- */
- List<Tag> createVisibilityExpTags(String visExpression) throws IOException;
-}
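
For illustration only, a hedged sketch of what an implementation of this interface can look like;
unlike the default resolver (which resolves label ordinals against the visibility label service),
this toy version just stores the raw expression bytes in a visibility-typed tag:

    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.ArrayBackedTag;
    import org.apache.hadoop.hbase.Tag;
    import org.apache.hadoop.hbase.TagType;
    import org.apache.hadoop.hbase.mapreduce.VisibilityExpressionResolver;
    import org.apache.hadoop.hbase.util.Bytes;

    public class PassthroughVisibilityResolver implements VisibilityExpressionResolver {
      private Configuration conf;

      @Override public void setConf(Configuration conf) { this.conf = conf; }
      @Override public Configuration getConf() { return conf; }

      @Override
      public void init() {
        // Nothing to initialize for this toy resolver.
      }

      @Override
      public List<Tag> createVisibilityExpTags(String visExpression) throws IOException {
        List<Tag> tags = new ArrayList<>(1);
        // Store the expression verbatim; a real resolver would serialize label ordinals.
        tags.add(new ArrayBackedTag(TagType.VISIBILITY_TAG_TYPE, Bytes.toBytes(visExpression)));
        return tags;
      }
    }
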
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/WALInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/WALInputFormat.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/WALInputFormat.java
deleted file mode 100644
index 8b4e967..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/WALInputFormat.java
+++ /dev/null
@@ -1,344 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.EOFException;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.LocatedFileStatus;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.RemoteIterator;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
-import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
-import org.apache.hadoop.hbase.wal.WAL;
-import org.apache.hadoop.hbase.wal.WAL.Entry;
-import org.apache.hadoop.hbase.wal.WAL.Reader;
-import org.apache.hadoop.hbase.wal.WALKey;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapreduce.InputFormat;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.util.StringUtils;
-
-/**
- * Simple {@link InputFormat} for {@link org.apache.hadoop.hbase.wal.WAL} files.
- */
-@InterfaceAudience.Public
-public class WALInputFormat extends InputFormat<WALKey, WALEdit> {
- private static final Log LOG = LogFactory.getLog(WALInputFormat.class);
-
- public static final String START_TIME_KEY = "wal.start.time";
- public static final String END_TIME_KEY = "wal.end.time";
-
- /**
- * {@link InputSplit} for {@link WAL} files. Each split represents
- * exactly one log file.
- */
- static class WALSplit extends InputSplit implements Writable {
- private String logFileName;
- private long fileSize;
- private long startTime;
- private long endTime;
-
- /** for serialization */
- public WALSplit() {}
-
- /**
- * Represents a WALSplit, i.e. a single WAL file.
- * Start and end times are managed by the split, so that WAL files can be
- * filtered before WALEdits are passed to the mapper(s).
- * @param logFileName
- * @param fileSize
- * @param startTime
- * @param endTime
- */
- public WALSplit(String logFileName, long fileSize, long startTime, long endTime) {
- this.logFileName = logFileName;
- this.fileSize = fileSize;
- this.startTime = startTime;
- this.endTime = endTime;
- }
-
- @Override
- public long getLength() throws IOException, InterruptedException {
- return fileSize;
- }
-
- @Override
- public String[] getLocations() throws IOException, InterruptedException {
- // TODO: Find the data node with the most blocks for this WAL?
- return new String[] {};
- }
-
- public String getLogFileName() {
- return logFileName;
- }
-
- public long getStartTime() {
- return startTime;
- }
-
- public long getEndTime() {
- return endTime;
- }
-
- @Override
- public void readFields(DataInput in) throws IOException {
- logFileName = in.readUTF();
- fileSize = in.readLong();
- startTime = in.readLong();
- endTime = in.readLong();
- }
-
- @Override
- public void write(DataOutput out) throws IOException {
- out.writeUTF(logFileName);
- out.writeLong(fileSize);
- out.writeLong(startTime);
- out.writeLong(endTime);
- }
-
- @Override
- public String toString() {
- return logFileName + " (" + startTime + ":" + endTime + ") length:" + fileSize;
- }
- }
-
- /**
- * {@link RecordReader} for a {@link WAL} file.
- * Implementation shared with deprecated HLogInputFormat.
- */
- static abstract class WALRecordReader<K extends WALKey> extends RecordReader<K, WALEdit> {
- private Reader reader = null;
- // visible until we can remove the deprecated HLogInputFormat
- Entry currentEntry = new Entry();
- private long startTime;
- private long endTime;
- private Configuration conf;
- private Path logFile;
- private long currentPos;
-
- @Override
- public void initialize(InputSplit split, TaskAttemptContext context)
- throws IOException, InterruptedException {
- WALSplit hsplit = (WALSplit)split;
- logFile = new Path(hsplit.getLogFileName());
- conf = context.getConfiguration();
- LOG.info("Opening reader for "+split);
- openReader(logFile);
- this.startTime = hsplit.getStartTime();
- this.endTime = hsplit.getEndTime();
- }
-
- private void openReader(Path path) throws IOException
- {
- closeReader();
- reader = AbstractFSWALProvider.openReader(path, conf);
- seek();
- setCurrentPath(path);
- }
-
- private void setCurrentPath(Path path) {
- this.logFile = path;
- }
-
- private void closeReader() throws IOException {
- if (reader != null) {
- reader.close();
- reader = null;
- }
- }
-
- private void seek() throws IOException {
- if (currentPos != 0) {
- reader.seek(currentPos);
- }
- }
-
- @Override
- public boolean nextKeyValue() throws IOException, InterruptedException {
- if (reader == null) return false;
- this.currentPos = reader.getPosition();
- Entry temp;
- long i = -1;
- try {
- do {
- // skip older entries
- try {
- temp = reader.next(currentEntry);
- i++;
- } catch (EOFException x) {
- LOG.warn("Corrupted entry detected. Ignoring the rest of the file."
- + " (This is normal when a RegionServer crashed.)");
- return false;
- }
- } while (temp != null && temp.getKey().getWriteTime() < startTime);
-
- if (temp == null) {
- if (i > 0) LOG.info("Skipped " + i + " entries.");
- LOG.info("Reached end of file.");
- return false;
- } else if (i > 0) {
- LOG.info("Skipped " + i + " entries, until ts: " + temp.getKey().getWriteTime() + ".");
- }
- boolean res = temp.getKey().getWriteTime() <= endTime;
- if (!res) {
- LOG.info("Reached ts: " + temp.getKey().getWriteTime()
- + " ignoring the rest of the file.");
- }
- return res;
- } catch (IOException e) {
- Path archivedLog = AbstractFSWALProvider.getArchivedLogPath(logFile, conf);
- if (logFile != archivedLog) {
- openReader(archivedLog);
- // Retry recursively against the archived log location
- return nextKeyValue();
- } else {
- throw e;
- }
- }
- }
-
- @Override
- public WALEdit getCurrentValue() throws IOException, InterruptedException {
- return currentEntry.getEdit();
- }
-
- @Override
- public float getProgress() throws IOException, InterruptedException {
- // N/A depends on total number of entries, which is unknown
- return 0;
- }
-
- @Override
- public void close() throws IOException {
- LOG.info("Closing reader");
- if (reader != null) this.reader.close();
- }
- }
-
- /**
- * Handler for the non-deprecated WALKey version. Fold into WALRecordReader once we no longer
- * need to support HLogInputFormat.
- */
- static class WALKeyRecordReader extends WALRecordReader<WALKey> {
- @Override
- public WALKey getCurrentKey() throws IOException, InterruptedException {
- return currentEntry.getKey();
- }
- }
-
- @Override
- public List<InputSplit> getSplits(JobContext context) throws IOException,
- InterruptedException {
- return getSplits(context, START_TIME_KEY, END_TIME_KEY);
- }
-
- /**
- * implementation shared with deprecated HLogInputFormat
- */
- List<InputSplit> getSplits(final JobContext context, final String startKey, final String endKey)
- throws IOException, InterruptedException {
- Configuration conf = context.getConfiguration();
- boolean ignoreMissing = conf.getBoolean(WALPlayer.IGNORE_MISSING_FILES, false);
- Path[] inputPaths = getInputPaths(conf);
- long startTime = conf.getLong(startKey, Long.MIN_VALUE);
- long endTime = conf.getLong(endKey, Long.MAX_VALUE);
-
- List<FileStatus> allFiles = new ArrayList<FileStatus>();
- for(Path inputPath: inputPaths){
- FileSystem fs = inputPath.getFileSystem(conf);
- try {
- List<FileStatus> files = getFiles(fs, inputPath, startTime, endTime);
- allFiles.addAll(files);
- } catch (FileNotFoundException e) {
- if (ignoreMissing) {
- LOG.warn("File "+ inputPath +" is missing. Skipping it.");
- continue;
- }
- throw e;
- }
- }
- List<InputSplit> splits = new ArrayList<InputSplit>(allFiles.size());
- for (FileStatus file : allFiles) {
- splits.add(new WALSplit(file.getPath().toString(), file.getLen(), startTime, endTime));
- }
- return splits;
- }
-
- private Path[] getInputPaths(Configuration conf) {
- String inpDirs = conf.get(FileInputFormat.INPUT_DIR);
- return StringUtils.stringToPath(
- inpDirs.split(conf.get(WALPlayer.INPUT_FILES_SEPARATOR_KEY, ",")));
- }
-
- private List<FileStatus> getFiles(FileSystem fs, Path dir, long startTime, long endTime)
- throws IOException {
- List<FileStatus> result = new ArrayList<>();
- LOG.debug("Scanning " + dir.toString() + " for WAL files");
-
- RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(dir);
- if (!iter.hasNext()) return Collections.emptyList();
- while (iter.hasNext()) {
- LocatedFileStatus file = iter.next();
- if (file.isDirectory()) {
- // recurse into sub directories
- result.addAll(getFiles(fs, file.getPath(), startTime, endTime));
- } else {
- String name = file.getPath().toString();
- int idx = name.lastIndexOf('.');
- if (idx > 0) {
- try {
- long fileStartTime = Long.parseLong(name.substring(idx+1));
- if (fileStartTime <= endTime) {
- LOG.info("Found: " + file);
- result.add(file);
- }
- } catch (NumberFormatException x) {
- idx = 0;
- }
- }
- if (idx == 0) {
- LOG.warn("File " + name + " does not appear to be a WAL file. Skipping...");
- }
- }
- }
- return result;
- }
-
- @Override
- public RecordReader<WALKey, WALEdit> createRecordReader(InputSplit split,
- TaskAttemptContext context) throws IOException, InterruptedException {
- return new WALKeyRecordReader();
- }
-}
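
For orientation, a minimal sketch (assumed, not part of this patch) of wiring WALInputFormat into
a job with a time window; the WAL directory and the one-hour window are illustrative, and the
mapper/reducer classes are omitted for brevity:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.mapreduce.WALInputFormat;
    import org.apache.hadoop.hbase.mapreduce.WALPlayer;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

    public class WalScanJobSetup {
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        long now = System.currentTimeMillis();
        // Only WAL entries whose write time falls in the last hour are passed on.
        conf.setLong(WALInputFormat.START_TIME_KEY, now - 3600_000L);
        conf.setLong(WALInputFormat.END_TIME_KEY, now);
        // Tolerate files that were archived away between listing and reading.
        conf.setBoolean(WALPlayer.IGNORE_MISSING_FILES, true);

        Job job = Job.getInstance(conf, "wal-scan");
        job.setInputFormatClass(WALInputFormat.class);
        FileInputFormat.addInputPath(job, new Path("/hbase/oldWALs")); // illustrative WAL dir
      }
    }
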
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/WALPlayer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/WALPlayer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/WALPlayer.java
deleted file mode 100644
index b1e655c..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/WALPlayer.java
+++ /dev/null
@@ -1,384 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.Map;
-import java.util.TreeMap;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.KeyValueUtil;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Delete;
-import org.apache.hadoop.hbase.client.Mutation;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.RegionLocator;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.regionserver.wal.WALCellCodec;
-import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.wal.WALKey;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-/**
- * A tool to replay WAL files as a M/R job.
- * The WAL can be replayed for a set of tables or all tables,
- * and a time range can be provided (in milliseconds).
- * The WAL is filtered to the passed set of tables and the output
- * can optionally be mapped to another set of tables.
- *
- * WAL replay can also generate HFiles for later bulk importing,
- * in which case the WAL is replayed for a single table only.
- */
-@InterfaceAudience.Public
-public class WALPlayer extends Configured implements Tool {
- private static final Log LOG = LogFactory.getLog(WALPlayer.class);
- final static String NAME = "WALPlayer";
- public final static String BULK_OUTPUT_CONF_KEY = "wal.bulk.output";
- public final static String TABLES_KEY = "wal.input.tables";
- public final static String TABLE_MAP_KEY = "wal.input.tablesmap";
- public final static String INPUT_FILES_SEPARATOR_KEY = "wal.input.separator";
- public final static String IGNORE_MISSING_FILES = "wal.input.ignore.missing.files";
-
-
- // This relies on Hadoop Configuration to handle warning about deprecated configs and
- // to set the correct non-deprecated configs when an old one shows up.
- static {
- Configuration.addDeprecation("hlog.bulk.output", BULK_OUTPUT_CONF_KEY);
- Configuration.addDeprecation("hlog.input.tables", TABLES_KEY);
- Configuration.addDeprecation("hlog.input.tablesmap", TABLE_MAP_KEY);
- }
-
- private final static String JOB_NAME_CONF_KEY = "mapreduce.job.name";
-
- public WALPlayer(){
- }
-
- protected WALPlayer(final Configuration c) {
- super(c);
- }
-
- /**
- * A mapper that just writes out KeyValues.
- * This one can be used together with {@link KeyValueSortReducer}
- */
- static class WALKeyValueMapper
- extends Mapper<WALKey, WALEdit, ImmutableBytesWritable, KeyValue> {
- private byte[] table;
-
- @Override
- public void map(WALKey key, WALEdit value,
- Context context)
- throws IOException {
- try {
- // skip all other tables
- if (Bytes.equals(table, key.getTablename().getName())) {
- for (Cell cell : value.getCells()) {
- KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
- if (WALEdit.isMetaEditFamily(kv)) {
- continue;
- }
- context.write(new ImmutableBytesWritable(CellUtil.cloneRow(kv)), kv);
- }
- }
- } catch (InterruptedException e) {
- e.printStackTrace();
- }
- }
-
- @Override
- public void setup(Context context) throws IOException {
- // only a single table is supported when HFiles are generated with HFileOutputFormat
- String[] tables = context.getConfiguration().getStrings(TABLES_KEY);
- if (tables == null || tables.length != 1) {
- // this can only happen when WALKeyValueMapper is used directly by a class other than WALPlayer
- throw new IOException("Exactly one table must be specified for bulk HFile case.");
- }
- table = Bytes.toBytes(tables[0]);
-
- }
-
- }
-
- /**
- * A mapper that writes out {@link Mutation} to be directly applied to
- * a running HBase instance.
- */
- protected static class WALMapper
- extends Mapper<WALKey, WALEdit, ImmutableBytesWritable, Mutation> {
- private Map<TableName, TableName> tables = new TreeMap<>();
-
- @Override
- public void map(WALKey key, WALEdit value, Context context)
- throws IOException {
- try {
- if (tables.isEmpty() || tables.containsKey(key.getTablename())) {
- TableName targetTable = tables.isEmpty() ?
- key.getTablename() :
- tables.get(key.getTablename());
- ImmutableBytesWritable tableOut = new ImmutableBytesWritable(targetTable.getName());
- Put put = null;
- Delete del = null;
- Cell lastCell = null;
- for (Cell cell : value.getCells()) {
- // filtering WAL meta entries
- if (WALEdit.isMetaEditFamily(cell)) {
- continue;
- }
-
- // Allow a subclass to filter out this cell.
- if (filter(context, cell)) {
- // A WALEdit may contain multiple operations (HBASE-3584) and/or
- // multiple rows (HBASE-5229).
- // Aggregate as much as possible into a single Put/Delete
- // operation before writing to the context.
- if (lastCell == null || lastCell.getTypeByte() != cell.getTypeByte()
- || !CellUtil.matchingRow(lastCell, cell)) {
- // row or type changed, write out aggregate KVs.
- if (put != null) {
- context.write(tableOut, put);
- }
- if (del != null) {
- context.write(tableOut, del);
- }
- if (CellUtil.isDelete(cell)) {
- del = new Delete(CellUtil.cloneRow(cell));
- } else {
- put = new Put(CellUtil.cloneRow(cell));
- }
- }
- if (CellUtil.isDelete(cell)) {
- del.add(cell);
- } else {
- put.add(cell);
- }
- }
- lastCell = cell;
- }
- // write residual KVs
- if (put != null) {
- context.write(tableOut, put);
- }
- if (del != null) {
- context.write(tableOut, del);
- }
- }
- } catch (InterruptedException e) {
- e.printStackTrace();
- }
- }
-
- protected boolean filter(Context context, final Cell cell) {
- return true;
- }
-
- @Override
- protected void
- cleanup(Mapper<WALKey, WALEdit, ImmutableBytesWritable, Mutation>.Context context)
- throws IOException, InterruptedException {
- super.cleanup(context);
- }
-
- @Override
- public void setup(Context context) throws IOException {
- String[] tableMap = context.getConfiguration().getStrings(TABLE_MAP_KEY);
- String[] tablesToUse = context.getConfiguration().getStrings(TABLES_KEY);
- if (tableMap == null) {
- tableMap = tablesToUse;
- }
- if (tablesToUse == null) {
- // Then user wants all tables.
- } else if (tablesToUse.length != tableMap.length) {
- // this can only happen when WALMapper is used directly by a class other than WALPlayer
- throw new IOException("Incorrect table mapping specified .");
- }
- int i = 0;
- if (tablesToUse != null) {
- for (String table : tablesToUse) {
- tables.put(TableName.valueOf(table),
- TableName.valueOf(tableMap[i++]));
- }
- }
- }
- }
-
- void setupTime(Configuration conf, String option) throws IOException {
- String val = conf.get(option);
- if (null == val) {
- return;
- }
- long ms;
- try {
- // first try to parse in user friendly form
- ms = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SS").parse(val).getTime();
- } catch (ParseException pe) {
- try {
- // then see if just a number of ms's was specified
- ms = Long.parseLong(val);
- } catch (NumberFormatException nfe) {
- throw new IOException(option
- + " must be specified either in the form 2001-02-20T16:35:06.99 "
- + "or as number of milliseconds");
- }
- }
- conf.setLong(option, ms);
- }
-
- /**
- * Sets up the actual job.
- *
- * @param args The command line parameters.
- * @return The newly created job.
- * @throws IOException When setting up the job fails.
- */
- public Job createSubmittableJob(String[] args) throws IOException {
- Configuration conf = getConf();
- setupTime(conf, WALInputFormat.START_TIME_KEY);
- setupTime(conf, WALInputFormat.END_TIME_KEY);
- String inputDirs = args[0];
- String[] tables = args[1].split(",");
- String[] tableMap;
- if (args.length > 2) {
- tableMap = args[2].split(",");
- if (tableMap.length != tables.length) {
- throw new IOException("The same number of tables and mapping must be provided.");
- }
- } else {
- // if no mapping is specified, map each table to itself
- tableMap = tables;
- }
- conf.setStrings(TABLES_KEY, tables);
- conf.setStrings(TABLE_MAP_KEY, tableMap);
- conf.set(FileInputFormat.INPUT_DIR, inputDirs);
- Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + System.currentTimeMillis()));
- job.setJarByClass(WALPlayer.class);
-
- job.setInputFormatClass(WALInputFormat.class);
- job.setMapOutputKeyClass(ImmutableBytesWritable.class);
-
- String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
- if (hfileOutPath != null) {
- LOG.debug("add incremental job :" + hfileOutPath + " from " + inputDirs);
-
- // the bulk HFile case
- if (tables.length != 1) {
- throw new IOException("Exactly one table must be specified for the bulk export option");
- }
- TableName tableName = TableName.valueOf(tables[0]);
- job.setMapperClass(WALKeyValueMapper.class);
- job.setReducerClass(KeyValueSortReducer.class);
- Path outputDir = new Path(hfileOutPath);
- FileOutputFormat.setOutputPath(job, outputDir);
- job.setMapOutputValueClass(KeyValue.class);
- try (Connection conn = ConnectionFactory.createConnection(conf);
- Table table = conn.getTable(tableName);
- RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
- HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(), regionLocator);
- }
- TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
- org.apache.hadoop.hbase.shaded.com.google.common.base.Preconditions.class);
- } else {
- // output to live cluster
- job.setMapperClass(WALMapper.class);
- job.setOutputFormatClass(MultiTableOutputFormat.class);
- TableMapReduceUtil.addDependencyJars(job);
- TableMapReduceUtil.initCredentials(job);
- // No reducers.
- job.setNumReduceTasks(0);
- }
- String codecCls = WALCellCodec.getWALCellCodecClass(conf);
- try {
- TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), Class.forName(codecCls));
- } catch (Exception e) {
- throw new IOException("Cannot determine wal codec class " + codecCls, e);
- }
- return job;
- }
-
-
- /**
- * Print usage
- * @param errorMsg Error message. Can be null.
- */
- private void usage(final String errorMsg) {
- if (errorMsg != null && errorMsg.length() > 0) {
- System.err.println("ERROR: " + errorMsg);
- }
- System.err.println("Usage: " + NAME + " [options] <wal inputdir> <tables> [<tableMappings>]");
- System.err.println("Read all WAL entries for <tables>.");
- System.err.println("If no tables (\"\") are specific, all tables are imported.");
- System.err.println("(Careful, even hbase:meta entries will be imported"+
- " in that case.)");
- System.err.println("Otherwise <tables> is a comma separated list of tables.\n");
- System.err.println("The WAL entries can be mapped to new set of tables via <tableMapping>.");
- System.err.println("<tableMapping> is a command separated list of targettables.");
- System.err.println("If specified, each table in <tables> must have a mapping.\n");
- System.err.println("By default " + NAME + " will load data directly into HBase.");
- System.err.println("To generate HFiles for a bulk data load instead, pass the option:");
- System.err.println(" -D" + BULK_OUTPUT_CONF_KEY + "=/path/for/output");
- System.err.println(" (Only one table can be specified, and no mapping is allowed!)");
- System.err.println("Other options: (specify time range to WAL edit to consider)");
- System.err.println(" -D" + WALInputFormat.START_TIME_KEY + "=[date|ms]");
- System.err.println(" -D" + WALInputFormat.END_TIME_KEY + "=[date|ms]");
- System.err.println(" -D " + JOB_NAME_CONF_KEY
- + "=jobName - use the specified mapreduce job name for the wal player");
- System.err.println("For performance also consider the following options:\n"
- + " -Dmapreduce.map.speculative=false\n"
- + " -Dmapreduce.reduce.speculative=false");
- }
-
- /**
- * Main entry point.
- *
- * @param args The command line parameters.
- * @throws Exception When running the job fails.
- */
- public static void main(String[] args) throws Exception {
- int ret = ToolRunner.run(new WALPlayer(HBaseConfiguration.create()), args);
- System.exit(ret);
- }
-
- @Override
- public int run(String[] args) throws Exception {
- if (args.length < 2) {
- usage("Wrong number of arguments: " + args.length);
- System.exit(-1);
- }
- Job job = createSubmittableJob(args);
- return job.waitForCompletion(true) ? 0 : 1;
- }
-}
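For context, the sketch below shows one plausible way to drive WALPlayer programmatically rather than from the shell, mirroring what main() above does with ToolRunner. This is not part of this commit; the WAL directory and table names are hypothetical placeholders.

// Minimal, hypothetical sketch of invoking WALPlayer from Java code.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.WALPlayer;
import org.apache.hadoop.util.ToolRunner;

public class WALPlayerExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // Uncomment to emit HFiles for a later bulk load instead of writing to a live
    // cluster (in that mode exactly one table may be named and no mapping is allowed):
    // conf.set(WALPlayer.BULK_OUTPUT_CONF_KEY, "/tmp/walplayer-hfiles");

    // Tool args: <wal inputdir> <tables> [<tableMappings>]; values here are placeholders.
    String[] toolArgs = { "/hbase/oldWALs", "sourceTable", "targetTable" };
    int exitCode = ToolRunner.run(conf, new WALPlayer(), toolArgs);
    System.exit(exitCode);
  }
}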
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/package-info.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/package-info.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/package-info.java
deleted file mode 100644
index 199e168..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/package-info.java
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
-Provides HBase <a href="http://wiki.apache.org/hadoop/HadoopMapReduce">MapReduce</a>
-Input/OutputFormats, a table indexing MapReduce job, and utility methods.
-
-<p>See <a href="http://hbase.apache.org/book.html#mapreduce">HBase and MapReduce</a>
-in the HBase Reference Guide for mapreduce over hbase documentation.
-*/
-package org.apache.hadoop.hbase.mapreduce;
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestTableMapReduceUtil.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestTableMapReduceUtil.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestTableMapReduceUtil.java
deleted file mode 100644
index ac2f20d..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestTableMapReduceUtil.java
+++ /dev/null
@@ -1,272 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Set;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileUtil;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.RunningJob;
-import org.junit.AfterClass;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.ImmutableMap;
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.ImmutableSet;
-
-@Category({MapReduceTests.class, LargeTests.class})
-public class TestTableMapReduceUtil {
-
- private static final Log LOG = LogFactory
- .getLog(TestTableMapReduceUtil.class);
-
- private static Table presidentsTable;
- private static final String TABLE_NAME = "People";
-
- private static final byte[] COLUMN_FAMILY = Bytes.toBytes("info");
- private static final byte[] COLUMN_QUALIFIER = Bytes.toBytes("name");
-
- private static ImmutableSet<String> presidentsRowKeys = ImmutableSet.of(
- "president1", "president2", "president3");
- private static Iterator<String> presidentNames = ImmutableSet.of(
- "John F. Kennedy", "George W. Bush", "Barack Obama").iterator();
-
- private static ImmutableSet<String> actorsRowKeys = ImmutableSet.of("actor1",
- "actor2");
- private static Iterator<String> actorNames = ImmutableSet.of(
- "Jack Nicholson", "Martin Freeman").iterator();
-
- private static String PRESIDENT_PATTERN = "president";
- private static String ACTOR_PATTERN = "actor";
- private static ImmutableMap<String, ImmutableSet<String>> relation = ImmutableMap
- .of(PRESIDENT_PATTERN, presidentsRowKeys, ACTOR_PATTERN, actorsRowKeys);
-
- private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
-
- @BeforeClass
- public static void beforeClass() throws Exception {
- UTIL.startMiniCluster();
- presidentsTable = createAndFillTable(TableName.valueOf(TABLE_NAME));
- }
-
- @AfterClass
- public static void afterClass() throws Exception {
- UTIL.shutdownMiniCluster();
- }
-
- @Before
- public void before() throws IOException {
- LOG.info("before");
- UTIL.ensureSomeRegionServersAvailable(1);
- LOG.info("before done");
- }
-
- public static Table createAndFillTable(TableName tableName) throws IOException {
- Table table = UTIL.createTable(tableName, COLUMN_FAMILY);
- createPutCommand(table);
- return table;
- }
-
- private static void createPutCommand(Table table) throws IOException {
- for (String president : presidentsRowKeys) {
- if (presidentNames.hasNext()) {
- Put p = new Put(Bytes.toBytes(president));
- p.addColumn(COLUMN_FAMILY, COLUMN_QUALIFIER, Bytes.toBytes(presidentNames.next()));
- table.put(p);
- }
- }
-
- for (String actor : actorsRowKeys) {
- if (actorNames.hasNext()) {
- Put p = new Put(Bytes.toBytes(actor));
- p.addColumn(COLUMN_FAMILY, COLUMN_QUALIFIER, Bytes.toBytes(actorNames.next()));
- table.put(p);
- }
- }
- }
-
- /**
- * Check that the given number of reduce tasks for the given job configuration
- * does not exceed the number of regions for the given table.
- */
- @Test
- public void shouldNumberOfReduceTaskNotExceedNumberOfRegionsForGivenTable()
- throws IOException {
- Assert.assertNotNull(presidentsTable);
- Configuration cfg = UTIL.getConfiguration();
- JobConf jobConf = new JobConf(cfg);
- TableMapReduceUtil.setNumReduceTasks(TABLE_NAME, jobConf);
- TableMapReduceUtil.limitNumReduceTasks(TABLE_NAME, jobConf);
- TableMapReduceUtil.setScannerCaching(jobConf, 100);
- assertEquals(1, jobConf.getNumReduceTasks());
- assertEquals(100, jobConf.getInt("hbase.client.scanner.caching", 0));
-
- jobConf.setNumReduceTasks(10);
- TableMapReduceUtil.setNumMapTasks(TABLE_NAME, jobConf);
- TableMapReduceUtil.limitNumReduceTasks(TABLE_NAME, jobConf);
- assertEquals(1, jobConf.getNumReduceTasks());
- }
-
- @Test
- public void shouldNumberOfMapTaskNotExceedNumberOfRegionsForGivenTable()
- throws IOException {
- Configuration cfg = UTIL.getConfiguration();
- JobConf jobConf = new JobConf(cfg);
- TableMapReduceUtil.setNumReduceTasks(TABLE_NAME, jobConf);
- TableMapReduceUtil.limitNumMapTasks(TABLE_NAME, jobConf);
- assertEquals(1, jobConf.getNumMapTasks());
-
- jobConf.setNumMapTasks(10);
- TableMapReduceUtil.setNumMapTasks(TABLE_NAME, jobConf);
- TableMapReduceUtil.limitNumMapTasks(TABLE_NAME, jobConf);
- assertEquals(1, jobConf.getNumMapTasks());
- }
-
- @Test
- @SuppressWarnings("deprecation")
- public void shoudBeValidMapReduceEvaluation() throws Exception {
- Configuration cfg = UTIL.getConfiguration();
- JobConf jobConf = new JobConf(cfg);
- try {
- jobConf.setJobName("process row task");
- jobConf.setNumReduceTasks(1);
- TableMapReduceUtil.initTableMapJob(TABLE_NAME, new String(COLUMN_FAMILY),
- ClassificatorMapper.class, ImmutableBytesWritable.class, Put.class,
- jobConf);
- TableMapReduceUtil.initTableReduceJob(TABLE_NAME,
- ClassificatorRowReduce.class, jobConf);
- RunningJob job = JobClient.runJob(jobConf);
- assertTrue(job.isSuccessful());
- } finally {
- if (jobConf != null)
- FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
- }
- }
-
- @Test
- @SuppressWarnings("deprecation")
- public void shoudBeValidMapReduceWithPartitionerEvaluation()
- throws IOException {
- Configuration cfg = UTIL.getConfiguration();
- JobConf jobConf = new JobConf(cfg);
- try {
- jobConf.setJobName("process row task");
- jobConf.setNumReduceTasks(2);
- TableMapReduceUtil.initTableMapJob(TABLE_NAME, new String(COLUMN_FAMILY),
- ClassificatorMapper.class, ImmutableBytesWritable.class, Put.class,
- jobConf);
-
- TableMapReduceUtil.initTableReduceJob(TABLE_NAME,
- ClassificatorRowReduce.class, jobConf, HRegionPartitioner.class);
- RunningJob job = JobClient.runJob(jobConf);
- assertTrue(job.isSuccessful());
- } finally {
- if (jobConf != null)
- FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
- }
- }
-
- @SuppressWarnings("deprecation")
- static class ClassificatorRowReduce extends MapReduceBase implements
- TableReduce<ImmutableBytesWritable, Put> {
-
- @Override
- public void reduce(ImmutableBytesWritable key, Iterator<Put> values,
- OutputCollector<ImmutableBytesWritable, Put> output, Reporter reporter)
- throws IOException {
- String strKey = Bytes.toString(key.get());
- List<Put> result = new ArrayList<>();
- while (values.hasNext())
- result.add(values.next());
-
- if (relation.keySet().contains(strKey)) {
- Set<String> set = relation.get(strKey);
- if (set != null) {
- assertEquals(set.size(), result.size());
- } else {
- throwAccertionError("Test infrastructure error: set is null");
- }
- } else {
- throwAccertionError("Test infrastructure error: key not found in map");
- }
- }
-
- private void throwAccertionError(String errorMessage) throws AssertionError {
- throw new AssertionError(errorMessage);
- }
- }
-
- @SuppressWarnings("deprecation")
- static class ClassificatorMapper extends MapReduceBase implements
- TableMap<ImmutableBytesWritable, Put> {
-
- @Override
- public void map(ImmutableBytesWritable row, Result result,
- OutputCollector<ImmutableBytesWritable, Put> outCollector,
- Reporter reporter) throws IOException {
- String rowKey = Bytes.toString(result.getRow());
- final ImmutableBytesWritable pKey = new ImmutableBytesWritable(
- Bytes.toBytes(PRESIDENT_PATTERN));
- final ImmutableBytesWritable aKey = new ImmutableBytesWritable(
- Bytes.toBytes(ACTOR_PATTERN));
- ImmutableBytesWritable outKey = null;
-
- if (rowKey.startsWith(PRESIDENT_PATTERN)) {
- outKey = pKey;
- } else if (rowKey.startsWith(ACTOR_PATTERN)) {
- outKey = aKey;
- } else {
- throw new AssertionError("unexpected rowKey");
- }
-
- String name = Bytes.toString(result.getValue(COLUMN_FAMILY,
- COLUMN_QUALIFIER));
- outCollector.collect(outKey,
- new Put(Bytes.toBytes("rowKey2"))
- .addColumn(COLUMN_FAMILY, COLUMN_QUALIFIER, Bytes.toBytes(name)));
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestTableOutputFormatConnectionExhaust.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestTableOutputFormatConnectionExhaust.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestTableOutputFormatConnectionExhaust.java
deleted file mode 100644
index 835117c..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestTableOutputFormatConnectionExhaust.java
+++ /dev/null
@@ -1,104 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.testclassification.MediumTests;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.RecordWriter;
-import org.junit.AfterClass;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-import java.io.IOException;
-
-import static org.junit.Assert.fail;
-
-/**
- * Spark creates many instances of TableOutputFormat within a single process. We need to make
- * sure we can have many instances and not leak connections.
- *
- * This test creates a few TableOutputFormats and shouldn't fail due to ZK connection exhaustion.
- */
-@Category(MediumTests.class)
-public class TestTableOutputFormatConnectionExhaust {
-
- private static final Log LOG =
- LogFactory.getLog(TestTableOutputFormatConnectionExhaust.class);
-
- private final static HBaseTestingUtility UTIL = new HBaseTestingUtility();
- static final String TABLE = "TestTableOutputFormatConnectionExhaust";
- static final String FAMILY = "family";
-
- @BeforeClass
- public static void beforeClass() throws Exception {
- // Default in ZookeeperMiniCluster is 1000, setting artificially low to trigger exhaustion.
- // need min of 7 to properly start the default mini HBase cluster
- UTIL.getConfiguration().setInt(HConstants.ZOOKEEPER_MAX_CLIENT_CNXNS, 10);
- UTIL.startMiniCluster();
- }
-
- @AfterClass
- public static void afterClass() throws Exception {
- UTIL.shutdownMiniCluster();
- }
-
- @Before
- public void before() throws IOException {
- LOG.info("before");
- UTIL.ensureSomeRegionServersAvailable(1);
- LOG.info("before done");
- }
-
- /**
- * Open and close a TableOutputFormat. Closing the RecordWriter should release HBase
- * Connection (ZK) resources, and will throw an exception if they are exhausted.
- */
- static void openCloseTableOutputFormat(int iter) throws IOException {
- LOG.info("Instantiating TableOutputFormat connection " + iter);
- JobConf conf = new JobConf();
- conf.addResource(UTIL.getConfiguration());
- conf.set(TableOutputFormat.OUTPUT_TABLE, TABLE);
- TableMapReduceUtil.initTableMapJob(TABLE, FAMILY, TableMap.class,
- ImmutableBytesWritable.class, ImmutableBytesWritable.class, conf);
- TableOutputFormat tof = new TableOutputFormat();
- RecordWriter rw = tof.getRecordWriter(null, conf, TABLE, null);
- rw.close(null);
- }
-
- @Test
- public void testConnectionExhaustion() throws IOException {
- int MAX_INSTANCES = 5; // fails on iteration 3 if zk connections leak
- for (int i = 0; i < MAX_INSTANCES; i++) {
- final int iter = i;
- try {
- openCloseTableOutputFormat(iter);
- } catch (Exception e) {
- LOG.error("Exception encountered", e);
- fail("Failed on iteration " + i);
- }
- }
- }
-
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestTableSnapshotInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestTableSnapshotInputFormat.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestTableSnapshotInputFormat.java
deleted file mode 100644
index c689c83..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestTableSnapshotInputFormat.java
+++ /dev/null
@@ -1,271 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.mapred;
-
-import static org.mockito.Mockito.mock;
-
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormatTestBase;
-import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapred.InputSplit;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.RecordReader;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.RunningJob;
-import org.apache.hadoop.mapred.lib.NullOutputFormat;
-import org.junit.Assert;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
-
-import java.io.IOException;
-import java.util.Iterator;
-
-@Category({VerySlowMapReduceTests.class, LargeTests.class})
-public class TestTableSnapshotInputFormat extends TableSnapshotInputFormatTestBase {
-
- private static final byte[] aaa = Bytes.toBytes("aaa");
- private static final byte[] after_zzz = Bytes.toBytes("zz{"); // 'z' + 1 => '{'
- private static final String COLUMNS =
- Bytes.toString(FAMILIES[0]) + " " + Bytes.toString(FAMILIES[1]);
-
- @Rule
- public TestName name = new TestName();
-
- @Override
- protected byte[] getStartRow() {
- return aaa;
- }
-
- @Override
- protected byte[] getEndRow() {
- return after_zzz;
- }
-
- static class TestTableSnapshotMapper extends MapReduceBase
- implements TableMap<ImmutableBytesWritable, NullWritable> {
- @Override
- public void map(ImmutableBytesWritable key, Result value,
- OutputCollector<ImmutableBytesWritable, NullWritable> collector, Reporter reporter)
- throws IOException {
- verifyRowFromMap(key, value);
- collector.collect(key, NullWritable.get());
- }
- }
-
- public static class TestTableSnapshotReducer extends MapReduceBase
- implements Reducer<ImmutableBytesWritable, NullWritable, NullWritable, NullWritable> {
- HBaseTestingUtility.SeenRowTracker rowTracker =
- new HBaseTestingUtility.SeenRowTracker(aaa, after_zzz);
-
- @Override
- public void reduce(ImmutableBytesWritable key, Iterator<NullWritable> values,
- OutputCollector<NullWritable, NullWritable> collector, Reporter reporter)
- throws IOException {
- rowTracker.addRow(key.get());
- }
-
- @Override
- public void close() {
- rowTracker.validate();
- }
- }
-
- @Test
- public void testInitTableSnapshotMapperJobConfig() throws Exception {
- setupCluster();
- final TableName tableName = TableName.valueOf(name.getMethodName());
- String snapshotName = "foo";
-
- try {
- createTableAndSnapshot(UTIL, tableName, snapshotName, getStartRow(), getEndRow(), 1);
- JobConf job = new JobConf(UTIL.getConfiguration());
- Path tmpTableDir = UTIL.getDataTestDirOnTestFS(snapshotName);
-
- TableMapReduceUtil.initTableSnapshotMapJob(snapshotName,
- COLUMNS, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
- NullWritable.class, job, false, tmpTableDir);
-
- // TODO: would be better to examine directly the cache instance that results from this
- // config. Currently this is not possible because BlockCache initialization is static.
- Assert.assertEquals(
- "Snapshot job should be configured for default LruBlockCache.",
- HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT,
- job.getFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, -1), 0.01);
- Assert.assertEquals(
- "Snapshot job should not use BucketCache.",
- 0, job.getFloat("hbase.bucketcache.size", -1), 0.01);
- } finally {
- UTIL.getAdmin().deleteSnapshot(snapshotName);
- UTIL.deleteTable(tableName);
- tearDownCluster();
- }
- }
-
- // TODO: mapred does not support limiting input range by startrow, endrow.
- // Thus the following tests must override parameter verification.
-
- @Test
- @Override
- public void testWithMockedMapReduceMultiRegion() throws Exception {
- testWithMockedMapReduce(UTIL, "testWithMockedMapReduceMultiRegion", 10, 10);
- }
-
- @Test
- @Override
- public void testWithMapReduceMultiRegion() throws Exception {
- testWithMapReduce(UTIL, "testWithMapReduceMultiRegion", 10, 10, false);
- }
-
- @Test
- @Override
- // run the MR job while HBase is offline
- public void testWithMapReduceAndOfflineHBaseMultiRegion() throws Exception {
- testWithMapReduce(UTIL, "testWithMapReduceAndOfflineHBaseMultiRegion", 10, 10, true);
- }
-
- @Override
- public void testRestoreSnapshotDoesNotCreateBackRefLinksInit(TableName tableName,
- String snapshotName, Path tmpTableDir) throws Exception {
- JobConf job = new JobConf(UTIL.getConfiguration());
- TableMapReduceUtil.initTableSnapshotMapJob(snapshotName,
- COLUMNS, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
- NullWritable.class, job, false, tmpTableDir);
- }
-
- @Override
- protected void testWithMockedMapReduce(HBaseTestingUtility util, String snapshotName,
- int numRegions, int expectedNumSplits) throws Exception {
- setupCluster();
- final TableName tableName = TableName.valueOf(name.getMethodName());
- try {
- createTableAndSnapshot(
- util, tableName, snapshotName, getStartRow(), getEndRow(), numRegions);
-
- JobConf job = new JobConf(util.getConfiguration());
- Path tmpTableDir = util.getDataTestDirOnTestFS(snapshotName);
-
- TableMapReduceUtil.initTableSnapshotMapJob(snapshotName,
- COLUMNS, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
- NullWritable.class, job, false, tmpTableDir);
-
- // mapred doesn't support start and end keys? o.O
- verifyWithMockedMapReduce(job, numRegions, expectedNumSplits, getStartRow(), getEndRow());
-
- } finally {
- util.getAdmin().deleteSnapshot(snapshotName);
- util.deleteTable(tableName);
- tearDownCluster();
- }
- }
-
- private void verifyWithMockedMapReduce(JobConf job, int numRegions, int expectedNumSplits,
- byte[] startRow, byte[] stopRow) throws IOException, InterruptedException {
- TableSnapshotInputFormat tsif = new TableSnapshotInputFormat();
- InputSplit[] splits = tsif.getSplits(job, 0);
-
- Assert.assertEquals(expectedNumSplits, splits.length);
-
- HBaseTestingUtility.SeenRowTracker rowTracker =
- new HBaseTestingUtility.SeenRowTracker(startRow, stopRow);
-
- for (int i = 0; i < splits.length; i++) {
- // validate input split
- InputSplit split = splits[i];
- Assert.assertTrue(split instanceof TableSnapshotInputFormat.TableSnapshotRegionSplit);
-
- // validate record reader
- OutputCollector collector = mock(OutputCollector.class);
- Reporter reporter = mock(Reporter.class);
- RecordReader<ImmutableBytesWritable, Result> rr = tsif.getRecordReader(split, job, reporter);
-
- // validate we can read all the data back
- ImmutableBytesWritable key = rr.createKey();
- Result value = rr.createValue();
- while (rr.next(key, value)) {
- verifyRowFromMap(key, value);
- rowTracker.addRow(key.copyBytes());
- }
-
- rr.close();
- }
-
- // validate all rows are seen
- rowTracker.validate();
- }
-
- @Override
- protected void testWithMapReduceImpl(HBaseTestingUtility util, TableName tableName,
- String snapshotName, Path tableDir, int numRegions, int expectedNumSplits,
- boolean shutdownCluster) throws Exception {
- doTestWithMapReduce(util, tableName, snapshotName, getStartRow(), getEndRow(), tableDir,
- numRegions, expectedNumSplits, shutdownCluster);
- }
-
- // this is also called by the IntegrationTestTableSnapshotInputFormat
- public static void doTestWithMapReduce(HBaseTestingUtility util, TableName tableName,
- String snapshotName, byte[] startRow, byte[] endRow, Path tableDir, int numRegions,
- int expectedNumSplits, boolean shutdownCluster) throws Exception {
-
- //create the table and snapshot
- createTableAndSnapshot(util, tableName, snapshotName, startRow, endRow, numRegions);
-
- if (shutdownCluster) {
- util.shutdownMiniHBaseCluster();
- }
-
- try {
- // create the job
- JobConf jobConf = new JobConf(util.getConfiguration());
-
- jobConf.setJarByClass(util.getClass());
- org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addDependencyJarsForClasses(jobConf,
- TestTableSnapshotInputFormat.class);
-
- TableMapReduceUtil.initTableSnapshotMapJob(snapshotName, COLUMNS,
- TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
- NullWritable.class, jobConf, true, tableDir);
-
- jobConf.setReducerClass(TestTableSnapshotInputFormat.TestTableSnapshotReducer.class);
- jobConf.setNumReduceTasks(1);
- jobConf.setOutputFormat(NullOutputFormat.class);
-
- RunningJob job = JobClient.runJob(jobConf);
- Assert.assertTrue(job.isSuccessful());
- } finally {
- if (!shutdownCluster) {
- util.getAdmin().deleteSnapshot(snapshotName);
- util.deleteTable(tableName);
- }
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/HadoopSecurityEnabledUserProviderForTesting.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/HadoopSecurityEnabledUserProviderForTesting.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/HadoopSecurityEnabledUserProviderForTesting.java
deleted file mode 100644
index b342f64..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/HadoopSecurityEnabledUserProviderForTesting.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.hadoop.hbase.security.UserProvider;
-
-/**
- * A {@link UserProvider} that always says hadoop security is enabled, regardless of the underlying
- * configuration. HBase security is <i>not enabled</i> as this is used to determine if SASL is used
- * to do the authentication, which requires a Kerberos ticket (which we currently don't have in
- * tests).
- * <p>
- * This should only be used for <b>TESTING</b>.
- */
-public class HadoopSecurityEnabledUserProviderForTesting extends UserProvider {
-
- @Override
- public boolean isHBaseSecurityEnabled() {
- return false;
- }
-
- @Override
- public boolean isHadoopSecurityEnabled() {
- return true;
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatTestBase.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatTestBase.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatTestBase.java
deleted file mode 100644
index c717fa9..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatTestBase.java
+++ /dev/null
@@ -1,277 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileUtil;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.CategoryBasedTimeout;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.junit.After;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TestRule;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.NavigableMap;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-/**
- * Base set of tests and setup for input formats touching multiple tables.
- */
-public abstract class MultiTableInputFormatTestBase {
- @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
- withTimeout(this.getClass()).withLookingForStuckThread(true).build();
- static final Log LOG = LogFactory.getLog(TestMultiTableInputFormat.class);
- public static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
- static final String TABLE_NAME = "scantest";
- static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
- static final String KEY_STARTROW = "startRow";
- static final String KEY_LASTROW = "stpRow";
-
- static List<String> TABLES = Lists.newArrayList();
-
- static {
- for (int i = 0; i < 3; i++) {
- TABLES.add(TABLE_NAME + String.valueOf(i));
- }
- }
-
- @BeforeClass
- public static void setUpBeforeClass() throws Exception {
- // switch TIF to log at DEBUG level
- TEST_UTIL.enableDebug(MultiTableInputFormatBase.class);
- // start mini hbase cluster
- TEST_UTIL.startMiniCluster(3);
- // create and fill table
- for (String tableName : TABLES) {
- try (Table table =
- TEST_UTIL.createMultiRegionTable(TableName.valueOf(tableName),
- INPUT_FAMILY, 4)) {
- TEST_UTIL.loadTable(table, INPUT_FAMILY, false);
- }
- }
- }
-
- @AfterClass
- public static void tearDownAfterClass() throws Exception {
- TEST_UTIL.shutdownMiniCluster();
- }
-
- @After
- public void tearDown() throws Exception {
- Configuration c = TEST_UTIL.getConfiguration();
- FileUtil.fullyDelete(new File(c.get("hadoop.tmp.dir")));
- }
-
- /**
- * Pass the key and value to reducer.
- */
- public static class ScanMapper extends
- TableMapper<ImmutableBytesWritable, ImmutableBytesWritable> {
- /**
- * Pass the key and value to reduce.
- *
- * @param key The key, here "aaa", "aab" etc.
- * @param value The value is the same as the key.
- * @param context The task context.
- * @throws IOException When reading the rows fails.
- */
- @Override
- public void map(ImmutableBytesWritable key, Result value, Context context)
- throws IOException, InterruptedException {
- makeAssertions(key, value);
- context.write(key, key);
- }
-
- public void makeAssertions(ImmutableBytesWritable key, Result value) throws IOException {
- if (value.size() != 1) {
- throw new IOException("There should only be one input column");
- }
- Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> cf =
- value.getMap();
- if (!cf.containsKey(INPUT_FAMILY)) {
- throw new IOException("Wrong input columns. Missing: '" +
- Bytes.toString(INPUT_FAMILY) + "'.");
- }
- String val = Bytes.toStringBinary(value.getValue(INPUT_FAMILY, null));
- LOG.debug("map: key -> " + Bytes.toStringBinary(key.get()) +
- ", value -> " + val);
- }
- }
-
- /**
- * Checks the last and first keys seen against the scanner boundaries.
- */
- public static class ScanReducer
- extends
- Reducer<ImmutableBytesWritable, ImmutableBytesWritable,
- NullWritable, NullWritable> {
- private String first = null;
- private String last = null;
-
- @Override
- protected void reduce(ImmutableBytesWritable key,
- Iterable<ImmutableBytesWritable> values, Context context)
- throws IOException, InterruptedException {
- makeAssertions(key, values);
- }
-
- protected void makeAssertions(ImmutableBytesWritable key,
- Iterable<ImmutableBytesWritable> values) {
- int count = 0;
- for (ImmutableBytesWritable value : values) {
- String val = Bytes.toStringBinary(value.get());
- LOG.debug("reduce: key[" + count + "] -> " +
- Bytes.toStringBinary(key.get()) + ", value -> " + val);
- if (first == null) first = val;
- last = val;
- count++;
- }
- assertEquals(3, count);
- }
-
- @Override
- protected void cleanup(Context context) throws IOException,
- InterruptedException {
- Configuration c = context.getConfiguration();
- cleanup(c);
- }
-
- protected void cleanup(Configuration c) {
- String startRow = c.get(KEY_STARTROW);
- String lastRow = c.get(KEY_LASTROW);
- LOG.info("cleanup: first -> \"" + first + "\", start row -> \"" +
- startRow + "\"");
- LOG.info("cleanup: last -> \"" + last + "\", last row -> \"" + lastRow +
- "\"");
- if (startRow != null && startRow.length() > 0) {
- assertEquals(startRow, first);
- }
- if (lastRow != null && lastRow.length() > 0) {
- assertEquals(lastRow, last);
- }
- }
- }
-
- @Test
- public void testScanEmptyToEmpty() throws IOException, InterruptedException,
- ClassNotFoundException {
- testScan(null, null, null);
- }
-
- @Test
- public void testScanEmptyToAPP() throws IOException, InterruptedException,
- ClassNotFoundException {
- testScan(null, "app", "apo");
- }
-
- @Test
- public void testScanOBBToOPP() throws IOException, InterruptedException,
- ClassNotFoundException {
- testScan("obb", "opp", "opo");
- }
-
- @Test
- public void testScanYZYToEmpty() throws IOException, InterruptedException,
- ClassNotFoundException {
- testScan("yzy", null, "zzz");
- }
-
- /**
- * Tests a MR scan using specific start and stop rows.
- *
- * @throws IOException
- * @throws ClassNotFoundException
- * @throws InterruptedException
- */
- private void testScan(String start, String stop, String last)
- throws IOException, InterruptedException, ClassNotFoundException {
- String jobName =
- "Scan" + (start != null ? start.toUpperCase(Locale.ROOT) : "Empty") + "To" +
- (stop != null ? stop.toUpperCase(Locale.ROOT) : "Empty");
- LOG.info("Before map/reduce startup - job " + jobName);
- Configuration c = new Configuration(TEST_UTIL.getConfiguration());
-
- c.set(KEY_STARTROW, start != null ? start : "");
- c.set(KEY_LASTROW, last != null ? last : "");
-
- List<Scan> scans = new ArrayList<>();
-
- for (String tableName : TABLES) {
- Scan scan = new Scan();
-
- scan.addFamily(INPUT_FAMILY);
- scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(tableName));
-
- if (start != null) {
- scan.setStartRow(Bytes.toBytes(start));
- }
- if (stop != null) {
- scan.setStopRow(Bytes.toBytes(stop));
- }
-
- scans.add(scan);
-
- LOG.info("scan before: " + scan);
- }
-
- runJob(jobName, c, scans);
- }
-
- protected void runJob(String jobName, Configuration c, List<Scan> scans)
- throws IOException, InterruptedException, ClassNotFoundException {
- Job job = new Job(c, jobName);
-
- initJob(scans, job);
- job.setReducerClass(ScanReducer.class);
- job.setNumReduceTasks(1); // one to get final "first" and "last" key
- FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
- LOG.info("Started " + job.getJobName());
- job.waitForCompletion(true);
- assertTrue(job.isSuccessful());
- LOG.info("After map/reduce completion - job " + jobName);
- }
-
- protected abstract void initJob(List<Scan> scans, Job job) throws IOException;
-
-
-}
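The base class above leaves initJob abstract. For orientation, here is a minimal, hypothetical sketch of what a concrete subclass could look like, assuming the scan-list overload of TableMapReduceUtil.initTableMapperJob (which feeds MultiTableInputFormat); it is not part of this commit. Each Scan already carries its table name in Scan.SCAN_ATTRIBUTES_TABLE_NAME, as testScan() sets up.

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.MultiTableInputFormatTestBase;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.mapreduce.Job;

public class ExampleMultiTableInputFormatTest extends MultiTableInputFormatTestBase {
  @Override
  protected void initJob(List<Scan> scans, Job job) throws IOException {
    // Wire all per-table scans into one mapper job; the input format resolves the
    // target table of each Scan from Scan.SCAN_ATTRIBUTES_TABLE_NAME.
    TableMapReduceUtil.initTableMapperJob(scans,
        MultiTableInputFormatTestBase.ScanMapper.class,
        ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
  }
}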
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/NMapInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/NMapInputFormat.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/NMapInputFormat.java
deleted file mode 100644
index efacca9..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/NMapInputFormat.java
+++ /dev/null
@@ -1,134 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapreduce.InputFormat;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-
-/**
- * Input format that creates a configurable number of map tasks
- * each provided with a single row of NullWritables. This can be
- * useful when trying to write mappers which don't have any real
- * input (e.g. when the mapper is simply producing random data as output)
- */
-public class NMapInputFormat extends InputFormat<NullWritable, NullWritable> {
- private static final String NMAPS_KEY = "nmapinputformat.num.maps";
-
- @Override
- public RecordReader<NullWritable, NullWritable> createRecordReader(
- InputSplit split,
- TaskAttemptContext tac) throws IOException, InterruptedException {
- return new SingleRecordReader<>(NullWritable.get(), NullWritable.get());
- }
-
- @Override
- public List<InputSplit> getSplits(JobContext context) throws IOException,
- InterruptedException {
- int count = getNumMapTasks(context.getConfiguration());
- List<InputSplit> splits = new ArrayList<>(count);
- for (int i = 0; i < count; i++) {
- splits.add(new NullInputSplit());
- }
- return splits;
- }
-
- public static void setNumMapTasks(Configuration conf, int numTasks) {
- conf.setInt(NMAPS_KEY, numTasks);
- }
-
- public static int getNumMapTasks(Configuration conf) {
- return conf.getInt(NMAPS_KEY, 1);
- }
-
- private static class NullInputSplit extends InputSplit implements Writable {
- @Override
- public long getLength() throws IOException, InterruptedException {
- return 0;
- }
-
- @Override
- public String[] getLocations() throws IOException, InterruptedException {
- return new String[] {};
- }
-
- @Override
- public void readFields(DataInput in) throws IOException {
- }
-
- @Override
- public void write(DataOutput out) throws IOException {
- }
- }
-
- private static class SingleRecordReader<K, V>
- extends RecordReader<K, V> {
-
- private final K key;
- private final V value;
- boolean providedKey = false;
-
- SingleRecordReader(K key, V value) {
- this.key = key;
- this.value = value;
- }
-
- @Override
- public void close() {
- }
-
- @Override
- public K getCurrentKey() {
- return key;
- }
-
- @Override
- public V getCurrentValue(){
- return value;
- }
-
- @Override
- public float getProgress() {
- return 0;
- }
-
- @Override
- public void initialize(InputSplit split, TaskAttemptContext tac) {
- }
-
- @Override
- public boolean nextKeyValue() {
- if (providedKey) return false;
- providedKey = true;
- return true;
- }
-
- }
-}
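For context, a minimal, hypothetical sketch of a job that uses NMapInputFormat to run mappers with no real input; the mapper, task count, and output path below are placeholders rather than anything from this change.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.NMapInputFormat;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class RandomDataJob {
  // Each map task receives exactly one NullWritable record and emits whatever it likes.
  static class RandomDataMapper extends Mapper<NullWritable, NullWritable, Text, NullWritable> {
    @Override
    protected void map(NullWritable key, NullWritable value, Context context)
        throws java.io.IOException, InterruptedException {
      context.write(new Text("row-" + context.getTaskAttemptID().getTaskID().getId()),
          NullWritable.get());
    }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    NMapInputFormat.setNumMapTasks(conf, 10);   // ten map tasks, no input files needed
    Job job = Job.getInstance(conf, "random-data");
    job.setJarByClass(RandomDataJob.class);
    job.setInputFormatClass(NMapInputFormat.class);
    job.setMapperClass(RandomDataMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(0);                   // map-only job
    FileOutputFormat.setOutputPath(job, new Path("/tmp/random-data-out"));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}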
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatTestBase.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatTestBase.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatTestBase.java
deleted file mode 100644
index fa47253..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatTestBase.java
+++ /dev/null
@@ -1,231 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.CategoryBasedTimeout;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellScanner;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Admin;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.io.HFileLink;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
-import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
-import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.FSUtils;
-import org.apache.hadoop.hbase.util.HFileArchiveUtil;
-import org.junit.Assert;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TestRule;
-
-import static org.junit.Assert.assertFalse;
-
-import java.io.IOException;
-import java.util.Arrays;
-
-public abstract class TableSnapshotInputFormatTestBase {
- private static final Log LOG = LogFactory.getLog(TableSnapshotInputFormatTestBase.class);
- @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
- withTimeout(this.getClass()).withLookingForStuckThread(true).build();
- protected final HBaseTestingUtility UTIL = new HBaseTestingUtility();
- protected static final int NUM_REGION_SERVERS = 2;
- protected static final byte[][] FAMILIES = {Bytes.toBytes("f1"), Bytes.toBytes("f2")};
-
- protected FileSystem fs;
- protected Path rootDir;
-
- public void setupCluster() throws Exception {
- setupConf(UTIL.getConfiguration());
- UTIL.startMiniCluster(NUM_REGION_SERVERS, true);
- rootDir = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir();
- fs = rootDir.getFileSystem(UTIL.getConfiguration());
- }
-
- public void tearDownCluster() throws Exception {
- UTIL.shutdownMiniCluster();
- }
-
- private static void setupConf(Configuration conf) {
- // Enable snapshot
- conf.setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true);
- }
-
- protected abstract void testWithMockedMapReduce(HBaseTestingUtility util, String snapshotName,
- int numRegions, int expectedNumSplits) throws Exception;
-
- protected abstract void testWithMapReduceImpl(HBaseTestingUtility util, TableName tableName,
- String snapshotName, Path tableDir, int numRegions, int expectedNumSplits,
- boolean shutdownCluster) throws Exception;
-
- protected abstract byte[] getStartRow();
-
- protected abstract byte[] getEndRow();
-
- @Test
- public void testWithMockedMapReduceSingleRegion() throws Exception {
- testWithMockedMapReduce(UTIL, "testWithMockedMapReduceSingleRegion", 1, 1);
- }
-
- @Test
- public void testWithMockedMapReduceMultiRegion() throws Exception {
- testWithMockedMapReduce(UTIL, "testWithMockedMapReduceMultiRegion", 10, 8);
- }
-
- @Test
- public void testWithMapReduceSingleRegion() throws Exception {
- testWithMapReduce(UTIL, "testWithMapReduceSingleRegion", 1, 1, false);
- }
-
- @Test
- public void testWithMapReduceMultiRegion() throws Exception {
- testWithMapReduce(UTIL, "testWithMapReduceMultiRegion", 10, 8, false);
- }
-
- @Test
- // run the MR job while HBase is offline
- public void testWithMapReduceAndOfflineHBaseMultiRegion() throws Exception {
- testWithMapReduce(UTIL, "testWithMapReduceAndOfflineHBaseMultiRegion", 10, 8, true);
- }
-
- // Test that snapshot restore does not create back references in the HBase root dir.
- @Test
- public void testRestoreSnapshotDoesNotCreateBackRefLinks() throws Exception {
- setupCluster();
- TableName tableName = TableName.valueOf("testRestoreSnapshotDoesNotCreateBackRefLinks");
- String snapshotName = "foo";
-
- try {
- createTableAndSnapshot(UTIL, tableName, snapshotName, getStartRow(), getEndRow(), 1);
-
- Path tmpTableDir = UTIL.getDataTestDirOnTestFS(snapshotName);
-
- testRestoreSnapshotDoesNotCreateBackRefLinksInit(tableName, snapshotName,tmpTableDir);
-
- Path rootDir = FSUtils.getRootDir(UTIL.getConfiguration());
- for (Path regionDir : FSUtils.getRegionDirs(fs, FSUtils.getTableDir(rootDir, tableName))) {
- for (Path storeDir : FSUtils.getFamilyDirs(fs, regionDir)) {
- for (FileStatus status : fs.listStatus(storeDir)) {
- System.out.println(status.getPath());
- if (StoreFileInfo.isValid(status)) {
- Path archiveStoreDir = HFileArchiveUtil.getStoreArchivePath(UTIL.getConfiguration(),
- tableName, regionDir.getName(), storeDir.getName());
-
- Path path = HFileLink.getBackReferencesDir(storeDir, status.getPath().getName());
- // assert back references directory is empty
- assertFalse("There is a back reference in " + path, fs.exists(path));
-
- path = HFileLink.getBackReferencesDir(archiveStoreDir, status.getPath().getName());
- // assert back references directory is empty
- assertFalse("There is a back reference in " + path, fs.exists(path));
- }
- }
- }
- }
- } finally {
- UTIL.getAdmin().deleteSnapshot(snapshotName);
- UTIL.deleteTable(tableName);
- tearDownCluster();
- }
- }
-
- public abstract void testRestoreSnapshotDoesNotCreateBackRefLinksInit(TableName tableName,
- String snapshotName, Path tmpTableDir) throws Exception;
-
- protected void testWithMapReduce(HBaseTestingUtility util, String snapshotName,
- int numRegions, int expectedNumSplits, boolean shutdownCluster) throws Exception {
- setupCluster();
- try {
- Path tableDir = util.getDataTestDirOnTestFS(snapshotName);
- TableName tableName = TableName.valueOf("testWithMapReduce");
- testWithMapReduceImpl(util, tableName, snapshotName, tableDir, numRegions,
- expectedNumSplits, shutdownCluster);
- } finally {
- tearDownCluster();
- }
- }
-
- protected static void verifyRowFromMap(ImmutableBytesWritable key, Result result)
- throws IOException {
- byte[] row = key.get();
- CellScanner scanner = result.cellScanner();
- while (scanner.advance()) {
- Cell cell = scanner.current();
-
- //assert that all Cells in the Result have the same key
- Assert.assertEquals(0, Bytes.compareTo(row, 0, row.length,
- cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()));
- }
-
- for (int j = 0; j < FAMILIES.length; j++) {
- byte[] actual = result.getValue(FAMILIES[j], FAMILIES[j]);
- Assert.assertArrayEquals("Row in snapshot does not match, expected:" + Bytes.toString(row)
- + " ,actual:" + Bytes.toString(actual), row, actual);
- }
- }
-
- protected static void createTableAndSnapshot(HBaseTestingUtility util, TableName tableName,
- String snapshotName, byte[] startRow, byte[] endRow, int numRegions)
- throws Exception {
- try {
- LOG.debug("Ensuring table doesn't exist.");
- util.deleteTable(tableName);
- } catch(Exception ex) {
- // ignore
- }
-
- LOG.info("creating table '" + tableName + "'");
- if (numRegions > 1) {
- util.createTable(tableName, FAMILIES, 1, startRow, endRow, numRegions);
- } else {
- util.createTable(tableName, FAMILIES);
- }
- Admin admin = util.getAdmin();
-
- LOG.info("put some stuff in the table");
- Table table = util.getConnection().getTable(tableName);
- util.loadTable(table, FAMILIES);
-
- Path rootDir = FSUtils.getRootDir(util.getConfiguration());
- FileSystem fs = rootDir.getFileSystem(util.getConfiguration());
-
- LOG.info("snapshot");
- SnapshotTestingUtils.createSnapshotAndValidate(admin, tableName,
- Arrays.asList(FAMILIES), null, snapshotName, rootDir, fs, true);
-
- LOG.info("load different values");
- byte[] value = Bytes.toBytes("after_snapshot_value");
- util.loadTable(table, FAMILIES, value);
-
- LOG.info("cause flush to create new files in the region");
- admin.flush(tableName);
- table.close();
- }
-
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCellCounter.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCellCounter.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCellCounter.java
deleted file mode 100644
index ff623cb..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCellCounter.java
+++ /dev/null
@@ -1,376 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileUtil;
-import org.apache.hadoop.fs.LocalFileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.LauncherSecurityManager;
-import org.apache.hadoop.util.ToolRunner;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
-
-import java.io.*;
-
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.fail;
-
-@Category({MapReduceTests.class, LargeTests.class})
-public class TestCellCounter {
- private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
- private static final byte[] ROW1 = Bytes.toBytesBinary("\\x01row1");
- private static final byte[] ROW2 = Bytes.toBytesBinary("\\x01row2");
- private static final String FAMILY_A_STRING = "a";
- private static final String FAMILY_B_STRING = "b";
- private static final byte[] FAMILY_A = Bytes.toBytes(FAMILY_A_STRING);
- private static final byte[] FAMILY_B = Bytes.toBytes(FAMILY_B_STRING);
- private static final byte[] QUALIFIER = Bytes.toBytes("q");
-
- private static Path FQ_OUTPUT_DIR;
- private static final String OUTPUT_DIR = "target" + File.separator + "test-data" + File.separator
- + "output";
- private static long now = System.currentTimeMillis();
-
- @Rule
- public TestName name = new TestName();
-
- @BeforeClass
- public static void beforeClass() throws Exception {
- UTIL.startMiniCluster();
- FQ_OUTPUT_DIR = new Path(OUTPUT_DIR).makeQualified(new LocalFileSystem());
- FileUtil.fullyDelete(new File(OUTPUT_DIR));
- }
-
- @AfterClass
- public static void afterClass() throws Exception {
- UTIL.shutdownMiniCluster();
- }
-
- /**
- * Test CellCounter; data matching the row regex should be written to the output.
- *
- */
- @Test (timeout=300000)
- public void testCellCounter() throws Exception {
- final TableName sourceTable = TableName.valueOf(name.getMethodName());
- byte[][] families = { FAMILY_A, FAMILY_B };
- Table t = UTIL.createTable(sourceTable, families);
- try{
- Put p = new Put(ROW1);
- p.addColumn(FAMILY_A, QUALIFIER, now, Bytes.toBytes("Data11"));
- p.addColumn(FAMILY_B, QUALIFIER, now + 1, Bytes.toBytes("Data12"));
- p.addColumn(FAMILY_A, QUALIFIER, now + 2, Bytes.toBytes("Data13"));
- t.put(p);
- p = new Put(ROW2);
- p.addColumn(FAMILY_B, QUALIFIER, now, Bytes.toBytes("Dat21"));
- p.addColumn(FAMILY_A, QUALIFIER, now + 1, Bytes.toBytes("Data22"));
- p.addColumn(FAMILY_B, QUALIFIER, now + 2, Bytes.toBytes("Data23"));
- t.put(p);
- String[] args = { sourceTable.getNameAsString(), FQ_OUTPUT_DIR.toString(), ";", "^row1" };
- runCount(args);
- FileInputStream inputStream = new FileInputStream(OUTPUT_DIR + File.separator +
- "part-r-00000");
- String data = IOUtils.toString(inputStream);
- inputStream.close();
- assertTrue(data.contains("Total Families Across all Rows" + "\t" + "2"));
- assertTrue(data.contains("Total Qualifiers across all Rows" + "\t" + "2"));
- assertTrue(data.contains("Total ROWS" + "\t" + "1"));
- assertTrue(data.contains("b;q" + "\t" + "1"));
- assertTrue(data.contains("a;q" + "\t" + "1"));
- assertTrue(data.contains("row1;a;q_Versions" + "\t" + "1"));
- assertTrue(data.contains("row1;b;q_Versions" + "\t" + "1"));
- }finally{
- t.close();
- FileUtil.fullyDelete(new File(OUTPUT_DIR));
- }
- }
-
- /**
- * Test CellCounter with a row-key prefix; matching data should be written to the output.
- */
- @Test(timeout = 300000)
- public void testCellCounterPrefix() throws Exception {
- final TableName sourceTable = TableName.valueOf(name.getMethodName());
- byte[][] families = { FAMILY_A, FAMILY_B };
- Table t = UTIL.createTable(sourceTable, families);
- try {
- Put p = new Put(ROW1);
- p.addColumn(FAMILY_A, QUALIFIER, now, Bytes.toBytes("Data11"));
- p.addColumn(FAMILY_B, QUALIFIER, now + 1, Bytes.toBytes("Data12"));
- p.addColumn(FAMILY_A, QUALIFIER, now + 2, Bytes.toBytes("Data13"));
- t.put(p);
- p = new Put(ROW2);
- p.addColumn(FAMILY_B, QUALIFIER, now, Bytes.toBytes("Dat21"));
- p.addColumn(FAMILY_A, QUALIFIER, now + 1, Bytes.toBytes("Data22"));
- p.addColumn(FAMILY_B, QUALIFIER, now + 2, Bytes.toBytes("Data23"));
- t.put(p);
- String[] args = { sourceTable.getNameAsString(), FQ_OUTPUT_DIR.toString(), ";", "\\x01row1" };
- runCount(args);
- FileInputStream inputStream =
- new FileInputStream(OUTPUT_DIR + File.separator + "part-r-00000");
- String data = IOUtils.toString(inputStream);
- inputStream.close();
- assertTrue(data.contains("Total Families Across all Rows" + "\t" + "2"));
- assertTrue(data.contains("Total Qualifiers across all Rows" + "\t" + "2"));
- assertTrue(data.contains("Total ROWS" + "\t" + "1"));
- assertTrue(data.contains("b;q" + "\t" + "1"));
- assertTrue(data.contains("a;q" + "\t" + "1"));
- assertTrue(data.contains("row1;a;q_Versions" + "\t" + "1"));
- assertTrue(data.contains("row1;b;q_Versions" + "\t" + "1"));
- } finally {
- t.close();
- FileUtil.fullyDelete(new File(OUTPUT_DIR));
- }
- }
-
- /**
- * Test CellCounter with start and end time bounds; data within the range should be written to the output.
- */
- @Test (timeout=300000)
- public void testCellCounterStartTimeRange() throws Exception {
- final TableName sourceTable = TableName.valueOf(name.getMethodName());
- byte[][] families = { FAMILY_A, FAMILY_B };
- Table t = UTIL.createTable(sourceTable, families);
- try{
- Put p = new Put(ROW1);
- p.addColumn(FAMILY_A, QUALIFIER, now, Bytes.toBytes("Data11"));
- p.addColumn(FAMILY_B, QUALIFIER, now + 1, Bytes.toBytes("Data12"));
- p.addColumn(FAMILY_A, QUALIFIER, now + 2, Bytes.toBytes("Data13"));
- t.put(p);
- p = new Put(ROW2);
- p.addColumn(FAMILY_B, QUALIFIER, now, Bytes.toBytes("Dat21"));
- p.addColumn(FAMILY_A, QUALIFIER, now + 1, Bytes.toBytes("Data22"));
- p.addColumn(FAMILY_B, QUALIFIER, now + 2, Bytes.toBytes("Data23"));
- t.put(p);
- String[] args = {
- sourceTable.getNameAsString(), FQ_OUTPUT_DIR.toString(), ";", "^row1",
- "--starttime=" + now,
- "--endtime=" + now + 2 };
- runCount(args);
- FileInputStream inputStream = new FileInputStream(OUTPUT_DIR + File.separator +
- "part-r-00000");
- String data = IOUtils.toString(inputStream);
- inputStream.close();
- assertTrue(data.contains("Total Families Across all Rows" + "\t" + "2"));
- assertTrue(data.contains("Total Qualifiers across all Rows" + "\t" + "2"));
- assertTrue(data.contains("Total ROWS" + "\t" + "1"));
- assertTrue(data.contains("b;q" + "\t" + "1"));
- assertTrue(data.contains("a;q" + "\t" + "1"));
- assertTrue(data.contains("row1;a;q_Versions" + "\t" + "1"));
- assertTrue(data.contains("row1;b;q_Versions" + "\t" + "1"));
- }finally{
- t.close();
- FileUtil.fullyDelete(new File(OUTPUT_DIR));
- }
- }
-
- /**
- * Test CellCounter with an end time bound; data within the range should be written to the output.
- */
- @Test (timeout=300000)
- public void testCellCounteEndTimeRange() throws Exception {
- final TableName sourceTable = TableName.valueOf(name.getMethodName());
- byte[][] families = { FAMILY_A, FAMILY_B };
- Table t = UTIL.createTable(sourceTable, families);
- try{
- Put p = new Put(ROW1);
- p.addColumn(FAMILY_A, QUALIFIER, now, Bytes.toBytes("Data11"));
- p.addColumn(FAMILY_B, QUALIFIER, now + 1, Bytes.toBytes("Data12"));
- p.addColumn(FAMILY_A, QUALIFIER, now + 2, Bytes.toBytes("Data13"));
- t.put(p);
- p = new Put(ROW2);
- p.addColumn(FAMILY_B, QUALIFIER, now, Bytes.toBytes("Dat21"));
- p.addColumn(FAMILY_A, QUALIFIER, now + 1, Bytes.toBytes("Data22"));
- p.addColumn(FAMILY_B, QUALIFIER, now + 2, Bytes.toBytes("Data23"));
- t.put(p);
- String[] args = {
- sourceTable.getNameAsString(), FQ_OUTPUT_DIR.toString(), ";", "^row1",
- "--endtime=" + now + 1 };
- runCount(args);
- FileInputStream inputStream = new FileInputStream(OUTPUT_DIR + File.separator +
- "part-r-00000");
- String data = IOUtils.toString(inputStream);
- inputStream.close();
- assertTrue(data.contains("Total Families Across all Rows" + "\t" + "2"));
- assertTrue(data.contains("Total Qualifiers across all Rows" + "\t" + "2"));
- assertTrue(data.contains("Total ROWS" + "\t" + "1"));
- assertTrue(data.contains("b;q" + "\t" + "1"));
- assertTrue(data.contains("a;q" + "\t" + "1"));
- assertTrue(data.contains("row1;a;q_Versions" + "\t" + "1"));
- assertTrue(data.contains("row1;b;q_Versions" + "\t" + "1"));
- }finally{
- t.close();
- FileUtil.fullyDelete(new File(OUTPUT_DIR));
- }
- }
-
- /**
- * Test CellCounter with a time range that matches no data; nothing should be emitted.
- */
- @Test (timeout=300000)
- public void testCellCounteOutOfTimeRange() throws Exception {
- final TableName sourceTable = TableName.valueOf(name.getMethodName());
- byte[][] families = { FAMILY_A, FAMILY_B };
- Table t = UTIL.createTable(sourceTable, families);
- try{
- Put p = new Put(ROW1);
- p.addColumn(FAMILY_A, QUALIFIER, now, Bytes.toBytes("Data11"));
- p.addColumn(FAMILY_B, QUALIFIER, now + 1, Bytes.toBytes("Data12"));
- p.addColumn(FAMILY_A, QUALIFIER, now + 2, Bytes.toBytes("Data13"));
- t.put(p);
- p = new Put(ROW2);
- p.addColumn(FAMILY_B, QUALIFIER, now, Bytes.toBytes("Dat21"));
- p.addColumn(FAMILY_A, QUALIFIER, now + 1, Bytes.toBytes("Data22"));
- p.addColumn(FAMILY_B, QUALIFIER, now + 2, Bytes.toBytes("Data23"));
- t.put(p);
- String[] args = {
- sourceTable.getNameAsString(), FQ_OUTPUT_DIR.toString(), ";", "--starttime=" + now + 1,
- "--endtime=" + now + 2 };
-
- runCount(args);
- FileInputStream inputStream = new FileInputStream(OUTPUT_DIR + File.separator +
- "part-r-00000");
- String data = IOUtils.toString(inputStream);
- inputStream.close();
- // nothing should have been emitted to the reducer
- assertTrue(data.isEmpty());
- }finally{
- t.close();
- FileUtil.fullyDelete(new File(OUTPUT_DIR));
- }
- }
-
-
- private boolean runCount(String[] args) throws Exception {
- // need to make a copy of the configuration to make sure
- // different temp dirs are used.
- int status = ToolRunner.run(new Configuration(UTIL.getConfiguration()), new CellCounter(),
- args);
- return status == 0;
- }
-
- /**
- * Test main method of CellCounter
- */
- @Test (timeout=300000)
- public void testCellCounterMain() throws Exception {
-
- PrintStream oldPrintStream = System.err;
- SecurityManager SECURITY_MANAGER = System.getSecurityManager();
- LauncherSecurityManager newSecurityManager= new LauncherSecurityManager();
- System.setSecurityManager(newSecurityManager);
- ByteArrayOutputStream data = new ByteArrayOutputStream();
- String[] args = {};
- System.setErr(new PrintStream(data));
- try {
- System.setErr(new PrintStream(data));
-
- try {
- CellCounter.main(args);
- fail("should be SecurityException");
- } catch (SecurityException e) {
- assertEquals(-1, newSecurityManager.getExitCode());
- assertTrue(data.toString().contains("ERROR: Wrong number of parameters:"));
- // should be information about usage
- assertTrue(data.toString().contains("Usage:"));
- }
-
- } finally {
- System.setErr(oldPrintStream);
- System.setSecurityManager(SECURITY_MANAGER);
- }
- }
-
- /**
- * Test CellCounter for the complete table; all data should be written to the output.
- */
- @Test(timeout = 600000)
- public void testCellCounterForCompleteTable() throws Exception {
- final TableName sourceTable = TableName.valueOf(name.getMethodName());
- String outputPath = OUTPUT_DIR + sourceTable;
- LocalFileSystem localFileSystem = new LocalFileSystem();
- Path outputDir =
- new Path(outputPath).makeQualified(localFileSystem.getUri(),
- localFileSystem.getWorkingDirectory());
- byte[][] families = { FAMILY_A, FAMILY_B };
- Table t = UTIL.createTable(sourceTable, families);
- try {
- Put p = new Put(ROW1);
- p.addColumn(FAMILY_A, QUALIFIER, now, Bytes.toBytes("Data11"));
- p.addColumn(FAMILY_B, QUALIFIER, now + 1, Bytes.toBytes("Data12"));
- p.addColumn(FAMILY_A, QUALIFIER, now + 2, Bytes.toBytes("Data13"));
- t.put(p);
- p = new Put(ROW2);
- p.addColumn(FAMILY_B, QUALIFIER, now, Bytes.toBytes("Dat21"));
- p.addColumn(FAMILY_A, QUALIFIER, now + 1, Bytes.toBytes("Data22"));
- p.addColumn(FAMILY_B, QUALIFIER, now + 2, Bytes.toBytes("Data23"));
- t.put(p);
- String[] args = { sourceTable.getNameAsString(), outputDir.toString(), ";" };
- runCount(args);
- FileInputStream inputStream =
- new FileInputStream(outputPath + File.separator + "part-r-00000");
- String data = IOUtils.toString(inputStream);
- inputStream.close();
- assertTrue(data.contains("Total Families Across all Rows" + "\t" + "2"));
- assertTrue(data.contains("Total Qualifiers across all Rows" + "\t" + "4"));
- assertTrue(data.contains("Total ROWS" + "\t" + "2"));
- assertTrue(data.contains("b;q" + "\t" + "2"));
- assertTrue(data.contains("a;q" + "\t" + "2"));
- assertTrue(data.contains("row1;a;q_Versions" + "\t" + "1"));
- assertTrue(data.contains("row1;b;q_Versions" + "\t" + "1"));
- assertTrue(data.contains("row2;a;q_Versions" + "\t" + "1"));
- assertTrue(data.contains("row2;b;q_Versions" + "\t" + "1"));
-
- FileUtil.fullyDelete(new File(outputPath));
- args = new String[] { "-D " + TableInputFormat.SCAN_COLUMN_FAMILY + "=a, b",
- sourceTable.getNameAsString(), outputDir.toString(), ";"};
- runCount(args);
- inputStream = new FileInputStream(outputPath + File.separator + "part-r-00000");
- String data2 = IOUtils.toString(inputStream);
- inputStream.close();
- assertEquals(data, data2);
- } finally {
- t.close();
- localFileSystem.close();
- FileUtil.fullyDelete(new File(outputPath));
- }
- }
-
- @Test
- public void TestCellCounterWithoutOutputDir() throws Exception {
- String[] args = new String[] { "tableName" };
- assertEquals("CellCounter should exit with -1 as output directory is not specified.", -1,
- ToolRunner.run(HBaseConfiguration.create(), new CellCounter(), args));
- }
-}
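For readers following the CellCounter tests above: the tool is driven with positional arguments (source table, output directory, reporting separator, optional row regex or prefix) plus optional --starttime/--endtime flags. A standalone sketch of that invocation outside the mini-cluster harness (table name and output path are hypothetical; not part of this patch):

    Configuration conf = HBaseConfiguration.create();
    String[] args = {
        "myTable",                        // source table (hypothetical)
        "file:///tmp/cellcounter-out",    // output directory (hypothetical)
        ";",                              // reporting separator used in the counter keys
        "^row1",                          // optional row regex/prefix filter
        "--starttime=0",
        "--endtime=" + Long.MAX_VALUE
    };
    int exitCode = ToolRunner.run(conf, new CellCounter(), args);  // 0 on success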
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCopyTable.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCopyTable.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCopyTable.java
deleted file mode 100644
index 0bec03b..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCopyTable.java
+++ /dev/null
@@ -1,262 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import java.io.ByteArrayOutputStream;
-import java.io.PrintStream;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Get;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.LauncherSecurityManager;
-import org.apache.hadoop.util.ToolRunner;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
-
-/**
- * Basic test for the CopyTable M/R tool
- */
-@Category({MapReduceTests.class, LargeTests.class})
-public class TestCopyTable {
- private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
- private static final byte[] ROW1 = Bytes.toBytes("row1");
- private static final byte[] ROW2 = Bytes.toBytes("row2");
- private static final String FAMILY_A_STRING = "a";
- private static final String FAMILY_B_STRING = "b";
- private static final byte[] FAMILY_A = Bytes.toBytes(FAMILY_A_STRING);
- private static final byte[] FAMILY_B = Bytes.toBytes(FAMILY_B_STRING);
- private static final byte[] QUALIFIER = Bytes.toBytes("q");
-
- @Rule
- public TestName name = new TestName();
-
- @BeforeClass
- public static void beforeClass() throws Exception {
- TEST_UTIL.startMiniCluster(3);
- }
-
- @AfterClass
- public static void afterClass() throws Exception {
- TEST_UTIL.shutdownMiniCluster();
- }
-
- private void doCopyTableTest(boolean bulkload) throws Exception {
- final TableName tableName1 = TableName.valueOf(name.getMethodName() + "1");
- final TableName tableName2 = TableName.valueOf(name.getMethodName() + "2");
- final byte[] FAMILY = Bytes.toBytes("family");
- final byte[] COLUMN1 = Bytes.toBytes("c1");
-
- try (Table t1 = TEST_UTIL.createTable(tableName1, FAMILY);
- Table t2 = TEST_UTIL.createTable(tableName2, FAMILY);) {
- // put rows into the first table
- for (int i = 0; i < 10; i++) {
- Put p = new Put(Bytes.toBytes("row" + i));
- p.addColumn(FAMILY, COLUMN1, COLUMN1);
- t1.put(p);
- }
-
- CopyTable copy = new CopyTable();
-
- int code;
- if (bulkload) {
- code = ToolRunner.run(new Configuration(TEST_UTIL.getConfiguration()),
- copy, new String[] { "--new.name=" + tableName2.getNameAsString(),
- "--bulkload", tableName1.getNameAsString() });
- } else {
- code = ToolRunner.run(new Configuration(TEST_UTIL.getConfiguration()),
- copy, new String[] { "--new.name=" + tableName2.getNameAsString(),
- tableName1.getNameAsString() });
- }
- assertEquals("copy job failed", 0, code);
-
- // verify the data was copied into table 2
- for (int i = 0; i < 10; i++) {
- Get g = new Get(Bytes.toBytes("row" + i));
- Result r = t2.get(g);
- assertEquals(1, r.size());
- assertTrue(CellUtil.matchingQualifier(r.rawCells()[0], COLUMN1));
- }
- } finally {
- TEST_UTIL.deleteTable(tableName1);
- TEST_UTIL.deleteTable(tableName2);
- }
- }
-
- /**
- * Simple end-to-end test
- * @throws Exception
- */
- @Test
- public void testCopyTable() throws Exception {
- doCopyTableTest(false);
- }
-
- /**
- * Simple end-to-end test with bulkload.
- */
- @Test
- public void testCopyTableWithBulkload() throws Exception {
- doCopyTableTest(true);
- }
-
- @Test
- public void testStartStopRow() throws Exception {
- final TableName tableName1 = TableName.valueOf(name.getMethodName() + "1");
- final TableName tableName2 = TableName.valueOf(name.getMethodName() + "2");
- final byte[] FAMILY = Bytes.toBytes("family");
- final byte[] COLUMN1 = Bytes.toBytes("c1");
- final byte[] ROW0 = Bytes.toBytesBinary("\\x01row0");
- final byte[] ROW1 = Bytes.toBytesBinary("\\x01row1");
- final byte[] ROW2 = Bytes.toBytesBinary("\\x01row2");
-
- Table t1 = TEST_UTIL.createTable(tableName1, FAMILY);
- Table t2 = TEST_UTIL.createTable(tableName2, FAMILY);
-
- // put rows into the first table
- Put p = new Put(ROW0);
- p.addColumn(FAMILY, COLUMN1, COLUMN1);
- t1.put(p);
- p = new Put(ROW1);
- p.addColumn(FAMILY, COLUMN1, COLUMN1);
- t1.put(p);
- p = new Put(ROW2);
- p.addColumn(FAMILY, COLUMN1, COLUMN1);
- t1.put(p);
-
- CopyTable copy = new CopyTable();
- assertEquals(
- 0,
- ToolRunner.run(new Configuration(TEST_UTIL.getConfiguration()),
- copy, new String[] { "--new.name=" + tableName2, "--startrow=\\x01row1",
- "--stoprow=\\x01row2", tableName1.getNameAsString() }));
-
- // verify the data was copied into table 2
- // row1 exist, row0, row2 do not exist
- Get g = new Get(ROW1);
- Result r = t2.get(g);
- assertEquals(1, r.size());
- assertTrue(CellUtil.matchingQualifier(r.rawCells()[0], COLUMN1));
-
- g = new Get(ROW0);
- r = t2.get(g);
- assertEquals(0, r.size());
-
- g = new Get(ROW2);
- r = t2.get(g);
- assertEquals(0, r.size());
-
- t1.close();
- t2.close();
- TEST_UTIL.deleteTable(tableName1);
- TEST_UTIL.deleteTable(tableName2);
- }
-
- /**
- * Test copying all rows from family a of sourceTable into family b of targetTable (--families=a:b).
- */
- @Test
- public void testRenameFamily() throws Exception {
- final TableName sourceTable = TableName.valueOf(name.getMethodName() + "source");
- final TableName targetTable = TableName.valueOf(name.getMethodName() + "-target");
-
- byte[][] families = { FAMILY_A, FAMILY_B };
-
- Table t = TEST_UTIL.createTable(sourceTable, families);
- Table t2 = TEST_UTIL.createTable(targetTable, families);
- Put p = new Put(ROW1);
- p.addColumn(FAMILY_A, QUALIFIER, Bytes.toBytes("Data11"));
- p.addColumn(FAMILY_B, QUALIFIER, Bytes.toBytes("Data12"));
- p.addColumn(FAMILY_A, QUALIFIER, Bytes.toBytes("Data13"));
- t.put(p);
- p = new Put(ROW2);
- p.addColumn(FAMILY_B, QUALIFIER, Bytes.toBytes("Dat21"));
- p.addColumn(FAMILY_A, QUALIFIER, Bytes.toBytes("Data22"));
- p.addColumn(FAMILY_B, QUALIFIER, Bytes.toBytes("Data23"));
- t.put(p);
-
- long currentTime = System.currentTimeMillis();
- String[] args = new String[] { "--new.name=" + targetTable, "--families=a:b", "--all.cells",
- "--starttime=" + (currentTime - 100000), "--endtime=" + (currentTime + 100000),
- "--versions=1", sourceTable.getNameAsString() };
- assertNull(t2.get(new Get(ROW1)).getRow());
-
- assertTrue(runCopy(args));
-
- assertNotNull(t2.get(new Get(ROW1)).getRow());
- Result res = t2.get(new Get(ROW1));
- byte[] b1 = res.getValue(FAMILY_B, QUALIFIER);
- assertEquals("Data13", new String(b1));
- assertNotNull(t2.get(new Get(ROW2)).getRow());
- res = t2.get(new Get(ROW2));
- b1 = res.getValue(FAMILY_A, QUALIFIER);
- // Data from family B is not copied
- assertNull(b1);
-
- }
-
- /**
- * Test main method of CopyTable.
- */
- @Test
- public void testMainMethod() throws Exception {
- String[] emptyArgs = { "-h" };
- PrintStream oldWriter = System.err;
- ByteArrayOutputStream data = new ByteArrayOutputStream();
- PrintStream writer = new PrintStream(data);
- System.setErr(writer);
- SecurityManager SECURITY_MANAGER = System.getSecurityManager();
- LauncherSecurityManager newSecurityManager= new LauncherSecurityManager();
- System.setSecurityManager(newSecurityManager);
- try {
- CopyTable.main(emptyArgs);
- fail("should be exit");
- } catch (SecurityException e) {
- assertEquals(1, newSecurityManager.getExitCode());
- } finally {
- System.setErr(oldWriter);
- System.setSecurityManager(SECURITY_MANAGER);
- }
- assertTrue(data.toString().contains("rs.class"));
- // should print usage information
- assertTrue(data.toString().contains("Usage:"));
- }
-
- private boolean runCopy(String[] args) throws Exception {
- int status = ToolRunner.run(new Configuration(TEST_UTIL.getConfiguration()), new CopyTable(),
- args);
- return status == 0;
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestGroupingTableMapper.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestGroupingTableMapper.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestGroupingTableMapper.java
deleted file mode 100644
index b7fdb47..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestGroupingTableMapper.java
+++ /dev/null
@@ -1,68 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
- * agreements. See the NOTICE file distributed with this work for additional information regarding
- * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with the License. You may
- * obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software distributed under the
- * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
- * either express or implied. See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.testclassification.SmallTests;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-import static org.mockito.Mockito.*;
-
-@Category({MapReduceTests.class, SmallTests.class})
-public class TestGroupingTableMapper {
-
- /**
- * Test GroupingTableMapper class
- */
- @Test
- public void testGroupingTableMapper() throws Exception {
-
- GroupingTableMapper mapper = new GroupingTableMapper();
- Configuration configuration = new Configuration();
- configuration.set(GroupingTableMapper.GROUP_COLUMNS, "family1:clm family2:clm");
- mapper.setConf(configuration);
-
- Result result = mock(Result.class);
- @SuppressWarnings("unchecked")
- Mapper<ImmutableBytesWritable, Result, ImmutableBytesWritable, Result>.Context context =
- mock(Mapper.Context.class);
- context.write(any(ImmutableBytesWritable.class), any(Result.class));
- List<Cell> keyValue = new ArrayList<>();
- byte[] row = {};
- keyValue.add(new KeyValue(row, Bytes.toBytes("family2"), Bytes.toBytes("clm"), Bytes
- .toBytes("value1")));
- keyValue.add(new KeyValue(row, Bytes.toBytes("family1"), Bytes.toBytes("clm"), Bytes
- .toBytes("value2")));
- when(result.listCells()).thenReturn(keyValue);
- mapper.map(null, result, context);
- // template data
- byte[][] data = { Bytes.toBytes("value1"), Bytes.toBytes("value2") };
- ImmutableBytesWritable ibw = mapper.createGroupKey(data);
- verify(context).write(ibw, result);
- }
-
-}
[37/41] hbase git commit: HBASE-18640 Move mapreduce out of
hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java
new file mode 100644
index 0000000..b64271e
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java
@@ -0,0 +1,793 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import static java.lang.String.format;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.TableNotEnabledException;
+import org.apache.hadoop.hbase.TableNotFoundException;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Base64;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
+import org.apache.hadoop.security.Credentials;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.shaded.com.google.common.base.Preconditions;
+import org.apache.hadoop.hbase.shaded.com.google.common.base.Splitter;
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
+
+/**
+ * Tool to import data from a TSV file.
+ *
+ * This tool is rather simplistic - it doesn't do any quoting or
+ * escaping, but is useful for many data loads.
+ *
+ * @see ImportTsv#usage(String)
+ */
+@InterfaceAudience.Public
+public class ImportTsv extends Configured implements Tool {
+
+ protected static final Log LOG = LogFactory.getLog(ImportTsv.class);
+
+ final static String NAME = "importtsv";
+
+ public final static String MAPPER_CONF_KEY = "importtsv.mapper.class";
+ public final static String BULK_OUTPUT_CONF_KEY = "importtsv.bulk.output";
+ public final static String TIMESTAMP_CONF_KEY = "importtsv.timestamp";
+ public final static String JOB_NAME_CONF_KEY = "mapreduce.job.name";
+ // TODO: the rest of these configs are used exclusively by TsvImporterMapper.
+ // Move them out of the tool and let the mapper handle its own validation.
+ public final static String DRY_RUN_CONF_KEY = "importtsv.dry.run";
+ // If true, bad lines are logged to stderr. Default: false.
+ public final static String LOG_BAD_LINES_CONF_KEY = "importtsv.log.bad.lines";
+ public final static String SKIP_LINES_CONF_KEY = "importtsv.skip.bad.lines";
+ public final static String SKIP_EMPTY_COLUMNS = "importtsv.skip.empty.columns";
+ public final static String COLUMNS_CONF_KEY = "importtsv.columns";
+ public final static String SEPARATOR_CONF_KEY = "importtsv.separator";
+ public final static String ATTRIBUTE_SEPERATOR_CONF_KEY = "attributes.seperator";
+ //This config is used to propagate credentials from parent MR jobs which launch
+ //ImportTSV jobs. SEE IntegrationTestImportTsv.
+ public final static String CREDENTIALS_LOCATION = "credentials_location";
+ final static String DEFAULT_SEPARATOR = "\t";
+ final static String DEFAULT_ATTRIBUTES_SEPERATOR = "=>";
+ final static String DEFAULT_MULTIPLE_ATTRIBUTES_SEPERATOR = ",";
+ final static Class DEFAULT_MAPPER = TsvImporterMapper.class;
+ public final static String CREATE_TABLE_CONF_KEY = "create.table";
+ public final static String NO_STRICT_COL_FAMILY = "no.strict";
+ /**
+ * If table didn't exist and was created in dry-run mode, this flag is
+ * flipped to delete it when MR ends.
+ */
+ private static boolean DRY_RUN_TABLE_CREATED;
+
+ public static class TsvParser {
+ /**
+ * Column families and qualifiers mapped to the TSV columns
+ */
+ private final byte[][] families;
+ private final byte[][] qualifiers;
+
+ private final byte separatorByte;
+
+ private int rowKeyColumnIndex;
+
+ private int maxColumnCount;
+
+ // Default value must be negative
+ public static final int DEFAULT_TIMESTAMP_COLUMN_INDEX = -1;
+
+ private int timestampKeyColumnIndex = DEFAULT_TIMESTAMP_COLUMN_INDEX;
+
+ public static final String ROWKEY_COLUMN_SPEC = "HBASE_ROW_KEY";
+
+ public static final String TIMESTAMPKEY_COLUMN_SPEC = "HBASE_TS_KEY";
+
+ public static final String ATTRIBUTES_COLUMN_SPEC = "HBASE_ATTRIBUTES_KEY";
+
+ public static final String CELL_VISIBILITY_COLUMN_SPEC = "HBASE_CELL_VISIBILITY";
+
+ public static final String CELL_TTL_COLUMN_SPEC = "HBASE_CELL_TTL";
+
+ private int attrKeyColumnIndex = DEFAULT_ATTRIBUTES_COLUMN_INDEX;
+
+ public static final int DEFAULT_ATTRIBUTES_COLUMN_INDEX = -1;
+
+ public static final int DEFAULT_CELL_VISIBILITY_COLUMN_INDEX = -1;
+
+ public static final int DEFAULT_CELL_TTL_COLUMN_INDEX = -1;
+
+ private int cellVisibilityColumnIndex = DEFAULT_CELL_VISIBILITY_COLUMN_INDEX;
+
+ private int cellTTLColumnIndex = DEFAULT_CELL_TTL_COLUMN_INDEX;
+
+ /**
+ * @param columnsSpecification the list of columns to parse out, comma separated.
+ * The row key should be the special token TsvParser.ROWKEY_COLUMN_SPEC
+ * @param separatorStr
+ */
+ public TsvParser(String columnsSpecification, String separatorStr) {
+ // Configure separator
+ byte[] separator = Bytes.toBytes(separatorStr);
+ Preconditions.checkArgument(separator.length == 1,
+ "TsvParser only supports single-byte separators");
+ separatorByte = separator[0];
+
+ // Configure columns
+ ArrayList<String> columnStrings = Lists.newArrayList(
+ Splitter.on(',').trimResults().split(columnsSpecification));
+
+ maxColumnCount = columnStrings.size();
+ families = new byte[maxColumnCount][];
+ qualifiers = new byte[maxColumnCount][];
+
+ for (int i = 0; i < columnStrings.size(); i++) {
+ String str = columnStrings.get(i);
+ if (ROWKEY_COLUMN_SPEC.equals(str)) {
+ rowKeyColumnIndex = i;
+ continue;
+ }
+ if (TIMESTAMPKEY_COLUMN_SPEC.equals(str)) {
+ timestampKeyColumnIndex = i;
+ continue;
+ }
+ if (ATTRIBUTES_COLUMN_SPEC.equals(str)) {
+ attrKeyColumnIndex = i;
+ continue;
+ }
+ if (CELL_VISIBILITY_COLUMN_SPEC.equals(str)) {
+ cellVisibilityColumnIndex = i;
+ continue;
+ }
+ if (CELL_TTL_COLUMN_SPEC.equals(str)) {
+ cellTTLColumnIndex = i;
+ continue;
+ }
+ String[] parts = str.split(":", 2);
+ if (parts.length == 1) {
+ families[i] = str.getBytes();
+ qualifiers[i] = HConstants.EMPTY_BYTE_ARRAY;
+ } else {
+ families[i] = parts[0].getBytes();
+ qualifiers[i] = parts[1].getBytes();
+ }
+ }
+ }
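As a brief illustration of the column specification described above (a hypothetical sketch, not part of this patch): the special HBASE_ROW_KEY token marks the row-key column, and every other entry maps a TSV column to a family or family:qualifier. The family and qualifier names below are assumptions.

    // "d:c1" and "d:c2" are hypothetical family:qualifier names.
    ImportTsv.TsvParser parser =
        new ImportTsv.TsvParser("HBASE_ROW_KEY,d:c1,d:c2", "\t");
    // parser.getRowKeyColumnIndex() == 0
    // Bytes.toString(parser.getFamily(1))    -> "d"
    // Bytes.toString(parser.getQualifier(1)) -> "c1"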
+
+ public boolean hasTimestamp() {
+ return timestampKeyColumnIndex != DEFAULT_TIMESTAMP_COLUMN_INDEX;
+ }
+
+ public int getTimestampKeyColumnIndex() {
+ return timestampKeyColumnIndex;
+ }
+
+ public boolean hasAttributes() {
+ return attrKeyColumnIndex != DEFAULT_ATTRIBUTES_COLUMN_INDEX;
+ }
+
+ public boolean hasCellVisibility() {
+ return cellVisibilityColumnIndex != DEFAULT_CELL_VISIBILITY_COLUMN_INDEX;
+ }
+
+ public boolean hasCellTTL() {
+ return cellTTLColumnIndex != DEFAULT_CELL_VISIBILITY_COLUMN_INDEX;
+ }
+
+ public int getAttributesKeyColumnIndex() {
+ return attrKeyColumnIndex;
+ }
+
+ public int getCellVisibilityColumnIndex() {
+ return cellVisibilityColumnIndex;
+ }
+
+ public int getCellTTLColumnIndex() {
+ return cellTTLColumnIndex;
+ }
+
+ public int getRowKeyColumnIndex() {
+ return rowKeyColumnIndex;
+ }
+
+ public byte[] getFamily(int idx) {
+ return families[idx];
+ }
+ public byte[] getQualifier(int idx) {
+ return qualifiers[idx];
+ }
+
+ public ParsedLine parse(byte[] lineBytes, int length)
+ throws BadTsvLineException {
+ // Enumerate separator offsets
+ ArrayList<Integer> tabOffsets = new ArrayList<>(maxColumnCount);
+ for (int i = 0; i < length; i++) {
+ if (lineBytes[i] == separatorByte) {
+ tabOffsets.add(i);
+ }
+ }
+ if (tabOffsets.isEmpty()) {
+ throw new BadTsvLineException("No delimiter");
+ }
+
+ tabOffsets.add(length);
+
+ if (tabOffsets.size() > maxColumnCount) {
+ throw new BadTsvLineException("Excessive columns");
+ } else if (tabOffsets.size() <= getRowKeyColumnIndex()) {
+ throw new BadTsvLineException("No row key");
+ } else if (hasTimestamp()
+ && tabOffsets.size() <= getTimestampKeyColumnIndex()) {
+ throw new BadTsvLineException("No timestamp");
+ } else if (hasAttributes() && tabOffsets.size() <= getAttributesKeyColumnIndex()) {
+ throw new BadTsvLineException("No attributes specified");
+ } else if (hasCellVisibility() && tabOffsets.size() <= getCellVisibilityColumnIndex()) {
+ throw new BadTsvLineException("No cell visibility specified");
+ } else if (hasCellTTL() && tabOffsets.size() <= getCellTTLColumnIndex()) {
+ throw new BadTsvLineException("No cell TTL specified");
+ }
+ return new ParsedLine(tabOffsets, lineBytes);
+ }
+
+ class ParsedLine {
+ private final ArrayList<Integer> tabOffsets;
+ private byte[] lineBytes;
+
+ ParsedLine(ArrayList<Integer> tabOffsets, byte[] lineBytes) {
+ this.tabOffsets = tabOffsets;
+ this.lineBytes = lineBytes;
+ }
+
+ public int getRowKeyOffset() {
+ return getColumnOffset(rowKeyColumnIndex);
+ }
+ public int getRowKeyLength() {
+ return getColumnLength(rowKeyColumnIndex);
+ }
+
+ public long getTimestamp(long ts) throws BadTsvLineException {
+ // Return ts if HBASE_TS_KEY is not configured in column spec
+ if (!hasTimestamp()) {
+ return ts;
+ }
+
+ String timeStampStr = Bytes.toString(lineBytes,
+ getColumnOffset(timestampKeyColumnIndex),
+ getColumnLength(timestampKeyColumnIndex));
+ try {
+ return Long.parseLong(timeStampStr);
+ } catch (NumberFormatException nfe) {
+ // treat this record as bad record
+ throw new BadTsvLineException("Invalid timestamp " + timeStampStr);
+ }
+ }
+
+ private String getAttributes() {
+ if (!hasAttributes()) {
+ return null;
+ } else {
+ return Bytes.toString(lineBytes, getColumnOffset(attrKeyColumnIndex),
+ getColumnLength(attrKeyColumnIndex));
+ }
+ }
+
+ public String[] getIndividualAttributes() {
+ String attributes = getAttributes();
+ if (attributes != null) {
+ return attributes.split(DEFAULT_MULTIPLE_ATTRIBUTES_SEPERATOR);
+ } else {
+ return null;
+ }
+ }
+
+ public int getAttributeKeyOffset() {
+ if (hasAttributes()) {
+ return getColumnOffset(attrKeyColumnIndex);
+ } else {
+ return DEFAULT_ATTRIBUTES_COLUMN_INDEX;
+ }
+ }
+
+ public int getAttributeKeyLength() {
+ if (hasAttributes()) {
+ return getColumnLength(attrKeyColumnIndex);
+ } else {
+ return DEFAULT_ATTRIBUTES_COLUMN_INDEX;
+ }
+ }
+
+ public int getCellVisibilityColumnOffset() {
+ if (hasCellVisibility()) {
+ return getColumnOffset(cellVisibilityColumnIndex);
+ } else {
+ return DEFAULT_CELL_VISIBILITY_COLUMN_INDEX;
+ }
+ }
+
+ public int getCellVisibilityColumnLength() {
+ if (hasCellVisibility()) {
+ return getColumnLength(cellVisibilityColumnIndex);
+ } else {
+ return DEFAULT_CELL_VISIBILITY_COLUMN_INDEX;
+ }
+ }
+
+ public String getCellVisibility() {
+ if (!hasCellVisibility()) {
+ return null;
+ } else {
+ return Bytes.toString(lineBytes, getColumnOffset(cellVisibilityColumnIndex),
+ getColumnLength(cellVisibilityColumnIndex));
+ }
+ }
+
+ public int getCellTTLColumnOffset() {
+ if (hasCellTTL()) {
+ return getColumnOffset(cellTTLColumnIndex);
+ } else {
+ return DEFAULT_CELL_TTL_COLUMN_INDEX;
+ }
+ }
+
+ public int getCellTTLColumnLength() {
+ if (hasCellTTL()) {
+ return getColumnLength(cellTTLColumnIndex);
+ } else {
+ return DEFAULT_CELL_TTL_COLUMN_INDEX;
+ }
+ }
+
+ public long getCellTTL() {
+ if (!hasCellTTL()) {
+ return 0;
+ } else {
+ return Bytes.toLong(lineBytes, getColumnOffset(cellTTLColumnIndex),
+ getColumnLength(cellTTLColumnIndex));
+ }
+ }
+
+ public int getColumnOffset(int idx) {
+ if (idx > 0)
+ return tabOffsets.get(idx - 1) + 1;
+ else
+ return 0;
+ }
+ public int getColumnLength(int idx) {
+ return tabOffsets.get(idx) - getColumnOffset(idx);
+ }
+ public int getColumnCount() {
+ return tabOffsets.size();
+ }
+ public byte[] getLineBytes() {
+ return lineBytes;
+ }
+ }
+
+ public static class BadTsvLineException extends Exception {
+ public BadTsvLineException(String err) {
+ super(err);
+ }
+ private static final long serialVersionUID = 1L;
+ }
+
+ /**
+ * Return starting position and length of row key from the specified line bytes.
+ * @param lineBytes
+ * @param length
+ * @return Pair of row key offset and length.
+ * @throws BadTsvLineException
+ */
+ public Pair<Integer, Integer> parseRowKey(byte[] lineBytes, int length)
+ throws BadTsvLineException {
+ int rkColumnIndex = 0;
+ int startPos = 0, endPos = 0;
+ for (int i = 0; i <= length; i++) {
+ if (i == length || lineBytes[i] == separatorByte) {
+ endPos = i - 1;
+ if (rkColumnIndex++ == getRowKeyColumnIndex()) {
+ if ((endPos + 1) == startPos) {
+ throw new BadTsvLineException("Empty value for ROW KEY.");
+ }
+ break;
+ } else {
+ startPos = endPos + 2;
+ }
+ }
+ if (i == length) {
+ throw new BadTsvLineException(
+ "Row key does not exist as number of columns in the line"
+ + " are less than row key position.");
+ }
+ }
+ return new Pair<>(startPos, endPos - startPos + 1);
+ }
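A small worked example of parseRowKey() under the column spec sketched earlier (row key in the first column, tab separator; illustrative only, not part of this patch):

    // parser as in the earlier sketch: new ImportTsv.TsvParser("HBASE_ROW_KEY,d:c1,d:c2", "\t")
    byte[] line = Bytes.toBytes("r1\tval1");            // hypothetical input line
    Pair<Integer, Integer> rk = parser.parseRowKey(line, line.length);
    // rk.getFirst() == 0 (offset), rk.getSecond() == 2 (length of "r1")
    // parseRowKey throws the checked BadTsvLineException for malformed lines.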
+ }
+
+ /**
+ * Sets up the actual job.
+ *
+ * @param conf The current configuration.
+ * @param args The command line parameters.
+ * @return The newly created job.
+ * @throws IOException When setting up the job fails.
+ */
+ protected static Job createSubmittableJob(Configuration conf, String[] args)
+ throws IOException, ClassNotFoundException {
+ Job job = null;
+ boolean isDryRun = conf.getBoolean(DRY_RUN_CONF_KEY, false);
+ try (Connection connection = ConnectionFactory.createConnection(conf)) {
+ try (Admin admin = connection.getAdmin()) {
+ // Support separator characters that are not valid in XML configuration files
+ // by re-encoding the passed separator as a Base64 string.
+ String actualSeparator = conf.get(SEPARATOR_CONF_KEY);
+ if (actualSeparator != null) {
+ conf.set(SEPARATOR_CONF_KEY,
+ Base64.encodeBytes(actualSeparator.getBytes()));
+ }
+
+ // See if a non-default Mapper was set
+ String mapperClassName = conf.get(MAPPER_CONF_KEY);
+ Class mapperClass = mapperClassName != null? Class.forName(mapperClassName): DEFAULT_MAPPER;
+
+ TableName tableName = TableName.valueOf(args[0]);
+ Path inputDir = new Path(args[1]);
+ String jobName = conf.get(JOB_NAME_CONF_KEY,NAME + "_" + tableName.getNameAsString());
+ job = Job.getInstance(conf, jobName);
+ job.setJarByClass(mapperClass);
+ FileInputFormat.setInputPaths(job, inputDir);
+ job.setInputFormatClass(TextInputFormat.class);
+ job.setMapperClass(mapperClass);
+ job.setMapOutputKeyClass(ImmutableBytesWritable.class);
+ String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
+ String[] columns = conf.getStrings(COLUMNS_CONF_KEY);
+ if(StringUtils.isNotEmpty(conf.get(CREDENTIALS_LOCATION))) {
+ String fileLoc = conf.get(CREDENTIALS_LOCATION);
+ Credentials cred = Credentials.readTokenStorageFile(new File(fileLoc), conf);
+ job.getCredentials().addAll(cred);
+ }
+
+ if (hfileOutPath != null) {
+ if (!admin.tableExists(tableName)) {
+ LOG.warn(format("Table '%s' does not exist.", tableName));
+ if ("yes".equalsIgnoreCase(conf.get(CREATE_TABLE_CONF_KEY, "yes"))) {
+ // TODO: this is backwards. Instead of depending on the existence of a table,
+ // create a sane splits file for HFileOutputFormat based on data sampling.
+ createTable(admin, tableName, columns);
+ if (isDryRun) {
+ LOG.warn("Dry run: Table will be deleted at end of dry run.");
+ synchronized (ImportTsv.class) {
+ DRY_RUN_TABLE_CREATED = true;
+ }
+ }
+ } else {
+ String errorMsg =
+ format("Table '%s' does not exist and '%s' is set to no.", tableName,
+ CREATE_TABLE_CONF_KEY);
+ LOG.error(errorMsg);
+ throw new TableNotFoundException(errorMsg);
+ }
+ }
+ try (Table table = connection.getTable(tableName);
+ RegionLocator regionLocator = connection.getRegionLocator(tableName)) {
+ boolean noStrict = conf.getBoolean(NO_STRICT_COL_FAMILY, false);
+ // if no.strict is false then check column family
+ if(!noStrict) {
+ ArrayList<String> unmatchedFamilies = new ArrayList<>();
+ Set<String> cfSet = getColumnFamilies(columns);
+ TableDescriptor tDesc = table.getDescriptor();
+ for (String cf : cfSet) {
+ if(!tDesc.hasColumnFamily(Bytes.toBytes(cf))) {
+ unmatchedFamilies.add(cf);
+ }
+ }
+ if(unmatchedFamilies.size() > 0) {
+ ArrayList<String> familyNames = new ArrayList<>();
+ for (ColumnFamilyDescriptor family : table.getDescriptor().getColumnFamilies()) {
+ familyNames.add(family.getNameAsString());
+ }
+ String msg =
+ "Column Families " + unmatchedFamilies + " specified in " + COLUMNS_CONF_KEY
+ + " does not match with any of the table " + tableName
+ + " column families " + familyNames + ".\n"
+ + "To disable column family check, use -D" + NO_STRICT_COL_FAMILY
+ + "=true.\n";
+ usage(msg);
+ System.exit(-1);
+ }
+ }
+ if (mapperClass.equals(TsvImporterTextMapper.class)) {
+ job.setMapOutputValueClass(Text.class);
+ job.setReducerClass(TextSortReducer.class);
+ } else {
+ job.setMapOutputValueClass(Put.class);
+ job.setCombinerClass(PutCombiner.class);
+ job.setReducerClass(PutSortReducer.class);
+ }
+ if (!isDryRun) {
+ Path outputDir = new Path(hfileOutPath);
+ FileOutputFormat.setOutputPath(job, outputDir);
+ HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(),
+ regionLocator);
+ }
+ }
+ } else {
+ if (!admin.tableExists(tableName)) {
+ String errorMsg = format("Table '%s' does not exist.", tableName);
+ LOG.error(errorMsg);
+ throw new TableNotFoundException(errorMsg);
+ }
+ if (mapperClass.equals(TsvImporterTextMapper.class)) {
+ usage(TsvImporterTextMapper.class.toString()
+ + " should not be used for non bulkloading case. use "
+ + TsvImporterMapper.class.toString()
+ + " or custom mapper whose value type is Put.");
+ System.exit(-1);
+ }
+ if (!isDryRun) {
+ // No reducers. Just write straight to table. Call initTableReducerJob
+ // to set up the TableOutputFormat.
+ TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job);
+ }
+ job.setNumReduceTasks(0);
+ }
+ if (isDryRun) {
+ job.setOutputFormatClass(NullOutputFormat.class);
+ job.getConfiguration().setStrings("io.serializations",
+ job.getConfiguration().get("io.serializations"),
+ MutationSerialization.class.getName(), ResultSerialization.class.getName(),
+ KeyValueSerialization.class.getName());
+ }
+ TableMapReduceUtil.addDependencyJars(job);
+ TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
+ org.apache.hadoop.hbase.shaded.com.google.common.base.Function.class /* Guava used by TsvParser */);
+ }
+ }
+ return job;
+ }
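Since ImportTsv is a Hadoop Tool, it is normally launched through ToolRunner, which ultimately builds the job via the setup above. A minimal sketch with hypothetical table, column, and path names (the bulk-load output key is optional; without it Puts are written directly to the table). This is an illustration, not part of this patch:

    Configuration conf = HBaseConfiguration.create();
    // First TSV column is the row key; the rest map to family:qualifier columns.
    conf.set(ImportTsv.COLUMNS_CONF_KEY, "HBASE_ROW_KEY,d:c1,d:c2");
    // Optional: emit HFiles for a subsequent bulk load instead of live Puts.
    conf.set(ImportTsv.BULK_OUTPUT_CONF_KEY, "/tmp/importtsv-hfiles");   // hypothetical path
    int exit = ToolRunner.run(conf, new ImportTsv(),
        new String[] { "myTable", "/tmp/input.tsv" });                   // hypothetical args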
+
+ private static void createTable(Admin admin, TableName tableName, String[] columns)
+ throws IOException {
+ HTableDescriptor htd = new HTableDescriptor(tableName);
+ Set<String> cfSet = getColumnFamilies(columns);
+ for (String cf : cfSet) {
+ HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toBytes(cf));
+ htd.addFamily(hcd);
+ }
+ LOG.warn(format("Creating table '%s' with '%s' columns and default descriptors.",
+ tableName, cfSet));
+ admin.createTable(htd);
+ }
+
+ private static void deleteTable(Configuration conf, String[] args) {
+ TableName tableName = TableName.valueOf(args[0]);
+ try (Connection connection = ConnectionFactory.createConnection(conf);
+ Admin admin = connection.getAdmin()) {
+ try {
+ admin.disableTable(tableName);
+ } catch (TableNotEnabledException e) {
+ LOG.debug("Dry mode: Table: " + tableName + " already disabled, so just deleting it.");
+ }
+ admin.deleteTable(tableName);
+ } catch (IOException e) {
+ LOG.error(format("***Dry run: Failed to delete table '%s'.***%n%s", tableName,
+ e.toString()));
+ return;
+ }
+ LOG.info(format("Dry run: Deleted table '%s'.", tableName));
+ }
+
+ private static Set<String> getColumnFamilies(String[] columns) {
+ Set<String> cfSet = new HashSet<>();
+ for (String aColumn : columns) {
+ if (TsvParser.ROWKEY_COLUMN_SPEC.equals(aColumn)
+ || TsvParser.TIMESTAMPKEY_COLUMN_SPEC.equals(aColumn)
+ || TsvParser.CELL_VISIBILITY_COLUMN_SPEC.equals(aColumn)
+ || TsvParser.CELL_TTL_COLUMN_SPEC.equals(aColumn)
+ || TsvParser.ATTRIBUTES_COLUMN_SPEC.equals(aColumn))
+ continue;
+ // we are only concerned with the first one (in case this is a cf:cq)
+ cfSet.add(aColumn.split(":", 2)[0]);
+ }
+ return cfSet;
+ }
+
+ /*
+ * @param errorMsg Error message. Can be null.
+ */
+ private static void usage(final String errorMsg) {
+ if (errorMsg != null && errorMsg.length() > 0) {
+ System.err.println("ERROR: " + errorMsg);
+ }
+ String usage =
+ "Usage: " + NAME + " -D"+ COLUMNS_CONF_KEY + "=a,b,c <tablename> <inputdir>\n" +
+ "\n" +
+ "Imports the given input directory of TSV data into the specified table.\n" +
+ "\n" +
+ "The column names of the TSV data must be specified using the -D" + COLUMNS_CONF_KEY + "\n" +
+ "option. This option takes the form of comma-separated column names, where each\n" +
+ "column name is either a simple column family, or a columnfamily:qualifier. The special\n" +
+ "column name " + TsvParser.ROWKEY_COLUMN_SPEC + " is used to designate that this column should be used\n" +
+ "as the row key for each imported record. You must specify exactly one column\n" +
+ "to be the row key, and you must specify a column name for every column that exists in the\n" +
+ "input data. Another special column" + TsvParser.TIMESTAMPKEY_COLUMN_SPEC +
+ " designates that this column should be\n" +
+ "used as timestamp for each record. Unlike " + TsvParser.ROWKEY_COLUMN_SPEC + ", " +
+ TsvParser.TIMESTAMPKEY_COLUMN_SPEC + " is optional." + "\n" +
+ "You must specify at most one column as timestamp key for each imported record.\n" +
+ "Record with invalid timestamps (blank, non-numeric) will be treated as bad record.\n" +
+ "Note: if you use this option, then '" + TIMESTAMP_CONF_KEY + "' option will be ignored.\n" +
+ "\n" +
+ "Other special columns that can be specified are " + TsvParser.CELL_TTL_COLUMN_SPEC +
+ " and " + TsvParser.CELL_VISIBILITY_COLUMN_SPEC + ".\n" +
+ TsvParser.CELL_TTL_COLUMN_SPEC + " designates that this column will be used " +
+ "as a Cell's Time To Live (TTL) attribute.\n" +
+ TsvParser.CELL_VISIBILITY_COLUMN_SPEC + " designates that this column contains the " +
+ "visibility label expression.\n" +
+ "\n" +
+ TsvParser.ATTRIBUTES_COLUMN_SPEC + " can be used to specify Operation Attributes per record.\n" +
+ " They should be specified as key=>value where " + TsvParser.DEFAULT_ATTRIBUTES_COLUMN_INDEX + " is used\n" +
+ " as the separator. Note that more than one OperationAttribute can be specified.\n" +
+ "By default importtsv will load data directly into HBase. To instead generate\n" +
+ "HFiles of data to prepare for a bulk data load, pass the option:\n" +
+ " -D" + BULK_OUTPUT_CONF_KEY + "=/path/for/output\n" +
+ " Note: if you do not use this option, then the target table must already exist in HBase\n" +
+ "\n" +
+ "Other options that may be specified with -D include:\n" +
+ " -D" + DRY_RUN_CONF_KEY + "=true - Dry run mode. Data is not actually populated into" +
+ " table. If table does not exist, it is created but deleted in the end.\n" +
+ " -D" + SKIP_LINES_CONF_KEY + "=false - fail if encountering an invalid line\n" +
+ " -D" + LOG_BAD_LINES_CONF_KEY + "=true - logs invalid lines to stderr\n" +
+ " -D" + SKIP_EMPTY_COLUMNS + "=false - If true then skip empty columns in bulk import\n" +
+ " '-D" + SEPARATOR_CONF_KEY + "=|' - eg separate on pipes instead of tabs\n" +
+ " -D" + TIMESTAMP_CONF_KEY + "=currentTimeAsLong - use the specified timestamp for the import\n" +
+ " -D" + MAPPER_CONF_KEY + "=my.Mapper - A user-defined Mapper to use instead of " +
+ DEFAULT_MAPPER.getName() + "\n" +
+ " -D" + JOB_NAME_CONF_KEY + "=jobName - use the specified mapreduce job name for the import\n" +
+ " -D" + CREATE_TABLE_CONF_KEY + "=no - can be used to avoid creation of table by this tool\n" +
+ " Note: if you set this to 'no', then the target table must already exist in HBase\n" +
+ " -D" + NO_STRICT_COL_FAMILY + "=true - ignore column family check in hbase table. " +
+ "Default is false\n\n" +
+ "For performance consider the following options:\n" +
+ " -Dmapreduce.map.speculative=false\n" +
+ " -Dmapreduce.reduce.speculative=false";
+
+ System.err.println(usage);
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+ if (args.length < 2) {
+ usage("Wrong number of arguments: " + args.length);
+ return -1;
+ }
+
+ // When MAPPER_CONF_KEY is null, the user wants to use the provided TsvImporterMapper, so
+ // perform validation on these additional args. When it's not null, the user has provided their
+ // own mapper, so these validations are not relevant.
+ // TODO: validation for TsvImporterMapper, not this tool. Move elsewhere.
+ if (null == getConf().get(MAPPER_CONF_KEY)) {
+ // Make sure columns are specified
+ String[] columns = getConf().getStrings(COLUMNS_CONF_KEY);
+ if (columns == null) {
+ usage("No columns specified. Please specify with -D" +
+ COLUMNS_CONF_KEY+"=...");
+ return -1;
+ }
+
+ // Make sure they specify exactly one column as the row key
+ int rowkeysFound = 0;
+ for (String col : columns) {
+ if (col.equals(TsvParser.ROWKEY_COLUMN_SPEC)) rowkeysFound++;
+ }
+ if (rowkeysFound != 1) {
+ usage("Must specify exactly one column as " + TsvParser.ROWKEY_COLUMN_SPEC);
+ return -1;
+ }
+
+ // Make sure we have at most one column as the timestamp key
+ int tskeysFound = 0;
+ for (String col : columns) {
+ if (col.equals(TsvParser.TIMESTAMPKEY_COLUMN_SPEC))
+ tskeysFound++;
+ }
+ if (tskeysFound > 1) {
+ usage("Must specify at most one column as "
+ + TsvParser.TIMESTAMPKEY_COLUMN_SPEC);
+ return -1;
+ }
+
+ int attrKeysFound = 0;
+ for (String col : columns) {
+ if (col.equals(TsvParser.ATTRIBUTES_COLUMN_SPEC))
+ attrKeysFound++;
+ }
+ if (attrKeysFound > 1) {
+ usage("Must specify at most one column as "
+ + TsvParser.ATTRIBUTES_COLUMN_SPEC);
+ return -1;
+ }
+
+ // Make sure one or more columns are specified excluding rowkey and
+ // timestamp key
+ if (columns.length - (rowkeysFound + tskeysFound + attrKeysFound) < 1) {
+ usage("One or more columns in addition to the row key and timestamp(optional) are required");
+ return -1;
+ }
+ }
+
+ // If timestamp option is not specified, use current system time.
+ long timestamp = getConf().getLong(TIMESTAMP_CONF_KEY, System.currentTimeMillis());
+
+ // Set it back to replace invalid timestamp (non-numeric) with current
+ // system time
+ getConf().setLong(TIMESTAMP_CONF_KEY, timestamp);
+
+ synchronized (ImportTsv.class) {
+ DRY_RUN_TABLE_CREATED = false;
+ }
+ Job job = createSubmittableJob(getConf(), args);
+ boolean success = job.waitForCompletion(true);
+ boolean delete = false;
+ synchronized (ImportTsv.class) {
+ delete = DRY_RUN_TABLE_CREATED;
+ }
+ if (delete) {
+ deleteTable(getConf(), args);
+ }
+ return success ? 0 : 1;
+ }
+
+ public static void main(String[] args) throws Exception {
+ int status = ToolRunner.run(HBaseConfiguration.create(), new ImportTsv(), args);
+ System.exit(status);
+ }
+}
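
For reference, a minimal sketch of driving the tool programmatically, mirroring the main() above. The table name, column spec, and paths are illustrative, and the -D keys are assumed to match the importtsv.* names printed by the usage text.

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.ImportTsv;
import org.apache.hadoop.util.ToolRunner;

public class ImportTsvDriver {
  public static void main(String[] args) throws Exception {
    // ToolRunner parses the -D options via GenericOptionsParser before run() is invoked.
    String[] toolArgs = new String[] {
        "-Dimporttsv.columns=HBASE_ROW_KEY,info:name,info:age",  // assumed key and column spec
        "-Dimporttsv.bulk.output=/tmp/importtsv-out",            // omit to write directly to the table
        "myTable",                                               // hypothetical table name
        "/data/input-tsv"                                        // hypothetical input directory
    };
    int status = ToolRunner.run(HBaseConfiguration.create(), new ImportTsv(), toolArgs);
    System.exit(status);
  }
}
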
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/JarFinder.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/JarFinder.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/JarFinder.java
new file mode 100644
index 0000000..953df62
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/JarFinder.java
@@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.base.Preconditions;
+
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.net.URLDecoder;
+import java.text.MessageFormat;
+import java.util.Enumeration;
+import java.util.jar.JarFile;
+import java.util.jar.JarOutputStream;
+import java.util.jar.Manifest;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipOutputStream;
+
+/**
+ * Finds the Jar for a class. If the class is in a directory in the
+ * classpath, it creates a Jar on the fly with the contents of the directory
+ * and returns the path to that Jar. If a Jar is created, it is created in
+ * the system temporary directory.
+ *
+ * This file was forked from hadoop/common/branches/branch-2@1377176.
+ */
+public class JarFinder {
+
+ private static void copyToZipStream(File file, ZipEntry entry,
+ ZipOutputStream zos) throws IOException {
+ InputStream is = new FileInputStream(file);
+ try {
+ zos.putNextEntry(entry);
+ byte[] arr = new byte[4096];
+ int read = is.read(arr);
+ while (read > -1) {
+ zos.write(arr, 0, read);
+ read = is.read(arr);
+ }
+ } finally {
+ try {
+ is.close();
+ } finally {
+ zos.closeEntry();
+ }
+ }
+ }
+
+ public static void jarDir(File dir, String relativePath, ZipOutputStream zos)
+ throws IOException {
+ Preconditions.checkNotNull(relativePath, "relativePath");
+ Preconditions.checkNotNull(zos, "zos");
+
+ // by JAR spec, if there is a manifest, it must be the first entry in the
+ // ZIP.
+ File manifestFile = new File(dir, JarFile.MANIFEST_NAME);
+ ZipEntry manifestEntry = new ZipEntry(JarFile.MANIFEST_NAME);
+ if (!manifestFile.exists()) {
+ zos.putNextEntry(manifestEntry);
+ new Manifest().write(new BufferedOutputStream(zos));
+ zos.closeEntry();
+ } else {
+ copyToZipStream(manifestFile, manifestEntry, zos);
+ }
+ zos.closeEntry();
+ zipDir(dir, relativePath, zos, true);
+ zos.close();
+ }
+
+ private static void zipDir(File dir, String relativePath, ZipOutputStream zos,
+ boolean start) throws IOException {
+ String[] dirList = dir.list();
+ if (dirList == null) {
+ return;
+ }
+ for (String aDirList : dirList) {
+ File f = new File(dir, aDirList);
+ if (!f.isHidden()) {
+ if (f.isDirectory()) {
+ if (!start) {
+ ZipEntry dirEntry = new ZipEntry(relativePath + f.getName() + "/");
+ zos.putNextEntry(dirEntry);
+ zos.closeEntry();
+ }
+ String filePath = f.getPath();
+ File file = new File(filePath);
+ zipDir(file, relativePath + f.getName() + "/", zos, false);
+ }
+ else {
+ String path = relativePath + f.getName();
+ if (!path.equals(JarFile.MANIFEST_NAME)) {
+ ZipEntry anEntry = new ZipEntry(path);
+ copyToZipStream(f, anEntry, zos);
+ }
+ }
+ }
+ }
+ }
+
+ private static void createJar(File dir, File jarFile) throws IOException {
+ Preconditions.checkNotNull(dir, "dir");
+ Preconditions.checkNotNull(jarFile, "jarFile");
+ File jarDir = jarFile.getParentFile();
+ if (!jarDir.exists()) {
+ if (!jarDir.mkdirs()) {
+ throw new IOException(MessageFormat.format("could not create dir [{0}]",
+ jarDir));
+ }
+ }
+ try (FileOutputStream fos = new FileOutputStream(jarFile);
+ JarOutputStream jos = new JarOutputStream(fos)) {
+ jarDir(dir, "", jos);
+ }
+ }
+
+ /**
+ * Returns the full path to the Jar containing the class. It always returns a
+ * JAR.
+ *
+ * @param klass class.
+ *
+ * @return path to the Jar containing the class.
+ */
+ public static String getJar(Class klass) {
+ Preconditions.checkNotNull(klass, "klass");
+ ClassLoader loader = klass.getClassLoader();
+ if (loader != null) {
+ String class_file = klass.getName().replaceAll("\\.", "/") + ".class";
+ try {
+ for (Enumeration itr = loader.getResources(class_file);
+ itr.hasMoreElements(); ) {
+ URL url = (URL) itr.nextElement();
+ String path = url.getPath();
+ if (path.startsWith("file:")) {
+ path = path.substring("file:".length());
+ }
+ path = URLDecoder.decode(path, "UTF-8");
+ if ("jar".equals(url.getProtocol())) {
+ path = URLDecoder.decode(path, "UTF-8");
+ return path.replaceAll("!.*$", "");
+ }
+ else if ("file".equals(url.getProtocol())) {
+ String klassName = klass.getName();
+ klassName = klassName.replace(".", "/") + ".class";
+ path = path.substring(0, path.length() - klassName.length());
+ File baseDir = new File(path);
+ File testDir = new File(System.getProperty("test.build.dir", "target/test-dir"));
+ testDir = testDir.getAbsoluteFile();
+ if (!testDir.exists()) {
+ testDir.mkdirs();
+ }
+ File tempJar = File.createTempFile("hadoop-", "", testDir);
+ tempJar = new File(tempJar.getAbsolutePath() + ".jar");
+ tempJar.deleteOnExit();
+ createJar(baseDir, tempJar);
+ return tempJar.getAbsolutePath();
+ }
+ }
+ }
+ catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+ return null;
+ }
+}
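
A small usage sketch for the class above: it resolves the jar that contains a given class, creating a temporary jar under test.build.dir (default target/test-dir) when the class was loaded from a directory on the classpath.

import org.apache.hadoop.hbase.mapreduce.JarFinder;

public class JarFinderDemo {
  public static void main(String[] args) {
    // Resolve the jar (or freshly created temporary jar) containing JarFinder itself.
    String jarPath = JarFinder.getJar(JarFinder.class);
    System.out.println("Jar for JarFinder: " + jarPath);
  }
}
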
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/KeyValueSerialization.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/KeyValueSerialization.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/KeyValueSerialization.java
new file mode 100644
index 0000000..241608b
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/KeyValueSerialization.java
@@ -0,0 +1,88 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.KeyValueUtil;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.io.serializer.Deserializer;
+import org.apache.hadoop.io.serializer.Serialization;
+import org.apache.hadoop.io.serializer.Serializer;
+
+@InterfaceAudience.Public
+public class KeyValueSerialization implements Serialization<KeyValue> {
+ @Override
+ public boolean accept(Class<?> c) {
+ return KeyValue.class.isAssignableFrom(c);
+ }
+
+ @Override
+ public KeyValueDeserializer getDeserializer(Class<KeyValue> t) {
+ return new KeyValueDeserializer();
+ }
+
+ @Override
+ public KeyValueSerializer getSerializer(Class<KeyValue> c) {
+ return new KeyValueSerializer();
+ }
+
+ public static class KeyValueDeserializer implements Deserializer<KeyValue> {
+ private DataInputStream dis;
+
+ @Override
+ public void close() throws IOException {
+ this.dis.close();
+ }
+
+ @Override
+ public KeyValue deserialize(KeyValue ignore) throws IOException {
+ // I can't overwrite the passed in KV, not from a proto kv, not just yet. TODO
+ return KeyValueUtil.create(this.dis);
+ }
+
+ @Override
+ public void open(InputStream is) throws IOException {
+ this.dis = new DataInputStream(is);
+ }
+ }
+
+ public static class KeyValueSerializer implements Serializer<KeyValue> {
+ private DataOutputStream dos;
+
+ @Override
+ public void close() throws IOException {
+ this.dos.close();
+ }
+
+ @Override
+ public void open(OutputStream os) throws IOException {
+ this.dos = new DataOutputStream(os);
+ }
+
+ @Override
+ public void serialize(KeyValue kv) throws IOException {
+ KeyValueUtil.write(kv, this.dos);
+ }
+ }
+}
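
A minimal sketch of registering this serialization on a job, alongside the Mutation and Result serializations, in the same way ImportTsv extends io.serializations above; the job name is illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.KeyValueSerialization;
import org.apache.hadoop.hbase.mapreduce.MutationSerialization;
import org.apache.hadoop.hbase.mapreduce.ResultSerialization;
import org.apache.hadoop.mapreduce.Job;

public class SerializationSetup {
  public static Job newJob() throws Exception {
    Configuration conf = HBaseConfiguration.create();
    Job job = Job.getInstance(conf, "kv-serialization-example");
    // Append the HBase serializations to whatever is already configured.
    job.getConfiguration().setStrings("io.serializations",
        job.getConfiguration().get("io.serializations"),
        MutationSerialization.class.getName(),
        ResultSerialization.class.getName(),
        KeyValueSerialization.class.getName());
    return job;
  }
}
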
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/KeyValueSortReducer.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/KeyValueSortReducer.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/KeyValueSortReducer.java
new file mode 100644
index 0000000..997e5a8
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/KeyValueSortReducer.java
@@ -0,0 +1,57 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.util.TreeSet;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.CellComparator;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.mapreduce.Reducer;
+
+/**
+ * Emits sorted KeyValues.
+ * Reads in all KeyValues from passed Iterator, sorts them, then emits
+ * KeyValues in sorted order. If lots of columns per row, it will use lots of
+ * memory sorting.
+ * @see HFileOutputFormat2
+ */
+@InterfaceAudience.Public
+public class KeyValueSortReducer
+ extends Reducer<ImmutableBytesWritable, KeyValue, ImmutableBytesWritable, KeyValue> {
+ protected void reduce(ImmutableBytesWritable row, Iterable<KeyValue> kvs,
+ Reducer<ImmutableBytesWritable, KeyValue, ImmutableBytesWritable, KeyValue>.Context context)
+ throws java.io.IOException, InterruptedException {
+ TreeSet<KeyValue> map = new TreeSet<>(CellComparator.COMPARATOR);
+ for (KeyValue kv: kvs) {
+ try {
+ map.add(kv.clone());
+ } catch (CloneNotSupportedException e) {
+ throw new java.io.IOException(e);
+ }
+ }
+ context.setStatus("Read " + map.getClass());
+ int index = 0;
+ for (KeyValue kv: map) {
+ context.write(row, kv);
+ if (++index % 100 == 0) context.setStatus("Wrote " + index);
+ }
+ }
+}
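
A minimal wiring sketch, assuming a job whose mappers emit (row, KeyValue) pairs that should be re-emitted in sorted order, typically ahead of HFileOutputFormat2.

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.KeyValueSortReducer;
import org.apache.hadoop.mapreduce.Job;

public class SortReducerWiring {
  // Hypothetical helper: the caller is assumed to have set the mapper and input format already.
  public static void configure(Job job) {
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
    job.setReducerClass(KeyValueSortReducer.class);
  }
}
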
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableHFileOutputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableHFileOutputFormat.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableHFileOutputFormat.java
new file mode 100644
index 0000000..9f783f1
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableHFileOutputFormat.java
@@ -0,0 +1,122 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapreduce.Job;
+
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.util.List;
+import org.apache.hadoop.hbase.shaded.com.google.common.annotations.VisibleForTesting;
+
+/**
+ * Creates a three-level directory tree: the first level is the table name, the
+ * second level is the column family name, and all HFiles for one family are
+ * written under that family's directory:
+ * -tableName1
+ *   -columnFamilyName1
+ *   -columnFamilyName2
+ *     -HFiles
+ * -tableName2
+ *   -columnFamilyName1
+ *     -HFiles
+ *   -columnFamilyName2
+ */
+@InterfaceAudience.Public
+@VisibleForTesting
+public class MultiTableHFileOutputFormat extends HFileOutputFormat2 {
+ private static final Log LOG = LogFactory.getLog(MultiTableHFileOutputFormat.class);
+
+ /**
+ * Creates a composite key to use as a mapper output key when using
+ * MultiTableHFileOutputFormat.configureIncrementalLoad to set up a bulk ingest job
+ *
+ * @param tableName Name of the Table - Eg: TableName.getNameAsString()
+ * @param suffix Usually represents a rowkey when creating a mapper key or column family
+ * @return byte[] representation of composite key
+ */
+ public static byte[] createCompositeKey(byte[] tableName,
+ byte[] suffix) {
+ return combineTableNameSuffix(tableName, suffix);
+ }
+
+ /**
+ * Alternate api which accepts an ImmutableBytesWritable for the suffix
+ * @see MultiTableHFileOutputFormat#createCompositeKey(byte[], byte[])
+ */
+ public static byte[] createCompositeKey(byte[] tableName,
+ ImmutableBytesWritable suffix) {
+ return combineTableNameSuffix(tableName, suffix.get());
+ }
+
+ /**
+ * Alternate api which accepts a String for the tableName and ImmutableBytesWritable for the
+ * suffix
+ * @see MultiTableHFileOutputFormat#createCompositeKey(byte[], byte[])
+ */
+ public static byte[] createCompositeKey(String tableName,
+ ImmutableBytesWritable suffix) {
+ return combineTableNameSuffix(tableName.getBytes(Charset.forName("UTF-8")), suffix.get());
+ }
+
+ /**
+ * Analogous to
+ * {@link HFileOutputFormat2#configureIncrementalLoad(Job, TableDescriptor, RegionLocator)},
+ * this function will configure the requisite number of reducers to write HFiles for multiple
+ * tables simultaneously
+ *
+ * @param job See {@link org.apache.hadoop.mapreduce.Job}
+ * @param multiTableDescriptors Table descriptor and region locator pairs
+ * @throws IOException
+ */
+ public static void configureIncrementalLoad(Job job, List<TableInfo>
+ multiTableDescriptors)
+ throws IOException {
+ MultiTableHFileOutputFormat.configureIncrementalLoad(job, multiTableDescriptors,
+ MultiTableHFileOutputFormat.class);
+ }
+
+ final private static int validateCompositeKey(byte[] keyBytes) {
+
+ int separatorIdx = Bytes.indexOf(keyBytes, tableSeparator);
+
+ // Either the separator was not found or a tablename wasn't present or a key wasn't present
+ if (separatorIdx == -1) {
+ throw new IllegalArgumentException("Invalid format for composite key [" + Bytes
+ .toStringBinary(keyBytes) + "]. Cannot extract tablename and suffix from key");
+ }
+ return separatorIdx;
+ }
+
+ protected static byte[] getTableName(byte[] keyBytes) {
+ int separatorIdx = validateCompositeKey(keyBytes);
+ return Bytes.copy(keyBytes, 0, separatorIdx);
+ }
+
+ protected static byte[] getSuffix(byte[] keyBytes) {
+ int separatorIdx = validateCompositeKey(keyBytes);
+ return Bytes.copy(keyBytes, separatorIdx+1, keyBytes.length - separatorIdx - 1);
+ }
+}
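
A hypothetical mapper sketch showing createCompositeKey in use: the destination table name is prefixed onto each row key so the output format can route HFiles into the per-table/per-family directory tree described above; table, family, and qualifier names are illustrative.

import java.io.IOException;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.MultiTableHFileOutputFormat;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class CompositeKeyMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
  private static final byte[] TABLE = Bytes.toBytes("table1");  // assumed destination table
  private static final byte[] FAMILY = Bytes.toBytes("cf");     // assumed column family

  @Override
  protected void map(LongWritable offset, Text line, Context context)
      throws IOException, InterruptedException {
    byte[] row = Bytes.toBytes(line.toString());
    // Composite key = table name + separator + row key, as assembled by the output format.
    byte[] compositeKey = MultiTableHFileOutputFormat.createCompositeKey(TABLE, row);
    Put put = new Put(row);
    put.addColumn(FAMILY, Bytes.toBytes("q"), Bytes.toBytes(line.toString()));
    context.write(new ImmutableBytesWritable(compositeKey), put);
  }
}
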
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormat.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormat.java
new file mode 100644
index 0000000..f8fb6dc
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormat.java
@@ -0,0 +1,104 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.client.Scan;
+
+/**
+ * Convert HBase tabular data from multiple scanners into a format that
+ * is consumable by Map/Reduce.
+ *
+ * <p>
+ * Usage example
+ * </p>
+ *
+ * <pre>
+ * List<Scan> scans = new ArrayList<Scan>();
+ *
+ * Scan scan1 = new Scan();
+ * scan1.setStartRow(firstRow1);
+ * scan1.setStopRow(lastRow1);
+ * scan1.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, table1);
+ * scans.add(scan1);
+ *
+ * Scan scan2 = new Scan();
+ * scan2.setStartRow(firstRow2);
+ * scan2.setStopRow(lastRow2);
+ * scan2.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, table2);
+ * scans.add(scan2);
+ *
+ * TableMapReduceUtil.initTableMapperJob(scans, TableMapper.class, Text.class,
+ * IntWritable.class, job);
+ * </pre>
+ */
+@InterfaceAudience.Public
+public class MultiTableInputFormat extends MultiTableInputFormatBase implements
+ Configurable {
+
+ /** Job parameter that specifies the scan list. */
+ public static final String SCANS = "hbase.mapreduce.scans";
+
+ /** The configuration. */
+ private Configuration conf = null;
+
+ /**
+ * Returns the current configuration.
+ *
+ * @return The current configuration.
+ * @see org.apache.hadoop.conf.Configurable#getConf()
+ */
+ @Override
+ public Configuration getConf() {
+ return conf;
+ }
+
+ /**
+ * Sets the configuration. This is used to set the details for the tables to
+ * be scanned.
+ *
+ * @param configuration The configuration to set.
+ * @see org.apache.hadoop.conf.Configurable#setConf(
+ * org.apache.hadoop.conf.Configuration)
+ */
+ @Override
+ public void setConf(Configuration configuration) {
+ this.conf = configuration;
+ String[] rawScans = conf.getStrings(SCANS);
+ if (rawScans == null || rawScans.length <= 0) {
+ throw new IllegalArgumentException("There must be at least 1 scan configuration set via "
+ + SCANS);
+ }
+ List<Scan> scans = new ArrayList<>();
+
+ for (int i = 0; i < rawScans.length; i++) {
+ try {
+ scans.add(TableMapReduceUtil.convertStringToScan(rawScans[i]));
+ } catch (IOException e) {
+ throw new RuntimeException("Failed to convert Scan : " + rawScans[i] + " to string", e);
+ }
+ }
+ this.setScans(scans);
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatBase.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatBase.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatBase.java
new file mode 100644
index 0000000..5d541a6
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatBase.java
@@ -0,0 +1,296 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.text.MessageFormat;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HRegionLocation;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+import java.util.Map;
+import java.util.HashMap;
+import java.util.Iterator;
+/**
+ * A base for {@link MultiTableInputFormat}s. Receives a list of
+ * {@link Scan} instances that define the input tables and
+ * filters etc. Subclasses may use other TableRecordReader implementations.
+ */
+@InterfaceAudience.Public
+public abstract class MultiTableInputFormatBase extends
+ InputFormat<ImmutableBytesWritable, Result> {
+
+ private static final Log LOG = LogFactory.getLog(MultiTableInputFormatBase.class);
+
+ /** Holds the set of scans used to define the input. */
+ private List<Scan> scans;
+
+ /** The reader scanning the table, can be a custom one. */
+ private TableRecordReader tableRecordReader = null;
+
+ /**
+ * Builds a TableRecordReader. If no TableRecordReader was provided, uses the
+ * default.
+ *
+ * @param split The split to work with.
+ * @param context The current context.
+ * @return The newly created record reader.
+ * @throws IOException When creating the reader fails.
+ * @throws InterruptedException when record reader initialization fails
+ * @see org.apache.hadoop.mapreduce.InputFormat#createRecordReader(
+ * org.apache.hadoop.mapreduce.InputSplit,
+ * org.apache.hadoop.mapreduce.TaskAttemptContext)
+ */
+ @Override
+ public RecordReader<ImmutableBytesWritable, Result> createRecordReader(
+ InputSplit split, TaskAttemptContext context)
+ throws IOException, InterruptedException {
+ TableSplit tSplit = (TableSplit) split;
+ LOG.info(MessageFormat.format("Input split length: {0} bytes.", tSplit.getLength()));
+
+ if (tSplit.getTable() == null) {
+ throw new IOException("Cannot create a record reader because of a"
+ + " previous error. Please look at the previous logs lines from"
+ + " the task's full log for more details.");
+ }
+ final Connection connection = ConnectionFactory.createConnection(context.getConfiguration());
+ Table table = connection.getTable(tSplit.getTable());
+
+ if (this.tableRecordReader == null) {
+ this.tableRecordReader = new TableRecordReader();
+ }
+ final TableRecordReader trr = this.tableRecordReader;
+
+ try {
+ Scan sc = tSplit.getScan();
+ sc.setStartRow(tSplit.getStartRow());
+ sc.setStopRow(tSplit.getEndRow());
+ trr.setScan(sc);
+ trr.setTable(table);
+ return new RecordReader<ImmutableBytesWritable, Result>() {
+
+ @Override
+ public void close() throws IOException {
+ trr.close();
+ connection.close();
+ }
+
+ @Override
+ public ImmutableBytesWritable getCurrentKey() throws IOException, InterruptedException {
+ return trr.getCurrentKey();
+ }
+
+ @Override
+ public Result getCurrentValue() throws IOException, InterruptedException {
+ return trr.getCurrentValue();
+ }
+
+ @Override
+ public float getProgress() throws IOException, InterruptedException {
+ return trr.getProgress();
+ }
+
+ @Override
+ public void initialize(InputSplit inputsplit, TaskAttemptContext context)
+ throws IOException, InterruptedException {
+ trr.initialize(inputsplit, context);
+ }
+
+ @Override
+ public boolean nextKeyValue() throws IOException, InterruptedException {
+ return trr.nextKeyValue();
+ }
+ };
+ } catch (IOException ioe) {
+ // If there is an exception make sure that all
+ // resources are closed and released.
+ trr.close();
+ connection.close();
+ throw ioe;
+ }
+ }
+
+ /**
+ * Calculates the splits that will serve as input for the map tasks. The
+ * number of splits matches the number of regions in a table.
+ *
+ * @param context The current job context.
+ * @return The list of input splits.
+ * @throws IOException When creating the list of splits fails.
+ * @see org.apache.hadoop.mapreduce.InputFormat#getSplits(org.apache.hadoop.mapreduce.JobContext)
+ */
+ @Override
+ public List<InputSplit> getSplits(JobContext context) throws IOException {
+ if (scans.isEmpty()) {
+ throw new IOException("No scans were provided.");
+ }
+
+ Map<TableName, List<Scan>> tableMaps = new HashMap<>();
+ for (Scan scan : scans) {
+ byte[] tableNameBytes = scan.getAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME);
+ if (tableNameBytes == null)
+ throw new IOException("A scan object did not have a table name");
+
+ TableName tableName = TableName.valueOf(tableNameBytes);
+
+ List<Scan> scanList = tableMaps.get(tableName);
+ if (scanList == null) {
+ scanList = new ArrayList<>();
+ tableMaps.put(tableName, scanList);
+ }
+ scanList.add(scan);
+ }
+
+ List<InputSplit> splits = new ArrayList<>();
+ Iterator iter = tableMaps.entrySet().iterator();
+ while (iter.hasNext()) {
+ Map.Entry<TableName, List<Scan>> entry = (Map.Entry<TableName, List<Scan>>) iter.next();
+ TableName tableName = entry.getKey();
+ List<Scan> scanList = entry.getValue();
+
+ try (Connection conn = ConnectionFactory.createConnection(context.getConfiguration());
+ Table table = conn.getTable(tableName);
+ RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
+ RegionSizeCalculator sizeCalculator = new RegionSizeCalculator(
+ regionLocator, conn.getAdmin());
+ Pair<byte[][], byte[][]> keys = regionLocator.getStartEndKeys();
+ for (Scan scan : scanList) {
+ if (keys == null || keys.getFirst() == null || keys.getFirst().length == 0) {
+ throw new IOException("Expecting at least one region for table : "
+ + tableName.getNameAsString());
+ }
+ int count = 0;
+
+ byte[] startRow = scan.getStartRow();
+ byte[] stopRow = scan.getStopRow();
+
+ for (int i = 0; i < keys.getFirst().length; i++) {
+ if (!includeRegionInSplit(keys.getFirst()[i], keys.getSecond()[i])) {
+ continue;
+ }
+
+ if ((startRow.length == 0 || keys.getSecond()[i].length == 0 ||
+ Bytes.compareTo(startRow, keys.getSecond()[i]) < 0) &&
+ (stopRow.length == 0 || Bytes.compareTo(stopRow,
+ keys.getFirst()[i]) > 0)) {
+ byte[] splitStart = startRow.length == 0 ||
+ Bytes.compareTo(keys.getFirst()[i], startRow) >= 0 ?
+ keys.getFirst()[i] : startRow;
+ byte[] splitStop = (stopRow.length == 0 ||
+ Bytes.compareTo(keys.getSecond()[i], stopRow) <= 0) &&
+ keys.getSecond()[i].length > 0 ?
+ keys.getSecond()[i] : stopRow;
+
+ HRegionLocation hregionLocation = regionLocator.getRegionLocation(
+ keys.getFirst()[i], false);
+ String regionHostname = hregionLocation.getHostname();
+ HRegionInfo regionInfo = hregionLocation.getRegionInfo();
+ String encodedRegionName = regionInfo.getEncodedName();
+ long regionSize = sizeCalculator.getRegionSize(
+ regionInfo.getRegionName());
+
+ TableSplit split = new TableSplit(table.getName(),
+ scan, splitStart, splitStop, regionHostname,
+ encodedRegionName, regionSize);
+
+ splits.add(split);
+
+ if (LOG.isDebugEnabled())
+ LOG.debug("getSplits: split -> " + (count++) + " -> " + split);
+ }
+ }
+ }
+ }
+ }
+
+ return splits;
+ }
+
+ /**
+ * Test if the given region is to be included in the InputSplit while
+ * splitting the regions of a table.
+ * <p>
+ * This optimization is effective when there is a specific reason to
+ * exclude an entire region from the M-R job (and hence it contributes no
+ * InputSplit), based on the region's start and end keys. <br>
+ * Useful when we need to remember the last-processed top record and revisit
+ * the [last, current) interval for M-R processing, continuously. In addition
+ * to reducing InputSplits, this also reduces the load on the region server, due
+ * to the ordering of the keys. <br>
+ * <br>
+ * Note: It is possible that <code>endKey.length() == 0</code>, for the last
+ * (recent) region. <br>
+ * Override this method if you want to bulk exclude regions altogether from
+ * M-R. By default, no region is excluded (i.e. all regions are included).
+ *
+ * @param startKey Start key of the region
+ * @param endKey End key of the region
+ * @return true, if this region needs to be included as part of the input
+ * (default).
+ */
+ protected boolean includeRegionInSplit(final byte[] startKey,
+ final byte[] endKey) {
+ return true;
+ }
+
+ /**
+ * Allows subclasses to get the list of {@link Scan} objects.
+ */
+ protected List<Scan> getScans() {
+ return this.scans;
+ }
+
+ /**
+ * Allows subclasses to set the list of {@link Scan} objects.
+ *
+ * @param scans The list of {@link Scan} used to define the input
+ */
+ protected void setScans(List<Scan> scans) {
+ this.scans = scans;
+ }
+
+ /**
+ * Allows subclasses to set the {@link TableRecordReader}.
+ *
+ * @param tableRecordReader A different {@link TableRecordReader}
+ * implementation.
+ */
+ protected void setTableRecordReader(TableRecordReader tableRecordReader) {
+ this.tableRecordReader = tableRecordReader;
+ }
+}
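
A sketch of the includeRegionInSplit override described in the javadoc above, assuming a subclass of MultiTableInputFormat and an illustrative cutoff key: regions whose end key sorts at or below the cutoff contribute no InputSplits.

import org.apache.hadoop.hbase.mapreduce.MultiTableInputFormat;
import org.apache.hadoop.hbase.util.Bytes;

public class CutoffMultiTableInputFormat extends MultiTableInputFormat {
  private static final byte[] CUTOFF = Bytes.toBytes("row-2017");  // hypothetical cutoff key

  @Override
  protected boolean includeRegionInSplit(byte[] startKey, byte[] endKey) {
    // An empty end key marks the last region of the table; always include it.
    return endKey.length == 0 || Bytes.compareTo(endKey, CUTOFF) > 0;
  }
}
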
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableOutputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableOutputFormat.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableOutputFormat.java
new file mode 100644
index 0000000..4cc784f
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableOutputFormat.java
@@ -0,0 +1,176 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.BufferedMutator;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Mutation;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Durability;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.OutputCommitter;
+import org.apache.hadoop.mapreduce.OutputFormat;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+/**
+ * <p>
+ * Hadoop output format that writes to one or more HBase tables. The key is
+ * taken to be the table name while the output value <em>must</em> be either a
+ * {@link Put} or a {@link Delete} instance. All tables must already exist, and
+ * all Puts and Deletes must reference only valid column families.
+ * </p>
+ *
+ * <p>
+ * Write-ahead logging (WAL) for Puts can be disabled by setting
+ * {@link #WAL_PROPERTY} to {@link #WAL_OFF}. Default value is {@link #WAL_ON}.
+ * Note that disabling write-ahead logging is only appropriate for jobs where
+ * loss of data due to region server failure can be tolerated (for example,
+ * because it is easy to rerun a bulk import).
+ * </p>
+ */
+@InterfaceAudience.Public
+public class MultiTableOutputFormat extends OutputFormat<ImmutableBytesWritable, Mutation> {
+ /** Set this to {@link #WAL_OFF} to turn off write-ahead logging (WAL) */
+ public static final String WAL_PROPERTY = "hbase.mapreduce.multitableoutputformat.wal";
+ /** Property value to use write-ahead logging */
+ public static final boolean WAL_ON = true;
+ /** Property value to disable write-ahead logging */
+ public static final boolean WAL_OFF = false;
+ /**
+ * Record writer for outputting to multiple HTables.
+ */
+ protected static class MultiTableRecordWriter extends
+ RecordWriter<ImmutableBytesWritable, Mutation> {
+ private static final Log LOG = LogFactory.getLog(MultiTableRecordWriter.class);
+ Connection connection;
+ Map<ImmutableBytesWritable, BufferedMutator> mutatorMap = new HashMap<>();
+ Configuration conf;
+ boolean useWriteAheadLogging;
+
+ /**
+ * @param conf
+ * HBaseConfiguration to used
+ * @param useWriteAheadLogging
+ * whether to use write ahead logging. This can be turned off (
+ * <tt>false</tt>) to improve performance when bulk loading data.
+ */
+ public MultiTableRecordWriter(Configuration conf,
+ boolean useWriteAheadLogging) throws IOException {
+ LOG.debug("Created new MultiTableRecordReader with WAL "
+ + (useWriteAheadLogging ? "on" : "off"));
+ this.conf = conf;
+ this.useWriteAheadLogging = useWriteAheadLogging;
+ }
+
+ /**
+ * @param tableName
+ * the name of the table, as a string
+ * @return the named mutator
+ * @throws IOException
+ * if there is a problem opening a table
+ */
+ BufferedMutator getBufferedMutator(ImmutableBytesWritable tableName) throws IOException {
+ if(this.connection == null){
+ this.connection = ConnectionFactory.createConnection(conf);
+ }
+ if (!mutatorMap.containsKey(tableName)) {
+ LOG.debug("Opening HTable \"" + Bytes.toString(tableName.get())+ "\" for writing");
+
+ BufferedMutator mutator =
+ connection.getBufferedMutator(TableName.valueOf(tableName.get()));
+ mutatorMap.put(tableName, mutator);
+ }
+ return mutatorMap.get(tableName);
+ }
+
+ @Override
+ public void close(TaskAttemptContext context) throws IOException {
+ for (BufferedMutator mutator : mutatorMap.values()) {
+ mutator.close();
+ }
+ if (connection != null) {
+ connection.close();
+ }
+ }
+
+ /**
+ * Writes an action (Put or Delete) to the specified table.
+ *
+ * @param tableName
+ * the table being updated.
+ * @param action
+ * the update, either a put or a delete.
+ * @throws IllegalArgumentException
+ * if the action is not a put or a delete.
+ */
+ @Override
+ public void write(ImmutableBytesWritable tableName, Mutation action) throws IOException {
+ BufferedMutator mutator = getBufferedMutator(tableName);
+ // The actions are not immutable, so we defensively copy them
+ if (action instanceof Put) {
+ Put put = new Put((Put) action);
+ put.setDurability(useWriteAheadLogging ? Durability.SYNC_WAL
+ : Durability.SKIP_WAL);
+ mutator.mutate(put);
+ } else if (action instanceof Delete) {
+ Delete delete = new Delete((Delete) action);
+ mutator.mutate(delete);
+ } else
+ throw new IllegalArgumentException(
+ "action must be either Delete or Put");
+ }
+ }
+
+ @Override
+ public void checkOutputSpecs(JobContext context) throws IOException,
+ InterruptedException {
+ // we can't know ahead of time if it's going to blow up when the user
+ // passes a table name that doesn't exist, so nothing useful here.
+ }
+
+ @Override
+ public OutputCommitter getOutputCommitter(TaskAttemptContext context)
+ throws IOException, InterruptedException {
+ return new TableOutputCommitter();
+ }
+
+ @Override
+ public RecordWriter<ImmutableBytesWritable, Mutation> getRecordWriter(TaskAttemptContext context)
+ throws IOException, InterruptedException {
+ Configuration conf = context.getConfiguration();
+ return new MultiTableRecordWriter(HBaseConfiguration.create(conf),
+ conf.getBoolean(WAL_PROPERTY, WAL_ON));
+ }
+
+}
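
A hypothetical mapper sketch for this output format: the emitted key names the destination table (which must already exist) and the value is a Put or Delete; table and family names are illustrative, and reducers are assumed to be disabled.

import java.io.IOException;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class MultiTableWriteMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
  private static final ImmutableBytesWritable TABLE_A =
      new ImmutableBytesWritable(Bytes.toBytes("tableA"));  // assumed existing table
  private static final ImmutableBytesWritable TABLE_B =
      new ImmutableBytesWritable(Bytes.toBytes("tableB"));  // assumed existing table

  @Override
  protected void map(LongWritable offset, Text line, Context context)
      throws IOException, InterruptedException {
    String value = line.toString();
    Put put = new Put(Bytes.toBytes(value));
    put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q"), Bytes.toBytes(value));
    // The key selects the destination table; a BufferedMutator is opened per table.
    context.write(value.length() % 2 == 0 ? TABLE_A : TABLE_B, put);
  }
}
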
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableSnapshotInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableSnapshotInputFormat.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableSnapshotInputFormat.java
new file mode 100644
index 0000000..e7538a8
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableSnapshotInputFormat.java
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.JobContext;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * MultiTableSnapshotInputFormat generalizes
+ * {@link TableSnapshotInputFormat}
+ * allowing a MapReduce job to run over one or more table snapshots, with one or more scans
+ * configured for each.
+ * Internally, the input format delegates to
+ * {@link TableSnapshotInputFormat}
+ * and thus has the same performance advantages;
+ * see {@link TableSnapshotInputFormat} for
+ * more details.
+ * Usage is similar to TableSnapshotInputFormat, with the following exception:
+ * initMultiTableSnapshotMapperJob takes in a map
+ * from snapshot name to a collection of scans. For each snapshot in the map, each corresponding
+ * scan will be applied;
+ * the overall dataset for the job is defined by the concatenation of the regions and tables
+ * included in each snapshot/scan
+ * pair.
+ * {@link TableMapReduceUtil#initMultiTableSnapshotMapperJob
+ * (java.util.Map, Class, Class, Class, org.apache.hadoop.mapreduce.Job, boolean, org.apache
+ * .hadoop.fs.Path)}
+ * can be used to configure the job.
+ * <pre>{@code
+ * Job job = new Job(conf);
+ * Map<String, Collection<Scan>> snapshotScans = ImmutableMap.of(
+ * "snapshot1", ImmutableList.of(new Scan(Bytes.toBytes("a"), Bytes.toBytes("b"))),
+ * "snapshot2", ImmutableList.of(new Scan(Bytes.toBytes("1"), Bytes.toBytes("2")))
+ * );
+ * Path restoreDir = new Path("/tmp/snapshot_restore_dir");
+ * TableMapReduceUtil.initTableSnapshotMapperJob(
+ * snapshotScans, MyTableMapper.class, MyMapKeyOutput.class,
+ * MyMapOutputValueWritable.class, job, true, restoreDir);
+ * }
+ * </pre>
+ * Internally, this input format restores each snapshot into a subdirectory of the given tmp
+ * directory. Input splits and
+ * record readers are created as described in {@link org.apache.hadoop.hbase.mapreduce
+ * .TableSnapshotInputFormat}
+ * (one per region).
+ * See {@link TableSnapshotInputFormat} for more notes on
+ * permissioning; the
+ * same caveats apply here.
+ *
+ * @see TableSnapshotInputFormat
+ * @see org.apache.hadoop.hbase.client.TableSnapshotScanner
+ */
+@InterfaceAudience.Public
+public class MultiTableSnapshotInputFormat extends TableSnapshotInputFormat {
+
+ private final MultiTableSnapshotInputFormatImpl delegate;
+
+ public MultiTableSnapshotInputFormat() {
+ this.delegate = new MultiTableSnapshotInputFormatImpl();
+ }
+
+ @Override
+ public List<InputSplit> getSplits(JobContext jobContext)
+ throws IOException, InterruptedException {
+ List<TableSnapshotInputFormatImpl.InputSplit> splits =
+ delegate.getSplits(jobContext.getConfiguration());
+ List<InputSplit> rtn = Lists.newArrayListWithCapacity(splits.size());
+
+ for (TableSnapshotInputFormatImpl.InputSplit split : splits) {
+ rtn.add(new TableSnapshotInputFormat.TableSnapshotRegionSplit(split));
+ }
+
+ return rtn;
+ }
+
+ public static void setInput(Configuration configuration,
+ Map<String, Collection<Scan>> snapshotScans, Path tmpRestoreDir) throws IOException {
+ new MultiTableSnapshotInputFormatImpl().setInput(configuration, snapshotScans, tmpRestoreDir);
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java
new file mode 100644
index 0000000..9cccf8c
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java
@@ -0,0 +1,386 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Random;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * Tool used to copy a table to another one, which can be on a different cluster.
+ * It is also configurable with a start and end time as well as a specification
+ * of the region server implementation if different from the local cluster.
+ */
+@InterfaceAudience.Public
+public class CopyTable extends Configured implements Tool {
+ private static final Log LOG = LogFactory.getLog(CopyTable.class);
+
+ final static String NAME = "copytable";
+ long startTime = 0;
+ long endTime = HConstants.LATEST_TIMESTAMP;
+ int batch = Integer.MAX_VALUE;
+ int cacheRow = -1;
+ int versions = -1;
+ String tableName = null;
+ String startRow = null;
+ String stopRow = null;
+ String dstTableName = null;
+ String peerAddress = null;
+ String families = null;
+ boolean allCells = false;
+ static boolean shuffle = false;
+
+ boolean bulkload = false;
+ Path bulkloadDir = null;
+
+ private final static String JOB_NAME_CONF_KEY = "mapreduce.job.name";
+
+ /**
+ * Sets up the actual job.
+ *
+ * @param args The command line parameters.
+ * @return The newly created job.
+ * @throws IOException When setting up the job fails.
+ */
+ public Job createSubmittableJob(String[] args)
+ throws IOException {
+ if (!doCommandLine(args)) {
+ return null;
+ }
+
+ Job job = Job.getInstance(getConf(), getConf().get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
+ job.setJarByClass(CopyTable.class);
+ Scan scan = new Scan();
+
+ scan.setBatch(batch);
+ scan.setCacheBlocks(false);
+
+ if (cacheRow > 0) {
+ scan.setCaching(cacheRow);
+ } else {
+ scan.setCaching(getConf().getInt(HConstants.HBASE_CLIENT_SCANNER_CACHING, 100));
+ }
+
+ scan.setTimeRange(startTime, endTime);
+
+ if (allCells) {
+ scan.setRaw(true);
+ }
+ if (shuffle) {
+ job.getConfiguration().set(TableInputFormat.SHUFFLE_MAPS, "true");
+ }
+ if (versions >= 0) {
+ scan.setMaxVersions(versions);
+ }
+
+ if (startRow != null) {
+ scan.setStartRow(Bytes.toBytesBinary(startRow));
+ }
+
+ if (stopRow != null) {
+ scan.setStopRow(Bytes.toBytesBinary(stopRow));
+ }
+
+ if(families != null) {
+ String[] fams = families.split(",");
+ Map<String,String> cfRenameMap = new HashMap<>();
+ for(String fam : fams) {
+ String sourceCf;
+ if(fam.contains(":")) {
+ // fam looks like "sourceCfName:destCfName"
+ String[] srcAndDest = fam.split(":", 2);
+ sourceCf = srcAndDest[0];
+ String destCf = srcAndDest[1];
+ cfRenameMap.put(sourceCf, destCf);
+ } else {
+ // fam is just "sourceCf"
+ sourceCf = fam;
+ }
+ scan.addFamily(Bytes.toBytes(sourceCf));
+ }
+ Import.configureCfRenaming(job.getConfiguration(), cfRenameMap);
+ }
+ job.setNumReduceTasks(0);
+
+ if (bulkload) {
+ TableMapReduceUtil.initTableMapperJob(tableName, scan, Import.KeyValueImporter.class, null,
+ null, job);
+
+ // We need to split the inputs by destination tables so that output of Map can be bulk-loaded.
+ TableInputFormat.configureSplitTable(job, TableName.valueOf(dstTableName));
+
+ FileSystem fs = FileSystem.get(getConf());
+ Random rand = new Random();
+ Path root = new Path(fs.getWorkingDirectory(), "copytable");
+ fs.mkdirs(root);
+ while (true) {
+ bulkloadDir = new Path(root, "" + rand.nextLong());
+ if (!fs.exists(bulkloadDir)) {
+ break;
+ }
+ }
+
+ System.out.println("HFiles will be stored at " + this.bulkloadDir);
+ HFileOutputFormat2.setOutputPath(job, bulkloadDir);
+ try (Connection conn = ConnectionFactory.createConnection(getConf());
+ Admin admin = conn.getAdmin()) {
+ HFileOutputFormat2.configureIncrementalLoadMap(job,
+ admin.listTableDescriptor((TableName.valueOf(dstTableName))));
+ }
+ } else {
+ TableMapReduceUtil.initTableMapperJob(tableName, scan,
+ Import.Importer.class, null, null, job);
+
+ TableMapReduceUtil.initTableReducerJob(dstTableName, null, job, null, peerAddress, null,
+ null);
+ }
+
+ return job;
+ }
+
+ /*
+ * @param errorMsg Error message. Can be null.
+ */
+ private static void printUsage(final String errorMsg) {
+ if (errorMsg != null && errorMsg.length() > 0) {
+ System.err.println("ERROR: " + errorMsg);
+ }
+ System.err.println("Usage: CopyTable [general options] [--starttime=X] [--endtime=Y] " +
+ "[--new.name=NEW] [--peer.adr=ADR] <tablename>");
+ System.err.println();
+ System.err.println("Options:");
+ System.err.println(" rs.class hbase.regionserver.class of the peer cluster");
+ System.err.println(" specify if different from current cluster");
+ System.err.println(" rs.impl hbase.regionserver.impl of the peer cluster");
+ System.err.println(" startrow the start row");
+ System.err.println(" stoprow the stop row");
+ System.err.println(" starttime beginning of the time range (unixtime in millis)");
+ System.err.println(" without endtime means from starttime to forever");
+ System.err.println(" endtime end of the time range. Ignored if no starttime specified.");
+ System.err.println(" versions number of cell versions to copy");
+ System.err.println(" new.name new table's name");
+ System.err.println(" peer.adr Address of the peer cluster given in the format");
+ System.err.println(" hbase.zookeeper.quorum:hbase.zookeeper.client"
+ + ".port:zookeeper.znode.parent");
+ System.err.println(" families comma-separated list of families to copy");
+ System.err.println(" To copy from cf1 to cf2, give sourceCfName:destCfName. ");
+ System.err.println(" To keep the same name, just give \"cfName\"");
+ System.err.println(" all.cells also copy delete markers and deleted cells");
+ System.err.println(" bulkload Write input into HFiles and bulk load to the destination "
+ + "table");
+ System.err.println();
+ System.err.println("Args:");
+ System.err.println(" tablename Name of the table to copy");
+ System.err.println();
+ System.err.println("Examples:");
+ System.err.println(" To copy 'TestTable' to a cluster that uses replication for a 1 hour window:");
+ System.err.println(" $ hbase " +
+ "org.apache.hadoop.hbase.mapreduce.CopyTable --starttime=1265875194289 --endtime=1265878794289 " +
+ "--peer.adr=server1,server2,server3:2181:/hbase --families=myOldCf:myNewCf,cf2,cf3 TestTable ");
+ System.err.println("For performance consider the following general option:\n"
+ + " It is recommended that you set the following to >=100. A higher value uses more memory but\n"
+ + " decreases the round trip time to the server and may increase performance.\n"
+ + " -Dhbase.client.scanner.caching=100\n"
+ + " The following should always be set to false, to prevent writing data twice, which may produce \n"
+ + " inaccurate results.\n"
+ + " -Dmapreduce.map.speculative=false");
+ }
+
+ private boolean doCommandLine(final String[] args) {
+ // Process command-line args. TODO: Better cmd-line processing
+ // (but hopefully something not as painful as cli options).
+ if (args.length < 1) {
+ printUsage(null);
+ return false;
+ }
+ try {
+ for (int i = 0; i < args.length; i++) {
+ String cmd = args[i];
+ if (cmd.equals("-h") || cmd.startsWith("--h")) {
+ printUsage(null);
+ return false;
+ }
+
+ final String startRowArgKey = "--startrow=";
+ if (cmd.startsWith(startRowArgKey)) {
+ startRow = cmd.substring(startRowArgKey.length());
+ continue;
+ }
+
+ final String stopRowArgKey = "--stoprow=";
+ if (cmd.startsWith(stopRowArgKey)) {
+ stopRow = cmd.substring(stopRowArgKey.length());
+ continue;
+ }
+
+ final String startTimeArgKey = "--starttime=";
+ if (cmd.startsWith(startTimeArgKey)) {
+ startTime = Long.parseLong(cmd.substring(startTimeArgKey.length()));
+ continue;
+ }
+
+ final String endTimeArgKey = "--endtime=";
+ if (cmd.startsWith(endTimeArgKey)) {
+ endTime = Long.parseLong(cmd.substring(endTimeArgKey.length()));
+ continue;
+ }
+
+ final String batchArgKey = "--batch=";
+ if (cmd.startsWith(batchArgKey)) {
+ batch = Integer.parseInt(cmd.substring(batchArgKey.length()));
+ continue;
+ }
+
+ final String cacheRowArgKey = "--cacheRow=";
+ if (cmd.startsWith(cacheRowArgKey)) {
+ cacheRow = Integer.parseInt(cmd.substring(cacheRowArgKey.length()));
+ continue;
+ }
+
+ final String versionsArgKey = "--versions=";
+ if (cmd.startsWith(versionsArgKey)) {
+ versions = Integer.parseInt(cmd.substring(versionsArgKey.length()));
+ continue;
+ }
+
+ final String newNameArgKey = "--new.name=";
+ if (cmd.startsWith(newNameArgKey)) {
+ dstTableName = cmd.substring(newNameArgKey.length());
+ continue;
+ }
+
+ final String peerAdrArgKey = "--peer.adr=";
+ if (cmd.startsWith(peerAdrArgKey)) {
+ peerAddress = cmd.substring(peerAdrArgKey.length());
+ continue;
+ }
+
+ final String familiesArgKey = "--families=";
+ if (cmd.startsWith(familiesArgKey)) {
+ families = cmd.substring(familiesArgKey.length());
+ continue;
+ }
+
+ if (cmd.startsWith("--all.cells")) {
+ allCells = true;
+ continue;
+ }
+
+ if (cmd.startsWith("--bulkload")) {
+ bulkload = true;
+ continue;
+ }
+
+ if (cmd.startsWith("--shuffle")) {
+ shuffle = true;
+ continue;
+ }
+
+ if (i == args.length-1) {
+ tableName = cmd;
+ } else {
+ printUsage("Invalid argument '" + cmd + "'");
+ return false;
+ }
+ }
+ if (dstTableName == null && peerAddress == null) {
+ printUsage("At least a new table name or a " +
+ "peer address must be specified");
+ return false;
+ }
+ if ((endTime != 0) && (startTime > endTime)) {
+ printUsage("Invalid time range filter: starttime=" + startTime + " > endtime=" + endTime);
+ return false;
+ }
+
+ if (bulkload && peerAddress != null) {
+ printUsage("Remote bulkload is not supported!");
+ return false;
+ }
+
+ // set dstTableName if necessary
+ if (dstTableName == null) {
+ dstTableName = tableName;
+ }
+ } catch (Exception e) {
+ e.printStackTrace();
+ printUsage("Can't start because " + e.getMessage());
+ return false;
+ }
+ return true;
+ }
+
+ /**
+ * Main entry point.
+ *
+ * @param args The command line parameters.
+ * @throws Exception When running the job fails.
+ */
+ public static void main(String[] args) throws Exception {
+ int ret = ToolRunner.run(HBaseConfiguration.create(), new CopyTable(), args);
+ System.exit(ret);
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+ Job job = createSubmittableJob(args);
+ if (job == null) return 1;
+ if (!job.waitForCompletion(true)) {
+ LOG.info("Map-reduce job failed!");
+ if (bulkload) {
+ LOG.info("Files are not bulkloaded!");
+ }
+ return 1;
+ }
+ int code = 0;
+ if (bulkload) {
+ code = new LoadIncrementalHFiles(this.getConf()).run(new String[]{this.bulkloadDir.toString(),
+ this.dstTableName});
+ if (code == 0) {
+ // bulkloadDir is deleted only if LoadIncrementalHFiles was successful, so that one can rerun
+ // LoadIncrementalHFiles if it failed.
+ FileSystem fs = FileSystem.get(this.getConf());
+ if (!fs.delete(this.bulkloadDir, true)) {
+ LOG.error("Deleting folder " + bulkloadDir + " failed!");
+ code = 1;
+ }
+ }
+ }
+ return code;
+ }
+}
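
For readers following the moved code, a minimal sketch (not part of the patch) of driving CopyTable programmatically through ToolRunner; the table names are hypothetical and the flags map directly onto the options parsed in doCommandLine above:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.CopyTable;
import org.apache.hadoop.util.ToolRunner;

public class CopyTableExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // Copy the (hypothetical) table 'SourceTable' into 'DestTable' on the same cluster.
    // Both tables must already exist; add --peer.adr=... to write to a remote cluster instead.
    int exitCode = ToolRunner.run(conf, new CopyTable(),
        new String[] { "--new.name=DestTable", "SourceTable" });
    System.exit(exitCode);
  }
}
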
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/DefaultVisibilityExpressionResolver.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/DefaultVisibilityExpressionResolver.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/DefaultVisibilityExpressionResolver.java
new file mode 100644
index 0000000..004ee5c
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/DefaultVisibilityExpressionResolver.java
@@ -0,0 +1,144 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.apache.hadoop.hbase.security.visibility.VisibilityConstants.LABELS_TABLE_FAMILY;
+import static org.apache.hadoop.hbase.security.visibility.VisibilityConstants.LABELS_TABLE_NAME;
+import static org.apache.hadoop.hbase.security.visibility.VisibilityConstants.LABEL_QUALIFIER;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.TableNotFoundException;
+import org.apache.hadoop.hbase.Tag;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.security.visibility.Authorizations;
+import org.apache.hadoop.hbase.security.visibility.VisibilityConstants;
+import org.apache.hadoop.hbase.security.visibility.VisibilityLabelOrdinalProvider;
+import org.apache.hadoop.hbase.security.visibility.VisibilityUtils;
+import org.apache.hadoop.hbase.util.Bytes;
+
+/**
+ * This implementation creates tags by expanding the expression using label ordinals. Labels are
+ * serialized in sorted order of their ordinals.
+ */
+@InterfaceAudience.Private
+public class DefaultVisibilityExpressionResolver implements VisibilityExpressionResolver {
+ private static final Log LOG = LogFactory.getLog(DefaultVisibilityExpressionResolver.class);
+
+ private Configuration conf;
+ private final Map<String, Integer> labels = new HashMap<>();
+
+ @Override
+ public Configuration getConf() {
+ return this.conf;
+ }
+
+ @Override
+ public void setConf(Configuration conf) {
+ this.conf = conf;
+ }
+
+ @Override
+ public void init() {
+ // Read all the labels and their ordinals.
+ // This scan should be done by a user with global_admin privileges. Ensure that it works.
+ Table labelsTable = null;
+ Connection connection = null;
+ try {
+ connection = ConnectionFactory.createConnection(conf);
+ try {
+ labelsTable = connection.getTable(LABELS_TABLE_NAME);
+ } catch (IOException e) {
+ LOG.error("Error opening 'labels' table", e);
+ return;
+ }
+ Scan scan = new Scan();
+ scan.setAuthorizations(new Authorizations(VisibilityUtils.SYSTEM_LABEL));
+ scan.addColumn(LABELS_TABLE_FAMILY, LABEL_QUALIFIER);
+ ResultScanner scanner = null;
+ try {
+ scanner = labelsTable.getScanner(scan);
+ Result next = null;
+ while ((next = scanner.next()) != null) {
+ byte[] row = next.getRow();
+ byte[] value = next.getValue(LABELS_TABLE_FAMILY, LABEL_QUALIFIER);
+ labels.put(Bytes.toString(value), Bytes.toInt(row));
+ }
+ } catch (TableNotFoundException e) {
+ // Table not found. So just return
+ return;
+ } catch (IOException e) {
+ LOG.error("Error scanning 'labels' table", e);
+ } finally {
+ if (scanner != null) scanner.close();
+ }
+ } catch (IOException ioe) {
+ LOG.error("Failed reading 'labels' tags", ioe);
+ return;
+ } finally {
+ if (labelsTable != null) {
+ try {
+ labelsTable.close();
+ } catch (IOException ioe) {
+ LOG.warn("Error closing 'labels' table", ioe);
+ }
+ }
+ if (connection != null)
+ try {
+ connection.close();
+ } catch (IOException ioe) {
+ LOG.warn("Failed close of temporary connection", ioe);
+ }
+ }
+ }
+
+ @Override
+ public List<Tag> createVisibilityExpTags(String visExpression) throws IOException {
+ VisibilityLabelOrdinalProvider provider = new VisibilityLabelOrdinalProvider() {
+ @Override
+ public int getLabelOrdinal(String label) {
+ Integer ordinal = labels.get(label);
+ if (ordinal != null) {
+ return ordinal.intValue();
+ }
+ return VisibilityConstants.NON_EXIST_LABEL_ORDINAL;
+ }
+
+ @Override
+ public String getLabel(int ordinal) {
+ // Unused
+ throw new UnsupportedOperationException(
+ "getLabel should not be used in VisibilityExpressionResolver");
+ }
+ };
+ return VisibilityUtils.createVisibilityExpTags(visExpression, true, false, null, provider);
+ }
+}
\ No newline at end of file
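
The resolver above is marked @InterfaceAudience.Private, so the following is only an illustrative sketch of its lifecycle (setConf, init, createVisibilityExpTags), not part of the patch; the label names are hypothetical and must already be defined in the cluster's labels table:

import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.mapreduce.DefaultVisibilityExpressionResolver;

public class VisibilityTagExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    DefaultVisibilityExpressionResolver resolver = new DefaultVisibilityExpressionResolver();
    resolver.setConf(conf);
    // Loads label ordinals from the labels table; requires a sufficiently privileged user.
    resolver.init();
    // 'secret' and 'confidential' are hypothetical labels.
    List<Tag> tags = resolver.createVisibilityExpTags("secret&confidential");
    System.out.println("Created " + tags.size() + " visibility tag(s)");
  }
}
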
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/Driver.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/Driver.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/Driver.java
new file mode 100644
index 0000000..9737b55
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/Driver.java
@@ -0,0 +1,64 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.hadoop.hbase.HBaseInterfaceAudience;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.classification.InterfaceStability;
+import org.apache.hadoop.hbase.mapreduce.replication.VerifyReplication;
+import org.apache.hadoop.hbase.snapshot.ExportSnapshot;
+import org.apache.hadoop.util.ProgramDriver;
+
+/**
+ * Driver for HBase MapReduce jobs. Select which job to run by passing
+ * its name to this main.
+ */
+@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
+@InterfaceStability.Stable
+public class Driver {
+ /**
+ * @param args
+ * @throws Throwable
+ */
+ public static void main(String[] args) throws Throwable {
+ ProgramDriver pgd = new ProgramDriver();
+
+ pgd.addClass(RowCounter.NAME, RowCounter.class,
+ "Count rows in HBase table.");
+ pgd.addClass(CellCounter.NAME, CellCounter.class,
+ "Count cells in HBase table.");
+ pgd.addClass(Export.NAME, Export.class, "Write table data to HDFS.");
+ pgd.addClass(Import.NAME, Import.class, "Import data written by Export.");
+ pgd.addClass(ImportTsv.NAME, ImportTsv.class, "Import data in TSV format.");
+ pgd.addClass(LoadIncrementalHFiles.NAME, LoadIncrementalHFiles.class,
+ "Complete a bulk data load.");
+ pgd.addClass(CopyTable.NAME, CopyTable.class,
+ "Export a table from local cluster to peer cluster.");
+ pgd.addClass(VerifyReplication.NAME, VerifyReplication.class, "Compare" +
+ " the data from tables in two different clusters. WARNING: It" +
+ " doesn't work for incrementColumnValues'd cells since the" +
+ " timestamp is changed after being appended to the log.");
+ pgd.addClass(WALPlayer.NAME, WALPlayer.class, "Replay WAL files.");
+ pgd.addClass(ExportSnapshot.NAME, ExportSnapshot.class, "Export" +
+ " the specific snapshot to a given FileSystem.");
+
+ ProgramDriver.class.getMethod("driver", new Class [] {String[].class}).
+ invoke(pgd, new Object[]{args});
+ }
+}
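
A short sketch, not part of the patch, of selecting one of the registered programs through the driver; "copytable" is the name registered above via CopyTable.NAME, and the table names are hypothetical:

import org.apache.hadoop.hbase.mapreduce.Driver;

public class DriverExample {
  public static void main(String[] args) throws Throwable {
    // Equivalent to: hbase org.apache.hadoop.hbase.mapreduce.Driver copytable --new.name=DestTable SourceTable
    Driver.main(new String[] { "copytable", "--new.name=DestTable", "SourceTable" });
  }
}
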
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java
new file mode 100644
index 0000000..de6cf3a
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java
@@ -0,0 +1,197 @@
+/**
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
+import org.apache.hadoop.hbase.filter.Filter;
+import org.apache.hadoop.hbase.filter.IncompatibleFilterException;
+import org.apache.hadoop.hbase.filter.PrefixFilter;
+import org.apache.hadoop.hbase.filter.RegexStringComparator;
+import org.apache.hadoop.hbase.filter.RowFilter;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * Export an HBase table.
+ * Writes content to sequence files in HDFS. Use {@link Import} to read it
+ * back in again.
+ */
+@InterfaceAudience.Public
+public class Export extends Configured implements Tool {
+ private static final Log LOG = LogFactory.getLog(Export.class);
+ final static String NAME = "export";
+ final static String RAW_SCAN = "hbase.mapreduce.include.deleted.rows";
+ final static String EXPORT_BATCHING = "hbase.export.scanner.batch";
+
+ private final static String JOB_NAME_CONF_KEY = "mapreduce.job.name";
+
+ /**
+ * Sets up the actual job.
+ *
+ * @param conf The current configuration.
+ * @param args The command line parameters.
+ * @return The newly created job.
+ * @throws IOException When setting up the job fails.
+ */
+ public static Job createSubmittableJob(Configuration conf, String[] args)
+ throws IOException {
+ String tableName = args[0];
+ Path outputDir = new Path(args[1]);
+ Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
+ job.setJobName(NAME + "_" + tableName);
+ job.setJarByClass(Export.class);
+ // Set optional scan parameters
+ Scan s = getConfiguredScanForJob(conf, args);
+ IdentityTableMapper.initJob(tableName, s, IdentityTableMapper.class, job);
+ // No reducers. Just write straight to output files.
+ job.setNumReduceTasks(0);
+ job.setOutputFormatClass(SequenceFileOutputFormat.class);
+ job.setOutputKeyClass(ImmutableBytesWritable.class);
+ job.setOutputValueClass(Result.class);
+ FileOutputFormat.setOutputPath(job, outputDir); // job conf doesn't contain the conf so doesn't have a default fs.
+ return job;
+ }
+
+ private static Scan getConfiguredScanForJob(Configuration conf, String[] args) throws IOException {
+ Scan s = new Scan();
+ // Optional arguments.
+ // Set Scan Versions
+ int versions = args.length > 2? Integer.parseInt(args[2]): 1;
+ s.setMaxVersions(versions);
+ // Set Scan Range
+ long startTime = args.length > 3? Long.parseLong(args[3]): 0L;
+ long endTime = args.length > 4? Long.parseLong(args[4]): Long.MAX_VALUE;
+ s.setTimeRange(startTime, endTime);
+ // Set cache blocks
+ s.setCacheBlocks(false);
+ // set Start and Stop row
+ if (conf.get(TableInputFormat.SCAN_ROW_START) != null) {
+ s.setStartRow(Bytes.toBytesBinary(conf.get(TableInputFormat.SCAN_ROW_START)));
+ }
+ if (conf.get(TableInputFormat.SCAN_ROW_STOP) != null) {
+ s.setStopRow(Bytes.toBytesBinary(conf.get(TableInputFormat.SCAN_ROW_STOP)));
+ }
+ // Set Scan Column Family
+ boolean raw = Boolean.parseBoolean(conf.get(RAW_SCAN));
+ if (raw) {
+ s.setRaw(raw);
+ }
+ for (String columnFamily : conf.getTrimmedStrings(TableInputFormat.SCAN_COLUMN_FAMILY)) {
+ s.addFamily(Bytes.toBytes(columnFamily));
+ }
+ // Set RowFilter or Prefix Filter if applicable.
+ Filter exportFilter = getExportFilter(args);
+ if (exportFilter!= null) {
+ LOG.info("Setting Scan Filter for Export.");
+ s.setFilter(exportFilter);
+ }
+
+ int batching = conf.getInt(EXPORT_BATCHING, -1);
+ if (batching != -1){
+ try {
+ s.setBatch(batching);
+ } catch (IncompatibleFilterException e) {
+ LOG.error("Batching could not be set", e);
+ }
+ }
+ LOG.info("versions=" + versions + ", starttime=" + startTime +
+ ", endtime=" + endTime + ", keepDeletedCells=" + raw);
+ return s;
+ }
+
+ private static Filter getExportFilter(String[] args) {
+ Filter exportFilter = null;
+ String filterCriteria = (args.length > 5) ? args[5]: null;
+ if (filterCriteria == null) return null;
+ if (filterCriteria.startsWith("^")) {
+ String regexPattern = filterCriteria.substring(1, filterCriteria.length());
+ exportFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator(regexPattern));
+ } else {
+ exportFilter = new PrefixFilter(Bytes.toBytesBinary(filterCriteria));
+ }
+ return exportFilter;
+ }
+
+ /*
+ * @param errorMsg Error message. Can be null.
+ */
+ private static void usage(final String errorMsg) {
+ if (errorMsg != null && errorMsg.length() > 0) {
+ System.err.println("ERROR: " + errorMsg);
+ }
+ System.err.println("Usage: Export [-D <property=value>]* <tablename> <outputdir> [<versions> " +
+ "[<starttime> [<endtime>]] [^[regex pattern] or [Prefix] to filter]]\n");
+ System.err.println(" Note: -D properties will be applied to the conf used. ");
+ System.err.println(" For example: ");
+ System.err.println(" -D mapreduce.output.fileoutputformat.compress=true");
+ System.err.println(" -D mapreduce.output.fileoutputformat.compress.codec=org.apache.hadoop.io.compress.GzipCodec");
+ System.err.println(" -D mapreduce.output.fileoutputformat.compress.type=BLOCK");
+ System.err.println(" Additionally, the following SCAN properties can be specified");
+ System.err.println(" to control/limit what is exported..");
+ System.err.println(" -D " + TableInputFormat.SCAN_COLUMN_FAMILY + "=<family1>,<family2>, ...");
+ System.err.println(" -D " + RAW_SCAN + "=true");
+ System.err.println(" -D " + TableInputFormat.SCAN_ROW_START + "=<ROWSTART>");
+ System.err.println(" -D " + TableInputFormat.SCAN_ROW_STOP + "=<ROWSTOP>");
+ System.err.println(" -D " + JOB_NAME_CONF_KEY
+ + "=jobName - use the specified mapreduce job name for the export");
+ System.err.println("For performance consider the following properties:\n"
+ + " -Dhbase.client.scanner.caching=100\n"
+ + " -Dmapreduce.map.speculative=false\n"
+ + " -Dmapreduce.reduce.speculative=false");
+ System.err.println("For tables with very wide rows consider setting the batch size as below:\n"
+ + " -D" + EXPORT_BATCHING + "=10");
+ }
+
+
+ @Override
+ public int run(String[] args) throws Exception {
+ if (args.length < 2) {
+ usage("Wrong number of arguments: " + args.length);
+ return -1;
+ }
+ Job job = createSubmittableJob(getConf(), args);
+ return (job.waitForCompletion(true) ? 0 : 1);
+ }
+
+ /**
+ * Main entry point.
+ * @param args The command line parameters.
+ * @throws Exception When running the job fails.
+ */
+ public static void main(String[] args) throws Exception {
+ int errCode = ToolRunner.run(HBaseConfiguration.create(), new Export(), args);
+ System.exit(errCode);
+ }
+}
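
A small sketch (not part of the patch) of running the Export tool with the tuning keys shown in its usage text; the table name, output path, and version count are hypothetical:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.Export;
import org.apache.hadoop.util.ToolRunner;

public class ExportExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // Larger scanner caching, as recommended in the usage text above.
    conf.setInt("hbase.client.scanner.caching", 100);
    // Include delete markers and deleted cells (the RAW_SCAN key defined above).
    conf.setBoolean("hbase.mapreduce.include.deleted.rows", true);
    // Export 'SourceTable' to an HDFS directory, keeping up to 3 versions of each cell.
    int exitCode = ToolRunner.run(conf, new Export(),
        new String[] { "SourceTable", "/tmp/export-output", "3" });
    System.exit(exitCode);
  }
}
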
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/GroupingTableMapper.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/GroupingTableMapper.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/GroupingTableMapper.java
new file mode 100644
index 0000000..dc30c6e
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/GroupingTableMapper.java
@@ -0,0 +1,177 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapreduce.Job;
+
+/**
+ * Extract grouping columns from the input record.
+ */
+@InterfaceAudience.Public
+public class GroupingTableMapper
+extends TableMapper<ImmutableBytesWritable,Result> implements Configurable {
+
+ /**
+ * JobConf parameter to specify the columns used to produce the key passed to
+ * collect from the map phase.
+ */
+ public static final String GROUP_COLUMNS =
+ "hbase.mapred.groupingtablemap.columns";
+
+ /** The grouping columns. */
+ protected byte [][] columns;
+ /** The current configuration. */
+ private Configuration conf = null;
+
+ /**
+ * Use this before submitting a TableMap job. It will appropriately set up
+ * the job.
+ *
+ * @param table The table to be processed.
+ * @param scan The scan with the columns etc.
+ * @param groupColumns A space separated list of columns used to form the
+ * key used in collect.
+ * @param mapper The mapper class.
+ * @param job The current job.
+ * @throws IOException When setting up the job fails.
+ */
+ @SuppressWarnings("unchecked")
+ public static void initJob(String table, Scan scan, String groupColumns,
+ Class<? extends TableMapper> mapper, Job job) throws IOException {
+ TableMapReduceUtil.initTableMapperJob(table, scan, mapper,
+ ImmutableBytesWritable.class, Result.class, job);
+ job.getConfiguration().set(GROUP_COLUMNS, groupColumns);
+ }
+
+ /**
+ * Extract the grouping columns from value to construct a new key. Pass the
+ * new key and value to reduce. If any of the grouping columns are not found
+ * in the value, the record is skipped.
+ *
+ * @param key The current key.
+ * @param value The current value.
+ * @param context The current context.
+ * @throws IOException When writing the record fails.
+ * @throws InterruptedException When the job is aborted.
+ */
+ @Override
+ public void map(ImmutableBytesWritable key, Result value, Context context)
+ throws IOException, InterruptedException {
+ byte[][] keyVals = extractKeyValues(value);
+ if(keyVals != null) {
+ ImmutableBytesWritable tKey = createGroupKey(keyVals);
+ context.write(tKey, value);
+ }
+ }
+
+ /**
+ * Extract column values from the current record. This method returns
+ * null if any of the columns are not found.
+ * <p>
+ * Override this method if you want to deal with nulls differently.
+ *
+ * @param r The current values.
+ * @return Array of byte values.
+ */
+ protected byte[][] extractKeyValues(Result r) {
+ byte[][] keyVals = null;
+ ArrayList<byte[]> foundList = new ArrayList<>();
+ int numCols = columns.length;
+ if (numCols > 0) {
+ for (Cell value: r.listCells()) {
+ byte [] column = KeyValue.makeColumn(CellUtil.cloneFamily(value),
+ CellUtil.cloneQualifier(value));
+ for (int i = 0; i < numCols; i++) {
+ if (Bytes.equals(column, columns[i])) {
+ foundList.add(CellUtil.cloneValue(value));
+ break;
+ }
+ }
+ }
+ if(foundList.size() == numCols) {
+ keyVals = foundList.toArray(new byte[numCols][]);
+ }
+ }
+ return keyVals;
+ }
+
+ /**
+ * Create a key by concatenating multiple column values.
+ * <p>
+ * Override this function in order to produce different types of keys.
+ *
+ * @param vals The current key/values.
+ * @return A key generated by concatenating multiple column values.
+ */
+ protected ImmutableBytesWritable createGroupKey(byte[][] vals) {
+ if(vals == null) {
+ return null;
+ }
+ StringBuilder sb = new StringBuilder();
+ for(int i = 0; i < vals.length; i++) {
+ if(i > 0) {
+ sb.append(" ");
+ }
+ sb.append(Bytes.toString(vals[i]));
+ }
+ return new ImmutableBytesWritable(Bytes.toBytesBinary(sb.toString()));
+ }
+
+ /**
+ * Returns the current configuration.
+ *
+ * @return The current configuration.
+ * @see org.apache.hadoop.conf.Configurable#getConf()
+ */
+ @Override
+ public Configuration getConf() {
+ return conf;
+ }
+
+ /**
+ * Sets the configuration. This is used to set up the grouping details.
+ *
+ * @param configuration The configuration to set.
+ * @see org.apache.hadoop.conf.Configurable#setConf(
+ * org.apache.hadoop.conf.Configuration)
+ */
+ @Override
+ public void setConf(Configuration configuration) {
+ this.conf = configuration;
+ String[] cols = conf.get(GROUP_COLUMNS, "").split(" ");
+ columns = new byte[cols.length][];
+ for(int i = 0; i < cols.length; i++) {
+ columns[i] = Bytes.toBytes(cols[i]);
+ }
+ }
+
+}
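
To make the initJob contract above concrete, a minimal sketch (not part of the patch) that wires the mapper into a job; the table and column names are hypothetical:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.GroupingTableMapper;
import org.apache.hadoop.mapreduce.Job;

public class GroupingExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    Job job = Job.getInstance(conf, "grouping-example");
    Scan scan = new Scan();
    scan.setCacheBlocks(false);
    // Group rows of 'SourceTable' by two columns; the map output key is the
    // space-separated concatenation of their values.
    GroupingTableMapper.initJob("SourceTable", scan, "cf1:country cf1:city",
        GroupingTableMapper.class, job);
    // A reducer and an output format still need to be configured before submitting.
  }
}
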
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileInputFormat.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileInputFormat.java
new file mode 100644
index 0000000..e90d5c1
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileInputFormat.java
@@ -0,0 +1,174 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.io.hfile.HFile;
+import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
+import org.apache.hadoop.hbase.io.hfile.HFileScanner;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.input.FileSplit;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Simple MR input format for HFiles.
+ * This code was borrowed from Apache Crunch project.
+ * Updated to the recent version of HBase.
+ */
+public class HFileInputFormat extends FileInputFormat<NullWritable, Cell> {
+
+ private static final Logger LOG = LoggerFactory.getLogger(HFileInputFormat.class);
+
+ /**
+ * File filter that removes all "hidden" files. This might be something worth removing from
+ * a more general purpose utility; it accounts for the presence of metadata files created
+ * in the way we're doing exports.
+ */
+ static final PathFilter HIDDEN_FILE_FILTER = new PathFilter() {
+ @Override
+ public boolean accept(Path p) {
+ String name = p.getName();
+ return !name.startsWith("_") && !name.startsWith(".");
+ }
+ };
+
+ /**
+ * Record reader for HFiles.
+ */
+ private static class HFileRecordReader extends RecordReader<NullWritable, Cell> {
+
+ private Reader in;
+ protected Configuration conf;
+ private HFileScanner scanner;
+
+ /**
+ * A private cache of the key value so it doesn't need to be loaded twice from the scanner.
+ */
+ private Cell value = null;
+ private long count;
+ private boolean seeked = false;
+
+ @Override
+ public void initialize(InputSplit split, TaskAttemptContext context)
+ throws IOException, InterruptedException {
+ FileSplit fileSplit = (FileSplit) split;
+ conf = context.getConfiguration();
+ Path path = fileSplit.getPath();
+ FileSystem fs = path.getFileSystem(conf);
+ LOG.info("Initialize HFileRecordReader for {}", path);
+ this.in = HFile.createReader(fs, path, conf);
+
+ // The file info must be loaded before the scanner can be used.
+ // This seems like a bug in HBase, but it's easily worked around.
+ this.in.loadFileInfo();
+ this.scanner = in.getScanner(false, false);
+
+ }
+
+
+ @Override
+ public boolean nextKeyValue() throws IOException, InterruptedException {
+ boolean hasNext;
+ if (!seeked) {
+ LOG.info("Seeking to start");
+ hasNext = scanner.seekTo();
+ seeked = true;
+ } else {
+ hasNext = scanner.next();
+ }
+ if (!hasNext) {
+ return false;
+ }
+ value = scanner.getCell();
+ count++;
+ return true;
+ }
+
+ @Override
+ public NullWritable getCurrentKey() throws IOException, InterruptedException {
+ return NullWritable.get();
+ }
+
+ @Override
+ public Cell getCurrentValue() throws IOException, InterruptedException {
+ return value;
+ }
+
+ @Override
+ public float getProgress() throws IOException, InterruptedException {
+ // This would be inaccurate if KVs are not uniformly-sized or we have performed a seek to
+ // the start row, but better than nothing anyway.
+ return 1.0f * count / in.getEntries();
+ }
+
+ @Override
+ public void close() throws IOException {
+ if (in != null) {
+ in.close();
+ in = null;
+ }
+ }
+ }
+
+ @Override
+ protected List<FileStatus> listStatus(JobContext job) throws IOException {
+ List<FileStatus> result = new ArrayList<FileStatus>();
+
+ // Explode out directories that match the original FileInputFormat filters
+ // since HFiles are written to directories where the
+ // directory name is the column name
+ for (FileStatus status : super.listStatus(job)) {
+ if (status.isDirectory()) {
+ FileSystem fs = status.getPath().getFileSystem(job.getConfiguration());
+ for (FileStatus match : fs.listStatus(status.getPath(), HIDDEN_FILE_FILTER)) {
+ result.add(match);
+ }
+ } else {
+ result.add(status);
+ }
+ }
+ return result;
+ }
+
+ @Override
+ public RecordReader<NullWritable, Cell> createRecordReader(InputSplit split, TaskAttemptContext context)
+ throws IOException, InterruptedException {
+ return new HFileRecordReader();
+ }
+
+ @Override
+ protected boolean isSplitable(JobContext context, Path filename) {
+ // This file isn't splittable.
+ return false;
+ }
+}
\ No newline at end of file
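
For orientation, a minimal sketch (not part of the patch) of pointing a MapReduce job at a directory of HFiles using the input format above; the path is hypothetical:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.HFileInputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class HFileReadExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    Job job = Job.getInstance(conf, "hfile-read-example");
    job.setInputFormatClass(HFileInputFormat.class);
    // Hypothetical directory of HFiles, e.g. one column-family directory of a bulk-load output.
    FileInputFormat.addInputPath(job, new Path("/tmp/hfiles/cf1"));
    // Each map input record is a NullWritable key and a Cell value, one per KeyValue in the files.
    // A mapper and an output format still need to be configured before submitting.
  }
}
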
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat2.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat2.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat2.java
new file mode 100644
index 0000000..7fea254
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat2.java
@@ -0,0 +1,902 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.net.InetSocketAddress;
+import java.net.URLDecoder;
+import java.net.URLEncoder;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.TreeSet;
+import java.util.UUID;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellComparator;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.fs.HFileSystem;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionLocation;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.io.compress.Compression;
+import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
+import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
+import org.apache.hadoop.hbase.io.hfile.CacheConfig;
+import org.apache.hadoop.hbase.io.hfile.HFile;
+import org.apache.hadoop.hbase.io.hfile.HFileContext;
+import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
+import org.apache.hadoop.hbase.io.hfile.HFileWriterImpl;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.KeyValueUtil;
+import org.apache.hadoop.hbase.regionserver.BloomType;
+import org.apache.hadoop.hbase.regionserver.HStore;
+import org.apache.hadoop.hbase.regionserver.StoreFile;
+import org.apache.hadoop.hbase.regionserver.StoreFileWriter;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.OutputFormat;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.annotations.VisibleForTesting;
+
+/**
+ * Writes HFiles. Passed Cells must arrive in order.
+ * Writes current time as the sequence id for the file. Sets the major compacted
+ * attribute on created {@link HFile}s. Calling write(null,null) will forcibly roll
+ * all HFiles being written.
+ * <p>
+ * Using this class as part of a MapReduce job is best done
+ * using {@link #configureIncrementalLoad(Job, TableDescriptor, RegionLocator)}.
+ */
+@InterfaceAudience.Public
+public class HFileOutputFormat2
+ extends FileOutputFormat<ImmutableBytesWritable, Cell> {
+ private static final Log LOG = LogFactory.getLog(HFileOutputFormat2.class);
+ static class TableInfo {
+ private TableDescriptor tableDesctiptor;
+ private RegionLocator regionLocator;
+
+ public TableInfo(TableDescriptor tableDesctiptor, RegionLocator regionLocator) {
+ this.tableDesctiptor = tableDesctiptor;
+ this.regionLocator = regionLocator;
+ }
+
+ /**
+ * Modifications to the returned HTD do not affect the inner TableDescriptor.
+ * @return A clone of inner table descriptor
+ * @deprecated use {@link #getTableDescriptor}
+ */
+ @Deprecated
+ public HTableDescriptor getHTableDescriptor() {
+ return new HTableDescriptor(tableDesctiptor);
+ }
+
+ public TableDescriptor getTableDescriptor() {
+ return tableDesctiptor;
+ }
+
+ public RegionLocator getRegionLocator() {
+ return regionLocator;
+ }
+ }
+
+ protected static final byte[] tableSeparator = ";".getBytes(StandardCharsets.UTF_8);
+
+ protected static byte[] combineTableNameSuffix(byte[] tableName,
+ byte[] suffix ) {
+ return Bytes.add(tableName, tableSeparator, suffix);
+ }
+
+ // The following constants are private since these are used by
+ // HFileOutputFormat2 to internally transfer data between job setup and
+ // reducer run using conf.
+ // These should not be changed by the client.
+ static final String COMPRESSION_FAMILIES_CONF_KEY =
+ "hbase.hfileoutputformat.families.compression";
+ static final String BLOOM_TYPE_FAMILIES_CONF_KEY =
+ "hbase.hfileoutputformat.families.bloomtype";
+ static final String BLOCK_SIZE_FAMILIES_CONF_KEY =
+ "hbase.mapreduce.hfileoutputformat.blocksize";
+ static final String DATABLOCK_ENCODING_FAMILIES_CONF_KEY =
+ "hbase.mapreduce.hfileoutputformat.families.datablock.encoding";
+
+ // This constant is public since the client can modify this when setting
+ // up their conf object and thus refer to this symbol.
+ // It is present for backwards compatibility reasons. Use it only to
+ // override the auto-detection of datablock encoding.
+ public static final String DATABLOCK_ENCODING_OVERRIDE_CONF_KEY =
+ "hbase.mapreduce.hfileoutputformat.datablock.encoding";
+
+ /**
+ * Keep locality while generating HFiles for bulkload. See HBASE-12596
+ */
+ public static final String LOCALITY_SENSITIVE_CONF_KEY =
+ "hbase.bulkload.locality.sensitive.enabled";
+ private static final boolean DEFAULT_LOCALITY_SENSITIVE = true;
+ static final String OUTPUT_TABLE_NAME_CONF_KEY =
+ "hbase.mapreduce.hfileoutputformat.table.name";
+ static final String MULTI_TABLE_HFILEOUTPUTFORMAT_CONF_KEY =
+ "hbase.mapreduce.use.multi.table.hfileoutputformat";
+
+ public static final String STORAGE_POLICY_PROPERTY = "hbase.hstore.storagepolicy";
+ public static final String STORAGE_POLICY_PROPERTY_CF_PREFIX = STORAGE_POLICY_PROPERTY + ".";
+
+ @Override
+ public RecordWriter<ImmutableBytesWritable, Cell> getRecordWriter(
+ final TaskAttemptContext context) throws IOException, InterruptedException {
+ return createRecordWriter(context);
+ }
+
+ protected static byte[] getTableNameSuffixedWithFamily(byte[] tableName, byte[] family) {
+ return combineTableNameSuffix(tableName, family);
+ }
+
+ static <V extends Cell> RecordWriter<ImmutableBytesWritable, V>
+ createRecordWriter(final TaskAttemptContext context)
+ throws IOException {
+
+ // Get the path of the temporary output file
+ final Path outputPath = FileOutputFormat.getOutputPath(context);
+ final Path outputDir = new FileOutputCommitter(outputPath, context).getWorkPath();
+ final Configuration conf = context.getConfiguration();
+ final boolean writeMultipleTables = conf.getBoolean(MULTI_TABLE_HFILEOUTPUTFORMAT_CONF_KEY, false) ;
+ final String writeTableNames = conf.get(OUTPUT_TABLE_NAME_CONF_KEY);
+ if (writeTableNames==null || writeTableNames.isEmpty()) {
+ throw new IllegalArgumentException("Configuration parameter " + OUTPUT_TABLE_NAME_CONF_KEY
+ + " cannot be empty");
+ }
+ final FileSystem fs = outputDir.getFileSystem(conf);
+ // These configs. are from hbase-*.xml
+ final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE,
+ HConstants.DEFAULT_MAX_FILE_SIZE);
+ // Invented config. Add to hbase-*.xml if other than default compression.
+ final String defaultCompressionStr = conf.get("hfile.compression",
+ Compression.Algorithm.NONE.getName());
+ final Algorithm defaultCompression = HFileWriterImpl
+ .compressionByName(defaultCompressionStr);
+ final boolean compactionExclude = conf.getBoolean(
+ "hbase.mapreduce.hfileoutputformat.compaction.exclude", false);
+
+ final Set<String> allTableNames = Arrays.stream(writeTableNames.split(
+ Bytes.toString(tableSeparator))).collect(Collectors.toSet());
+
+ // create a map from column family to the compression algorithm
+ final Map<byte[], Algorithm> compressionMap = createFamilyCompressionMap(conf);
+ final Map<byte[], BloomType> bloomTypeMap = createFamilyBloomTypeMap(conf);
+ final Map<byte[], Integer> blockSizeMap = createFamilyBlockSizeMap(conf);
+
+ String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_OVERRIDE_CONF_KEY);
+ final Map<byte[], DataBlockEncoding> datablockEncodingMap
+ = createFamilyDataBlockEncodingMap(conf);
+ final DataBlockEncoding overriddenEncoding;
+ if (dataBlockEncodingStr != null) {
+ overriddenEncoding = DataBlockEncoding.valueOf(dataBlockEncodingStr);
+ } else {
+ overriddenEncoding = null;
+ }
+
+ return new RecordWriter<ImmutableBytesWritable, V>() {
+ // Map of families to writers and how much has been output on the writer.
+ private final Map<byte[], WriterLength> writers =
+ new TreeMap<>(Bytes.BYTES_COMPARATOR);
+ private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
+ private final byte[] now = Bytes.toBytes(EnvironmentEdgeManager.currentTime());
+ private boolean rollRequested = false;
+
+ @Override
+ public void write(ImmutableBytesWritable row, V cell)
+ throws IOException {
+ KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
+
+ // null input == user explicitly wants to flush
+ if (row == null && kv == null) {
+ rollWriters();
+ return;
+ }
+
+ byte[] rowKey = CellUtil.cloneRow(kv);
+ long length = kv.getLength();
+ byte[] family = CellUtil.cloneFamily(kv);
+ byte[] tableNameBytes = null;
+ if (writeMultipleTables) {
+ tableNameBytes = MultiTableHFileOutputFormat.getTableName(row.get());
+ if (!allTableNames.contains(Bytes.toString(tableNameBytes))) {
+ throw new IllegalArgumentException("TableName '" + Bytes.toString(tableNameBytes) +
+ "' not" + " expected");
+ }
+ } else {
+ tableNameBytes = writeTableNames.getBytes(StandardCharsets.UTF_8);
+ }
+ byte[] tableAndFamily = getTableNameSuffixedWithFamily(tableNameBytes, family);
+ WriterLength wl = this.writers.get(tableAndFamily);
+
+ // If this is a new column family, verify that the directory exists
+ if (wl == null) {
+ Path writerPath = null;
+ if (writeMultipleTables) {
+ writerPath = new Path(outputDir, new Path(Bytes.toString(tableNameBytes), Bytes
+ .toString(family)));
+ }
+ else {
+ writerPath = new Path(outputDir, Bytes.toString(family));
+ }
+ fs.mkdirs(writerPath);
+ configureStoragePolicy(conf, fs, tableAndFamily, writerPath);
+ }
+
+ // If any of the HFiles for the column families has reached
+ // maxsize, we need to roll all the writers
+ if (wl != null && wl.written + length >= maxsize) {
+ this.rollRequested = true;
+ }
+
+ // This can only happen once a row is finished though
+ if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
+ rollWriters();
+ }
+
+ // create a new HFile writer, if necessary
+ if (wl == null || wl.writer == null) {
+ if (conf.getBoolean(LOCALITY_SENSITIVE_CONF_KEY, DEFAULT_LOCALITY_SENSITIVE)) {
+ HRegionLocation loc = null;
+
+ String tableName = Bytes.toString(tableNameBytes);
+ if (tableName != null) {
+ try (Connection connection = ConnectionFactory.createConnection(conf);
+ RegionLocator locator =
+ connection.getRegionLocator(TableName.valueOf(tableName))) {
+ loc = locator.getRegionLocation(rowKey);
+ } catch (Throwable e) {
+ LOG.warn("There's something wrong when locating rowkey: " +
+ Bytes.toString(rowKey) + " for tablename: " + tableName, e);
+ loc = null;
+ } }
+
+ if (null == loc) {
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("failed to get region location, so use default writer for rowkey: " +
+ Bytes.toString(rowKey));
+ }
+ wl = getNewWriter(tableNameBytes, family, conf, null);
+ } else {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("first rowkey: [" + Bytes.toString(rowKey) + "]");
+ }
+ InetSocketAddress initialIsa =
+ new InetSocketAddress(loc.getHostname(), loc.getPort());
+ if (initialIsa.isUnresolved()) {
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("failed to resolve bind address: " + loc.getHostname() + ":"
+ + loc.getPort() + ", so use default writer");
+ }
+ wl = getNewWriter(tableNameBytes, family, conf, null);
+ } else {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("use favored nodes writer: " + initialIsa.getHostString());
+ }
+ wl = getNewWriter(tableNameBytes, family, conf, new InetSocketAddress[] { initialIsa
+ });
+ }
+ }
+ } else {
+ wl = getNewWriter(tableNameBytes, family, conf, null);
+ }
+ }
+
+ // we now have the proper HFile writer. full steam ahead
+ kv.updateLatestStamp(this.now);
+ wl.writer.append(kv);
+ wl.written += length;
+
+ // Copy the row so we know when a row transition occurs.
+ this.previousRow = rowKey;
+ }
+
+ private void rollWriters() throws IOException {
+ for (WriterLength wl : this.writers.values()) {
+ if (wl.writer != null) {
+ LOG.info(
+ "Writer=" + wl.writer.getPath() + ((wl.written == 0)? "": ", wrote=" + wl.written));
+ close(wl.writer);
+ }
+ wl.writer = null;
+ wl.written = 0;
+ }
+ this.rollRequested = false;
+ }
+
+ /*
+ * Create a new StoreFile.Writer.
+ * @param family
+ * @return A WriterLength, containing a new StoreFile.Writer.
+ * @throws IOException
+ */
+ @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="BX_UNBOXING_IMMEDIATELY_REBOXED",
+ justification="Not important")
+ private WriterLength getNewWriter(byte[] tableName, byte[] family, Configuration
+ conf, InetSocketAddress[] favoredNodes) throws IOException {
+ byte[] tableAndFamily = getTableNameSuffixedWithFamily(tableName, family);
+ Path familydir = new Path(outputDir, Bytes.toString(family));
+ if (writeMultipleTables) {
+ familydir = new Path(outputDir,
+ new Path(Bytes.toString(tableName), Bytes.toString(family)));
+ }
+ WriterLength wl = new WriterLength();
+ Algorithm compression = compressionMap.get(tableAndFamily);
+ compression = compression == null ? defaultCompression : compression;
+ BloomType bloomType = bloomTypeMap.get(tableAndFamily);
+ bloomType = bloomType == null ? BloomType.NONE : bloomType;
+ Integer blockSize = blockSizeMap.get(tableAndFamily);
+ blockSize = blockSize == null ? HConstants.DEFAULT_BLOCKSIZE : blockSize;
+ DataBlockEncoding encoding = overriddenEncoding;
+ encoding = encoding == null ? datablockEncodingMap.get(tableAndFamily) : encoding;
+ encoding = encoding == null ? DataBlockEncoding.NONE : encoding;
+ Configuration tempConf = new Configuration(conf);
+ tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
+ HFileContextBuilder contextBuilder = new HFileContextBuilder()
+ .withCompression(compression)
+ .withChecksumType(HStore.getChecksumType(conf))
+ .withBytesPerCheckSum(HStore.getBytesPerChecksum(conf))
+ .withBlockSize(blockSize);
+
+ if (HFile.getFormatVersion(conf) >= HFile.MIN_FORMAT_VERSION_WITH_TAGS) {
+ contextBuilder.withIncludesTags(true);
+ }
+
+ contextBuilder.withDataBlockEncoding(encoding);
+ HFileContext hFileContext = contextBuilder.build();
+ if (null == favoredNodes) {
+ wl.writer =
+ new StoreFileWriter.Builder(conf, new CacheConfig(tempConf), fs)
+ .withOutputDir(familydir).withBloomType(bloomType)
+ .withComparator(CellComparator.COMPARATOR).withFileContext(hFileContext).build();
+ } else {
+ wl.writer =
+ new StoreFileWriter.Builder(conf, new CacheConfig(tempConf), new HFileSystem(fs))
+ .withOutputDir(familydir).withBloomType(bloomType)
+ .withComparator(CellComparator.COMPARATOR).withFileContext(hFileContext)
+ .withFavoredNodes(favoredNodes).build();
+ }
+
+ this.writers.put(tableAndFamily, wl);
+ return wl;
+ }
+
+ private void close(final StoreFileWriter w) throws IOException {
+ if (w != null) {
+ w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY,
+ Bytes.toBytes(System.currentTimeMillis()));
+ w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
+ Bytes.toBytes(context.getTaskAttemptID().toString()));
+ w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY,
+ Bytes.toBytes(true));
+ w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY,
+ Bytes.toBytes(compactionExclude));
+ w.appendTrackedTimestampsToMetadata();
+ w.close();
+ }
+ }
+
+ @Override
+ public void close(TaskAttemptContext c)
+ throws IOException, InterruptedException {
+ for (WriterLength wl: this.writers.values()) {
+ close(wl.writer);
+ }
+ }
+ };
+ }
+
+ /**
+ * Configure block storage policy for CF after the directory is created.
+ */
+ static void configureStoragePolicy(final Configuration conf, final FileSystem fs,
+ byte[] tableAndFamily, Path cfPath) {
+ if (null == conf || null == fs || null == tableAndFamily || null == cfPath) {
+ return;
+ }
+
+ String policy =
+ conf.get(STORAGE_POLICY_PROPERTY_CF_PREFIX + Bytes.toString(tableAndFamily),
+ conf.get(STORAGE_POLICY_PROPERTY));
+ FSUtils.setStoragePolicy(fs, cfPath, policy);
+ }
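For orientation, the method above only resolves the policy name from the job configuration (a per-column-family key first, falling back to the table-wide key) and delegates to FSUtils. A minimal stand-alone sketch of that terminal call, with a hypothetical output path and policy name, not part of this patch:

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.hbase.HBaseConfiguration;
  import org.apache.hadoop.hbase.util.FSUtils;

  public class StoragePolicyDemo {
    public static void main(String[] args) throws Exception {
      Configuration conf = HBaseConfiguration.create();
      FileSystem fs = FileSystem.get(conf);
      // Hypothetical column family output directory; in the output format this is cfPath.
      Path cfDir = new Path("/tmp/bulkload-output/cf1");
      fs.mkdirs(cfDir);
      // Same call as configureStoragePolicy above; the policy name is illustrative.
      FSUtils.setStoragePolicy(fs, cfDir, "ALL_SSD");
    }
  }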
+
+ /*
+ * Data structure to hold a Writer and amount of data written on it.
+ */
+ static class WriterLength {
+ long written = 0;
+ StoreFileWriter writer = null;
+ }
+
+ /**
+ * Return the start keys of all of the regions of the given tables,
+ * as a list of ImmutableBytesWritable.
+ */
+ private static List<ImmutableBytesWritable> getRegionStartKeys(List<RegionLocator> regionLocators,
+ boolean writeMultipleTables)
+ throws IOException {
+
+ ArrayList<ImmutableBytesWritable> ret = new ArrayList<>();
+ for(RegionLocator regionLocator : regionLocators)
+ {
+ TableName tableName = regionLocator.getName();
+ LOG.info("Looking up current regions for table " + tableName);
+ byte[][] byteKeys = regionLocator.getStartKeys();
+ for (byte[] byteKey : byteKeys) {
+ byte[] fullKey = byteKey; //HFileOutputFormat2 use case
+ if (writeMultipleTables)
+ {
+ //MultiTableHFileOutputFormat use case
+ fullKey = combineTableNameSuffix(tableName.getName(), byteKey);
+ }
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("SplitPoint startkey for table [" + tableName + "]: [" + Bytes.toStringBinary
+ (fullKey) + "]");
+ }
+ ret.add(new ImmutableBytesWritable(fullKey));
+ }
+ }
+ return ret;
+ }
+
+ /**
+ * Write out a {@link SequenceFile} that can be read by
+ * {@link TotalOrderPartitioner} that contains the split points in startKeys.
+ */
+ @SuppressWarnings("deprecation")
+ private static void writePartitions(Configuration conf, Path partitionsPath,
+ List<ImmutableBytesWritable> startKeys, boolean writeMultipleTables) throws IOException {
+ LOG.info("Writing partition information to " + partitionsPath);
+ if (startKeys.isEmpty()) {
+ throw new IllegalArgumentException("No regions passed");
+ }
+
+ // We're generating a list of split points, and we don't ever
+ // have keys < the first region (which has an empty start key)
+ // so we need to remove it. Otherwise we would end up with an
+ // empty reducer with index 0
+ TreeSet<ImmutableBytesWritable> sorted = new TreeSet<>(startKeys);
+ ImmutableBytesWritable first = sorted.first();
+ if (writeMultipleTables) {
+ first = new ImmutableBytesWritable(MultiTableHFileOutputFormat.getSuffix(sorted.first
+ ().get()));
+ }
+ if (!first.equals(HConstants.EMPTY_BYTE_ARRAY)) {
+ throw new IllegalArgumentException(
+ "First region of table should have empty start key. Instead has: "
+ + Bytes.toStringBinary(first.get()));
+ }
+ sorted.remove(sorted.first());
+
+ // Write the actual file
+ FileSystem fs = partitionsPath.getFileSystem(conf);
+ SequenceFile.Writer writer = SequenceFile.createWriter(
+ fs, conf, partitionsPath, ImmutableBytesWritable.class,
+ NullWritable.class);
+
+ try {
+ for (ImmutableBytesWritable startKey : sorted) {
+ writer.append(startKey, NullWritable.get());
+ }
+ } finally {
+ writer.close();
+ }
+ }
+
+ /**
+ * Configure a MapReduce Job to perform an incremental load into the given
+ * table. This
+ * <ul>
+ * <li>Inspects the table to configure a total order partitioner</li>
+ * <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
+ * <li>Sets the number of reduce tasks to match the current number of regions</li>
+ * <li>Sets the output key/value class to match HFileOutputFormat2's requirements</li>
+ * <li>Sets the reducer up to perform the appropriate sorting (KeyValueSortReducer,
+ * PutSortReducer or TextSortReducer, depending on the map output value class)</li>
+ * </ul>
+ * The user should be sure to set the map output value class to KeyValue, Put or Text before
+ * running this function.
+ */
+ public static void configureIncrementalLoad(Job job, Table table, RegionLocator regionLocator)
+ throws IOException {
+ configureIncrementalLoad(job, table.getDescriptor(), regionLocator);
+ }
+
+ /**
+ * Configure a MapReduce Job to perform an incremental load into the given
+ * table. This
+ * <ul>
+ * <li>Inspects the table to configure a total order partitioner</li>
+ * <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
+ * <li>Sets the number of reduce tasks to match the current number of regions</li>
+ * <li>Sets the output key/value class to match HFileOutputFormat2's requirements</li>
+ * <li>Sets the reducer up to perform the appropriate sorting (KeyValueSortReducer,
+ * PutSortReducer or TextSortReducer, depending on the map output value class)</li>
+ * </ul>
+ * The user should be sure to set the map output value class to KeyValue, Put or Text before
+ * running this function.
+ */
+ public static void configureIncrementalLoad(Job job, TableDescriptor tableDescriptor,
+ RegionLocator regionLocator) throws IOException {
+ ArrayList<TableInfo> singleTableInfo = new ArrayList<>();
+ singleTableInfo.add(new TableInfo(tableDescriptor, regionLocator));
+ configureIncrementalLoad(job, singleTableInfo, HFileOutputFormat2.class);
+ }
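For context, the overloads above are normally invoked from a job driver after the map output key and value classes have been set, because the value class determines which sort reducer is installed. A minimal, self-contained sketch of that pattern follows; the driver class, table name and column family are hypothetical and not part of this patch.

  import java.io.IOException;
  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.hbase.HBaseConfiguration;
  import org.apache.hadoop.hbase.TableName;
  import org.apache.hadoop.hbase.client.Connection;
  import org.apache.hadoop.hbase.client.ConnectionFactory;
  import org.apache.hadoop.hbase.client.Put;
  import org.apache.hadoop.hbase.client.RegionLocator;
  import org.apache.hadoop.hbase.client.Table;
  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
  import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
  import org.apache.hadoop.hbase.util.Bytes;
  import org.apache.hadoop.io.LongWritable;
  import org.apache.hadoop.io.Text;
  import org.apache.hadoop.mapreduce.Job;
  import org.apache.hadoop.mapreduce.Mapper;
  import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
  import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
  import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

  public class BulkLoadDriver {
    // Emits one Put per input line of the form "rowkey<TAB>value" into a hypothetical family "f".
    static class LineToPutMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
      @Override
      protected void map(LongWritable offset, Text line, Context ctx)
          throws IOException, InterruptedException {
        String[] parts = line.toString().split("\t", 2);
        if (parts.length < 2) {
          return; // skip malformed lines
        }
        byte[] row = Bytes.toBytes(parts[0]);
        Put put = new Put(row);
        put.addColumn(Bytes.toBytes("f"), Bytes.toBytes("q"), Bytes.toBytes(parts[1]));
        ctx.write(new ImmutableBytesWritable(row), put);
      }
    }

    public static void main(String[] args) throws Exception {
      Configuration conf = HBaseConfiguration.create();
      Job job = Job.getInstance(conf, "bulk-load-sketch");
      job.setJarByClass(BulkLoadDriver.class);
      job.setInputFormatClass(TextInputFormat.class);
      job.setMapperClass(LineToPutMapper.class);
      job.setMapOutputKeyClass(ImmutableBytesWritable.class);
      job.setMapOutputValueClass(Put.class); // Put map output => PutSortReducer is selected
      FileInputFormat.addInputPath(job, new Path(args[0]));
      FileOutputFormat.setOutputPath(job, new Path(args[1])); // HFiles are written under here
      try (Connection conn = ConnectionFactory.createConnection(conf);
           Table table = conn.getTable(TableName.valueOf("example_table"));
           RegionLocator locator = conn.getRegionLocator(table.getName())) {
        // Configures partitioner, reducer, reduce count and output key/value classes.
        HFileOutputFormat2.configureIncrementalLoad(job, table, locator);
      }
      System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
  }

After such a job completes, the HFiles under the output directory are typically handed to the completebulkload step (LoadIncrementalHFiles); that step is outside the scope of this class.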
+
+ static void configureIncrementalLoad(Job job, List<TableInfo> multiTableInfo, Class<? extends OutputFormat<?, ?>> cls) throws IOException {
+ Configuration conf = job.getConfiguration();
+ job.setOutputKeyClass(ImmutableBytesWritable.class);
+ job.setOutputValueClass(KeyValue.class);
+ job.setOutputFormatClass(cls);
+
+ if (multiTableInfo.stream().distinct().count() != multiTableInfo.size()) {
+ throw new IllegalArgumentException("Duplicate entries found in TableInfo argument");
+ }
+ boolean writeMultipleTables = false;
+ if (MultiTableHFileOutputFormat.class.equals(cls)) {
+ writeMultipleTables = true;
+ conf.setBoolean(MULTI_TABLE_HFILEOUTPUTFORMAT_CONF_KEY, true);
+ }
+ // Based on the configured map output class, set the correct reducer to properly
+ // sort the incoming values.
+ // TODO it would be nice to pick one or the other of these formats.
+ if (KeyValue.class.equals(job.getMapOutputValueClass())) {
+ job.setReducerClass(KeyValueSortReducer.class);
+ } else if (Put.class.equals(job.getMapOutputValueClass())) {
+ job.setReducerClass(PutSortReducer.class);
+ } else if (Text.class.equals(job.getMapOutputValueClass())) {
+ job.setReducerClass(TextSortReducer.class);
+ } else {
+ LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
+ }
+
+ conf.setStrings("io.serializations", conf.get("io.serializations"),
+ MutationSerialization.class.getName(), ResultSerialization.class.getName(),
+ KeyValueSerialization.class.getName());
+
+ if (conf.getBoolean(LOCALITY_SENSITIVE_CONF_KEY, DEFAULT_LOCALITY_SENSITIVE)) {
+ LOG.info("bulkload locality sensitive enabled");
+ }
+
+ /* Now get the region start keys for every table required */
+ List<String> allTableNames = new ArrayList<>(multiTableInfo.size());
+ List<RegionLocator> regionLocators = new ArrayList<>( multiTableInfo.size());
+ List<TableDescriptor> tableDescriptors = new ArrayList<>( multiTableInfo.size());
+
+ for( TableInfo tableInfo : multiTableInfo )
+ {
+ regionLocators.add(tableInfo.getRegionLocator());
+ allTableNames.add(tableInfo.getRegionLocator().getName().getNameAsString());
+ tableDescriptors.add(tableInfo.getTableDescriptor());
+ }
+ // Record table names so that writers can be created with favored nodes, and so that compression, block size and other per-column-family attributes can be decoded for each table
+ conf.set(OUTPUT_TABLE_NAME_CONF_KEY, StringUtils.join(allTableNames, Bytes
+ .toString(tableSeparator)));
+ List<ImmutableBytesWritable> startKeys = getRegionStartKeys(regionLocators, writeMultipleTables);
+ // Use table's region boundaries for TOP split points.
+ LOG.info("Configuring " + startKeys.size() + " reduce partitions " +
+ "to match current region count for all tables");
+ job.setNumReduceTasks(startKeys.size());
+
+ configurePartitioner(job, startKeys, writeMultipleTables);
+ // Set compression algorithms based on column families
+
+ conf.set(COMPRESSION_FAMILIES_CONF_KEY, serializeColumnFamilyAttribute(compressionDetails,
+ tableDescriptors));
+ conf.set(BLOCK_SIZE_FAMILIES_CONF_KEY, serializeColumnFamilyAttribute(blockSizeDetails,
+ tableDescriptors));
+ conf.set(BLOOM_TYPE_FAMILIES_CONF_KEY, serializeColumnFamilyAttribute(bloomTypeDetails,
+ tableDescriptors));
+ conf.set(DATABLOCK_ENCODING_FAMILIES_CONF_KEY,
+ serializeColumnFamilyAttribute(dataBlockEncodingDetails, tableDescriptors));
+
+ TableMapReduceUtil.addDependencyJars(job);
+ TableMapReduceUtil.initCredentials(job);
+ LOG.info("Incremental output configured for tables: " + StringUtils.join(allTableNames, ","));
+ }
+
+ public static void configureIncrementalLoadMap(Job job, TableDescriptor tableDescriptor) throws
+ IOException {
+ Configuration conf = job.getConfiguration();
+
+ job.setOutputKeyClass(ImmutableBytesWritable.class);
+ job.setOutputValueClass(KeyValue.class);
+ job.setOutputFormatClass(HFileOutputFormat2.class);
+
+ ArrayList<TableDescriptor> singleTableDescriptor = new ArrayList<>(1);
+ singleTableDescriptor.add(tableDescriptor);
+
+ conf.set(OUTPUT_TABLE_NAME_CONF_KEY, tableDescriptor.getTableName().getNameAsString());
+ // Set compression algorithms based on column families
+ conf.set(COMPRESSION_FAMILIES_CONF_KEY,
+ serializeColumnFamilyAttribute(compressionDetails, singleTableDescriptor));
+ conf.set(BLOCK_SIZE_FAMILIES_CONF_KEY,
+ serializeColumnFamilyAttribute(blockSizeDetails, singleTableDescriptor));
+ conf.set(BLOOM_TYPE_FAMILIES_CONF_KEY,
+ serializeColumnFamilyAttribute(bloomTypeDetails, singleTableDescriptor));
+ conf.set(DATABLOCK_ENCODING_FAMILIES_CONF_KEY,
+ serializeColumnFamilyAttribute(dataBlockEncodingDetails, singleTableDescriptor));
+
+ TableMapReduceUtil.addDependencyJars(job);
+ TableMapReduceUtil.initCredentials(job);
+ LOG.info("Incremental table " + tableDescriptor.getTableName() + " output configured.");
+ }
+
+ /**
+ * Runs inside the task to deserialize column family to compression algorithm
+ * map from the configuration.
+ *
+ * @param conf to read the serialized values from
+ * @return a map from column family to the configured compression algorithm
+ */
+ @VisibleForTesting
+ static Map<byte[], Algorithm> createFamilyCompressionMap(Configuration
+ conf) {
+ Map<byte[], String> stringMap = createFamilyConfValueMap(conf,
+ COMPRESSION_FAMILIES_CONF_KEY);
+ Map<byte[], Algorithm> compressionMap = new TreeMap<>(Bytes.BYTES_COMPARATOR);
+ for (Map.Entry<byte[], String> e : stringMap.entrySet()) {
+ Algorithm algorithm = HFileWriterImpl.compressionByName(e.getValue());
+ compressionMap.put(e.getKey(), algorithm);
+ }
+ return compressionMap;
+ }
+
+ /**
+ * Runs inside the task to deserialize column family to bloom filter type
+ * map from the configuration.
+ *
+ * @param conf to read the serialized values from
+ * @return a map from column family to the configured bloom filter type
+ */
+ @VisibleForTesting
+ static Map<byte[], BloomType> createFamilyBloomTypeMap(Configuration conf) {
+ Map<byte[], String> stringMap = createFamilyConfValueMap(conf,
+ BLOOM_TYPE_FAMILIES_CONF_KEY);
+ Map<byte[], BloomType> bloomTypeMap = new TreeMap<>(Bytes.BYTES_COMPARATOR);
+ for (Map.Entry<byte[], String> e : stringMap.entrySet()) {
+ BloomType bloomType = BloomType.valueOf(e.getValue());
+ bloomTypeMap.put(e.getKey(), bloomType);
+ }
+ return bloomTypeMap;
+ }
+
+ /**
+ * Runs inside the task to deserialize column family to block size
+ * map from the configuration.
+ *
+ * @param conf to read the serialized values from
+ * @return a map from column family to the configured block size
+ */
+ @VisibleForTesting
+ static Map<byte[], Integer> createFamilyBlockSizeMap(Configuration conf) {
+ Map<byte[], String> stringMap = createFamilyConfValueMap(conf,
+ BLOCK_SIZE_FAMILIES_CONF_KEY);
+ Map<byte[], Integer> blockSizeMap = new TreeMap<>(Bytes.BYTES_COMPARATOR);
+ for (Map.Entry<byte[], String> e : stringMap.entrySet()) {
+ Integer blockSize = Integer.parseInt(e.getValue());
+ blockSizeMap.put(e.getKey(), blockSize);
+ }
+ return blockSizeMap;
+ }
+
+ /**
+ * Runs inside the task to deserialize column family to data block encoding
+ * type map from the configuration.
+ *
+ * @param conf to read the serialized values from
+ * @return a map from column family to the configured data block
+ * encoding for the family
+ */
+ @VisibleForTesting
+ static Map<byte[], DataBlockEncoding> createFamilyDataBlockEncodingMap(
+ Configuration conf) {
+ Map<byte[], String> stringMap = createFamilyConfValueMap(conf,
+ DATABLOCK_ENCODING_FAMILIES_CONF_KEY);
+ Map<byte[], DataBlockEncoding> encoderMap = new TreeMap<>(Bytes.BYTES_COMPARATOR);
+ for (Map.Entry<byte[], String> e : stringMap.entrySet()) {
+ encoderMap.put(e.getKey(), DataBlockEncoding.valueOf((e.getValue())));
+ }
+ return encoderMap;
+ }
+
+
+ /**
+ * Run inside the task to deserialize column family to given conf value map.
+ *
+ * @param conf to read the serialized values from
+ * @param confName conf key to read from the configuration
+ * @return a map of column family to the given configuration value
+ */
+ private static Map<byte[], String> createFamilyConfValueMap(
+ Configuration conf, String confName) {
+ Map<byte[], String> confValMap = new TreeMap<>(Bytes.BYTES_COMPARATOR);
+ String confVal = conf.get(confName, "");
+ for (String familyConf : confVal.split("&")) {
+ String[] familySplit = familyConf.split("=");
+ if (familySplit.length != 2) {
+ continue;
+ }
+ try {
+ confValMap.put(URLDecoder.decode(familySplit[0], "UTF-8").getBytes(StandardCharsets.UTF_8),
+ URLDecoder.decode(familySplit[1], "UTF-8"));
+ } catch (UnsupportedEncodingException e) {
+ // will not happen with UTF-8 encoding
+ throw new AssertionError(e);
+ }
+ }
+ return confValMap;
+ }
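To make the round trip concrete: serializeColumnFamilyAttribute (further down) emits '&'-separated, URL-encoded key=value pairs keyed by the table-suffixed family name, and the method above splits them back apart. A small stand-alone sketch of that format, with hypothetical family names and values:

  import java.net.URLDecoder;
  import java.util.Map;
  import java.util.TreeMap;

  public class FamilyConfValueFormatDemo {
    public static void main(String[] args) throws Exception {
      // Hypothetical serialized value, as written by serializeColumnFamilyAttribute below:
      // URL-encoded key=value pairs joined with '&'.
      String serialized = "cf1=GZ&cf2=NONE";
      Map<String, String> parsed = new TreeMap<>();
      for (String pair : serialized.split("&")) {
        String[] kv = pair.split("=");
        if (kv.length != 2) {
          continue; // skip malformed pairs, mirroring createFamilyConfValueMap above
        }
        parsed.put(URLDecoder.decode(kv[0], "UTF-8"), URLDecoder.decode(kv[1], "UTF-8"));
      }
      System.out.println(parsed); // {cf1=GZ, cf2=NONE}
    }
  }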
+
+ /**
+ * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
+ * <code>splitPoints</code>. Cleans up the partitions file after the job exits.
+ */
+ static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints, boolean
+ writeMultipleTables)
+ throws IOException {
+ Configuration conf = job.getConfiguration();
+ // create the partitions file
+ FileSystem fs = FileSystem.get(conf);
+ String hbaseTmpFsDir =
+ conf.get(HConstants.TEMPORARY_FS_DIRECTORY_KEY,
+ HConstants.DEFAULT_TEMPORARY_HDFS_DIRECTORY);
+ Path partitionsPath = new Path(hbaseTmpFsDir, "partitions_" + UUID.randomUUID());
+ fs.makeQualified(partitionsPath);
+ writePartitions(conf, partitionsPath, splitPoints, writeMultipleTables);
+ fs.deleteOnExit(partitionsPath);
+
+ // configure job to use it
+ job.setPartitionerClass(TotalOrderPartitioner.class);
+ TotalOrderPartitioner.setPartitionFile(conf, partitionsPath);
+ }
+
+ @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "RCN_REDUNDANT_NULLCHECK_OF_NONNULL_VALUE")
+ @VisibleForTesting
+ static String serializeColumnFamilyAttribute(Function<ColumnFamilyDescriptor, String> fn, List<TableDescriptor> allTables)
+ throws UnsupportedEncodingException {
+ StringBuilder attributeValue = new StringBuilder();
+ int i = 0;
+ for (TableDescriptor tableDescriptor : allTables) {
+ if (tableDescriptor == null) {
+ // could happen with mock table instance
+ // CODEREVIEW: Can I set an empty string in conf if mock table instance?
+ return "";
+ }
+ for (ColumnFamilyDescriptor familyDescriptor : tableDescriptor.getColumnFamilies()) {
+ if (i++ > 0) {
+ attributeValue.append('&');
+ }
+ attributeValue.append(URLEncoder.encode(
+ Bytes.toString(combineTableNameSuffix(tableDescriptor.getTableName().getName(), familyDescriptor.getName())),
+ "UTF-8"));
+ attributeValue.append('=');
+ attributeValue.append(URLEncoder.encode(fn.apply(familyDescriptor), "UTF-8"));
+ }
+ }
+ // Pairs are joined with '&' above, so there is no trailing ampersand to strip
+ return attributeValue.toString();
+ }
+
+ /**
+ * Serialize column family to compression algorithm map to configuration.
+ * Invoked while configuring the MR job for incremental load.
+ *
+ * @param tableDescriptor to read the properties from
+ * @param conf to persist serialized values into
+ * @throws IOException
+ * on failure to read column family descriptors
+ */
+ @VisibleForTesting
+ static Function<ColumnFamilyDescriptor, String> compressionDetails = familyDescriptor ->
+ familyDescriptor.getCompressionType().getName();
+
+ /**
+ * Serialize column family to block size map to configuration. Invoked while
+ * configuring the MR job for incremental load.
+ *
+ * @param tableDescriptor
+ * to read the properties from
+ * @param conf
+ * to persist serialized values into
+ *
+ * @throws IOException
+ * on failure to read column family descriptors
+ */
+ @VisibleForTesting
+ static Function<ColumnFamilyDescriptor, String> blockSizeDetails = familyDescriptor -> String
+ .valueOf(familyDescriptor.getBlocksize());
+
+ /**
+ * Serialize column family to bloom type map to configuration. Invoked while
+ * configuring the MR job for incremental load.
+ *
+ * @param tableDescriptor
+ * to read the properties from
+ * @param conf
+ * to persist serialized values into
+ *
+ * @throws IOException
+ * on failure to read column family descriptors
+ */
+ @VisibleForTesting
+ static Function<ColumnFamilyDescriptor, String> bloomTypeDetails = familyDescriptor -> {
+ String bloomType = familyDescriptor.getBloomFilterType().toString();
+ if (bloomType == null) {
+ bloomType = ColumnFamilyDescriptorBuilder.DEFAULT_BLOOMFILTER.name();
+ }
+ return bloomType;
+ };
+
+ /**
+ * Serialize column family to data block encoding map to configuration.
+ * Invoked while configuring the MR job for incremental load.
+ *
+ * @param tableDescriptor
+ * to read the properties from
+ * @param conf
+ * to persist serialized values into
+ * @throws IOException
+ * on failure to read column family descriptors
+ */
+ @VisibleForTesting
+ static Function<ColumnFamilyDescriptor, String> dataBlockEncodingDetails = familyDescriptor -> {
+ DataBlockEncoding encoding = familyDescriptor.getDataBlockEncoding();
+ if (encoding == null) {
+ encoding = DataBlockEncoding.NONE;
+ }
+ return encoding.toString();
+ };
+
+}
[06/41] hbase git commit: HBASE-18640 Move mapreduce out of
hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportExport.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportExport.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportExport.java
deleted file mode 100644
index dc59817..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportExport.java
+++ /dev/null
@@ -1,727 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-import static org.mockito.Matchers.any;
-import static org.mockito.Mockito.doAnswer;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.when;
-
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.net.URL;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.NavigableMap;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.HColumnDescriptor;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.KeepDeletedCells;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Delete;
-import org.apache.hadoop.hbase.client.Durability;
-import org.apache.hadoop.hbase.client.Get;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.filter.Filter;
-import org.apache.hadoop.hbase.filter.FilterBase;
-import org.apache.hadoop.hbase.filter.PrefixFilter;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.mapreduce.Import.KeyValueImporter;
-import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener;
-import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
-import org.apache.hadoop.hbase.wal.WAL;
-import org.apache.hadoop.hbase.wal.WALKey;
-import org.apache.hadoop.hbase.testclassification.MediumTests;
-import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.LauncherSecurityManager;
-import org.apache.hadoop.mapreduce.Mapper.Context;
-import org.apache.hadoop.util.ToolRunner;
-import org.junit.After;
-import org.junit.AfterClass;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
-import org.mockito.invocation.InvocationOnMock;
-import org.mockito.stubbing.Answer;
-
-/**
- * Tests the table import and table export MR job functionality
- */
-@Category({VerySlowMapReduceTests.class, MediumTests.class})
-public class TestImportExport {
- private static final Log LOG = LogFactory.getLog(TestImportExport.class);
- private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
- private static final byte[] ROW1 = Bytes.toBytesBinary("\\x32row1");
- private static final byte[] ROW2 = Bytes.toBytesBinary("\\x32row2");
- private static final byte[] ROW3 = Bytes.toBytesBinary("\\x32row3");
- private static final String FAMILYA_STRING = "a";
- private static final String FAMILYB_STRING = "b";
- private static final byte[] FAMILYA = Bytes.toBytes(FAMILYA_STRING);
- private static final byte[] FAMILYB = Bytes.toBytes(FAMILYB_STRING);
- private static final byte[] QUAL = Bytes.toBytes("q");
- private static final String OUTPUT_DIR = "outputdir";
- private static String FQ_OUTPUT_DIR;
- private static final String EXPORT_BATCH_SIZE = "100";
-
- private static long now = System.currentTimeMillis();
-
- @BeforeClass
- public static void beforeClass() throws Exception {
- // Up the handlers; this test needs more than usual.
- UTIL.getConfiguration().setInt(HConstants.REGION_SERVER_HIGH_PRIORITY_HANDLER_COUNT, 10);
- UTIL.startMiniCluster();
- FQ_OUTPUT_DIR =
- new Path(OUTPUT_DIR).makeQualified(FileSystem.get(UTIL.getConfiguration())).toString();
- }
-
- @AfterClass
- public static void afterClass() throws Exception {
- UTIL.shutdownMiniCluster();
- }
-
- @Rule
- public final TestName name = new TestName();
-
- @Before
- public void announce() {
- LOG.info("Running " + name.getMethodName());
- }
-
- @Before
- @After
- public void cleanup() throws Exception {
- FileSystem fs = FileSystem.get(UTIL.getConfiguration());
- fs.delete(new Path(OUTPUT_DIR), true);
- }
-
- /**
- * Runs an export job with the specified command line args
- * @param args
- * @return true if job completed successfully
- * @throws IOException
- * @throws InterruptedException
- * @throws ClassNotFoundException
- */
- boolean runExport(String[] args) throws Exception {
- // need to make a copy of the configuration to make sure different temp dirs are used.
- int status = ToolRunner.run(new Configuration(UTIL.getConfiguration()), new Export(), args);
- return status == 0;
- }
-
- /**
- * Runs an import job with the specified command line args
- * @param args
- * @return true if job completed successfully
- * @throws IOException
- * @throws InterruptedException
- * @throws ClassNotFoundException
- */
- boolean runImport(String[] args) throws Exception {
- // need to make a copy of the configuration to make sure different temp dirs are used.
- int status = ToolRunner.run(new Configuration(UTIL.getConfiguration()), new Import(), args);
- return status == 0;
- }
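The two helpers above mirror how Export and Import are driven outside of tests: each is a Tool run via ToolRunner with positional arguments. A hedged stand-alone sketch of the same round trip (table names and paths are illustrative; the argument order follows the usage string asserted in testExportMain further down):

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.hbase.HBaseConfiguration;
  import org.apache.hadoop.hbase.mapreduce.Export;
  import org.apache.hadoop.hbase.mapreduce.Import;
  import org.apache.hadoop.util.ToolRunner;

  public class ExportImportRoundTrip {
    public static void main(String[] args) throws Exception {
      Configuration conf = HBaseConfiguration.create();
      // Export: <tablename> <outputdir> [<versions> [<starttime> [<endtime>]]]
      int exportRc = ToolRunner.run(new Configuration(conf), new Export(),
          new String[] { "my_table", "/tmp/export-out", "1000" });
      // Import: <tablename> <inputdir>
      int importRc = ToolRunner.run(new Configuration(conf), new Import(),
          new String[] { "my_table_copy", "/tmp/export-out" });
      System.exit(exportRc == 0 && importRc == 0 ? 0 : 1);
    }
  }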
-
- /**
- * Test simple replication case with column mapping
- * @throws Exception
- */
- @Test
- public void testSimpleCase() throws Exception {
- try (Table t = UTIL.createTable(TableName.valueOf(name.getMethodName()), FAMILYA, 3);) {
- Put p = new Put(ROW1);
- p.addColumn(FAMILYA, QUAL, now, QUAL);
- p.addColumn(FAMILYA, QUAL, now + 1, QUAL);
- p.addColumn(FAMILYA, QUAL, now + 2, QUAL);
- t.put(p);
- p = new Put(ROW2);
- p.addColumn(FAMILYA, QUAL, now, QUAL);
- p.addColumn(FAMILYA, QUAL, now + 1, QUAL);
- p.addColumn(FAMILYA, QUAL, now + 2, QUAL);
- t.put(p);
- p = new Put(ROW3);
- p.addColumn(FAMILYA, QUAL, now, QUAL);
- p.addColumn(FAMILYA, QUAL, now + 1, QUAL);
- p.addColumn(FAMILYA, QUAL, now + 2, QUAL);
- t.put(p);
- }
-
- String[] args = new String[] {
- // Only export row1 & row2.
- "-D" + TableInputFormat.SCAN_ROW_START + "=\\x32row1",
- "-D" + TableInputFormat.SCAN_ROW_STOP + "=\\x32row3",
- name.getMethodName(),
- FQ_OUTPUT_DIR,
- "1000", // max number of key versions per key to export
- };
- assertTrue(runExport(args));
-
- final String IMPORT_TABLE = name.getMethodName() + "import";
- try (Table t = UTIL.createTable(TableName.valueOf(IMPORT_TABLE), FAMILYB, 3);) {
- args = new String[] {
- "-D" + Import.CF_RENAME_PROP + "="+FAMILYA_STRING+":"+FAMILYB_STRING,
- IMPORT_TABLE,
- FQ_OUTPUT_DIR
- };
- assertTrue(runImport(args));
-
- Get g = new Get(ROW1);
- g.setMaxVersions();
- Result r = t.get(g);
- assertEquals(3, r.size());
- g = new Get(ROW2);
- g.setMaxVersions();
- r = t.get(g);
- assertEquals(3, r.size());
- g = new Get(ROW3);
- r = t.get(g);
- assertEquals(0, r.size());
- }
- }
-
- /**
- * Test export hbase:meta table
- *
- * @throws Exception
- */
- @Test
- public void testMetaExport() throws Exception {
- String EXPORT_TABLE = TableName.META_TABLE_NAME.getNameAsString();
- String[] args = new String[] { EXPORT_TABLE, FQ_OUTPUT_DIR, "1", "0", "0" };
- assertTrue(runExport(args));
- }
-
- /**
- * Test import data from 0.94 exported file
- * @throws Exception
- */
- @Test
- public void testImport94Table() throws Exception {
- final String name = "exportedTableIn94Format";
- URL url = TestImportExport.class.getResource(name);
- File f = new File(url.toURI());
- if (!f.exists()) {
- LOG.warn("FAILED TO FIND " + f + "; skipping out on test");
- return;
- }
- assertTrue(f.exists());
- LOG.info("FILE=" + f);
- Path importPath = new Path(f.toURI());
- FileSystem fs = FileSystem.get(UTIL.getConfiguration());
- fs.copyFromLocalFile(importPath, new Path(FQ_OUTPUT_DIR + Path.SEPARATOR + name));
- String IMPORT_TABLE = name;
- try (Table t = UTIL.createTable(TableName.valueOf(IMPORT_TABLE), Bytes.toBytes("f1"), 3);) {
- String[] args = new String[] {
- "-Dhbase.import.version=0.94" ,
- IMPORT_TABLE, FQ_OUTPUT_DIR
- };
- assertTrue(runImport(args));
- /* exportedTableIn94Format contains 5 rows
- ROW COLUMN+CELL
- r1 column=f1:c1, timestamp=1383766761171, value=val1
- r2 column=f1:c1, timestamp=1383766771642, value=val2
- r3 column=f1:c1, timestamp=1383766777615, value=val3
- r4 column=f1:c1, timestamp=1383766785146, value=val4
- r5 column=f1:c1, timestamp=1383766791506, value=val5
- */
- assertEquals(5, UTIL.countRows(t));
- }
- }
-
- /**
- * Test export scanner batching
- */
- @Test
- public void testExportScannerBatching() throws Exception {
- HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(name.getMethodName()));
- desc.addFamily(new HColumnDescriptor(FAMILYA)
- .setMaxVersions(1)
- );
- UTIL.getAdmin().createTable(desc);
- try (Table t = UTIL.getConnection().getTable(desc.getTableName());) {
-
- Put p = new Put(ROW1);
- p.addColumn(FAMILYA, QUAL, now, QUAL);
- p.addColumn(FAMILYA, QUAL, now + 1, QUAL);
- p.addColumn(FAMILYA, QUAL, now + 2, QUAL);
- p.addColumn(FAMILYA, QUAL, now + 3, QUAL);
- p.addColumn(FAMILYA, QUAL, now + 4, QUAL);
- t.put(p);
-
- String[] args = new String[] {
- "-D" + Export.EXPORT_BATCHING + "=" + EXPORT_BATCH_SIZE, // added scanner batching arg.
- name.getMethodName(),
- FQ_OUTPUT_DIR
- };
- assertTrue(runExport(args));
-
- FileSystem fs = FileSystem.get(UTIL.getConfiguration());
- fs.delete(new Path(FQ_OUTPUT_DIR), true);
- }
- }
-
- @Test
- public void testWithDeletes() throws Exception {
- HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(name.getMethodName()));
- desc.addFamily(new HColumnDescriptor(FAMILYA)
- .setMaxVersions(5)
- .setKeepDeletedCells(KeepDeletedCells.TRUE)
- );
- UTIL.getAdmin().createTable(desc);
- try (Table t = UTIL.getConnection().getTable(desc.getTableName());) {
-
- Put p = new Put(ROW1);
- p.addColumn(FAMILYA, QUAL, now, QUAL);
- p.addColumn(FAMILYA, QUAL, now + 1, QUAL);
- p.addColumn(FAMILYA, QUAL, now + 2, QUAL);
- p.addColumn(FAMILYA, QUAL, now + 3, QUAL);
- p.addColumn(FAMILYA, QUAL, now + 4, QUAL);
- t.put(p);
-
- Delete d = new Delete(ROW1, now+3);
- t.delete(d);
- d = new Delete(ROW1);
- d.addColumns(FAMILYA, QUAL, now+2);
- t.delete(d);
- }
-
- String[] args = new String[] {
- "-D" + Export.RAW_SCAN + "=true",
- name.getMethodName(),
- FQ_OUTPUT_DIR,
- "1000", // max number of key versions per key to export
- };
- assertTrue(runExport(args));
-
- final String IMPORT_TABLE = name.getMethodName() + "import";
- desc = new HTableDescriptor(TableName.valueOf(IMPORT_TABLE));
- desc.addFamily(new HColumnDescriptor(FAMILYA)
- .setMaxVersions(5)
- .setKeepDeletedCells(KeepDeletedCells.TRUE)
- );
- UTIL.getAdmin().createTable(desc);
- try (Table t = UTIL.getConnection().getTable(desc.getTableName());) {
- args = new String[] {
- IMPORT_TABLE,
- FQ_OUTPUT_DIR
- };
- assertTrue(runImport(args));
-
- Scan s = new Scan();
- s.setMaxVersions();
- s.setRaw(true);
- ResultScanner scanner = t.getScanner(s);
- Result r = scanner.next();
- Cell[] res = r.rawCells();
- assertTrue(CellUtil.isDeleteFamily(res[0]));
- assertEquals(now+4, res[1].getTimestamp());
- assertEquals(now+3, res[2].getTimestamp());
- assertTrue(CellUtil.isDelete(res[3]));
- assertEquals(now+2, res[4].getTimestamp());
- assertEquals(now+1, res[5].getTimestamp());
- assertEquals(now, res[6].getTimestamp());
- }
- }
-
-
- @Test
- public void testWithMultipleDeleteFamilyMarkersOfSameRowSameFamily() throws Exception {
- final TableName exportTable = TableName.valueOf(name.getMethodName());
- HTableDescriptor desc = new HTableDescriptor(exportTable);
- desc.addFamily(new HColumnDescriptor(FAMILYA)
- .setMaxVersions(5)
- .setKeepDeletedCells(KeepDeletedCells.TRUE)
- );
- UTIL.getAdmin().createTable(desc);
-
- Table exportT = UTIL.getConnection().getTable(exportTable);
-
- //Add first version of QUAL
- Put p = new Put(ROW1);
- p.addColumn(FAMILYA, QUAL, now, QUAL);
- exportT.put(p);
-
- //Add Delete family marker
- Delete d = new Delete(ROW1, now+3);
- exportT.delete(d);
-
- //Add second version of QUAL
- p = new Put(ROW1);
- p.addColumn(FAMILYA, QUAL, now + 5, "s".getBytes());
- exportT.put(p);
-
- //Add second Delete family marker
- d = new Delete(ROW1, now+7);
- exportT.delete(d);
-
-
- String[] args = new String[] {
- "-D" + Export.RAW_SCAN + "=true", exportTable.getNameAsString(),
- FQ_OUTPUT_DIR,
- "1000", // max number of key versions per key to export
- };
- assertTrue(runExport(args));
-
- final String importTable = name.getMethodName() + "import";
- desc = new HTableDescriptor(TableName.valueOf(importTable));
- desc.addFamily(new HColumnDescriptor(FAMILYA)
- .setMaxVersions(5)
- .setKeepDeletedCells(KeepDeletedCells.TRUE)
- );
- UTIL.getAdmin().createTable(desc);
-
- Table importT = UTIL.getConnection().getTable(TableName.valueOf(importTable));
- args = new String[] {
- importTable,
- FQ_OUTPUT_DIR
- };
- assertTrue(runImport(args));
-
- Scan s = new Scan();
- s.setMaxVersions();
- s.setRaw(true);
-
- ResultScanner importedTScanner = importT.getScanner(s);
- Result importedTResult = importedTScanner.next();
-
- ResultScanner exportedTScanner = exportT.getScanner(s);
- Result exportedTResult = exportedTScanner.next();
- try {
- Result.compareResults(exportedTResult, importedTResult);
- } catch (Exception e) {
- fail("Original and imported tables data comparision failed with error:"+e.getMessage());
- } finally {
- exportT.close();
- importT.close();
- }
- }
-
- /**
- * Create a simple table, run an Export Job on it, Import with filtering on, verify counts,
- * attempt with invalid values.
- */
- @Test
- public void testWithFilter() throws Exception {
- // Create simple table to export
- HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(name.getMethodName()));
- desc.addFamily(new HColumnDescriptor(FAMILYA).setMaxVersions(5));
- UTIL.getAdmin().createTable(desc);
- Table exportTable = UTIL.getConnection().getTable(desc.getTableName());
-
- Put p1 = new Put(ROW1);
- p1.addColumn(FAMILYA, QUAL, now, QUAL);
- p1.addColumn(FAMILYA, QUAL, now + 1, QUAL);
- p1.addColumn(FAMILYA, QUAL, now + 2, QUAL);
- p1.addColumn(FAMILYA, QUAL, now + 3, QUAL);
- p1.addColumn(FAMILYA, QUAL, now + 4, QUAL);
-
- // Having another row would actually test the filter.
- Put p2 = new Put(ROW2);
- p2.addColumn(FAMILYA, QUAL, now, QUAL);
-
- exportTable.put(Arrays.asList(p1, p2));
-
- // Export the simple table
- String[] args = new String[] { name.getMethodName(), FQ_OUTPUT_DIR, "1000" };
- assertTrue(runExport(args));
-
- // Import to a new table
- final String IMPORT_TABLE = name.getMethodName() + "import";
- desc = new HTableDescriptor(TableName.valueOf(IMPORT_TABLE));
- desc.addFamily(new HColumnDescriptor(FAMILYA).setMaxVersions(5));
- UTIL.getAdmin().createTable(desc);
-
- Table importTable = UTIL.getConnection().getTable(desc.getTableName());
- args = new String[] { "-D" + Import.FILTER_CLASS_CONF_KEY + "=" + PrefixFilter.class.getName(),
- "-D" + Import.FILTER_ARGS_CONF_KEY + "=" + Bytes.toString(ROW1), IMPORT_TABLE,
- FQ_OUTPUT_DIR,
- "1000" };
- assertTrue(runImport(args));
-
- // get the count of the source table for that time range
- PrefixFilter filter = new PrefixFilter(ROW1);
- int count = getCount(exportTable, filter);
-
- Assert.assertEquals("Unexpected row count between export and import tables", count,
- getCount(importTable, null));
-
- // and then test that a broken command doesn't bork everything - easier here because we don't
- // need to re-run the export job
-
- args = new String[] { "-D" + Import.FILTER_CLASS_CONF_KEY + "=" + Filter.class.getName(),
- "-D" + Import.FILTER_ARGS_CONF_KEY + "=" + Bytes.toString(ROW1) + "", name.getMethodName(),
- FQ_OUTPUT_DIR, "1000" };
- assertFalse(runImport(args));
-
- // cleanup
- exportTable.close();
- importTable.close();
- }
-
- /**
- * Count the number of KeyValues in the specified table that match the given filter
- * @param table the table to scan
- * @param filter the filter to apply, or null to count every cell
- * @return the number of matching KeyValues
- * @throws IOException
- */
- private int getCount(Table table, Filter filter) throws IOException {
- Scan scan = new Scan();
- scan.setFilter(filter);
- ResultScanner results = table.getScanner(scan);
- int count = 0;
- for (Result res : results) {
- count += res.size();
- }
- results.close();
- return count;
- }
-
- /**
- * test main method. Import should print help and call System.exit
- */
- @Test
- public void testImportMain() throws Exception {
- PrintStream oldPrintStream = System.err;
- SecurityManager SECURITY_MANAGER = System.getSecurityManager();
- LauncherSecurityManager newSecurityManager= new LauncherSecurityManager();
- System.setSecurityManager(newSecurityManager);
- ByteArrayOutputStream data = new ByteArrayOutputStream();
- String[] args = {};
- System.setErr(new PrintStream(data));
- try {
- System.setErr(new PrintStream(data));
- Import.main(args);
- fail("should be SecurityException");
- } catch (SecurityException e) {
- assertEquals(-1, newSecurityManager.getExitCode());
- assertTrue(data.toString().contains("Wrong number of arguments:"));
- assertTrue(data.toString().contains("-Dimport.bulk.output=/path/for/output"));
- assertTrue(data.toString().contains("-Dimport.filter.class=<name of filter class>"));
- assertTrue(data.toString().contains("-Dimport.bulk.output=/path/for/output"));
- assertTrue(data.toString().contains("-Dmapreduce.reduce.speculative=false"));
- } finally {
- System.setErr(oldPrintStream);
- System.setSecurityManager(SECURITY_MANAGER);
- }
- }
-
- /**
- * test main method. Export should print help and call System.exit
- */
- @Test
- public void testExportMain() throws Exception {
- PrintStream oldPrintStream = System.err;
- SecurityManager SECURITY_MANAGER = System.getSecurityManager();
- LauncherSecurityManager newSecurityManager= new LauncherSecurityManager();
- System.setSecurityManager(newSecurityManager);
- ByteArrayOutputStream data = new ByteArrayOutputStream();
- String[] args = {};
- System.setErr(new PrintStream(data));
- try {
- System.setErr(new PrintStream(data));
- Export.main(args);
- fail("should be SecurityException");
- } catch (SecurityException e) {
- assertEquals(-1, newSecurityManager.getExitCode());
- String errMsg = data.toString();
- assertTrue(errMsg.contains("Wrong number of arguments:"));
- assertTrue(errMsg.contains(
- "Usage: Export [-D <property=value>]* <tablename> <outputdir> [<versions> " +
- "[<starttime> [<endtime>]] [^[regex pattern] or [Prefix] to filter]]"));
- assertTrue(
- errMsg.contains("-D hbase.mapreduce.scan.column.family=<family1>,<family2>, ..."));
- assertTrue(errMsg.contains("-D hbase.mapreduce.include.deleted.rows=true"));
- assertTrue(errMsg.contains("-Dhbase.client.scanner.caching=100"));
- assertTrue(errMsg.contains("-Dmapreduce.map.speculative=false"));
- assertTrue(errMsg.contains("-Dmapreduce.reduce.speculative=false"));
- assertTrue(errMsg.contains("-Dhbase.export.scanner.batch=10"));
- } finally {
- System.setErr(oldPrintStream);
- System.setSecurityManager(SECURITY_MANAGER);
- }
- }
-
- /**
- * Test map method of Importer
- */
- @SuppressWarnings({ "unchecked", "rawtypes" })
- @Test
- public void testKeyValueImporter() throws Exception {
- KeyValueImporter importer = new KeyValueImporter();
- Configuration configuration = new Configuration();
- Context ctx = mock(Context.class);
- when(ctx.getConfiguration()).thenReturn(configuration);
-
- doAnswer(new Answer<Void>() {
-
- @Override
- public Void answer(InvocationOnMock invocation) throws Throwable {
- ImmutableBytesWritable writer = (ImmutableBytesWritable) invocation.getArguments()[0];
- KeyValue key = (KeyValue) invocation.getArguments()[1];
- assertEquals("Key", Bytes.toString(writer.get()));
- assertEquals("row", Bytes.toString(CellUtil.cloneRow(key)));
- return null;
- }
- }).when(ctx).write(any(ImmutableBytesWritable.class), any(KeyValue.class));
-
- importer.setup(ctx);
- Result value = mock(Result.class);
- KeyValue[] keys = {
- new KeyValue(Bytes.toBytes("row"), Bytes.toBytes("family"), Bytes.toBytes("qualifier"),
- Bytes.toBytes("value")),
- new KeyValue(Bytes.toBytes("row"), Bytes.toBytes("family"), Bytes.toBytes("qualifier"),
- Bytes.toBytes("value1")) };
- when(value.rawCells()).thenReturn(keys);
- importer.map(new ImmutableBytesWritable(Bytes.toBytes("Key")), value, ctx);
-
- }
-
- /**
- * Test addFilterAndArguments method of Import This method set couple
- * parameters into Configuration
- */
- @Test
- public void testAddFilterAndArguments() throws IOException {
- Configuration configuration = new Configuration();
-
- List<String> args = new ArrayList<>();
- args.add("param1");
- args.add("param2");
-
- Import.addFilterAndArguments(configuration, FilterBase.class, args);
- assertEquals("org.apache.hadoop.hbase.filter.FilterBase",
- configuration.get(Import.FILTER_CLASS_CONF_KEY));
- assertEquals("param1,param2", configuration.get(Import.FILTER_ARGS_CONF_KEY));
- }
-
- @Test
- public void testDurability() throws Exception {
- // Create an export table.
- String exportTableName = name.getMethodName() + "export";
- try (Table exportTable = UTIL.createTable(TableName.valueOf(exportTableName), FAMILYA, 3);) {
-
- // Insert some data
- Put put = new Put(ROW1);
- put.addColumn(FAMILYA, QUAL, now, QUAL);
- put.addColumn(FAMILYA, QUAL, now + 1, QUAL);
- put.addColumn(FAMILYA, QUAL, now + 2, QUAL);
- exportTable.put(put);
-
- put = new Put(ROW2);
- put.addColumn(FAMILYA, QUAL, now, QUAL);
- put.addColumn(FAMILYA, QUAL, now + 1, QUAL);
- put.addColumn(FAMILYA, QUAL, now + 2, QUAL);
- exportTable.put(put);
-
- // Run the export
- String[] args = new String[] { exportTableName, FQ_OUTPUT_DIR, "1000"};
- assertTrue(runExport(args));
-
- // Create the table for import
- String importTableName = name.getMethodName() + "import1";
- Table importTable = UTIL.createTable(TableName.valueOf(importTableName), FAMILYA, 3);
-
- // Register the wal listener for the import table
- HRegionInfo region = UTIL.getHBaseCluster().getRegionServerThreads().get(0).getRegionServer()
- .getOnlineRegions(importTable.getName()).get(0).getRegionInfo();
- TableWALActionListener walListener = new TableWALActionListener(region);
- WAL wal = UTIL.getMiniHBaseCluster().getRegionServer(0).getWAL(region);
- wal.registerWALActionsListener(walListener);
-
- // Run the import with SKIP_WAL
- args =
- new String[] { "-D" + Import.WAL_DURABILITY + "=" + Durability.SKIP_WAL.name(),
- importTableName, FQ_OUTPUT_DIR };
- assertTrue(runImport(args));
- //Assert that the wal is not visited
- assertTrue(!walListener.isWALVisited());
- //Ensure that the count is 2 (only one version of key value is obtained)
- assertTrue(getCount(importTable, null) == 2);
-
- // Run the import with the default durability option
- importTableName = name.getMethodName() + "import2";
- importTable = UTIL.createTable(TableName.valueOf(importTableName), FAMILYA, 3);
- region = UTIL.getHBaseCluster().getRegionServerThreads().get(0).getRegionServer()
- .getOnlineRegions(importTable.getName()).get(0).getRegionInfo();
- wal = UTIL.getMiniHBaseCluster().getRegionServer(0).getWAL(region);
- walListener = new TableWALActionListener(region);
- wal.registerWALActionsListener(walListener);
- args = new String[] { importTableName, FQ_OUTPUT_DIR };
- assertTrue(runImport(args));
- //Assert that the wal is visited
- assertTrue(walListener.isWALVisited());
- //Ensure that the count is 2 (only one version of key value is obtained)
- assertTrue(getCount(importTable, null) == 2);
- }
- }
-
- /**
- * This listens to the {@link #visitLogEntryBeforeWrite(HRegionInfo, WALKey, WALEdit)} to
- * identify that an entry is written to the Write Ahead Log for the given table.
- */
- private static class TableWALActionListener extends WALActionsListener.Base {
-
- private HRegionInfo regionInfo;
- private boolean isVisited = false;
-
- public TableWALActionListener(HRegionInfo region) {
- this.regionInfo = region;
- }
-
- @Override
- public void visitLogEntryBeforeWrite(WALKey logKey, WALEdit logEdit) {
- if (logKey.getTablename().getNameAsString().equalsIgnoreCase(
- this.regionInfo.getTable().getNameAsString()) && (!logEdit.isMetaEdit())) {
- isVisited = true;
- }
- }
-
- public boolean isWALVisited() {
- return isVisited;
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithOperationAttributes.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithOperationAttributes.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithOperationAttributes.java
deleted file mode 100644
index 6d9b05b..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithOperationAttributes.java
+++ /dev/null
@@ -1,266 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.UUID;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.CategoryBasedTimeout;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.coprocessor.RegionObserver;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.client.Durability;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.coprocessor.ObserverContext;
-import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
-import org.apache.hadoop.hbase.regionserver.Region;
-import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
-import org.junit.rules.TestRule;
-
-@Category({MapReduceTests.class, LargeTests.class})
-public class TestImportTSVWithOperationAttributes implements Configurable {
- @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
- withTimeout(this.getClass()).withLookingForStuckThread(true).build();
- private static final Log LOG = LogFactory.getLog(TestImportTSVWithOperationAttributes.class);
- protected static final String NAME = TestImportTsv.class.getSimpleName();
- protected static HBaseTestingUtility util = new HBaseTestingUtility();
-
- /**
- * Delete the tmp directory after running doMROnTableTest. Boolean. Default is
- * false.
- */
- protected static final String DELETE_AFTER_LOAD_CONF = NAME + ".deleteAfterLoad";
-
- /**
- * Force use of combiner in doMROnTableTest. Boolean. Default is true.
- */
- protected static final String FORCE_COMBINER_CONF = NAME + ".forceCombiner";
-
- private static Configuration conf;
-
- private static final String TEST_ATR_KEY = "test";
-
- private final String FAMILY = "FAM";
-
- @Rule
- public TestName name = new TestName();
-
- public Configuration getConf() {
- return util.getConfiguration();
- }
-
- public void setConf(Configuration conf) {
- throw new IllegalArgumentException("setConf not supported");
- }
-
- @BeforeClass
- public static void provisionCluster() throws Exception {
- conf = util.getConfiguration();
- conf.set("hbase.coprocessor.master.classes", OperationAttributesTestController.class.getName());
- conf.set("hbase.coprocessor.region.classes", OperationAttributesTestController.class.getName());
- util.startMiniCluster();
- }
-
- @AfterClass
- public static void releaseCluster() throws Exception {
- util.shutdownMiniCluster();
- }
-
- @Test
- public void testMROnTable() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName() + UUID.randomUUID());
-
- // Prepare the arguments required for the test.
- String[] args = new String[] {
- "-D" + ImportTsv.MAPPER_CONF_KEY
- + "=org.apache.hadoop.hbase.mapreduce.TsvImporterCustomTestMapperForOprAttr",
- "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_ATTRIBUTES_KEY",
- "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b", tableName.getNameAsString() };
- String data = "KEY\u001bVALUE1\u001bVALUE2\u001btest=>myvalue\n";
- util.createTable(tableName, FAMILY);
- doMROnTableTest(util, FAMILY, data, args, 1, true);
- util.deleteTable(tableName);
- }
-
- @Test
- public void testMROnTableWithInvalidOperationAttr() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName() + UUID.randomUUID());
-
- // Prepare the arguments required for the test.
- String[] args = new String[] {
- "-D" + ImportTsv.MAPPER_CONF_KEY
- + "=org.apache.hadoop.hbase.mapreduce.TsvImporterCustomTestMapperForOprAttr",
- "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_ATTRIBUTES_KEY",
- "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b", tableName.getNameAsString() };
- String data = "KEY\u001bVALUE1\u001bVALUE2\u001btest1=>myvalue\n";
- util.createTable(tableName, FAMILY);
- doMROnTableTest(util, FAMILY, data, args, 1, false);
- util.deleteTable(tableName);
- }
-
- /**
- * Run an ImportTsv job and perform basic validation on the results. Returns
- * the ImportTsv <code>Tool</code> instance so that other tests can inspect it
- * for further validation as necessary. This method is static to ensure
- * non-reliance on instance's util/conf facilities.
- *
- * @param args
- * Any arguments to pass BEFORE inputFile path is appended.
- * @param dataAvailable
- * @return The Tool instance used to run the test.
- */
- private Tool doMROnTableTest(HBaseTestingUtility util, String family, String data, String[] args,
- int valueMultiplier, boolean dataAvailable) throws Exception {
- String table = args[args.length - 1];
- Configuration conf = new Configuration(util.getConfiguration());
-
- // populate input file
- FileSystem fs = FileSystem.get(conf);
- Path inputPath = fs.makeQualified(new Path(util.getDataTestDirOnTestFS(table), "input.dat"));
- FSDataOutputStream op = fs.create(inputPath, true);
- op.write(Bytes.toBytes(data));
- op.close();
- LOG.debug(String.format("Wrote test data to file: %s", inputPath));
-
- if (conf.getBoolean(FORCE_COMBINER_CONF, true)) {
- LOG.debug("Forcing combiner.");
- conf.setInt("mapreduce.map.combine.minspills", 1);
- }
-
- // run the import
- List<String> argv = new ArrayList<>(Arrays.asList(args));
- argv.add(inputPath.toString());
- Tool tool = new ImportTsv();
- LOG.debug("Running ImportTsv with arguments: " + argv);
- assertEquals(0, ToolRunner.run(conf, tool, argv.toArray(args)));
-
- validateTable(conf, TableName.valueOf(table), family, valueMultiplier, dataAvailable);
-
- if (conf.getBoolean(DELETE_AFTER_LOAD_CONF, true)) {
- LOG.debug("Deleting test subdirectory");
- util.cleanupDataTestDirOnTestFS(table);
- }
- return tool;
- }
-
- /**
- * Confirm ImportTsv via data in online table.
- *
- * @param dataAvailable
- */
- private static void validateTable(Configuration conf, TableName tableName, String family,
- int valueMultiplier, boolean dataAvailable) throws IOException {
-
- LOG.debug("Validating table.");
- Connection connection = ConnectionFactory.createConnection(conf);
- Table table = connection.getTable(tableName);
- boolean verified = false;
- long pause = conf.getLong("hbase.client.pause", 5 * 1000);
- int numRetries = conf.getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 5);
- for (int i = 0; i < numRetries; i++) {
- try {
- Scan scan = new Scan();
- // Scan entire family.
- scan.addFamily(Bytes.toBytes(family));
- if (dataAvailable) {
- ResultScanner resScanner = table.getScanner(scan);
- for (Result res : resScanner) {
- LOG.debug("Getting results " + res.size());
- assertTrue(res.size() == 2);
- List<Cell> kvs = res.listCells();
- assertTrue(CellUtil.matchingRow(kvs.get(0), Bytes.toBytes("KEY")));
- assertTrue(CellUtil.matchingRow(kvs.get(1), Bytes.toBytes("KEY")));
- assertTrue(CellUtil.matchingValue(kvs.get(0), Bytes.toBytes("VALUE" + valueMultiplier)));
- assertTrue(CellUtil.matchingValue(kvs.get(1),
- Bytes.toBytes("VALUE" + 2 * valueMultiplier)));
- // Only one result set is expected, so let it loop.
- verified = true;
- }
- } else {
- ResultScanner resScanner = table.getScanner(scan);
- Result[] next = resScanner.next(2);
- assertEquals(0, next.length);
- verified = true;
- }
-
- break;
- } catch (NullPointerException e) {
- // If here, a cell was empty. Presume it's because updates came in
- // after the scanner had been opened. Wait a while and retry.
- }
- try {
- Thread.sleep(pause);
- } catch (InterruptedException e) {
- // continue
- }
- }
- table.close();
- connection.close();
- assertTrue(verified);
- }
-
- public static class OperationAttributesTestController implements RegionObserver {
-
- @Override
- public void prePut(ObserverContext<RegionCoprocessorEnvironment> e, Put put, WALEdit edit,
- Durability durability) throws IOException {
- Region region = e.getEnvironment().getRegion();
- if (!region.getRegionInfo().isMetaTable()
- && !region.getRegionInfo().getTable().isSystemTable()) {
- if (put.getAttribute(TEST_ATR_KEY) != null) {
- LOG.debug("allow any put to happen " + region.getRegionInfo().getRegionNameAsString());
- } else {
- e.bypass();
- }
- }
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithTTLs.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithTTLs.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithTTLs.java
deleted file mode 100644
index 4ab3d29..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithTTLs.java
+++ /dev/null
@@ -1,175 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.UUID;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.coprocessor.RegionObserver;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.client.Durability;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.coprocessor.ObserverContext;
-import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
-import org.apache.hadoop.hbase.regionserver.Region;
-import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
-
-@Category({MapReduceTests.class, LargeTests.class})
-public class TestImportTSVWithTTLs implements Configurable {
-
- protected static final Log LOG = LogFactory.getLog(TestImportTSVWithTTLs.class);
- protected static final String NAME = TestImportTsv.class.getSimpleName();
- protected static HBaseTestingUtility util = new HBaseTestingUtility();
-
- /**
- * Delete the tmp directory after running doMROnTableTest. Boolean. Default is
- * true.
- */
- protected static final String DELETE_AFTER_LOAD_CONF = NAME + ".deleteAfterLoad";
-
- /**
- * Force use of combiner in doMROnTableTest. Boolean. Default is true.
- */
- protected static final String FORCE_COMBINER_CONF = NAME + ".forceCombiner";
-
- private final String FAMILY = "FAM";
- private static Configuration conf;
-
- @Rule
- public TestName name = new TestName();
-
- @Override
- public Configuration getConf() {
- return util.getConfiguration();
- }
-
- @Override
- public void setConf(Configuration conf) {
- throw new IllegalArgumentException("setConf not supported");
- }
-
- @BeforeClass
- public static void provisionCluster() throws Exception {
- conf = util.getConfiguration();
- // We don't check persistence in HFiles in this test, but if we ever do we will
- // need this where the default hfile version is not 3 (i.e. 0.98)
- conf.setInt("hfile.format.version", 3);
- conf.set("hbase.coprocessor.region.classes", TTLCheckingObserver.class.getName());
- util.startMiniCluster();
- }
-
- @AfterClass
- public static void releaseCluster() throws Exception {
- util.shutdownMiniCluster();
- }
-
- @Test
- public void testMROnTable() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName() + UUID.randomUUID());
-
- // Prepare the arguments required for the test.
- String[] args = new String[] {
- "-D" + ImportTsv.MAPPER_CONF_KEY
- + "=org.apache.hadoop.hbase.mapreduce.TsvImporterMapper",
- "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_CELL_TTL",
- "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b", tableName.getNameAsString() };
- String data = "KEY\u001bVALUE1\u001bVALUE2\u001b1000000\n";
- util.createTable(tableName, FAMILY);
- doMROnTableTest(util, FAMILY, data, args, 1);
- util.deleteTable(tableName);
- }
-
- protected static Tool doMROnTableTest(HBaseTestingUtility util, String family, String data,
- String[] args, int valueMultiplier) throws Exception {
- TableName table = TableName.valueOf(args[args.length - 1]);
- Configuration conf = new Configuration(util.getConfiguration());
-
- // populate input file
- FileSystem fs = FileSystem.get(conf);
- Path inputPath = fs.makeQualified(new Path(util
- .getDataTestDirOnTestFS(table.getNameAsString()), "input.dat"));
- FSDataOutputStream op = fs.create(inputPath, true);
- op.write(Bytes.toBytes(data));
- op.close();
- LOG.debug(String.format("Wrote test data to file: %s", inputPath));
-
- if (conf.getBoolean(FORCE_COMBINER_CONF, true)) {
- LOG.debug("Forcing combiner.");
- conf.setInt("mapreduce.map.combine.minspills", 1);
- }
-
- // run the import
- List<String> argv = new ArrayList<>(Arrays.asList(args));
- argv.add(inputPath.toString());
- Tool tool = new ImportTsv();
- LOG.debug("Running ImportTsv with arguments: " + argv);
- try {
- // Job will fail if observer rejects entries without TTL
- assertEquals(0, ToolRunner.run(conf, tool, argv.toArray(args)));
- } finally {
- // Clean up
- if (conf.getBoolean(DELETE_AFTER_LOAD_CONF, true)) {
- LOG.debug("Deleting test subdirectory");
- util.cleanupDataTestDirOnTestFS(table.getNameAsString());
- }
- }
-
- return tool;
- }
-
- public static class TTLCheckingObserver implements RegionObserver {
-
- @Override
- public void prePut(ObserverContext<RegionCoprocessorEnvironment> e, Put put, WALEdit edit,
- Durability durability) throws IOException {
- Region region = e.getEnvironment().getRegion();
- if (!region.getRegionInfo().isMetaTable()
- && !region.getRegionInfo().getTable().isSystemTable()) {
- // The put carries the TTL attribute
- if (put.getTTL() != Long.MAX_VALUE) {
- return;
- }
- throw new IOException("Operation does not have TTL set");
- }
- }
- }
-}
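As background for the TTLCheckingObserver above: ImportTsv maps the HBASE_CELL_TTL column onto a mutation-level TTL, and the observer rejects any Put whose TTL is still the default Long.MAX_VALUE. A minimal sketch of setting that TTL directly from client code, with an illustrative table, family, and value (not taken from this commit):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class PutWithTtlExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    try (Connection conn = ConnectionFactory.createConnection(conf);
         Table table = conn.getTable(TableName.valueOf("demo"))) { // hypothetical table
      Put put = new Put(Bytes.toBytes("KEY"));
      put.addColumn(Bytes.toBytes("FAM"), Bytes.toBytes("A"), Bytes.toBytes("VALUE1"));
      // The TTL is in milliseconds; without this call Put#getTTL() returns
      // Long.MAX_VALUE, which the observer above treats as "no TTL set".
      put.setTTL(1000000L);
      table.put(put);
    }
  }
}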
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithVisibilityLabels.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithVisibilityLabels.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithVisibilityLabels.java
deleted file mode 100644
index 8967ac7..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTSVWithVisibilityLabels.java
+++ /dev/null
@@ -1,495 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-import java.io.IOException;
-import java.security.PrivilegedExceptionAction;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-import java.util.UUID;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Delete;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.io.hfile.CacheConfig;
-import org.apache.hadoop.hbase.io.hfile.HFile;
-import org.apache.hadoop.hbase.io.hfile.HFileScanner;
-import org.apache.hadoop.hbase.protobuf.generated.VisibilityLabelsProtos.VisibilityLabelsResponse;
-import org.apache.hadoop.hbase.security.User;
-import org.apache.hadoop.hbase.security.visibility.Authorizations;
-import org.apache.hadoop.hbase.security.visibility.CellVisibility;
-import org.apache.hadoop.hbase.security.visibility.ScanLabelGenerator;
-import org.apache.hadoop.hbase.security.visibility.SimpleScanLabelGenerator;
-import org.apache.hadoop.hbase.security.visibility.VisibilityClient;
-import org.apache.hadoop.hbase.security.visibility.VisibilityConstants;
-import org.apache.hadoop.hbase.security.visibility.VisibilityController;
-import org.apache.hadoop.hbase.security.visibility.VisibilityUtils;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapred.Utils.OutputFileUtils.OutputFilesFilter;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
-
-@Category({MapReduceTests.class, LargeTests.class})
-public class TestImportTSVWithVisibilityLabels implements Configurable {
-
- private static final Log LOG = LogFactory.getLog(TestImportTSVWithVisibilityLabels.class);
- protected static final String NAME = TestImportTsv.class.getSimpleName();
- protected static HBaseTestingUtility util = new HBaseTestingUtility();
-
- /**
- * Delete the tmp directory after running doMROnTableTest. Boolean. Default is
- * true.
- */
- protected static final String DELETE_AFTER_LOAD_CONF = NAME + ".deleteAfterLoad";
-
- /**
- * Force use of combiner in doMROnTableTest. Boolean. Default is true.
- */
- protected static final String FORCE_COMBINER_CONF = NAME + ".forceCombiner";
-
- private final String FAMILY = "FAM";
- private final static String TOPSECRET = "topsecret";
- private final static String PUBLIC = "public";
- private final static String PRIVATE = "private";
- private final static String CONFIDENTIAL = "confidential";
- private final static String SECRET = "secret";
- private static User SUPERUSER;
- private static Configuration conf;
-
- @Rule
- public TestName name = new TestName();
-
- @Override
- public Configuration getConf() {
- return util.getConfiguration();
- }
-
- @Override
- public void setConf(Configuration conf) {
- throw new IllegalArgumentException("setConf not supported");
- }
-
- @BeforeClass
- public static void provisionCluster() throws Exception {
- conf = util.getConfiguration();
- SUPERUSER = User.createUserForTesting(conf, "admin", new String[] { "supergroup" });
- conf.set("hbase.superuser", "admin,"+User.getCurrent().getName());
- conf.setInt("hfile.format.version", 3);
- conf.set("hbase.coprocessor.master.classes", VisibilityController.class.getName());
- conf.set("hbase.coprocessor.region.classes", VisibilityController.class.getName());
- conf.setClass(VisibilityUtils.VISIBILITY_LABEL_GENERATOR_CLASS, SimpleScanLabelGenerator.class,
- ScanLabelGenerator.class);
- util.startMiniCluster();
- // Wait for the labels table to become available
- util.waitTableEnabled(VisibilityConstants.LABELS_TABLE_NAME.getName(), 50000);
- createLabels();
- }
-
- private static void createLabels() throws IOException, InterruptedException {
- PrivilegedExceptionAction<VisibilityLabelsResponse> action =
- new PrivilegedExceptionAction<VisibilityLabelsResponse>() {
- @Override
- public VisibilityLabelsResponse run() throws Exception {
- String[] labels = { SECRET, TOPSECRET, CONFIDENTIAL, PUBLIC, PRIVATE };
- try (Connection conn = ConnectionFactory.createConnection(conf)) {
- VisibilityClient.addLabels(conn, labels);
- LOG.info("Added labels ");
- } catch (Throwable t) {
- LOG.error("Error in adding labels" , t);
- throw new IOException(t);
- }
- return null;
- }
- };
- SUPERUSER.runAs(action);
- }
-
- @AfterClass
- public static void releaseCluster() throws Exception {
- util.shutdownMiniCluster();
- }
-
- @Test
- public void testMROnTable() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName() + UUID.randomUUID());
-
- // Prepare the arguments required for the test.
- String[] args = new String[] {
- "-D" + ImportTsv.MAPPER_CONF_KEY
- + "=org.apache.hadoop.hbase.mapreduce.TsvImporterMapper",
- "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_CELL_VISIBILITY",
- "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b", tableName.getNameAsString() };
- String data = "KEY\u001bVALUE1\u001bVALUE2\u001bsecret&private\n";
- util.createTable(tableName, FAMILY);
- doMROnTableTest(util, FAMILY, data, args, 1);
- util.deleteTable(tableName);
- }
-
- @Test
- public void testMROnTableWithDeletes() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName() + UUID.randomUUID());
-
- // Prepare the arguments required for the test.
- String[] args = new String[] {
- "-D" + ImportTsv.MAPPER_CONF_KEY + "=org.apache.hadoop.hbase.mapreduce.TsvImporterMapper",
- "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_CELL_VISIBILITY",
- "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b", tableName.getNameAsString() };
- String data = "KEY\u001bVALUE1\u001bVALUE2\u001bsecret&private\n";
- util.createTable(tableName, FAMILY);
- doMROnTableTest(util, FAMILY, data, args, 1);
- issueDeleteAndVerifyData(tableName);
- util.deleteTable(tableName);
- }
-
- private void issueDeleteAndVerifyData(TableName tableName) throws IOException {
- LOG.debug("Validating table after delete.");
- Table table = util.getConnection().getTable(tableName);
- boolean verified = false;
- long pause = conf.getLong("hbase.client.pause", 5 * 1000);
- int numRetries = conf.getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 5);
- for (int i = 0; i < numRetries; i++) {
- try {
- Delete d = new Delete(Bytes.toBytes("KEY"));
- d.addFamily(Bytes.toBytes(FAMILY));
- d.setCellVisibility(new CellVisibility("private&secret"));
- table.delete(d);
-
- Scan scan = new Scan();
- // Scan entire family.
- scan.addFamily(Bytes.toBytes(FAMILY));
- scan.setAuthorizations(new Authorizations("secret", "private"));
- ResultScanner resScanner = table.getScanner(scan);
- Result[] next = resScanner.next(5);
- assertEquals(0, next.length);
- verified = true;
- break;
- } catch (NullPointerException e) {
- // If here, a cell was empty. Presume it's because updates came in
- // after the scanner had been opened. Wait a while and retry.
- }
- try {
- Thread.sleep(pause);
- } catch (InterruptedException e) {
- // continue
- }
- }
- table.close();
- assertTrue(verified);
- }
-
- @Test
- public void testMROnTableWithBulkload() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName() + UUID.randomUUID());
- Path hfiles = new Path(util.getDataTestDirOnTestFS(tableName.getNameAsString()), "hfiles");
- // Prepare the arguments required for the test.
- String[] args = new String[] {
- "-D" + ImportTsv.BULK_OUTPUT_CONF_KEY + "=" + hfiles.toString(),
- "-D" + ImportTsv.COLUMNS_CONF_KEY
- + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_CELL_VISIBILITY",
- "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b", tableName.getNameAsString() };
- String data = "KEY\u001bVALUE1\u001bVALUE2\u001bsecret&private\n";
- util.createTable(tableName, FAMILY);
- doMROnTableTest(util, FAMILY, data, args, 1);
- util.deleteTable(tableName);
- }
-
- @Test
- public void testBulkOutputWithTsvImporterTextMapper() throws Exception {
- final TableName table = TableName.valueOf(name.getMethodName() + UUID.randomUUID());
- String FAMILY = "FAM";
- Path bulkOutputPath = new Path(util.getDataTestDirOnTestFS(table.getNameAsString()),"hfiles");
- // Prepare the arguments required for the test.
- String[] args =
- new String[] {
- "-D" + ImportTsv.MAPPER_CONF_KEY
- + "=org.apache.hadoop.hbase.mapreduce.TsvImporterTextMapper",
- "-D" + ImportTsv.COLUMNS_CONF_KEY
- + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_CELL_VISIBILITY",
- "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b",
- "-D" + ImportTsv.BULK_OUTPUT_CONF_KEY + "=" + bulkOutputPath.toString(),
- table.getNameAsString()
- };
- String data = "KEY\u001bVALUE4\u001bVALUE8\u001bsecret&private\n";
- doMROnTableTest(util, FAMILY, data, args, 4);
- util.deleteTable(table);
- }
-
- @Test
- public void testMRWithOutputFormat() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName() + UUID.randomUUID());
- Path hfiles = new Path(util.getDataTestDirOnTestFS(tableName.getNameAsString()), "hfiles");
- // Prepare the arguments required for the test.
- String[] args = new String[] {
- "-D" + ImportTsv.MAPPER_CONF_KEY
- + "=org.apache.hadoop.hbase.mapreduce.TsvImporterMapper",
- "-D" + ImportTsv.BULK_OUTPUT_CONF_KEY + "=" + hfiles.toString(),
- "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_CELL_VISIBILITY",
- "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b", tableName.getNameAsString() };
- String data = "KEY\u001bVALUE4\u001bVALUE8\u001bsecret&private\n";
- util.createTable(tableName, FAMILY);
- doMROnTableTest(util, FAMILY, data, args, 1);
- util.deleteTable(tableName);
- }
-
- @Test
- public void testBulkOutputWithInvalidLabels() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName() + UUID.randomUUID());
- Path hfiles = new Path(util.getDataTestDirOnTestFS(tableName.getNameAsString()), "hfiles");
- // Prepare the arguments required for the test.
- String[] args =
- new String[] { "-D" + ImportTsv.BULK_OUTPUT_CONF_KEY + "=" + hfiles.toString(),
- "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_CELL_VISIBILITY",
- "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b", tableName.getNameAsString() };
-
- // 2 Data rows, one with valid label and one with invalid label
- String data =
- "KEY\u001bVALUE1\u001bVALUE2\u001bprivate\nKEY1\u001bVALUE1\u001bVALUE2\u001binvalid\n";
- util.createTable(tableName, FAMILY);
- doMROnTableTest(util, FAMILY, data, args, 1, 2);
- util.deleteTable(tableName);
- }
-
- @Test
- public void testBulkOutputWithTsvImporterTextMapperWithInvalidLabels() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName() + UUID.randomUUID());
- Path hfiles = new Path(util.getDataTestDirOnTestFS(tableName.getNameAsString()), "hfiles");
- // Prepare the arguments required for the test.
- String[] args =
- new String[] {
- "-D" + ImportTsv.MAPPER_CONF_KEY
- + "=org.apache.hadoop.hbase.mapreduce.TsvImporterTextMapper",
- "-D" + ImportTsv.BULK_OUTPUT_CONF_KEY + "=" + hfiles.toString(),
- "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_CELL_VISIBILITY",
- "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b", tableName.getNameAsString() };
-
- // 2 Data rows, one with valid label and one with invalid label
- String data =
- "KEY\u001bVALUE1\u001bVALUE2\u001bprivate\nKEY1\u001bVALUE1\u001bVALUE2\u001binvalid\n";
- util.createTable(tableName, FAMILY);
- doMROnTableTest(util, FAMILY, data, args, 1, 2);
- util.deleteTable(tableName);
- }
-
- protected static Tool doMROnTableTest(HBaseTestingUtility util, String family, String data,
- String[] args, int valueMultiplier) throws Exception {
- return doMROnTableTest(util, family, data, args, valueMultiplier, -1);
- }
-
- /**
- * Run an ImportTsv job and perform basic validation on the results. Returns
- * the ImportTsv <code>Tool</code> instance so that other tests can inspect it
- * for further validation as necessary. This method is static to ensure
- * non-reliance on the instance's util/conf facilities.
- *
- * @param args
- * Any arguments to pass BEFORE inputFile path is appended.
- *
- * @param expectedKVCount Expected KV count. Pass -1 to skip the KV count check.
- *
- * @return The Tool instance used to run the test.
- */
- protected static Tool doMROnTableTest(HBaseTestingUtility util, String family, String data,
- String[] args, int valueMultiplier,int expectedKVCount) throws Exception {
- TableName table = TableName.valueOf(args[args.length - 1]);
- Configuration conf = new Configuration(util.getConfiguration());
-
- // populate input file
- FileSystem fs = FileSystem.get(conf);
- Path inputPath = fs.makeQualified(new Path(util
- .getDataTestDirOnTestFS(table.getNameAsString()), "input.dat"));
- FSDataOutputStream op = fs.create(inputPath, true);
- if (data == null) {
- data = "KEY\u001bVALUE1\u001bVALUE2\n";
- }
- op.write(Bytes.toBytes(data));
- op.close();
- LOG.debug(String.format("Wrote test data to file: %s", inputPath));
-
- if (conf.getBoolean(FORCE_COMBINER_CONF, true)) {
- LOG.debug("Forcing combiner.");
- conf.setInt("mapreduce.map.combine.minspills", 1);
- }
-
- // run the import
- List<String> argv = new ArrayList<>(Arrays.asList(args));
- argv.add(inputPath.toString());
- Tool tool = new ImportTsv();
- LOG.debug("Running ImportTsv with arguments: " + argv);
- assertEquals(0, ToolRunner.run(conf, tool, argv.toArray(args)));
-
- // Perform basic validation. If the input args did not include
- // ImportTsv.BULK_OUTPUT_CONF_KEY then validate data in the table.
- // Otherwise, validate presence of hfiles.
- boolean createdHFiles = false;
- String outputPath = null;
- for (String arg : argv) {
- if (arg.contains(ImportTsv.BULK_OUTPUT_CONF_KEY)) {
- createdHFiles = true;
- // split '-Dfoo=bar' on '=' and keep 'bar'
- outputPath = arg.split("=")[1];
- break;
- }
- }
- LOG.debug("validating the table " + createdHFiles);
- if (createdHFiles)
- validateHFiles(fs, outputPath, family,expectedKVCount);
- else
- validateTable(conf, table, family, valueMultiplier);
-
- if (conf.getBoolean(DELETE_AFTER_LOAD_CONF, true)) {
- LOG.debug("Deleting test subdirectory");
- util.cleanupDataTestDirOnTestFS(table.getNameAsString());
- }
- return tool;
- }
-
- /**
- * Confirm ImportTsv via HFiles on fs.
- */
- private static void validateHFiles(FileSystem fs, String outputPath, String family,
- int expectedKVCount) throws IOException {
-
- // validate number and content of output columns
- LOG.debug("Validating HFiles.");
- Set<String> configFamilies = new HashSet<>();
- configFamilies.add(family);
- Set<String> foundFamilies = new HashSet<>();
- int actualKVCount = 0;
- for (FileStatus cfStatus : fs.listStatus(new Path(outputPath), new OutputFilesFilter())) {
- LOG.debug("The output path has files");
- String[] elements = cfStatus.getPath().toString().split(Path.SEPARATOR);
- String cf = elements[elements.length - 1];
- foundFamilies.add(cf);
- assertTrue(String.format(
- "HFile ouput contains a column family (%s) not present in input families (%s)", cf,
- configFamilies), configFamilies.contains(cf));
- for (FileStatus hfile : fs.listStatus(cfStatus.getPath())) {
- assertTrue(String.format("HFile %s appears to contain no data.", hfile.getPath()),
- hfile.getLen() > 0);
- if (expectedKVCount > -1) {
- actualKVCount += getKVCountFromHfile(fs, hfile.getPath());
- }
- }
- }
- if (expectedKVCount > -1) {
- assertTrue(String.format(
- "KV count in output hfile=<%d> doesn't match with expected KV count=<%d>", actualKVCount,
- expectedKVCount), actualKVCount == expectedKVCount);
- }
- }
-
- /**
- * Confirm ImportTsv via data in online table.
- */
- private static void validateTable(Configuration conf, TableName tableName, String family,
- int valueMultiplier) throws IOException {
-
- LOG.debug("Validating table.");
- Table table = util.getConnection().getTable(tableName);
- boolean verified = false;
- long pause = conf.getLong("hbase.client.pause", 5 * 1000);
- int numRetries = conf.getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 5);
- for (int i = 0; i < numRetries; i++) {
- try {
- Scan scan = new Scan();
- // Scan entire family.
- scan.addFamily(Bytes.toBytes(family));
- scan.setAuthorizations(new Authorizations("secret","private"));
- ResultScanner resScanner = table.getScanner(scan);
- Result[] next = resScanner.next(5);
- assertEquals(1, next.length);
- for (Result res : resScanner) {
- LOG.debug("Getting results " + res.size());
- assertTrue(res.size() == 2);
- List<Cell> kvs = res.listCells();
- assertTrue(CellUtil.matchingRow(kvs.get(0), Bytes.toBytes("KEY")));
- assertTrue(CellUtil.matchingRow(kvs.get(1), Bytes.toBytes("KEY")));
- assertTrue(CellUtil.matchingValue(kvs.get(0), Bytes.toBytes("VALUE" + valueMultiplier)));
- assertTrue(CellUtil.matchingValue(kvs.get(1),
- Bytes.toBytes("VALUE" + 2 * valueMultiplier)));
- // Only one result set is expected, so let it loop.
- }
- verified = true;
- break;
- } catch (NullPointerException e) {
- // If here, a cell was empty. Presume it's because updates came in
- // after the scanner had been opened. Wait a while and retry.
- }
- try {
- Thread.sleep(pause);
- } catch (InterruptedException e) {
- // continue
- }
- }
- table.close();
- assertTrue(verified);
- }
-
- /**
- * Returns the total number of KVs in the given HFile.
- * @param fs File System
- * @param p HFile path
- * @return KV count in the given hfile
- * @throws IOException
- */
- private static int getKVCountFromHfile(FileSystem fs, Path p) throws IOException {
- Configuration conf = util.getConfiguration();
- HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf);
- reader.loadFileInfo();
- HFileScanner scanner = reader.getScanner(false, false);
- scanner.seekTo();
- int count = 0;
- do {
- count++;
- } while (scanner.next());
- reader.close();
- return count;
- }
-
-}
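To summarize what the visibility-label tests above exercise: cells are written with a visibility expression and can only be read back under matching authorizations. A minimal sketch of that round trip, assuming labels like those defined in the test have already been added via VisibilityClient.addLabels and using an illustrative table and values:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.security.visibility.Authorizations;
import org.apache.hadoop.hbase.security.visibility.CellVisibility;
import org.apache.hadoop.hbase.util.Bytes;

public class VisibilityRoundTripExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    try (Connection conn = ConnectionFactory.createConnection(conf);
         Table table = conn.getTable(TableName.valueOf("demo"))) { // hypothetical table
      Put put = new Put(Bytes.toBytes("KEY"));
      put.addColumn(Bytes.toBytes("FAM"), Bytes.toBytes("A"), Bytes.toBytes("VALUE1"));
      // Only reads whose authorizations satisfy this expression can see the cell.
      put.setCellVisibility(new CellVisibility("secret&private"));
      table.put(put);

      Scan scan = new Scan();
      scan.addFamily(Bytes.toBytes("FAM"));
      // Request the authorizations to apply to this read.
      scan.setAuthorizations(new Authorizations("secret", "private"));
      try (ResultScanner scanner = table.getScanner(scan)) {
        for (Result r : scanner) {
          System.out.println(Bytes.toString(r.getRow()));
        }
      }
    }
  }
}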
[02/41] hbase git commit: HBASE-18640 Move mapreduce out of hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TsvImporterCustomTestMapperForOprAttr.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TsvImporterCustomTestMapperForOprAttr.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TsvImporterCustomTestMapperForOprAttr.java
deleted file mode 100644
index 9d8b8f0..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TsvImporterCustomTestMapperForOprAttr.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser.BadTsvLineException;
-import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser.ParsedLine;
-import org.apache.hadoop.hbase.util.Bytes;
-
-/**
- *
- * Just shows a simple example of how the attributes can be extracted and added
- * to the puts
- */
-public class TsvImporterCustomTestMapperForOprAttr extends TsvImporterMapper {
- @Override
- protected void populatePut(byte[] lineBytes, ParsedLine parsed, Put put, int i)
- throws BadTsvLineException, IOException {
- KeyValue kv;
- kv = new KeyValue(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength(),
- parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0,
- parser.getQualifier(i).length, ts, KeyValue.Type.Put, lineBytes, parsed.getColumnOffset(i),
- parsed.getColumnLength(i));
- if (parsed.getIndividualAttributes() != null) {
- String[] attributes = parsed.getIndividualAttributes();
- for (String attr : attributes) {
- String[] split = attr.split(ImportTsv.DEFAULT_ATTRIBUTES_SEPERATOR);
- if (split == null || split.length <= 1) {
- throw new BadTsvLineException("Invalid attributes seperator specified" + attributes);
- } else {
- if (split[0].length() <= 0 || split[1].length() <= 0) {
- throw new BadTsvLineException("Invalid attributes seperator specified" + attributes);
- }
- put.setAttribute(split[0], Bytes.toBytes(split[1]));
- }
- }
- }
- put.add(kv);
- }
-}
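A custom mapper like the one above is plugged into ImportTsv through its mapper configuration key, in the same style as the test arguments earlier in this commit. A rough sketch follows; the table name and input path are placeholders, and the HBASE_ATTRIBUTES_KEY column spec is an assumption based on the operation-attributes test, not something stated in this file:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.ImportTsv;
import org.apache.hadoop.hbase.mapreduce.TsvImporterCustomTestMapperForOprAttr;
import org.apache.hadoop.util.ToolRunner;

public class CustomMapperImportExample {
  public static void main(String[] cmdArgs) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    String[] args = new String[] {
        "-D" + ImportTsv.MAPPER_CONF_KEY + "="
            + TsvImporterCustomTestMapperForOprAttr.class.getName(),
        // HBASE_ATTRIBUTES_KEY is assumed here; the other column names are illustrative.
        "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_ATTRIBUTES_KEY",
        "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b",
        "demoTable",            // placeholder table name
        "/path/to/input.dat" }; // placeholder input path
    int exitCode = ToolRunner.run(conf, new ImportTsv(), args);
    System.exit(exitCode);
  }
}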
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/namespace/TestNamespaceAuditor.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/namespace/TestNamespaceAuditor.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/namespace/TestNamespaceAuditor.java
index f641887..a81d268 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/namespace/TestNamespaceAuditor.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/namespace/TestNamespaceAuditor.java
@@ -65,7 +65,6 @@ import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
import org.apache.hadoop.hbase.coprocessor.RegionObserver;
import org.apache.hadoop.hbase.coprocessor.RegionServerCoprocessorEnvironment;
import org.apache.hadoop.hbase.coprocessor.RegionServerObserver;
-import org.apache.hadoop.hbase.mapreduce.TableInputFormatBase;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
import org.apache.hadoop.hbase.master.TableNamespaceManager;
@@ -336,7 +335,7 @@ public class TestNamespaceAuditor {
byte[] columnFamily = Bytes.toBytes("info");
HTableDescriptor tableDescOne = new HTableDescriptor(tableTwo);
tableDescOne.addFamily(new HColumnDescriptor(columnFamily));
- ADMIN.createTable(tableDescOne, Bytes.toBytes("1"), Bytes.toBytes("2000"), initialRegions);
+ ADMIN.createTable(tableDescOne, Bytes.toBytes("0"), Bytes.toBytes("9"), initialRegions);
Connection connection = ConnectionFactory.createConnection(UTIL.getConfiguration());
try (Table table = connection.getTable(tableTwo)) {
UTIL.loadNumericRows(table, Bytes.toBytes("info"), 1000, 1999);
@@ -354,7 +353,7 @@ public class TestNamespaceAuditor {
hris = ADMIN.getTableRegions(tableTwo);
assertEquals(initialRegions - 1, hris.size());
Collections.sort(hris);
- ADMIN.split(tableTwo, Bytes.toBytes("500"));
+ ADMIN.split(tableTwo, Bytes.toBytes("3"));
// Not much we can do here until we have split return a Future.
Threads.sleep(5000);
hris = ADMIN.getTableRegions(tableTwo);
@@ -383,8 +382,7 @@ public class TestNamespaceAuditor {
Collections.sort(hris);
// verify that we cannot split
HRegionInfo hriToSplit2 = hris.get(1);
- ADMIN.split(tableTwo,
- TableInputFormatBase.getSplitKey(hriToSplit2.getStartKey(), hriToSplit2.getEndKey(), true));
+ ADMIN.split(tableTwo, Bytes.toBytes("6"));
Thread.sleep(2000);
assertEquals(initialRegions, ADMIN.getTableRegions(tableTwo).size());
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionFileSystem.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionFileSystem.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionFileSystem.java
index 0aa39f6..477c870 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionFileSystem.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionFileSystem.java
@@ -42,7 +42,6 @@ import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.PerformanceEvaluation;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.HTable;
@@ -62,9 +61,11 @@ import org.junit.rules.TestName;
public class TestHRegionFileSystem {
private static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
private static final Log LOG = LogFactory.getLog(TestHRegionFileSystem.class);
+
+ public static final byte[] FAMILY_NAME = Bytes.toBytes("info");
private static final byte[][] FAMILIES = {
- Bytes.add(PerformanceEvaluation.FAMILY_NAME, Bytes.toBytes("-A")),
- Bytes.add(PerformanceEvaluation.FAMILY_NAME, Bytes.toBytes("-B")) };
+ Bytes.add(FAMILY_NAME, Bytes.toBytes("-A")),
+ Bytes.add(FAMILY_NAME, Bytes.toBytes("-B")) };
private static final TableName TABLE_NAME = TableName.valueOf("TestTable");
@Rule
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationSmallTests.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationSmallTests.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationSmallTests.java
deleted file mode 100644
index e1cb8ba..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationSmallTests.java
+++ /dev/null
@@ -1,1059 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.replication;
-
-import static org.junit.Assert.*;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.NavigableMap;
-import java.util.TreeMap;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.HColumnDescriptor;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.Waiter;
-import org.apache.hadoop.hbase.client.Admin;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Delete;
-import org.apache.hadoop.hbase.client.Get;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.client.replication.ReplicationAdmin;
-import org.apache.hadoop.hbase.mapreduce.replication.VerifyReplication;
-import org.apache.hadoop.hbase.regionserver.HRegion;
-import org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl;
-import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
-import org.apache.hadoop.hbase.replication.regionserver.Replication;
-import org.apache.hadoop.hbase.replication.regionserver.ReplicationSource;
-import org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceInterface;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos;
-import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.ReplicationTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
-import org.apache.hadoop.hbase.util.FSUtils;
-import org.apache.hadoop.hbase.util.JVMClusterUtil;
-import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
-import org.apache.hadoop.hbase.wal.WAL;
-import org.apache.hadoop.hbase.wal.WALKey;
-import org.apache.hadoop.mapreduce.Job;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
-
-@Category({ReplicationTests.class, LargeTests.class})
-public class TestReplicationSmallTests extends TestReplicationBase {
-
- private static final Log LOG = LogFactory.getLog(TestReplicationSmallTests.class);
- private static final String PEER_ID = "2";
-
- @Rule
- public TestName name = new TestName();
-
- /**
- * @throws java.lang.Exception
- */
- @Before
- public void setUp() throws Exception {
- // Starting and stopping replication can make us miss new logs;
- // rolling like this makes sure the most recent one gets added to the queue.
- for ( JVMClusterUtil.RegionServerThread r :
- utility1.getHBaseCluster().getRegionServerThreads()) {
- utility1.getAdmin().rollWALWriter(r.getRegionServer().getServerName());
- }
- int rowCount = utility1.countRows(tableName);
- utility1.deleteTableData(tableName);
- // truncating the table will send one Delete per row to the slave cluster
- // in an async fashion, which is why we cannot just call deleteTableData on
- // utility2 since late writes could make it to the slave in some way.
- // Instead, we truncate the first table and wait for all the Deletes to
- // make it to the slave.
- Scan scan = new Scan();
- int lastCount = 0;
- for (int i = 0; i < NB_RETRIES; i++) {
- if (i==NB_RETRIES-1) {
- fail("Waited too much time for truncate");
- }
- ResultScanner scanner = htable2.getScanner(scan);
- Result[] res = scanner.next(rowCount);
- scanner.close();
- if (res.length != 0) {
- if (res.length < lastCount) {
- i--; // Don't increment timeout if we make progress
- }
- lastCount = res.length;
- LOG.info("Still got " + res.length + " rows");
- Thread.sleep(SLEEP_TIME);
- } else {
- break;
- }
- }
- }
-
- /**
- * Verify that version and column delete marker types are replicated
- * correctly.
- * @throws Exception
- */
- @Test(timeout=300000)
- public void testDeleteTypes() throws Exception {
- LOG.info("testDeleteTypes");
- final byte[] v1 = Bytes.toBytes("v1");
- final byte[] v2 = Bytes.toBytes("v2");
- final byte[] v3 = Bytes.toBytes("v3");
- htable1 = utility1.getConnection().getTable(tableName);
-
- long t = EnvironmentEdgeManager.currentTime();
- // create three versions for "row"
- Put put = new Put(row);
- put.addColumn(famName, row, t, v1);
- htable1.put(put);
-
- put = new Put(row);
- put.addColumn(famName, row, t + 1, v2);
- htable1.put(put);
-
- put = new Put(row);
- put.addColumn(famName, row, t + 2, v3);
- htable1.put(put);
-
- Get get = new Get(row);
- get.setMaxVersions();
- for (int i = 0; i < NB_RETRIES; i++) {
- if (i==NB_RETRIES-1) {
- fail("Waited too much time for put replication");
- }
- Result res = htable2.get(get);
- if (res.size() < 3) {
- LOG.info("Rows not available");
- Thread.sleep(SLEEP_TIME);
- } else {
- assertArrayEquals(CellUtil.cloneValue(res.rawCells()[0]), v3);
- assertArrayEquals(CellUtil.cloneValue(res.rawCells()[1]), v2);
- assertArrayEquals(CellUtil.cloneValue(res.rawCells()[2]), v1);
- break;
- }
- }
- // place a version delete marker (delete last version)
- Delete d = new Delete(row);
- d.addColumn(famName, row, t);
- htable1.delete(d);
-
- get = new Get(row);
- get.setMaxVersions();
- for (int i = 0; i < NB_RETRIES; i++) {
- if (i==NB_RETRIES-1) {
- fail("Waited too much time for put replication");
- }
- Result res = htable2.get(get);
- if (res.size() > 2) {
- LOG.info("Version not deleted");
- Thread.sleep(SLEEP_TIME);
- } else {
- assertArrayEquals(CellUtil.cloneValue(res.rawCells()[0]), v3);
- assertArrayEquals(CellUtil.cloneValue(res.rawCells()[1]), v2);
- break;
- }
- }
-
- // place a column delete marker
- d = new Delete(row);
- d.addColumns(famName, row, t+2);
- htable1.delete(d);
-
- // now *both* of the remaining version should be deleted
- // at the replica
- get = new Get(row);
- for (int i = 0; i < NB_RETRIES; i++) {
- if (i==NB_RETRIES-1) {
- fail("Waited too much time for del replication");
- }
- Result res = htable2.get(get);
- if (res.size() >= 1) {
- LOG.info("Rows not deleted");
- Thread.sleep(SLEEP_TIME);
- } else {
- break;
- }
- }
- }
-
- /**
- * Add a row, check it's replicated, delete it, check it's gone.
- * @throws Exception
- */
- @Test(timeout=300000)
- public void testSimplePutDelete() throws Exception {
- LOG.info("testSimplePutDelete");
- Put put = new Put(row);
- put.addColumn(famName, row, row);
-
- htable1 = utility1.getConnection().getTable(tableName);
- htable1.put(put);
-
- Get get = new Get(row);
- for (int i = 0; i < NB_RETRIES; i++) {
- if (i==NB_RETRIES-1) {
- fail("Waited too much time for put replication");
- }
- Result res = htable2.get(get);
- if (res.isEmpty()) {
- LOG.info("Row not available");
- Thread.sleep(SLEEP_TIME);
- } else {
- assertArrayEquals(res.value(), row);
- break;
- }
- }
-
- Delete del = new Delete(row);
- htable1.delete(del);
-
- get = new Get(row);
- for (int i = 0; i < NB_RETRIES; i++) {
- if (i==NB_RETRIES-1) {
- fail("Waited too much time for del replication");
- }
- Result res = htable2.get(get);
- if (res.size() >= 1) {
- LOG.info("Row not deleted");
- Thread.sleep(SLEEP_TIME);
- } else {
- break;
- }
- }
- }
-
- /**
- * Try a small batch upload using the write buffer, check it's replicated
- * @throws Exception
- */
- @Test(timeout=300000)
- public void testSmallBatch() throws Exception {
- LOG.info("testSmallBatch");
- // normal Batch tests
- loadData("", row);
-
- Scan scan = new Scan();
-
- ResultScanner scanner1 = htable1.getScanner(scan);
- Result[] res1 = scanner1.next(NB_ROWS_IN_BATCH);
- scanner1.close();
- assertEquals(NB_ROWS_IN_BATCH, res1.length);
-
- waitForReplication(NB_ROWS_IN_BATCH, NB_RETRIES);
- }
-
- private void waitForReplication(int expectedRows, int retries) throws IOException, InterruptedException {
- Scan scan;
- for (int i = 0; i < retries; i++) {
- scan = new Scan();
- if (i== retries -1) {
- fail("Waited too much time for normal batch replication");
- }
- ResultScanner scanner = htable2.getScanner(scan);
- Result[] res = scanner.next(expectedRows);
- scanner.close();
- if (res.length != expectedRows) {
- LOG.info("Only got " + res.length + " rows");
- Thread.sleep(SLEEP_TIME);
- } else {
- break;
- }
- }
- }
-
- private void loadData(String prefix, byte[] row) throws IOException {
- List<Put> puts = new ArrayList<>(NB_ROWS_IN_BATCH);
- for (int i = 0; i < NB_ROWS_IN_BATCH; i++) {
- Put put = new Put(Bytes.toBytes(prefix + Integer.toString(i)));
- put.addColumn(famName, row, row);
- puts.add(put);
- }
- htable1.put(puts);
- }
-
- /**
- * Test disabling and enabling replication: disable the peer, insert and make sure
- * nothing is replicated, then enable the peer and verify the insert is replicated.
- *
- * @throws Exception
- */
- @Test(timeout = 300000)
- public void testDisableEnable() throws Exception {
-
- // Test disabling replication
- admin.disablePeer(PEER_ID);
-
- byte[] rowkey = Bytes.toBytes("disable enable");
- Put put = new Put(rowkey);
- put.addColumn(famName, row, row);
- htable1.put(put);
-
- Get get = new Get(rowkey);
- for (int i = 0; i < NB_RETRIES; i++) {
- Result res = htable2.get(get);
- if (res.size() >= 1) {
- fail("Replication wasn't disabled");
- } else {
- LOG.info("Row not replicated, let's wait a bit more...");
- Thread.sleep(SLEEP_TIME);
- }
- }
-
- // Test enable replication
- admin.enablePeer(PEER_ID);
-
- for (int i = 0; i < NB_RETRIES; i++) {
- Result res = htable2.get(get);
- if (res.isEmpty()) {
- LOG.info("Row not available");
- Thread.sleep(SLEEP_TIME);
- } else {
- assertArrayEquals(res.value(), row);
- return;
- }
- }
- fail("Waited too much time for put replication");
- }
-
- /**
- * Integration test for TestReplicationAdmin: removes and re-adds a peer
- * cluster.
- *
- * @throws Exception
- */
- @Test(timeout=300000)
- public void testAddAndRemoveClusters() throws Exception {
- LOG.info("testAddAndRemoveClusters");
- admin.removePeer(PEER_ID);
- Thread.sleep(SLEEP_TIME);
- byte[] rowKey = Bytes.toBytes("Won't be replicated");
- Put put = new Put(rowKey);
- put.addColumn(famName, row, row);
- htable1.put(put);
-
- Get get = new Get(rowKey);
- for (int i = 0; i < NB_RETRIES; i++) {
- if (i == NB_RETRIES-1) {
- break;
- }
- Result res = htable2.get(get);
- if (res.size() >= 1) {
- fail("Not supposed to be replicated");
- } else {
- LOG.info("Row not replicated, let's wait a bit more...");
- Thread.sleep(SLEEP_TIME);
- }
- }
- ReplicationPeerConfig rpc = new ReplicationPeerConfig();
- rpc.setClusterKey(utility2.getClusterKey());
- admin.addPeer(PEER_ID, rpc, null);
- Thread.sleep(SLEEP_TIME);
- rowKey = Bytes.toBytes("do rep");
- put = new Put(rowKey);
- put.addColumn(famName, row, row);
- LOG.info("Adding new row");
- htable1.put(put);
-
- get = new Get(rowKey);
- for (int i = 0; i < NB_RETRIES; i++) {
- if (i==NB_RETRIES-1) {
- fail("Waited too much time for put replication");
- }
- Result res = htable2.get(get);
- if (res.isEmpty()) {
- LOG.info("Row not available");
- Thread.sleep(SLEEP_TIME*i);
- } else {
- assertArrayEquals(res.value(), row);
- break;
- }
- }
- }
-
-
- /**
- * Do a more intense version of testSmallBatch, one that will trigger
- * WAL rolling and other non-trivial code paths.
- * @throws Exception
- */
- @Test(timeout=300000)
- public void testLoading() throws Exception {
- LOG.info("Writing out rows to table1 in testLoading");
- List<Put> puts = new ArrayList<>(NB_ROWS_IN_BIG_BATCH);
- for (int i = 0; i < NB_ROWS_IN_BIG_BATCH; i++) {
- Put put = new Put(Bytes.toBytes(i));
- put.addColumn(famName, row, row);
- puts.add(put);
- }
- // The puts will be iterated through and flushed only when the buffer
- // size is reached.
- htable1.put(puts);
-
- Scan scan = new Scan();
-
- ResultScanner scanner = htable1.getScanner(scan);
- Result[] res = scanner.next(NB_ROWS_IN_BIG_BATCH);
- scanner.close();
-
- assertEquals(NB_ROWS_IN_BIG_BATCH, res.length);
-
- LOG.info("Looking in table2 for replicated rows in testLoading");
- long start = System.currentTimeMillis();
- // Retry more than NB_RETRIES. As it was, retries were done in 5 seconds and we'd fail
- // sometimes.
- final long retries = NB_RETRIES * 10;
- for (int i = 0; i < retries; i++) {
- scan = new Scan();
- scanner = htable2.getScanner(scan);
- res = scanner.next(NB_ROWS_IN_BIG_BATCH);
- scanner.close();
- if (res.length != NB_ROWS_IN_BIG_BATCH) {
- if (i == retries - 1) {
- int lastRow = -1;
- for (Result result : res) {
- int currentRow = Bytes.toInt(result.getRow());
- for (int row = lastRow+1; row < currentRow; row++) {
- LOG.error("Row missing: " + row);
- }
- lastRow = currentRow;
- }
- LOG.error("Last row: " + lastRow);
- fail("Waited too much time for normal batch replication, " +
- res.length + " instead of " + NB_ROWS_IN_BIG_BATCH + "; waited=" +
- (System.currentTimeMillis() - start) + "ms");
- } else {
- LOG.info("Only got " + res.length + " rows... retrying");
- Thread.sleep(SLEEP_TIME);
- }
- } else {
- break;
- }
- }
- }
-
- /**
- * Load a small batch into a table, make sure the data is really the same,
- * then run the VerifyReplication job to check the results. Do a second
- * comparison where all the cells are different.
- * @throws Exception
- */
- @Test(timeout=300000)
- public void testVerifyRepJob() throws Exception {
- // Populate the tables, at the same time it guarantees that the tables are
- // identical since it does the check
- testSmallBatch();
-
- String[] args = new String[] {PEER_ID, tableName.getNameAsString()};
- runVerifyReplication(args, NB_ROWS_IN_BATCH, 0);
-
- Scan scan = new Scan();
- ResultScanner rs = htable2.getScanner(scan);
- Put put = null;
- for (Result result : rs) {
- put = new Put(result.getRow());
- Cell firstVal = result.rawCells()[0];
- put.addColumn(CellUtil.cloneFamily(firstVal), CellUtil.cloneQualifier(firstVal),
- Bytes.toBytes("diff data"));
- htable2.put(put);
- }
- Delete delete = new Delete(put.getRow());
- htable2.delete(delete);
- runVerifyReplication(args, 0, NB_ROWS_IN_BATCH);
- }
-
- /**
- * Load a row into a table, make sure the data is really the same,
- * delete the row, make sure the delete marker is replicated,
- * run verify replication with and without raw to check the results.
- * @throws Exception
- */
- @Test(timeout=300000)
- public void testVerifyRepJobWithRawOptions() throws Exception {
- LOG.info(name.getMethodName());
-
- final TableName tableName = TableName.valueOf(name.getMethodName());
- byte[] familyname = Bytes.toBytes("fam_raw");
- byte[] row = Bytes.toBytes("row_raw");
-
- Table lHtable1 = null;
- Table lHtable2 = null;
-
- try {
- HTableDescriptor table = new HTableDescriptor(tableName);
- HColumnDescriptor fam = new HColumnDescriptor(familyname);
- fam.setMaxVersions(100);
- fam.setScope(HConstants.REPLICATION_SCOPE_GLOBAL);
- table.addFamily(fam);
- scopes = new TreeMap<>(Bytes.BYTES_COMPARATOR);
- for (HColumnDescriptor f : table.getColumnFamilies()) {
- scopes.put(f.getName(), f.getScope());
- }
-
- Connection connection1 = ConnectionFactory.createConnection(conf1);
- Connection connection2 = ConnectionFactory.createConnection(conf2);
- try (Admin admin1 = connection1.getAdmin()) {
- admin1.createTable(table, HBaseTestingUtility.KEYS_FOR_HBA_CREATE_TABLE);
- }
- try (Admin admin2 = connection2.getAdmin()) {
- admin2.createTable(table, HBaseTestingUtility.KEYS_FOR_HBA_CREATE_TABLE);
- }
- utility1.waitUntilAllRegionsAssigned(tableName);
- utility2.waitUntilAllRegionsAssigned(tableName);
-
- lHtable1 = utility1.getConnection().getTable(tableName);
- lHtable2 = utility2.getConnection().getTable(tableName);
-
- Put put = new Put(row);
- put.addColumn(familyname, row, row);
- lHtable1.put(put);
-
- Get get = new Get(row);
- for (int i = 0; i < NB_RETRIES; i++) {
- if (i==NB_RETRIES-1) {
- fail("Waited too much time for put replication");
- }
- Result res = lHtable2.get(get);
- if (res.isEmpty()) {
- LOG.info("Row not available");
- Thread.sleep(SLEEP_TIME);
- } else {
- assertArrayEquals(res.value(), row);
- break;
- }
- }
-
- Delete del = new Delete(row);
- lHtable1.delete(del);
-
- get = new Get(row);
- for (int i = 0; i < NB_RETRIES; i++) {
- if (i==NB_RETRIES-1) {
- fail("Waited too much time for del replication");
- }
- Result res = lHtable2.get(get);
- if (res.size() >= 1) {
- LOG.info("Row not deleted");
- Thread.sleep(SLEEP_TIME);
- } else {
- break;
- }
- }
-
- // Checking verifyReplication for the default behavior.
- String[] argsWithoutRaw = new String[] {PEER_ID, tableName.getNameAsString()};
- runVerifyReplication(argsWithoutRaw, 0, 0);
-
- // Checking verifyReplication with raw
- String[] argsWithRawAsTrue = new String[] {"--raw", PEER_ID, tableName.getNameAsString()};
- runVerifyReplication(argsWithRawAsTrue, 1, 0);
- } finally {
- if (lHtable1 != null) {
- lHtable1.close();
- }
- if (lHtable2 != null) {
- lHtable2.close();
- }
- }
- }
-
- private void runVerifyReplication(String[] args, int expectedGoodRows, int expectedBadRows)
- throws IOException, InterruptedException, ClassNotFoundException {
- Job job = new VerifyReplication().createSubmittableJob(new Configuration(conf1), args);
- if (job == null) {
- fail("Job wasn't created, see the log");
- }
- if (!job.waitForCompletion(true)) {
- fail("Job failed, see the log");
- }
- assertEquals(expectedGoodRows, job.getCounters().
- findCounter(VerifyReplication.Verifier.Counters.GOODROWS).getValue());
- assertEquals(expectedBadRows, job.getCounters().
- findCounter(VerifyReplication.Verifier.Counters.BADROWS).getValue());
- }
-
- @Test(timeout=300000)
- // VerifyReplication should honor versions option
- public void testHBase14905() throws Exception {
- // normal Batch tests
- byte[] qualifierName = Bytes.toBytes("f1");
- Put put = new Put(Bytes.toBytes("r1"));
- put.addColumn(famName, qualifierName, Bytes.toBytes("v1002"));
- htable1.put(put);
- put.addColumn(famName, qualifierName, Bytes.toBytes("v1001"));
- htable1.put(put);
- put.addColumn(famName, qualifierName, Bytes.toBytes("v1112"));
- htable1.put(put);
-
- Scan scan = new Scan();
- scan.setMaxVersions(100);
- ResultScanner scanner1 = htable1.getScanner(scan);
- Result[] res1 = scanner1.next(1);
- scanner1.close();
-
- assertEquals(1, res1.length);
- assertEquals(3, res1[0].getColumnCells(famName, qualifierName).size());
-
- for (int i = 0; i < NB_RETRIES; i++) {
- scan = new Scan();
- scan.setMaxVersions(100);
- scanner1 = htable2.getScanner(scan);
- res1 = scanner1.next(1);
- scanner1.close();
- if (res1.length != 1) {
- LOG.info("Only got " + res1.length + " rows");
- Thread.sleep(SLEEP_TIME);
- } else {
- int cellNumber = res1[0].getColumnCells(famName, Bytes.toBytes("f1")).size();
- if (cellNumber != 3) {
- LOG.info("Only got " + cellNumber + " cells");
- Thread.sleep(SLEEP_TIME);
- } else {
- break;
- }
- }
- if (i == NB_RETRIES-1) {
- fail("Waited too much time for normal batch replication");
- }
- }
-
- put.addColumn(famName, qualifierName, Bytes.toBytes("v1111"));
- htable2.put(put);
- put.addColumn(famName, qualifierName, Bytes.toBytes("v1112"));
- htable2.put(put);
-
- scan = new Scan();
- scan.setMaxVersions(100);
- scanner1 = htable2.getScanner(scan);
- res1 = scanner1.next(NB_ROWS_IN_BATCH);
- scanner1.close();
-
- assertEquals(1, res1.length);
- assertEquals(5, res1[0].getColumnCells(famName, qualifierName).size());
-
- String[] args = new String[] {"--versions=100", PEER_ID, tableName.getNameAsString()};
- runVerifyReplication(args, 0, 1);
- }
-
- @Test(timeout=300000)
- // VerifyReplication should honor versions option
- public void testVersionMismatchHBase14905() throws Exception {
- // normal Batch tests
- byte[] qualifierName = Bytes.toBytes("f1");
- Put put = new Put(Bytes.toBytes("r1"));
- long ts = System.currentTimeMillis();
- put.addColumn(famName, qualifierName, ts + 1, Bytes.toBytes("v1"));
- htable1.put(put);
- put.addColumn(famName, qualifierName, ts + 2, Bytes.toBytes("v2"));
- htable1.put(put);
- put.addColumn(famName, qualifierName, ts + 3, Bytes.toBytes("v3"));
- htable1.put(put);
-
- Scan scan = new Scan();
- scan.setMaxVersions(100);
- ResultScanner scanner1 = htable1.getScanner(scan);
- Result[] res1 = scanner1.next(1);
- scanner1.close();
-
- assertEquals(1, res1.length);
- assertEquals(3, res1[0].getColumnCells(famName, qualifierName).size());
-
- for (int i = 0; i < NB_RETRIES; i++) {
- scan = new Scan();
- scan.setMaxVersions(100);
- scanner1 = htable2.getScanner(scan);
- res1 = scanner1.next(1);
- scanner1.close();
- if (res1.length != 1) {
- LOG.info("Only got " + res1.length + " rows");
- Thread.sleep(SLEEP_TIME);
- } else {
- int cellNumber = res1[0].getColumnCells(famName, Bytes.toBytes("f1")).size();
- if (cellNumber != 3) {
- LOG.info("Only got " + cellNumber + " cells");
- Thread.sleep(SLEEP_TIME);
- } else {
- break;
- }
- }
- if (i == NB_RETRIES-1) {
- fail("Waited too much time for normal batch replication");
- }
- }
-
- try {
- // Disabling replication and modifying the particular version of the cell to validate the feature.
- admin.disablePeer(PEER_ID);
- Put put2 = new Put(Bytes.toBytes("r1"));
- put2.addColumn(famName, qualifierName, ts +2, Bytes.toBytes("v99"));
- htable2.put(put2);
-
- scan = new Scan();
- scan.setMaxVersions(100);
- scanner1 = htable2.getScanner(scan);
- res1 = scanner1.next(NB_ROWS_IN_BATCH);
- scanner1.close();
- assertEquals(1, res1.length);
- assertEquals(3, res1[0].getColumnCells(famName, qualifierName).size());
-
- String[] args = new String[] {"--versions=100", PEER_ID, tableName.getNameAsString()};
- runVerifyReplication(args, 0, 1);
- }
- finally {
- admin.enablePeer(PEER_ID);
- }
- }
-
- /**
- * Test for HBASE-9038, Replication.scopeWALEdits would NPE if it wasn't filtering out
- * the compaction WALEdit
- * @throws Exception
- */
- @Test(timeout=300000)
- public void testCompactionWALEdits() throws Exception {
- WALProtos.CompactionDescriptor compactionDescriptor =
- WALProtos.CompactionDescriptor.getDefaultInstance();
- HRegionInfo hri = new HRegionInfo(htable1.getName(),
- HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW);
- WALEdit edit = WALEdit.createCompaction(hri, compactionDescriptor);
- Replication.scopeWALEdits(new WALKey(), edit,
- htable1.getConfiguration(), null);
- }
-
- /**
- * Test for HBASE-8663
- * Create three new tables with colfamilies enabled for replication, then run
- * ReplicationAdmin.listReplicated(). Finally verify the table:colfamily pairs. Note:
- * TestReplicationAdmin is a better place for this testing but it would need mocks.
- * @throws Exception
- */
- @Test(timeout = 300000)
- public void testVerifyListReplicatedTable() throws Exception {
- LOG.info("testVerifyListReplicatedTable");
-
- final String tName = "VerifyListReplicated_";
- final String colFam = "cf1";
- final int numOfTables = 3;
-
- Admin hadmin = utility1.getAdmin();
-
- // Create Tables
- for (int i = 0; i < numOfTables; i++) {
- HTableDescriptor ht = new HTableDescriptor(TableName.valueOf(tName + i));
- HColumnDescriptor cfd = new HColumnDescriptor(colFam);
- cfd.setScope(HConstants.REPLICATION_SCOPE_GLOBAL);
- ht.addFamily(cfd);
- hadmin.createTable(ht);
- }
-
- // verify the result
- List<HashMap<String, String>> replicationColFams = admin.listReplicated();
- int[] match = new int[numOfTables]; // array of 3 with init value of zero
-
- for (int i = 0; i < replicationColFams.size(); i++) {
- HashMap<String, String> replicationEntry = replicationColFams.get(i);
- String tn = replicationEntry.get(ReplicationAdmin.TNAME);
- if ((tn.startsWith(tName)) && replicationEntry.get(ReplicationAdmin.CFNAME).equals(colFam)) {
- int m = Integer.parseInt(tn.substring(tn.length() - 1)); // get the last digit
- match[m]++; // should only increase once
- }
- }
-
- // check the matching result
- for (int i = 0; i < match.length; i++) {
- assertTrue("listReplicated() does not match table " + i, (match[i] == 1));
- }
-
- // drop tables
- for (int i = 0; i < numOfTables; i++) {
- TableName tableName = TableName.valueOf(tName + i);
- hadmin.disableTable(tableName);
- hadmin.deleteTable(tableName);
- }
-
- hadmin.close();
- }
-
- /**
- * Test for HBASE-15259 (WALEdits under replay will also be replicated):
- * verify that edits written during region replay are not replicated to the peer.
- */
- @Test
- public void testReplicationInReplay() throws Exception {
- final TableName tableName = htable1.getName();
-
- HRegion region = utility1.getMiniHBaseCluster().getRegions(tableName).get(0);
- HRegionInfo hri = region.getRegionInfo();
- NavigableMap<byte[], Integer> scopes = new TreeMap<>(Bytes.BYTES_COMPARATOR);
- for (byte[] fam : htable1.getTableDescriptor().getFamiliesKeys()) {
- scopes.put(fam, 1);
- }
- final MultiVersionConcurrencyControl mvcc = new MultiVersionConcurrencyControl();
- int index = utility1.getMiniHBaseCluster().getServerWith(hri.getRegionName());
- WAL wal = utility1.getMiniHBaseCluster().getRegionServer(index).getWAL(region.getRegionInfo());
- final byte[] rowName = Bytes.toBytes("testReplicationInReplay");
- final byte[] qualifier = Bytes.toBytes("q");
- final byte[] value = Bytes.toBytes("v");
- WALEdit edit = new WALEdit(true);
- long now = EnvironmentEdgeManager.currentTime();
- edit.add(new KeyValue(rowName, famName, qualifier,
- now, value));
- WALKey walKey = new WALKey(hri.getEncodedNameAsBytes(), tableName, now, mvcc, scopes);
- wal.append(hri, walKey, edit, true);
- wal.sync();
-
- Get get = new Get(rowName);
- for (int i = 0; i < NB_RETRIES; i++) {
- if (i == NB_RETRIES-1) {
- break;
- }
- Result res = htable2.get(get);
- if (res.size() >= 1) {
- fail("Not supposed to be replicated for " + Bytes.toString(res.getRow()));
- } else {
- LOG.info("Row not replicated, let's wait a bit more...");
- Thread.sleep(SLEEP_TIME);
- }
- }
- }
-
- @Test(timeout=300000)
- public void testVerifyReplicationPrefixFiltering() throws Exception {
- final byte[] prefixRow = Bytes.toBytes("prefixrow");
- final byte[] prefixRow2 = Bytes.toBytes("secondrow");
- loadData("prefixrow", prefixRow);
- loadData("secondrow", prefixRow2);
- loadData("aaa", row);
- loadData("zzz", row);
- waitForReplication(NB_ROWS_IN_BATCH * 4, NB_RETRIES * 4);
- String[] args = new String[] {"--row-prefixes=prefixrow,secondrow", PEER_ID,
- tableName.getNameAsString()};
- runVerifyReplication(args, NB_ROWS_IN_BATCH *2, 0);
- }
-
- @Test(timeout = 300000)
- public void testVerifyReplicationSnapshotArguments() {
- String[] args =
- new String[] { "--sourceSnapshotName=snapshot1", "2", tableName.getNameAsString() };
- assertFalse(Lists.newArrayList(args).toString(), new VerifyReplication().doCommandLine(args));
-
- args = new String[] { "--sourceSnapshotTmpDir=tmp", "2", tableName.getNameAsString() };
- assertFalse(Lists.newArrayList(args).toString(), new VerifyReplication().doCommandLine(args));
-
- args = new String[] { "--sourceSnapshotName=snapshot1", "--sourceSnapshotTmpDir=tmp", "2",
- tableName.getNameAsString() };
- assertTrue(Lists.newArrayList(args).toString(), new VerifyReplication().doCommandLine(args));
-
- args = new String[] { "--peerSnapshotName=snapshot1", "2", tableName.getNameAsString() };
- assertFalse(Lists.newArrayList(args).toString(), new VerifyReplication().doCommandLine(args));
-
- args = new String[] { "--peerSnapshotTmpDir=/tmp/", "2", tableName.getNameAsString() };
- assertFalse(Lists.newArrayList(args).toString(), new VerifyReplication().doCommandLine(args));
-
- args = new String[] { "--peerSnapshotName=snapshot1", "--peerSnapshotTmpDir=/tmp/",
- "--peerFSAddress=tempfs", "--peerHBaseRootAddress=hdfs://tempfs:50070/hbase/", "2",
- tableName.getNameAsString() };
- assertTrue(Lists.newArrayList(args).toString(), new VerifyReplication().doCommandLine(args));
-
- args = new String[] { "--sourceSnapshotName=snapshot1", "--sourceSnapshotTmpDir=/tmp/",
- "--peerSnapshotName=snapshot2", "--peerSnapshotTmpDir=/tmp/", "--peerFSAddress=tempfs",
- "--peerHBaseRootAddress=hdfs://tempfs:50070/hbase/", "2", tableName.getNameAsString() };
-
- assertTrue(Lists.newArrayList(args).toString(), new VerifyReplication().doCommandLine(args));
- }
-
- @Test(timeout = 300000)
- public void testVerifyReplicationWithSnapshotSupport() throws Exception {
- // Populate the tables; testSmallBatch() also verifies that the source and peer
- // tables are identical, so they are known to match before the snapshots are taken
- testSmallBatch();
-
- // Take source and target tables snapshot
- Path rootDir = FSUtils.getRootDir(conf1);
- FileSystem fs = rootDir.getFileSystem(conf1);
- String sourceSnapshotName = "sourceSnapshot-" + System.currentTimeMillis();
- SnapshotTestingUtils.createSnapshotAndValidate(utility1.getHBaseAdmin(), tableName,
- new String(famName), sourceSnapshotName, rootDir, fs, true);
-
- // Take target snapshot
- Path peerRootDir = FSUtils.getRootDir(conf2);
- FileSystem peerFs = peerRootDir.getFileSystem(conf2);
- String peerSnapshotName = "peerSnapshot-" + System.currentTimeMillis();
- SnapshotTestingUtils.createSnapshotAndValidate(utility2.getHBaseAdmin(), tableName,
- new String(famName), peerSnapshotName, peerRootDir, peerFs, true);
-
- String peerFSAddress = peerFs.getUri().toString();
- String temPath1 = utility1.getRandomDir().toString();
- String temPath2 = "/tmp2";
-
- String[] args = new String[] { "--sourceSnapshotName=" + sourceSnapshotName,
- "--sourceSnapshotTmpDir=" + temPath1, "--peerSnapshotName=" + peerSnapshotName,
- "--peerSnapshotTmpDir=" + temPath2, "--peerFSAddress=" + peerFSAddress,
- "--peerHBaseRootAddress=" + FSUtils.getRootDir(conf2), "2", tableName.getNameAsString() };
-
- Job job = new VerifyReplication().createSubmittableJob(conf1, args);
- if (job == null) {
- fail("Job wasn't created, see the log");
- }
- if (!job.waitForCompletion(true)) {
- fail("Job failed, see the log");
- }
- assertEquals(NB_ROWS_IN_BATCH,
- job.getCounters().findCounter(VerifyReplication.Verifier.Counters.GOODROWS).getValue());
- assertEquals(0,
- job.getCounters().findCounter(VerifyReplication.Verifier.Counters.BADROWS).getValue());
-
- Scan scan = new Scan();
- ResultScanner rs = htable2.getScanner(scan);
- Put put = null;
- for (Result result : rs) {
- put = new Put(result.getRow());
- Cell firstVal = result.rawCells()[0];
- put.addColumn(CellUtil.cloneFamily(firstVal), CellUtil.cloneQualifier(firstVal),
- Bytes.toBytes("diff data"));
- htable2.put(put);
- }
- Delete delete = new Delete(put.getRow());
- htable2.delete(delete);
-
- sourceSnapshotName = "sourceSnapshot-" + System.currentTimeMillis();
- SnapshotTestingUtils.createSnapshotAndValidate(utility1.getHBaseAdmin(), tableName,
- new String(famName), sourceSnapshotName, rootDir, fs, true);
-
- peerSnapshotName = "peerSnapshot-" + System.currentTimeMillis();
- SnapshotTestingUtils.createSnapshotAndValidate(utility2.getHBaseAdmin(), tableName,
- new String(famName), peerSnapshotName, peerRootDir, peerFs, true);
-
- args = new String[] { "--sourceSnapshotName=" + sourceSnapshotName,
- "--sourceSnapshotTmpDir=" + temPath1, "--peerSnapshotName=" + peerSnapshotName,
- "--peerSnapshotTmpDir=" + temPath2, "--peerFSAddress=" + peerFSAddress,
- "--peerHBaseRootAddress=" + FSUtils.getRootDir(conf2), "2", tableName.getNameAsString() };
-
- job = new VerifyReplication().createSubmittableJob(conf1, args);
- if (job == null) {
- fail("Job wasn't created, see the log");
- }
- if (!job.waitForCompletion(true)) {
- fail("Job failed, see the log");
- }
- assertEquals(0,
- job.getCounters().findCounter(VerifyReplication.Verifier.Counters.GOODROWS).getValue());
- assertEquals(NB_ROWS_IN_BATCH,
- job.getCounters().findCounter(VerifyReplication.Verifier.Counters.BADROWS).getValue());
- }
-
- @Test
- public void testEmptyWALRecovery() throws Exception {
- final int numRs = utility1.getHBaseCluster().getRegionServerThreads().size();
-
- // for each RS, create an empty wal with same walGroupId
- final List<Path> emptyWalPaths = new ArrayList<>();
- long ts = System.currentTimeMillis();
- for (int i = 0; i < numRs; i++) {
- HRegionInfo regionInfo =
- utility1.getHBaseCluster().getRegions(htable1.getName()).get(0).getRegionInfo();
- WAL wal = utility1.getHBaseCluster().getRegionServer(i).getWAL(regionInfo);
- Path currentWalPath = AbstractFSWALProvider.getCurrentFileName(wal);
- String walGroupId = AbstractFSWALProvider.getWALPrefixFromWALName(currentWalPath.getName());
- Path emptyWalPath = new Path(utility1.getDataTestDir(), walGroupId + "." + ts);
- utility1.getTestFileSystem().create(emptyWalPath).close();
- emptyWalPaths.add(emptyWalPath);
- }
-
- // inject our empty wal into the replication queue
- for (int i = 0; i < numRs; i++) {
- Replication replicationService =
- (Replication) utility1.getHBaseCluster().getRegionServer(i).getReplicationSourceService();
- replicationService.preLogRoll(null, emptyWalPaths.get(i));
- replicationService.postLogRoll(null, emptyWalPaths.get(i));
- }
-
- // wait for ReplicationSource to start reading from our empty wal
- waitForLogAdvance(numRs, emptyWalPaths, false);
-
- // roll the original wal, which enqueues a new wal behind our empty wal
- for (int i = 0; i < numRs; i++) {
- HRegionInfo regionInfo =
- utility1.getHBaseCluster().getRegions(htable1.getName()).get(0).getRegionInfo();
- WAL wal = utility1.getHBaseCluster().getRegionServer(i).getWAL(regionInfo);
- wal.rollWriter(true);
- }
-
- // ReplicationSource should advance past the empty wal, or else the test will fail
- waitForLogAdvance(numRs, emptyWalPaths, true);
-
- // we're now writing to the new wal
- // if everything works, the source should have stopped reading from the empty wal and
- // started replicating from the new wal
- testSimplePutDelete();
- }
-
- /**
- * Waits for the ReplicationSource to start reading from the given paths
- * @param numRs number of regionservers
- * @param emptyWalPaths path for each regionserver
- * @param invert if true, waits until ReplicationSource is NOT reading from the given paths
- */
- private void waitForLogAdvance(final int numRs, final List<Path> emptyWalPaths,
- final boolean invert) throws Exception {
- Waiter.waitFor(conf1, 10000, new Waiter.Predicate<Exception>() {
- @Override
- public boolean evaluate() throws Exception {
- for (int i = 0; i < numRs; i++) {
- Replication replicationService = (Replication) utility1.getHBaseCluster()
- .getRegionServer(i).getReplicationSourceService();
- for (ReplicationSourceInterface rsi : replicationService.getReplicationManager()
- .getSources()) {
- ReplicationSource source = (ReplicationSource) rsi;
- if (!invert && !emptyWalPaths.get(i).equals(source.getCurrentPath())) {
- return false;
- }
- if (invert && emptyWalPaths.get(i).equals(source.getCurrentPath())) {
- return false;
- }
- }
- }
- return true;
- }
- });
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/security/HBaseKerberosUtils.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/security/HBaseKerberosUtils.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/security/HBaseKerberosUtils.java
index 07bb2b7..94991e1 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/security/HBaseKerberosUtils.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/security/HBaseKerberosUtils.java
@@ -17,15 +17,22 @@
*/
package org.apache.hadoop.hbase.security;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
-
import org.apache.hadoop.hbase.shaded.com.google.common.base.Strings;
+import org.apache.hadoop.security.UserGroupInformation;
+
+import java.io.IOException;
+import java.net.InetAddress;
@InterfaceAudience.Private
public class HBaseKerberosUtils {
+ private static final Log LOG = LogFactory.getLog(HBaseKerberosUtils.class);
+
public static final String KRB_PRINCIPAL = "hbase.regionserver.kerberos.principal";
public static final String MASTER_KRB_PRINCIPAL = "hbase.master.kerberos.principal";
public static final String KRB_KEYTAB_FILE = "hbase.regionserver.keytab.file";
@@ -81,4 +88,21 @@ public class HBaseKerberosUtils {
conf.set(KRB_PRINCIPAL, System.getProperty(KRB_PRINCIPAL));
conf.set(MASTER_KRB_PRINCIPAL, System.getProperty(KRB_PRINCIPAL));
}
+
+ public static UserGroupInformation loginAndReturnUGI(Configuration conf, String username)
+ throws IOException {
+ String hostname = InetAddress.getLocalHost().getHostName();
+ String keyTabFileConfKey = "hbase." + username + ".keytab.file";
+ String keyTabFileLocation = conf.get(keyTabFileConfKey);
+ String principalConfKey = "hbase." + username + ".kerberos.principal";
+ String principal = org.apache.hadoop.security.SecurityUtil
+ .getServerPrincipal(conf.get(principalConfKey), hostname);
+ if (keyTabFileLocation == null || principal == null) {
+ LOG.warn("Principal or key tab file null for : " + principalConfKey + ", "
+ + keyTabFileConfKey);
+ }
+ UserGroupInformation ugi =
+ UserGroupInformation.loginUserFromKeytabAndReturnUGI(principal, keyTabFileLocation);
+ return ugi;
+ }
}
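A minimal usage sketch for the loginAndReturnUGI() helper added above, assuming the test configuration defines "hbase.superuser.keytab.file" and "hbase.superuser.kerberos.principal"; the "superuser" short name and the KerberosLoginSketch class are illustrative only, not part of the patch.

import java.security.PrivilegedExceptionAction;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.security.HBaseKerberosUtils;
import org.apache.hadoop.security.UserGroupInformation;

public class KerberosLoginSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // Logs in from the keytab named by hbase.superuser.keytab.file as the principal
    // from hbase.superuser.kerberos.principal (property names assumed for illustration).
    UserGroupInformation ugi = HBaseKerberosUtils.loginAndReturnUGI(conf, "superuser");
    ugi.doAs(new PrivilegedExceptionAction<Void>() {
      @Override
      public Void run() {
        // Privileged test actions would run here as the keytab-based user.
        return null;
      }
    });
  }
}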
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
deleted file mode 100644
index 2e3cb5e..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
+++ /dev/null
@@ -1,381 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.snapshot;
-
-import static org.apache.hadoop.util.ToolRunner.run;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-
-import java.io.IOException;
-import java.net.URI;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.CategoryBasedTimeout;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Admin;
-import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.FSUtils;
-import org.junit.After;
-import org.junit.AfterClass;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.Ignore;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
-import org.junit.rules.TestRule;
-
-/**
- * Test Export Snapshot Tool
- */
-@Ignore
-@Category({VerySlowMapReduceTests.class, LargeTests.class})
-public class TestExportSnapshot {
- @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
- withTimeout(this.getClass()).withLookingForStuckThread(true).build();
- private static final Log LOG = LogFactory.getLog(TestExportSnapshot.class);
-
- protected final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
-
- protected final static byte[] FAMILY = Bytes.toBytes("cf");
-
- @Rule
- public final TestName testName = new TestName();
-
- protected TableName tableName;
- private byte[] emptySnapshotName;
- private byte[] snapshotName;
- private int tableNumFiles;
- private Admin admin;
-
- public static void setUpBaseConf(Configuration conf) {
- conf.setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true);
- conf.setInt("hbase.regionserver.msginterval", 100);
- conf.setInt("hbase.client.pause", 250);
- conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 6);
- conf.setBoolean("hbase.master.enabletable.roundrobin", true);
- conf.setInt("mapreduce.map.maxattempts", 10);
- // If a single node has enough failures (default 3), resource manager will blacklist it.
- // With only 2 nodes and tests injecting faults, we don't want that.
- conf.setInt("mapreduce.job.maxtaskfailures.per.tracker", 100);
- }
-
- @BeforeClass
- public static void setUpBeforeClass() throws Exception {
- setUpBaseConf(TEST_UTIL.getConfiguration());
- TEST_UTIL.startMiniCluster(1, 3);
- TEST_UTIL.startMiniMapReduceCluster();
- }
-
- @AfterClass
- public static void tearDownAfterClass() throws Exception {
- TEST_UTIL.shutdownMiniMapReduceCluster();
- TEST_UTIL.shutdownMiniCluster();
- }
-
- /**
- * Create a table and take a snapshot of the table used by the export test.
- */
- @Before
- public void setUp() throws Exception {
- this.admin = TEST_UTIL.getAdmin();
-
- tableName = TableName.valueOf("testtb-" + testName.getMethodName());
- snapshotName = Bytes.toBytes("snaptb0-" + testName.getMethodName());
- emptySnapshotName = Bytes.toBytes("emptySnaptb0-" + testName.getMethodName());
-
- // create Table
- createTable();
-
- // Take an empty snapshot
- admin.snapshot(emptySnapshotName, tableName);
-
- // Add some rows
- SnapshotTestingUtils.loadData(TEST_UTIL, tableName, 50, FAMILY);
- tableNumFiles = admin.getTableRegions(tableName).size();
-
- // take a snapshot
- admin.snapshot(snapshotName, tableName);
- }
-
- protected void createTable() throws Exception {
- SnapshotTestingUtils.createPreSplitTable(TEST_UTIL, tableName, 2, FAMILY);
- }
-
- protected interface RegionPredicate {
- boolean evaluate(final HRegionInfo regionInfo);
- }
-
- protected RegionPredicate getBypassRegionPredicate() {
- return null;
- }
-
- @After
- public void tearDown() throws Exception {
- TEST_UTIL.deleteTable(tableName);
- SnapshotTestingUtils.deleteAllSnapshots(TEST_UTIL.getAdmin());
- SnapshotTestingUtils.deleteArchiveDirectory(TEST_UTIL);
- }
-
- /**
- * Verify that the exported snapshot and copied files match the original ones.
- */
- @Test
- public void testExportFileSystemState() throws Exception {
- testExportFileSystemState(tableName, snapshotName, snapshotName, tableNumFiles);
- }
-
- @Test
- public void testExportFileSystemStateWithSkipTmp() throws Exception {
- TEST_UTIL.getConfiguration().setBoolean(ExportSnapshot.CONF_SKIP_TMP, true);
- try {
- testExportFileSystemState(tableName, snapshotName, snapshotName, tableNumFiles);
- } finally {
- TEST_UTIL.getConfiguration().setBoolean(ExportSnapshot.CONF_SKIP_TMP, false);
- }
- }
-
- @Test
- public void testEmptyExportFileSystemState() throws Exception {
- testExportFileSystemState(tableName, emptySnapshotName, emptySnapshotName, 0);
- }
-
- @Test
- public void testConsecutiveExports() throws Exception {
- Path copyDir = getLocalDestinationDir();
- testExportFileSystemState(tableName, snapshotName, snapshotName, tableNumFiles, copyDir, false);
- testExportFileSystemState(tableName, snapshotName, snapshotName, tableNumFiles, copyDir, true);
- removeExportDir(copyDir);
- }
-
- @Test
- public void testExportWithTargetName() throws Exception {
- final byte[] targetName = Bytes.toBytes("testExportWithTargetName");
- testExportFileSystemState(tableName, snapshotName, targetName, tableNumFiles);
- }
-
- private void testExportFileSystemState(final TableName tableName, final byte[] snapshotName,
- final byte[] targetName, int filesExpected) throws Exception {
- testExportFileSystemState(tableName, snapshotName, targetName,
- filesExpected, getHdfsDestinationDir(), false);
- }
-
- protected void testExportFileSystemState(final TableName tableName,
- final byte[] snapshotName, final byte[] targetName, int filesExpected,
- Path copyDir, boolean overwrite) throws Exception {
- testExportFileSystemState(TEST_UTIL.getConfiguration(), tableName, snapshotName, targetName,
- filesExpected, TEST_UTIL.getDefaultRootDirPath(), copyDir,
- overwrite, getBypassRegionPredicate(), true);
- }
-
- /**
- * Creates the destination directory, runs the ExportSnapshot tool, and runs some verifications.
- */
- protected static void testExportFileSystemState(final Configuration conf, final TableName tableName,
- final byte[] snapshotName, final byte[] targetName, final int filesExpected,
- final Path sourceDir, Path copyDir, final boolean overwrite,
- final RegionPredicate bypassregionPredicate, boolean success) throws Exception {
- URI hdfsUri = FileSystem.get(conf).getUri();
- FileSystem fs = FileSystem.get(copyDir.toUri(), new Configuration());
- copyDir = copyDir.makeQualified(fs);
-
- List<String> opts = new ArrayList<>();
- opts.add("--snapshot");
- opts.add(Bytes.toString(snapshotName));
- opts.add("--copy-to");
- opts.add(copyDir.toString());
- if (targetName != snapshotName) {
- opts.add("--target");
- opts.add(Bytes.toString(targetName));
- }
- if (overwrite) opts.add("--overwrite");
-
- // Export Snapshot
- int res = run(conf, new ExportSnapshot(), opts.toArray(new String[opts.size()]));
- assertEquals(success ? 0 : 1, res);
- if (!success) {
- final Path targetDir = new Path(HConstants.SNAPSHOT_DIR_NAME, Bytes.toString(targetName));
- assertFalse(fs.exists(new Path(copyDir, targetDir)));
- return;
- }
-
- // Verify File-System state
- FileStatus[] rootFiles = fs.listStatus(copyDir);
- assertEquals(filesExpected > 0 ? 2 : 1, rootFiles.length);
- for (FileStatus fileStatus: rootFiles) {
- String name = fileStatus.getPath().getName();
- assertTrue(fileStatus.isDirectory());
- assertTrue(name.equals(HConstants.SNAPSHOT_DIR_NAME) ||
- name.equals(HConstants.HFILE_ARCHIVE_DIRECTORY));
- }
-
- // compare the snapshot metadata and verify the hfiles
- final FileSystem hdfs = FileSystem.get(hdfsUri, conf);
- final Path snapshotDir = new Path(HConstants.SNAPSHOT_DIR_NAME, Bytes.toString(snapshotName));
- final Path targetDir = new Path(HConstants.SNAPSHOT_DIR_NAME, Bytes.toString(targetName));
- verifySnapshotDir(hdfs, new Path(sourceDir, snapshotDir),
- fs, new Path(copyDir, targetDir));
- Set<String> snapshotFiles = verifySnapshot(conf, fs, copyDir, tableName,
- Bytes.toString(targetName), bypassregionPredicate);
- assertEquals(filesExpected, snapshotFiles.size());
- }
-
- /**
- * Check that ExportSnapshot will succeed if something fails but the retry succeeds.
- */
- @Test
- public void testExportRetry() throws Exception {
- Path copyDir = getLocalDestinationDir();
- FileSystem fs = FileSystem.get(copyDir.toUri(), new Configuration());
- copyDir = copyDir.makeQualified(fs);
- Configuration conf = new Configuration(TEST_UTIL.getConfiguration());
- conf.setBoolean(ExportSnapshot.Testing.CONF_TEST_FAILURE, true);
- conf.setInt(ExportSnapshot.Testing.CONF_TEST_FAILURE_COUNT, 2);
- conf.setInt("mapreduce.map.maxattempts", 3);
- testExportFileSystemState(conf, tableName, snapshotName, snapshotName, tableNumFiles,
- TEST_UTIL.getDefaultRootDirPath(), copyDir, true, getBypassRegionPredicate(), true);
- }
-
- /**
- * Check that ExportSnapshot will fail if we inject failure more times than MR will retry.
- */
- @Test
- public void testExportFailure() throws Exception {
- Path copyDir = getLocalDestinationDir();
- FileSystem fs = FileSystem.get(copyDir.toUri(), new Configuration());
- copyDir = copyDir.makeQualified(fs);
- Configuration conf = new Configuration(TEST_UTIL.getConfiguration());
- conf.setBoolean(ExportSnapshot.Testing.CONF_TEST_FAILURE, true);
- conf.setInt(ExportSnapshot.Testing.CONF_TEST_FAILURE_COUNT, 4);
- conf.setInt("mapreduce.map.maxattempts", 3);
- testExportFileSystemState(conf, tableName, snapshotName, snapshotName, tableNumFiles,
- TEST_UTIL.getDefaultRootDirPath(), copyDir, true, getBypassRegionPredicate(), false);
- }
-
- /*
- * Verify that the snapshot folder on file-system 1 matches the one on file-system 2
- */
- protected static void verifySnapshotDir(final FileSystem fs1, final Path root1,
- final FileSystem fs2, final Path root2) throws IOException {
- assertEquals(listFiles(fs1, root1, root1), listFiles(fs2, root2, root2));
- }
-
- protected Set<String> verifySnapshot(final FileSystem fs, final Path rootDir,
- final TableName tableName, final String snapshotName) throws IOException {
- return verifySnapshot(TEST_UTIL.getConfiguration(), fs, rootDir, tableName,
- snapshotName, getBypassRegionPredicate());
- }
-
- /*
- * Verify that the files exist
- */
- protected static Set<String> verifySnapshot(final Configuration conf, final FileSystem fs,
- final Path rootDir, final TableName tableName, final String snapshotName,
- final RegionPredicate bypassregionPredicate) throws IOException {
- final Path exportedSnapshot = new Path(rootDir,
- new Path(HConstants.SNAPSHOT_DIR_NAME, snapshotName));
- final Set<String> snapshotFiles = new HashSet<>();
- final Path exportedArchive = new Path(rootDir, HConstants.HFILE_ARCHIVE_DIRECTORY);
- SnapshotReferenceUtil.visitReferencedFiles(conf, fs, exportedSnapshot,
- new SnapshotReferenceUtil.SnapshotVisitor() {
- @Override
- public void storeFile(final HRegionInfo regionInfo, final String family,
- final SnapshotRegionManifest.StoreFile storeFile) throws IOException {
- if (bypassregionPredicate != null && bypassregionPredicate.evaluate(regionInfo))
- return;
-
- String hfile = storeFile.getName();
- snapshotFiles.add(hfile);
- if (storeFile.hasReference()) {
- // Nothing to do here, we already have the reference embedded
- } else {
- verifyNonEmptyFile(new Path(exportedArchive,
- new Path(FSUtils.getTableDir(new Path("./"), tableName),
- new Path(regionInfo.getEncodedName(), new Path(family, hfile)))));
- }
- }
-
- private void verifyNonEmptyFile(final Path path) throws IOException {
- assertTrue(path + " should exist", fs.exists(path));
- assertTrue(path + " should not be empty", fs.getFileStatus(path).getLen() > 0);
- }
- });
-
- // Verify Snapshot description
- SnapshotDescription desc = SnapshotDescriptionUtils.readSnapshotInfo(fs, exportedSnapshot);
- assertTrue(desc.getName().equals(snapshotName));
- assertTrue(desc.getTable().equals(tableName.getNameAsString()));
- return snapshotFiles;
- }
-
- private static Set<String> listFiles(final FileSystem fs, final Path root, final Path dir)
- throws IOException {
- Set<String> files = new HashSet<>();
- int rootPrefix = root.makeQualified(fs).toString().length();
- FileStatus[] list = FSUtils.listStatus(fs, dir);
- if (list != null) {
- for (FileStatus fstat: list) {
- LOG.debug(fstat.getPath());
- if (fstat.isDirectory()) {
- files.addAll(listFiles(fs, root, fstat.getPath()));
- } else {
- files.add(fstat.getPath().makeQualified(fs).toString().substring(rootPrefix));
- }
- }
- }
- return files;
- }
-
- private Path getHdfsDestinationDir() {
- Path rootDir = TEST_UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir();
- Path path = new Path(new Path(rootDir, "export-test"), "export-" + System.currentTimeMillis());
- LOG.info("HDFS export destination path: " + path);
- return path;
- }
-
- private Path getLocalDestinationDir() {
- Path path = TEST_UTIL.getDataTestDir("local-export-" + System.currentTimeMillis());
- LOG.info("Local export destination path: " + path);
- return path;
- }
-
- private static void removeExportDir(final Path path) throws IOException {
- FileSystem fs = FileSystem.get(path.toUri(), new Configuration());
- fs.delete(path, true);
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotHelpers.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotHelpers.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotHelpers.java
deleted file mode 100644
index e31e81e..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotHelpers.java
+++ /dev/null
@@ -1,91 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.snapshot;
-
-import static org.junit.Assert.assertEquals;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotFileInfo;
-import org.apache.hadoop.hbase.testclassification.RegionServerTests;
-import org.apache.hadoop.hbase.testclassification.SmallTests;
-import org.apache.hadoop.hbase.util.Pair;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-/**
- * Test Export Snapshot Tool helpers
- */
-@Category({RegionServerTests.class, SmallTests.class})
-public class TestExportSnapshotHelpers {
- /**
- * Verify the result of the getBalancedSplits() method.
- * The result is a set of groups of files, used as the input list for the "export" mappers.
- * All the groups should hold a similar amount of data.
- *
- * The input list is a list of (file path, length) pairs.
- * The getBalancedSplits() function sorts it by length,
- * and assigns a file to each group, going back and forth through the groups.
- */
- @Test
- public void testBalanceSplit() throws Exception {
- // Create a list of files
- List<Pair<SnapshotFileInfo, Long>> files = new ArrayList<>(21);
- for (long i = 0; i <= 20; i++) {
- SnapshotFileInfo fileInfo = SnapshotFileInfo.newBuilder()
- .setType(SnapshotFileInfo.Type.HFILE)
- .setHfile("file-" + i)
- .build();
- files.add(new Pair<>(fileInfo, i));
- }
-
- // Create 5 groups (total size 210)
- // group 0: 20, 11, 10, 1, 0 (total size: 42)
- // group 1: 19, 12, 9, 2 (total size: 42)
- // group 2: 18, 13, 8, 3 (total size: 42)
- // group 3: 17, 14, 7, 4 (total size: 42)
- // group 4: 16, 15, 6, 5 (total size: 42)
- List<List<Pair<SnapshotFileInfo, Long>>> splits = ExportSnapshot.getBalancedSplits(files, 5);
- assertEquals(5, splits.size());
-
- String[] split0 = new String[] {"file-20", "file-11", "file-10", "file-1", "file-0"};
- verifyBalanceSplit(splits.get(0), split0, 42);
- String[] split1 = new String[] {"file-19", "file-12", "file-9", "file-2"};
- verifyBalanceSplit(splits.get(1), split1, 42);
- String[] split2 = new String[] {"file-18", "file-13", "file-8", "file-3"};
- verifyBalanceSplit(splits.get(2), split2, 42);
- String[] split3 = new String[] {"file-17", "file-14", "file-7", "file-4"};
- verifyBalanceSplit(splits.get(3), split3, 42);
- String[] split4 = new String[] {"file-16", "file-15", "file-6", "file-5"};
- verifyBalanceSplit(splits.get(4), split4, 42);
- }
-
- private void verifyBalanceSplit(final List<Pair<SnapshotFileInfo, Long>> split,
- final String[] expected, final long expectedSize) {
- assertEquals(expected.length, split.size());
- long totalSize = 0;
- for (int i = 0; i < expected.length; ++i) {
- Pair<SnapshotFileInfo, Long> fileInfo = split.get(i);
- assertEquals(expected[i], fileInfo.getFirst().getHfile());
- totalSize += fileInfo.getSecond();
- }
- assertEquals(expectedSize, totalSize);
- }
-}
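The grouping above can be illustrated with a small self-contained sketch of the back-and-forth assignment the javadoc describes. This is not the real ExportSnapshot.getBalancedSplits() implementation; the BalancedSplitSketch class and the use of plain longs for file lengths are assumptions for illustration, but with lengths 0..20 and 5 groups it reproduces the groups asserted in testBalanceSplit().

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public class BalancedSplitSketch {
  // Distributes file lengths over the given number of groups, walking the groups
  // back and forth so every group ends up with a similar total size.
  public static List<List<Long>> split(List<Long> lengths, int groups) {
    List<Long> sorted = new ArrayList<>(lengths);
    sorted.sort(Collections.reverseOrder());   // largest files first
    List<List<Long>> result = new ArrayList<>(groups);
    for (int g = 0; g < groups; g++) {
      result.add(new ArrayList<Long>());
    }
    int i = 0;
    int dir = 1;                               // +1 walking forward, -1 walking backward
    for (Long length : sorted) {
      result.get(i).add(length);
      if (i + dir < 0 || i + dir >= groups) {
        dir = -dir;                            // bounce at either end, reusing the edge group
      } else {
        i += dir;
      }
    }
    return result;
  }
}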
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotNoCluster.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotNoCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotNoCluster.java
deleted file mode 100644
index 00778502..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotNoCluster.java
+++ /dev/null
@@ -1,112 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.snapshot;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.CategoryBasedTimeout;
-import org.apache.hadoop.hbase.HBaseCommonTestingUtility;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
-import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils.SnapshotMock;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.testclassification.MediumTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestRule;
-
-/**
- * Test Export Snapshot Tool
- */
-@Category({MapReduceTests.class, MediumTests.class})
-public class TestExportSnapshotNoCluster {
- @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
- withTimeout(this.getClass()).withLookingForStuckThread(true).build();
- private static final Log LOG = LogFactory.getLog(TestExportSnapshotNoCluster.class);
-
- protected final static HBaseCommonTestingUtility TEST_UTIL = new HBaseCommonTestingUtility();
-
- private static FileSystem fs;
- private static Path testDir;
-
- public static void setUpBaseConf(Configuration conf) {
- conf.setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true);
- conf.setInt("hbase.regionserver.msginterval", 100);
- conf.setInt("hbase.client.pause", 250);
- conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 6);
- conf.setBoolean("hbase.master.enabletable.roundrobin", true);
- conf.setInt("mapreduce.map.maxattempts", 10);
- conf.set(HConstants.HBASE_DIR, testDir.toString());
- }
-
- @BeforeClass
- public static void setUpBeforeClass() throws Exception {
- testDir = TEST_UTIL.getDataTestDir();
- fs = testDir.getFileSystem(TEST_UTIL.getConfiguration());
-
- setUpBaseConf(TEST_UTIL.getConfiguration());
- }
-
- /**
- * Mock a snapshot with files in the archive dir,
- * two regions, and one reference file.
- */
- @Test
- public void testSnapshotWithRefsExportFileSystemState() throws Exception {
- SnapshotMock snapshotMock = new SnapshotMock(TEST_UTIL.getConfiguration(), fs, testDir);
- SnapshotMock.SnapshotBuilder builder = snapshotMock.createSnapshotV2("tableWithRefsV1",
- "tableWithRefsV1");
- testSnapshotWithRefsExportFileSystemState(builder);
-
- snapshotMock = new SnapshotMock(TEST_UTIL.getConfiguration(), fs, testDir);
- builder = snapshotMock.createSnapshotV2("tableWithRefsV2", "tableWithRefsV2");
- testSnapshotWithRefsExportFileSystemState(builder);
- }
-
- /**
- * Generates a couple of regions for the specified SnapshotMock,
- * then runs the export and verification.
- */
- private void testSnapshotWithRefsExportFileSystemState(SnapshotMock.SnapshotBuilder builder)
- throws Exception {
- Path[] r1Files = builder.addRegion();
- Path[] r2Files = builder.addRegion();
- builder.commit();
- int snapshotFilesCount = r1Files.length + r2Files.length;
-
- byte[] snapshotName = Bytes.toBytes(builder.getSnapshotDescription().getName());
- TableName tableName = builder.getTableDescriptor().getTableName();
- TestExportSnapshot.testExportFileSystemState(TEST_UTIL.getConfiguration(),
- tableName, snapshotName, snapshotName, snapshotFilesCount,
- testDir, getDestinationDir(), false, null, true);
- }
-
- private Path getDestinationDir() {
- Path path = new Path(new Path(testDir, "export-test"), "export-" + System.currentTimeMillis());
- LOG.info("HDFS export destination path: " + path);
- return path;
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobExportSnapshot.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobExportSnapshot.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobExportSnapshot.java
deleted file mode 100644
index 7407a7d..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobExportSnapshot.java
+++ /dev/null
@@ -1,65 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.snapshot;
-
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.mob.MobConstants;
-import org.apache.hadoop.hbase.mob.MobUtils;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.VerySlowRegionServerTests;
-import org.junit.BeforeClass;
-import org.junit.Ignore;
-import org.junit.experimental.categories.Category;
-
-/**
- * Test Export Snapshot Tool
- */
-@Ignore
-@Category({VerySlowRegionServerTests.class, LargeTests.class})
-public class TestMobExportSnapshot extends TestExportSnapshot {
-
- public static void setUpBaseConf(Configuration conf) {
- TestExportSnapshot.setUpBaseConf(conf);
- conf.setInt(MobConstants.MOB_FILE_CACHE_SIZE_KEY, 0);
- }
-
- @BeforeClass
- public static void setUpBeforeClass() throws Exception {
- setUpBaseConf(TEST_UTIL.getConfiguration());
- TEST_UTIL.startMiniCluster(1, 3);
- TEST_UTIL.startMiniMapReduceCluster();
- }
-
- @Override
- protected void createTable() throws Exception {
- MobSnapshotTestingUtils.createPreSplitMobTable(TEST_UTIL, tableName, 2, FAMILY);
- }
-
- @Override
- protected RegionPredicate getBypassRegionPredicate() {
- return new RegionPredicate() {
- @Override
- public boolean evaluate(final HRegionInfo regionInfo) {
- return MobUtils.isMobRegionInfo(regionInfo);
- }
- };
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobSecureExportSnapshot.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobSecureExportSnapshot.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobSecureExportSnapshot.java
deleted file mode 100644
index 98d03c0..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobSecureExportSnapshot.java
+++ /dev/null
@@ -1,59 +0,0 @@
-/**
- * Copyright The Apache Software Foundation
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.snapshot;
-
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.VerySlowRegionServerTests;
-import org.apache.hadoop.hbase.mapreduce.HadoopSecurityEnabledUserProviderForTesting;
-import org.apache.hadoop.hbase.security.UserProvider;
-import org.apache.hadoop.hbase.security.access.AccessControlLists;
-import org.apache.hadoop.hbase.security.access.SecureTestUtil;
-
-import org.junit.BeforeClass;
-import org.junit.Ignore;
-import org.junit.experimental.categories.Category;
-
-/**
- * Reruns TestMobExportSnapshot using ExportSnapshot in secure mode.
- */
-@Ignore
-@Category({VerySlowRegionServerTests.class, LargeTests.class})
-public class TestMobSecureExportSnapshot extends TestMobExportSnapshot {
- @BeforeClass
- public static void setUpBeforeClass() throws Exception {
- setUpBaseConf(TEST_UTIL.getConfiguration());
- // Setup separate test-data directory for MR cluster and set corresponding configurations.
- // Otherwise, different test classes running MR cluster can step on each other.
- TEST_UTIL.getDataTestDir();
-
- // set the always on security provider
- UserProvider.setUserProviderForTesting(TEST_UTIL.getConfiguration(),
- HadoopSecurityEnabledUserProviderForTesting.class);
-
- // setup configuration
- SecureTestUtil.enableSecurity(TEST_UTIL.getConfiguration());
-
- TEST_UTIL.startMiniCluster(1, 3);
- TEST_UTIL.startMiniMapReduceCluster();
-
- // Wait for the ACL table to become available
- TEST_UTIL.waitTableEnabled(AccessControlLists.ACL_TABLE_NAME);
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestSecureExportSnapshot.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestSecureExportSnapshot.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestSecureExportSnapshot.java
deleted file mode 100644
index 7d4832c..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestSecureExportSnapshot.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/**
- * Copyright The Apache Software Foundation
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.snapshot;
-
-import org.apache.hadoop.hbase.CategoryBasedTimeout;
-import org.apache.hadoop.hbase.mapreduce.HadoopSecurityEnabledUserProviderForTesting;
-import org.apache.hadoop.hbase.security.UserProvider;
-import org.apache.hadoop.hbase.security.access.AccessControlLists;
-import org.apache.hadoop.hbase.security.access.SecureTestUtil;
-
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.VerySlowRegionServerTests;
-import org.junit.BeforeClass;
-import org.junit.Ignore;
-import org.junit.Rule;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestRule;
-
-/**
- * Reruns TestExportSnapshot using ExportSnapshot in secure mode.
- */
-@Ignore
-@Category({VerySlowRegionServerTests.class, LargeTests.class})
-public class TestSecureExportSnapshot extends TestExportSnapshot {
- @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
- withTimeout(this.getClass()).withLookingForStuckThread(true).build();
- @BeforeClass
- public static void setUpBeforeClass() throws Exception {
- setUpBaseConf(TEST_UTIL.getConfiguration());
- // Setup separate test-data directory for MR cluster and set corresponding configurations.
- // Otherwise, different test classes running MR cluster can step on each other.
- TEST_UTIL.getDataTestDir();
-
- // set the always on security provider
- UserProvider.setUserProviderForTesting(TEST_UTIL.getConfiguration(),
- HadoopSecurityEnabledUserProviderForTesting.class);
-
- // setup configuration
- SecureTestUtil.enableSecurity(TEST_UTIL.getConfiguration());
-
- TEST_UTIL.startMiniCluster(1, 3);
- TEST_UTIL.startMiniMapReduceCluster();
-
- // Wait for the ACL table to become available
- TEST_UTIL.waitTableEnabled(AccessControlLists.ACL_TABLE_NAME);
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/util/HFileTestUtil.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/HFileTestUtil.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/HFileTestUtil.java
index 236994a..0487bf4 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/HFileTestUtil.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/HFileTestUtil.java
@@ -22,6 +22,8 @@ import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.fail;
import java.io.IOException;
+import java.util.Arrays;
+import java.util.Locale;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
@@ -29,6 +31,7 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.ArrayBackedTag;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.TagType;
@@ -50,6 +53,17 @@ import org.apache.hadoop.hbase.regionserver.StoreFile;
*/
public class HFileTestUtil {
+ public static final String OPT_DATA_BLOCK_ENCODING_USAGE =
+ "Encoding algorithm (e.g. prefix "
+ + "compression) to use for data blocks in the test column family, "
+ + "one of " + Arrays.toString(DataBlockEncoding.values()) + ".";
+ public static final String OPT_DATA_BLOCK_ENCODING =
+ HColumnDescriptor.DATA_BLOCK_ENCODING.toLowerCase(Locale.ROOT);
+ /** Column family used by the test */
+ public static byte[] DEFAULT_COLUMN_FAMILY = Bytes.toBytes("test_cf");
+ /** Column families used by the test */
+ public static final byte[][] DEFAULT_COLUMN_FAMILIES = { DEFAULT_COLUMN_FAMILY };
+
/**
* Create an HFile with the given number of rows between a given
* start key and end key @ family:qualifier. The value will be the key value.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/util/LoadTestDataGeneratorWithTags.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/LoadTestDataGeneratorWithTags.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/LoadTestDataGeneratorWithTags.java
index 2ea01bb..0b3c612 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/LoadTestDataGeneratorWithTags.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/LoadTestDataGeneratorWithTags.java
@@ -31,6 +31,7 @@ import org.apache.hadoop.hbase.ArrayBackedTag;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.MultiThreadedAction.DefaultDataGenerator;
+import org.apache.hadoop.hbase.util.test.LoadTestDataGenerator;
@InterfaceAudience.Private
public class LoadTestDataGeneratorWithTags extends DefaultDataGenerator {
@@ -74,7 +75,7 @@ public class LoadTestDataGeneratorWithTags extends DefaultDataGenerator {
List<Tag> tags;
for (CellScanner cellScanner = m.cellScanner(); cellScanner.advance();) {
Cell cell = cellScanner.current();
- byte[] tag = LoadTestTool.generateData(random,
+ byte[] tag = LoadTestDataGenerator.generateData(random,
minTagLength + random.nextInt(maxTagLength - minTagLength));
tags = new ArrayList<>();
for (int n = 0; n < numTags; n++) {
[38/41] hbase git commit: HBASE-18640 Move mapreduce out of
hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/HRegionPartitioner.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/HRegionPartitioner.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/HRegionPartitioner.java
new file mode 100644
index 0000000..3c3060b
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/HRegionPartitioner.java
@@ -0,0 +1,140 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapred.TableOutputFormat;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapreduce.Partitioner;
+
+/**
+ * This is used to partition the output keys into groups of keys.
+ * Keys are grouped according to the regions that currently exist,
+ * so that each reducer fills a single region and the load is distributed.
+ *
+ * <p>This class is not suitable as a partitioner for creating hfiles
+ * for incremental bulk loads, as the region spread will likely change between the time of
+ * hfile creation and load time. See {@link LoadIncrementalHFiles}
+ * and <a href="http://hbase.apache.org/book.html#arch.bulk.load">Bulk Load</a>.</p>
+ *
+ * @param <KEY> The type of the key.
+ * @param <VALUE> The type of the value.
+ */
+@InterfaceAudience.Public
+public class HRegionPartitioner<KEY, VALUE>
+extends Partitioner<ImmutableBytesWritable, VALUE>
+implements Configurable {
+
+ private static final Log LOG = LogFactory.getLog(HRegionPartitioner.class);
+ private Configuration conf = null;
+ // Connection and locator are not cleaned up; they just die when partitioner is done.
+ private Connection connection;
+ private RegionLocator locator;
+ private byte[][] startKeys;
+
+ /**
+ * Gets the partition number for a given key (hence record) given the total
+ * number of partitions i.e. number of reduce-tasks for the job.
+ *
+ * <p>Typically a hash function on all or a subset of the key.</p>
+ *
+ * @param key The key to be partitioned.
+ * @param value The entry value.
+ * @param numPartitions The total number of partitions.
+ * @return The partition number for the <code>key</code>.
+ * @see org.apache.hadoop.mapreduce.Partitioner#getPartition(
+ * java.lang.Object, java.lang.Object, int)
+ */
+ @Override
+ public int getPartition(ImmutableBytesWritable key,
+ VALUE value, int numPartitions) {
+ byte[] region = null;
+ // Only one region, so everything goes to partition 0
+ if (this.startKeys.length == 1){
+ return 0;
+ }
+ try {
+ // Not sure if this is cached after a split, so we could have problems
+ // here if a region splits while mapping
+ region = this.locator.getRegionLocation(key.get()).getRegionInfo().getStartKey();
+ } catch (IOException e) {
+ LOG.error(e);
+ }
+ for (int i = 0; i < this.startKeys.length; i++){
+ if (Bytes.compareTo(region, this.startKeys[i]) == 0 ){
+ if (i >= numPartitions-1){
+ // cover the case where we have fewer reducers than regions.
+ return (Integer.toString(i).hashCode()
+ & Integer.MAX_VALUE) % numPartitions;
+ }
+ return i;
+ }
+ }
+ // if the above fails to find a matching start key, we still need to return something
+ return 0;
+ }
+
+ /**
+ * Returns the current configuration.
+ *
+ * @return The current configuration.
+ * @see org.apache.hadoop.conf.Configurable#getConf()
+ */
+ @Override
+ public Configuration getConf() {
+ return conf;
+ }
+
+ /**
+ * Sets the configuration. This is used to determine the start keys for the
+ * given table.
+ *
+ * @param configuration The configuration to set.
+ * @see org.apache.hadoop.conf.Configurable#setConf(
+ * org.apache.hadoop.conf.Configuration)
+ */
+ @Override
+ public void setConf(Configuration configuration) {
+ this.conf = HBaseConfiguration.create(configuration);
+ try {
+ this.connection = ConnectionFactory.createConnection(HBaseConfiguration.create(conf));
+ TableName tableName = TableName.valueOf(conf.get(TableOutputFormat.OUTPUT_TABLE));
+ this.locator = this.connection.getRegionLocator(tableName);
+ } catch (IOException e) {
+ LOG.error(e);
+ }
+ try {
+ this.startKeys = this.locator.getStartKeys();
+ } catch (IOException e) {
+ LOG.error(e);
+ }
+ }
+}
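
For readers following along, a minimal sketch of how this partitioner might be wired into a job by hand; the table name "MyTable" and the reducer count are assumptions for illustration, not part of this change:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.mapred.TableOutputFormat;
    import org.apache.hadoop.hbase.mapreduce.HRegionPartitioner;
    import org.apache.hadoop.mapreduce.Job;

    public class PartitionByRegionExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // HRegionPartitioner.setConf() reads the output table from this key and
        // looks up that table's region start keys through a RegionLocator.
        conf.set(TableOutputFormat.OUTPUT_TABLE, "MyTable");
        Job job = Job.getInstance(conf, "partition-by-region");
        job.setPartitionerClass(HRegionPartitioner.class);
        // Keys belonging to the same region go to the same reducer; with fewer
        // reducers than regions, the overflow regions are hashed across them.
        job.setNumReduceTasks(4);
      }
    }

In practice this is often done through TableMapReduceUtil, which also sets OUTPUT_TABLE; the manual wiring above just makes that dependency visible.
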
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/HashTable.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/HashTable.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/HashTable.java
new file mode 100644
index 0000000..2c8caf5
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/HashTable.java
@@ -0,0 +1,747 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Properties;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.io.MapFile;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.base.Charsets;
+import org.apache.hadoop.hbase.shaded.com.google.common.base.Throwables;
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.Ordering;
+
+public class HashTable extends Configured implements Tool {
+
+ private static final Log LOG = LogFactory.getLog(HashTable.class);
+
+ private static final int DEFAULT_BATCH_SIZE = 8000;
+
+ private final static String HASH_BATCH_SIZE_CONF_KEY = "hash.batch.size";
+ final static String PARTITIONS_FILE_NAME = "partitions";
+ final static String MANIFEST_FILE_NAME = "manifest";
+ final static String HASH_DATA_DIR = "hashes";
+ final static String OUTPUT_DATA_FILE_PREFIX = "part-r-";
+ private final static String TMP_MANIFEST_FILE_NAME = "manifest.tmp";
+
+ TableHash tableHash = new TableHash();
+ Path destPath;
+
+ public HashTable(Configuration conf) {
+ super(conf);
+ }
+
+ public static class TableHash {
+
+ Path hashDir;
+
+ String tableName;
+ String families = null;
+ long batchSize = DEFAULT_BATCH_SIZE;
+ int numHashFiles = 0;
+ byte[] startRow = HConstants.EMPTY_START_ROW;
+ byte[] stopRow = HConstants.EMPTY_END_ROW;
+ int scanBatch = 0;
+ int versions = -1;
+ long startTime = 0;
+ long endTime = 0;
+
+ List<ImmutableBytesWritable> partitions;
+
+ public static TableHash read(Configuration conf, Path hashDir) throws IOException {
+ TableHash tableHash = new TableHash();
+ FileSystem fs = hashDir.getFileSystem(conf);
+ tableHash.hashDir = hashDir;
+ tableHash.readPropertiesFile(fs, new Path(hashDir, MANIFEST_FILE_NAME));
+ tableHash.readPartitionFile(fs, conf, new Path(hashDir, PARTITIONS_FILE_NAME));
+ return tableHash;
+ }
+
+ void writePropertiesFile(FileSystem fs, Path path) throws IOException {
+ Properties p = new Properties();
+ p.setProperty("table", tableName);
+ if (families != null) {
+ p.setProperty("columnFamilies", families);
+ }
+ p.setProperty("targetBatchSize", Long.toString(batchSize));
+ p.setProperty("numHashFiles", Integer.toString(numHashFiles));
+ if (!isTableStartRow(startRow)) {
+ p.setProperty("startRowHex", Bytes.toHex(startRow));
+ }
+ if (!isTableEndRow(stopRow)) {
+ p.setProperty("stopRowHex", Bytes.toHex(stopRow));
+ }
+ if (scanBatch > 0) {
+ p.setProperty("scanBatch", Integer.toString(scanBatch));
+ }
+ if (versions >= 0) {
+ p.setProperty("versions", Integer.toString(versions));
+ }
+ if (startTime != 0) {
+ p.setProperty("startTimestamp", Long.toString(startTime));
+ }
+ if (endTime != 0) {
+ p.setProperty("endTimestamp", Long.toString(endTime));
+ }
+
+ try (OutputStreamWriter osw = new OutputStreamWriter(fs.create(path), Charsets.UTF_8)) {
+ p.store(osw, null);
+ }
+ }
+
+ void readPropertiesFile(FileSystem fs, Path path) throws IOException {
+ Properties p = new Properties();
+ try (FSDataInputStream in = fs.open(path)) {
+ try (InputStreamReader isr = new InputStreamReader(in, Charsets.UTF_8)) {
+ p.load(isr);
+ }
+ }
+ tableName = p.getProperty("table");
+ families = p.getProperty("columnFamilies");
+ batchSize = Long.parseLong(p.getProperty("targetBatchSize"));
+ numHashFiles = Integer.parseInt(p.getProperty("numHashFiles"));
+
+ String startRowHex = p.getProperty("startRowHex");
+ if (startRowHex != null) {
+ startRow = Bytes.fromHex(startRowHex);
+ }
+ String stopRowHex = p.getProperty("stopRowHex");
+ if (stopRowHex != null) {
+ stopRow = Bytes.fromHex(stopRowHex);
+ }
+
+ String scanBatchString = p.getProperty("scanBatch");
+ if (scanBatchString != null) {
+ scanBatch = Integer.parseInt(scanBatchString);
+ }
+
+ String versionString = p.getProperty("versions");
+ if (versionString != null) {
+ versions = Integer.parseInt(versionString);
+ }
+
+ String startTimeString = p.getProperty("startTimestamp");
+ if (startTimeString != null) {
+ startTime = Long.parseLong(startTimeString);
+ }
+
+ String endTimeString = p.getProperty("endTimestamp");
+ if (endTimeString != null) {
+ endTime = Long.parseLong(endTimeString);
+ }
+ }
+
+ Scan initScan() throws IOException {
+ Scan scan = new Scan();
+ scan.setCacheBlocks(false);
+ if (startTime != 0 || endTime != 0) {
+ scan.setTimeRange(startTime, endTime == 0 ? HConstants.LATEST_TIMESTAMP : endTime);
+ }
+ if (scanBatch > 0) {
+ scan.setBatch(scanBatch);
+ }
+ if (versions >= 0) {
+ scan.setMaxVersions(versions);
+ }
+ if (!isTableStartRow(startRow)) {
+ scan.setStartRow(startRow);
+ }
+ if (!isTableEndRow(stopRow)) {
+ scan.setStopRow(stopRow);
+ }
+ if(families != null) {
+ for(String fam : families.split(",")) {
+ scan.addFamily(Bytes.toBytes(fam));
+ }
+ }
+ return scan;
+ }
+
+ /**
+ * Choose partitions between row ranges to hash to a single output file.
+ * Selects region boundaries that fall within the scan range and groups them
+ * into the desired number of partitions.
+ */
+ void selectPartitions(Pair<byte[][], byte[][]> regionStartEndKeys) {
+ List<byte[]> startKeys = new ArrayList<>();
+ for (int i = 0; i < regionStartEndKeys.getFirst().length; i++) {
+ byte[] regionStartKey = regionStartEndKeys.getFirst()[i];
+ byte[] regionEndKey = regionStartEndKeys.getSecond()[i];
+
+ // keep this region only if it overlaps the scan range;
+ // in other words:
+ // IF (scan begins before the end of this region
+ // AND scan ends after the start of this region)
+ // THEN include this region
+ if ((isTableStartRow(startRow) || isTableEndRow(regionEndKey)
+ || Bytes.compareTo(startRow, regionEndKey) < 0)
+ && (isTableEndRow(stopRow) || isTableStartRow(regionStartKey)
+ || Bytes.compareTo(stopRow, regionStartKey) > 0)) {
+ startKeys.add(regionStartKey);
+ }
+ }
+
+ int numRegions = startKeys.size();
+ if (numHashFiles == 0) {
+ numHashFiles = numRegions / 100;
+ }
+ if (numHashFiles == 0) {
+ numHashFiles = 1;
+ }
+ if (numHashFiles > numRegions) {
+ // can't partition within regions
+ numHashFiles = numRegions;
+ }
+
+ // choose a subset of start keys to group regions into ranges
+ partitions = new ArrayList<>(numHashFiles - 1);
+ // skip the first start key as it is not a partition between ranges.
+ for (long i = 1; i < numHashFiles; i++) {
+ int splitIndex = (int) (numRegions * i / numHashFiles);
+ partitions.add(new ImmutableBytesWritable(startKeys.get(splitIndex)));
+ }
+ }
+
+ void writePartitionFile(Configuration conf, Path path) throws IOException {
+ FileSystem fs = path.getFileSystem(conf);
+ @SuppressWarnings("deprecation")
+ SequenceFile.Writer writer = SequenceFile.createWriter(
+ fs, conf, path, ImmutableBytesWritable.class, NullWritable.class);
+
+ for (int i = 0; i < partitions.size(); i++) {
+ writer.append(partitions.get(i), NullWritable.get());
+ }
+ writer.close();
+ }
+
+ private void readPartitionFile(FileSystem fs, Configuration conf, Path path)
+ throws IOException {
+ @SuppressWarnings("deprecation")
+ SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
+ ImmutableBytesWritable key = new ImmutableBytesWritable();
+ partitions = new ArrayList<>();
+ while (reader.next(key)) {
+ partitions.add(new ImmutableBytesWritable(key.copyBytes()));
+ }
+ reader.close();
+
+ if (!Ordering.natural().isOrdered(partitions)) {
+ throw new IOException("Partitions are not ordered!");
+ }
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("tableName=").append(tableName);
+ if (families != null) {
+ sb.append(", families=").append(families);
+ }
+ sb.append(", batchSize=").append(batchSize);
+ sb.append(", numHashFiles=").append(numHashFiles);
+ if (!isTableStartRow(startRow)) {
+ sb.append(", startRowHex=").append(Bytes.toHex(startRow));
+ }
+ if (!isTableEndRow(stopRow)) {
+ sb.append(", stopRowHex=").append(Bytes.toHex(stopRow));
+ }
+ if (scanBatch >= 0) {
+ sb.append(", scanBatch=").append(scanBatch);
+ }
+ if (versions >= 0) {
+ sb.append(", versions=").append(versions);
+ }
+ if (startTime != 0) {
+ sb.append("startTime=").append(startTime);
+ }
+ if (endTime != 0) {
+ sb.append("endTime=").append(endTime);
+ }
+ return sb.toString();
+ }
+
+ static String getDataFileName(int hashFileIndex) {
+ return String.format(HashTable.OUTPUT_DATA_FILE_PREFIX + "%05d", hashFileIndex);
+ }
+
+ /**
+ * Open a TableHash.Reader starting at the first hash at or after the given key.
+ * @throws IOException
+ */
+ public Reader newReader(Configuration conf, ImmutableBytesWritable startKey)
+ throws IOException {
+ return new Reader(conf, startKey);
+ }
+
+ public class Reader implements java.io.Closeable {
+ private final Configuration conf;
+
+ private int hashFileIndex;
+ private MapFile.Reader mapFileReader;
+
+ private boolean cachedNext;
+ private ImmutableBytesWritable key;
+ private ImmutableBytesWritable hash;
+
+ Reader(Configuration conf, ImmutableBytesWritable startKey) throws IOException {
+ this.conf = conf;
+ int partitionIndex = Collections.binarySearch(partitions, startKey);
+ if (partitionIndex >= 0) {
+ // if the key is equal to a partition, then go to the file after that partition
+ hashFileIndex = partitionIndex+1;
+ } else {
+ // if the key is between partitions, then go to the file between those partitions
+ hashFileIndex = -1-partitionIndex;
+ }
+ openHashFile();
+
+ // MapFiles don't make it easy to seek() so that the subsequent next() returns
+ // the desired key/value pair. So we cache it for the first call of next().
+ hash = new ImmutableBytesWritable();
+ key = (ImmutableBytesWritable) mapFileReader.getClosest(startKey, hash);
+ if (key == null) {
+ cachedNext = false;
+ hash = null;
+ } else {
+ cachedNext = true;
+ }
+ }
+
+ /**
+ * Read the next key/hash pair.
+ * Returns true if such a pair exists and false when at the end of the data.
+ */
+ public boolean next() throws IOException {
+ if (cachedNext) {
+ cachedNext = false;
+ return true;
+ }
+ key = new ImmutableBytesWritable();
+ hash = new ImmutableBytesWritable();
+ while (true) {
+ boolean hasNext = mapFileReader.next(key, hash);
+ if (hasNext) {
+ return true;
+ }
+ hashFileIndex++;
+ if (hashFileIndex < TableHash.this.numHashFiles) {
+ mapFileReader.close();
+ openHashFile();
+ } else {
+ key = null;
+ hash = null;
+ return false;
+ }
+ }
+ }
+
+ /**
+ * Get the current key
+ * @return the current key or null if there is no current key
+ */
+ public ImmutableBytesWritable getCurrentKey() {
+ return key;
+ }
+
+ /**
+ * Get the current hash
+ * @return the current hash or null if there is no current hash
+ */
+ public ImmutableBytesWritable getCurrentHash() {
+ return hash;
+ }
+
+ private void openHashFile() throws IOException {
+ if (mapFileReader != null) {
+ mapFileReader.close();
+ }
+ Path dataDir = new Path(TableHash.this.hashDir, HASH_DATA_DIR);
+ Path dataFile = new Path(dataDir, getDataFileName(hashFileIndex));
+ mapFileReader = new MapFile.Reader(dataFile, conf);
+ }
+
+ @Override
+ public void close() throws IOException {
+ mapFileReader.close();
+ }
+ }
+ }
+
+ static boolean isTableStartRow(byte[] row) {
+ return Bytes.equals(HConstants.EMPTY_START_ROW, row);
+ }
+
+ static boolean isTableEndRow(byte[] row) {
+ return Bytes.equals(HConstants.EMPTY_END_ROW, row);
+ }
+
+ public Job createSubmittableJob(String[] args) throws IOException {
+ Path partitionsPath = new Path(destPath, PARTITIONS_FILE_NAME);
+ generatePartitions(partitionsPath);
+
+ Job job = Job.getInstance(getConf(),
+ getConf().get("mapreduce.job.name", "hashTable_" + tableHash.tableName));
+ Configuration jobConf = job.getConfiguration();
+ jobConf.setLong(HASH_BATCH_SIZE_CONF_KEY, tableHash.batchSize);
+ job.setJarByClass(HashTable.class);
+
+ TableMapReduceUtil.initTableMapperJob(tableHash.tableName, tableHash.initScan(),
+ HashMapper.class, ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
+
+ // use a TotalOrderPartitioner and reducers to group region output into hash files
+ job.setPartitionerClass(TotalOrderPartitioner.class);
+ TotalOrderPartitioner.setPartitionFile(jobConf, partitionsPath);
+ job.setReducerClass(Reducer.class); // identity reducer
+ job.setNumReduceTasks(tableHash.numHashFiles);
+ job.setOutputKeyClass(ImmutableBytesWritable.class);
+ job.setOutputValueClass(ImmutableBytesWritable.class);
+ job.setOutputFormatClass(MapFileOutputFormat.class);
+ FileOutputFormat.setOutputPath(job, new Path(destPath, HASH_DATA_DIR));
+
+ return job;
+ }
+
+ private void generatePartitions(Path partitionsPath) throws IOException {
+ Connection connection = ConnectionFactory.createConnection(getConf());
+ Pair<byte[][], byte[][]> regionKeys
+ = connection.getRegionLocator(TableName.valueOf(tableHash.tableName)).getStartEndKeys();
+ connection.close();
+
+ tableHash.selectPartitions(regionKeys);
+ LOG.info("Writing " + tableHash.partitions.size() + " partition keys to " + partitionsPath);
+
+ tableHash.writePartitionFile(getConf(), partitionsPath);
+ }
+
+ static class ResultHasher {
+ private MessageDigest digest;
+
+ private boolean batchStarted = false;
+ private ImmutableBytesWritable batchStartKey;
+ private ImmutableBytesWritable batchHash;
+ private long batchSize = 0;
+
+
+ public ResultHasher() {
+ try {
+ digest = MessageDigest.getInstance("MD5");
+ } catch (NoSuchAlgorithmException e) {
+ Throwables.propagate(e);
+ }
+ }
+
+ public void startBatch(ImmutableBytesWritable row) {
+ if (batchStarted) {
+ throw new RuntimeException("Cannot start new batch without finishing existing one.");
+ }
+ batchStarted = true;
+ batchSize = 0;
+ batchStartKey = row;
+ batchHash = null;
+ }
+
+ public void hashResult(Result result) {
+ if (!batchStarted) {
+ throw new RuntimeException("Cannot add to batch that has not been started.");
+ }
+ for (Cell cell : result.rawCells()) {
+ int rowLength = cell.getRowLength();
+ int familyLength = cell.getFamilyLength();
+ int qualifierLength = cell.getQualifierLength();
+ int valueLength = cell.getValueLength();
+ digest.update(cell.getRowArray(), cell.getRowOffset(), rowLength);
+ digest.update(cell.getFamilyArray(), cell.getFamilyOffset(), familyLength);
+ digest.update(cell.getQualifierArray(), cell.getQualifierOffset(), qualifierLength);
+ long ts = cell.getTimestamp();
+ for (int i = 8; i > 0; i--) {
+ digest.update((byte) ts);
+ ts >>>= 8;
+ }
+ digest.update(cell.getValueArray(), cell.getValueOffset(), valueLength);
+
+ batchSize += rowLength + familyLength + qualifierLength + 8 + valueLength;
+ }
+ }
+
+ public void finishBatch() {
+ if (!batchStarted) {
+ throw new RuntimeException("Cannot finish batch that has not started.");
+ }
+ batchStarted = false;
+ batchHash = new ImmutableBytesWritable(digest.digest());
+ }
+
+ public boolean isBatchStarted() {
+ return batchStarted;
+ }
+
+ public ImmutableBytesWritable getBatchStartKey() {
+ return batchStartKey;
+ }
+
+ public ImmutableBytesWritable getBatchHash() {
+ return batchHash;
+ }
+
+ public long getBatchSize() {
+ return batchSize;
+ }
+ }
+
+ public static class HashMapper
+ extends TableMapper<ImmutableBytesWritable, ImmutableBytesWritable> {
+
+ private ResultHasher hasher;
+ private long targetBatchSize;
+
+ private ImmutableBytesWritable currentRow;
+
+ @Override
+ protected void setup(Context context) throws IOException, InterruptedException {
+ targetBatchSize = context.getConfiguration()
+ .getLong(HASH_BATCH_SIZE_CONF_KEY, DEFAULT_BATCH_SIZE);
+ hasher = new ResultHasher();
+
+ TableSplit split = (TableSplit) context.getInputSplit();
+ hasher.startBatch(new ImmutableBytesWritable(split.getStartRow()));
+ }
+
+ @Override
+ protected void map(ImmutableBytesWritable key, Result value, Context context)
+ throws IOException, InterruptedException {
+
+ if (currentRow == null || !currentRow.equals(key)) {
+ currentRow = new ImmutableBytesWritable(key); // the framework reuses the key object, so copy it
+
+ if (hasher.getBatchSize() >= targetBatchSize) {
+ hasher.finishBatch();
+ context.write(hasher.getBatchStartKey(), hasher.getBatchHash());
+ hasher.startBatch(currentRow);
+ }
+ }
+
+ hasher.hashResult(value);
+ }
+
+ @Override
+ protected void cleanup(Context context) throws IOException, InterruptedException {
+ hasher.finishBatch();
+ context.write(hasher.getBatchStartKey(), hasher.getBatchHash());
+ }
+ }
+
+ private void writeTempManifestFile() throws IOException {
+ Path tempManifestPath = new Path(destPath, TMP_MANIFEST_FILE_NAME);
+ FileSystem fs = tempManifestPath.getFileSystem(getConf());
+ tableHash.writePropertiesFile(fs, tempManifestPath);
+ }
+
+ private void completeManifest() throws IOException {
+ Path tempManifestPath = new Path(destPath, TMP_MANIFEST_FILE_NAME);
+ Path manifestPath = new Path(destPath, MANIFEST_FILE_NAME);
+ FileSystem fs = tempManifestPath.getFileSystem(getConf());
+ fs.rename(tempManifestPath, manifestPath);
+ }
+
+ private static final int NUM_ARGS = 2;
+ private static void printUsage(final String errorMsg) {
+ if (errorMsg != null && errorMsg.length() > 0) {
+ System.err.println("ERROR: " + errorMsg);
+ System.err.println();
+ }
+ System.err.println("Usage: HashTable [options] <tablename> <outputpath>");
+ System.err.println();
+ System.err.println("Options:");
+ System.err.println(" batchsize the target amount of bytes to hash in each batch");
+ System.err.println(" rows are added to the batch until this size is reached");
+ System.err.println(" (defaults to " + DEFAULT_BATCH_SIZE + " bytes)");
+ System.err.println(" numhashfiles the number of hash files to create");
+ System.err.println(" if set to fewer than number of regions then");
+ System.err.println(" the job will create this number of reducers");
+ System.err.println(" (defaults to 1/100 of regions -- at least 1)");
+ System.err.println(" startrow the start row");
+ System.err.println(" stoprow the stop row");
+ System.err.println(" starttime beginning of the time range (unixtime in millis)");
+ System.err.println(" without endtime means from starttime to forever");
+ System.err.println(" endtime end of the time range. Ignored if no starttime specified.");
+ System.err.println(" scanbatch scanner batch size to support intra row scans");
+ System.err.println(" versions number of cell versions to include");
+ System.err.println(" families comma-separated list of families to include");
+ System.err.println();
+ System.err.println("Args:");
+ System.err.println(" tablename Name of the table to hash");
+ System.err.println(" outputpath Filesystem path to put the output data");
+ System.err.println();
+ System.err.println("Examples:");
+ System.err.println(" To hash 'TestTable' in 32kB batches for a 1 hour window into 50 files:");
+ System.err.println(" $ hbase " +
+ "org.apache.hadoop.hbase.mapreduce.HashTable --batchsize=32000 --numhashfiles=50"
+ + " --starttime=1265875194289 --endtime=1265878794289 --families=cf2,cf3"
+ + " TestTable /hashes/testTable");
+ }
+
+ private boolean doCommandLine(final String[] args) {
+ if (args.length < NUM_ARGS) {
+ printUsage(null);
+ return false;
+ }
+ try {
+
+ tableHash.tableName = args[args.length-2];
+ destPath = new Path(args[args.length-1]);
+
+ for (int i = 0; i < args.length - NUM_ARGS; i++) {
+ String cmd = args[i];
+ if (cmd.equals("-h") || cmd.startsWith("--h")) {
+ printUsage(null);
+ return false;
+ }
+
+ final String batchSizeArgKey = "--batchsize=";
+ if (cmd.startsWith(batchSizeArgKey)) {
+ tableHash.batchSize = Long.parseLong(cmd.substring(batchSizeArgKey.length()));
+ continue;
+ }
+
+ final String numHashFilesArgKey = "--numhashfiles=";
+ if (cmd.startsWith(numHashFilesArgKey)) {
+ tableHash.numHashFiles = Integer.parseInt(cmd.substring(numHashFilesArgKey.length()));
+ continue;
+ }
+
+ final String startRowArgKey = "--startrow=";
+ if (cmd.startsWith(startRowArgKey)) {
+ tableHash.startRow = Bytes.fromHex(cmd.substring(startRowArgKey.length()));
+ continue;
+ }
+
+ final String stopRowArgKey = "--stoprow=";
+ if (cmd.startsWith(stopRowArgKey)) {
+ tableHash.stopRow = Bytes.fromHex(cmd.substring(stopRowArgKey.length()));
+ continue;
+ }
+
+ final String startTimeArgKey = "--starttime=";
+ if (cmd.startsWith(startTimeArgKey)) {
+ tableHash.startTime = Long.parseLong(cmd.substring(startTimeArgKey.length()));
+ continue;
+ }
+
+ final String endTimeArgKey = "--endtime=";
+ if (cmd.startsWith(endTimeArgKey)) {
+ tableHash.endTime = Long.parseLong(cmd.substring(endTimeArgKey.length()));
+ continue;
+ }
+
+ final String scanBatchArgKey = "--scanbatch=";
+ if (cmd.startsWith(scanBatchArgKey)) {
+ tableHash.scanBatch = Integer.parseInt(cmd.substring(scanBatchArgKey.length()));
+ continue;
+ }
+
+ final String versionsArgKey = "--versions=";
+ if (cmd.startsWith(versionsArgKey)) {
+ tableHash.versions = Integer.parseInt(cmd.substring(versionsArgKey.length()));
+ continue;
+ }
+
+ final String familiesArgKey = "--families=";
+ if (cmd.startsWith(familiesArgKey)) {
+ tableHash.families = cmd.substring(familiesArgKey.length());
+ continue;
+ }
+
+ printUsage("Invalid argument '" + cmd + "'");
+ return false;
+ }
+ if ((tableHash.startTime != 0 || tableHash.endTime != 0)
+ && (tableHash.startTime >= tableHash.endTime)) {
+ printUsage("Invalid time range filter: starttime="
+ + tableHash.startTime + " >= endtime=" + tableHash.endTime);
+ return false;
+ }
+
+ } catch (Exception e) {
+ e.printStackTrace();
+ printUsage("Can't start because " + e.getMessage());
+ return false;
+ }
+ return true;
+ }
+
+ /**
+ * Main entry point.
+ */
+ public static void main(String[] args) throws Exception {
+ int ret = ToolRunner.run(new HashTable(HBaseConfiguration.create()), args);
+ System.exit(ret);
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+ String[] otherArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs();
+ if (!doCommandLine(otherArgs)) {
+ return 1;
+ }
+
+ Job job = createSubmittableJob(otherArgs);
+ writeTempManifestFile();
+ if (!job.waitForCompletion(true)) {
+ LOG.info("Map-reduce job failed!");
+ return 1;
+ }
+ completeManifest();
+ return 0;
+ }
+
+}
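
Beyond the command-line example in the usage text above, the manifest and hash files that HashTable writes can also be read back programmatically through TableHash, which is how a downstream comparison job can consume them. A minimal sketch, assuming the output was written to /hashes/TestTable (that path is an assumption for illustration):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.HConstants;
    import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
    import org.apache.hadoop.hbase.mapreduce.HashTable;
    import org.apache.hadoop.hbase.util.Bytes;

    public class ReadHashesExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // Reads the manifest (scan properties) and the partitions file.
        HashTable.TableHash hash =
            HashTable.TableHash.read(conf, new Path("/hashes/TestTable"));
        System.out.println(hash); // toString() echoes the recorded scan settings
        // Iterate the key -> hash pairs starting from the beginning of the table.
        try (HashTable.TableHash.Reader reader =
            hash.newReader(conf, new ImmutableBytesWritable(HConstants.EMPTY_START_ROW))) {
          while (reader.next()) {
            System.out.println(Bytes.toStringBinary(reader.getCurrentKey().copyBytes())
                + " -> " + Bytes.toHex(reader.getCurrentHash().copyBytes()));
          }
        }
      }
    }
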
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/IdentityTableMapper.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/IdentityTableMapper.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/IdentityTableMapper.java
new file mode 100644
index 0000000..7103ef8
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/IdentityTableMapper.java
@@ -0,0 +1,67 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.mapreduce.Job;
+
+/**
+ * Pass the given key and record as-is to the reduce phase.
+ */
+@InterfaceAudience.Public
+public class IdentityTableMapper
+extends TableMapper<ImmutableBytesWritable, Result> {
+
+ /**
+ * Use this before submitting a TableMap job. It will appropriately set up
+ * the job.
+ *
+ * @param table The table name.
+ * @param scan The scan with the columns to scan.
+ * @param mapper The mapper class.
+ * @param job The job configuration.
+ * @throws IOException When setting up the job fails.
+ */
+ @SuppressWarnings("rawtypes")
+ public static void initJob(String table, Scan scan,
+ Class<? extends TableMapper> mapper, Job job) throws IOException {
+ TableMapReduceUtil.initTableMapperJob(table, scan, mapper,
+ ImmutableBytesWritable.class, Result.class, job);
+ }
+
+ /**
+ * Pass the key, value to reduce.
+ *
+ * @param key The current key.
+ * @param value The current value.
+ * @param context The current context.
+ * @throws IOException When writing the record fails.
+ * @throws InterruptedException When the job is aborted.
+ */
+ public void map(ImmutableBytesWritable key, Result value, Context context)
+ throws IOException, InterruptedException {
+ context.write(key, value);
+ }
+
+}
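
A minimal sketch of initJob in use, assuming a table named "MyTable"; the job name and scan settings are illustrative only:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.Scan;
    import org.apache.hadoop.hbase.mapreduce.IdentityTableMapper;
    import org.apache.hadoop.mapreduce.Job;

    public class IdentityScanExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        Job job = Job.getInstance(conf, "identity-scan");
        Scan scan = new Scan();
        scan.setCacheBlocks(false); // commonly disabled for full-table MapReduce scans
        // Configures TableInputFormat plus the map output key/value classes
        // (ImmutableBytesWritable / Result) so rows flow through unchanged.
        IdentityTableMapper.initJob("MyTable", scan, IdentityTableMapper.class, job);
      }
    }
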
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/IdentityTableReducer.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/IdentityTableReducer.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/IdentityTableReducer.java
new file mode 100644
index 0000000..73475db
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/IdentityTableReducer.java
@@ -0,0 +1,79 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Mutation;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * Convenience class that simply writes all values (which must be
+ * {@link org.apache.hadoop.hbase.client.Put Put} or
+ * {@link org.apache.hadoop.hbase.client.Delete Delete} instances)
+ * passed to it out to the configured HBase table. This works in combination
+ * with {@link TableOutputFormat} which actually does the writing to HBase.<p>
+ *
+ * Keys are passed along but ignored in TableOutputFormat. However, they can
+ * be used to control how your values will be divided up amongst the specified
+ * number of reducers. <p>
+ *
+ * You can also use the {@link TableMapReduceUtil} class to set up the two
+ * classes in one step:
+ * <blockquote><code>
+ * TableMapReduceUtil.initTableReducerJob("table", IdentityTableReducer.class, job);
+ * </code></blockquote>
+ * This will also set the proper {@link TableOutputFormat} which is given the
+ * <code>table</code> parameter. The
+ * {@link org.apache.hadoop.hbase.client.Put Put} or
+ * {@link org.apache.hadoop.hbase.client.Delete Delete} define the
+ * row and columns implicitly.
+ */
+@InterfaceAudience.Public
+public class IdentityTableReducer
+extends TableReducer<Writable, Mutation, Writable> {
+
+ @SuppressWarnings("unused")
+ private static final Log LOG = LogFactory.getLog(IdentityTableReducer.class);
+
+ /**
+ * Writes each given record, consisting of the row key and the given values,
+ * to the configured {@link org.apache.hadoop.mapreduce.OutputFormat}.
+ * It emits the row key and each {@link org.apache.hadoop.hbase.client.Put Put}
+ * or {@link org.apache.hadoop.hbase.client.Delete Delete} as a separate pair.
+ *
+ * @param key The current row key.
+ * @param values The {@link org.apache.hadoop.hbase.client.Put Put} or
+ * {@link org.apache.hadoop.hbase.client.Delete Delete} list for the given
+ * row.
+ * @param context The context of the reduce.
+ * @throws IOException When writing the record fails.
+ * @throws InterruptedException When the job gets interrupted.
+ */
+ @Override
+ public void reduce(Writable key, Iterable<Mutation> values, Context context)
+ throws IOException, InterruptedException {
+ for(Mutation putOrDelete : values) {
+ context.write(key, putOrDelete);
+ }
+ }
+}
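
To make the setup described in the class comment concrete, a minimal sketch of a copy-style job that pairs a Put-emitting mapper with this reducer; the table names "SourceTable" and "DestTable" and the mapper itself are assumptions for illustration:

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.Cell;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.Put;
    import org.apache.hadoop.hbase.client.Result;
    import org.apache.hadoop.hbase.client.Scan;
    import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
    import org.apache.hadoop.hbase.mapreduce.IdentityTableReducer;
    import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
    import org.apache.hadoop.hbase.mapreduce.TableMapper;
    import org.apache.hadoop.mapreduce.Job;

    public class CopyWithIdentityReducerExample {
      // Emits one Put per source row, copying every cell as-is.
      static class CopyMapper extends TableMapper<ImmutableBytesWritable, Put> {
        @Override
        protected void map(ImmutableBytesWritable row, Result value, Context context)
            throws IOException, InterruptedException {
          Put put = new Put(row.copyBytes());
          for (Cell cell : value.rawCells()) {
            put.add(cell);
          }
          context.write(row, put);
        }
      }

      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        Job job = Job.getInstance(conf, "copy-with-identity-reducer");
        TableMapReduceUtil.initTableMapperJob("SourceTable", new Scan(), CopyMapper.class,
            ImmutableBytesWritable.class, Put.class, job);
        // Wires up TableOutputFormat for "DestTable"; the reducer forwards each Put unchanged.
        TableMapReduceUtil.initTableReducerJob("DestTable", IdentityTableReducer.class, job);
      }
    }
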
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/Import.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/Import.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/Import.java
new file mode 100644
index 0000000..18dcf35
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/Import.java
@@ -0,0 +1,780 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInput;
+import java.io.DataInputStream;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.TreeMap;
+import java.util.UUID;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellComparator;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.KeyValueUtil;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.ZooKeeperConnectionException;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Durability;
+import org.apache.hadoop.hbase.client.Mutation;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.filter.Filter;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.zookeeper.ZKClusterId;
+import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
+import org.apache.hadoop.io.RawComparator;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.WritableComparator;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Partitioner;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.TaskCounter;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.zookeeper.KeeperException;
+
+
+/**
+ * Import data written by {@link Export}.
+ */
+@InterfaceAudience.Public
+public class Import extends Configured implements Tool {
+ private static final Log LOG = LogFactory.getLog(Import.class);
+ final static String NAME = "import";
+ public final static String CF_RENAME_PROP = "HBASE_IMPORTER_RENAME_CFS";
+ public final static String BULK_OUTPUT_CONF_KEY = "import.bulk.output";
+ public final static String FILTER_CLASS_CONF_KEY = "import.filter.class";
+ public final static String FILTER_ARGS_CONF_KEY = "import.filter.args";
+ public final static String TABLE_NAME = "import.table.name";
+ public final static String WAL_DURABILITY = "import.wal.durability";
+ public final static String HAS_LARGE_RESULT= "import.bulk.hasLargeResult";
+
+ private final static String JOB_NAME_CONF_KEY = "mapreduce.job.name";
+
+ public static class KeyValueWritableComparablePartitioner
+ extends Partitioner<KeyValueWritableComparable, KeyValue> {
+ private static KeyValueWritableComparable[] START_KEYS = null;
+ @Override
+ public int getPartition(KeyValueWritableComparable key, KeyValue value,
+ int numPartitions) {
+ for (int i = 0; i < START_KEYS.length; ++i) {
+ if (key.compareTo(START_KEYS[i]) <= 0) {
+ return i;
+ }
+ }
+ return START_KEYS.length;
+ }
+
+ }
+
+ public static class KeyValueWritableComparable
+ implements WritableComparable<KeyValueWritableComparable> {
+
+ private KeyValue kv = null;
+
+ static {
+ // register this comparator
+ WritableComparator.define(KeyValueWritableComparable.class,
+ new KeyValueWritableComparator());
+ }
+
+ public KeyValueWritableComparable() {
+ }
+
+ public KeyValueWritableComparable(KeyValue kv) {
+ this.kv = kv;
+ }
+
+ @Override
+ public void write(DataOutput out) throws IOException {
+ KeyValue.write(kv, out);
+ }
+
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ kv = KeyValue.create(in);
+ }
+
+ @Override
+ @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="EQ_COMPARETO_USE_OBJECT_EQUALS",
+ justification="This is wrong, yes, but we should be purging Writables, not fixing them")
+ public int compareTo(KeyValueWritableComparable o) {
+ return CellComparator.COMPARATOR.compare(this.kv, ((KeyValueWritableComparable)o).kv);
+ }
+
+ public static class KeyValueWritableComparator extends WritableComparator {
+
+ @Override
+ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+ try {
+ KeyValueWritableComparable kv1 = new KeyValueWritableComparable();
+ kv1.readFields(new DataInputStream(new ByteArrayInputStream(b1, s1, l1)));
+ KeyValueWritableComparable kv2 = new KeyValueWritableComparable();
+ kv2.readFields(new DataInputStream(new ByteArrayInputStream(b2, s2, l2)));
+ return compare(kv1, kv2);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ }
+
+ }
+
+ public static class KeyValueReducer
+ extends
+ Reducer<KeyValueWritableComparable, KeyValue, ImmutableBytesWritable, KeyValue> {
+ protected void reduce(
+ KeyValueWritableComparable row,
+ Iterable<KeyValue> kvs,
+ Reducer<KeyValueWritableComparable,
+ KeyValue, ImmutableBytesWritable, KeyValue>.Context context)
+ throws java.io.IOException, InterruptedException {
+ int index = 0;
+ for (KeyValue kv : kvs) {
+ context.write(new ImmutableBytesWritable(kv.getRowArray()), kv);
+ if (++index % 100 == 0)
+ context.setStatus("Wrote " + index + " KeyValues, "
+ + "and the rowkey whose is being wrote is " + Bytes.toString(kv.getRowArray()));
+ }
+ }
+ }
+
+ public static class KeyValueSortImporter
+ extends TableMapper<KeyValueWritableComparable, KeyValue> {
+ private Map<byte[], byte[]> cfRenameMap;
+ private Filter filter;
+ private static final Log LOG = LogFactory.getLog(KeyValueImporter.class);
+
+ /**
+ * @param row The current table row key.
+ * @param value The columns.
+ * @param context The current context.
+ * @throws IOException When something is broken with the data.
+ */
+ @Override
+ public void map(ImmutableBytesWritable row, Result value,
+ Context context)
+ throws IOException {
+ try {
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Considering the row."
+ + Bytes.toString(row.get(), row.getOffset(), row.getLength()));
+ }
+ if (filter == null
+ || !filter.filterRowKey(CellUtil.createFirstOnRow(row.get(), row.getOffset(),
+ (short) row.getLength()))) {
+ for (Cell kv : value.rawCells()) {
+ kv = filterKv(filter, kv);
+ // skip if we filtered it out
+ if (kv == null) continue;
+ // TODO get rid of ensureKeyValue
+ KeyValue ret = KeyValueUtil.ensureKeyValue(convertKv(kv, cfRenameMap));
+ context.write(new KeyValueWritableComparable(ret.createKeyOnly(false)), ret);
+ }
+ }
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ }
+ }
+
+ @Override
+ public void setup(Context context) throws IOException {
+ cfRenameMap = createCfRenameMap(context.getConfiguration());
+ filter = instantiateFilter(context.getConfiguration());
+ int reduceNum = context.getNumReduceTasks();
+ Configuration conf = context.getConfiguration();
+ TableName tableName = TableName.valueOf(context.getConfiguration().get(TABLE_NAME));
+ try (Connection conn = ConnectionFactory.createConnection(conf);
+ RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
+ byte[][] startKeys = regionLocator.getStartKeys();
+ if (startKeys.length != reduceNum) {
+ throw new IOException("Region split after job initialization");
+ }
+ KeyValueWritableComparable[] startKeyWraps =
+ new KeyValueWritableComparable[startKeys.length - 1];
+ for (int i = 1; i < startKeys.length; ++i) {
+ startKeyWraps[i - 1] =
+ new KeyValueWritableComparable(KeyValueUtil.createFirstOnRow(startKeys[i]));
+ }
+ KeyValueWritableComparablePartitioner.START_KEYS = startKeyWraps;
+ }
+ }
+ }
+
+ /**
+ * A mapper that just writes out KeyValues.
+ */
+ @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="EQ_COMPARETO_USE_OBJECT_EQUALS",
+ justification="Writables are going away and this has been this way forever")
+ public static class KeyValueImporter extends TableMapper<ImmutableBytesWritable, KeyValue> {
+ private Map<byte[], byte[]> cfRenameMap;
+ private Filter filter;
+ private static final Log LOG = LogFactory.getLog(KeyValueImporter.class);
+
+ /**
+ * @param row The current table row key.
+ * @param value The columns.
+ * @param context The current context.
+ * @throws IOException When something is broken with the data.
+ */
+ @Override
+ public void map(ImmutableBytesWritable row, Result value,
+ Context context)
+ throws IOException {
+ try {
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Considering the row."
+ + Bytes.toString(row.get(), row.getOffset(), row.getLength()));
+ }
+ if (filter == null
+ || !filter.filterRowKey(CellUtil.createFirstOnRow(row.get(), row.getOffset(),
+ (short) row.getLength()))) {
+ for (Cell kv : value.rawCells()) {
+ kv = filterKv(filter, kv);
+ // skip if we filtered it out
+ if (kv == null) continue;
+ // TODO get rid of ensureKeyValue
+ context.write(row, KeyValueUtil.ensureKeyValue(convertKv(kv, cfRenameMap)));
+ }
+ }
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ }
+ }
+
+ @Override
+ public void setup(Context context) {
+ cfRenameMap = createCfRenameMap(context.getConfiguration());
+ filter = instantiateFilter(context.getConfiguration());
+ }
+ }
+
+ /**
+ * Write table content back out as {@link Mutation}s to the configured table.
+ */
+ public static class Importer extends TableMapper<ImmutableBytesWritable, Mutation> {
+ private Map<byte[], byte[]> cfRenameMap;
+ private List<UUID> clusterIds;
+ private Filter filter;
+ private Durability durability;
+
+ /**
+ * @param row The current table row key.
+ * @param value The columns.
+ * @param context The current context.
+ * @throws IOException When something is broken with the data.
+ */
+ @Override
+ public void map(ImmutableBytesWritable row, Result value,
+ Context context)
+ throws IOException {
+ try {
+ writeResult(row, value, context);
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ }
+ }
+
+ private void writeResult(ImmutableBytesWritable key, Result result, Context context)
+ throws IOException, InterruptedException {
+ Put put = null;
+ Delete delete = null;
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Considering the row."
+ + Bytes.toString(key.get(), key.getOffset(), key.getLength()));
+ }
+ if (filter == null
+ || !filter.filterRowKey(CellUtil.createFirstOnRow(key.get(), key.getOffset(),
+ (short) key.getLength()))) {
+ processKV(key, result, context, put, delete);
+ }
+ }
+
+ protected void processKV(ImmutableBytesWritable key, Result result, Context context, Put put,
+ Delete delete) throws IOException, InterruptedException {
+ for (Cell kv : result.rawCells()) {
+ kv = filterKv(filter, kv);
+ // skip if we filter it out
+ if (kv == null) continue;
+
+ kv = convertKv(kv, cfRenameMap);
+ // Deletes and Puts are gathered and written when finished
+ /*
+ * If an Export contains a sequence of mutations and tombstones, the Import should restore
+ * that same sequence as it is. If we combined all Delete tombstones into a single request,
+ * some DeleteFamily tombstones could be dropped: when multiple DeleteFamily tombstones are
+ * submitted in a single Delete request, only the newest one is kept in the HBase table and
+ * the others are ignored. See HBASE-12065.
+ */
+ if (CellUtil.isDeleteFamily(kv)) {
+ Delete deleteFamily = new Delete(key.get());
+ deleteFamily.add(kv);
+ if (durability != null) {
+ deleteFamily.setDurability(durability);
+ }
+ deleteFamily.setClusterIds(clusterIds);
+ context.write(key, deleteFamily);
+ } else if (CellUtil.isDelete(kv)) {
+ if (delete == null) {
+ delete = new Delete(key.get());
+ }
+ delete.add(kv);
+ } else {
+ if (put == null) {
+ put = new Put(key.get());
+ }
+ addPutToKv(put, kv);
+ }
+ }
+ if (put != null) {
+ if (durability != null) {
+ put.setDurability(durability);
+ }
+ put.setClusterIds(clusterIds);
+ context.write(key, put);
+ }
+ if (delete != null) {
+ if (durability != null) {
+ delete.setDurability(durability);
+ }
+ delete.setClusterIds(clusterIds);
+ context.write(key, delete);
+ }
+ }
+
+ protected void addPutToKv(Put put, Cell kv) throws IOException {
+ put.add(kv);
+ }
+
+ @Override
+ public void setup(Context context) {
+ LOG.info("Setting up " + getClass() + " mapper.");
+ Configuration conf = context.getConfiguration();
+ cfRenameMap = createCfRenameMap(conf);
+ filter = instantiateFilter(conf);
+ String durabilityStr = conf.get(WAL_DURABILITY);
+ if(durabilityStr != null){
+ durability = Durability.valueOf(durabilityStr.toUpperCase(Locale.ROOT));
+ LOG.info("setting WAL durability to " + durability);
+ } else {
+ LOG.info("setting WAL durability to default.");
+ }
+ // TODO: This is kind of ugly doing setup of ZKW just to read the clusterid.
+ ZooKeeperWatcher zkw = null;
+ Exception ex = null;
+ try {
+ zkw = new ZooKeeperWatcher(conf, context.getTaskAttemptID().toString(), null);
+ clusterIds = Collections.singletonList(ZKClusterId.getUUIDForCluster(zkw));
+ } catch (ZooKeeperConnectionException e) {
+ ex = e;
+ LOG.error("Problem connecting to ZooKeper during task setup", e);
+ } catch (KeeperException e) {
+ ex = e;
+ LOG.error("Problem reading ZooKeeper data during task setup", e);
+ } catch (IOException e) {
+ ex = e;
+ LOG.error("Problem setting up task", e);
+ } finally {
+ if (zkw != null) zkw.close();
+ }
+ if (clusterIds == null) {
+ // exit early if setup fails
+ throw new RuntimeException(ex);
+ }
+ }
+ }
+
+ /**
+ * Create a {@link Filter} to apply to all incoming keys ({@link KeyValue KeyValues}) so
+ * that some of them can optionally be excluded from the job output.
+ * @param conf {@link Configuration} from which to load the filter
+ * @return the filter to use for the task, or <tt>null</tt> if no filter should be used
+ * @throws IllegalArgumentException if the filter is misconfigured
+ */
+ public static Filter instantiateFilter(Configuration conf) {
+ // get the filter, if it was configured
+ Class<? extends Filter> filterClass = conf.getClass(FILTER_CLASS_CONF_KEY, null, Filter.class);
+ if (filterClass == null) {
+ LOG.debug("No configured filter class, accepting all keyvalues.");
+ return null;
+ }
+ LOG.debug("Attempting to create filter:" + filterClass);
+ String[] filterArgs = conf.getStrings(FILTER_ARGS_CONF_KEY);
+ ArrayList<byte[]> quotedArgs = toQuotedByteArrays(filterArgs);
+ try {
+ Method m = filterClass.getMethod("createFilterFromArguments", ArrayList.class);
+ return (Filter) m.invoke(null, quotedArgs);
+ } catch (IllegalAccessException e) {
+ LOG.error("Couldn't instantiate filter!", e);
+ throw new RuntimeException(e);
+ } catch (SecurityException e) {
+ LOG.error("Couldn't instantiate filter!", e);
+ throw new RuntimeException(e);
+ } catch (NoSuchMethodException e) {
+ LOG.error("Couldn't instantiate filter!", e);
+ throw new RuntimeException(e);
+ } catch (IllegalArgumentException e) {
+ LOG.error("Couldn't instantiate filter!", e);
+ throw new RuntimeException(e);
+ } catch (InvocationTargetException e) {
+ LOG.error("Couldn't instantiate filter!", e);
+ throw new RuntimeException(e);
+ }
+ }
+
+ private static ArrayList<byte[]> toQuotedByteArrays(String... stringArgs) {
+ ArrayList<byte[]> quotedArgs = new ArrayList<>();
+ for (String stringArg : stringArgs) {
+ // all the filters' instantiation methods expect quoted args since they are coming from
+ // the shell, so add them here, though it shouldn't really be needed :-/
+ quotedArgs.add(Bytes.toBytes("'" + stringArg + "'"));
+ }
+ return quotedArgs;
+ }
+
+ /**
+ * Attempt to filter out the keyvalue
+ * @param kv {@link KeyValue} on which to apply the filter
+ * @return <tt>null</tt> if the key should not be written, otherwise returns the original
+ * {@link KeyValue}
+ */
+ public static Cell filterKv(Filter filter, Cell kv) throws IOException {
+ // apply the filter and skip this kv if the filter doesn't apply
+ if (filter != null) {
+ Filter.ReturnCode code = filter.filterKeyValue(kv);
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Filter returned:" + code + " for the key value:" + kv);
+ }
+ // if it's not an accept type, then skip this kv
+ if (!(code.equals(Filter.ReturnCode.INCLUDE) || code
+ .equals(Filter.ReturnCode.INCLUDE_AND_NEXT_COL))) {
+ return null;
+ }
+ }
+ return kv;
+ }
+
+ // helper: create a new KeyValue based on CF rename map
+ private static Cell convertKv(Cell kv, Map<byte[], byte[]> cfRenameMap) {
+ if(cfRenameMap != null) {
+ // If there's a rename mapping for this CF, create a new KeyValue
+ byte[] newCfName = cfRenameMap.get(CellUtil.cloneFamily(kv));
+ if(newCfName != null) {
+ kv = new KeyValue(kv.getRowArray(), // row buffer
+ kv.getRowOffset(), // row offset
+ kv.getRowLength(), // row length
+ newCfName, // CF buffer
+ 0, // CF offset
+ newCfName.length, // CF length
+ kv.getQualifierArray(), // qualifier buffer
+ kv.getQualifierOffset(), // qualifier offset
+ kv.getQualifierLength(), // qualifier length
+ kv.getTimestamp(), // timestamp
+ KeyValue.Type.codeToType(kv.getTypeByte()), // KV Type
+ kv.getValueArray(), // value buffer
+ kv.getValueOffset(), // value offset
+ kv.getValueLength()); // value length
+ }
+ }
+ return kv;
+ }
+
+ // helper: make a map from sourceCfName to destCfName by parsing a config key
+ private static Map<byte[], byte[]> createCfRenameMap(Configuration conf) {
+ Map<byte[], byte[]> cfRenameMap = null;
+ String allMappingsPropVal = conf.get(CF_RENAME_PROP);
+ if(allMappingsPropVal != null) {
+ // The conf value format should be sourceCf1:destCf1,sourceCf2:destCf2,...
+ String[] allMappings = allMappingsPropVal.split(",");
+ for (String mapping: allMappings) {
+ if(cfRenameMap == null) {
+ cfRenameMap = new TreeMap<>(Bytes.BYTES_COMPARATOR);
+ }
+ String [] srcAndDest = mapping.split(":");
+ if(srcAndDest.length != 2) {
+ continue;
+ }
+ cfRenameMap.put(srcAndDest[0].getBytes(), srcAndDest[1].getBytes());
+ }
+ }
+ return cfRenameMap;
+ }
+
+ /**
+ * <p>Sets a configuration property with key {@link #CF_RENAME_PROP} in conf that tells
+ * the mapper how to rename column families.
+ *
+ * <p>Alternately, instead of calling this function, you could set the configuration key
+ * {@link #CF_RENAME_PROP} yourself. The value should look like
+ * <pre>srcCf1:destCf1,srcCf2:destCf2,....</pre>. This would have the same effect on
+ * the mapper behavior.
+ *
+ * @param conf the Configuration in which the {@link #CF_RENAME_PROP} key will be
+ * set
+ * @param renameMap a mapping from source CF names to destination CF names
+ */
+ static public void configureCfRenaming(Configuration conf,
+ Map<String, String> renameMap) {
+ StringBuilder sb = new StringBuilder();
+ for(Map.Entry<String,String> entry: renameMap.entrySet()) {
+ String sourceCf = entry.getKey();
+ String destCf = entry.getValue();
+
+ if(sourceCf.contains(":") || sourceCf.contains(",") ||
+ destCf.contains(":") || destCf.contains(",")) {
+ throw new IllegalArgumentException("Illegal character in CF names: "
+ + sourceCf + ", " + destCf);
+ }
+
+ if(sb.length() != 0) {
+ sb.append(",");
+ }
+ sb.append(sourceCf + ":" + destCf);
+ }
+ conf.set(CF_RENAME_PROP, sb.toString());
+ }
+
+ /**
+ * Add a Filter to be instantiated on import
+ * @param conf Configuration to update (will be passed to the job)
+ * @param clazz {@link Filter} subclass to instantiate on the server.
+ * @param filterArgs List of arguments to pass to the filter on instantiation
+ */
+ public static void addFilterAndArguments(Configuration conf, Class<? extends Filter> clazz,
+ List<String> filterArgs) throws IOException {
+ conf.set(Import.FILTER_CLASS_CONF_KEY, clazz.getName());
+ conf.setStrings(Import.FILTER_ARGS_CONF_KEY, filterArgs.toArray(new String[filterArgs.size()]));
+ }
+
+ /**
+ * Sets up the actual job.
+ * @param conf The current configuration.
+ * @param args The command line parameters.
+ * @return The newly created job.
+ * @throws IOException When setting up the job fails.
+ */
+ public static Job createSubmittableJob(Configuration conf, String[] args)
+ throws IOException {
+ TableName tableName = TableName.valueOf(args[0]);
+ conf.set(TABLE_NAME, tableName.getNameAsString());
+ Path inputDir = new Path(args[1]);
+ Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
+ job.setJarByClass(Importer.class);
+ FileInputFormat.setInputPaths(job, inputDir);
+ job.setInputFormatClass(SequenceFileInputFormat.class);
+ String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
+
+ // make sure we get the filter in the jars
+ try {
+ Class<? extends Filter> filter = conf.getClass(FILTER_CLASS_CONF_KEY, null, Filter.class);
+ if (filter != null) {
+ TableMapReduceUtil.addDependencyJarsForClasses(conf, filter);
+ }
+ } catch (Exception e) {
+ throw new IOException(e);
+ }
+
+ if (hfileOutPath != null && conf.getBoolean(HAS_LARGE_RESULT, false)) {
+ LOG.info("Use Large Result!!");
+ try (Connection conn = ConnectionFactory.createConnection(conf);
+ Table table = conn.getTable(tableName);
+ RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
+ HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(), regionLocator);
+ job.setMapperClass(KeyValueSortImporter.class);
+ job.setReducerClass(KeyValueReducer.class);
+ Path outputDir = new Path(hfileOutPath);
+ FileOutputFormat.setOutputPath(job, outputDir);
+ job.setMapOutputKeyClass(KeyValueWritableComparable.class);
+ job.setMapOutputValueClass(KeyValue.class);
+ job.getConfiguration().setClass("mapreduce.job.output.key.comparator.class",
+ KeyValueWritableComparable.KeyValueWritableComparator.class,
+ RawComparator.class);
+ Path partitionsPath =
+ new Path(TotalOrderPartitioner.getPartitionFile(job.getConfiguration()));
+ FileSystem fs = FileSystem.get(job.getConfiguration());
+ fs.deleteOnExit(partitionsPath);
+ job.setPartitionerClass(KeyValueWritableComparablePartitioner.class);
+ job.setNumReduceTasks(regionLocator.getStartKeys().length);
+ TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
+ org.apache.hadoop.hbase.shaded.com.google.common.base.Preconditions.class);
+ }
+ } else if (hfileOutPath != null) {
+ LOG.info("writing to hfiles for bulk load.");
+ job.setMapperClass(KeyValueImporter.class);
+ try (Connection conn = ConnectionFactory.createConnection(conf);
+ Table table = conn.getTable(tableName);
+ RegionLocator regionLocator = conn.getRegionLocator(tableName)){
+ job.setReducerClass(KeyValueSortReducer.class);
+ Path outputDir = new Path(hfileOutPath);
+ FileOutputFormat.setOutputPath(job, outputDir);
+ job.setMapOutputKeyClass(ImmutableBytesWritable.class);
+ job.setMapOutputValueClass(KeyValue.class);
+ HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(), regionLocator);
+ TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
+ org.apache.hadoop.hbase.shaded.com.google.common.base.Preconditions.class);
+ }
+ } else {
+ LOG.info("writing directly to table from Mapper.");
+ // No reducers. Just write straight to table. Call initTableReducerJob
+ // because it sets up the TableOutputFormat.
+ job.setMapperClass(Importer.class);
+ TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job);
+ job.setNumReduceTasks(0);
+ }
+ return job;
+ }
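+
+ // Illustrative programmatic use, not part of this patch (table name and paths are hypothetical):
+ //   conf.set(BULK_OUTPUT_CONF_KEY, "/tmp/import-hfiles");   // write HFiles instead of live Puts
+ //   Job job = Import.createSubmittableJob(conf, new String[] { "myTable", "/export/myTable" });
+ //   job.waitForCompletion(true);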
+
+ /*
+ * @param errorMsg Error message. Can be null.
+ */
+ private static void usage(final String errorMsg) {
+ if (errorMsg != null && errorMsg.length() > 0) {
+ System.err.println("ERROR: " + errorMsg);
+ }
+ System.err.println("Usage: Import [options] <tablename> <inputdir>");
+ System.err.println("By default Import will load data directly into HBase. To instead generate");
+ System.err.println("HFiles of data to prepare for a bulk data load, pass the option:");
+ System.err.println(" -D" + BULK_OUTPUT_CONF_KEY + "=/path/for/output");
+ System.err.println("If there is a large result that includes too much KeyValue "
+ + "whitch can occur OOME caused by the memery sort in reducer, pass the option:");
+ System.err.println(" -D" + HAS_LARGE_RESULT + "=true");
+ System.err
+ .println(" To apply a generic org.apache.hadoop.hbase.filter.Filter to the input, use");
+ System.err.println(" -D" + FILTER_CLASS_CONF_KEY + "=<name of filter class>");
+ System.err.println(" -D" + FILTER_ARGS_CONF_KEY + "=<comma separated list of args for filter");
+ System.err.println(" NOTE: The filter will be applied BEFORE doing key renames via the "
+ + CF_RENAME_PROP + " property. Futher, filters will only use the"
+ + " Filter#filterRowKey(byte[] buffer, int offset, int length) method to identify "
+ + " whether the current row needs to be ignored completely for processing and "
+ + " Filter#filterKeyValue(KeyValue) method to determine if the KeyValue should be added;"
+ + " Filter.ReturnCode#INCLUDE and #INCLUDE_AND_NEXT_COL will be considered as including"
+ + " the KeyValue.");
+ System.err.println("To import data exported from HBase 0.94, use");
+ System.err.println(" -Dhbase.import.version=0.94");
+ System.err.println(" -D " + JOB_NAME_CONF_KEY
+ + "=jobName - use the specified mapreduce job name for the import");
+ System.err.println("For performance consider the following options:\n"
+ + " -Dmapreduce.map.speculative=false\n"
+ + " -Dmapreduce.reduce.speculative=false\n"
+ + " -D" + WAL_DURABILITY + "=<Used while writing data to hbase."
+ +" Allowed values are the supported durability values"
+ +" like SKIP_WAL/ASYNC_WAL/SYNC_WAL/...>");
+ }
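+
+ // Example invocation, not part of this patch (table name and paths are hypothetical; the -D
+ // option uses the key printed by usage() above, shown here only as a placeholder):
+ //   hbase org.apache.hadoop.hbase.mapreduce.Import \
+ //       -D<BULK_OUTPUT_CONF_KEY>=/tmp/import-hfiles myTable /export/myTable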
+
+ /**
+ * If the durability is set to {@link Durability#SKIP_WAL} and the data is imported to hbase, we
+ * need to flush all the regions of the table as the data is held in memory and is also not
+ * present in the Write Ahead Log to replay in scenarios of a crash. This method flushes all the
+ * regions of the table in the scenarios of import data to hbase with {@link Durability#SKIP_WAL}
+ */
+ public static void flushRegionsIfNecessary(Configuration conf) throws IOException,
+ InterruptedException {
+ String tableName = conf.get(TABLE_NAME);
+ Admin hAdmin = null;
+ Connection connection = null;
+ String durability = conf.get(WAL_DURABILITY);
+ // Need to flush if the data is written to hbase and skip wal is enabled.
+ if (conf.get(BULK_OUTPUT_CONF_KEY) == null && durability != null
+ && Durability.SKIP_WAL.name().equalsIgnoreCase(durability)) {
+ LOG.info("Flushing all data that skipped the WAL.");
+ try {
+ connection = ConnectionFactory.createConnection(conf);
+ hAdmin = connection.getAdmin();
+ hAdmin.flush(TableName.valueOf(tableName));
+ } finally {
+ if (hAdmin != null) {
+ hAdmin.close();
+ }
+ if (connection != null) {
+ connection.close();
+ }
+ }
+ }
+ }
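+
+ // Sketch of the configuration that triggers the flush above (values are examples only):
+ //   conf.set(WAL_DURABILITY, Durability.SKIP_WAL.name());
+ //   // ... and no BULK_OUTPUT_CONF_KEY set, i.e. the mappers write directly to the table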
+
+ @Override
+ public int run(String[] args) throws Exception {
+ if (args.length < 2) {
+ usage("Wrong number of arguments: " + args.length);
+ return -1;
+ }
+ String inputVersionString = System.getProperty(ResultSerialization.IMPORT_FORMAT_VER);
+ if (inputVersionString != null) {
+ getConf().set(ResultSerialization.IMPORT_FORMAT_VER, inputVersionString);
+ }
+ Job job = createSubmittableJob(getConf(), args);
+ boolean isJobSuccessful = job.waitForCompletion(true);
+ if(isJobSuccessful){
+ // Flush all the regions of the table
+ flushRegionsIfNecessary(getConf());
+ }
+ long inputRecords = job.getCounters().findCounter(TaskCounter.MAP_INPUT_RECORDS).getValue();
+ long outputRecords = job.getCounters().findCounter(TaskCounter.MAP_OUTPUT_RECORDS).getValue();
+ if (outputRecords < inputRecords) {
+ System.err.println("Warning, not all records were imported (maybe filtered out).");
+ if (outputRecords == 0) {
+ System.err.println("If the data was exported from HBase 0.94 "+
+ "consider using -Dhbase.import.version=0.94.");
+ }
+ }
+
+ return (isJobSuccessful ? 0 : 1);
+ }
+
+ /**
+ * Main entry point.
+ * @param args The command line parameters.
+ * @throws Exception When running the job fails.
+ */
+ public static void main(String[] args) throws Exception {
+ int errCode = ToolRunner.run(HBaseConfiguration.create(), new Import(), args);
+ System.exit(errCode);
+ }
+
+}
[14/41] hbase git commit: HBASE-18640 Move mapreduce out of
hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java
deleted file mode 100644
index ff458ff..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java
+++ /dev/null
@@ -1,1027 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.File;
-import java.io.IOException;
-import java.net.URL;
-import java.net.URLDecoder;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Enumeration;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.zip.ZipEntry;
-import java.util.zip.ZipFile;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.MetaTableAccessor;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos;
-import org.apache.hadoop.hbase.security.User;
-import org.apache.hadoop.hbase.security.UserProvider;
-import org.apache.hadoop.hbase.security.token.TokenUtil;
-import org.apache.hadoop.hbase.util.Base64;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.zookeeper.ZKConfig;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapreduce.InputFormat;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.util.StringUtils;
-
-import com.codahale.metrics.MetricRegistry;
-
-/**
- * Utility for {@link TableMapper} and {@link TableReducer}
- */
-@SuppressWarnings({ "rawtypes", "unchecked" })
-@InterfaceAudience.Public
-public class TableMapReduceUtil {
- private static final Log LOG = LogFactory.getLog(TableMapReduceUtil.class);
-
- /**
- * Use this before submitting a TableMap job. It will appropriately set up
- * the job.
- *
- * @param table The table name to read from.
- * @param scan The scan instance with the columns, time range etc.
- * @param mapper The mapper class to use.
- * @param outputKeyClass The class of the output key.
- * @param outputValueClass The class of the output value.
- * @param job The current job to adjust. Make sure the passed job is
- * carrying all necessary HBase configuration.
- * @throws IOException When setting up the details fails.
- */
- public static void initTableMapperJob(String table, Scan scan,
- Class<? extends TableMapper> mapper,
- Class<?> outputKeyClass,
- Class<?> outputValueClass, Job job)
- throws IOException {
- initTableMapperJob(table, scan, mapper, outputKeyClass, outputValueClass,
- job, true);
- }
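-
-  // Illustrative usage sketch, not part of this file (table, mapper, and output types are
-  // hypothetical):
-  //   Scan scan = new Scan();
-  //   scan.setCaching(500);          // larger caching for MR scans
-  //   scan.setCacheBlocks(false);    // avoid polluting the region server block cache
-  //   TableMapReduceUtil.initTableMapperJob("myTable", scan, MyMapper.class,
-  //       Text.class, IntWritable.class, job);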
-
-
- /**
- * Use this before submitting a TableMap job. It will appropriately set up
- * the job.
- *
- * @param table The table name to read from.
- * @param scan The scan instance with the columns, time range etc.
- * @param mapper The mapper class to use.
- * @param outputKeyClass The class of the output key.
- * @param outputValueClass The class of the output value.
- * @param job The current job to adjust. Make sure the passed job is
- * carrying all necessary HBase configuration.
- * @throws IOException When setting up the details fails.
- */
- public static void initTableMapperJob(TableName table,
- Scan scan,
- Class<? extends TableMapper> mapper,
- Class<?> outputKeyClass,
- Class<?> outputValueClass,
- Job job) throws IOException {
- initTableMapperJob(table.getNameAsString(),
- scan,
- mapper,
- outputKeyClass,
- outputValueClass,
- job,
- true);
- }
-
- /**
- * Use this before submitting a TableMap job. It will appropriately set up
- * the job.
- *
- * @param table Binary representation of the table name to read from.
- * @param scan The scan instance with the columns, time range etc.
- * @param mapper The mapper class to use.
- * @param outputKeyClass The class of the output key.
- * @param outputValueClass The class of the output value.
- * @param job The current job to adjust. Make sure the passed job is
- * carrying all necessary HBase configuration.
- * @throws IOException When setting up the details fails.
- */
- public static void initTableMapperJob(byte[] table, Scan scan,
- Class<? extends TableMapper> mapper,
- Class<?> outputKeyClass,
- Class<?> outputValueClass, Job job)
- throws IOException {
- initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass, outputValueClass,
- job, true);
- }
-
- /**
- * Use this before submitting a TableMap job. It will appropriately set up
- * the job.
- *
- * @param table The table name to read from.
- * @param scan The scan instance with the columns, time range etc.
- * @param mapper The mapper class to use.
- * @param outputKeyClass The class of the output key.
- * @param outputValueClass The class of the output value.
- * @param job The current job to adjust. Make sure the passed job is
- * carrying all necessary HBase configuration.
- * @param addDependencyJars upload HBase jars and jars for any of the configured
- * job classes via the distributed cache (tmpjars).
- * @throws IOException When setting up the details fails.
- */
- public static void initTableMapperJob(String table, Scan scan,
- Class<? extends TableMapper> mapper,
- Class<?> outputKeyClass,
- Class<?> outputValueClass, Job job,
- boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass)
- throws IOException {
- initTableMapperJob(table, scan, mapper, outputKeyClass, outputValueClass, job,
- addDependencyJars, true, inputFormatClass);
- }
-
-
- /**
- * Use this before submitting a TableMap job. It will appropriately set up
- * the job.
- *
- * @param table The table name to read from.
- * @param scan The scan instance with the columns, time range etc.
- * @param mapper The mapper class to use.
- * @param outputKeyClass The class of the output key.
- * @param outputValueClass The class of the output value.
- * @param job The current job to adjust. Make sure the passed job is
- * carrying all necessary HBase configuration.
- * @param addDependencyJars upload HBase jars and jars for any of the configured
- * job classes via the distributed cache (tmpjars).
- * @param initCredentials whether to initialize hbase auth credentials for the job
- * @param inputFormatClass the input format
- * @throws IOException When setting up the details fails.
- */
- public static void initTableMapperJob(String table, Scan scan,
- Class<? extends TableMapper> mapper,
- Class<?> outputKeyClass,
- Class<?> outputValueClass, Job job,
- boolean addDependencyJars, boolean initCredentials,
- Class<? extends InputFormat> inputFormatClass)
- throws IOException {
- job.setInputFormatClass(inputFormatClass);
- if (outputValueClass != null) job.setMapOutputValueClass(outputValueClass);
- if (outputKeyClass != null) job.setMapOutputKeyClass(outputKeyClass);
- job.setMapperClass(mapper);
- if (Put.class.equals(outputValueClass)) {
- job.setCombinerClass(PutCombiner.class);
- }
- Configuration conf = job.getConfiguration();
- HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
- conf.set(TableInputFormat.INPUT_TABLE, table);
- conf.set(TableInputFormat.SCAN, convertScanToString(scan));
- conf.setStrings("io.serializations", conf.get("io.serializations"),
- MutationSerialization.class.getName(), ResultSerialization.class.getName(),
- KeyValueSerialization.class.getName());
- if (addDependencyJars) {
- addDependencyJars(job);
- }
- if (initCredentials) {
- initCredentials(job);
- }
- }
-
- /**
- * Use this before submitting a TableMap job. It will appropriately set up
- * the job.
- *
- * @param table Binary representation of the table name to read from.
- * @param scan The scan instance with the columns, time range etc.
- * @param mapper The mapper class to use.
- * @param outputKeyClass The class of the output key.
- * @param outputValueClass The class of the output value.
- * @param job The current job to adjust. Make sure the passed job is
- * carrying all necessary HBase configuration.
- * @param addDependencyJars upload HBase jars and jars for any of the configured
- * job classes via the distributed cache (tmpjars).
- * @param inputFormatClass The class of the input format
- * @throws IOException When setting up the details fails.
- */
- public static void initTableMapperJob(byte[] table, Scan scan,
- Class<? extends TableMapper> mapper,
- Class<?> outputKeyClass,
- Class<?> outputValueClass, Job job,
- boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass)
- throws IOException {
- initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass,
- outputValueClass, job, addDependencyJars, inputFormatClass);
- }
-
- /**
- * Use this before submitting a TableMap job. It will appropriately set up
- * the job.
- *
- * @param table Binary representation of the table name to read from.
- * @param scan The scan instance with the columns, time range etc.
- * @param mapper The mapper class to use.
- * @param outputKeyClass The class of the output key.
- * @param outputValueClass The class of the output value.
- * @param job The current job to adjust. Make sure the passed job is
- * carrying all necessary HBase configuration.
- * @param addDependencyJars upload HBase jars and jars for any of the configured
- * job classes via the distributed cache (tmpjars).
- * @throws IOException When setting up the details fails.
- */
- public static void initTableMapperJob(byte[] table, Scan scan,
- Class<? extends TableMapper> mapper,
- Class<?> outputKeyClass,
- Class<?> outputValueClass, Job job,
- boolean addDependencyJars)
- throws IOException {
- initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass,
- outputValueClass, job, addDependencyJars, TableInputFormat.class);
- }
-
- /**
- * Use this before submitting a TableMap job. It will appropriately set up
- * the job.
- *
- * @param table The table name to read from.
- * @param scan The scan instance with the columns, time range etc.
- * @param mapper The mapper class to use.
- * @param outputKeyClass The class of the output key.
- * @param outputValueClass The class of the output value.
- * @param job The current job to adjust. Make sure the passed job is
- * carrying all necessary HBase configuration.
- * @param addDependencyJars upload HBase jars and jars for any of the configured
- * job classes via the distributed cache (tmpjars).
- * @throws IOException When setting up the details fails.
- */
- public static void initTableMapperJob(String table, Scan scan,
- Class<? extends TableMapper> mapper,
- Class<?> outputKeyClass,
- Class<?> outputValueClass, Job job,
- boolean addDependencyJars)
- throws IOException {
- initTableMapperJob(table, scan, mapper, outputKeyClass,
- outputValueClass, job, addDependencyJars, TableInputFormat.class);
- }
-
- /**
- * Enable a basic on-heap cache for these jobs. Any BlockCache implementation based on
- * direct memory will likely cause the map tasks to OOM when opening the region. This
- * is done here instead of in TableSnapshotRegionRecordReader in case an advanced user
- * wants to override this behavior in their job.
- */
- public static void resetCacheConfig(Configuration conf) {
- conf.setFloat(
- HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT);
- conf.setFloat(HConstants.BUCKET_CACHE_SIZE_KEY, 0f);
- conf.unset(HConstants.BUCKET_CACHE_IOENGINE_KEY);
- }
-
- /**
- * Sets up the job for reading from one or more table snapshots, with one or more scans
- * per snapshot.
- * It bypasses HBase servers and reads directly from snapshot files.
- *
- * @param snapshotScans map of snapshot name to scans on that snapshot.
- * @param mapper The mapper class to use.
- * @param outputKeyClass The class of the output key.
- * @param outputValueClass The class of the output value.
- * @param job The current job to adjust. Make sure the passed job is
- * carrying all necessary HBase configuration.
- * @param addDependencyJars upload HBase jars and jars for any of the configured
- * job classes via the distributed cache (tmpjars).
- */
- public static void initMultiTableSnapshotMapperJob(Map<String, Collection<Scan>> snapshotScans,
- Class<? extends TableMapper> mapper, Class<?> outputKeyClass, Class<?> outputValueClass,
- Job job, boolean addDependencyJars, Path tmpRestoreDir) throws IOException {
- MultiTableSnapshotInputFormat.setInput(job.getConfiguration(), snapshotScans, tmpRestoreDir);
-
- job.setInputFormatClass(MultiTableSnapshotInputFormat.class);
- if (outputValueClass != null) {
- job.setMapOutputValueClass(outputValueClass);
- }
- if (outputKeyClass != null) {
- job.setMapOutputKeyClass(outputKeyClass);
- }
- job.setMapperClass(mapper);
- Configuration conf = job.getConfiguration();
- HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
-
- if (addDependencyJars) {
- addDependencyJars(job);
- addDependencyJarsForClasses(job.getConfiguration(), MetricRegistry.class);
- }
-
- resetCacheConfig(job.getConfiguration());
- }
-
- /**
- * Sets up the job for reading from a table snapshot. It bypasses HBase servers
- * and reads directly from snapshot files.
- *
- * @param snapshotName The name of the snapshot (of a table) to read from.
- * @param scan The scan instance with the columns, time range etc.
- * @param mapper The mapper class to use.
- * @param outputKeyClass The class of the output key.
- * @param outputValueClass The class of the output value.
- * @param job The current job to adjust. Make sure the passed job is
- * carrying all necessary HBase configuration.
- * @param addDependencyJars upload HBase jars and jars for any of the configured
- * job classes via the distributed cache (tmpjars).
- *
- * @param tmpRestoreDir a temporary directory to copy the snapshot files into. The current user
- * should have write permissions to this directory, and it should not be a subdirectory of
- * rootdir. After the job is finished, the restore directory can be deleted.
- * @throws IOException When setting up the details fails.
- * @see TableSnapshotInputFormat
- */
- public static void initTableSnapshotMapperJob(String snapshotName, Scan scan,
- Class<? extends TableMapper> mapper,
- Class<?> outputKeyClass,
- Class<?> outputValueClass, Job job,
- boolean addDependencyJars, Path tmpRestoreDir)
- throws IOException {
- TableSnapshotInputFormat.setInput(job, snapshotName, tmpRestoreDir);
- initTableMapperJob(snapshotName, scan, mapper, outputKeyClass,
- outputValueClass, job, addDependencyJars, false, TableSnapshotInputFormat.class);
- resetCacheConfig(job.getConfiguration());
- }
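-
-  // Illustrative sketch, not part of this file (snapshot name, mapper, and restore directory
-  // are hypothetical):
-  //   TableMapReduceUtil.initTableSnapshotMapperJob("mySnapshot", new Scan(), MyMapper.class,
-  //       Text.class, IntWritable.class, job, true, new Path("/tmp/snapshot-restore"));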
-
- /**
- * Use this before submitting a Multi TableMap job. It will appropriately set
- * up the job.
- *
- * @param scans The list of {@link Scan} objects to read from.
- * @param mapper The mapper class to use.
- * @param outputKeyClass The class of the output key.
- * @param outputValueClass The class of the output value.
- * @param job The current job to adjust. Make sure the passed job is carrying
- * all necessary HBase configuration.
- * @throws IOException When setting up the details fails.
- */
- public static void initTableMapperJob(List<Scan> scans,
- Class<? extends TableMapper> mapper,
- Class<?> outputKeyClass,
- Class<?> outputValueClass, Job job) throws IOException {
- initTableMapperJob(scans, mapper, outputKeyClass, outputValueClass, job,
- true);
- }
-
- /**
- * Use this before submitting a Multi TableMap job. It will appropriately set
- * up the job.
- *
- * @param scans The list of {@link Scan} objects to read from.
- * @param mapper The mapper class to use.
- * @param outputKeyClass The class of the output key.
- * @param outputValueClass The class of the output value.
- * @param job The current job to adjust. Make sure the passed job is carrying
- * all necessary HBase configuration.
- * @param addDependencyJars upload HBase jars and jars for any of the
- * configured job classes via the distributed cache (tmpjars).
- * @throws IOException When setting up the details fails.
- */
- public static void initTableMapperJob(List<Scan> scans,
- Class<? extends TableMapper> mapper,
- Class<?> outputKeyClass,
- Class<?> outputValueClass, Job job,
- boolean addDependencyJars) throws IOException {
- initTableMapperJob(scans, mapper, outputKeyClass, outputValueClass, job,
- addDependencyJars, true);
- }
-
- /**
- * Use this before submitting a Multi TableMap job. It will appropriately set
- * up the job.
- *
- * @param scans The list of {@link Scan} objects to read from.
- * @param mapper The mapper class to use.
- * @param outputKeyClass The class of the output key.
- * @param outputValueClass The class of the output value.
- * @param job The current job to adjust. Make sure the passed job is carrying
- * all necessary HBase configuration.
- * @param addDependencyJars upload HBase jars and jars for any of the
- * configured job classes via the distributed cache (tmpjars).
- * @param initCredentials whether to initialize hbase auth credentials for the job
- * @throws IOException When setting up the details fails.
- */
- public static void initTableMapperJob(List<Scan> scans,
- Class<? extends TableMapper> mapper,
- Class<?> outputKeyClass,
- Class<?> outputValueClass, Job job,
- boolean addDependencyJars,
- boolean initCredentials) throws IOException {
- job.setInputFormatClass(MultiTableInputFormat.class);
- if (outputValueClass != null) {
- job.setMapOutputValueClass(outputValueClass);
- }
- if (outputKeyClass != null) {
- job.setMapOutputKeyClass(outputKeyClass);
- }
- job.setMapperClass(mapper);
- Configuration conf = job.getConfiguration();
- HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
- List<String> scanStrings = new ArrayList<>();
-
- for (Scan scan : scans) {
- scanStrings.add(convertScanToString(scan));
- }
- job.getConfiguration().setStrings(MultiTableInputFormat.SCANS,
- scanStrings.toArray(new String[scanStrings.size()]));
-
- if (addDependencyJars) {
- addDependencyJars(job);
- }
-
- if (initCredentials) {
- initCredentials(job);
- }
- }
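-
-  // Illustrative sketch, not part of this file: each Scan names its table via the table-name
-  // scan attribute read by MultiTableInputFormat (table and mapper names are hypothetical):
-  //   Scan scanA = new Scan();
-  //   scanA.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes("tableA"));
-  //   Scan scanB = new Scan();
-  //   scanB.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes("tableB"));
-  //   TableMapReduceUtil.initTableMapperJob(Arrays.asList(scanA, scanB), MyMapper.class,
-  //       Text.class, IntWritable.class, job);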
-
- public static void initCredentials(Job job) throws IOException {
- UserProvider userProvider = UserProvider.instantiate(job.getConfiguration());
- if (userProvider.isHadoopSecurityEnabled()) {
- // propagate delegation related props from launcher job to MR job
- if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
- job.getConfiguration().set("mapreduce.job.credentials.binary",
- System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
- }
- }
-
- if (userProvider.isHBaseSecurityEnabled()) {
- try {
- // init credentials for remote cluster
- String quorumAddress = job.getConfiguration().get(TableOutputFormat.QUORUM_ADDRESS);
- User user = userProvider.getCurrent();
- if (quorumAddress != null) {
- Configuration peerConf = HBaseConfiguration.createClusterConf(job.getConfiguration(),
- quorumAddress, TableOutputFormat.OUTPUT_CONF_PREFIX);
- Connection peerConn = ConnectionFactory.createConnection(peerConf);
- try {
- TokenUtil.addTokenForJob(peerConn, user, job);
- } finally {
- peerConn.close();
- }
- }
-
- Connection conn = ConnectionFactory.createConnection(job.getConfiguration());
- try {
- TokenUtil.addTokenForJob(conn, user, job);
- } finally {
- conn.close();
- }
- } catch (InterruptedException ie) {
- LOG.info("Interrupted obtaining user authentication token");
- Thread.currentThread().interrupt();
- }
- }
- }
-
- /**
- * Obtain an authentication token, for the specified cluster, on behalf of the current user
- * and add it to the credentials for the given map reduce job.
- *
- * The quorumAddress is the key to the ZK ensemble, which contains:
- * hbase.zookeeper.quorum, hbase.zookeeper.client.port and
- * zookeeper.znode.parent
- *
- * @param job The job that requires the permission.
- * @param quorumAddress string that contains the 3 required configurations
- * @throws IOException When the authentication token cannot be obtained.
- * @deprecated Since 1.2.0, use {@link #initCredentialsForCluster(Job, Configuration)} instead.
- */
- @Deprecated
- public static void initCredentialsForCluster(Job job, String quorumAddress)
- throws IOException {
- Configuration peerConf = HBaseConfiguration.createClusterConf(job.getConfiguration(),
- quorumAddress);
- initCredentialsForCluster(job, peerConf);
- }
-
- /**
- * Obtain an authentication token, for the specified cluster, on behalf of the current user
- * and add it to the credentials for the given map reduce job.
- *
- * @param job The job that requires the permission.
- * @param conf The configuration to use in connecting to the peer cluster
- * @throws IOException When the authentication token cannot be obtained.
- */
- public static void initCredentialsForCluster(Job job, Configuration conf)
- throws IOException {
- UserProvider userProvider = UserProvider.instantiate(job.getConfiguration());
- if (userProvider.isHBaseSecurityEnabled()) {
- try {
- Connection peerConn = ConnectionFactory.createConnection(conf);
- try {
- TokenUtil.addTokenForJob(peerConn, userProvider.getCurrent(), job);
- } finally {
- peerConn.close();
- }
- } catch (InterruptedException e) {
- LOG.info("Interrupted obtaining user authentication token");
- Thread.interrupted();
- }
- }
- }
-
- /**
- * Writes the given scan into a Base64 encoded string.
- *
- * @param scan The scan to write out.
- * @return The scan saved in a Base64 encoded string.
- * @throws IOException When writing the scan fails.
- */
- public static String convertScanToString(Scan scan) throws IOException {
- ClientProtos.Scan proto = ProtobufUtil.toScan(scan);
- return Base64.encodeBytes(proto.toByteArray());
- }
-
- /**
- * Converts the given Base64 string back into a Scan instance.
- *
- * @param base64 The scan details.
- * @return The newly created Scan instance.
- * @throws IOException When reading the scan instance fails.
- */
- public static Scan convertStringToScan(String base64) throws IOException {
- byte [] decoded = Base64.decode(base64);
- return ProtobufUtil.toScan(ClientProtos.Scan.parseFrom(decoded));
- }
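-
-  // Sketch, not part of this file: the two helpers are inverses, so a Scan can travel inside
-  // the job configuration:
-  //   String encoded = TableMapReduceUtil.convertScanToString(new Scan());
-  //   Scan restored = TableMapReduceUtil.convertStringToScan(encoded);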
-
- /**
- * Use this before submitting a TableReduce job. It will
- * appropriately set up the JobConf.
- *
- * @param table The output table.
- * @param reducer The reducer class to use.
- * @param job The current job to adjust.
- * @throws IOException When determining the region count fails.
- */
- public static void initTableReducerJob(String table,
- Class<? extends TableReducer> reducer, Job job)
- throws IOException {
- initTableReducerJob(table, reducer, job, null);
- }
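-
-  // Illustrative usage sketch, not part of this file (table and reducer names are hypothetical):
-  //   TableMapReduceUtil.initTableReducerJob("myOutputTable", MyTableReducer.class, job);
-  //   // the reducer emits Put/Delete mutations keyed by ImmutableBytesWritable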
-
- /**
- * Use this before submitting a TableReduce job. It will
- * appropriately set up the JobConf.
- *
- * @param table The output table.
- * @param reducer The reducer class to use.
- * @param job The current job to adjust.
- * @param partitioner Partitioner to use. Pass <code>null</code> to use
- * default partitioner.
- * @throws IOException When determining the region count fails.
- */
- public static void initTableReducerJob(String table,
- Class<? extends TableReducer> reducer, Job job,
- Class partitioner) throws IOException {
- initTableReducerJob(table, reducer, job, partitioner, null, null, null);
- }
-
- /**
- * Use this before submitting a TableReduce job. It will
- * appropriately set up the JobConf.
- *
- * @param table The output table.
- * @param reducer The reducer class to use.
- * @param job The current job to adjust. Make sure the passed job is
- * carrying all necessary HBase configuration.
- * @param partitioner Partitioner to use. Pass <code>null</code> to use
- * default partitioner.
- * @param quorumAddress Distant cluster to write to; default is null for
- * output to the cluster that is designated in <code>hbase-site.xml</code>.
- * Set this String to the zookeeper ensemble of an alternate remote cluster
- * when you would have the reduce write a cluster that is other than the
- * default; e.g. copying tables between clusters, the source would be
- * designated by <code>hbase-site.xml</code> and this param would have the
- * ensemble address of the remote cluster. The format to pass is particular.
- * Pass <code> <hbase.zookeeper.quorum>:<
- * hbase.zookeeper.client.port>:<zookeeper.znode.parent>
- * </code> such as <code>server,server2,server3:2181:/hbase</code>.
- * @param serverClass redefined hbase.regionserver.class
- * @param serverImpl redefined hbase.regionserver.impl
- * @throws IOException When determining the region count fails.
- */
- public static void initTableReducerJob(String table,
- Class<? extends TableReducer> reducer, Job job,
- Class partitioner, String quorumAddress, String serverClass,
- String serverImpl) throws IOException {
- initTableReducerJob(table, reducer, job, partitioner, quorumAddress,
- serverClass, serverImpl, true);
- }
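-
-  // Sketch, not part of this file: writing to a peer cluster (the ensemble address and reducer
-  // name below are hypothetical):
-  //   TableMapReduceUtil.initTableReducerJob("myTable", MyReducer.class, job, null,
-  //       "zk1,zk2,zk3:2181:/hbase", null, null);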
-
- /**
- * Use this before submitting a TableReduce job. It will
- * appropriately set up the JobConf.
- *
- * @param table The output table.
- * @param reducer The reducer class to use.
- * @param job The current job to adjust. Make sure the passed job is
- * carrying all necessary HBase configuration.
- * @param partitioner Partitioner to use. Pass <code>null</code> to use
- * default partitioner.
- * @param quorumAddress Distant cluster to write to; default is null for
- * output to the cluster that is designated in <code>hbase-site.xml</code>.
- * Set this String to the zookeeper ensemble of an alternate remote cluster
- * when you would have the reduce write a cluster that is other than the
- * default; e.g. copying tables between clusters, the source would be
- * designated by <code>hbase-site.xml</code> and this param would have the
- * ensemble address of the remote cluster. The format to pass is particular.
- * Pass <code> <hbase.zookeeper.quorum>:<
- * hbase.zookeeper.client.port>:<zookeeper.znode.parent>
- * </code> such as <code>server,server2,server3:2181:/hbase</code>.
- * @param serverClass redefined hbase.regionserver.class
- * @param serverImpl redefined hbase.regionserver.impl
- * @param addDependencyJars upload HBase jars and jars for any of the configured
- * job classes via the distributed cache (tmpjars).
- * @throws IOException When determining the region count fails.
- */
- public static void initTableReducerJob(String table,
- Class<? extends TableReducer> reducer, Job job,
- Class partitioner, String quorumAddress, String serverClass,
- String serverImpl, boolean addDependencyJars) throws IOException {
-
- Configuration conf = job.getConfiguration();
- HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
- job.setOutputFormatClass(TableOutputFormat.class);
- if (reducer != null) job.setReducerClass(reducer);
- conf.set(TableOutputFormat.OUTPUT_TABLE, table);
- conf.setStrings("io.serializations", conf.get("io.serializations"),
- MutationSerialization.class.getName(), ResultSerialization.class.getName());
- // If passed a quorum/ensemble address, pass it on to TableOutputFormat.
- if (quorumAddress != null) {
- // Calling this will validate the format
- ZKConfig.validateClusterKey(quorumAddress);
- conf.set(TableOutputFormat.QUORUM_ADDRESS,quorumAddress);
- }
- if (serverClass != null && serverImpl != null) {
- conf.set(TableOutputFormat.REGION_SERVER_CLASS, serverClass);
- conf.set(TableOutputFormat.REGION_SERVER_IMPL, serverImpl);
- }
- job.setOutputKeyClass(ImmutableBytesWritable.class);
- job.setOutputValueClass(Writable.class);
- if (partitioner == HRegionPartitioner.class) {
- job.setPartitionerClass(HRegionPartitioner.class);
- int regions = MetaTableAccessor.getRegionCount(conf, TableName.valueOf(table));
- if (job.getNumReduceTasks() > regions) {
- job.setNumReduceTasks(regions);
- }
- } else if (partitioner != null) {
- job.setPartitionerClass(partitioner);
- }
-
- if (addDependencyJars) {
- addDependencyJars(job);
- }
-
- initCredentials(job);
- }
-
- /**
- * Ensures that the given number of reduce tasks for the given job
- * configuration does not exceed the number of regions for the given table.
- *
- * @param table The table to get the region count for.
- * @param job The current job to adjust.
- * @throws IOException When retrieving the table details fails.
- */
- public static void limitNumReduceTasks(String table, Job job)
- throws IOException {
- int regions =
- MetaTableAccessor.getRegionCount(job.getConfiguration(), TableName.valueOf(table));
- if (job.getNumReduceTasks() > regions)
- job.setNumReduceTasks(regions);
- }
-
- /**
- * Sets the number of reduce tasks for the given job configuration to the
- * number of regions the given table has.
- *
- * @param table The table to get the region count for.
- * @param job The current job to adjust.
- * @throws IOException When retrieving the table details fails.
- */
- public static void setNumReduceTasks(String table, Job job)
- throws IOException {
- job.setNumReduceTasks(MetaTableAccessor.getRegionCount(job.getConfiguration(),
- TableName.valueOf(table)));
- }
-
- /**
- * Sets the number of rows to return and cache with each scanner iteration.
- * Higher caching values will enable faster mapreduce jobs at the expense of
- * requiring more heap to contain the cached rows.
- *
- * @param job The current job to adjust.
- * @param batchSize The number of rows to return in batch with each scanner
- * iteration.
- */
- public static void setScannerCaching(Job job, int batchSize) {
- job.getConfiguration().setInt("hbase.client.scanner.caching", batchSize);
- }
-
- /**
- * Add HBase and its dependencies (only) to the job configuration.
- * <p>
- * This is intended as a low-level API, facilitating code reuse between this
- * class and its mapred counterpart. It is also of use to external tools that
- * need to build a MapReduce job that interacts with HBase but want
- * fine-grained control over the jars shipped to the cluster.
- * </p>
- * @param conf The Configuration object to extend with dependencies.
- * @see org.apache.hadoop.hbase.mapred.TableMapReduceUtil
- * @see <a href="https://issues.apache.org/jira/browse/PIG-3285">PIG-3285</a>
- */
- public static void addHBaseDependencyJars(Configuration conf) throws IOException {
-
- // PrefixTreeCodec is part of the hbase-prefix-tree module. If not included in MR jobs jar
- // dependencies, MR jobs that write encoded hfiles will fail.
- // We used reflection here so to prevent a circular module dependency.
- // TODO - if we extract the MR into a module, make it depend on hbase-prefix-tree.
- Class prefixTreeCodecClass = null;
- try {
- prefixTreeCodecClass =
- Class.forName("org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeCodec");
- } catch (ClassNotFoundException e) {
- // this will show up in unit tests but should not show in real deployments
- LOG.warn("The hbase-prefix-tree module jar containing PrefixTreeCodec is not present." +
- " Continuing without it.");
- }
-
- addDependencyJarsForClasses(conf,
- // explicitly pull a class from each module
- org.apache.hadoop.hbase.HConstants.class, // hbase-common
- org.apache.hadoop.hbase.protobuf.generated.ClientProtos.class, // hbase-protocol
- org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.class, // hbase-protocol-shaded
- org.apache.hadoop.hbase.client.Put.class, // hbase-client
- org.apache.hadoop.hbase.CompatibilityFactory.class, // hbase-hadoop-compat
- org.apache.hadoop.hbase.mapreduce.JobUtil.class, // hbase-hadoop2-compat
- org.apache.hadoop.hbase.mapreduce.TableMapper.class, // hbase-server
- org.apache.hadoop.hbase.metrics.impl.FastLongHistogram.class, // hbase-metrics
- org.apache.hadoop.hbase.metrics.Snapshot.class, // hbase-metrics-api
- prefixTreeCodecClass, // hbase-prefix-tree (if null will be skipped)
- // pull necessary dependencies
- org.apache.zookeeper.ZooKeeper.class,
- org.apache.hadoop.hbase.shaded.io.netty.channel.Channel.class,
- com.google.protobuf.Message.class,
- org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists.class,
- org.apache.htrace.Trace.class,
- com.codahale.metrics.MetricRegistry.class);
- }
-
- /**
- * Returns a classpath string built from the content of the "tmpjars" value in {@code conf}.
- * Also exposed to shell scripts via `bin/hbase mapredcp`.
- */
- public static String buildDependencyClasspath(Configuration conf) {
- if (conf == null) {
- throw new IllegalArgumentException("Must provide a configuration object.");
- }
- Set<String> paths = new HashSet<>(conf.getStringCollection("tmpjars"));
- if (paths.isEmpty()) {
- throw new IllegalArgumentException("Configuration contains no tmpjars.");
- }
- StringBuilder sb = new StringBuilder();
- for (String s : paths) {
- // entries can take the form 'file:/path/to/file.jar'.
- int idx = s.indexOf(":");
- if (idx != -1) s = s.substring(idx + 1);
- if (sb.length() > 0) sb.append(File.pathSeparator);
- sb.append(s);
- }
- return sb.toString();
- }
-
- /**
- * Add the HBase dependency jars as well as jars for any of the configured
- * job classes to the job configuration, so that JobClient will ship them
- * to the cluster and add them to the DistributedCache.
- */
- public static void addDependencyJars(Job job) throws IOException {
- addHBaseDependencyJars(job.getConfiguration());
- try {
- addDependencyJarsForClasses(job.getConfiguration(),
- // when making changes here, consider also mapred.TableMapReduceUtil
- // pull job classes
- job.getMapOutputKeyClass(),
- job.getMapOutputValueClass(),
- job.getInputFormatClass(),
- job.getOutputKeyClass(),
- job.getOutputValueClass(),
- job.getOutputFormatClass(),
- job.getPartitionerClass(),
- job.getCombinerClass());
- } catch (ClassNotFoundException e) {
- throw new IOException(e);
- }
- }
-
- /**
- * Add the jars containing the given classes to the job's configuration
- * such that JobClient will ship them to the cluster and add them to
- * the DistributedCache.
- * @deprecated rely on {@link #addDependencyJars(Job)} instead.
- */
- @Deprecated
- public static void addDependencyJars(Configuration conf,
- Class<?>... classes) throws IOException {
- LOG.warn("The addDependencyJars(Configuration, Class<?>...) method has been deprecated since it"
- + " is easy to use incorrectly. Most users should rely on addDependencyJars(Job) " +
- "instead. See HBASE-8386 for more details.");
- addDependencyJarsForClasses(conf, classes);
- }
-
- /**
- * Add the jars containing the given classes to the job's configuration
- * such that JobClient will ship them to the cluster and add them to
- * the DistributedCache.
- *
- * N.B. that this method at most adds one jar per class given. If there is more than one
- * jar available containing a class with the same name as a given class, we don't define
- * which of those jars might be chosen.
- *
- * @param conf The Hadoop Configuration to modify
- * @param classes will add just those dependencies needed to find the given classes
- * @throws IOException if an underlying library call fails.
- */
- @InterfaceAudience.Private
- public static void addDependencyJarsForClasses(Configuration conf,
- Class<?>... classes) throws IOException {
-
- FileSystem localFs = FileSystem.getLocal(conf);
- Set<String> jars = new HashSet<>();
- // Add jars that are already in the tmpjars variable
- jars.addAll(conf.getStringCollection("tmpjars"));
-
- // add jars as we find them to a map of contents jar name so that we can avoid
- // creating new jars for classes that have already been packaged.
- Map<String, String> packagedClasses = new HashMap<>();
-
- // Add jars containing the specified classes
- for (Class<?> clazz : classes) {
- if (clazz == null) continue;
-
- Path path = findOrCreateJar(clazz, localFs, packagedClasses);
- if (path == null) {
- LOG.warn("Could not find jar for class " + clazz +
- " in order to ship it to the cluster.");
- continue;
- }
- if (!localFs.exists(path)) {
- LOG.warn("Could not validate jar file " + path + " for class "
- + clazz);
- continue;
- }
- jars.add(path.toString());
- }
- if (jars.isEmpty()) return;
-
- conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[jars.size()])));
- }
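-
-  // Illustrative sketch, not part of this file: shipping the jar that contains a job-specific
-  // class (the class name is hypothetical). Note this method is @InterfaceAudience.Private;
-  // external code would normally go through addDependencyJars(Job) instead.
-  //   TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), MyCustomFilter.class);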
-
- /**
- * Finds the Jar for a class or creates it if it doesn't exist. If the class is in
- * a directory in the classpath, it creates a Jar on the fly with the
- * contents of the directory and returns the path to that Jar. If a Jar is
- * created, it is created in the system temporary directory. Otherwise,
- * returns an existing jar that contains a class of the same name. Maintains
- * a mapping from jar contents to the tmp jar created.
- * @param my_class the class to find.
- * @param fs the FileSystem with which to qualify the returned path.
- * @param packagedClasses a map of class name to path.
- * @return a jar file that contains the class.
- * @throws IOException
- */
- private static Path findOrCreateJar(Class<?> my_class, FileSystem fs,
- Map<String, String> packagedClasses)
- throws IOException {
- // attempt to locate an existing jar for the class.
- String jar = findContainingJar(my_class, packagedClasses);
- if (null == jar || jar.isEmpty()) {
- jar = getJar(my_class);
- updateMap(jar, packagedClasses);
- }
-
- if (null == jar || jar.isEmpty()) {
- return null;
- }
-
- LOG.debug(String.format("For class %s, using jar %s", my_class.getName(), jar));
- return new Path(jar).makeQualified(fs);
- }
-
- /**
- * Add entries to <code>packagedClasses</code> corresponding to class files
- * contained in <code>jar</code>.
- * @param jar The jar whose contents to list.
- * @param packagedClasses map[class -> jar]
- */
- private static void updateMap(String jar, Map<String, String> packagedClasses) throws IOException {
- if (null == jar || jar.isEmpty()) {
- return;
- }
- ZipFile zip = null;
- try {
- zip = new ZipFile(jar);
- for (Enumeration<? extends ZipEntry> iter = zip.entries(); iter.hasMoreElements();) {
- ZipEntry entry = iter.nextElement();
- if (entry.getName().endsWith("class")) {
- packagedClasses.put(entry.getName(), jar);
- }
- }
- } finally {
- if (null != zip) zip.close();
- }
- }
-
- /**
- * Find a jar that contains a class of the same name, if any. It will return
- * a jar file, even if that is not the first thing on the class path that
- * has a class with the same name. Looks first on the classpath and then in
- * the <code>packagedClasses</code> map.
- * @param my_class the class to find.
- * @return a jar file that contains the class, or null.
- * @throws IOException
- */
- private static String findContainingJar(Class<?> my_class, Map<String, String> packagedClasses)
- throws IOException {
- ClassLoader loader = my_class.getClassLoader();
-
- String class_file = my_class.getName().replaceAll("\\.", "/") + ".class";
-
- if (loader != null) {
- // first search the classpath
- for (Enumeration<URL> itr = loader.getResources(class_file); itr.hasMoreElements();) {
- URL url = itr.nextElement();
- if ("jar".equals(url.getProtocol())) {
- String toReturn = url.getPath();
- if (toReturn.startsWith("file:")) {
- toReturn = toReturn.substring("file:".length());
- }
- // URLDecoder is a misnamed class, since it actually decodes
- // x-www-form-urlencoded MIME type rather than actual
- // URL encoding (which the file path has). Therefore it would
- // decode +s to ' 's which is incorrect (spaces are actually
- // either unencoded or encoded as "%20"). Replace +s first, so
- // that they are kept sacred during the decoding process.
- toReturn = toReturn.replaceAll("\\+", "%2B");
- toReturn = URLDecoder.decode(toReturn, "UTF-8");
- return toReturn.replaceAll("!.*$", "");
- }
- }
- }
-
- // now look in any jars we've packaged using JarFinder. Returns null when
- // no jar is found.
- return packagedClasses.get(class_file);
- }
-
- /**
- * Invoke 'getJar' on a custom JarFinder implementation. Useful for some job
- * configuration contexts (HBASE-8140) and also for testing on MRv2.
- * check if we have HADOOP-9426.
- * @param my_class the class to find.
- * @return a jar file that contains the class, or null.
- */
- private static String getJar(Class<?> my_class) {
- String ret = null;
- try {
- ret = JarFinder.getJar(my_class);
- } catch (Exception e) {
- // toss all other exceptions, related to reflection failure
- throw new RuntimeException("getJar invocation failed.", e);
- }
-
- return ret;
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapper.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapper.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapper.java
deleted file mode 100644
index 9a7dcb7..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapper.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.mapreduce.Mapper;
-
-/**
- * Extends the base <code>Mapper</code> class to add the required input key
- * and value classes.
- *
- * @param <KEYOUT> The type of the key.
- * @param <VALUEOUT> The type of the value.
- * @see org.apache.hadoop.mapreduce.Mapper
- */
-@InterfaceAudience.Public
-public abstract class TableMapper<KEYOUT, VALUEOUT>
-extends Mapper<ImmutableBytesWritable, Result, KEYOUT, VALUEOUT> {
-
-}
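-
-// Illustrative subclass sketch, not part of this file (class name and emitted types are
-// hypothetical):
-//   public class MyMapper extends TableMapper<Text, IntWritable> {
-//     @Override
-//     protected void map(ImmutableBytesWritable row, Result value, Context context)
-//         throws IOException, InterruptedException {
-//       // emit one record per row: row key -> number of cells in the Result
-//       context.write(new Text(Bytes.toString(row.get())), new IntWritable(value.size()));
-//     }
-//   }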
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableOutputCommitter.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableOutputCommitter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableOutputCommitter.java
deleted file mode 100644
index 749fd85..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableOutputCommitter.java
+++ /dev/null
@@ -1,67 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.OutputCommitter;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-
-/**
- * Small committer class that does not do anything.
- */
-@InterfaceAudience.Public
-public class TableOutputCommitter extends OutputCommitter {
-
- @Override
- public void abortTask(TaskAttemptContext arg0) throws IOException {
- }
-
- @Override
- public void cleanupJob(JobContext arg0) throws IOException {
- }
-
- @Override
- public void commitTask(TaskAttemptContext arg0) throws IOException {
- }
-
- @Override
- public boolean needsTaskCommit(TaskAttemptContext arg0) throws IOException {
- return false;
- }
-
- @Override
- public void setupJob(JobContext arg0) throws IOException {
- }
-
- @Override
- public void setupTask(TaskAttemptContext arg0) throws IOException {
- }
-
- public boolean isRecoverySupported() {
- return true;
- }
-
- public void recoverTask(TaskAttemptContext taskContext)
- throws IOException
- {
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableOutputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableOutputFormat.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableOutputFormat.java
deleted file mode 100644
index 5986df8..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableOutputFormat.java
+++ /dev/null
@@ -1,239 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.TableNotEnabledException;
-import org.apache.hadoop.hbase.TableNotFoundException;
-import org.apache.hadoop.hbase.client.Admin;
-import org.apache.hadoop.hbase.client.BufferedMutator;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Delete;
-import org.apache.hadoop.hbase.client.Mutation;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.OutputCommitter;
-import org.apache.hadoop.mapreduce.OutputFormat;
-import org.apache.hadoop.mapreduce.RecordWriter;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-
-/**
- * Convert Map/Reduce output and write it to an HBase table. The KEY is ignored
- * while the output value <u>must</u> be either a {@link Put} or a
- * {@link Delete} instance.
- */
-@InterfaceAudience.Public
-public class TableOutputFormat<KEY> extends OutputFormat<KEY, Mutation>
-implements Configurable {
-
- private static final Log LOG = LogFactory.getLog(TableOutputFormat.class);
-
- /** Job parameter that specifies the output table. */
- public static final String OUTPUT_TABLE = "hbase.mapred.outputtable";
-
- /**
- * Prefix for configuration property overrides to apply in {@link #setConf(Configuration)}.
- * For keys matching this prefix, the prefix is stripped, and the value is set in the
- * configuration with the resulting key, ie. the entry "hbase.mapred.output.key1 = value1"
- * would be set in the configuration as "key1 = value1". Use this to set properties
- * which should only be applied to the {@code TableOutputFormat} configuration and not the
- * input configuration.
- */
- public static final String OUTPUT_CONF_PREFIX = "hbase.mapred.output.";
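-
-  // Sketch, not part of this file: a prefixed override is copied into the output configuration
-  // with the prefix stripped when setConf() runs (the quorum value below is hypothetical):
-  //   conf.set(TableOutputFormat.OUTPUT_CONF_PREFIX + "hbase.zookeeper.quorum", "zk1,zk2,zk3");
-  //   // applied to the output cluster as hbase.zookeeper.quorum=zk1,zk2,zk3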
-
- /**
- * Optional job parameter to specify a peer cluster.
- * Used to specify the remote cluster when copying between HBase clusters (the
- * source is picked up from <code>hbase-site.xml</code>).
- * @see TableMapReduceUtil#initTableReducerJob(String, Class, org.apache.hadoop.mapreduce.Job, Class, String, String, String)
- */
- public static final String QUORUM_ADDRESS = OUTPUT_CONF_PREFIX + "quorum";
-
- /** Optional job parameter to specify peer cluster's ZK client port */
- public static final String QUORUM_PORT = OUTPUT_CONF_PREFIX + "quorum.port";
-
- /** Optional specification of the rs class name of the peer cluster */
- public static final String
- REGION_SERVER_CLASS = OUTPUT_CONF_PREFIX + "rs.class";
- /** Optional specification of the rs impl name of the peer cluster */
- public static final String
- REGION_SERVER_IMPL = OUTPUT_CONF_PREFIX + "rs.impl";
-
- /** The configuration. */
- private Configuration conf = null;
-
- /**
- * Writes the reducer output to an HBase table.
- */
- protected class TableRecordWriter
- extends RecordWriter<KEY, Mutation> {
-
- private Connection connection;
- private BufferedMutator mutator;
-
- /**
- * @throws IOException When the connection or the buffered mutator cannot be created.
- */
- public TableRecordWriter() throws IOException {
- String tableName = conf.get(OUTPUT_TABLE);
- this.connection = ConnectionFactory.createConnection(conf);
- this.mutator = connection.getBufferedMutator(TableName.valueOf(tableName));
- LOG.info("Created table instance for " + tableName);
- }
- /**
- * Closes the writer, in this case flushing any pending table commits.
- *
- * @param context The context.
- * @throws IOException When closing the writer fails.
- * @see RecordWriter#close(TaskAttemptContext)
- */
- @Override
- public void close(TaskAttemptContext context) throws IOException {
- try {
- if (mutator != null) {
- mutator.close();
- }
- } finally {
- if (connection != null) {
- connection.close();
- }
- }
- }
-
- /**
- * Writes a key/value pair into the table.
- *
- * @param key The key.
- * @param value The value.
- * @throws IOException When writing fails.
- * @see RecordWriter#write(Object, Object)
- */
- @Override
- public void write(KEY key, Mutation value)
- throws IOException {
- if (!(value instanceof Put) && !(value instanceof Delete)) {
- throw new IOException("Pass a Delete or a Put");
- }
- mutator.mutate(value);
- }
- }
-
- /**
- * Creates a new record writer.
- *
- * Be aware that the baseline javadoc gives the impression that there is a single
- * {@link RecordWriter} per job, but in HBase it is more natural to hand out a new
- * RecordWriter per call of this method. You must close the returned RecordWriter when done.
- * Failure to do so will drop writes.
- *
- * @param context The current task context.
- * @return The newly created writer instance.
- * @throws IOException When creating the writer fails.
- * @throws InterruptedException When the job is cancelled.
- */
- @Override
- public RecordWriter<KEY, Mutation> getRecordWriter(TaskAttemptContext context)
- throws IOException, InterruptedException {
- return new TableRecordWriter();
- }
-
- /**
- * Checks if the output table exists and is enabled.
- *
- * @param context The current context.
- * @throws IOException When the check fails.
- * @throws InterruptedException When the job is aborted.
- * @see OutputFormat#checkOutputSpecs(JobContext)
- */
- @Override
- public void checkOutputSpecs(JobContext context) throws IOException,
- InterruptedException {
-
- try (Admin admin = ConnectionFactory.createConnection(getConf()).getAdmin()) {
- TableName tableName = TableName.valueOf(this.conf.get(OUTPUT_TABLE));
- if (!admin.tableExists(tableName)) {
- throw new TableNotFoundException("Can't write, table does not exist: " +
- tableName.getNameAsString());
- }
-
- if (!admin.isTableEnabled(tableName)) {
- throw new TableNotEnabledException("Can't write, table is not enabled: " +
- tableName.getNameAsString());
- }
- }
- }
-
- /**
- * Returns the output committer.
- *
- * @param context The current context.
- * @return The committer.
- * @throws IOException When creating the committer fails.
- * @throws InterruptedException When the job is aborted.
- * @see OutputFormat#getOutputCommitter(TaskAttemptContext)
- */
- @Override
- public OutputCommitter getOutputCommitter(TaskAttemptContext context)
- throws IOException, InterruptedException {
- return new TableOutputCommitter();
- }
-
- @Override
- public Configuration getConf() {
- return conf;
- }
-
- @Override
- public void setConf(Configuration otherConf) {
- String tableName = otherConf.get(OUTPUT_TABLE);
- if(tableName == null || tableName.length() <= 0) {
- throw new IllegalArgumentException("Must specify table name");
- }
-
- String address = otherConf.get(QUORUM_ADDRESS);
- int zkClientPort = otherConf.getInt(QUORUM_PORT, 0);
- String serverClass = otherConf.get(REGION_SERVER_CLASS);
- String serverImpl = otherConf.get(REGION_SERVER_IMPL);
-
- try {
- this.conf = HBaseConfiguration.createClusterConf(otherConf, address, OUTPUT_CONF_PREFIX);
-
- if (serverClass != null) {
- this.conf.set(HConstants.REGION_SERVER_IMPL, serverImpl);
- }
- if (zkClientPort != 0) {
- this.conf.setInt(HConstants.ZOOKEEPER_CLIENT_PORT, zkClientPort);
- }
- } catch(IOException e) {
- LOG.error(e);
- throw new RuntimeException(e);
- }
- }
-}
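
For context while reviewing the removal: jobs usually reach this class through TableMapReduceUtil.initTableReducerJob (referenced in the QUORUM_ADDRESS javadoc above), but the format can also be wired by hand. The following is a minimal, hedged sketch of that manual path; the table name "my_table", the job name, and the commented-out peer-cluster quorum are illustrative assumptions, not part of the patch.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.mapreduce.Job;

public class TableOutputFormatUsage {
  public static Job createJob() throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // Table the reducers write to; read back in setConf() and TableRecordWriter.
    conf.set(TableOutputFormat.OUTPUT_TABLE, "my_table");
    // Optional: write to a peer cluster instead of the one in hbase-site.xml.
    // The "hbase.mapred.output." prefix keeps the override on the output side only.
    // conf.set(TableOutputFormat.QUORUM_ADDRESS, "zk1,zk2,zk3:2181:/hbase");
    Job job = Job.getInstance(conf, "table-output-format-example");
    job.setOutputFormatClass(TableOutputFormat.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Mutation.class);
    return job;
  }
}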
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableRecordReader.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableRecordReader.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableRecordReader.java
deleted file mode 100644
index f66520b..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableRecordReader.java
+++ /dev/null
@@ -1,147 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-
-/**
- * Iterates over HBase table data, returning (ImmutableBytesWritable, Result)
- * pairs.
- */
-@InterfaceAudience.Public
-public class TableRecordReader
-extends RecordReader<ImmutableBytesWritable, Result> {
-
- private TableRecordReaderImpl recordReaderImpl = new TableRecordReaderImpl();
-
- /**
- * Restart from survivable exceptions by creating a new scanner.
- *
- * @param firstRow The first row to start at.
- * @throws IOException When restarting fails.
- */
- public void restart(byte[] firstRow) throws IOException {
- this.recordReaderImpl.restart(firstRow);
- }
-
- /**
- * @param table the {@link Table} to scan.
- */
- public void setTable(Table table) {
- this.recordReaderImpl.setHTable(table);
- }
-
- /**
- * Sets the scan defining the actual details like columns etc.
- *
- * @param scan The scan to set.
- */
- public void setScan(Scan scan) {
- this.recordReaderImpl.setScan(scan);
- }
-
- /**
- * Closes the split.
- *
- * @see org.apache.hadoop.mapreduce.RecordReader#close()
- */
- @Override
- public void close() {
- this.recordReaderImpl.close();
- }
-
- /**
- * Returns the current key.
- *
- * @return The current key.
- * @throws IOException
- * @throws InterruptedException When the job is aborted.
- * @see org.apache.hadoop.mapreduce.RecordReader#getCurrentKey()
- */
- @Override
- public ImmutableBytesWritable getCurrentKey() throws IOException,
- InterruptedException {
- return this.recordReaderImpl.getCurrentKey();
- }
-
- /**
- * Returns the current value.
- *
- * @return The current value.
- * @throws IOException When the value is faulty.
- * @throws InterruptedException When the job is aborted.
- * @see org.apache.hadoop.mapreduce.RecordReader#getCurrentValue()
- */
- @Override
- public Result getCurrentValue() throws IOException, InterruptedException {
- return this.recordReaderImpl.getCurrentValue();
- }
-
- /**
- * Initializes the reader.
- *
- * @param inputsplit The split to work with.
- * @param context The current task context.
- * @throws IOException When setting up the reader fails.
- * @throws InterruptedException When the job is aborted.
- * @see org.apache.hadoop.mapreduce.RecordReader#initialize(
- * org.apache.hadoop.mapreduce.InputSplit,
- * org.apache.hadoop.mapreduce.TaskAttemptContext)
- */
- @Override
- public void initialize(InputSplit inputsplit,
- TaskAttemptContext context) throws IOException,
- InterruptedException {
- this.recordReaderImpl.initialize(inputsplit, context);
- }
-
- /**
- * Positions the record reader to the next record.
- *
- * @return <code>true</code> if there was another record.
- * @throws IOException When reading the record failed.
- * @throws InterruptedException When the job was aborted.
- * @see org.apache.hadoop.mapreduce.RecordReader#nextKeyValue()
- */
- @Override
- public boolean nextKeyValue() throws IOException, InterruptedException {
- return this.recordReaderImpl.nextKeyValue();
- }
-
- /**
- * The current progress of the record reader through its data.
- *
- * @return A number between 0.0 and 1.0, the fraction of the data read.
- * @see org.apache.hadoop.mapreduce.RecordReader#getProgress()
- */
- @Override
- public float getProgress() {
- return this.recordReaderImpl.getProgress();
- }
-
-}
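
As a reading aid for the class above, a hedged sketch of the call sequence an input format drives on this reader. The table, split, and context arguments are assumed to be supplied by the caller; the counting logic is only a placeholder for real per-row processing.

import java.io.IOException;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableRecordReader;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

class TableRecordReaderUsage {
  static long countRows(Table table, InputSplit split, TaskAttemptContext context)
      throws IOException, InterruptedException {
    TableRecordReader reader = new TableRecordReader();
    reader.setTable(table);        // the Table to scan
    reader.setScan(new Scan());    // columns, ranges, caching, etc. go here
    reader.initialize(split, context);
    long rows = 0;
    try {
      while (reader.nextKeyValue()) {
        ImmutableBytesWritable rowKey = reader.getCurrentKey();
        Result row = reader.getCurrentValue();
        rows++;                    // process rowKey/row here
      }
    } finally {
      reader.close();
    }
    return rows;
  }
}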
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableRecordReaderImpl.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableRecordReaderImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableRecordReaderImpl.java
deleted file mode 100644
index 9a1c98e..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableRecordReaderImpl.java
+++ /dev/null
@@ -1,315 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-import java.lang.reflect.Method;
-import java.util.Map;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.ScannerCallable;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
-import org.apache.hadoop.hbase.DoNotRetryIOException;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapreduce.Counter;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.util.StringUtils;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.annotations.VisibleForTesting;
-
-/**
- * Iterates over HBase table data, returning (ImmutableBytesWritable, Result)
- * pairs.
- */
-@InterfaceAudience.Public
-public class TableRecordReaderImpl {
- public static final String LOG_PER_ROW_COUNT
- = "hbase.mapreduce.log.scanner.rowcount";
-
- private static final Log LOG = LogFactory.getLog(TableRecordReaderImpl.class);
-
- // HBASE_COUNTER_GROUP_NAME is the name of mapreduce counter group for HBase
- @VisibleForTesting
- static final String HBASE_COUNTER_GROUP_NAME = "HBase Counters";
- private ResultScanner scanner = null;
- private Scan scan = null;
- private Scan currentScan = null;
- private Table htable = null;
- private byte[] lastSuccessfulRow = null;
- private ImmutableBytesWritable key = null;
- private Result value = null;
- private TaskAttemptContext context = null;
- private Method getCounter = null;
- private long numRestarts = 0;
- private long numStale = 0;
- private long timestamp;
- private int rowcount;
- private boolean logScannerActivity = false;
- private int logPerRowCount = 100;
-
- /**
- * Restart from survivable exceptions by creating a new scanner.
- *
- * @param firstRow The first row to start at.
- * @throws IOException When restarting fails.
- */
- public void restart(byte[] firstRow) throws IOException {
- currentScan = new Scan(scan);
- currentScan.withStartRow(firstRow);
- currentScan.setScanMetricsEnabled(true);
- if (this.scanner != null) {
- if (logScannerActivity) {
- LOG.info("Closing the previously opened scanner object.");
- }
- this.scanner.close();
- }
- this.scanner = this.htable.getScanner(currentScan);
- if (logScannerActivity) {
- LOG.info("Current scan=" + currentScan.toString());
- timestamp = System.currentTimeMillis();
- rowcount = 0;
- }
- }
-
- /**
- * In the new mapreduce APIs, TaskAttemptContext has two getCounter methods.
- * Check whether the getCounter(String, String) method is available.
- * @return The getCounter method or null if not available.
- * @throws IOException
- */
- protected static Method retrieveGetCounterWithStringsParams(TaskAttemptContext context)
- throws IOException {
- Method m = null;
- try {
- m = context.getClass().getMethod("getCounter",
- new Class [] {String.class, String.class});
- } catch (SecurityException e) {
- throw new IOException("Failed test for getCounter", e);
- } catch (NoSuchMethodException e) {
- // Ignore
- }
- return m;
- }
-
- /**
- * Sets the HBase table.
- *
- * @param htable The {@link org.apache.hadoop.hbase.client.Table} to scan.
- */
- public void setHTable(Table htable) {
- Configuration conf = htable.getConfiguration();
- logScannerActivity = conf.getBoolean(
- ScannerCallable.LOG_SCANNER_ACTIVITY, false);
- logPerRowCount = conf.getInt(LOG_PER_ROW_COUNT, 100);
- this.htable = htable;
- }
-
- /**
- * Sets the scan defining the actual details like columns etc.
- *
- * @param scan The scan to set.
- */
- public void setScan(Scan scan) {
- this.scan = scan;
- }
-
- /**
- * Build the scanner. Not done in constructor to allow for extension.
- *
- * @throws IOException
- * @throws InterruptedException
- */
- public void initialize(InputSplit inputsplit,
- TaskAttemptContext context) throws IOException,
- InterruptedException {
- if (context != null) {
- this.context = context;
- getCounter = retrieveGetCounterWithStringsParams(context);
- }
- restart(scan.getStartRow());
- }
-
- /**
- * Closes the split.
- */
- public void close() {
- if (this.scanner != null) {
- this.scanner.close();
- }
- try {
- this.htable.close();
- } catch (IOException ioe) {
- LOG.warn("Error closing table", ioe);
- }
- }
-
- /**
- * Returns the current key.
- *
- * @return The current key.
- * @throws IOException
- * @throws InterruptedException When the job is aborted.
- */
- public ImmutableBytesWritable getCurrentKey() throws IOException,
- InterruptedException {
- return key;
- }
-
- /**
- * Returns the current value.
- *
- * @return The current value.
- * @throws IOException When the value is faulty.
- * @throws InterruptedException When the job is aborted.
- */
- public Result getCurrentValue() throws IOException, InterruptedException {
- return value;
- }
-
-
- /**
- * Positions the record reader to the next record.
- *
- * @return <code>true</code> if there was another record.
- * @throws IOException When reading the record failed.
- * @throws InterruptedException When the job was aborted.
- */
- public boolean nextKeyValue() throws IOException, InterruptedException {
- if (key == null) key = new ImmutableBytesWritable();
- if (value == null) value = new Result();
- try {
- try {
- value = this.scanner.next();
- if (value != null && value.isStale()) numStale++;
- if (logScannerActivity) {
- rowcount ++;
- if (rowcount >= logPerRowCount) {
- long now = System.currentTimeMillis();
- LOG.info("Mapper took " + (now-timestamp)
- + "ms to process " + rowcount + " rows");
- timestamp = now;
- rowcount = 0;
- }
- }
- } catch (IOException e) {
- // do not retry if the exception tells us not to do so
- if (e instanceof DoNotRetryIOException) {
- throw e;
- }
- // try to handle all other IOExceptions by restarting
- // the scanner, if the second call fails, it will be rethrown
- LOG.info("recovered from " + StringUtils.stringifyException(e));
- if (lastSuccessfulRow == null) {
- LOG.warn("We are restarting the first next() invocation," +
- " if your mapper has restarted a few other times like this" +
- " then you should consider killing this job and investigate" +
- " why it's taking so long.");
- }
- if (lastSuccessfulRow == null) {
- restart(scan.getStartRow());
- } else {
- restart(lastSuccessfulRow);
- scanner.next(); // skip presumed already mapped row
- }
- value = scanner.next();
- if (value != null && value.isStale()) numStale++;
- numRestarts++;
- }
- if (value != null && value.size() > 0) {
- key.set(value.getRow());
- lastSuccessfulRow = key.get();
- return true;
- }
-
- updateCounters();
- return false;
- } catch (IOException ioe) {
- if (logScannerActivity) {
- long now = System.currentTimeMillis();
- LOG.info("Mapper took " + (now-timestamp)
- + "ms to process " + rowcount + " rows");
- LOG.info(ioe);
- String lastRow = lastSuccessfulRow == null ?
- "null" : Bytes.toStringBinary(lastSuccessfulRow);
- LOG.info("lastSuccessfulRow=" + lastRow);
- }
- throw ioe;
- }
- }
-
- /**
- * If HBase runs on a new version of mapreduce, the RecordReader has access to
- * counters and can update them based on the scan metrics.
- * If HBase runs on an old version of mapreduce, it won't be able to get
- * access to counters, and TableRecordReader can't update counter values.
- * @throws IOException
- */
- private void updateCounters() throws IOException {
- ScanMetrics scanMetrics = scanner.getScanMetrics();
- if (scanMetrics == null) {
- return;
- }
-
- updateCounters(scanMetrics, numRestarts, getCounter, context, numStale);
- }
-
- protected static void updateCounters(ScanMetrics scanMetrics, long numScannerRestarts,
- Method getCounter, TaskAttemptContext context, long numStale) {
- // we can get access to counters only if hbase uses new mapreduce APIs
- if (getCounter == null) {
- return;
- }
-
- try {
- for (Map.Entry<String, Long> entry:scanMetrics.getMetricsMap().entrySet()) {
- Counter ct = (Counter)getCounter.invoke(context,
- HBASE_COUNTER_GROUP_NAME, entry.getKey());
-
- ct.increment(entry.getValue());
- }
- ((Counter) getCounter.invoke(context, HBASE_COUNTER_GROUP_NAME,
- "NUM_SCANNER_RESTARTS")).increment(numScannerRestarts);
- ((Counter) getCounter.invoke(context, HBASE_COUNTER_GROUP_NAME,
- "NUM_SCAN_RESULTS_STALE")).increment(numStale);
- } catch (Exception e) {
- LOG.debug("can't update counter." + StringUtils.stringifyException(e));
- }
- }
-
- /**
- * The current progress of the record reader through its data.
- *
- * @return A number between 0.0 and 1.0, the fraction of the data read.
- */
- public float getProgress() {
- // Depends on the total number of tuples
- return 0;
- }
-
-}
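
A small, hedged configuration sketch of the two knobs setHTable() reads above; the caching cadence of 500 rows is an illustrative value, not a recommendation from the patch.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.ScannerCallable;
import org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl;

public class ScannerLoggingConfig {
  public static Configuration create() {
    Configuration conf = HBaseConfiguration.create();
    // Log scanner activity (scan restarts, per-batch timings) from the reader.
    conf.setBoolean(ScannerCallable.LOG_SCANNER_ACTIVITY, true);
    // Report mapper timings every 500 rows instead of the default 100.
    conf.setInt(TableRecordReaderImpl.LOG_PER_ROW_COUNT, 500);
    return conf;
  }
}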
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableReducer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableReducer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableReducer.java
deleted file mode 100644
index f0bfc74..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableReducer.java
+++ /dev/null
@@ -1,45 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Mutation;
-import org.apache.hadoop.mapreduce.Reducer;
-
-/**
- * Extends the basic <code>Reducer</code> class to add the required key and
- * value input/output classes. While the input key and value as well as the
- * output key can be anything handed in from the previous map phase, the output
- * value <u>must</u> be either a {@link org.apache.hadoop.hbase.client.Put Put}
- * or a {@link org.apache.hadoop.hbase.client.Delete Delete} instance when
- * using the {@link TableOutputFormat} class.
- * <p>
- * This class is extended by {@link IdentityTableReducer} but can also be
- * subclassed to implement similar features or any custom code needed. It has
- * the advantage of enforcing the output value to a specific basic type.
- *
- * @param <KEYIN> The type of the input key.
- * @param <VALUEIN> The type of the input value.
- * @param <KEYOUT> The type of the output key.
- * @see org.apache.hadoop.mapreduce.Reducer
- */
-@InterfaceAudience.Public
-public abstract class TableReducer<KEYIN, VALUEIN, KEYOUT>
-extends Reducer<KEYIN, VALUEIN, KEYOUT, Mutation> {
-}
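
To make the KEYIN/VALUEIN/KEYOUT generics above concrete, a hypothetical reducer that sums integer values per key and emits one Put per row; the column family "f" and qualifier "sum" are assumptions for illustration only.

import java.io.IOException;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

public class SumTableReducer extends TableReducer<Text, IntWritable, ImmutableBytesWritable> {
  private static final byte[] CF = Bytes.toBytes("f");
  private static final byte[] QUALIFIER = Bytes.toBytes("sum");

  @Override
  protected void reduce(Text key, Iterable<IntWritable> values, Context context)
      throws IOException, InterruptedException {
    long sum = 0;
    for (IntWritable v : values) {
      sum += v.get();
    }
    byte[] row = Bytes.toBytes(key.toString());
    Put put = new Put(row);
    put.addColumn(CF, QUALIFIER, Bytes.toBytes(sum));
    // TableOutputFormat ignores the key; the Put is what gets written.
    context.write(new ImmutableBytesWritable(row), put);
  }
}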
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormat.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormat.java
deleted file mode 100644
index 7e59c3b..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormat.java
+++ /dev/null
@@ -1,210 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.annotations.VisibleForTesting;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapreduce.InputFormat;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.lang.reflect.Method;
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * TableSnapshotInputFormat allows a MapReduce job to run over a table snapshot. The job
- * bypasses the HBase servers and accesses the underlying files (hfiles, recovered edits,
- * WALs, etc.) directly to provide maximum performance. The snapshot is not required to be
- * restored to the live cluster or cloned. This also allows running the mapreduce job against an
- * online or offline hbase cluster. The snapshot files can be exported by using the
- * {@link org.apache.hadoop.hbase.snapshot.ExportSnapshot} tool, to a pure-hdfs cluster,
- * and this InputFormat can be used to run the mapreduce job directly over the snapshot files.
- * The snapshot should not be deleted while there are jobs reading from snapshot files.
- * <p>
- * Usage is similar to TableInputFormat, and
- * {@link TableMapReduceUtil#initTableSnapshotMapperJob(String, Scan, Class, Class, Class, Job,
- * boolean, Path)}
- * can be used to configure the job.
- * <pre>{@code
- * Job job = new Job(conf);
- * Scan scan = new Scan();
- * TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
- * scan, MyTableMapper.class, MyMapKeyOutput.class,
- * MyMapOutputValueWritable.class, job, true);
- * }
- * </pre>
- * <p>
- * Internally, this input format restores the snapshot into the given tmp directory. Similar to
- * {@link TableInputFormat}, an InputSplit is created per region. The region is opened for reading
- * from each RecordReader. An internal RegionScanner is used to execute the
- * {@link org.apache.hadoop.hbase.CellScanner} obtained from the user.
- * <p>
- * HBase owns all the data and snapshot files on the filesystem. Only the 'hbase' user can read from
- * snapshot files and data files.
- * To read from snapshot files directly from the file system, the user who is running the MR job
- * must have sufficient permissions to access snapshot and reference files.
- * This means that to run mapreduce over snapshot files, the MR job has to be run as the HBase
- * user or the user must have group or other privileges in the filesystem (See HBASE-8369).
- * Note that granting other users read access to snapshot/data files completely circumvents
- * the access control enforced by HBase.
- * @see org.apache.hadoop.hbase.client.TableSnapshotScanner
- */
-@InterfaceAudience.Public
-public class TableSnapshotInputFormat extends InputFormat<ImmutableBytesWritable, Result> {
-
- public static class TableSnapshotRegionSplit extends InputSplit implements Writable {
- private TableSnapshotInputFormatImpl.InputSplit delegate;
-
- // constructor for mapreduce framework / Writable
- public TableSnapshotRegionSplit() {
- this.delegate = new TableSnapshotInputFormatImpl.InputSplit();
- }
-
- public TableSnapshotRegionSplit(TableSnapshotInputFormatImpl.InputSplit delegate) {
- this.delegate = delegate;
- }
-
- public TableSnapshotRegionSplit(HTableDescriptor htd, HRegionInfo regionInfo,
- List<String> locations, Scan scan, Path restoreDir) {
- this.delegate =
- new TableSnapshotInputFormatImpl.InputSplit(htd, regionInfo, locations, scan, restoreDir);
- }
-
- @Override
- public long getLength() throws IOException, InterruptedException {
- return delegate.getLength();
- }
-
- @Override
- public String[] getLocations() throws IOException, InterruptedException {
- return delegate.getLocations();
- }
-
- @Override
- public void write(DataOutput out) throws IOException {
- delegate.write(out);
- }
-
- @Override
- public void readFields(DataInput in) throws IOException {
- delegate.readFields(in);
- }
-
- public HRegionInfo getRegionInfo() {
- return delegate.getRegionInfo();
- }
-
- }
-
- @VisibleForTesting
- static class TableSnapshotRegionRecordReader extends
- RecordReader<ImmutableBytesWritable, Result> {
- private TableSnapshotInputFormatImpl.RecordReader delegate =
- new TableSnapshotInputFormatImpl.RecordReader();
- private TaskAttemptContext context;
- private Method getCounter;
-
- @Override
- public void initialize(InputSplit split, TaskAttemptContext context) throws IOException,
- InterruptedException {
- this.context = context;
- getCounter = TableRecordReaderImpl.retrieveGetCounterWithStringsParams(context);
- delegate.initialize(
- ((TableSnapshotRegionSplit) split).delegate,
- context.getConfiguration());
- }
-
- @Override
- public boolean nextKeyValue() throws IOException, InterruptedException {
- boolean result = delegate.nextKeyValue();
- if (result) {
- ScanMetrics scanMetrics = delegate.getScanner().getScanMetrics();
- if (scanMetrics != null && context != null) {
- TableRecordReaderImpl.updateCounters(scanMetrics, 0, getCounter, context, 0);
- }
- }
- return result;
- }
-
- @Override
- public ImmutableBytesWritable getCurrentKey() throws IOException, InterruptedException {
- return delegate.getCurrentKey();
- }
-
- @Override
- public Result getCurrentValue() throws IOException, InterruptedException {
- return delegate.getCurrentValue();
- }
-
- @Override
- public float getProgress() throws IOException, InterruptedException {
- return delegate.getProgress();
- }
-
- @Override
- public void close() throws IOException {
- delegate.close();
- }
- }
-
- @Override
- public RecordReader<ImmutableBytesWritable, Result> createRecordReader(
- InputSplit split, TaskAttemptContext context) throws IOException {
- return new TableSnapshotRegionRecordReader();
- }
-
- @Override
- public List<InputSplit> getSplits(JobContext job) throws IOException, InterruptedException {
- List<InputSplit> results = new ArrayList<>();
- for (TableSnapshotInputFormatImpl.InputSplit split :
- TableSnapshotInputFormatImpl.getSplits(job.getConfiguration())) {
- results.add(new TableSnapshotRegionSplit(split));
- }
- return results;
- }
-
- /**
- * Configures the job to use TableSnapshotInputFormat to read from a snapshot.
- * @param job the job to configure
- * @param snapshotName the name of the snapshot to read from
- * @param restoreDir a temporary directory to restore the snapshot into. Current user should
- * have write permissions to this directory, and this should not be a subdirectory of rootdir.
- * After the job is finished, restoreDir can be deleted.
- * @throws IOException if an error occurs
- */
- public static void setInput(Job job, String snapshotName, Path restoreDir)
- throws IOException {
- TableSnapshotInputFormatImpl.setInput(job.getConfiguration(), snapshotName, restoreDir);
- }
-}
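
Beyond the TableMapReduceUtil helper shown in the class javadoc, a minimal, hedged sketch of configuring the format directly through setInput(); the snapshot name "my_snapshot" and the restore directory are placeholders.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat;
import org.apache.hadoop.mapreduce.Job;

public class SnapshotJobSetup {
  public static void configure(Job job) throws Exception {
    // The restore directory must be writable by the job submitter and must not
    // be a subdirectory of the HBase rootdir; it can be deleted after the job.
    TableSnapshotInputFormat.setInput(job, "my_snapshot", new Path("/tmp/snapshot-restore"));
    job.setInputFormatClass(TableSnapshotInputFormat.class);
  }
}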
[09/41] hbase git commit: HBASE-18640 Move mapreduce out of
hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/ScanPerformanceEvaluation.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/ScanPerformanceEvaluation.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/ScanPerformanceEvaluation.java
deleted file mode 100644
index e669f14..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/ScanPerformanceEvaluation.java
+++ /dev/null
@@ -1,406 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase;
-
-import java.io.IOException;
-import java.util.concurrent.TimeUnit;
-
-import org.apache.commons.cli.CommandLine;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.client.TableSnapshotScanner;
-import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
-import org.apache.hadoop.hbase.mapreduce.TableMapper;
-import org.apache.hadoop.hbase.util.AbstractHBaseTool;
-import org.apache.hadoop.hbase.util.FSUtils;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.Counters;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
-import org.apache.hadoop.util.StringUtils;
-import org.apache.hadoop.util.ToolRunner;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.base.Stopwatch;
-
-/**
- * A simple performance evaluation tool for single client and MR scans
- * and snapshot scans.
- */
-@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
-public class ScanPerformanceEvaluation extends AbstractHBaseTool {
-
- private static final String HBASE_COUNTER_GROUP_NAME = "HBase Counters";
-
- private String type;
- private String file;
- private String tablename;
- private String snapshotName;
- private String restoreDir;
- private String caching;
-
- @Override
- public void setConf(Configuration conf) {
- super.setConf(conf);
- Path rootDir;
- try {
- rootDir = FSUtils.getRootDir(conf);
- rootDir.getFileSystem(conf);
- } catch (IOException ex) {
- throw new RuntimeException(ex);
- }
- }
-
- @Override
- protected void addOptions() {
- this.addRequiredOptWithArg("t", "type", "the type of the test. One of the following: streaming|scan|snapshotscan|scanmapreduce|snapshotscanmapreduce");
- this.addOptWithArg("f", "file", "the filename to read from");
- this.addOptWithArg("tn", "table", "the tablename to read from");
- this.addOptWithArg("sn", "snapshot", "the snapshot name to read from");
- this.addOptWithArg("rs", "restoredir", "the directory to restore the snapshot");
- this.addOptWithArg("ch", "caching", "scanner caching value");
- }
-
- @Override
- protected void processOptions(CommandLine cmd) {
- type = cmd.getOptionValue("type");
- file = cmd.getOptionValue("file");
- tablename = cmd.getOptionValue("table");
- snapshotName = cmd.getOptionValue("snapshot");
- restoreDir = cmd.getOptionValue("restoredir");
- caching = cmd.getOptionValue("caching");
- }
-
- protected void testHdfsStreaming(Path filename) throws IOException {
- byte[] buf = new byte[1024];
- FileSystem fs = filename.getFileSystem(getConf());
-
- // read the file from start to finish
- Stopwatch fileOpenTimer = Stopwatch.createUnstarted();
- Stopwatch streamTimer = Stopwatch.createUnstarted();
-
- fileOpenTimer.start();
- FSDataInputStream in = fs.open(filename);
- fileOpenTimer.stop();
-
- long totalBytes = 0;
- streamTimer.start();
- while (true) {
- int read = in.read(buf);
- if (read < 0) {
- break;
- }
- totalBytes += read;
- }
- streamTimer.stop();
-
- double throughput = (double)totalBytes / streamTimer.elapsed(TimeUnit.SECONDS);
-
- System.out.println("HDFS streaming: ");
- System.out.println("total time to open: " +
- fileOpenTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
- System.out.println("total time to read: " + streamTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
- System.out.println("total bytes: " + totalBytes + " bytes ("
- + StringUtils.humanReadableInt(totalBytes) + ")");
- System.out.println("throghput : " + StringUtils.humanReadableInt((long)throughput) + "B/s");
- }
-
- private Scan getScan() {
- Scan scan = new Scan(); // default scan settings
- scan.setCacheBlocks(false);
- scan.setMaxVersions(1);
- scan.setScanMetricsEnabled(true);
- if (caching != null) {
- scan.setCaching(Integer.parseInt(caching));
- }
-
- return scan;
- }
-
- public void testScan() throws IOException {
- Stopwatch tableOpenTimer = Stopwatch.createUnstarted();
- Stopwatch scanOpenTimer = Stopwatch.createUnstarted();
- Stopwatch scanTimer = Stopwatch.createUnstarted();
-
- tableOpenTimer.start();
- Connection connection = ConnectionFactory.createConnection(getConf());
- Table table = connection.getTable(TableName.valueOf(tablename));
- tableOpenTimer.stop();
-
- Scan scan = getScan();
- scanOpenTimer.start();
- ResultScanner scanner = table.getScanner(scan);
- scanOpenTimer.stop();
-
- long numRows = 0;
- long numCells = 0;
- scanTimer.start();
- while (true) {
- Result result = scanner.next();
- if (result == null) {
- break;
- }
- numRows++;
-
- numCells += result.rawCells().length;
- }
- scanTimer.stop();
- scanner.close();
- table.close();
- connection.close();
-
- ScanMetrics metrics = scan.getScanMetrics();
- long totalBytes = metrics.countOfBytesInResults.get();
- double throughput = (double)totalBytes / scanTimer.elapsed(TimeUnit.SECONDS);
- double throughputRows = (double)numRows / scanTimer.elapsed(TimeUnit.SECONDS);
- double throughputCells = (double)numCells / scanTimer.elapsed(TimeUnit.SECONDS);
-
- System.out.println("HBase scan: ");
- System.out.println("total time to open table: " +
- tableOpenTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
- System.out.println("total time to open scanner: " +
- scanOpenTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
- System.out.println("total time to scan: " +
- scanTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
-
- System.out.println("Scan metrics:\n" + metrics.getMetricsMap());
-
- System.out.println("total bytes: " + totalBytes + " bytes ("
- + StringUtils.humanReadableInt(totalBytes) + ")");
- System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughput) + "B/s");
- System.out.println("total rows : " + numRows);
- System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughputRows) + " rows/s");
- System.out.println("total cells : " + numCells);
- System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughputCells) + " cells/s");
- }
-
-
- public void testSnapshotScan() throws IOException {
- Stopwatch snapshotRestoreTimer = Stopwatch.createUnstarted();
- Stopwatch scanOpenTimer = Stopwatch.createUnstarted();
- Stopwatch scanTimer = Stopwatch.createUnstarted();
-
- Path restoreDir = new Path(this.restoreDir);
-
- snapshotRestoreTimer.start();
- restoreDir.getFileSystem(conf).delete(restoreDir, true);
- snapshotRestoreTimer.stop();
-
- Scan scan = getScan();
- scanOpenTimer.start();
- TableSnapshotScanner scanner = new TableSnapshotScanner(conf, restoreDir, snapshotName, scan);
- scanOpenTimer.stop();
-
- long numRows = 0;
- long numCells = 0;
- scanTimer.start();
- while (true) {
- Result result = scanner.next();
- if (result == null) {
- break;
- }
- numRows++;
-
- numCells += result.rawCells().length;
- }
- scanTimer.stop();
- scanner.close();
-
- ScanMetrics metrics = scanner.getScanMetrics();
- long totalBytes = metrics.countOfBytesInResults.get();
- double throughput = (double)totalBytes / scanTimer.elapsed(TimeUnit.SECONDS);
- double throughputRows = (double)numRows / scanTimer.elapsed(TimeUnit.SECONDS);
- double throughputCells = (double)numCells / scanTimer.elapsed(TimeUnit.SECONDS);
-
- System.out.println("HBase scan snapshot: ");
- System.out.println("total time to restore snapshot: " +
- snapshotRestoreTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
- System.out.println("total time to open scanner: " +
- scanOpenTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
- System.out.println("total time to scan: " +
- scanTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
-
- System.out.println("Scan metrics:\n" + metrics.getMetricsMap());
-
- System.out.println("total bytes: " + totalBytes + " bytes ("
- + StringUtils.humanReadableInt(totalBytes) + ")");
- System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughput) + "B/s");
- System.out.println("total rows : " + numRows);
- System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughputRows) + " rows/s");
- System.out.println("total cells : " + numCells);
- System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughputCells) + " cells/s");
-
- }
-
- public static enum ScanCounter {
- NUM_ROWS,
- NUM_CELLS,
- }
-
- public static class MyMapper<KEYOUT, VALUEOUT> extends TableMapper<KEYOUT, VALUEOUT> {
- @Override
- protected void map(ImmutableBytesWritable key, Result value,
- Context context) throws IOException,
- InterruptedException {
- context.getCounter(ScanCounter.NUM_ROWS).increment(1);
- context.getCounter(ScanCounter.NUM_CELLS).increment(value.rawCells().length);
- }
- }
-
- public void testScanMapReduce() throws IOException, InterruptedException, ClassNotFoundException {
- Stopwatch scanOpenTimer = Stopwatch.createUnstarted();
- Stopwatch scanTimer = Stopwatch.createUnstarted();
-
- Scan scan = getScan();
-
- String jobName = "testScanMapReduce";
-
- Job job = new Job(conf);
- job.setJobName(jobName);
-
- job.setJarByClass(getClass());
-
- TableMapReduceUtil.initTableMapperJob(
- this.tablename,
- scan,
- MyMapper.class,
- NullWritable.class,
- NullWritable.class,
- job
- );
-
- job.setNumReduceTasks(0);
- job.setOutputKeyClass(NullWritable.class);
- job.setOutputValueClass(NullWritable.class);
- job.setOutputFormatClass(NullOutputFormat.class);
-
- scanTimer.start();
- job.waitForCompletion(true);
- scanTimer.stop();
-
- Counters counters = job.getCounters();
- long numRows = counters.findCounter(ScanCounter.NUM_ROWS).getValue();
- long numCells = counters.findCounter(ScanCounter.NUM_CELLS).getValue();
-
- long totalBytes = counters.findCounter(HBASE_COUNTER_GROUP_NAME, "BYTES_IN_RESULTS").getValue();
- double throughput = (double)totalBytes / scanTimer.elapsed(TimeUnit.SECONDS);
- double throughputRows = (double)numRows / scanTimer.elapsed(TimeUnit.SECONDS);
- double throughputCells = (double)numCells / scanTimer.elapsed(TimeUnit.SECONDS);
-
- System.out.println("HBase scan mapreduce: ");
- System.out.println("total time to open scanner: " +
- scanOpenTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
- System.out.println("total time to scan: " + scanTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
-
- System.out.println("total bytes: " + totalBytes + " bytes ("
- + StringUtils.humanReadableInt(totalBytes) + ")");
- System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughput) + "B/s");
- System.out.println("total rows : " + numRows);
- System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughputRows) + " rows/s");
- System.out.println("total cells : " + numCells);
- System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughputCells) + " cells/s");
- }
-
- public void testSnapshotScanMapReduce() throws IOException, InterruptedException, ClassNotFoundException {
- Stopwatch scanOpenTimer = Stopwatch.createUnstarted();
- Stopwatch scanTimer = Stopwatch.createUnstarted();
-
- Scan scan = getScan();
-
- String jobName = "testSnapshotScanMapReduce";
-
- Job job = new Job(conf);
- job.setJobName(jobName);
-
- job.setJarByClass(getClass());
-
- TableMapReduceUtil.initTableSnapshotMapperJob(
- this.snapshotName,
- scan,
- MyMapper.class,
- NullWritable.class,
- NullWritable.class,
- job,
- true,
- new Path(restoreDir)
- );
-
- job.setNumReduceTasks(0);
- job.setOutputKeyClass(NullWritable.class);
- job.setOutputValueClass(NullWritable.class);
- job.setOutputFormatClass(NullOutputFormat.class);
-
- scanTimer.start();
- job.waitForCompletion(true);
- scanTimer.stop();
-
- Counters counters = job.getCounters();
- long numRows = counters.findCounter(ScanCounter.NUM_ROWS).getValue();
- long numCells = counters.findCounter(ScanCounter.NUM_CELLS).getValue();
-
- long totalBytes = counters.findCounter(HBASE_COUNTER_GROUP_NAME, "BYTES_IN_RESULTS").getValue();
- double throughput = (double)totalBytes / scanTimer.elapsed(TimeUnit.SECONDS);
- double throughputRows = (double)numRows / scanTimer.elapsed(TimeUnit.SECONDS);
- double throughputCells = (double)numCells / scanTimer.elapsed(TimeUnit.SECONDS);
-
- System.out.println("HBase scan mapreduce: ");
- System.out.println("total time to open scanner: " +
- scanOpenTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
- System.out.println("total time to scan: " + scanTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
-
- System.out.println("total bytes: " + totalBytes + " bytes ("
- + StringUtils.humanReadableInt(totalBytes) + ")");
- System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughput) + "B/s");
- System.out.println("total rows : " + numRows);
- System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughputRows) + " rows/s");
- System.out.println("total cells : " + numCells);
- System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughputCells) + " cells/s");
- }
-
- @Override
- protected int doWork() throws Exception {
- if (type.equals("streaming")) {
- testHdfsStreaming(new Path(file));
- } else if (type.equals("scan")){
- testScan();
- } else if (type.equals("snapshotscan")) {
- testSnapshotScan();
- } else if (type.equals("scanmapreduce")) {
- testScanMapReduce();
- } else if (type.equals("snapshotscanmapreduce")) {
- testSnapshotScanMapReduce();
- }
- return 0;
- }
-
- public static void main (String[] args) throws Exception {
- int ret = ToolRunner.run(HBaseConfiguration.create(), new ScanPerformanceEvaluation(), args);
- System.exit(ret);
- }
-}
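
For completeness, a hedged example of driving the tool programmatically with the options defined in addOptions() above; the table name and caching value are illustrative, and the class must be on the test classpath.

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.ScanPerformanceEvaluation;
import org.apache.hadoop.util.ToolRunner;

public class RunScanEvaluation {
  public static void main(String[] args) throws Exception {
    // Runs the single-client scan test against "my_table" with scanner caching of 1000.
    int ret = ToolRunner.run(HBaseConfiguration.create(), new ScanPerformanceEvaluation(),
        new String[] { "--type", "scan", "--table", "my_table", "--caching", "1000" });
    System.exit(ret);
  }
}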
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPerformanceEvaluation.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPerformanceEvaluation.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPerformanceEvaluation.java
deleted file mode 100644
index 86a3d3f..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPerformanceEvaluation.java
+++ /dev/null
@@ -1,218 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase;
-
-import static org.junit.Assert.*;
-
-import java.io.BufferedReader;
-import java.io.ByteArrayInputStream;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.lang.reflect.Constructor;
-import java.lang.reflect.InvocationTargetException;
-import java.util.NoSuchElementException;
-import java.util.Queue;
-import java.util.Random;
-import java.util.LinkedList;
-
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.PerformanceEvaluation.RandomReadTest;
-import org.apache.hadoop.hbase.PerformanceEvaluation.TestOptions;
-import org.apache.hadoop.hbase.testclassification.MiscTests;
-import org.apache.hadoop.hbase.testclassification.SmallTests;
-import org.codehaus.jackson.JsonGenerationException;
-import org.codehaus.jackson.map.JsonMappingException;
-import org.codehaus.jackson.map.ObjectMapper;
-import org.junit.Ignore;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-import com.codahale.metrics.Histogram;
-import com.codahale.metrics.Snapshot;
-import com.codahale.metrics.UniformReservoir;
-
-@Category({MiscTests.class, SmallTests.class})
-public class TestPerformanceEvaluation {
- private static final HBaseTestingUtility HTU = new HBaseTestingUtility();
-
- @Test
- public void testSerialization()
- throws JsonGenerationException, JsonMappingException, IOException {
- PerformanceEvaluation.TestOptions options = new PerformanceEvaluation.TestOptions();
- assertTrue(!options.isAutoFlush());
- options.setAutoFlush(true);
- ObjectMapper mapper = new ObjectMapper();
- String optionsString = mapper.writeValueAsString(options);
- PerformanceEvaluation.TestOptions optionsDeserialized =
- mapper.readValue(optionsString, PerformanceEvaluation.TestOptions.class);
- assertTrue(optionsDeserialized.isAutoFlush());
- }
-
- /**
- * Exercise the mr spec writing. Simple assertions to make sure it is basically working.
- * @throws IOException
- */
- @Ignore @Test
- public void testWriteInputFile() throws IOException {
- TestOptions opts = new PerformanceEvaluation.TestOptions();
- final int clients = 10;
- opts.setNumClientThreads(clients);
- opts.setPerClientRunRows(10);
- Path dir =
- PerformanceEvaluation.writeInputFile(HTU.getConfiguration(), opts, HTU.getDataTestDir());
- FileSystem fs = FileSystem.get(HTU.getConfiguration());
- Path p = new Path(dir, PerformanceEvaluation.JOB_INPUT_FILENAME);
- long len = fs.getFileStatus(p).getLen();
- assertTrue(len > 0);
- byte [] content = new byte[(int)len];
- FSDataInputStream dis = fs.open(p);
- try {
- dis.readFully(content);
- BufferedReader br =
- new BufferedReader(new InputStreamReader(new ByteArrayInputStream(content)));
- int count = 0;
- while (br.readLine() != null) {
- count++;
- }
- assertEquals(clients, count);
- } finally {
- dis.close();
- }
- }
-
- @Test
- public void testSizeCalculation() {
- TestOptions opts = new PerformanceEvaluation.TestOptions();
- opts = PerformanceEvaluation.calculateRowsAndSize(opts);
- int rows = opts.getPerClientRunRows();
- // Default row count
- final int defaultPerClientRunRows = 1024 * 1024;
- assertEquals(defaultPerClientRunRows, rows);
- // If size is 2G, then twice the row count.
- opts.setSize(2.0f);
- opts = PerformanceEvaluation.calculateRowsAndSize(opts);
- assertEquals(defaultPerClientRunRows * 2, opts.getPerClientRunRows());
- // If two clients, then they get half the rows each.
- opts.setNumClientThreads(2);
- opts = PerformanceEvaluation.calculateRowsAndSize(opts);
- assertEquals(defaultPerClientRunRows, opts.getPerClientRunRows());
- // What if valueSize is 'random'? Then half of the valueSize so twice the rows.
- opts.valueRandom = true;
- opts = PerformanceEvaluation.calculateRowsAndSize(opts);
- assertEquals(defaultPerClientRunRows * 2, opts.getPerClientRunRows());
- }
-
- @Test
- public void testRandomReadCalculation() {
- TestOptions opts = new PerformanceEvaluation.TestOptions();
- opts = PerformanceEvaluation.calculateRowsAndSize(opts);
- int rows = opts.getPerClientRunRows();
- // Default row count
- final int defaultPerClientRunRows = 1024 * 1024;
- assertEquals(defaultPerClientRunRows, rows);
- // If size is 2G, then twice the row count.
- opts.setSize(2.0f);
- opts.setPerClientRunRows(1000);
- opts.setCmdName(PerformanceEvaluation.RANDOM_READ);
- opts = PerformanceEvaluation.calculateRowsAndSize(opts);
- assertEquals(1000, opts.getPerClientRunRows());
- // If two clients, then they get half the rows each.
- opts.setNumClientThreads(2);
- opts = PerformanceEvaluation.calculateRowsAndSize(opts);
- assertEquals(1000, opts.getPerClientRunRows());
- Random random = new Random();
- // assuming we will get one before this loop expires
- boolean foundValue = false;
- for (int i = 0; i < 10000000; i++) {
- int randomRow = PerformanceEvaluation.generateRandomRow(random, opts.totalRows);
- if (randomRow > 1000) {
- foundValue = true;
- break;
- }
- }
- assertTrue("We need to get a value more than 1000", foundValue);
- }
-
- @Test
- public void testZipfian()
- throws NoSuchMethodException, SecurityException, InstantiationException, IllegalAccessException,
- IllegalArgumentException, InvocationTargetException {
- TestOptions opts = new PerformanceEvaluation.TestOptions();
- opts.setValueZipf(true);
- final int valueSize = 1024;
- opts.setValueSize(valueSize);
- RandomReadTest rrt = new RandomReadTest(null, opts, null);
- Constructor<?> ctor =
- Histogram.class.getDeclaredConstructor(com.codahale.metrics.Reservoir.class);
- ctor.setAccessible(true);
- Histogram histogram = (Histogram)ctor.newInstance(new UniformReservoir(1024 * 500));
- for (int i = 0; i < 100; i++) {
- histogram.update(rrt.getValueLength(null));
- }
- Snapshot snapshot = histogram.getSnapshot();
- double stddev = snapshot.getStdDev();
- assertTrue(stddev != 0 && stddev != 1.0);
- assertTrue(snapshot.getStdDev() != 0);
- double median = snapshot.getMedian();
- assertTrue(median != 0 && median != 1 && median != valueSize);
- }
-
- @Test
- public void testParseOptsWithThreads() {
- Queue<String> opts = new LinkedList<>();
- String cmdName = "sequentialWrite";
- int threads = 1;
- opts.offer(cmdName);
- opts.offer(String.valueOf(threads));
- PerformanceEvaluation.TestOptions options = PerformanceEvaluation.parseOpts(opts);
- assertNotNull(options);
- assertNotNull(options.getCmdName());
- assertEquals(cmdName, options.getCmdName());
- assertEquals(threads, options.getNumClientThreads());
- }
-
- @Test
- public void testParseOptsWrongThreads() {
- Queue<String> opts = new LinkedList<>();
- String cmdName = "sequentialWrite";
- opts.offer(cmdName);
- opts.offer("qq");
- try {
- PerformanceEvaluation.parseOpts(opts);
- } catch (IllegalArgumentException e) {
- System.out.println(e.getMessage());
- assertEquals("Command " + cmdName + " does not have threads number", e.getMessage());
- assertTrue(e.getCause() instanceof NumberFormatException);
- }
- }
-
- @Test
- public void testParseOptsNoThreads() {
- Queue<String> opts = new LinkedList<>();
- String cmdName = "sequentialWrite";
- try {
- PerformanceEvaluation.parseOpts(opts);
- } catch (IllegalArgumentException e) {
- System.out.println(e.getMessage());
- assertEquals("Command " + cmdName + " does not have threads number", e.getMessage());
- assertTrue(e.getCause() instanceof NoSuchElementException);
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestTableSnapshotScanner.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestTableSnapshotScanner.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestTableSnapshotScanner.java
index 3322e6c..535a34d 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestTableSnapshotScanner.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestTableSnapshotScanner.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.client;
import java.io.IOException;
import java.util.Arrays;
+import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -29,8 +30,8 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellScanner;
import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.mapreduce.TestTableSnapshotInputFormat;
import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
import org.apache.hadoop.hbase.testclassification.ClientTests;
@@ -45,7 +46,7 @@ import org.junit.experimental.categories.Category;
@Category({LargeTests.class, ClientTests.class})
public class TestTableSnapshotScanner {
- private static final Log LOG = LogFactory.getLog(TestTableSnapshotInputFormat.class);
+ private static final Log LOG = LogFactory.getLog(TestTableSnapshotScanner.class);
private final HBaseTestingUtility UTIL = new HBaseTestingUtility();
private static final int NUM_REGION_SERVERS = 2;
private static final byte[][] FAMILIES = {Bytes.toBytes("f1"), Bytes.toBytes("f2")};
@@ -55,6 +56,17 @@ public class TestTableSnapshotScanner {
private FileSystem fs;
private Path rootDir;
+ public static void blockUntilSplitFinished(HBaseTestingUtility util, TableName tableName,
+ int expectedRegionSize) throws Exception {
+ for (int i = 0; i < 100; i++) {
+ List<HRegionInfo> hRegionInfoList = util.getAdmin().getTableRegions(tableName);
+ if (hRegionInfoList.size() >= expectedRegionSize) {
+ break;
+ }
+ Thread.sleep(1000);
+ }
+ }
+
public void setupCluster() throws Exception {
setupConf(UTIL.getConfiguration());
UTIL.startMiniCluster(NUM_REGION_SERVERS, true);
@@ -129,7 +141,7 @@ public class TestTableSnapshotScanner {
// split to 2 regions
admin.split(tableName, Bytes.toBytes("eee"));
- TestTableSnapshotInputFormat.blockUntilSplitFinished(UTIL, tableName, 2);
+ blockUntilSplitFinished(UTIL, tableName, 2);
Path rootDir = FSUtils.getRootDir(UTIL.getConfiguration());
FileSystem fs = rootDir.getFileSystem(UTIL.getConfiguration());
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestDriver.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestDriver.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestDriver.java
deleted file mode 100644
index ab6a86d..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestDriver.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.testclassification.SmallTests;
-import org.apache.hadoop.util.ProgramDriver;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.mockito.Mockito;
-
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.verify;
-
-@Category({MapReduceTests.class, SmallTests.class})
-public class TestDriver {
-
- @Test
- public void testDriverMainMethod() throws Throwable {
- ProgramDriver programDriverMock = mock(ProgramDriver.class);
- Driver.setProgramDriver(programDriverMock);
- Driver.main(new String[]{});
- verify(programDriverMock).driver(Mockito.any(String[].class));
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestGroupingTableMap.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestGroupingTableMap.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestGroupingTableMap.java
deleted file mode 100644
index 36e45e4..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestGroupingTableMap.java
+++ /dev/null
@@ -1,181 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import static org.junit.Assert.assertArrayEquals;
-import static org.junit.Assert.assertNull;
-import static org.mockito.Matchers.any;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.times;
-import static org.mockito.Mockito.verify;
-import static org.mockito.Mockito.verifyZeroInteractions;
-import static org.mockito.Mockito.verifyNoMoreInteractions;
-import static org.mockito.Mockito.when;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.concurrent.atomic.AtomicBoolean;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.testclassification.SmallTests;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import org.junit.Assert;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.ImmutableList;
-
-@Category({MapReduceTests.class, SmallTests.class})
-public class TestGroupingTableMap {
-
- @Test
- @SuppressWarnings({ "deprecation", "unchecked" })
- public void shouldNotCallCollectonSinceFindUniqueKeyValueMoreThanOnes()
- throws Exception {
- GroupingTableMap gTableMap = null;
- try {
- Result result = mock(Result.class);
- Reporter reporter = mock(Reporter.class);
- gTableMap = new GroupingTableMap();
- Configuration cfg = new Configuration();
- cfg.set(GroupingTableMap.GROUP_COLUMNS, "familyA:qualifierA familyB:qualifierB");
- JobConf jobConf = new JobConf(cfg);
- gTableMap.configure(jobConf);
-
- byte[] row = {};
- List<Cell> keyValues = ImmutableList.<Cell>of(
- new KeyValue(row, "familyA".getBytes(), "qualifierA".getBytes(), Bytes.toBytes("1111")),
- new KeyValue(row, "familyA".getBytes(), "qualifierA".getBytes(), Bytes.toBytes("2222")),
- new KeyValue(row, "familyB".getBytes(), "qualifierB".getBytes(), Bytes.toBytes("3333")));
- when(result.listCells()).thenReturn(keyValues);
- OutputCollector<ImmutableBytesWritable, Result> outputCollectorMock =
- mock(OutputCollector.class);
- gTableMap.map(null, result, outputCollectorMock, reporter);
- verify(result).listCells();
- verifyZeroInteractions(outputCollectorMock);
- } finally {
- if (gTableMap != null)
- gTableMap.close();
- }
- }
-
- @Test
- @SuppressWarnings({ "deprecation", "unchecked" })
- public void shouldCreateNewKeyAlthoughExtraKey() throws Exception {
- GroupingTableMap gTableMap = null;
- try {
- Result result = mock(Result.class);
- Reporter reporter = mock(Reporter.class);
- gTableMap = new GroupingTableMap();
- Configuration cfg = new Configuration();
- cfg.set(GroupingTableMap.GROUP_COLUMNS, "familyA:qualifierA familyB:qualifierB");
- JobConf jobConf = new JobConf(cfg);
- gTableMap.configure(jobConf);
-
- byte[] row = {};
- List<Cell> keyValues = ImmutableList.<Cell>of(
- new KeyValue(row, "familyA".getBytes(), "qualifierA".getBytes(), Bytes.toBytes("1111")),
- new KeyValue(row, "familyB".getBytes(), "qualifierB".getBytes(), Bytes.toBytes("2222")),
- new KeyValue(row, "familyC".getBytes(), "qualifierC".getBytes(), Bytes.toBytes("3333")));
- when(result.listCells()).thenReturn(keyValues);
- OutputCollector<ImmutableBytesWritable, Result> outputCollectorMock =
- mock(OutputCollector.class);
- gTableMap.map(null, result, outputCollectorMock, reporter);
- verify(result).listCells();
- verify(outputCollectorMock, times(1))
- .collect(any(ImmutableBytesWritable.class), any(Result.class));
- verifyNoMoreInteractions(outputCollectorMock);
- } finally {
- if (gTableMap != null)
- gTableMap.close();
- }
- }
-
- @Test
- @SuppressWarnings({ "deprecation" })
- public void shouldCreateNewKey() throws Exception {
- GroupingTableMap gTableMap = null;
- try {
- Result result = mock(Result.class);
- Reporter reporter = mock(Reporter.class);
- final byte[] bSeparator = Bytes.toBytes(" ");
- gTableMap = new GroupingTableMap();
- Configuration cfg = new Configuration();
- cfg.set(GroupingTableMap.GROUP_COLUMNS, "familyA:qualifierA familyB:qualifierB");
- JobConf jobConf = new JobConf(cfg);
- gTableMap.configure(jobConf);
-
- final byte[] firstPartKeyValue = Bytes.toBytes("34879512738945");
- final byte[] secondPartKeyValue = Bytes.toBytes("35245142671437");
- byte[] row = {};
- List<Cell> cells = ImmutableList.<Cell>of(
- new KeyValue(row, "familyA".getBytes(), "qualifierA".getBytes(), firstPartKeyValue),
- new KeyValue(row, "familyB".getBytes(), "qualifierB".getBytes(), secondPartKeyValue));
- when(result.listCells()).thenReturn(cells);
-
- final AtomicBoolean outputCollected = new AtomicBoolean();
- OutputCollector<ImmutableBytesWritable, Result> outputCollector =
- new OutputCollector<ImmutableBytesWritable, Result>() {
- @Override
- public void collect(ImmutableBytesWritable arg, Result result) throws IOException {
- assertArrayEquals(org.apache.hadoop.hbase.shaded.com.google.common.primitives.
- Bytes.concat(firstPartKeyValue, bSeparator,
- secondPartKeyValue), arg.copyBytes());
- outputCollected.set(true);
- }
- };
-
- gTableMap.map(null, result, outputCollector, reporter);
- verify(result).listCells();
- Assert.assertTrue("Output not received", outputCollected.get());
-
- final byte[] firstPartValue = Bytes.toBytes("238947928");
- final byte[] secondPartValue = Bytes.toBytes("4678456942345");
- byte[][] data = { firstPartValue, secondPartValue };
- ImmutableBytesWritable byteWritable = gTableMap.createGroupKey(data);
- assertArrayEquals(org.apache.hadoop.hbase.shaded.com.google.common.primitives.
- Bytes.concat(firstPartValue,
- bSeparator, secondPartValue), byteWritable.get());
- } finally {
- if (gTableMap != null)
- gTableMap.close();
- }
- }
-
- @Test
- @SuppressWarnings({ "deprecation" })
- public void shouldReturnNullFromCreateGroupKey() throws Exception {
- GroupingTableMap gTableMap = null;
- try {
- gTableMap = new GroupingTableMap();
- assertNull(gTableMap.createGroupKey(null));
- } finally {
- if(gTableMap != null)
- gTableMap.close();
- }
- }
-}
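A minimal sketch of the group-key behaviour these tests assert (Java; the column names and values are illustrative, and only the GroupingTableMap API shown above is assumed):

    GroupingTableMap groupingMap = new GroupingTableMap();
    Configuration conf = new Configuration();
    // The grouping columns are given as space-separated family:qualifier pairs.
    conf.set(GroupingTableMap.GROUP_COLUMNS, "familyA:qualifierA familyB:qualifierB");
    groupingMap.configure(new JobConf(conf));
    byte[][] parts = { Bytes.toBytes("238947928"), Bytes.toBytes("4678456942345") };
    // As asserted in shouldCreateNewKey(), the parts are joined with a single space.
    ImmutableBytesWritable groupKey = groupingMap.createGroupKey(parts);
    groupingMap.close();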
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestIdentityTableMap.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestIdentityTableMap.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestIdentityTableMap.java
deleted file mode 100644
index 3fad1fe..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestIdentityTableMap.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.times;
-import static org.mockito.Mockito.verify;
-
-import java.io.IOException;
-
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.testclassification.SmallTests;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.mockito.Mockito;
-
-@Category({MapReduceTests.class, SmallTests.class})
-public class TestIdentityTableMap {
-
- @Test
- @SuppressWarnings({ "deprecation", "unchecked" })
- public void shouldCollectPredefinedTimes() throws IOException {
- int recordNumber = 999;
- Result resultMock = mock(Result.class);
- IdentityTableMap identityTableMap = null;
- try {
- Reporter reporterMock = mock(Reporter.class);
- identityTableMap = new IdentityTableMap();
- ImmutableBytesWritable bytesWritableMock = mock(ImmutableBytesWritable.class);
- OutputCollector<ImmutableBytesWritable, Result> outputCollectorMock =
- mock(OutputCollector.class);
-
- for (int i = 0; i < recordNumber; i++)
- identityTableMap.map(bytesWritableMock, resultMock, outputCollectorMock,
- reporterMock);
-
- verify(outputCollectorMock, times(recordNumber)).collect(
- Mockito.any(ImmutableBytesWritable.class), Mockito.any(Result.class));
- } finally {
- if (identityTableMap != null)
- identityTableMap.close();
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestMultiTableSnapshotInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestMultiTableSnapshotInputFormat.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestMultiTableSnapshotInputFormat.java
deleted file mode 100644
index 665c547..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestMultiTableSnapshotInputFormat.java
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.mapred;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapred.FileOutputFormat;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.RunningJob;
-import org.junit.experimental.categories.Category;
-
-import java.io.IOException;
-import java.util.Iterator;
-import java.util.List;
-
-import static org.junit.Assert.assertTrue;
-
-@Category({ VerySlowMapReduceTests.class, LargeTests.class })
-public class TestMultiTableSnapshotInputFormat
- extends org.apache.hadoop.hbase.mapreduce.TestMultiTableSnapshotInputFormat {
-
- private static final Log LOG = LogFactory.getLog(TestMultiTableSnapshotInputFormat.class);
-
- @Override
- protected void runJob(String jobName, Configuration c, List<Scan> scans)
- throws IOException, InterruptedException, ClassNotFoundException {
- JobConf job = new JobConf(TEST_UTIL.getConfiguration());
-
- job.setJobName(jobName);
- job.setMapperClass(Mapper.class);
- job.setReducerClass(Reducer.class);
-
- TableMapReduceUtil.initMultiTableSnapshotMapperJob(getSnapshotScanMapping(scans), Mapper.class,
- ImmutableBytesWritable.class, ImmutableBytesWritable.class, job, true, restoreDir);
-
- TableMapReduceUtil.addDependencyJars(job);
-
- job.setReducerClass(Reducer.class);
- job.setNumReduceTasks(1); // one to get final "first" and "last" key
- FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
- LOG.info("Started " + job.getJobName());
-
- RunningJob runningJob = JobClient.runJob(job);
- runningJob.waitForCompletion();
- assertTrue(runningJob.isSuccessful());
- LOG.info("After map/reduce completion - job " + jobName);
- }
-
- public static class Mapper extends TestMultiTableSnapshotInputFormat.ScanMapper
- implements TableMap<ImmutableBytesWritable, ImmutableBytesWritable> {
-
- @Override
- public void map(ImmutableBytesWritable key, Result value,
- OutputCollector<ImmutableBytesWritable, ImmutableBytesWritable> outputCollector,
- Reporter reporter) throws IOException {
- makeAssertions(key, value);
- outputCollector.collect(key, key);
- }
-
- /**
- * Closes this stream and releases any system resources associated
- * with it. If the stream is already closed then invoking this
- * method has no effect.
- *
- * @throws IOException if an I/O error occurs
- */
- @Override
- public void close() throws IOException {
- }
-
- @Override
- public void configure(JobConf jobConf) {
-
- }
- }
-
- public static class Reducer extends TestMultiTableSnapshotInputFormat.ScanReducer implements
- org.apache.hadoop.mapred.Reducer<ImmutableBytesWritable, ImmutableBytesWritable,
- NullWritable, NullWritable> {
-
- private JobConf jobConf;
-
- @Override
- public void reduce(ImmutableBytesWritable key, Iterator<ImmutableBytesWritable> values,
- OutputCollector<NullWritable, NullWritable> outputCollector, Reporter reporter)
- throws IOException {
- makeAssertions(key, Lists.newArrayList(values));
- }
-
- /**
- * Closes this stream and releases any system resources associated
- * with it. If the stream is already closed then invoking this
- * method has no effect.
- *
- * @throws IOException if an I/O error occurs
- */
- @Override
- public void close() throws IOException {
- super.cleanup(this.jobConf);
- }
-
- @Override
- public void configure(JobConf jobConf) {
- this.jobConf = jobConf;
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestRowCounter.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestRowCounter.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestRowCounter.java
deleted file mode 100644
index 4ebd8bf..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestRowCounter.java
+++ /dev/null
@@ -1,163 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
-import static org.mockito.Matchers.any;
-import static org.mockito.Matchers.anyInt;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.times;
-
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.PrintStream;
-
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.testclassification.SmallTests;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.mapred.RowCounter.RowCounterMapper;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.mockito.Mockito;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.base.Joiner;
-
-@Category({MapReduceTests.class, SmallTests.class})
-public class TestRowCounter {
-
- @Test
- @SuppressWarnings("deprecation")
- public void shouldPrintUsage() throws Exception {
- String expectedOutput = "rowcounter <outputdir> <tablename> <column1> [<column2>...]";
- String result = new OutputReader(System.out) {
- @Override
- void doRead() {
- assertEquals(-1, RowCounter.printUsage());
- }
- }.read();
-
- assertTrue(result.startsWith(expectedOutput));
- }
-
- @Test
- @SuppressWarnings("deprecation")
- public void shouldExitAndPrintUsageSinceParameterNumberLessThanThree()
- throws Exception {
- final String[] args = new String[] { "one", "two" };
- String line = "ERROR: Wrong number of parameters: " + args.length;
- String result = new OutputReader(System.err) {
- @Override
- void doRead() throws Exception {
- assertEquals(-1, new RowCounter().run(args));
- }
- }.read();
-
- assertTrue(result.startsWith(line));
- }
-
- @Test
- @SuppressWarnings({ "deprecation", "unchecked" })
- public void shouldRegInReportEveryIncomingRow() throws IOException {
- int iterationNumber = 999;
- RowCounter.RowCounterMapper mapper = new RowCounter.RowCounterMapper();
- Reporter reporter = mock(Reporter.class);
- for (int i = 0; i < iterationNumber; i++)
- mapper.map(mock(ImmutableBytesWritable.class), mock(Result.class),
- mock(OutputCollector.class), reporter);
-
- Mockito.verify(reporter, times(iterationNumber)).incrCounter(
- any(Enum.class), anyInt());
- }
-
- @Test
- @SuppressWarnings({ "deprecation" })
- public void shouldCreateAndRunSubmittableJob() throws Exception {
- RowCounter rCounter = new RowCounter();
- rCounter.setConf(HBaseConfiguration.create());
- String[] args = new String[] { "\temp", "tableA", "column1", "column2",
- "column3" };
- JobConf jobConfig = rCounter.createSubmittableJob(args);
-
- assertNotNull(jobConfig);
- assertEquals(0, jobConfig.getNumReduceTasks());
- assertEquals("rowcounter", jobConfig.getJobName());
- assertEquals(jobConfig.getMapOutputValueClass(), Result.class);
- assertEquals(jobConfig.getMapperClass(), RowCounterMapper.class);
- assertEquals(jobConfig.get(TableInputFormat.COLUMN_LIST), Joiner.on(' ')
- .join("column1", "column2", "column3"));
- assertEquals(jobConfig.getMapOutputKeyClass(), ImmutableBytesWritable.class);
- }
-
- enum Outs {
- OUT, ERR
- }
-
- private static abstract class OutputReader {
- private final PrintStream ps;
- private PrintStream oldPrintStream;
- private Outs outs;
-
- protected OutputReader(PrintStream ps) {
- this.ps = ps;
- }
-
- protected String read() throws Exception {
- ByteArrayOutputStream outBytes = new ByteArrayOutputStream();
- if (ps == System.out) {
- oldPrintStream = System.out;
- outs = Outs.OUT;
- System.setOut(new PrintStream(outBytes));
- } else if (ps == System.err) {
- oldPrintStream = System.err;
- outs = Outs.ERR;
- System.setErr(new PrintStream(outBytes));
- } else {
- throw new IllegalStateException("OutputReader: unsupported PrintStream");
- }
-
- try {
- doRead();
- return new String(outBytes.toByteArray());
- } finally {
- switch (outs) {
- case OUT: {
- System.setOut(oldPrintStream);
- break;
- }
- case ERR: {
- System.setErr(oldPrintStream);
- break;
- }
- default:
- throw new IllegalStateException(
- "OutputReader: unsupported PrintStream");
- }
- }
- }
-
- abstract void doRead() throws Exception;
- }
-}
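A minimal sketch of driving this mapred RowCounter the way shouldCreateAndRunSubmittableJob() does (Java; the output directory, table, and column names are placeholders):

    RowCounter rowCounter = new RowCounter();
    rowCounter.setConf(HBaseConfiguration.create());
    // Argument order per the usage string: <outputdir> <tablename> <column1> [<column2>...]
    String[] args = { "/tmp/rowcounter-out", "myTable", "column1", "column2" };
    JobConf job = rowCounter.createSubmittableJob(args);     // a map-only job: zero reduce tasks
    assertEquals("rowcounter", job.getJobName());
    // rowCounter.run(args) would actually submit the job; the sketch only inspects the JobConf.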
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestSplitTable.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestSplitTable.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestSplitTable.java
deleted file mode 100644
index 2655ac2..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestSplitTable.java
+++ /dev/null
@@ -1,116 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotEquals;
-import static org.junit.Assert.assertTrue;
-
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.testclassification.SmallTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.junit.Assert;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
-
-@Category({MapReduceTests.class, SmallTests.class})
-public class TestSplitTable {
- @Rule
- public TestName name = new TestName();
-
- @Test
- @SuppressWarnings("deprecation")
- public void testSplitTableCompareTo() {
- TableSplit aTableSplit = new TableSplit(Bytes.toBytes("tableA"),
- Bytes.toBytes("aaa"), Bytes.toBytes("ddd"), "locationA");
-
- TableSplit bTableSplit = new TableSplit(Bytes.toBytes("tableA"),
- Bytes.toBytes("iii"), Bytes.toBytes("kkk"), "locationA");
-
- TableSplit cTableSplit = new TableSplit(Bytes.toBytes("tableA"),
- Bytes.toBytes("lll"), Bytes.toBytes("zzz"), "locationA");
-
- assertTrue(aTableSplit.compareTo(aTableSplit) == 0);
- assertTrue(bTableSplit.compareTo(bTableSplit) == 0);
- assertTrue(cTableSplit.compareTo(cTableSplit) == 0);
-
- assertTrue(aTableSplit.compareTo(bTableSplit) < 0);
- assertTrue(bTableSplit.compareTo(aTableSplit) > 0);
-
- assertTrue(aTableSplit.compareTo(cTableSplit) < 0);
- assertTrue(cTableSplit.compareTo(aTableSplit) > 0);
-
- assertTrue(bTableSplit.compareTo(cTableSplit) < 0);
- assertTrue(cTableSplit.compareTo(bTableSplit) > 0);
-
- assertTrue(cTableSplit.compareTo(aTableSplit) > 0);
- }
-
- @Test
- @SuppressWarnings("deprecation")
- public void testSplitTableEquals() {
- byte[] tableA = Bytes.toBytes("tableA");
- byte[] aaa = Bytes.toBytes("aaa");
- byte[] ddd = Bytes.toBytes("ddd");
- String locationA = "locationA";
-
- TableSplit tablesplit = new TableSplit(tableA, aaa, ddd, locationA);
-
- TableSplit tableB = new TableSplit(Bytes.toBytes("tableB"), aaa, ddd, locationA);
- assertNotEquals(tablesplit.hashCode(), tableB.hashCode());
- assertNotEquals(tablesplit, tableB);
-
- TableSplit startBbb = new TableSplit(tableA, Bytes.toBytes("bbb"), ddd, locationA);
- assertNotEquals(tablesplit.hashCode(), startBbb.hashCode());
- assertNotEquals(tablesplit, startBbb);
-
- TableSplit endEee = new TableSplit(tableA, aaa, Bytes.toBytes("eee"), locationA);
- assertNotEquals(tablesplit.hashCode(), endEee.hashCode());
- assertNotEquals(tablesplit, endEee);
-
- TableSplit locationB = new TableSplit(tableA, aaa, ddd, "locationB");
- assertNotEquals(tablesplit.hashCode(), locationB.hashCode());
- assertNotEquals(tablesplit, locationB);
-
- TableSplit same = new TableSplit(tableA, aaa, ddd, locationA);
- assertEquals(tablesplit.hashCode(), same.hashCode());
- assertEquals(tablesplit, same);
- }
-
- @Test
- @SuppressWarnings("deprecation")
- public void testToString() {
- TableSplit split =
- new TableSplit(TableName.valueOf(name.getMethodName()), "row-start".getBytes(), "row-end".getBytes(),
- "location");
- String str =
- "HBase table split(table name: " + name.getMethodName() + ", start row: row-start, "
- + "end row: row-end, region location: location)";
- Assert.assertEquals(str, split.toString());
-
- split = new TableSplit((TableName) null, null, null, null);
- str =
- "HBase table split(table name: null, start row: null, "
- + "end row: null, region location: null)";
- Assert.assertEquals(str, split.toString());
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestTableInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestTableInputFormat.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestTableInputFormat.java
deleted file mode 100644
index 4b93843..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestTableInputFormat.java
+++ /dev/null
@@ -1,461 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-import static org.mockito.Matchers.anyObject;
-import static org.mockito.Mockito.doAnswer;
-import static org.mockito.Mockito.doReturn;
-import static org.mockito.Mockito.doThrow;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.spy;
-
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.Map;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.*;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
-import org.apache.hadoop.hbase.filter.Filter;
-import org.apache.hadoop.hbase.filter.RegexStringComparator;
-import org.apache.hadoop.hbase.filter.RowFilter;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapred.InputFormat;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.JobConfigurable;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.RunningJob;
-import org.apache.hadoop.mapred.lib.NullOutputFormat;
-import org.junit.AfterClass;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.mockito.invocation.InvocationOnMock;
-import org.mockito.stubbing.Answer;
-
-/**
- * This tests the TableInputFormat and its recovery semantics
- *
- */
-@Category({MapReduceTests.class, LargeTests.class})
-public class TestTableInputFormat {
-
- private static final Log LOG = LogFactory.getLog(TestTableInputFormat.class);
-
- private final static HBaseTestingUtility UTIL = new HBaseTestingUtility();
-
- static final byte[] FAMILY = Bytes.toBytes("family");
-
- private static final byte[][] columns = new byte[][] { FAMILY };
-
- @BeforeClass
- public static void beforeClass() throws Exception {
- UTIL.startMiniCluster();
- }
-
- @AfterClass
- public static void afterClass() throws Exception {
- UTIL.shutdownMiniCluster();
- }
-
- @Before
- public void before() throws IOException {
- LOG.info("before");
- UTIL.ensureSomeRegionServersAvailable(1);
- LOG.info("before done");
- }
-
- /**
- * Setup a table with two rows and values.
- *
- * @param tableName
- * @return
- * @throws IOException
- */
- public static Table createTable(byte[] tableName) throws IOException {
- return createTable(tableName, new byte[][] { FAMILY });
- }
-
- /**
- * Setup a table with two rows and values per column family.
- *
- * @param tableName
- * @return
- * @throws IOException
- */
- public static Table createTable(byte[] tableName, byte[][] families) throws IOException {
- Table table = UTIL.createTable(TableName.valueOf(tableName), families);
- Put p = new Put("aaa".getBytes());
- for (byte[] family : families) {
- p.addColumn(family, null, "value aaa".getBytes());
- }
- table.put(p);
- p = new Put("bbb".getBytes());
- for (byte[] family : families) {
- p.addColumn(family, null, "value bbb".getBytes());
- }
- table.put(p);
- return table;
- }
-
- /**
- * Verify that the result and key have expected values.
- *
- * @param r
- * @param key
- * @param expectedKey
- * @param expectedValue
- * @return
- */
- static boolean checkResult(Result r, ImmutableBytesWritable key,
- byte[] expectedKey, byte[] expectedValue) {
- assertEquals(0, key.compareTo(expectedKey));
- Map<byte[], byte[]> vals = r.getFamilyMap(FAMILY);
- byte[] value = vals.values().iterator().next();
- assertTrue(Arrays.equals(value, expectedValue));
- return true; // if succeed
- }
-
- /**
- * Create table data and run tests on specified htable using the
- * o.a.h.hbase.mapred API.
- *
- * @param table
- * @throws IOException
- */
- static void runTestMapred(Table table) throws IOException {
- org.apache.hadoop.hbase.mapred.TableRecordReader trr =
- new org.apache.hadoop.hbase.mapred.TableRecordReader();
- trr.setStartRow("aaa".getBytes());
- trr.setEndRow("zzz".getBytes());
- trr.setHTable(table);
- trr.setInputColumns(columns);
-
- trr.init();
- Result r = new Result();
- ImmutableBytesWritable key = new ImmutableBytesWritable();
-
- boolean more = trr.next(key, r);
- assertTrue(more);
- checkResult(r, key, "aaa".getBytes(), "value aaa".getBytes());
-
- more = trr.next(key, r);
- assertTrue(more);
- checkResult(r, key, "bbb".getBytes(), "value bbb".getBytes());
-
- // no more data
- more = trr.next(key, r);
- assertFalse(more);
- }
-
- /**
- * Create a table that IOE's on first scanner next call
- *
- * @throws IOException
- */
- static Table createIOEScannerTable(byte[] name, final int failCnt)
- throws IOException {
- // build up a mock scanner stuff to fail the first time
- Answer<ResultScanner> a = new Answer<ResultScanner>() {
- int cnt = 0;
-
- @Override
- public ResultScanner answer(InvocationOnMock invocation) throws Throwable {
- // first invocation return the busted mock scanner
- if (cnt++ < failCnt) {
- // create mock ResultScanner that always fails.
- Scan scan = mock(Scan.class);
- doReturn("bogus".getBytes()).when(scan).getStartRow(); // avoid npe
- ResultScanner scanner = mock(ResultScanner.class);
- // simulate TimeoutException / IOException
- doThrow(new IOException("Injected exception")).when(scanner).next();
- return scanner;
- }
-
- // otherwise return the real scanner.
- return (ResultScanner) invocation.callRealMethod();
- }
- };
-
- Table htable = spy(createTable(name));
- doAnswer(a).when(htable).getScanner((Scan) anyObject());
- return htable;
- }
-
- /**
- * Create a table that throws a DoNotRetryIOException on first scanner next
- * call
- *
- * @throws IOException
- */
- static Table createDNRIOEScannerTable(byte[] name, final int failCnt)
- throws IOException {
- // build up a mock scanner stuff to fail the first time
- Answer<ResultScanner> a = new Answer<ResultScanner>() {
- int cnt = 0;
-
- @Override
- public ResultScanner answer(InvocationOnMock invocation) throws Throwable {
- // first invocation return the busted mock scanner
- if (cnt++ < failCnt) {
- // create mock ResultScanner that always fails.
- Scan scan = mock(Scan.class);
- doReturn("bogus".getBytes()).when(scan).getStartRow(); // avoid npe
- ResultScanner scanner = mock(ResultScanner.class);
-
- invocation.callRealMethod(); // simulate NotServingRegionException
- doThrow(
- new NotServingRegionException("Injected simulated TimeoutException"))
- .when(scanner).next();
- return scanner;
- }
-
- // otherwise return the real scanner.
- return (ResultScanner) invocation.callRealMethod();
- }
- };
-
- Table htable = spy(createTable(name));
- doAnswer(a).when(htable).getScanner((Scan) anyObject());
- return htable;
- }
-
- /**
- * Run test assuming no errors using mapred api.
- *
- * @throws IOException
- */
- @Test
- public void testTableRecordReader() throws IOException {
- Table table = createTable("table1".getBytes());
- runTestMapred(table);
- }
-
- /**
- * Run test assuming Scanner IOException failure using mapred api.
- *
- * @throws IOException
- */
- @Test
- public void testTableRecordReaderScannerFail() throws IOException {
- Table htable = createIOEScannerTable("table2".getBytes(), 1);
- runTestMapred(htable);
- }
-
- /**
- * Run test assuming Scanner IOException failure using mapred api.
- *
- * @throws IOException
- */
- @Test(expected = IOException.class)
- public void testTableRecordReaderScannerFailTwice() throws IOException {
- Table htable = createIOEScannerTable("table3".getBytes(), 2);
- runTestMapred(htable);
- }
-
- /**
- * Run test assuming NotServingRegionException using mapred api.
- *
- * @throws org.apache.hadoop.hbase.DoNotRetryIOException
- */
- @Test
- public void testTableRecordReaderScannerTimeout() throws IOException {
- Table htable = createDNRIOEScannerTable("table4".getBytes(), 1);
- runTestMapred(htable);
- }
-
- /**
- * Run test assuming NotServingRegionException using mapred api.
- *
- * @throws org.apache.hadoop.hbase.DoNotRetryIOException
- */
- @Test(expected = org.apache.hadoop.hbase.NotServingRegionException.class)
- public void testTableRecordReaderScannerTimeoutTwice() throws IOException {
- Table htable = createDNRIOEScannerTable("table5".getBytes(), 2);
- runTestMapred(htable);
- }
-
- /**
- * Verify the example we present in javadocs on TableInputFormatBase
- */
- @Test
- public void testExtensionOfTableInputFormatBase() throws IOException {
- LOG.info("testing use of an InputFormat taht extends InputFormatBase");
- final Table table = createTable(Bytes.toBytes("exampleTable"),
- new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") });
- testInputFormat(ExampleTIF.class);
- }
-
- @Test
- public void testDeprecatedExtensionOfTableInputFormatBase() throws IOException {
- LOG.info("testing use of an InputFormat taht extends InputFormatBase, "
- + "as it was given in 0.98.");
- final Table table = createTable(Bytes.toBytes("exampleDeprecatedTable"),
- new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") });
- testInputFormat(ExampleDeprecatedTIF.class);
- }
-
- @Test
- public void testJobConfigurableExtensionOfTableInputFormatBase() throws IOException {
- LOG.info("testing use of an InputFormat taht extends InputFormatBase, "
- + "using JobConfigurable.");
- final Table table = createTable(Bytes.toBytes("exampleJobConfigurableTable"),
- new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") });
- testInputFormat(ExampleJobConfigurableTIF.class);
- }
-
- void testInputFormat(Class<? extends InputFormat> clazz) throws IOException {
- Configuration conf = UTIL.getConfiguration();
- final JobConf job = new JobConf(conf);
- job.setInputFormat(clazz);
- job.setOutputFormat(NullOutputFormat.class);
- job.setMapperClass(ExampleVerifier.class);
- job.setNumReduceTasks(0);
- LOG.debug("submitting job.");
- final RunningJob run = JobClient.runJob(job);
- assertTrue("job failed!", run.isSuccessful());
- assertEquals("Saw the wrong number of instances of the filtered-for row.", 2, run.getCounters()
- .findCounter(TestTableInputFormat.class.getName() + ":row", "aaa").getCounter());
- assertEquals("Saw any instances of the filtered out row.", 0, run.getCounters()
- .findCounter(TestTableInputFormat.class.getName() + ":row", "bbb").getCounter());
- assertEquals("Saw the wrong number of instances of columnA.", 1, run.getCounters()
- .findCounter(TestTableInputFormat.class.getName() + ":family", "columnA").getCounter());
- assertEquals("Saw the wrong number of instances of columnB.", 1, run.getCounters()
- .findCounter(TestTableInputFormat.class.getName() + ":family", "columnB").getCounter());
- assertEquals("Saw the wrong count of values for the filtered-for row.", 2, run.getCounters()
- .findCounter(TestTableInputFormat.class.getName() + ":value", "value aaa").getCounter());
- assertEquals("Saw the wrong count of values for the filtered-out row.", 0, run.getCounters()
- .findCounter(TestTableInputFormat.class.getName() + ":value", "value bbb").getCounter());
- }
-
- public static class ExampleVerifier implements TableMap<NullWritable, NullWritable> {
-
- @Override
- public void configure(JobConf conf) {
- }
-
- @Override
- public void map(ImmutableBytesWritable key, Result value,
- OutputCollector<NullWritable,NullWritable> output,
- Reporter reporter) throws IOException {
- for (Cell cell : value.listCells()) {
- reporter.getCounter(TestTableInputFormat.class.getName() + ":row",
- Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()))
- .increment(1l);
- reporter.getCounter(TestTableInputFormat.class.getName() + ":family",
- Bytes.toString(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength()))
- .increment(1l);
- reporter.getCounter(TestTableInputFormat.class.getName() + ":value",
- Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()))
- .increment(1l);
- }
- }
-
- @Override
- public void close() {
- }
-
- }
-
- public static class ExampleDeprecatedTIF extends TableInputFormatBase implements JobConfigurable {
-
- @Override
- public void configure(JobConf job) {
- try {
- Connection connection = ConnectionFactory.createConnection(job);
- Table exampleTable = connection.getTable(TableName.valueOf("exampleDeprecatedTable"));
- // mandatory
- initializeTable(connection, exampleTable.getName());
- byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
- Bytes.toBytes("columnB") };
- // mandatory
- setInputColumns(inputColumns);
- Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*"));
- // optional
- setRowFilter(exampleFilter);
- } catch (IOException exception) {
- throw new RuntimeException("Failed to configure for job.", exception);
- }
- }
-
- }
-
- public static class ExampleJobConfigurableTIF extends ExampleTIF implements JobConfigurable {
-
- @Override
- public void configure(JobConf job) {
- try {
- initialize(job);
- } catch (IOException exception) {
- throw new RuntimeException("Failed to initialize.", exception);
- }
- }
-
- @Override
- protected void initialize(JobConf job) throws IOException {
- initialize(job, "exampleJobConfigurableTable");
- }
- }
-
-
- public static class ExampleTIF extends TableInputFormatBase {
-
- @Override
- protected void initialize(JobConf job) throws IOException {
- initialize(job, "exampleTable");
- }
-
- protected void initialize(JobConf job, String table) throws IOException {
- Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create(job));
- TableName tableName = TableName.valueOf(table);
- // mandatory
- initializeTable(connection, tableName);
- byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
- Bytes.toBytes("columnB") };
- // mandatory
- setInputColumns(inputColumns);
- Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*"));
- // optional
- setRowFilter(exampleFilter);
- }
-
- }
-
-}
-
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestTableMapReduce.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestTableMapReduce.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestTableMapReduce.java
deleted file mode 100644
index 3f905cf..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestTableMapReduce.java
+++ /dev/null
@@ -1,103 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapred;
-
-import static org.junit.Assert.assertTrue;
-
-import java.io.File;
-import java.io.IOException;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.fs.FileUtil;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.mapreduce.TestTableMapReduceBase;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.RunningJob;
-import org.junit.experimental.categories.Category;
-
-/**
- * Test Map/Reduce job over HBase tables. The map/reduce process we're testing
- * on our tables is simple - take every row in the table, reverse the value of
- * a particular cell, and write it back to the table.
- */
-@Category({MapReduceTests.class, LargeTests.class})
-@SuppressWarnings("deprecation")
-public class TestTableMapReduce extends TestTableMapReduceBase {
- private static final Log LOG =
- LogFactory.getLog(TestTableMapReduce.class.getName());
-
- protected Log getLog() { return LOG; }
-
- /**
- * Pass the given key and processed record reduce
- */
- static class ProcessContentsMapper extends MapReduceBase implements
- TableMap<ImmutableBytesWritable, Put> {
-
- /**
- * Pass the key, and reversed value to reduce
- */
- public void map(ImmutableBytesWritable key, Result value,
- OutputCollector<ImmutableBytesWritable, Put> output,
- Reporter reporter)
- throws IOException {
- output.collect(key, TestTableMapReduceBase.map(key, value));
- }
- }
-
- @Override
- protected void runTestOnTable(Table table) throws IOException {
- JobConf jobConf = null;
- try {
- LOG.info("Before map/reduce startup");
- jobConf = new JobConf(UTIL.getConfiguration(), TestTableMapReduce.class);
- jobConf.setJobName("process column contents");
- jobConf.setNumReduceTasks(1);
- TableMapReduceUtil.initTableMapJob(table.getName().getNameAsString(),
- Bytes.toString(INPUT_FAMILY), ProcessContentsMapper.class,
- ImmutableBytesWritable.class, Put.class, jobConf);
- TableMapReduceUtil.initTableReduceJob(table.getName().getNameAsString(),
- IdentityTableReduce.class, jobConf);
-
- LOG.info("Started " + table.getName());
- RunningJob job = JobClient.runJob(jobConf);
- assertTrue(job.isSuccessful());
- LOG.info("After map/reduce completion");
-
- // verify map-reduce results
- verify(table.getName());
- } finally {
- if (jobConf != null) {
- FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
- }
- }
- }
-}
-
[07/41] hbase git commit: HBASE-18640 Move mapreduce out of
hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat2.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat2.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat2.java
deleted file mode 100644
index 87522b6..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat2.java
+++ /dev/null
@@ -1,1495 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertNotSame;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import java.io.IOException;
-import java.lang.reflect.Field;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Random;
-import java.util.Set;
-import java.util.concurrent.Callable;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.LocatedFileStatus;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.RemoteIterator;
-import org.apache.hadoop.hbase.ArrayBackedTag;
-import org.apache.hadoop.hbase.CategoryBasedTimeout;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.CompatibilitySingletonFactory;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.HColumnDescriptor;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.HDFSBlocksDistribution;
-import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.HadoopShims;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.PerformanceEvaluation;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.Tag;
-import org.apache.hadoop.hbase.TagType;
-import org.apache.hadoop.hbase.TagUtil;
-import org.apache.hadoop.hbase.client.Admin;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.RegionLocator;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.io.compress.Compression;
-import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
-import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
-import org.apache.hadoop.hbase.io.hfile.CacheConfig;
-import org.apache.hadoop.hbase.io.hfile.HFile;
-import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
-import org.apache.hadoop.hbase.io.hfile.HFileScanner;
-import org.apache.hadoop.hbase.regionserver.BloomType;
-import org.apache.hadoop.hbase.regionserver.HRegion;
-import org.apache.hadoop.hbase.regionserver.Store;
-import org.apache.hadoop.hbase.regionserver.StoreFile;
-import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.FSUtils;
-import org.apache.hadoop.hbase.util.ReflectionUtils;
-import org.apache.hadoop.hbase.util.Writables;
-import org.apache.hadoop.hdfs.DistributedFileSystem;
-import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy;
-import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
-import org.apache.hadoop.hdfs.server.blockmanagement.BlockStoragePolicySuite;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.RecordWriter;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.junit.Ignore;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestRule;
-import org.mockito.Mockito;
-
-import java.util.stream.Collectors;
-import java.util.stream.Stream;
-
-/**
- * Simple test for {@link HFileOutputFormat2}.
- * Sets up and runs a mapreduce job that writes hfile output.
- * Creates a few inner classes to implement splits and an inputformat that
- * emits keys and values like those of {@link PerformanceEvaluation}.
- */
-@Category({VerySlowMapReduceTests.class, LargeTests.class})
-public class TestHFileOutputFormat2 {
- @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
- withTimeout(this.getClass()).withLookingForStuckThread(true).build();
- private final static int ROWSPERSPLIT = 1024;
-
- private static final byte[][] FAMILIES
- = { Bytes.add(PerformanceEvaluation.FAMILY_NAME, Bytes.toBytes("-A"))
- , Bytes.add(PerformanceEvaluation.FAMILY_NAME, Bytes.toBytes("-B"))};
- private static final TableName[] TABLE_NAMES = Stream.of("TestTable", "TestTable2",
- "TestTable3").map(TableName::valueOf).toArray(TableName[]::new);
-
- private HBaseTestingUtility util = new HBaseTestingUtility();
-
- private static final Log LOG = LogFactory.getLog(TestHFileOutputFormat2.class);
-
- /**
- * Simple mapper that makes KeyValue output.
- */
- static class RandomKVGeneratingMapper
- extends Mapper<NullWritable, NullWritable,
- ImmutableBytesWritable, Cell> {
-
- private int keyLength;
- private static final int KEYLEN_DEFAULT=10;
- private static final String KEYLEN_CONF="randomkv.key.length";
-
- private int valLength;
- private static final int VALLEN_DEFAULT=10;
- private static final String VALLEN_CONF="randomkv.val.length";
- private static final byte [] QUALIFIER = Bytes.toBytes("data");
- private boolean multiTableMapper = false;
- private TableName[] tables = null;
-
-
- @Override
- protected void setup(Context context) throws IOException,
- InterruptedException {
- super.setup(context);
-
- Configuration conf = context.getConfiguration();
- keyLength = conf.getInt(KEYLEN_CONF, KEYLEN_DEFAULT);
- valLength = conf.getInt(VALLEN_CONF, VALLEN_DEFAULT);
- multiTableMapper = conf.getBoolean(HFileOutputFormat2.MULTI_TABLE_HFILEOUTPUTFORMAT_CONF_KEY,
- false);
- if (multiTableMapper) {
- tables = TABLE_NAMES;
- } else {
- tables = new TableName[]{TABLE_NAMES[0]};
- }
- }
-
- @Override
- protected void map(
- NullWritable n1, NullWritable n2,
- Mapper<NullWritable, NullWritable,
- ImmutableBytesWritable,Cell>.Context context)
- throws java.io.IOException ,InterruptedException
- {
-
- byte keyBytes[] = new byte[keyLength];
- byte valBytes[] = new byte[valLength];
-
- int taskId = context.getTaskAttemptID().getTaskID().getId();
- assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";
- Random random = new Random();
- byte[] key;
- for (int j = 0; j < tables.length; ++j) {
- for (int i = 0; i < ROWSPERSPLIT; i++) {
- random.nextBytes(keyBytes);
- // Ensure that unique tasks generate unique keys
- keyBytes[keyLength - 1] = (byte) (taskId & 0xFF);
- random.nextBytes(valBytes);
- key = keyBytes;
- if (multiTableMapper) {
- key = MultiTableHFileOutputFormat.createCompositeKey(tables[j].getName(), keyBytes);
- }
-
- for (byte[] family : TestHFileOutputFormat2.FAMILIES) {
- Cell kv = new KeyValue(keyBytes, family, QUALIFIER, valBytes);
- context.write(new ImmutableBytesWritable(key), kv);
- }
- }
- }
- }
- }
-
- /**
- * Simple mapper that makes Put output.
- */
- static class RandomPutGeneratingMapper
- extends Mapper<NullWritable, NullWritable,
- ImmutableBytesWritable, Put> {
-
- private int keyLength;
- private static final int KEYLEN_DEFAULT = 10;
- private static final String KEYLEN_CONF = "randomkv.key.length";
-
- private int valLength;
- private static final int VALLEN_DEFAULT = 10;
- private static final String VALLEN_CONF = "randomkv.val.length";
- private static final byte[] QUALIFIER = Bytes.toBytes("data");
- private boolean multiTableMapper = false;
- private TableName[] tables = null;
-
- @Override
- protected void setup(Context context) throws IOException,
- InterruptedException {
- super.setup(context);
-
- Configuration conf = context.getConfiguration();
- keyLength = conf.getInt(KEYLEN_CONF, KEYLEN_DEFAULT);
- valLength = conf.getInt(VALLEN_CONF, VALLEN_DEFAULT);
- multiTableMapper = conf.getBoolean(HFileOutputFormat2.MULTI_TABLE_HFILEOUTPUTFORMAT_CONF_KEY,
- false);
- if (multiTableMapper) {
- tables = TABLE_NAMES;
- } else {
- tables = new TableName[]{TABLE_NAMES[0]};
- }
- }
-
- @Override
- protected void map(
- NullWritable n1, NullWritable n2,
- Mapper<NullWritable, NullWritable,
- ImmutableBytesWritable, Put>.Context context)
- throws java.io.IOException, InterruptedException {
-
- byte keyBytes[] = new byte[keyLength];
- byte valBytes[] = new byte[valLength];
-
- int taskId = context.getTaskAttemptID().getTaskID().getId();
- assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";
-
- Random random = new Random();
- byte[] key;
- for (int j = 0; j < tables.length; ++j) {
- for (int i = 0; i < ROWSPERSPLIT; i++) {
- random.nextBytes(keyBytes);
- // Ensure that unique tasks generate unique keys
- keyBytes[keyLength - 1] = (byte) (taskId & 0xFF);
- random.nextBytes(valBytes);
- key = keyBytes;
- if (multiTableMapper) {
- key = MultiTableHFileOutputFormat.createCompositeKey(tables[j].getName(), keyBytes);
- }
-
- for (byte[] family : TestHFileOutputFormat2.FAMILIES) {
- Put p = new Put(keyBytes);
- p.addColumn(family, QUALIFIER, valBytes);
- // set the TTL very low so that the scan does not return any value
- p.setTTL(1L);
- context.write(new ImmutableBytesWritable(key), p);
- }
- }
- }
- }
- }
-
- private void setupRandomGeneratorMapper(Job job, boolean putSortReducer) {
- if (putSortReducer) {
- job.setInputFormatClass(NMapInputFormat.class);
- job.setMapperClass(RandomPutGeneratingMapper.class);
- job.setMapOutputKeyClass(ImmutableBytesWritable.class);
- job.setMapOutputValueClass(Put.class);
- } else {
- job.setInputFormatClass(NMapInputFormat.class);
- job.setMapperClass(RandomKVGeneratingMapper.class);
- job.setMapOutputKeyClass(ImmutableBytesWritable.class);
- job.setMapOutputValueClass(KeyValue.class);
- }
- }
-
- /**
- * Test that {@link HFileOutputFormat2} RecordWriter amends timestamps if
- * passed a keyvalue whose timestamp is {@link HConstants#LATEST_TIMESTAMP}.
- * @see <a href="https://issues.apache.org/jira/browse/HBASE-2615">HBASE-2615</a>
- */
- @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
- public void test_LATEST_TIMESTAMP_isReplaced()
- throws Exception {
- Configuration conf = new Configuration(this.util.getConfiguration());
- RecordWriter<ImmutableBytesWritable, Cell> writer = null;
- TaskAttemptContext context = null;
- Path dir =
- util.getDataTestDir("test_LATEST_TIMESTAMP_isReplaced");
- try {
- Job job = new Job(conf);
- FileOutputFormat.setOutputPath(job, dir);
- context = createTestTaskAttemptContext(job);
- HFileOutputFormat2 hof = new HFileOutputFormat2();
- writer = hof.getRecordWriter(context);
- final byte [] b = Bytes.toBytes("b");
-
- // Test 1. Pass a KV that has a ts of LATEST_TIMESTAMP. It should be
- // changed by the call to write. Check everything in the kv is the same except the ts.
- KeyValue kv = new KeyValue(b, b, b);
- KeyValue original = kv.clone();
- writer.write(new ImmutableBytesWritable(), kv);
- assertFalse(original.equals(kv));
- assertTrue(Bytes.equals(CellUtil.cloneRow(original), CellUtil.cloneRow(kv)));
- assertTrue(Bytes.equals(CellUtil.cloneFamily(original), CellUtil.cloneFamily(kv)));
- assertTrue(Bytes.equals(CellUtil.cloneQualifier(original), CellUtil.cloneQualifier(kv)));
- assertNotSame(original.getTimestamp(), kv.getTimestamp());
- assertNotSame(HConstants.LATEST_TIMESTAMP, kv.getTimestamp());
-
- // Test 2. Now pass a kv that has an explicit ts. It should not be
- // changed by the call to write.
- kv = new KeyValue(b, b, b, kv.getTimestamp() - 1, b);
- original = kv.clone();
- writer.write(new ImmutableBytesWritable(), kv);
- assertTrue(original.equals(kv));
- } finally {
- if (writer != null && context != null) writer.close(context);
- dir.getFileSystem(conf).delete(dir, true);
- }
- }
-
- private TaskAttemptContext createTestTaskAttemptContext(final Job job)
- throws Exception {
- HadoopShims hadoop = CompatibilitySingletonFactory.getInstance(HadoopShims.class);
- TaskAttemptContext context = hadoop.createTestTaskAttemptContext(
- job, "attempt_201402131733_0001_m_000000_0");
- return context;
- }
-
- /*
- * Test that {@link HFileOutputFormat2} creates an HFile with TIMERANGE
- * metadata used by time-restricted scans.
- */
- @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
- public void test_TIMERANGE() throws Exception {
- Configuration conf = new Configuration(this.util.getConfiguration());
- RecordWriter<ImmutableBytesWritable, Cell> writer = null;
- TaskAttemptContext context = null;
- Path dir =
- util.getDataTestDir("test_TIMERANGE_present");
- LOG.info("Timerange dir writing to dir: "+ dir);
- try {
- // build a record writer using HFileOutputFormat2
- Job job = new Job(conf);
- FileOutputFormat.setOutputPath(job, dir);
- context = createTestTaskAttemptContext(job);
- HFileOutputFormat2 hof = new HFileOutputFormat2();
- writer = hof.getRecordWriter(context);
-
- // Pass two key values with explicit timestamps
- final byte [] b = Bytes.toBytes("b");
-
- // value 1 with timestamp 2000
- KeyValue kv = new KeyValue(b, b, b, 2000, b);
- KeyValue original = kv.clone();
- writer.write(new ImmutableBytesWritable(), kv);
- assertEquals(original,kv);
-
- // value 2 with timestamp 1000
- kv = new KeyValue(b, b, b, 1000, b);
- original = kv.clone();
- writer.write(new ImmutableBytesWritable(), kv);
- assertEquals(original, kv);
-
- // verify that the file has the proper FileInfo.
- writer.close(context);
-
- // the generated file lives 1 directory down from the attempt directory
- // and is the only file, e.g.
- // _attempt__0000_r_000000_0/b/1979617994050536795
- FileSystem fs = FileSystem.get(conf);
- Path attemptDirectory = hof.getDefaultWorkFile(context, "").getParent();
- FileStatus[] sub1 = fs.listStatus(attemptDirectory);
- FileStatus[] file = fs.listStatus(sub1[0].getPath());
-
- // open as HFile Reader and pull out TIMERANGE FileInfo.
- HFile.Reader rd =
- HFile.createReader(fs, file[0].getPath(), new CacheConfig(conf), true, conf);
- Map<byte[],byte[]> finfo = rd.loadFileInfo();
- byte[] range = finfo.get("TIMERANGE".getBytes("UTF-8"));
- assertNotNull(range);
-
- // unmarshall and check values.
- TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
- Writables.copyWritable(range, timeRangeTracker);
- LOG.info(timeRangeTracker.getMin() +
- "...." + timeRangeTracker.getMax());
- assertEquals(1000, timeRangeTracker.getMin());
- assertEquals(2000, timeRangeTracker.getMax());
- rd.close();
- } finally {
- if (writer != null && context != null) writer.close(context);
- dir.getFileSystem(conf).delete(dir, true);
- }
- }
-
- /**
- * Run small MR job.
- */
- @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
- public void testWritingPEData() throws Exception {
- Configuration conf = util.getConfiguration();
- Path testDir = util.getDataTestDirOnTestFS("testWritingPEData");
- FileSystem fs = testDir.getFileSystem(conf);
-
- // Lower this value or we OOME in Eclipse.
- conf.setInt("mapreduce.task.io.sort.mb", 20);
- // Write a few files.
- conf.setLong(HConstants.HREGION_MAX_FILESIZE, 64 * 1024);
-
- Job job = new Job(conf, "testWritingPEData");
- setupRandomGeneratorMapper(job, false);
- // This partitioner doesn't work well for numeric keys, but we use it anyway
- // just to demonstrate how to configure it.
- byte[] startKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
- byte[] endKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
-
- Arrays.fill(startKey, (byte)0);
- Arrays.fill(endKey, (byte)0xff);
-
- job.setPartitionerClass(SimpleTotalOrderPartitioner.class);
- // Set start and end rows for partitioner.
- SimpleTotalOrderPartitioner.setStartKey(job.getConfiguration(), startKey);
- SimpleTotalOrderPartitioner.setEndKey(job.getConfiguration(), endKey);
- job.setReducerClass(KeyValueSortReducer.class);
- job.setOutputFormatClass(HFileOutputFormat2.class);
- job.setNumReduceTasks(4);
- job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
- MutationSerialization.class.getName(), ResultSerialization.class.getName(),
- KeyValueSerialization.class.getName());
-
- FileOutputFormat.setOutputPath(job, testDir);
- assertTrue(job.waitForCompletion(false));
- FileStatus [] files = fs.listStatus(testDir);
- assertTrue(files.length > 0);
- }
-
- /**
- * Test that {@link HFileOutputFormat2} RecordWriter writes tags such as ttl into
- * hfile.
- */
- @Test
- public void test_WritingTagData()
- throws Exception {
- Configuration conf = new Configuration(this.util.getConfiguration());
- final String HFILE_FORMAT_VERSION_CONF_KEY = "hfile.format.version";
- conf.setInt(HFILE_FORMAT_VERSION_CONF_KEY, HFile.MIN_FORMAT_VERSION_WITH_TAGS);
- RecordWriter<ImmutableBytesWritable, Cell> writer = null;
- TaskAttemptContext context = null;
- Path dir =
- util.getDataTestDir("WritingTagData");
- try {
- conf.set(HFileOutputFormat2.OUTPUT_TABLE_NAME_CONF_KEY, TABLE_NAMES[0].getNameAsString());
- // turn locality off to eliminate getRegionLocation fail-and-retry time when writing kvs
- conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false);
- Job job = new Job(conf);
- FileOutputFormat.setOutputPath(job, dir);
- context = createTestTaskAttemptContext(job);
- HFileOutputFormat2 hof = new HFileOutputFormat2();
- writer = hof.getRecordWriter(context);
- final byte [] b = Bytes.toBytes("b");
-
- List< Tag > tags = new ArrayList<>();
- tags.add(new ArrayBackedTag(TagType.TTL_TAG_TYPE, Bytes.toBytes(978670)));
- KeyValue kv = new KeyValue(b, b, b, HConstants.LATEST_TIMESTAMP, b, tags);
- writer.write(new ImmutableBytesWritable(), kv);
- writer.close(context);
- writer = null;
- FileSystem fs = dir.getFileSystem(conf);
- RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(dir, true);
- while(iterator.hasNext()) {
- LocatedFileStatus keyFileStatus = iterator.next();
- HFile.Reader reader =
- HFile.createReader(fs, keyFileStatus.getPath(), new CacheConfig(conf), true, conf);
- HFileScanner scanner = reader.getScanner(false, false, false);
- scanner.seekTo();
- Cell cell = scanner.getCell();
- List<Tag> tagsFromCell = TagUtil.asList(cell.getTagsArray(), cell.getTagsOffset(),
- cell.getTagsLength());
- assertTrue(tagsFromCell.size() > 0);
- for (Tag tag : tagsFromCell) {
- assertTrue(tag.getType() == TagType.TTL_TAG_TYPE);
- }
- }
- } finally {
- if (writer != null && context != null) writer.close(context);
- dir.getFileSystem(conf).delete(dir, true);
- }
- }
-
- @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
- public void testJobConfiguration() throws Exception {
- Configuration conf = new Configuration(this.util.getConfiguration());
- conf.set(HConstants.TEMPORARY_FS_DIRECTORY_KEY, util.getDataTestDir("testJobConfiguration")
- .toString());
- Job job = new Job(conf);
- job.setWorkingDirectory(util.getDataTestDir("testJobConfiguration"));
- Table table = Mockito.mock(Table.class);
- RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
- setupMockStartKeys(regionLocator);
- setupMockTableName(regionLocator);
- HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
- assertEquals(job.getNumReduceTasks(), 4);
- }
-
- private byte [][] generateRandomStartKeys(int numKeys) {
- Random random = new Random();
- byte[][] ret = new byte[numKeys][];
- // first region start key is always empty
- ret[0] = HConstants.EMPTY_BYTE_ARRAY;
- for (int i = 1; i < numKeys; i++) {
- ret[i] =
- PerformanceEvaluation.generateData(random, PerformanceEvaluation.DEFAULT_VALUE_LENGTH);
- }
- return ret;
- }
-
- private byte[][] generateRandomSplitKeys(int numKeys) {
- Random random = new Random();
- byte[][] ret = new byte[numKeys][];
- for (int i = 0; i < numKeys; i++) {
- ret[i] =
- PerformanceEvaluation.generateData(random, PerformanceEvaluation.DEFAULT_VALUE_LENGTH);
- }
- return ret;
- }
-
- @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
- public void testMRIncrementalLoad() throws Exception {
- LOG.info("\nStarting test testMRIncrementalLoad\n");
- doIncrementalLoadTest(false, false, false, "testMRIncrementalLoad");
- }
-
- @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
- public void testMRIncrementalLoadWithSplit() throws Exception {
- LOG.info("\nStarting test testMRIncrementalLoadWithSplit\n");
- doIncrementalLoadTest(true, false, false, "testMRIncrementalLoadWithSplit");
- }
-
- /**
- * Test for HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY = true.
- * This test can only check the correctness of the original logic when LOCALITY_SENSITIVE_CONF_KEY
- * is set to true. Because MiniHBaseCluster always runs with a single hostname (and different ports),
- * it is not possible to check region locality by comparing region locations and DataNode hostnames.
- * Once MiniHBaseCluster supports an explicit hostnames parameter (just like MiniDFSCluster does),
- * we will be able to test region locality features more easily.
- */
- @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
- public void testMRIncrementalLoadWithLocality() throws Exception {
- LOG.info("\nStarting test testMRIncrementalLoadWithLocality\n");
- doIncrementalLoadTest(false, true, false, "testMRIncrementalLoadWithLocality1");
- doIncrementalLoadTest(true, true, false, "testMRIncrementalLoadWithLocality2");
- }
-
- //@Ignore("Wahtevs")
- @Test
- public void testMRIncrementalLoadWithPutSortReducer() throws Exception {
- LOG.info("\nStarting test testMRIncrementalLoadWithPutSortReducer\n");
- doIncrementalLoadTest(false, false, true, "testMRIncrementalLoadWithPutSortReducer");
- }
-
- private void doIncrementalLoadTest(boolean shouldChangeRegions, boolean shouldKeepLocality,
- boolean putSortReducer, String tableStr) throws Exception {
- doIncrementalLoadTest(shouldChangeRegions, shouldKeepLocality, putSortReducer,
- Arrays.asList(tableStr));
- }
-
- @Test
- public void testMultiMRIncrementalLoadWithPutSortReducer() throws Exception {
- LOG.info("\nStarting test testMultiMRIncrementalLoadWithPutSortReducer\n");
- doIncrementalLoadTest(false, false, true,
- Arrays.stream(TABLE_NAMES).map(TableName::getNameAsString).collect(Collectors.toList
- ()));
- }
-
- private void doIncrementalLoadTest(boolean shouldChangeRegions, boolean shouldKeepLocality,
- boolean putSortReducer, List<String> tableStr) throws Exception {
- util = new HBaseTestingUtility();
- Configuration conf = util.getConfiguration();
- conf.setBoolean(MultiTableHFileOutputFormat.LOCALITY_SENSITIVE_CONF_KEY, shouldKeepLocality);
- int hostCount = 1;
- int regionNum = 5;
- if (shouldKeepLocality) {
- // We should raise the host count above the HDFS replica count once MiniHBaseCluster supports
- // an explicit hostnames parameter, just like MiniDFSCluster does.
- hostCount = 3;
- regionNum = 20;
- }
-
- String[] hostnames = new String[hostCount];
- for (int i = 0; i < hostCount; ++i) {
- hostnames[i] = "datanode_" + i;
- }
- util.startMiniCluster(1, hostCount, hostnames);
-
- Map<String, Table> allTables = new HashMap<>(tableStr.size());
- List<HFileOutputFormat2.TableInfo> tableInfo = new ArrayList<>(tableStr.size());
- boolean writeMultipleTables = tableStr.size() > 1;
- for (String tableStrSingle : tableStr) {
- byte[][] splitKeys = generateRandomSplitKeys(regionNum - 1);
- TableName tableName = TableName.valueOf(tableStrSingle);
- Table table = util.createTable(tableName, FAMILIES, splitKeys);
-
- RegionLocator r = util.getConnection().getRegionLocator(tableName);
- assertEquals("Should start with empty table", 0, util.countRows(table));
- int numRegions = r.getStartKeys().length;
- assertEquals("Should make " + regionNum + " regions", numRegions, regionNum);
-
- allTables.put(tableStrSingle, table);
- tableInfo.add(new HFileOutputFormat2.TableInfo(table.getTableDescriptor(), r));
- }
- Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
- // Generate the bulk load files
- runIncrementalPELoad(conf, tableInfo, testDir, putSortReducer);
-
- for (Table tableSingle : allTables.values()) {
- // This doesn't write into the table, just makes files
- assertEquals("HFOF should not touch actual table", 0, util.countRows(tableSingle));
- }
- int numTableDirs = 0;
- for (FileStatus tf : testDir.getFileSystem(conf).listStatus(testDir)) {
- Path tablePath = testDir;
-
- if (writeMultipleTables) {
- if (allTables.containsKey(tf.getPath().getName())) {
- ++numTableDirs;
- tablePath = tf.getPath();
- }
- else {
- continue;
- }
- }
-
- // Make sure that a directory was created for every CF
- int dir = 0;
- for (FileStatus f : tablePath.getFileSystem(conf).listStatus(tablePath)) {
- for (byte[] family : FAMILIES) {
- if (Bytes.toString(family).equals(f.getPath().getName())) {
- ++dir;
- }
- }
- }
- assertEquals("Column family not found in FS.", FAMILIES.length, dir);
- }
- if (writeMultipleTables) {
- assertEquals("Dir for all input tables not created", numTableDirs, allTables.size());
- }
-
- Admin admin = util.getConnection().getAdmin();
- try {
- // handle the split case
- if (shouldChangeRegions) {
- Table chosenTable = allTables.values().iterator().next();
- // Choose a semi-random table if multiple tables are available
- LOG.info("Changing regions in table " + chosenTable.getName().getNameAsString());
- admin.disableTable(chosenTable.getName());
- util.waitUntilNoRegionsInTransition();
-
- util.deleteTable(chosenTable.getName());
- byte[][] newSplitKeys = generateRandomSplitKeys(14);
- Table table = util.createTable(chosenTable.getName(), FAMILIES, newSplitKeys);
-
- while (util.getConnection().getRegionLocator(chosenTable.getName())
- .getAllRegionLocations().size() != 15 ||
- !admin.isTableAvailable(table.getName())) {
- Thread.sleep(200);
- LOG.info("Waiting for new region assignment to happen");
- }
- }
-
- // Perform the actual load
- for (HFileOutputFormat2.TableInfo singleTableInfo : tableInfo) {
- Path tableDir = testDir;
- String tableNameStr = singleTableInfo.getHTableDescriptor().getNameAsString();
- LOG.info("Running LoadIncrementalHFiles on table" + tableNameStr);
- if (writeMultipleTables) {
- tableDir = new Path(testDir, tableNameStr);
- }
- Table currentTable = allTables.get(tableNameStr);
- TableName currentTableName = currentTable.getName();
- new LoadIncrementalHFiles(conf).doBulkLoad(tableDir, admin, currentTable, singleTableInfo
- .getRegionLocator());
-
- // Ensure data shows up
- int expectedRows = 0;
- if (putSortReducer) {
- // no rows should be extracted
- assertEquals("LoadIncrementalHFiles should put expected data in table", expectedRows,
- util.countRows(currentTable));
- } else {
- expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
- assertEquals("LoadIncrementalHFiles should put expected data in table", expectedRows,
- util.countRows(currentTable));
- Scan scan = new Scan();
- ResultScanner results = currentTable.getScanner(scan);
- for (Result res : results) {
- assertEquals(FAMILIES.length, res.rawCells().length);
- Cell first = res.rawCells()[0];
- for (Cell kv : res.rawCells()) {
- assertTrue(CellUtil.matchingRow(first, kv));
- assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv)));
- }
- }
- results.close();
- }
- String tableDigestBefore = util.checksumRows(currentTable);
- // Check region locality
- HDFSBlocksDistribution hbd = new HDFSBlocksDistribution();
- for (HRegion region : util.getHBaseCluster().getRegions(currentTableName)) {
- hbd.add(region.getHDFSBlocksDistribution());
- }
- for (String hostname : hostnames) {
- float locality = hbd.getBlockLocalityIndex(hostname);
- LOG.info("locality of [" + hostname + "]: " + locality);
- assertEquals(100, (int) (locality * 100));
- }
-
- // Cause regions to reopen
- admin.disableTable(currentTableName);
- while (!admin.isTableDisabled(currentTableName)) {
- Thread.sleep(200);
- LOG.info("Waiting for table to disable");
- }
- admin.enableTable(currentTableName);
- util.waitTableAvailable(currentTableName);
- assertEquals("Data should remain after reopening of regions",
- tableDigestBefore, util.checksumRows(currentTable));
- }
- } finally {
- for (HFileOutputFormat2.TableInfo tableInfoSingle : tableInfo) {
- tableInfoSingle.getRegionLocator().close();
- }
- for (Entry<String, Table> singleTable : allTables.entrySet() ) {
- singleTable.getValue().close();
- util.deleteTable(singleTable.getValue().getName());
- }
- testDir.getFileSystem(conf).delete(testDir, true);
- util.shutdownMiniCluster();
- }
- }
-
- private void runIncrementalPELoad(Configuration conf, List<HFileOutputFormat2.TableInfo> tableInfo, Path outDir,
- boolean putSortReducer) throws IOException,
- InterruptedException, ClassNotFoundException {
- Job job = new Job(conf, "testLocalMRIncrementalLoad");
- job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));
- job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
- MutationSerialization.class.getName(), ResultSerialization.class.getName(),
- KeyValueSerialization.class.getName());
- setupRandomGeneratorMapper(job, putSortReducer);
- if (tableInfo.size() > 1) {
- MultiTableHFileOutputFormat.configureIncrementalLoad(job, tableInfo);
- int sum = 0;
- for (HFileOutputFormat2.TableInfo tableInfoSingle : tableInfo) {
- sum += tableInfoSingle.getRegionLocator().getAllRegionLocations().size();
- }
- assertEquals(sum, job.getNumReduceTasks());
- }
- else {
- RegionLocator regionLocator = tableInfo.get(0).getRegionLocator();
- HFileOutputFormat2.configureIncrementalLoad(job, tableInfo.get(0).getHTableDescriptor(),
- regionLocator);
- assertEquals(regionLocator.getAllRegionLocations().size(), job.getNumReduceTasks());
- }
-
- FileOutputFormat.setOutputPath(job, outDir);
-
- assertFalse(util.getTestFileSystem().exists(outDir));
-
- assertTrue(job.waitForCompletion(true));
- }
-
- /**
- * Test for {@link HFileOutputFormat2#configureCompression(Configuration, HTableDescriptor)} and
- * {@link HFileOutputFormat2#createFamilyCompressionMap(Configuration)}.
- * Tests that the compression map is correctly serialized into
- * and deserialized from configuration
- *
- * @throws IOException
- */
- @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
- public void testSerializeDeserializeFamilyCompressionMap() throws IOException {
- for (int numCfs = 0; numCfs <= 3; numCfs++) {
- Configuration conf = new Configuration(this.util.getConfiguration());
- Map<String, Compression.Algorithm> familyToCompression =
- getMockColumnFamiliesForCompression(numCfs);
- Table table = Mockito.mock(Table.class);
- setupMockColumnFamiliesForCompression(table, familyToCompression);
- conf.set(HFileOutputFormat2.COMPRESSION_FAMILIES_CONF_KEY,
- HFileOutputFormat2.serializeColumnFamilyAttribute
- (HFileOutputFormat2.compressionDetails,
- Arrays.asList(table.getTableDescriptor())));
-
- // read back family specific compression setting from the configuration
- Map<byte[], Algorithm> retrievedFamilyToCompressionMap = HFileOutputFormat2
- .createFamilyCompressionMap(conf);
-
- // test that we have a value for all column families that matches with the
- // used mock values
- for (Entry<String, Algorithm> entry : familyToCompression.entrySet()) {
- assertEquals("Compression configuration incorrect for column family:"
- + entry.getKey(), entry.getValue(),
- retrievedFamilyToCompressionMap.get(entry.getKey().getBytes("UTF-8")));
- }
- }
- }
-
- private void setupMockColumnFamiliesForCompression(Table table,
- Map<String, Compression.Algorithm> familyToCompression) throws IOException {
- HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAMES[0]);
- for (Entry<String, Compression.Algorithm> entry : familyToCompression.entrySet()) {
- mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
- .setMaxVersions(1)
- .setCompressionType(entry.getValue())
- .setBlockCacheEnabled(false)
- .setTimeToLive(0));
- }
- Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
- }
-
- /**
- * @return a map from column family names to compression algorithms for
- * testing column family compression. Column family names have special characters
- */
- private Map<String, Compression.Algorithm>
- getMockColumnFamiliesForCompression (int numCfs) {
- Map<String, Compression.Algorithm> familyToCompression = new HashMap<>();
- // use column family names having special characters
- if (numCfs-- > 0) {
- familyToCompression.put("Family1!@#!@#&", Compression.Algorithm.LZO);
- }
- if (numCfs-- > 0) {
- familyToCompression.put("Family2=asdads&!AASD", Compression.Algorithm.SNAPPY);
- }
- if (numCfs-- > 0) {
- familyToCompression.put("Family2=asdads&!AASD", Compression.Algorithm.GZ);
- }
- if (numCfs-- > 0) {
- familyToCompression.put("Family3", Compression.Algorithm.NONE);
- }
- return familyToCompression;
- }
-
-
- /**
- * Test for {@link HFileOutputFormat2#configureBloomType(HTableDescriptor, Configuration)} and
- * {@link HFileOutputFormat2#createFamilyBloomTypeMap(Configuration)}.
- * Tests that the bloom type map is correctly serialized into
- * and deserialized from configuration
- *
- * @throws IOException
- */
- @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
- public void testSerializeDeserializeFamilyBloomTypeMap() throws IOException {
- for (int numCfs = 0; numCfs <= 2; numCfs++) {
- Configuration conf = new Configuration(this.util.getConfiguration());
- Map<String, BloomType> familyToBloomType =
- getMockColumnFamiliesForBloomType(numCfs);
- Table table = Mockito.mock(Table.class);
- setupMockColumnFamiliesForBloomType(table,
- familyToBloomType);
- conf.set(HFileOutputFormat2.BLOOM_TYPE_FAMILIES_CONF_KEY,
- HFileOutputFormat2.serializeColumnFamilyAttribute(HFileOutputFormat2.bloomTypeDetails,
- Arrays.asList(table.getTableDescriptor())));
-
- // read back family specific data block encoding settings from the
- // configuration
- Map<byte[], BloomType> retrievedFamilyToBloomTypeMap =
- HFileOutputFormat2
- .createFamilyBloomTypeMap(conf);
-
- // test that we have a value for all column families that matches with the
- // used mock values
- for (Entry<String, BloomType> entry : familyToBloomType.entrySet()) {
- assertEquals("BloomType configuration incorrect for column family:"
- + entry.getKey(), entry.getValue(),
- retrievedFamilyToBloomTypeMap.get(entry.getKey().getBytes("UTF-8")));
- }
- }
- }
-
- private void setupMockColumnFamiliesForBloomType(Table table,
- Map<String, BloomType> familyToDataBlockEncoding) throws IOException {
- HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAMES[0]);
- for (Entry<String, BloomType> entry : familyToDataBlockEncoding.entrySet()) {
- mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
- .setMaxVersions(1)
- .setBloomFilterType(entry.getValue())
- .setBlockCacheEnabled(false)
- .setTimeToLive(0));
- }
- Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
- }
-
- /**
- * @return a map from column family names to bloom filter types for
- * testing column family bloom filter settings. Column family names have special characters
- */
- private Map<String, BloomType>
- getMockColumnFamiliesForBloomType (int numCfs) {
- Map<String, BloomType> familyToBloomType = new HashMap<>();
- // use column family names having special characters
- if (numCfs-- > 0) {
- familyToBloomType.put("Family1!@#!@#&", BloomType.ROW);
- }
- if (numCfs-- > 0) {
- familyToBloomType.put("Family2=asdads&!AASD",
- BloomType.ROWCOL);
- }
- if (numCfs-- > 0) {
- familyToBloomType.put("Family3", BloomType.NONE);
- }
- return familyToBloomType;
- }
-
- /**
- * Test for {@link HFileOutputFormat2#configureBlockSize(HTableDescriptor, Configuration)} and
- * {@link HFileOutputFormat2#createFamilyBlockSizeMap(Configuration)}.
- * Tests that the block size map is correctly serialized into
- * and deserialized from configuration
- *
- * @throws IOException
- */
- @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
- public void testSerializeDeserializeFamilyBlockSizeMap() throws IOException {
- for (int numCfs = 0; numCfs <= 3; numCfs++) {
- Configuration conf = new Configuration(this.util.getConfiguration());
- Map<String, Integer> familyToBlockSize =
- getMockColumnFamiliesForBlockSize(numCfs);
- Table table = Mockito.mock(Table.class);
- setupMockColumnFamiliesForBlockSize(table,
- familyToBlockSize);
- conf.set(HFileOutputFormat2.BLOCK_SIZE_FAMILIES_CONF_KEY,
- HFileOutputFormat2.serializeColumnFamilyAttribute
- (HFileOutputFormat2.blockSizeDetails, Arrays.asList(table
- .getTableDescriptor())));
-
- // read back family specific data block encoding settings from the
- // configuration
- Map<byte[], Integer> retrievedFamilyToBlockSizeMap =
- HFileOutputFormat2
- .createFamilyBlockSizeMap(conf);
-
- // test that we have a value for all column families that matches with the
- // used mock values
- for (Entry<String, Integer> entry : familyToBlockSize.entrySet()
- ) {
- assertEquals("BlockSize configuration incorrect for column family:"
- + entry.getKey(), entry.getValue(),
- retrievedFamilyToBlockSizeMap.get(entry.getKey().getBytes("UTF-8")));
- }
- }
- }
-
- private void setupMockColumnFamiliesForBlockSize(Table table,
- Map<String, Integer> familyToDataBlockEncoding) throws IOException {
- HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAMES[0]);
- for (Entry<String, Integer> entry : familyToDataBlockEncoding.entrySet()) {
- mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
- .setMaxVersions(1)
- .setBlocksize(entry.getValue())
- .setBlockCacheEnabled(false)
- .setTimeToLive(0));
- }
- Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
- }
-
- /**
- * @return a map from column family names to block sizes for
- * testing column family block size settings. Column family names have special characters
- */
- private Map<String, Integer>
- getMockColumnFamiliesForBlockSize (int numCfs) {
- Map<String, Integer> familyToBlockSize = new HashMap<>();
- // use column family names having special characters
- if (numCfs-- > 0) {
- familyToBlockSize.put("Family1!@#!@#&", 1234);
- }
- if (numCfs-- > 0) {
- familyToBlockSize.put("Family2=asdads&!AASD",
- Integer.MAX_VALUE);
- }
- if (numCfs-- > 0) {
- familyToBlockSize.put("Family2=asdads&!AASD",
- Integer.MAX_VALUE);
- }
- if (numCfs-- > 0) {
- familyToBlockSize.put("Family3", 0);
- }
- return familyToBlockSize;
- }
-
- /**
- * Test for {@link HFileOutputFormat2#configureDataBlockEncoding(HTableDescriptor, Configuration)}
- * and {@link HFileOutputFormat2#createFamilyDataBlockEncodingMap(Configuration)}.
- * Tests that the data block encoding map is correctly serialized into
- * and deserialized from configuration
- *
- * @throws IOException
- */
- @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
- public void testSerializeDeserializeFamilyDataBlockEncodingMap() throws IOException {
- for (int numCfs = 0; numCfs <= 3; numCfs++) {
- Configuration conf = new Configuration(this.util.getConfiguration());
- Map<String, DataBlockEncoding> familyToDataBlockEncoding =
- getMockColumnFamiliesForDataBlockEncoding(numCfs);
- Table table = Mockito.mock(Table.class);
- setupMockColumnFamiliesForDataBlockEncoding(table,
- familyToDataBlockEncoding);
- HTableDescriptor tableDescriptor = table.getTableDescriptor();
- conf.set(HFileOutputFormat2.DATABLOCK_ENCODING_FAMILIES_CONF_KEY,
- HFileOutputFormat2.serializeColumnFamilyAttribute
- (HFileOutputFormat2.dataBlockEncodingDetails, Arrays
- .asList(tableDescriptor)));
-
- // read back family specific data block encoding settings from the
- // configuration
- Map<byte[], DataBlockEncoding> retrievedFamilyToDataBlockEncodingMap =
- HFileOutputFormat2
- .createFamilyDataBlockEncodingMap(conf);
-
- // test that we have a value for all column families that matches with the
- // used mock values
- for (Entry<String, DataBlockEncoding> entry : familyToDataBlockEncoding.entrySet()) {
- assertEquals("DataBlockEncoding configuration incorrect for column family:"
- + entry.getKey(), entry.getValue(),
- retrievedFamilyToDataBlockEncodingMap.get(entry.getKey().getBytes("UTF-8")));
- }
- }
- }
-
- private void setupMockColumnFamiliesForDataBlockEncoding(Table table,
- Map<String, DataBlockEncoding> familyToDataBlockEncoding) throws IOException {
- HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAMES[0]);
- for (Entry<String, DataBlockEncoding> entry : familyToDataBlockEncoding.entrySet()) {
- mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
- .setMaxVersions(1)
- .setDataBlockEncoding(entry.getValue())
- .setBlockCacheEnabled(false)
- .setTimeToLive(0));
- }
- Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
- }
-
- /**
- * @return a map from column family names to data block encodings for
- * testing column family data block encoding settings. Column family names have special characters
- */
- private Map<String, DataBlockEncoding>
- getMockColumnFamiliesForDataBlockEncoding (int numCfs) {
- Map<String, DataBlockEncoding> familyToDataBlockEncoding = new HashMap<>();
- // use column family names having special characters
- if (numCfs-- > 0) {
- familyToDataBlockEncoding.put("Family1!@#!@#&", DataBlockEncoding.DIFF);
- }
- if (numCfs-- > 0) {
- familyToDataBlockEncoding.put("Family2=asdads&!AASD",
- DataBlockEncoding.FAST_DIFF);
- }
- if (numCfs-- > 0) {
- familyToDataBlockEncoding.put("Family2=asdads&!AASD",
- DataBlockEncoding.PREFIX);
- }
- if (numCfs-- > 0) {
- familyToDataBlockEncoding.put("Family3", DataBlockEncoding.NONE);
- }
- return familyToDataBlockEncoding;
- }
-
- private void setupMockStartKeys(RegionLocator table) throws IOException {
- byte[][] mockKeys = new byte[][] {
- HConstants.EMPTY_BYTE_ARRAY,
- Bytes.toBytes("aaa"),
- Bytes.toBytes("ggg"),
- Bytes.toBytes("zzz")
- };
- Mockito.doReturn(mockKeys).when(table).getStartKeys();
- }
-
- private void setupMockTableName(RegionLocator table) throws IOException {
- TableName mockTableName = TableName.valueOf("mock_table");
- Mockito.doReturn(mockTableName).when(table).getName();
- }
-
- /**
- * Test that {@link HFileOutputFormat2} RecordWriter uses compression and
- * bloom filter settings from the column family descriptor
- */
- @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
- public void testColumnFamilySettings() throws Exception {
- Configuration conf = new Configuration(this.util.getConfiguration());
- RecordWriter<ImmutableBytesWritable, Cell> writer = null;
- TaskAttemptContext context = null;
- Path dir = util.getDataTestDir("testColumnFamilySettings");
-
- // Setup table descriptor
- Table table = Mockito.mock(Table.class);
- RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
- HTableDescriptor htd = new HTableDescriptor(TABLE_NAMES[0]);
- Mockito.doReturn(htd).when(table).getTableDescriptor();
- for (HColumnDescriptor hcd: HBaseTestingUtility.generateColumnDescriptors()) {
- htd.addFamily(hcd);
- }
-
- // set up the table to return some mock keys
- setupMockStartKeys(regionLocator);
-
- try {
- // partial map red setup to get an operational writer for testing
- // We turn off the sequence file compression, because DefaultCodec
- // pollutes the GZip codec pool with an incompatible compressor.
- conf.set("io.seqfile.compression.type", "NONE");
- conf.set("hbase.fs.tmp.dir", dir.toString());
- // turn locality off to eliminate getRegionLocation fail-and-retry time when writing kvs
- conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false);
-
- Job job = new Job(conf, "testLocalMRIncrementalLoad");
- job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilySettings"));
- setupRandomGeneratorMapper(job, false);
- HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
- FileOutputFormat.setOutputPath(job, dir);
- context = createTestTaskAttemptContext(job);
- HFileOutputFormat2 hof = new HFileOutputFormat2();
- writer = hof.getRecordWriter(context);
-
- // write out random rows
- writeRandomKeyValues(writer, context, htd.getFamiliesKeys(), ROWSPERSPLIT);
- writer.close(context);
-
- // Make sure that a directory was created for every CF
- FileSystem fs = dir.getFileSystem(conf);
-
- // commit so that the filesystem has one directory per column family
- hof.getOutputCommitter(context).commitTask(context);
- hof.getOutputCommitter(context).commitJob(context);
- FileStatus[] families = FSUtils.listStatus(fs, dir, new FSUtils.FamilyDirFilter(fs));
- assertEquals(htd.getFamilies().size(), families.length);
- for (FileStatus f : families) {
- String familyStr = f.getPath().getName();
- HColumnDescriptor hcd = htd.getFamily(Bytes.toBytes(familyStr));
- // verify that the compression on this file matches the configured
- // compression
- Path dataFilePath = fs.listStatus(f.getPath())[0].getPath();
- Reader reader = HFile.createReader(fs, dataFilePath, new CacheConfig(conf), true, conf);
- Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
-
- byte[] bloomFilter = fileInfo.get(StoreFile.BLOOM_FILTER_TYPE_KEY);
- if (bloomFilter == null) bloomFilter = Bytes.toBytes("NONE");
- assertEquals("Incorrect bloom filter used for column family " + familyStr +
- "(reader: " + reader + ")",
- hcd.getBloomFilterType(), BloomType.valueOf(Bytes.toString(bloomFilter)));
- assertEquals("Incorrect compression used for column family " + familyStr +
- "(reader: " + reader + ")", hcd.getCompressionType(), reader.getFileContext().getCompression());
- }
- } finally {
- dir.getFileSystem(conf).delete(dir, true);
- }
- }
-
- /**
- * Write random values to the writer assuming a table created using
- * {@link #FAMILIES} as column family descriptors
- */
- private void writeRandomKeyValues(RecordWriter<ImmutableBytesWritable, Cell> writer,
- TaskAttemptContext context, Set<byte[]> families, int numRows)
- throws IOException, InterruptedException {
- byte keyBytes[] = new byte[Bytes.SIZEOF_INT];
- int valLength = 10;
- byte valBytes[] = new byte[valLength];
-
- int taskId = context.getTaskAttemptID().getTaskID().getId();
- assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";
- final byte [] qualifier = Bytes.toBytes("data");
- Random random = new Random();
- for (int i = 0; i < numRows; i++) {
-
- Bytes.putInt(keyBytes, 0, i);
- random.nextBytes(valBytes);
- ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);
-
- for (byte[] family : families) {
- Cell kv = new KeyValue(keyBytes, family, qualifier, valBytes);
- writer.write(key, kv);
- }
- }
- }
-
- /**
- * This test covers the scenario from HBASE-6901.
- * All files are bulk loaded and excluded from minor compaction.
- * Without the fix for HBASE-6901, an ArrayIndexOutOfBoundsException
- * would be thrown.
- */
- @Ignore ("Flakey: See HBASE-9051") @Test
- public void testExcludeAllFromMinorCompaction() throws Exception {
- Configuration conf = util.getConfiguration();
- conf.setInt("hbase.hstore.compaction.min", 2);
- generateRandomStartKeys(5);
-
- util.startMiniCluster();
- try (Connection conn = ConnectionFactory.createConnection();
- Admin admin = conn.getAdmin();
- Table table = util.createTable(TABLE_NAMES[0], FAMILIES);
- RegionLocator locator = conn.getRegionLocator(TABLE_NAMES[0])) {
- final FileSystem fs = util.getDFSCluster().getFileSystem();
- assertEquals("Should start with empty table", 0, util.countRows(table));
-
- // deep inspection: get the StoreFile dir
- final Path storePath = new Path(
- FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAMES[0]),
- new Path(admin.getTableRegions(TABLE_NAMES[0]).get(0).getEncodedName(),
- Bytes.toString(FAMILIES[0])));
- assertEquals(0, fs.listStatus(storePath).length);
-
- // Generate two bulk load files
- conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
- true);
-
- for (int i = 0; i < 2; i++) {
- Path testDir = util.getDataTestDirOnTestFS("testExcludeAllFromMinorCompaction_" + i);
- runIncrementalPELoad(conf, Arrays.asList(new HFileOutputFormat2.TableInfo(table
- .getTableDescriptor(), conn.getRegionLocator(TABLE_NAMES[0]))), testDir, false);
- // Perform the actual load
- new LoadIncrementalHFiles(conf).doBulkLoad(testDir, admin, table, locator);
- }
-
- // Ensure data shows up
- int expectedRows = 2 * NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
- assertEquals("LoadIncrementalHFiles should put expected data in table",
- expectedRows, util.countRows(table));
-
- // should have a second StoreFile now
- assertEquals(2, fs.listStatus(storePath).length);
-
- // minor compactions shouldn't get rid of the file
- admin.compact(TABLE_NAMES[0]);
- try {
- quickPoll(new Callable<Boolean>() {
- @Override
- public Boolean call() throws Exception {
- List<HRegion> regions = util.getMiniHBaseCluster().getRegions(TABLE_NAMES[0]);
- for (HRegion region : regions) {
- for (Store store : region.getStores()) {
- store.closeAndArchiveCompactedFiles();
- }
- }
- return fs.listStatus(storePath).length == 1;
- }
- }, 5000);
- throw new IOException("SF# = " + fs.listStatus(storePath).length);
- } catch (AssertionError ae) {
- // this is expected behavior
- }
-
- // a major compaction should work though
- admin.majorCompact(TABLE_NAMES[0]);
- quickPoll(new Callable<Boolean>() {
- @Override
- public Boolean call() throws Exception {
- List<HRegion> regions = util.getMiniHBaseCluster().getRegions(TABLE_NAMES[0]);
- for (HRegion region : regions) {
- for (Store store : region.getStores()) {
- store.closeAndArchiveCompactedFiles();
- }
- }
- return fs.listStatus(storePath).length == 1;
- }
- }, 5000);
-
- } finally {
- util.shutdownMiniCluster();
- }
- }
-
- @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
- public void testExcludeMinorCompaction() throws Exception {
- Configuration conf = util.getConfiguration();
- conf.setInt("hbase.hstore.compaction.min", 2);
- generateRandomStartKeys(5);
-
- util.startMiniCluster();
- try (Connection conn = ConnectionFactory.createConnection(conf);
- Admin admin = conn.getAdmin()){
- Path testDir = util.getDataTestDirOnTestFS("testExcludeMinorCompaction");
- final FileSystem fs = util.getDFSCluster().getFileSystem();
- Table table = util.createTable(TABLE_NAMES[0], FAMILIES);
- assertEquals("Should start with empty table", 0, util.countRows(table));
-
- // deep inspection: get the StoreFile dir
- final Path storePath = new Path(
- FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAMES[0]),
- new Path(admin.getTableRegions(TABLE_NAMES[0]).get(0).getEncodedName(),
- Bytes.toString(FAMILIES[0])));
- assertEquals(0, fs.listStatus(storePath).length);
-
- // put some data in it and flush to create a storefile
- Put p = new Put(Bytes.toBytes("test"));
- p.addColumn(FAMILIES[0], Bytes.toBytes("1"), Bytes.toBytes("1"));
- table.put(p);
- admin.flush(TABLE_NAMES[0]);
- assertEquals(1, util.countRows(table));
- quickPoll(new Callable<Boolean>() {
- @Override
- public Boolean call() throws Exception {
- return fs.listStatus(storePath).length == 1;
- }
- }, 5000);
-
- // Generate a bulk load file with more rows
- conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
- true);
-
- RegionLocator regionLocator = conn.getRegionLocator(TABLE_NAMES[0]);
- runIncrementalPELoad(conf, Arrays.asList(new HFileOutputFormat2.TableInfo(table
- .getTableDescriptor(), regionLocator)), testDir, false);
-
- // Perform the actual load
- new LoadIncrementalHFiles(conf).doBulkLoad(testDir, admin, table, regionLocator);
-
- // Ensure data shows up
- int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
- assertEquals("LoadIncrementalHFiles should put expected data in table",
- expectedRows + 1, util.countRows(table));
-
- // should have a second StoreFile now
- assertEquals(2, fs.listStatus(storePath).length);
-
- // minor compactions shouldn't get rid of the file
- admin.compact(TABLE_NAMES[0]);
- try {
- quickPoll(new Callable<Boolean>() {
- @Override
- public Boolean call() throws Exception {
- return fs.listStatus(storePath).length == 1;
- }
- }, 5000);
- throw new IOException("SF# = " + fs.listStatus(storePath).length);
- } catch (AssertionError ae) {
- // this is expected behavior
- }
-
- // a major compaction should work though
- admin.majorCompact(TABLE_NAMES[0]);
- quickPoll(new Callable<Boolean>() {
- @Override
- public Boolean call() throws Exception {
- return fs.listStatus(storePath).length == 1;
- }
- }, 5000);
-
- } finally {
- util.shutdownMiniCluster();
- }
- }
-
- private void quickPoll(Callable<Boolean> c, int waitMs) throws Exception {
- int sleepMs = 10;
- int retries = (int) Math.ceil(((double) waitMs) / sleepMs);
- while (retries-- > 0) {
- if (c.call().booleanValue()) {
- return;
- }
- Thread.sleep(sleepMs);
- }
- fail();
- }
-
- public static void main(String args[]) throws Exception {
- new TestHFileOutputFormat2().manualTest(args);
- }
-
- public void manualTest(String args[]) throws Exception {
- Configuration conf = HBaseConfiguration.create();
- util = new HBaseTestingUtility(conf);
- if ("newtable".equals(args[0])) {
- TableName tname = TableName.valueOf(args[1]);
- byte[][] splitKeys = generateRandomSplitKeys(4);
- Table table = util.createTable(tname, FAMILIES, splitKeys);
- } else if ("incremental".equals(args[0])) {
- TableName tname = TableName.valueOf(args[1]);
- try(Connection c = ConnectionFactory.createConnection(conf);
- Admin admin = c.getAdmin();
- RegionLocator regionLocator = c.getRegionLocator(tname)) {
- Path outDir = new Path("incremental-out");
- runIncrementalPELoad(conf, Arrays.asList(new HFileOutputFormat2.TableInfo(admin
- .getTableDescriptor(tname), regionLocator)), outDir, false);
- }
- } else {
- throw new RuntimeException(
- "usage: TestHFileOutputFormat2 newtable | incremental");
- }
- }
-
- @Test
- public void testBlockStoragePolicy() throws Exception {
- util = new HBaseTestingUtility();
- Configuration conf = util.getConfiguration();
- conf.set(HFileOutputFormat2.STORAGE_POLICY_PROPERTY, "ALL_SSD");
-
- conf.set(HFileOutputFormat2.STORAGE_POLICY_PROPERTY_CF_PREFIX +
- Bytes.toString(HFileOutputFormat2.combineTableNameSuffix(
- TABLE_NAMES[0].getName(), FAMILIES[0])), "ONE_SSD");
- Path cf1Dir = new Path(util.getDataTestDir(), Bytes.toString(FAMILIES[0]));
- Path cf2Dir = new Path(util.getDataTestDir(), Bytes.toString(FAMILIES[1]));
- util.startMiniDFSCluster(3);
- FileSystem fs = util.getDFSCluster().getFileSystem();
- try {
- fs.mkdirs(cf1Dir);
- fs.mkdirs(cf2Dir);
-
- // the original block storage policy would be HOT
- String spA = getStoragePolicyName(fs, cf1Dir);
- String spB = getStoragePolicyName(fs, cf2Dir);
- LOG.debug("Storage policy of cf 0: [" + spA + "].");
- LOG.debug("Storage policy of cf 1: [" + spB + "].");
- assertEquals("HOT", spA);
- assertEquals("HOT", spB);
-
- // alter table cf schema to change storage policies
- HFileOutputFormat2.configureStoragePolicy(conf, fs,
- HFileOutputFormat2.combineTableNameSuffix(TABLE_NAMES[0].getName(), FAMILIES[0]), cf1Dir);
- HFileOutputFormat2.configureStoragePolicy(conf, fs,
- HFileOutputFormat2.combineTableNameSuffix(TABLE_NAMES[0].getName(), FAMILIES[1]), cf2Dir);
- spA = getStoragePolicyName(fs, cf1Dir);
- spB = getStoragePolicyName(fs, cf2Dir);
- LOG.debug("Storage policy of cf 0: [" + spA + "].");
- LOG.debug("Storage policy of cf 1: [" + spB + "].");
- assertNotNull(spA);
- assertEquals("ONE_SSD", spA);
- assertNotNull(spB);
- assertEquals("ALL_SSD", spB);
- } finally {
- fs.delete(cf1Dir, true);
- fs.delete(cf2Dir, true);
- util.shutdownMiniDFSCluster();
- }
- }
-
- private String getStoragePolicyName(FileSystem fs, Path path) {
- try {
- Object blockStoragePolicySpi = ReflectionUtils.invokeMethod(fs, "getStoragePolicy", path);
- return (String) ReflectionUtils.invokeMethod(blockStoragePolicySpi, "getName");
- } catch (Exception e) {
- // May fail when running against an old HDFS version; try the old way
- if (LOG.isTraceEnabled()) {
- LOG.trace("Failed to get policy directly", e);
- }
- String policy = getStoragePolicyNameForOldHDFSVersion(fs, path);
- return policy == null ? "HOT" : policy;// HOT by default
- }
- }
-
- private String getStoragePolicyNameForOldHDFSVersion(FileSystem fs, Path path) {
- try {
- if (fs instanceof DistributedFileSystem) {
- DistributedFileSystem dfs = (DistributedFileSystem) fs;
- HdfsFileStatus status = dfs.getClient().getFileInfo(path.toUri().getPath());
- if (null != status) {
- byte storagePolicyId = status.getStoragePolicy();
- Field idUnspecified = BlockStoragePolicySuite.class.getField("ID_UNSPECIFIED");
- if (storagePolicyId != idUnspecified.getByte(BlockStoragePolicySuite.class)) {
- BlockStoragePolicy[] policies = dfs.getStoragePolicies();
- for (BlockStoragePolicy policy : policies) {
- if (policy.getId() == storagePolicyId) {
- return policy.getName();
- }
- }
- }
- }
- }
- } catch (Throwable e) {
- LOG.warn("failed to get block storage policy of [" + path + "]", e);
- }
-
- return null;
- }
-}
-
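For context on what the moved test exercises, here is a minimal, hypothetical driver sketch
(not part of this commit) of the bulk-load flow TestHFileOutputFormat2 verifies: configure a
job with HFileOutputFormat2.configureIncrementalLoad(), run it, then hand the output directory
to LoadIncrementalHFiles. The class name BulkLoadDriverSketch, the command-line arguments, and
the mapper wiring are illustrative assumptions; the HBase calls mirror those used in the test
above, and configureIncrementalLoad() is what serializes the per-family compression, bloom,
block size and data block encoding settings that the serialize/deserialize tests check.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class BulkLoadDriverSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    TableName tableName = TableName.valueOf(args[0]); // hypothetical: table to load into
    Path hfileDir = new Path(args[1]);                // hypothetical: staging dir for HFiles

    try (Connection conn = ConnectionFactory.createConnection(conf);
         Admin admin = conn.getAdmin();
         Table table = conn.getTable(tableName);
         RegionLocator locator = conn.getRegionLocator(tableName)) {
      Job job = Job.getInstance(conf, "bulk-load-" + tableName);
      job.setJarByClass(BulkLoadDriverSketch.class);
      // The input format and mapper are application specific; the mapper must emit
      // ImmutableBytesWritable row keys with KeyValue (or Put) values, as the
      // Random*GeneratingMapper classes in the test above do.
      HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), locator);
      FileOutputFormat.setOutputPath(job, hfileDir);
      if (!job.waitForCompletion(true)) {
        throw new RuntimeException("HFile-writing job failed");
      }
      // Move the generated HFiles into the table's regions.
      new LoadIncrementalHFiles(conf).doBulkLoad(hfileDir, admin, table, locator);
    }
  }
}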
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHRegionPartitioner.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHRegionPartitioner.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHRegionPartitioner.java
deleted file mode 100644
index 2867f13..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHRegionPartitioner.java
+++ /dev/null
@@ -1,71 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
- * agreements. See the NOTICE file distributed with this work for additional information regarding
- * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with the License. You may
- * obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software distributed under the
- * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
- * either express or implied. See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.testclassification.MediumTests;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
-
-import static org.junit.Assert.assertEquals;
-
-@Category({MapReduceTests.class, MediumTests.class})
-public class TestHRegionPartitioner {
- private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
-
- @Rule
- public TestName name = new TestName();
-
- @BeforeClass
- public static void beforeClass() throws Exception {
- UTIL.startMiniCluster();
- }
-
- @AfterClass
- public static void afterClass() throws Exception {
- UTIL.shutdownMiniCluster();
- }
-
- /**
- * Test HRegionPartitioner
- */
- @Test (timeout=300000)
- public void testHRegionPartitioner() throws Exception {
-
- byte[][] families = { Bytes.toBytes("familyA"), Bytes.toBytes("familyB") };
-
- UTIL.createTable(TableName.valueOf(name.getMethodName()), families, 1,
- Bytes.toBytes("aa"), Bytes.toBytes("cc"), 3);
-
- HRegionPartitioner<Long, Long> partitioner = new HRegionPartitioner<>();
- Configuration configuration = UTIL.getConfiguration();
- configuration.set(TableOutputFormat.OUTPUT_TABLE, name.getMethodName());
- partitioner.setConf(configuration);
- ImmutableBytesWritable writable = new ImmutableBytesWritable(Bytes.toBytes("bb"));
-
- assertEquals(1, partitioner.getPartition(writable, 10L, 3));
- assertEquals(0, partitioner.getPartition(writable, 10L, 1));
- }
-}
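A small, hypothetical usage sketch (not part of this commit) of what the deleted test checks:
HRegionPartitioner routes each map output key to the reduce partition matching the region of
the configured output table, so reducers line up with regions. It assumes a reachable cluster
where the table (here the made-up name "my_table") already exists; against the test's table
split at "aa"/"cc", row "bb" lands in partition 1 with three partitions and partition 0 with one.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HRegionPartitioner;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.hbase.util.Bytes;

public class RegionPartitionerSketch {
  public static void main(String[] args) {
    Configuration conf = HBaseConfiguration.create();
    // Tell the partitioner which table's region boundaries to use.
    conf.set(TableOutputFormat.OUTPUT_TABLE, "my_table"); // hypothetical table name

    HRegionPartitioner<Long, Long> partitioner = new HRegionPartitioner<>();
    partitioner.setConf(conf); // connects to the cluster and caches the table's start keys

    ImmutableBytesWritable row = new ImmutableBytesWritable(Bytes.toBytes("bb"));
    // With at least as many partitions as regions, the result is the index of the region
    // containing the row; with fewer partitions the index is folded down.
    System.out.println("row 'bb' -> partition " + partitioner.getPartition(row, 10L, 3));
  }
}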
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHashTable.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHashTable.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHashTable.java
deleted file mode 100644
index 1f4efcd..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHashTable.java
+++ /dev/null
@@ -1,194 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-
-import java.util.HashMap;
-import java.util.Map;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.MapFile;
-import org.junit.AfterClass;
-import org.junit.Assert;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.ImmutableMap;
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.Maps;
-import org.junit.rules.TestName;
-
-/**
- * Basic test for the HashTable M/R tool
- */
-@Category(LargeTests.class)
-public class TestHashTable {
-
- private static final Log LOG = LogFactory.getLog(TestHashTable.class);
-
- private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
-
- @Rule
- public TestName name = new TestName();
-
- @BeforeClass
- public static void beforeClass() throws Exception {
- TEST_UTIL.startMiniCluster(3);
- }
-
- @AfterClass
- public static void afterClass() throws Exception {
- TEST_UTIL.shutdownMiniCluster();
- }
-
- @Test
- public void testHashTable() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName());
- final byte[] family = Bytes.toBytes("family");
- final byte[] column1 = Bytes.toBytes("c1");
- final byte[] column2 = Bytes.toBytes("c2");
- final byte[] column3 = Bytes.toBytes("c3");
-
- int numRows = 100;
- int numRegions = 10;
- int numHashFiles = 3;
-
- byte[][] splitRows = new byte[numRegions-1][];
- for (int i = 1; i < numRegions; i++) {
- splitRows[i-1] = Bytes.toBytes(numRows * i / numRegions);
- }
-
- long timestamp = 1430764183454L;
- // put rows into the first table
- Table t1 = TEST_UTIL.createTable(tableName, family, splitRows);
- for (int i = 0; i < numRows; i++) {
- Put p = new Put(Bytes.toBytes(i), timestamp);
- p.addColumn(family, column1, column1);
- p.addColumn(family, column2, column2);
- p.addColumn(family, column3, column3);
- t1.put(p);
- }
- t1.close();
-
- HashTable hashTable = new HashTable(TEST_UTIL.getConfiguration());
-
- Path testDir = TEST_UTIL.getDataTestDirOnTestFS(tableName.getNameAsString());
-
- long batchSize = 300;
- int code = hashTable.run(new String[] {
- "--batchsize=" + batchSize,
- "--numhashfiles=" + numHashFiles,
- "--scanbatch=2",
- tableName.getNameAsString(),
- testDir.toString()});
- assertEquals("test job failed", 0, code);
-
- FileSystem fs = TEST_UTIL.getTestFileSystem();
-
- HashTable.TableHash tableHash = HashTable.TableHash.read(fs.getConf(), testDir);
- assertEquals(tableName.getNameAsString(), tableHash.tableName);
- assertEquals(batchSize, tableHash.batchSize);
- assertEquals(numHashFiles, tableHash.numHashFiles);
- assertEquals(numHashFiles - 1, tableHash.partitions.size());
- for (ImmutableBytesWritable bytes : tableHash.partitions) {
- LOG.debug("partition: " + Bytes.toInt(bytes.get()));
- }
-
- ImmutableMap<Integer, ImmutableBytesWritable> expectedHashes
- = ImmutableMap.<Integer, ImmutableBytesWritable>builder()
- .put(-1, new ImmutableBytesWritable(Bytes.fromHex("714cb10a9e3b5569852980edd8c6ca2f")))
- .put(5, new ImmutableBytesWritable(Bytes.fromHex("28d961d9252ce8f8d44a07b38d3e1d96")))
- .put(10, new ImmutableBytesWritable(Bytes.fromHex("f6bbc4a224d8fd929b783a92599eaffa")))
- .put(15, new ImmutableBytesWritable(Bytes.fromHex("522deb5d97f73a414ecc11457be46881")))
- .put(20, new ImmutableBytesWritable(Bytes.fromHex("b026f2611aaa46f7110116d807545352")))
- .put(25, new ImmutableBytesWritable(Bytes.fromHex("39ffc1a3094aa12a2e90ffd9cef2ce93")))
- .put(30, new ImmutableBytesWritable(Bytes.fromHex("f6b4d75727ce9a30ac29e4f08f601666")))
- .put(35, new ImmutableBytesWritable(Bytes.fromHex("422e2d2f1eb79a8f02171a705a42c090")))
- .put(40, new ImmutableBytesWritable(Bytes.fromHex("559ad61c900fffefea0a15abf8a97bc3")))
- .put(45, new ImmutableBytesWritable(Bytes.fromHex("23019084513eca41cee436b2a29611cb")))
- .put(50, new ImmutableBytesWritable(Bytes.fromHex("b40467d222ddb4949b142fe145ee9edc")))
- .put(55, new ImmutableBytesWritable(Bytes.fromHex("372bf89fcd8ca4b7ab3c1add9d07f7e4")))
- .put(60, new ImmutableBytesWritable(Bytes.fromHex("69ae0585e6255de27dce974e332b8f8b")))
- .put(65, new ImmutableBytesWritable(Bytes.fromHex("8029610044297aad0abdbecd485d8e59")))
- .put(70, new ImmutableBytesWritable(Bytes.fromHex("de5f784f7f78987b6e57ecfd81c8646f")))
- .put(75, new ImmutableBytesWritable(Bytes.fromHex("1cd757cc4e1715c8c3b1c24447a1ec56")))
- .put(80, new ImmutableBytesWritable(Bytes.fromHex("f9a53aacfeb6142b08066615e7038095")))
- .put(85, new ImmutableBytesWritable(Bytes.fromHex("89b872b7e639df32d3276b33928c0c91")))
- .put(90, new ImmutableBytesWritable(Bytes.fromHex("45eeac0646d46a474ea0484175faed38")))
- .put(95, new ImmutableBytesWritable(Bytes.fromHex("f57c447e32a08f4bf1abb2892839ac56")))
- .build();
-
- Map<Integer, ImmutableBytesWritable> actualHashes = new HashMap<>();
- Path dataDir = new Path(testDir, HashTable.HASH_DATA_DIR);
- for (int i = 0; i < numHashFiles; i++) {
- Path hashPath = new Path(dataDir, HashTable.TableHash.getDataFileName(i));
-
- MapFile.Reader reader = new MapFile.Reader(hashPath, fs.getConf());
- ImmutableBytesWritable key = new ImmutableBytesWritable();
- ImmutableBytesWritable hash = new ImmutableBytesWritable();
- while(reader.next(key, hash)) {
- String keyString = Bytes.toHex(key.get(), key.getOffset(), key.getLength());
- LOG.debug("Key: " + (keyString.isEmpty() ? "-1" : Integer.parseInt(keyString, 16))
- + " Hash: " + Bytes.toHex(hash.get(), hash.getOffset(), hash.getLength()));
-
- int intKey = -1;
- if (key.getLength() > 0) {
- intKey = Bytes.toInt(key.get(), key.getOffset(), key.getLength());
- }
- if (actualHashes.containsKey(intKey)) {
- Assert.fail("duplicate key in data files: " + intKey);
- }
- actualHashes.put(intKey, new ImmutableBytesWritable(hash.copyBytes()));
- }
- reader.close();
- }
-
- FileStatus[] files = fs.listStatus(testDir);
- for (FileStatus file : files) {
- LOG.debug("Output file: " + file.getPath());
- }
-
- files = fs.listStatus(dataDir);
- for (FileStatus file : files) {
- LOG.debug("Data file: " + file.getPath());
- }
-
- if (!expectedHashes.equals(actualHashes)) {
- LOG.error("Diff: " + Maps.difference(expectedHashes, actualHashes));
- }
- Assert.assertEquals(expectedHashes, actualHashes);
-
- TEST_UTIL.deleteTable(tableName);
- TEST_UTIL.cleanupDataTestDirOnTestFS();
- }
-
-
-}
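
The deleted test drives the HashTable tool programmatically. A minimal sketch of the same invocation outside a test, using only the flags shown above (the source table name and output directory are hypothetical):

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.hbase.HBaseConfiguration;
  import org.apache.hadoop.hbase.mapreduce.HashTable;

  public class HashTableDriver {
    public static void main(String[] args) throws Exception {
      Configuration conf = HBaseConfiguration.create();
      // Same flags the test passes: batch size, number of hash files, scanner batching.
      int exitCode = new HashTable(conf).run(new String[] {
          "--batchsize=300",
          "--numhashfiles=3",
          "--scanbatch=2",
          "mytable",                 // hypothetical source table
          "/tmp/hashes/mytable"      // hypothetical output directory
      });
      System.exit(exitCode);
    }
  }

A later SyncTable run can consume the hash directory; the test instead reads it back with HashTable.TableHash.read() to verify the manifest and partitions.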
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScanBase.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScanBase.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScanBase.java
new file mode 100644
index 0000000..13b6a96
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScanBase.java
@@ -0,0 +1,287 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.NavigableMap;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+
+
+/**
+ * <p>
+ * Tests various scan start and stop row scenarios. This is set in a scan and
+ * tested in a MapReduce job to see if that is handed over and done properly
+ * too.
+ * </p>
+ * <p>
+ * This test is broken into two parts in order to side-step the test timeout
+ * period of 900 seconds, as documented in HBASE-8326.
+ * </p>
+ */
+public abstract class TestTableInputFormatScanBase {
+
+ private static final Log LOG = LogFactory.getLog(TestTableInputFormatScanBase.class);
+ static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+
+ static final TableName TABLE_NAME = TableName.valueOf("scantest");
+ static final byte[][] INPUT_FAMILYS = {Bytes.toBytes("content1"), Bytes.toBytes("content2")};
+ static final String KEY_STARTROW = "startRow";
+ static final String KEY_LASTROW = "stpRow";
+
+ private static Table table = null;
+
+ @BeforeClass
+ public static void setUpBeforeClass() throws Exception {
+ // test intermittently fails under hadoop2 (2.0.2-alpha) if shortcircuit-read (scr) is on.
+ // this turns it off for this test. TODO: Figure out why scr breaks recovery.
+ System.setProperty("hbase.tests.use.shortcircuit.reads", "false");
+
+ // switch TIF to log at DEBUG level
+ TEST_UTIL.enableDebug(TableInputFormat.class);
+ TEST_UTIL.enableDebug(TableInputFormatBase.class);
+ // start mini hbase cluster
+ TEST_UTIL.startMiniCluster(3);
+ // create and fill table
+ table = TEST_UTIL.createMultiRegionTable(TABLE_NAME, INPUT_FAMILYS);
+ TEST_UTIL.loadTable(table, INPUT_FAMILYS, null, false);
+ }
+
+ @AfterClass
+ public static void tearDownAfterClass() throws Exception {
+ TEST_UTIL.shutdownMiniCluster();
+ }
+
+ /**
+ * Pass the key and value to reduce.
+ */
+ public static class ScanMapper
+ extends TableMapper<ImmutableBytesWritable, ImmutableBytesWritable> {
+
+ /**
+ * Pass the key and value to reduce.
+ *
+ * @param key The key, here "aaa", "aab" etc.
+ * @param value The value is the same as the key.
+ * @param context The task context.
+ * @throws IOException When reading the rows fails.
+ */
+ @Override
+ public void map(ImmutableBytesWritable key, Result value,
+ Context context)
+ throws IOException, InterruptedException {
+ if (value.size() != 2) {
+ throw new IOException("There should be two input columns");
+ }
+ Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>>
+ cfMap = value.getMap();
+
+ if (!cfMap.containsKey(INPUT_FAMILYS[0]) || !cfMap.containsKey(INPUT_FAMILYS[1])) {
+ throw new IOException("Wrong input columns. Missing: '" +
+ Bytes.toString(INPUT_FAMILYS[0]) + "' or '" + Bytes.toString(INPUT_FAMILYS[1]) + "'.");
+ }
+
+ String val0 = Bytes.toStringBinary(value.getValue(INPUT_FAMILYS[0], null));
+ String val1 = Bytes.toStringBinary(value.getValue(INPUT_FAMILYS[1], null));
+ LOG.info("map: key -> " + Bytes.toStringBinary(key.get()) +
+ ", value -> (" + val0 + ", " + val1 + ")");
+ context.write(key, key);
+ }
+ }
+
+ /**
+ * Checks the last and first key seen against the scanner boundaries.
+ */
+ public static class ScanReducer
+ extends Reducer<ImmutableBytesWritable, ImmutableBytesWritable,
+ NullWritable, NullWritable> {
+
+ private String first = null;
+ private String last = null;
+
+ protected void reduce(ImmutableBytesWritable key,
+ Iterable<ImmutableBytesWritable> values, Context context)
+ throws IOException ,InterruptedException {
+ int count = 0;
+ for (ImmutableBytesWritable value : values) {
+ String val = Bytes.toStringBinary(value.get());
+ LOG.info("reduce: key[" + count + "] -> " +
+ Bytes.toStringBinary(key.get()) + ", value -> " + val);
+ if (first == null) first = val;
+ last = val;
+ count++;
+ }
+ }
+
+ protected void cleanup(Context context)
+ throws IOException, InterruptedException {
+ Configuration c = context.getConfiguration();
+ String startRow = c.get(KEY_STARTROW);
+ String lastRow = c.get(KEY_LASTROW);
+ LOG.info("cleanup: first -> \"" + first + "\", start row -> \"" + startRow + "\"");
+ LOG.info("cleanup: last -> \"" + last + "\", last row -> \"" + lastRow + "\"");
+ if (startRow != null && startRow.length() > 0) {
+ assertEquals(startRow, first);
+ }
+ if (lastRow != null && lastRow.length() > 0) {
+ assertEquals(lastRow, last);
+ }
+ }
+
+ }
+
+ /**
+ * Tests an MR Scan initialized from properties set in the Configuration.
+ *
+ * @throws IOException
+ * @throws ClassNotFoundException
+ * @throws InterruptedException
+ */
+ protected void testScanFromConfiguration(String start, String stop, String last)
+ throws IOException, InterruptedException, ClassNotFoundException {
+ String jobName = "ScanFromConfig" + (start != null ? start.toUpperCase(Locale.ROOT) : "Empty") +
+ "To" + (stop != null ? stop.toUpperCase(Locale.ROOT) : "Empty");
+ Configuration c = new Configuration(TEST_UTIL.getConfiguration());
+ c.set(TableInputFormat.INPUT_TABLE, TABLE_NAME.getNameAsString());
+ c.set(TableInputFormat.SCAN_COLUMN_FAMILY, Bytes.toString(INPUT_FAMILYS[0]) + ", "
+ + Bytes.toString(INPUT_FAMILYS[1]));
+ c.set(KEY_STARTROW, start != null ? start : "");
+ c.set(KEY_LASTROW, last != null ? last : "");
+
+ if (start != null) {
+ c.set(TableInputFormat.SCAN_ROW_START, start);
+ }
+
+ if (stop != null) {
+ c.set(TableInputFormat.SCAN_ROW_STOP, stop);
+ }
+
+ Job job = new Job(c, jobName);
+ job.setMapperClass(ScanMapper.class);
+ job.setReducerClass(ScanReducer.class);
+ job.setMapOutputKeyClass(ImmutableBytesWritable.class);
+ job.setMapOutputValueClass(ImmutableBytesWritable.class);
+ job.setInputFormatClass(TableInputFormat.class);
+ job.setNumReduceTasks(1);
+ FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
+ TableMapReduceUtil.addDependencyJars(job);
+ assertTrue(job.waitForCompletion(true));
+ }
+
+ /**
+   * Tests an MR scan using specific start and stop rows.
+ *
+ * @throws IOException
+ * @throws ClassNotFoundException
+ * @throws InterruptedException
+ */
+ protected void testScan(String start, String stop, String last)
+ throws IOException, InterruptedException, ClassNotFoundException {
+ String jobName = "Scan" + (start != null ? start.toUpperCase(Locale.ROOT) : "Empty") +
+ "To" + (stop != null ? stop.toUpperCase(Locale.ROOT) : "Empty");
+ LOG.info("Before map/reduce startup - job " + jobName);
+ Configuration c = new Configuration(TEST_UTIL.getConfiguration());
+ Scan scan = new Scan();
+ scan.addFamily(INPUT_FAMILYS[0]);
+ scan.addFamily(INPUT_FAMILYS[1]);
+ if (start != null) {
+ scan.setStartRow(Bytes.toBytes(start));
+ }
+ c.set(KEY_STARTROW, start != null ? start : "");
+ if (stop != null) {
+ scan.setStopRow(Bytes.toBytes(stop));
+ }
+ c.set(KEY_LASTROW, last != null ? last : "");
+ LOG.info("scan before: " + scan);
+ Job job = new Job(c, jobName);
+ TableMapReduceUtil.initTableMapperJob(
+ TABLE_NAME, scan, ScanMapper.class,
+ ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
+ job.setReducerClass(ScanReducer.class);
+ job.setNumReduceTasks(1); // one to get final "first" and "last" key
+ FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
+ LOG.info("Started " + job.getJobName());
+ assertTrue(job.waitForCompletion(true));
+ LOG.info("After map/reduce completion - job " + jobName);
+ }
+
+
+ /**
+   * Tests an MR scan using data skew auto-balance
+ *
+ * @throws IOException
+ * @throws ClassNotFoundException
+ * @throws InterruptedException
+ */
+ public void testNumOfSplits(String ratio, int expectedNumOfSplits) throws IOException,
+ InterruptedException,
+ ClassNotFoundException {
+ String jobName = "TestJobForNumOfSplits";
+ LOG.info("Before map/reduce startup - job " + jobName);
+ Configuration c = new Configuration(TEST_UTIL.getConfiguration());
+ Scan scan = new Scan();
+ scan.addFamily(INPUT_FAMILYS[0]);
+ scan.addFamily(INPUT_FAMILYS[1]);
+ c.set("hbase.mapreduce.input.autobalance", "true");
+ c.set("hbase.mapreduce.input.autobalance.maxskewratio", ratio);
+ c.set(KEY_STARTROW, "");
+ c.set(KEY_LASTROW, "");
+ Job job = new Job(c, jobName);
+ TableMapReduceUtil.initTableMapperJob(TABLE_NAME.getNameAsString(), scan, ScanMapper.class,
+ ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
+ TableInputFormat tif = new TableInputFormat();
+ tif.setConf(job.getConfiguration());
+ Assert.assertEquals(TABLE_NAME, table.getName());
+ List<InputSplit> splits = tif.getSplits(job);
+ Assert.assertEquals(expectedNumOfSplits, splits.size());
+ }
+
+ /**
+ * Tests for the getSplitKey() method in TableInputFormatBase.java
+ */
+ public void testGetSplitKey(byte[] startKey, byte[] endKey, byte[] splitKey, boolean isText) {
+ byte[] result = TableInputFormatBase.getSplitKey(startKey, endKey, isText);
+ Assert.assertArrayEquals(splitKey, result);
+ }
+}
+
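
testScanFromConfiguration() above shows that TableInputFormat can be driven purely from Configuration keys. A minimal sketch of the same setup outside the test harness (the row bounds and job name are hypothetical):

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.hbase.HBaseConfiguration;
  import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
  import org.apache.hadoop.mapreduce.Job;

  public class ScanFromConfig {
    public static void main(String[] args) throws Exception {
      Configuration conf = HBaseConfiguration.create();
      // The keys testScanFromConfiguration() sets: table, families, optional row range.
      conf.set(TableInputFormat.INPUT_TABLE, "scantest");
      conf.set(TableInputFormat.SCAN_COLUMN_FAMILY, "content1, content2");
      conf.set(TableInputFormat.SCAN_ROW_START, "bbb");   // hypothetical bounds
      conf.set(TableInputFormat.SCAN_ROW_STOP, "opp");
      Job job = Job.getInstance(conf, "scan-from-config");
      job.setInputFormatClass(TableInputFormat.class);
      // Mapper/reducer wiring and output path are set exactly as in the test above.
    }
  }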
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduce.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduce.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduce.java
new file mode 100644
index 0000000..d702e0d
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduce.java
@@ -0,0 +1,174 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Map;
+import java.util.NavigableMap;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.TableNotEnabledException;
+import org.apache.hadoop.hbase.TableNotFoundException;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapreduce.Counter;
+import org.apache.hadoop.mapreduce.Counters;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+/**
+ * Test Map/Reduce job over HBase tables. The map/reduce process we're testing
+ * on our tables is simple - take every row in the table, reverse the value of
+ * a particular cell, and write it back to the table.
+ */
+
+@Category({VerySlowMapReduceTests.class, LargeTests.class})
+public class TestTableMapReduce extends TestTableMapReduceBase {
+ private static final Log LOG = LogFactory.getLog(TestTableMapReduce.class);
+
+ @Override
+ protected Log getLog() { return LOG; }
+
+ /**
+   * Pass the given key and processed record to reduce
+ */
+ static class ProcessContentsMapper extends TableMapper<ImmutableBytesWritable, Put> {
+
+ /**
+ * Pass the key, and reversed value to reduce
+ *
+ * @param key
+ * @param value
+ * @param context
+ * @throws IOException
+ */
+ @Override
+ public void map(ImmutableBytesWritable key, Result value,
+ Context context)
+ throws IOException, InterruptedException {
+ if (value.size() != 1) {
+ throw new IOException("There should only be one input column");
+ }
+ Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>>
+ cf = value.getMap();
+ if(!cf.containsKey(INPUT_FAMILY)) {
+ throw new IOException("Wrong input columns. Missing: '" +
+ Bytes.toString(INPUT_FAMILY) + "'.");
+ }
+
+ // Get the original value and reverse it
+ String originalValue = Bytes.toString(value.getValue(INPUT_FAMILY, INPUT_FAMILY));
+ StringBuilder newValue = new StringBuilder(originalValue);
+ newValue.reverse();
+ // Now set the value to be collected
+ Put outval = new Put(key.get());
+ outval.addColumn(OUTPUT_FAMILY, null, Bytes.toBytes(newValue.toString()));
+ context.write(key, outval);
+ }
+ }
+
+ @Override
+ protected void runTestOnTable(Table table) throws IOException {
+ Job job = null;
+ try {
+ LOG.info("Before map/reduce startup");
+ job = new Job(table.getConfiguration(), "process column contents");
+ job.setNumReduceTasks(1);
+ Scan scan = new Scan();
+ scan.addFamily(INPUT_FAMILY);
+ TableMapReduceUtil.initTableMapperJob(
+ table.getName().getNameAsString(), scan,
+ ProcessContentsMapper.class, ImmutableBytesWritable.class,
+ Put.class, job);
+ TableMapReduceUtil.initTableReducerJob(
+ table.getName().getNameAsString(),
+ IdentityTableReducer.class, job);
+ FileOutputFormat.setOutputPath(job, new Path("test"));
+ LOG.info("Started " + table.getName().getNameAsString());
+ assertTrue(job.waitForCompletion(true));
+ LOG.info("After map/reduce completion");
+
+ // verify map-reduce results
+ verify(table.getName());
+
+ verifyJobCountersAreEmitted(job);
+ } catch (InterruptedException e) {
+ throw new IOException(e);
+ } catch (ClassNotFoundException e) {
+ throw new IOException(e);
+ } finally {
+ table.close();
+ if (job != null) {
+ FileUtil.fullyDelete(
+ new File(job.getConfiguration().get("hadoop.tmp.dir")));
+ }
+ }
+ }
+
+ /**
+ * Verify scan counters are emitted from the job
+ * @param job
+ * @throws IOException
+ */
+ private void verifyJobCountersAreEmitted(Job job) throws IOException {
+ Counters counters = job.getCounters();
+ Counter counter
+ = counters.findCounter(TableRecordReaderImpl.HBASE_COUNTER_GROUP_NAME, "RPC_CALLS");
+ assertNotNull("Unable to find Job counter for HBase scan metrics, RPC_CALLS", counter);
+ assertTrue("Counter value for RPC_CALLS should be larger than 0", counter.getValue() > 0);
+ }
+
+ @Test(expected = TableNotEnabledException.class)
+ public void testWritingToDisabledTable() throws IOException {
+
+ try (Admin admin = UTIL.getConnection().getAdmin();
+ Table table = UTIL.getConnection().getTable(TABLE_FOR_NEGATIVE_TESTS)) {
+ admin.disableTable(table.getName());
+ runTestOnTable(table);
+ fail("Should not have reached here, should have thrown an exception");
+ }
+ }
+
+ @Test(expected = TableNotFoundException.class)
+ public void testWritingToNonExistentTable() throws IOException {
+
+ try (Table table = UTIL.getConnection().getTable(TableName.valueOf("table-does-not-exist"))) {
+ runTestOnTable(table);
+ fail("Should not have reached here, should have thrown an exception");
+ }
+ }
+}
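
runTestOnTable() above wires a read-from-table, write-back-to-table job with TableMapReduceUtil. A minimal sketch of the same wiring with a trivial stand-in mapper; the table and family names come from the test and are placeholders for a real deployment:

  import java.io.IOException;

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.hbase.CellUtil;
  import org.apache.hadoop.hbase.HBaseConfiguration;
  import org.apache.hadoop.hbase.client.Put;
  import org.apache.hadoop.hbase.client.Result;
  import org.apache.hadoop.hbase.client.Scan;
  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
  import org.apache.hadoop.hbase.mapreduce.IdentityTableReducer;
  import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
  import org.apache.hadoop.hbase.mapreduce.TableMapper;
  import org.apache.hadoop.hbase.util.Bytes;
  import org.apache.hadoop.mapreduce.Job;
  import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

  public class CopyCellJob {
    /** Copies the first cell of each row into the "text" family, unmodified. */
    static class CopyMapper extends TableMapper<ImmutableBytesWritable, Put> {
      @Override
      public void map(ImmutableBytesWritable key, Result value, Context context)
          throws IOException, InterruptedException {
        Put put = new Put(key.get());
        put.addColumn(Bytes.toBytes("text"), null, CellUtil.cloneValue(value.rawCells()[0]));
        context.write(key, put);
      }
    }

    public static void main(String[] args) throws Exception {
      Configuration conf = HBaseConfiguration.create();
      Job job = Job.getInstance(conf, "copy column contents");
      job.setNumReduceTasks(1);
      Scan scan = new Scan();
      scan.addFamily(Bytes.toBytes("contents"));
      // Read from and write back to the same table, as runTestOnTable() does above.
      TableMapReduceUtil.initTableMapperJob("mrtest", scan, CopyMapper.class,
          ImmutableBytesWritable.class, Put.class, job);
      TableMapReduceUtil.initTableReducerJob("mrtest", IdentityTableReducer.class, job);
      FileOutputFormat.setOutputPath(job, new Path("copy-cell-output"));
      System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
  }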
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduceBase.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduceBase.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduceBase.java
new file mode 100644
index 0000000..27bf063
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduceBase.java
@@ -0,0 +1,233 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.NavigableMap;
+
+import org.apache.commons.logging.Log;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.CategoryBasedTimeout;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestRule;
+
+/**
+ * A base class for a test Map/Reduce job over HBase tables. The map/reduce process we're testing
+ * on our tables is simple - take every row in the table, reverse the value of a particular cell,
+ * and write it back to the table. Implements common components between mapred and mapreduce
+ * implementations.
+ */
+public abstract class TestTableMapReduceBase {
+ @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
+ withTimeout(this.getClass()).withLookingForStuckThread(true).build();
+ protected static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
+ protected static final TableName MULTI_REGION_TABLE_NAME = TableName.valueOf("mrtest");
+ protected static final TableName TABLE_FOR_NEGATIVE_TESTS = TableName.valueOf("testfailuretable");
+ protected static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
+ protected static final byte[] OUTPUT_FAMILY = Bytes.toBytes("text");
+
+ protected static final byte[][] columns = new byte[][] {
+ INPUT_FAMILY,
+ OUTPUT_FAMILY
+ };
+
+ /**
+ * Retrieve my logger instance.
+ */
+ protected abstract Log getLog();
+
+ /**
+ * Handles API-specifics for setting up and executing the job.
+ */
+ protected abstract void runTestOnTable(Table table) throws IOException;
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ UTIL.startMiniCluster();
+ Table table =
+ UTIL.createMultiRegionTable(MULTI_REGION_TABLE_NAME, new byte[][] { INPUT_FAMILY,
+ OUTPUT_FAMILY });
+ UTIL.loadTable(table, INPUT_FAMILY, false);
+ UTIL.createTable(TABLE_FOR_NEGATIVE_TESTS, new byte[][] { INPUT_FAMILY, OUTPUT_FAMILY });
+ }
+
+ @AfterClass
+ public static void afterClass() throws Exception {
+ UTIL.deleteTable(TABLE_FOR_NEGATIVE_TESTS);
+ UTIL.shutdownMiniCluster();
+ }
+
+ /**
+ * Test a map/reduce against a multi-region table
+ * @throws IOException
+ */
+ @Test
+ public void testMultiRegionTable() throws IOException {
+ runTestOnTable(UTIL.getConnection().getTable(MULTI_REGION_TABLE_NAME));
+ }
+
+ @Test
+ public void testCombiner() throws IOException {
+ Configuration conf = new Configuration(UTIL.getConfiguration());
+ // force use of combiner for testing purposes
+ conf.setInt("mapreduce.map.combine.minspills", 1);
+ runTestOnTable(UTIL.getConnection().getTable(MULTI_REGION_TABLE_NAME));
+ }
+
+ /**
+ * Implements mapper logic for use across APIs.
+ */
+ protected static Put map(ImmutableBytesWritable key, Result value) throws IOException {
+ if (value.size() != 1) {
+ throw new IOException("There should only be one input column");
+ }
+ Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>>
+ cf = value.getMap();
+ if(!cf.containsKey(INPUT_FAMILY)) {
+ throw new IOException("Wrong input columns. Missing: '" +
+ Bytes.toString(INPUT_FAMILY) + "'.");
+ }
+
+ // Get the original value and reverse it
+
+ String originalValue = Bytes.toString(value.getValue(INPUT_FAMILY, INPUT_FAMILY));
+ StringBuilder newValue = new StringBuilder(originalValue);
+ newValue.reverse();
+
+ // Now set the value to be collected
+
+ Put outval = new Put(key.get());
+ outval.addColumn(OUTPUT_FAMILY, null, Bytes.toBytes(newValue.toString()));
+ return outval;
+ }
+
+ protected void verify(TableName tableName) throws IOException {
+ Table table = UTIL.getConnection().getTable(tableName);
+ boolean verified = false;
+ long pause = UTIL.getConfiguration().getLong("hbase.client.pause", 5 * 1000);
+ int numRetries = UTIL.getConfiguration().getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 5);
+ for (int i = 0; i < numRetries; i++) {
+ try {
+ getLog().info("Verification attempt #" + i);
+ verifyAttempt(table);
+ verified = true;
+ break;
+ } catch (NullPointerException e) {
+ // If here, a cell was empty. Presume its because updates came in
+ // after the scanner had been opened. Wait a while and retry.
+ getLog().debug("Verification attempt failed: " + e.getMessage());
+ }
+ try {
+ Thread.sleep(pause);
+ } catch (InterruptedException e) {
+ // continue
+ }
+ }
+ assertTrue(verified);
+ }
+
+ /**
+ * Looks at every value of the mapreduce output and verifies that indeed
+ * the values have been reversed.
+ * @param table Table to scan.
+ * @throws IOException
+ * @throws NullPointerException if we failed to find a cell value
+ */
+ private void verifyAttempt(final Table table) throws IOException, NullPointerException {
+ Scan scan = new Scan();
+ TableInputFormat.addColumns(scan, columns);
+ ResultScanner scanner = table.getScanner(scan);
+ try {
+ Iterator<Result> itr = scanner.iterator();
+ assertTrue(itr.hasNext());
+ while(itr.hasNext()) {
+ Result r = itr.next();
+ if (getLog().isDebugEnabled()) {
+ if (r.size() > 2 ) {
+ throw new IOException("Too many results, expected 2 got " +
+ r.size());
+ }
+ }
+ byte[] firstValue = null;
+ byte[] secondValue = null;
+ int count = 0;
+ for(Cell kv : r.listCells()) {
+ if (count == 0) {
+ firstValue = CellUtil.cloneValue(kv);
+ }
+ if (count == 1) {
+ secondValue = CellUtil.cloneValue(kv);
+ }
+ count++;
+ if (count == 2) {
+ break;
+ }
+ }
+
+
+ if (firstValue == null) {
+ throw new NullPointerException(Bytes.toString(r.getRow()) +
+ ": first value is null");
+ }
+ String first = Bytes.toString(firstValue);
+
+ if (secondValue == null) {
+ throw new NullPointerException(Bytes.toString(r.getRow()) +
+ ": second value is null");
+ }
+ byte[] secondReversed = new byte[secondValue.length];
+ for (int i = 0, j = secondValue.length - 1; j >= 0; j--, i++) {
+ secondReversed[i] = secondValue[j];
+ }
+ String second = Bytes.toString(secondReversed);
+
+ if (first.compareTo(second) != 0) {
+ if (getLog().isDebugEnabled()) {
+ getLog().debug("second key is not the reverse of first. row=" +
+ Bytes.toStringBinary(r.getRow()) + ", first value=" + first +
+ ", second value=" + second);
+ }
+ fail();
+ }
+ }
+ } finally {
+ scanner.close();
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduceUtil.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduceUtil.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduceUtil.java
new file mode 100644
index 0000000..506bf4f
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduceUtil.java
@@ -0,0 +1,99 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
+ * agreements. See the NOTICE file distributed with this work for additional information regarding
+ * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with the License. You may
+ * obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+/**
+ * Test different variants of initTableMapperJob method
+ */
+@Category({MapReduceTests.class, SmallTests.class})
+public class TestTableMapReduceUtil {
+
+ /*
+ * initTableSnapshotMapperJob is tested in {@link TestTableSnapshotInputFormat} because
+ * the method depends on an online cluster.
+ */
+
+ @Test
+ public void testInitTableMapperJob1() throws Exception {
+ Configuration configuration = new Configuration();
+ Job job = new Job(configuration, "tableName");
+ // test
+ TableMapReduceUtil.initTableMapperJob("Table", new Scan(), Import.Importer.class, Text.class,
+ Text.class, job, false, WALInputFormat.class);
+ assertEquals(WALInputFormat.class, job.getInputFormatClass());
+ assertEquals(Import.Importer.class, job.getMapperClass());
+ assertEquals(LongWritable.class, job.getOutputKeyClass());
+ assertEquals(Text.class, job.getOutputValueClass());
+ assertNull(job.getCombinerClass());
+ assertEquals("Table", job.getConfiguration().get(TableInputFormat.INPUT_TABLE));
+ }
+
+ @Test
+ public void testInitTableMapperJob2() throws Exception {
+ Configuration configuration = new Configuration();
+ Job job = new Job(configuration, "tableName");
+ TableMapReduceUtil.initTableMapperJob(Bytes.toBytes("Table"), new Scan(),
+ Import.Importer.class, Text.class, Text.class, job, false, WALInputFormat.class);
+ assertEquals(WALInputFormat.class, job.getInputFormatClass());
+ assertEquals(Import.Importer.class, job.getMapperClass());
+ assertEquals(LongWritable.class, job.getOutputKeyClass());
+ assertEquals(Text.class, job.getOutputValueClass());
+ assertNull(job.getCombinerClass());
+ assertEquals("Table", job.getConfiguration().get(TableInputFormat.INPUT_TABLE));
+ }
+
+ @Test
+ public void testInitTableMapperJob3() throws Exception {
+ Configuration configuration = new Configuration();
+ Job job = new Job(configuration, "tableName");
+ TableMapReduceUtil.initTableMapperJob(Bytes.toBytes("Table"), new Scan(),
+ Import.Importer.class, Text.class, Text.class, job);
+ assertEquals(TableInputFormat.class, job.getInputFormatClass());
+ assertEquals(Import.Importer.class, job.getMapperClass());
+ assertEquals(LongWritable.class, job.getOutputKeyClass());
+ assertEquals(Text.class, job.getOutputValueClass());
+ assertNull(job.getCombinerClass());
+ assertEquals("Table", job.getConfiguration().get(TableInputFormat.INPUT_TABLE));
+ }
+
+ @Test
+ public void testInitTableMapperJob4() throws Exception {
+ Configuration configuration = new Configuration();
+ Job job = new Job(configuration, "tableName");
+ TableMapReduceUtil.initTableMapperJob(Bytes.toBytes("Table"), new Scan(),
+ Import.Importer.class, Text.class, Text.class, job, false);
+ assertEquals(TableInputFormat.class, job.getInputFormatClass());
+ assertEquals(Import.Importer.class, job.getMapperClass());
+ assertEquals(LongWritable.class, job.getOutputKeyClass());
+ assertEquals(Text.class, job.getOutputValueClass());
+ assertNull(job.getCombinerClass());
+ assertEquals("Table", job.getConfiguration().get(TableInputFormat.INPUT_TABLE));
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java
new file mode 100644
index 0000000..028df98
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java
@@ -0,0 +1,373 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.CategoryBasedTimeout;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HDFSBlocksDistribution;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.TestTableSnapshotScanner;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat.TableSnapshotRegionSplit;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+import org.junit.rules.TestRule;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
+
+import java.util.Arrays;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
+import org.apache.hadoop.hbase.util.FSUtils;
+
+@Category({VerySlowMapReduceTests.class, LargeTests.class})
+public class TestTableSnapshotInputFormat extends TableSnapshotInputFormatTestBase {
+ private static final Log LOG = LogFactory.getLog(TestTableSnapshotInputFormat.class);
+ @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
+ withTimeout(this.getClass()).withLookingForStuckThread(true).build();
+
+ private static final byte[] bbb = Bytes.toBytes("bbb");
+ private static final byte[] yyy = Bytes.toBytes("yyy");
+
+ @Rule
+ public TestName name = new TestName();
+
+ @Override
+ protected byte[] getStartRow() {
+ return bbb;
+ }
+
+ @Override
+ protected byte[] getEndRow() {
+ return yyy;
+ }
+
+ @After
+ public void tearDown() throws Exception {
+ }
+
+ @Test
+ public void testGetBestLocations() throws IOException {
+ TableSnapshotInputFormatImpl tsif = new TableSnapshotInputFormatImpl();
+ Configuration conf = UTIL.getConfiguration();
+
+ HDFSBlocksDistribution blockDistribution = new HDFSBlocksDistribution();
+ Assert.assertEquals(Lists.newArrayList(),
+ TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution));
+
+ blockDistribution.addHostsAndBlockWeight(new String[] {"h1"}, 1);
+ Assert.assertEquals(Lists.newArrayList("h1"),
+ TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution));
+
+ blockDistribution.addHostsAndBlockWeight(new String[] {"h1"}, 1);
+ Assert.assertEquals(Lists.newArrayList("h1"),
+ TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution));
+
+ blockDistribution.addHostsAndBlockWeight(new String[] {"h2"}, 1);
+ Assert.assertEquals(Lists.newArrayList("h1"),
+ TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution));
+
+ blockDistribution = new HDFSBlocksDistribution();
+ blockDistribution.addHostsAndBlockWeight(new String[] {"h1"}, 10);
+ blockDistribution.addHostsAndBlockWeight(new String[] {"h2"}, 7);
+ blockDistribution.addHostsAndBlockWeight(new String[] {"h3"}, 5);
+ blockDistribution.addHostsAndBlockWeight(new String[] {"h4"}, 1);
+ Assert.assertEquals(Lists.newArrayList("h1"),
+ TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution));
+
+ blockDistribution.addHostsAndBlockWeight(new String[] {"h2"}, 2);
+ Assert.assertEquals(Lists.newArrayList("h1", "h2"),
+ TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution));
+
+ blockDistribution.addHostsAndBlockWeight(new String[] {"h2"}, 3);
+ Assert.assertEquals(Lists.newArrayList("h2", "h1"),
+ TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution));
+
+ blockDistribution.addHostsAndBlockWeight(new String[] {"h3"}, 6);
+ blockDistribution.addHostsAndBlockWeight(new String[] {"h4"}, 9);
+
+ Assert.assertEquals(Lists.newArrayList("h2", "h3", "h4", "h1"),
+ TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution));
+ }
+
+ public static enum TestTableSnapshotCounters {
+ VALIDATION_ERROR
+ }
+
+ public static class TestTableSnapshotMapper
+ extends TableMapper<ImmutableBytesWritable, NullWritable> {
+ @Override
+ protected void map(ImmutableBytesWritable key, Result value,
+ Context context) throws IOException, InterruptedException {
+ // Validate a single row coming from the snapshot, and emit the row key
+ verifyRowFromMap(key, value);
+ context.write(key, NullWritable.get());
+ }
+ }
+
+ public static class TestTableSnapshotReducer
+ extends Reducer<ImmutableBytesWritable, NullWritable, NullWritable, NullWritable> {
+ HBaseTestingUtility.SeenRowTracker rowTracker =
+ new HBaseTestingUtility.SeenRowTracker(bbb, yyy);
+ @Override
+ protected void reduce(ImmutableBytesWritable key, Iterable<NullWritable> values,
+ Context context) throws IOException, InterruptedException {
+ rowTracker.addRow(key.get());
+ }
+
+ @Override
+ protected void cleanup(Context context) throws IOException,
+ InterruptedException {
+ rowTracker.validate();
+ }
+ }
+
+ @Test
+ public void testInitTableSnapshotMapperJobConfig() throws Exception {
+ setupCluster();
+ final TableName tableName = TableName.valueOf(name.getMethodName());
+ String snapshotName = "foo";
+
+ try {
+ createTableAndSnapshot(UTIL, tableName, snapshotName, getStartRow(), getEndRow(), 1);
+ Job job = new Job(UTIL.getConfiguration());
+ Path tmpTableDir = UTIL.getDataTestDirOnTestFS(snapshotName);
+
+ TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
+ new Scan(), TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
+ NullWritable.class, job, false, tmpTableDir);
+
+ // TODO: would be better to examine directly the cache instance that results from this
+ // config. Currently this is not possible because BlockCache initialization is static.
+ Assert.assertEquals(
+ "Snapshot job should be configured for default LruBlockCache.",
+ HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT,
+ job.getConfiguration().getFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, -1), 0.01);
+ Assert.assertEquals(
+ "Snapshot job should not use BucketCache.",
+ 0, job.getConfiguration().getFloat("hbase.bucketcache.size", -1), 0.01);
+ } finally {
+ UTIL.getAdmin().deleteSnapshot(snapshotName);
+ UTIL.deleteTable(tableName);
+ tearDownCluster();
+ }
+ }
+
+ @Override
+ public void testRestoreSnapshotDoesNotCreateBackRefLinksInit(TableName tableName,
+ String snapshotName, Path tmpTableDir) throws Exception {
+ Job job = new Job(UTIL.getConfiguration());
+ TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
+ new Scan(), TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
+ NullWritable.class, job, false, tmpTableDir);
+ }
+
+ @Override
+ public void testWithMockedMapReduce(HBaseTestingUtility util, String snapshotName,
+ int numRegions, int expectedNumSplits) throws Exception {
+ setupCluster();
+ final TableName tableName = TableName.valueOf(name.getMethodName());
+ try {
+ createTableAndSnapshot(
+ util, tableName, snapshotName, getStartRow(), getEndRow(), numRegions);
+
+ Job job = new Job(util.getConfiguration());
+ Path tmpTableDir = util.getDataTestDirOnTestFS(snapshotName);
+ Scan scan = new Scan(getStartRow(), getEndRow()); // limit the scan
+
+ TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
+ scan, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
+ NullWritable.class, job, false, tmpTableDir);
+
+ verifyWithMockedMapReduce(job, numRegions, expectedNumSplits, getStartRow(), getEndRow());
+
+ } finally {
+ util.getAdmin().deleteSnapshot(snapshotName);
+ util.deleteTable(tableName);
+ tearDownCluster();
+ }
+ }
+
+ @Test
+ public void testNoDuplicateResultsWhenSplitting() throws Exception {
+ setupCluster();
+ TableName tableName = TableName.valueOf("testNoDuplicateResultsWhenSplitting");
+ String snapshotName = "testSnapshotBug";
+ try {
+ if (UTIL.getAdmin().tableExists(tableName)) {
+ UTIL.deleteTable(tableName);
+ }
+
+ UTIL.createTable(tableName, FAMILIES);
+ Admin admin = UTIL.getAdmin();
+
+ // put some stuff in the table
+ Table table = UTIL.getConnection().getTable(tableName);
+ UTIL.loadTable(table, FAMILIES);
+
+ // split to 2 regions
+ admin.split(tableName, Bytes.toBytes("eee"));
+ TestTableSnapshotScanner.blockUntilSplitFinished(UTIL, tableName, 2);
+
+ Path rootDir = FSUtils.getRootDir(UTIL.getConfiguration());
+ FileSystem fs = rootDir.getFileSystem(UTIL.getConfiguration());
+
+ SnapshotTestingUtils.createSnapshotAndValidate(admin, tableName, Arrays.asList(FAMILIES),
+ null, snapshotName, rootDir, fs, true);
+
+ // load different values
+ byte[] value = Bytes.toBytes("after_snapshot_value");
+ UTIL.loadTable(table, FAMILIES, value);
+
+ // cause flush to create new files in the region
+ admin.flush(tableName);
+ table.close();
+
+ Job job = new Job(UTIL.getConfiguration());
+ Path tmpTableDir = UTIL.getDataTestDirOnTestFS(snapshotName);
+ // limit the scan
+ Scan scan = new Scan().withStartRow(getStartRow()).withStopRow(getEndRow());
+
+ TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName, scan,
+ TestTableSnapshotMapper.class, ImmutableBytesWritable.class, NullWritable.class, job, false,
+ tmpTableDir);
+
+ verifyWithMockedMapReduce(job, 2, 2, getStartRow(), getEndRow());
+ } finally {
+ UTIL.getAdmin().deleteSnapshot(snapshotName);
+ UTIL.deleteTable(tableName);
+ tearDownCluster();
+ }
+ }
+
+ private void verifyWithMockedMapReduce(Job job, int numRegions, int expectedNumSplits,
+ byte[] startRow, byte[] stopRow)
+ throws IOException, InterruptedException {
+ TableSnapshotInputFormat tsif = new TableSnapshotInputFormat();
+ List<InputSplit> splits = tsif.getSplits(job);
+
+ Assert.assertEquals(expectedNumSplits, splits.size());
+
+ HBaseTestingUtility.SeenRowTracker rowTracker =
+ new HBaseTestingUtility.SeenRowTracker(startRow, stopRow);
+
+ for (int i = 0; i < splits.size(); i++) {
+ // validate input split
+ InputSplit split = splits.get(i);
+ Assert.assertTrue(split instanceof TableSnapshotRegionSplit);
+
+ // validate record reader
+ TaskAttemptContext taskAttemptContext = mock(TaskAttemptContext.class);
+ when(taskAttemptContext.getConfiguration()).thenReturn(job.getConfiguration());
+ RecordReader<ImmutableBytesWritable, Result> rr =
+ tsif.createRecordReader(split, taskAttemptContext);
+ rr.initialize(split, taskAttemptContext);
+
+ // validate we can read all the data back
+ while (rr.nextKeyValue()) {
+ byte[] row = rr.getCurrentKey().get();
+ verifyRowFromMap(rr.getCurrentKey(), rr.getCurrentValue());
+ rowTracker.addRow(row);
+ }
+
+ rr.close();
+ }
+
+ // validate all rows are seen
+ rowTracker.validate();
+ }
+
+ @Override
+ protected void testWithMapReduceImpl(HBaseTestingUtility util, TableName tableName,
+ String snapshotName, Path tableDir, int numRegions, int expectedNumSplits,
+ boolean shutdownCluster) throws Exception {
+ doTestWithMapReduce(util, tableName, snapshotName, getStartRow(), getEndRow(), tableDir,
+ numRegions, expectedNumSplits, shutdownCluster);
+ }
+
+ // this is also called by the IntegrationTestTableSnapshotInputFormat
+ public static void doTestWithMapReduce(HBaseTestingUtility util, TableName tableName,
+ String snapshotName, byte[] startRow, byte[] endRow, Path tableDir, int numRegions,
+ int expectedNumSplits, boolean shutdownCluster) throws Exception {
+
+ LOG.info("testing with MapReduce");
+
+ LOG.info("create the table and snapshot");
+ createTableAndSnapshot(util, tableName, snapshotName, startRow, endRow, numRegions);
+
+ if (shutdownCluster) {
+ LOG.info("shutting down hbase cluster.");
+ util.shutdownMiniHBaseCluster();
+ }
+
+ try {
+ // create the job
+ Job job = new Job(util.getConfiguration());
+ Scan scan = new Scan(startRow, endRow); // limit the scan
+
+ job.setJarByClass(util.getClass());
+ TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
+ TestTableSnapshotInputFormat.class);
+
+ TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
+ scan, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
+ NullWritable.class, job, true, tableDir);
+
+ job.setReducerClass(TestTableSnapshotInputFormat.TestTableSnapshotReducer.class);
+ job.setNumReduceTasks(1);
+ job.setOutputFormatClass(NullOutputFormat.class);
+
+ Assert.assertTrue(job.waitForCompletion(true));
+ } finally {
+ if (!shutdownCluster) {
+ util.getAdmin().deleteSnapshot(snapshotName);
+ util.deleteTable(tableName);
+ }
+ }
+ }
+}
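
doTestWithMapReduce() above is the wiring for scanning a snapshot rather than a live table. A minimal sketch of that setup, reusing the TestTableSnapshotMapper defined in the diff; the snapshot name and restore directory are hypothetical, and the test class is assumed to be on the classpath:

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.hbase.HBaseConfiguration;
  import org.apache.hadoop.hbase.client.Scan;
  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
  import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
  import org.apache.hadoop.hbase.mapreduce.TestTableSnapshotInputFormat;
  import org.apache.hadoop.io.NullWritable;
  import org.apache.hadoop.mapreduce.Job;
  import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;

  public class SnapshotScanJob {
    public static void main(String[] args) throws Exception {
      Configuration conf = HBaseConfiguration.create();
      Job job = Job.getInstance(conf, "scan-snapshot");
      Scan scan = new Scan();  // optionally bounded with withStartRow()/withStopRow()
      // Splits are served from snapshot files restored under the temp dir,
      // so the scan does not go through the region servers.
      TableMapReduceUtil.initTableSnapshotMapperJob(
          "my_snapshot",                                               // hypothetical snapshot
          scan,
          TestTableSnapshotInputFormat.TestTableSnapshotMapper.class,  // mapper from the diff above
          ImmutableBytesWritable.class, NullWritable.class,
          job, true, new Path("/tmp/snapshot-restore"));               // hypothetical restore dir
      job.setOutputFormatClass(NullOutputFormat.class);
      System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
  }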
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSplit.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSplit.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSplit.java
new file mode 100644
index 0000000..4382c9c
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSplit.java
@@ -0,0 +1,129 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.junit.Assert;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+
+import java.util.HashSet;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+@Category({MapReduceTests.class, SmallTests.class})
+public class TestTableSplit {
+ @Rule
+ public TestName name = new TestName();
+
+ @Test
+ public void testHashCode() {
+ TableSplit split1 = new TableSplit(TableName.valueOf(name.getMethodName()),
+ "row-start".getBytes(),
+ "row-end".getBytes(), "location");
+ TableSplit split2 = new TableSplit(TableName.valueOf(name.getMethodName()),
+ "row-start".getBytes(),
+ "row-end".getBytes(), "location");
+ assertEquals (split1, split2);
+ assertTrue (split1.hashCode() == split2.hashCode());
+ HashSet<TableSplit> set = new HashSet<>(2);
+ set.add(split1);
+ set.add(split2);
+ assertTrue(set.size() == 1);
+ }
+
+ /**
+ * length of region should not influence hashcode
+ * */
+ @Test
+ public void testHashCode_length() {
+ TableSplit split1 = new TableSplit(TableName.valueOf(name.getMethodName()),
+ "row-start".getBytes(),
+ "row-end".getBytes(), "location", 1984);
+ TableSplit split2 = new TableSplit(TableName.valueOf(name.getMethodName()),
+ "row-start".getBytes(),
+ "row-end".getBytes(), "location", 1982);
+
+ assertEquals (split1, split2);
+ assertTrue (split1.hashCode() == split2.hashCode());
+ HashSet<TableSplit> set = new HashSet<>(2);
+ set.add(split1);
+ set.add(split2);
+ assertTrue(set.size() == 1);
+ }
+
+ /**
+ * Length of the region needs to be properly serialized.
+ * */
+ @Test
+ public void testLengthIsSerialized() throws Exception {
+ TableSplit split1 = new TableSplit(TableName.valueOf(name.getMethodName()),
+ "row-start".getBytes(),
+ "row-end".getBytes(), "location", 666);
+
+ TableSplit deserialized = new TableSplit(TableName.valueOf(name.getMethodName()),
+ "row-start2".getBytes(),
+ "row-end2".getBytes(), "location1");
+ ReflectionUtils.copy(new Configuration(), split1, deserialized);
+
+ Assert.assertEquals(666, deserialized.getLength());
+ }
+
+ @Test
+ public void testToString() {
+ TableSplit split =
+ new TableSplit(TableName.valueOf(name.getMethodName()), "row-start".getBytes(), "row-end".getBytes(),
+ "location");
+ String str =
+ "HBase table split(table name: " + name.getMethodName() + ", scan: , start row: row-start, "
+ + "end row: row-end, region location: location, "
+ + "encoded region name: )";
+ Assert.assertEquals(str, split.toString());
+
+ split =
+ new TableSplit(TableName.valueOf(name.getMethodName()), null, "row-start".getBytes(),
+ "row-end".getBytes(), "location", "encoded-region-name", 1000L);
+ str =
+ "HBase table split(table name: " + name.getMethodName() + ", scan: , start row: row-start, "
+ + "end row: row-end, region location: location, "
+ + "encoded region name: encoded-region-name)";
+ Assert.assertEquals(str, split.toString());
+
+ split = new TableSplit((TableName) null, null, null, null);
+ str =
+ "HBase table split(table name: null, scan: , start row: null, "
+ + "end row: null, region location: null, "
+ + "encoded region name: )";
+ Assert.assertEquals(str, split.toString());
+
+ split = new TableSplit((TableName) null, null, null, null, null, null, 1000L);
+ str =
+ "HBase table split(table name: null, scan: , start row: null, "
+ + "end row: null, region location: null, "
+ + "encoded region name: null)";
+ Assert.assertEquals(str, split.toString());
+ }
+}
+
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTimeRangeMapRed.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTimeRangeMapRed.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTimeRangeMapRed.java
new file mode 100644
index 0000000..6796c94
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTimeRangeMapRed.java
@@ -0,0 +1,211 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Durability;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.MapWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.NavigableMap;
+import java.util.TreeMap;
+
+@Category({MapReduceTests.class, LargeTests.class})
+public class TestTimeRangeMapRed {
+ private final static Log log = LogFactory.getLog(TestTimeRangeMapRed.class);
+ private static final HBaseTestingUtility UTIL =
+ new HBaseTestingUtility();
+ private Admin admin;
+
+ private static final byte [] KEY = Bytes.toBytes("row1");
+ private static final NavigableMap<Long, Boolean> TIMESTAMP = new TreeMap<>();
+ static {
+ TIMESTAMP.put((long)1245620000, false);
+ TIMESTAMP.put((long)1245620005, true); // include
+ TIMESTAMP.put((long)1245620010, true); // include
+ TIMESTAMP.put((long)1245620055, true); // include
+ TIMESTAMP.put((long)1245620100, true); // include
+ TIMESTAMP.put((long)1245620150, false);
+ TIMESTAMP.put((long)1245620250, false);
+ }
+ static final long MINSTAMP = 1245620005;
+ static final long MAXSTAMP = 1245620100 + 1; // maxStamp itself is excluded, so increment it.
+
+ static final TableName TABLE_NAME = TableName.valueOf("table123");
+ static final byte[] FAMILY_NAME = Bytes.toBytes("text");
+ static final byte[] COLUMN_NAME = Bytes.toBytes("input");
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ UTIL.startMiniCluster();
+ }
+
+ @AfterClass
+ public static void afterClass() throws Exception {
+ UTIL.shutdownMiniCluster();
+ }
+
+ @Before
+ public void before() throws Exception {
+ this.admin = UTIL.getAdmin();
+ }
+
+ private static class ProcessTimeRangeMapper
+ extends TableMapper<ImmutableBytesWritable, MapWritable>
+ implements Configurable {
+
+ private Configuration conf = null;
+ private Table table = null;
+
+ @Override
+ public void map(ImmutableBytesWritable key, Result result,
+ Context context)
+ throws IOException {
+ List<Long> tsList = new ArrayList<>();
+ for (Cell kv : result.listCells()) {
+ tsList.add(kv.getTimestamp());
+ }
+
+ List<Put> puts = new ArrayList<>();
+ for (Long ts : tsList) {
+ Put put = new Put(key.get());
+ put.setDurability(Durability.SKIP_WAL);
+ put.addColumn(FAMILY_NAME, COLUMN_NAME, ts, Bytes.toBytes(true));
+ puts.add(put);
+ }
+ table.put(puts);
+ }
+
+ @Override
+ public Configuration getConf() {
+ return conf;
+ }
+
+ @Override
+ public void setConf(Configuration configuration) {
+ this.conf = configuration;
+ try {
+ Connection connection = ConnectionFactory.createConnection(conf);
+ table = connection.getTable(TABLE_NAME);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+ }
+
+ @Test
+ public void testTimeRangeMapRed()
+ throws IOException, InterruptedException, ClassNotFoundException {
+ final HTableDescriptor desc = new HTableDescriptor(TABLE_NAME);
+ final HColumnDescriptor col = new HColumnDescriptor(FAMILY_NAME);
+ col.setMaxVersions(Integer.MAX_VALUE);
+ desc.addFamily(col);
+ admin.createTable(desc);
+ List<Put> puts = new ArrayList<>();
+ for (Map.Entry<Long, Boolean> entry : TIMESTAMP.entrySet()) {
+ Put put = new Put(KEY);
+ put.setDurability(Durability.SKIP_WAL);
+ put.addColumn(FAMILY_NAME, COLUMN_NAME, entry.getKey(), Bytes.toBytes(false));
+ puts.add(put);
+ }
+ Table table = UTIL.getConnection().getTable(desc.getTableName());
+ table.put(puts);
+ runTestOnTable();
+ verify(table);
+ table.close();
+ }
+
+ private void runTestOnTable()
+ throws IOException, InterruptedException, ClassNotFoundException {
+ Job job = null;
+ try {
+ job = new Job(UTIL.getConfiguration(), "test123");
+ job.setOutputFormatClass(NullOutputFormat.class);
+ job.setNumReduceTasks(0);
+ Scan scan = new Scan();
+ scan.addColumn(FAMILY_NAME, COLUMN_NAME);
+ scan.setTimeRange(MINSTAMP, MAXSTAMP);
+ scan.setMaxVersions();
+ TableMapReduceUtil.initTableMapperJob(TABLE_NAME,
+ scan, ProcessTimeRangeMapper.class, Text.class, Text.class, job);
+ job.waitForCompletion(true);
+ } catch (IOException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ } finally {
+ if (job != null) {
+ FileUtil.fullyDelete(
+ new File(job.getConfiguration().get("hadoop.tmp.dir")));
+ }
+ }
+ }
+
+ private void verify(final Table table) throws IOException {
+ Scan scan = new Scan();
+ scan.addColumn(FAMILY_NAME, COLUMN_NAME);
+ scan.setMaxVersions(1);
+ ResultScanner scanner = table.getScanner(scan);
+ for (Result r: scanner) {
+ for (Cell kv : r.listCells()) {
+ log.debug(Bytes.toString(r.getRow()) + "\t" + Bytes.toString(CellUtil.cloneFamily(kv))
+ + "\t" + Bytes.toString(CellUtil.cloneQualifier(kv))
+ + "\t" + kv.getTimestamp() + "\t" + Bytes.toBoolean(CellUtil.cloneValue(kv)));
+ org.junit.Assert.assertEquals(TIMESTAMP.get(kv.getTimestamp()),
+ Bytes.toBoolean(CellUtil.cloneValue(kv)));
+ }
+ }
+ scanner.close();
+ }
+
+}
+
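
For reference, a minimal sketch, not part of this patch, of the time-range contract the constants above encode: Scan.setTimeRange(min, max) selects cells with min <= timestamp < max, which is why MAXSTAMP is defined as the last included timestamp plus one. The fragment reuses the test's column constants and assumes the surrounding method declares IOException.

    Scan scan = new Scan();
    scan.addColumn(FAMILY_NAME, COLUMN_NAME);
    // selects timestamps 1245620005 .. 1245620100 inclusive; the upper bound itself is excluded
    scan.setTimeRange(MINSTAMP, MAXSTAMP);
    scan.setMaxVersions(); // return every matching version, not just the newest
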
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestWALPlayer.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestWALPlayer.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestWALPlayer.java
new file mode 100644
index 0000000..427c5cc
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestWALPlayer.java
@@ -0,0 +1,231 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.doAnswer;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import java.io.ByteArrayOutputStream;
+import java.io.PrintStream;
+import java.util.ArrayList;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.WALPlayer.WALKeyValueMapper;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.LauncherSecurityManager;
+import org.apache.hadoop.hbase.wal.WAL;
+import org.apache.hadoop.hbase.wal.WALKey;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Mapper.Context;
+import org.apache.hadoop.util.ToolRunner;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.stubbing.Answer;
+
+/**
+ * Basic test for the WALPlayer M/R tool
+ */
+@Category({MapReduceTests.class, LargeTests.class})
+public class TestWALPlayer {
+ private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+ private static MiniHBaseCluster cluster;
+ private static Path rootDir;
+ private static Path walRootDir;
+ private static FileSystem fs;
+ private static FileSystem logFs;
+ private static Configuration conf;
+
+ @Rule
+ public TestName name = new TestName();
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ conf= TEST_UTIL.getConfiguration();
+ rootDir = TEST_UTIL.createRootDir();
+ walRootDir = TEST_UTIL.createWALRootDir();
+ fs = FSUtils.getRootDirFileSystem(conf);
+ logFs = FSUtils.getWALFileSystem(conf);
+ cluster = TEST_UTIL.startMiniCluster();
+ }
+
+ @AfterClass
+ public static void afterClass() throws Exception {
+ TEST_UTIL.shutdownMiniCluster();
+ fs.delete(rootDir, true);
+ logFs.delete(walRootDir, true);
+ }
+
+ /**
+ * Simple end-to-end test
+ * @throws Exception
+ */
+ @Test
+ public void testWALPlayer() throws Exception {
+ final TableName tableName1 = TableName.valueOf(name.getMethodName() + "1");
+ final TableName tableName2 = TableName.valueOf(name.getMethodName() + "2");
+ final byte[] FAMILY = Bytes.toBytes("family");
+ final byte[] COLUMN1 = Bytes.toBytes("c1");
+ final byte[] COLUMN2 = Bytes.toBytes("c2");
+ final byte[] ROW = Bytes.toBytes("row");
+ Table t1 = TEST_UTIL.createTable(tableName1, FAMILY);
+ Table t2 = TEST_UTIL.createTable(tableName2, FAMILY);
+
+ // put a row into the first table
+ Put p = new Put(ROW);
+ p.addColumn(FAMILY, COLUMN1, COLUMN1);
+ p.addColumn(FAMILY, COLUMN2, COLUMN2);
+ t1.put(p);
+ // delete one column
+ Delete d = new Delete(ROW);
+ d.addColumns(FAMILY, COLUMN1);
+ t1.delete(d);
+
+ // replay the WAL, map table 1 to table 2
+ WAL log = cluster.getRegionServer(0).getWAL(null);
+ log.rollWriter();
+ String walInputDir = new Path(cluster.getMaster().getMasterFileSystem()
+ .getWALRootDir(), HConstants.HREGION_LOGDIR_NAME).toString();
+
+ Configuration configuration= TEST_UTIL.getConfiguration();
+ WALPlayer player = new WALPlayer(configuration);
+ String optionName="_test_.name";
+ configuration.set(optionName, "1000");
+ player.setupTime(configuration, optionName);
+ assertEquals(1000,configuration.getLong(optionName,0));
+ assertEquals(0, ToolRunner.run(configuration, player,
+ new String[] {walInputDir, tableName1.getNameAsString(),
+ tableName2.getNameAsString() }));
+
+
+ // verify the WAL was replayed into table 2
+ Get g = new Get(ROW);
+ Result r = t2.get(g);
+ assertEquals(1, r.size());
+ assertTrue(CellUtil.matchingQualifier(r.rawCells()[0], COLUMN2));
+ }
+
+ /**
+ * Test WALKeyValueMapper setup and map
+ */
+ @Test
+ public void testWALKeyValueMapper() throws Exception {
+ testWALKeyValueMapper(WALPlayer.TABLES_KEY);
+ }
+
+ @Test
+ public void testWALKeyValueMapperWithDeprecatedConfig() throws Exception {
+ testWALKeyValueMapper("hlog.input.tables");
+ }
+
+ private void testWALKeyValueMapper(final String tableConfigKey) throws Exception {
+ Configuration configuration = new Configuration();
+ configuration.set(tableConfigKey, "table");
+ WALKeyValueMapper mapper = new WALKeyValueMapper();
+ WALKey key = mock(WALKey.class);
+ when(key.getTablename()).thenReturn(TableName.valueOf("table"));
+ @SuppressWarnings("unchecked")
+ Mapper<WALKey, WALEdit, ImmutableBytesWritable, KeyValue>.Context context = mock(Context.class);
+ when(context.getConfiguration()).thenReturn(configuration);
+
+ WALEdit value = mock(WALEdit.class);
+ ArrayList<Cell> values = new ArrayList<>();
+ KeyValue kv1 = new KeyValue(Bytes.toBytes("row"), Bytes.toBytes("family"), null);
+
+ values.add(kv1);
+ when(value.getCells()).thenReturn(values);
+ mapper.setup(context);
+
+ doAnswer(new Answer<Void>() {
+
+ @Override
+ public Void answer(InvocationOnMock invocation) throws Throwable {
+ ImmutableBytesWritable writer = (ImmutableBytesWritable) invocation.getArguments()[0];
+ KeyValue key = (KeyValue) invocation.getArguments()[1];
+ assertEquals("row", Bytes.toString(writer.get()));
+ assertEquals("row", Bytes.toString(CellUtil.cloneRow(key)));
+ return null;
+ }
+ }).when(context).write(any(ImmutableBytesWritable.class), any(KeyValue.class));
+
+ mapper.map(key, value, context);
+
+ }
+
+ /**
+ * Test main method
+ */
+ @Test
+ public void testMainMethod() throws Exception {
+
+ PrintStream oldPrintStream = System.err;
+ SecurityManager SECURITY_MANAGER = System.getSecurityManager();
+ LauncherSecurityManager newSecurityManager= new LauncherSecurityManager();
+ System.setSecurityManager(newSecurityManager);
+ ByteArrayOutputStream data = new ByteArrayOutputStream();
+ String[] args = {};
+ System.setErr(new PrintStream(data));
+ try {
+ System.setErr(new PrintStream(data));
+ try {
+ WALPlayer.main(args);
+ fail("should be SecurityException");
+ } catch (SecurityException e) {
+ assertEquals(-1, newSecurityManager.getExitCode());
+ assertTrue(data.toString().contains("ERROR: Wrong number of arguments:"));
+ assertTrue(data.toString().contains("Usage: WALPlayer [options] <wal inputdir>" +
+ " <tables> [<tableMappings>]"));
+ assertTrue(data.toString().contains("-Dwal.bulk.output=/path/for/output"));
+ }
+
+ } finally {
+ System.setErr(oldPrintStream);
+ System.setSecurityManager(SECURITY_MANAGER);
+ }
+
+ }
+
+}
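
For reference, a minimal sketch, not part of this patch, of driving WALPlayer programmatically the way testWALPlayer does; the WAL directory and table names below are placeholders. The equivalent command-line form, per the usage text asserted in testMainMethod, is WALPlayer [options] <wal inputdir> <tables> [<tableMappings>], with -Dwal.bulk.output=/path/for/output to produce bulk-load output instead of writing directly to the target table.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.mapreduce.WALPlayer;
    import org.apache.hadoop.util.ToolRunner;

    public class ReplayWalsExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // args: <wal inputdir> <tables> [<tableMappings>] -- replay table1's edits into table2
        int rc = ToolRunner.run(conf, new WALPlayer(conf),
            new String[] { "/hbase/WALs", "table1", "table2" });
        System.exit(rc);
      }
    }
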
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestWALRecordReader.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestWALRecordReader.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestWALRecordReader.java
new file mode 100644
index 0000000..34725b4
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestWALRecordReader.java
@@ -0,0 +1,276 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.util.List;
+import java.util.NavigableMap;
+import java.util.TreeMap;
+import java.util.concurrent.atomic.AtomicLong;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.mapreduce.WALInputFormat.WALKeyRecordReader;
+import org.apache.hadoop.hbase.mapreduce.WALInputFormat.WALRecordReader;
+import org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl;
+import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.wal.WAL;
+import org.apache.hadoop.hbase.wal.WALFactory;
+import org.apache.hadoop.hbase.wal.WALKey;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.MapReduceTestUtil;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+/**
+ * JUnit tests for the WALRecordReader
+ */
+@Category({MapReduceTests.class, MediumTests.class})
+public class TestWALRecordReader {
+ private static final Log LOG = LogFactory.getLog(TestWALRecordReader.class);
+ private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+ private static Configuration conf;
+ private static FileSystem fs;
+ private static Path hbaseDir;
+ private static FileSystem walFs;
+ private static Path walRootDir;
+ // visible for TestHLogRecordReader
+ static final TableName tableName = TableName.valueOf(getName());
+ private static final byte [] rowName = tableName.getName();
+ // visible for TestHLogRecordReader
+ static final HRegionInfo info = new HRegionInfo(tableName,
+ Bytes.toBytes(""), Bytes.toBytes(""), false);
+ private static final byte [] family = Bytes.toBytes("column");
+ private static final byte [] value = Bytes.toBytes("value");
+ private static HTableDescriptor htd;
+ private static Path logDir;
+ protected MultiVersionConcurrencyControl mvcc;
+ protected static NavigableMap<byte[], Integer> scopes = new TreeMap<>(Bytes.BYTES_COMPARATOR);
+
+ private static String getName() {
+ return "TestWALRecordReader";
+ }
+
+ @Before
+ public void setUp() throws Exception {
+ fs.delete(hbaseDir, true);
+ walFs.delete(walRootDir, true);
+ mvcc = new MultiVersionConcurrencyControl();
+ }
+ @BeforeClass
+ public static void setUpBeforeClass() throws Exception {
+ // Make block sizes small.
+ conf = TEST_UTIL.getConfiguration();
+ conf.setInt("dfs.blocksize", 1024 * 1024);
+ conf.setInt("dfs.replication", 1);
+ TEST_UTIL.startMiniDFSCluster(1);
+
+ conf = TEST_UTIL.getConfiguration();
+ fs = TEST_UTIL.getDFSCluster().getFileSystem();
+
+ hbaseDir = TEST_UTIL.createRootDir();
+ walRootDir = TEST_UTIL.createWALRootDir();
+ walFs = FSUtils.getWALFileSystem(conf);
+ logDir = new Path(walRootDir, HConstants.HREGION_LOGDIR_NAME);
+
+ htd = new HTableDescriptor(tableName);
+ htd.addFamily(new HColumnDescriptor(family));
+ }
+
+ @AfterClass
+ public static void tearDownAfterClass() throws Exception {
+ fs.delete(hbaseDir, true);
+ walFs.delete(walRootDir, true);
+ TEST_UTIL.shutdownMiniCluster();
+ }
+
+ /**
+ * Test partial reads from the log based on passed time range
+ * @throws Exception
+ */
+ @Test
+ public void testPartialRead() throws Exception {
+ final WALFactory walfactory = new WALFactory(conf, null, getName());
+ WAL log = walfactory.getWAL(info.getEncodedNameAsBytes(), info.getTable().getNamespace());
+ // This test depends on timestamp being millisecond based and the filename of the WAL also
+ // being millisecond based.
+ long ts = System.currentTimeMillis();
+ WALEdit edit = new WALEdit();
+ edit.add(new KeyValue(rowName, family, Bytes.toBytes("1"), ts, value));
+ log.append(info, getWalKey(ts, scopes), edit, true);
+ edit = new WALEdit();
+ edit.add(new KeyValue(rowName, family, Bytes.toBytes("2"), ts+1, value));
+ log.append(info, getWalKey(ts+1, scopes), edit, true);
+ log.sync();
+ LOG.info("Before 1st WAL roll " + log.toString());
+ log.rollWriter();
+ LOG.info("Past 1st WAL roll " + log.toString());
+
+ Thread.sleep(1);
+ long ts1 = System.currentTimeMillis();
+
+ edit = new WALEdit();
+ edit.add(new KeyValue(rowName, family, Bytes.toBytes("3"), ts1+1, value));
+ log.append(info, getWalKey(ts1+1, scopes), edit, true);
+ edit = new WALEdit();
+ edit.add(new KeyValue(rowName, family, Bytes.toBytes("4"), ts1+2, value));
+ log.append(info, getWalKey(ts1+2, scopes), edit, true);
+ log.sync();
+ log.shutdown();
+ walfactory.shutdown();
+ LOG.info("Closed WAL " + log.toString());
+
+
+ WALInputFormat input = new WALInputFormat();
+ Configuration jobConf = new Configuration(conf);
+ jobConf.set("mapreduce.input.fileinputformat.inputdir", logDir.toString());
+ jobConf.setLong(WALInputFormat.END_TIME_KEY, ts);
+
+ // only 1st file is considered, and only its 1st entry is used
+ List<InputSplit> splits = input.getSplits(MapreduceTestingShim.createJobContext(jobConf));
+
+ assertEquals(1, splits.size());
+ testSplit(splits.get(0), Bytes.toBytes("1"));
+
+ jobConf.setLong(WALInputFormat.START_TIME_KEY, ts+1);
+ jobConf.setLong(WALInputFormat.END_TIME_KEY, ts1+1);
+ splits = input.getSplits(MapreduceTestingShim.createJobContext(jobConf));
+ // both files need to be considered
+ assertEquals(2, splits.size());
+ // only the 2nd entry from the 1st file is used
+ testSplit(splits.get(0), Bytes.toBytes("2"));
+ // only the 1st entry from the 2nd file is used
+ testSplit(splits.get(1), Bytes.toBytes("3"));
+ }
+
+ /**
+ * Test basic functionality
+ * @throws Exception
+ */
+ @Test
+ public void testWALRecordReader() throws Exception {
+ final WALFactory walfactory = new WALFactory(conf, null, getName());
+ WAL log = walfactory.getWAL(info.getEncodedNameAsBytes(), info.getTable().getNamespace());
+ byte [] value = Bytes.toBytes("value");
+ final AtomicLong sequenceId = new AtomicLong(0);
+ WALEdit edit = new WALEdit();
+ edit.add(new KeyValue(rowName, family, Bytes.toBytes("1"),
+ System.currentTimeMillis(), value));
+ long txid = log.append(info, getWalKey(System.currentTimeMillis(), scopes), edit, true);
+ log.sync(txid);
+
+ Thread.sleep(1); // make sure 2nd log gets a later timestamp
+ long secondTs = System.currentTimeMillis();
+ log.rollWriter();
+
+ edit = new WALEdit();
+ edit.add(new KeyValue(rowName, family, Bytes.toBytes("2"),
+ System.currentTimeMillis(), value));
+ txid = log.append(info, getWalKey(System.currentTimeMillis(), scopes), edit, true);
+ log.sync(txid);
+ log.shutdown();
+ walfactory.shutdown();
+ long thirdTs = System.currentTimeMillis();
+
+ // should have 2 log files now
+ WALInputFormat input = new WALInputFormat();
+ Configuration jobConf = new Configuration(conf);
+ jobConf.set("mapreduce.input.fileinputformat.inputdir", logDir.toString());
+
+ // make sure both logs are found
+ List<InputSplit> splits = input.getSplits(MapreduceTestingShim.createJobContext(jobConf));
+ assertEquals(2, splits.size());
+
+ // should return exactly one KV
+ testSplit(splits.get(0), Bytes.toBytes("1"));
+ // same for the 2nd split
+ testSplit(splits.get(1), Bytes.toBytes("2"));
+
+ // now test basic time ranges:
+
+ // set an end time; the 2nd log file can be ignored completely.
+ jobConf.setLong(WALInputFormat.END_TIME_KEY, secondTs-1);
+ splits = input.getSplits(MapreduceTestingShim.createJobContext(jobConf));
+ assertEquals(1, splits.size());
+ testSplit(splits.get(0), Bytes.toBytes("1"));
+
+ // now set a start time
+ jobConf.setLong(WALInputFormat.END_TIME_KEY, Long.MAX_VALUE);
+ jobConf.setLong(WALInputFormat.START_TIME_KEY, thirdTs);
+ splits = input.getSplits(MapreduceTestingShim.createJobContext(jobConf));
+ // both logs need to be considered
+ assertEquals(2, splits.size());
+ // but both readers skip all edits
+ testSplit(splits.get(0));
+ testSplit(splits.get(1));
+ }
+
+ protected WALKey getWalKey(final long time, NavigableMap<byte[], Integer> scopes) {
+ return new WALKey(info.getEncodedNameAsBytes(), tableName, time, mvcc, scopes);
+ }
+
+ protected WALRecordReader getReader() {
+ return new WALKeyRecordReader();
+ }
+
+ /**
+ * Create a new reader from the split, and match the edits against the passed columns.
+ */
+ private void testSplit(InputSplit split, byte[]... columns) throws Exception {
+ final WALRecordReader reader = getReader();
+ reader.initialize(split, MapReduceTestUtil.createDummyMapTaskAttemptContext(conf));
+
+ for (byte[] column : columns) {
+ assertTrue(reader.nextKeyValue());
+ Cell cell = reader.getCurrentValue().getCells().get(0);
+ if (!Bytes.equals(column, 0, column.length, cell.getQualifierArray(),
+ cell.getQualifierOffset(), cell.getQualifierLength())) {
+ assertTrue(
+ "expected ["
+ + Bytes.toString(column)
+ + "], actual ["
+ + Bytes.toString(cell.getQualifierArray(), cell.getQualifierOffset(),
+ cell.getQualifierLength()) + "]", false);
+ }
+ }
+ assertFalse(reader.nextKeyValue());
+ reader.close();
+ }
+
+}
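
For reference, a minimal sketch, not part of this patch, of how a job restricts WALInputFormat to a time window, mirroring the configuration used in the tests above (in these tests both bounds behave inclusively). It is a fragment: conf, logDir, startTs and endTs come from the surrounding setup, and the calling method must declare the checked exceptions.

    Configuration jobConf = new Configuration(conf);
    jobConf.set("mapreduce.input.fileinputformat.inputdir", logDir.toString());
    jobConf.setLong(WALInputFormat.START_TIME_KEY, startTs); // oldest edit to return
    jobConf.setLong(WALInputFormat.END_TIME_KEY, endTs);     // newest edit to return
    List<InputSplit> splits =
        new WALInputFormat().getSplits(MapreduceTestingShim.createJobContext(jobConf));
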
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TsvImporterCustomTestMapper.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TsvImporterCustomTestMapper.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TsvImporterCustomTestMapper.java
new file mode 100644
index 0000000..aea5036
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TsvImporterCustomTestMapper.java
@@ -0,0 +1,80 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Durability;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.KeyValue;
+
+import java.io.IOException;
+
+/**
+ * Dummy mapper used for unit tests to verify that the mapper can be injected.
+ * This approach would be used if a custom transformation needed to be done after
+ * reading the input data before writing it to HFiles.
+ */
+public class TsvImporterCustomTestMapper extends TsvImporterMapper {
+
+ @Override
+ protected void setup(Context context) {
+ doSetup(context);
+ }
+
+ /**
+ * Convert a line of TSV text into an HBase table row after transforming the
+ * values by multiplying them by 3.
+ */
+ @Override
+ public void map(LongWritable offset, Text value, Context context)
+ throws IOException {
+ byte[] family = Bytes.toBytes("FAM");
+ final byte[][] qualifiers = { Bytes.toBytes("A"), Bytes.toBytes("B") };
+
+ // do some basic line parsing
+ byte[] lineBytes = value.getBytes();
+ String[] valueTokens = new String(lineBytes, "UTF-8").split("\u001b");
+
+ // create the rowKey and Put
+ ImmutableBytesWritable rowKey =
+ new ImmutableBytesWritable(Bytes.toBytes(valueTokens[0]));
+ Put put = new Put(rowKey.copyBytes());
+ put.setDurability(Durability.SKIP_WAL);
+
+ //The value should look like this: VALUE1 or VALUE2. Let's multiply
+ //the integer by 3
+ for(int i = 1; i < valueTokens.length; i++) {
+ String prefix = valueTokens[i].substring(0, "VALUE".length());
+ String suffix = valueTokens[i].substring("VALUE".length());
+ String newValue = prefix + Integer.parseInt(suffix) * 3;
+
+ KeyValue kv = new KeyValue(rowKey.copyBytes(), family,
+ qualifiers[i-1], Bytes.toBytes(newValue));
+ put.add(kv);
+ }
+
+ try {
+ context.write(rowKey, put);
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ }
+ }
+}
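
For reference, a minimal sketch, not part of this patch, of how a custom mapper like the one above is typically injected into an ImportTsv run. Both configuration keys are assumptions based on ImportTsv's MAPPER_CONF_KEY and COLUMNS_CONF_KEY ("importtsv.mapper.class" and "importtsv.columns"); verify them against the ImportTsv version in use.

    Configuration conf = HBaseConfiguration.create();
    // assumed key: ImportTsv.MAPPER_CONF_KEY
    conf.set("importtsv.mapper.class", TsvImporterCustomTestMapper.class.getName());
    // assumed key: ImportTsv.COLUMNS_CONF_KEY; matches the FAM:A / FAM:B layout used above
    conf.set("importtsv.columns", "HBASE_ROW_KEY,FAM:A,FAM:B");
    // then launch the ImportTsv tool with this configuration, e.g. via ToolRunner
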
[10/41] hbase git commit: HBASE-18640 Move mapreduce out of
hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java
deleted file mode 100644
index eebb0f3..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java
+++ /dev/null
@@ -1,2626 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase;
-
-import static org.codehaus.jackson.map.SerializationConfig.Feature.SORT_PROPERTIES_ALPHABETICALLY;
-
-import java.io.IOException;
-import java.io.PrintStream;
-import java.lang.reflect.Constructor;
-import java.math.BigDecimal;
-import java.math.MathContext;
-import java.text.DecimalFormat;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Date;
-import java.util.LinkedList;
-import java.util.Locale;
-import java.util.Map;
-import java.util.Queue;
-import java.util.Random;
-import java.util.TreeMap;
-import java.util.NoSuchElementException;
-import java.util.concurrent.Callable;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-
-import org.apache.commons.lang.StringUtils;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Admin;
-import org.apache.hadoop.hbase.client.Append;
-import org.apache.hadoop.hbase.client.AsyncConnection;
-import org.apache.hadoop.hbase.client.AsyncTable;
-import org.apache.hadoop.hbase.client.BufferedMutator;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Consistency;
-import org.apache.hadoop.hbase.client.Delete;
-import org.apache.hadoop.hbase.client.Durability;
-import org.apache.hadoop.hbase.client.Get;
-import org.apache.hadoop.hbase.client.Increment;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.RawAsyncTable;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.RowMutations;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.filter.BinaryComparator;
-import org.apache.hadoop.hbase.filter.CompareFilter;
-import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
-import org.apache.hadoop.hbase.filter.Filter;
-import org.apache.hadoop.hbase.filter.FilterAllFilter;
-import org.apache.hadoop.hbase.filter.FilterList;
-import org.apache.hadoop.hbase.filter.PageFilter;
-import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
-import org.apache.hadoop.hbase.filter.WhileMatchFilter;
-import org.apache.hadoop.hbase.io.compress.Compression;
-import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
-import org.apache.hadoop.hbase.io.hfile.RandomDistribution;
-import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
-import org.apache.hadoop.hbase.regionserver.BloomType;
-import org.apache.hadoop.hbase.regionserver.CompactingMemStore;
-import org.apache.hadoop.hbase.trace.HBaseHTraceConfiguration;
-import org.apache.hadoop.hbase.trace.SpanReceiverHost;
-import org.apache.hadoop.hbase.util.*;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
-import org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-import org.codehaus.jackson.map.ObjectMapper;
-import org.apache.htrace.Sampler;
-import org.apache.htrace.Trace;
-import org.apache.htrace.TraceScope;
-import org.apache.htrace.impl.ProbabilitySampler;
-import org.apache.hadoop.hbase.shaded.com.google.common.base.MoreObjects;
-import org.apache.hadoop.hbase.shaded.com.google.common.util.concurrent.ThreadFactoryBuilder;
-
-import com.codahale.metrics.Histogram;
-import com.codahale.metrics.UniformReservoir;
-
-/**
- * Script used for evaluating HBase performance and scalability. Runs an HBase
- * client that steps through one of a set of hardcoded tests or 'experiments'
- * (e.g. a random reads test, a random writes test, etc.). Pass on the
- * command-line which test to run and how many clients are participating in
- * this experiment. Run {@code PerformanceEvaluation --help} to obtain usage.
- *
- * <p>This class sets up and runs the evaluation programs described in
- * Section 7, <i>Performance Evaluation</i>, of the <a
- * href="http://labs.google.com/papers/bigtable.html">Bigtable</a>
- * paper, pages 8-10.
- *
- * <p>By default, runs as a mapreduce job where each mapper runs a single test
- * client. Can also run as a non-mapreduce, multithreaded application by
- * specifying {@code --nomapred}. Each client does about 1GB of data, unless
- * specified otherwise.
- */
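
(For reference, not part of this patch: with this tool's standard usage, a run such as

    hbase org.apache.hadoop.hbase.PerformanceEvaluation --nomapred randomWrite 4

starts four in-process client threads running the randomWrite command, while the same invocation without --nomapred launches one mapper per client. Options vary by release, so consult --help as the Javadoc above suggests.)
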
-@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
-public class PerformanceEvaluation extends Configured implements Tool {
- static final String RANDOM_SEEK_SCAN = "randomSeekScan";
- static final String RANDOM_READ = "randomRead";
- private static final Log LOG = LogFactory.getLog(PerformanceEvaluation.class.getName());
- private static final ObjectMapper MAPPER = new ObjectMapper();
- static {
- MAPPER.configure(SORT_PROPERTIES_ALPHABETICALLY, true);
- }
-
- public static final String TABLE_NAME = "TestTable";
- public static final byte[] FAMILY_NAME = Bytes.toBytes("info");
- public static final byte [] COLUMN_ZERO = Bytes.toBytes("" + 0);
- public static final byte [] QUALIFIER_NAME = COLUMN_ZERO;
- public static final int DEFAULT_VALUE_LENGTH = 1000;
- public static final int ROW_LENGTH = 26;
-
- private static final int ONE_GB = 1024 * 1024 * 1000;
- private static final int DEFAULT_ROWS_PER_GB = ONE_GB / DEFAULT_VALUE_LENGTH;
- // TODO : should we make this configurable
- private static final int TAG_LENGTH = 256;
- private static final DecimalFormat FMT = new DecimalFormat("0.##");
- private static final MathContext CXT = MathContext.DECIMAL64;
- private static final BigDecimal MS_PER_SEC = BigDecimal.valueOf(1000);
- private static final BigDecimal BYTES_PER_MB = BigDecimal.valueOf(1024 * 1024);
- private static final TestOptions DEFAULT_OPTS = new TestOptions();
-
- private static Map<String, CmdDescriptor> COMMANDS = new TreeMap<>();
- private static final Path PERF_EVAL_DIR = new Path("performance_evaluation");
-
- static {
- addCommandDescriptor(AsyncRandomReadTest.class, "asyncRandomRead",
- "Run async random read test");
- addCommandDescriptor(AsyncRandomWriteTest.class, "asyncRandomWrite",
- "Run async random write test");
- addCommandDescriptor(AsyncSequentialReadTest.class, "asyncSequentialRead",
- "Run async sequential read test");
- addCommandDescriptor(AsyncSequentialWriteTest.class, "asyncSequentialWrite",
- "Run async sequential write test");
- addCommandDescriptor(AsyncScanTest.class, "asyncScan",
- "Run async scan test (read every row)");
- addCommandDescriptor(RandomReadTest.class, RANDOM_READ,
- "Run random read test");
- addCommandDescriptor(RandomSeekScanTest.class, RANDOM_SEEK_SCAN,
- "Run random seek and scan 100 test");
- addCommandDescriptor(RandomScanWithRange10Test.class, "scanRange10",
- "Run random seek scan with both start and stop row (max 10 rows)");
- addCommandDescriptor(RandomScanWithRange100Test.class, "scanRange100",
- "Run random seek scan with both start and stop row (max 100 rows)");
- addCommandDescriptor(RandomScanWithRange1000Test.class, "scanRange1000",
- "Run random seek scan with both start and stop row (max 1000 rows)");
- addCommandDescriptor(RandomScanWithRange10000Test.class, "scanRange10000",
- "Run random seek scan with both start and stop row (max 10000 rows)");
- addCommandDescriptor(RandomWriteTest.class, "randomWrite",
- "Run random write test");
- addCommandDescriptor(SequentialReadTest.class, "sequentialRead",
- "Run sequential read test");
- addCommandDescriptor(SequentialWriteTest.class, "sequentialWrite",
- "Run sequential write test");
- addCommandDescriptor(ScanTest.class, "scan",
- "Run scan test (read every row)");
- addCommandDescriptor(FilteredScanTest.class, "filterScan",
- "Run scan test using a filter to find a specific row based on it's value " +
- "(make sure to use --rows=20)");
- addCommandDescriptor(IncrementTest.class, "increment",
- "Increment on each row; clients overlap on keyspace so some concurrent operations");
- addCommandDescriptor(AppendTest.class, "append",
- "Append on each row; clients overlap on keyspace so some concurrent operations");
- addCommandDescriptor(CheckAndMutateTest.class, "checkAndMutate",
- "CheckAndMutate on each row; clients overlap on keyspace so some concurrent operations");
- addCommandDescriptor(CheckAndPutTest.class, "checkAndPut",
- "CheckAndPut on each row; clients overlap on keyspace so some concurrent operations");
- addCommandDescriptor(CheckAndDeleteTest.class, "checkAndDelete",
- "CheckAndDelete on each row; clients overlap on keyspace so some concurrent operations");
- }
-
- /**
- * Enum for map metrics. Keep it out here rather than inside the Map
- * inner-class so we can find associated properties.
- */
- protected static enum Counter {
- /** elapsed time */
- ELAPSED_TIME,
- /** number of rows */
- ROWS
- }
-
- protected static class RunResult implements Comparable<RunResult> {
- public RunResult(long duration, Histogram hist) {
- this.duration = duration;
- this.hist = hist;
- }
-
- public final long duration;
- public final Histogram hist;
-
- @Override
- public String toString() {
- return Long.toString(duration);
- }
-
- @Override public int compareTo(RunResult o) {
- return Long.compare(this.duration, o.duration);
- }
- }
-
- /**
- * Constructor
- * @param conf Configuration object
- */
- public PerformanceEvaluation(final Configuration conf) {
- super(conf);
- }
-
- protected static void addCommandDescriptor(Class<? extends TestBase> cmdClass,
- String name, String description) {
- CmdDescriptor cmdDescriptor = new CmdDescriptor(cmdClass, name, description);
- COMMANDS.put(name, cmdDescriptor);
- }
-
- /**
- * Implementations can have their status set.
- */
- interface Status {
- /**
- * Sets status
- * @param msg status message
- * @throws IOException
- */
- void setStatus(final String msg) throws IOException;
- }
-
- /**
- * MapReduce job that runs a performance evaluation client in each map task.
- */
- public static class EvaluationMapTask
- extends Mapper<LongWritable, Text, LongWritable, LongWritable> {
-
- /** configuration parameter name that contains the command */
- public final static String CMD_KEY = "EvaluationMapTask.command";
- /** configuration parameter name that contains the PE impl */
- public static final String PE_KEY = "EvaluationMapTask.performanceEvalImpl";
-
- private Class<? extends Test> cmd;
-
- @Override
- protected void setup(Context context) throws IOException, InterruptedException {
- this.cmd = forName(context.getConfiguration().get(CMD_KEY), Test.class);
-
- // this is required so that extensions of PE are instantiated within the
- // map reduce task...
- Class<? extends PerformanceEvaluation> peClass =
- forName(context.getConfiguration().get(PE_KEY), PerformanceEvaluation.class);
- try {
- peClass.getConstructor(Configuration.class).newInstance(context.getConfiguration());
- } catch (Exception e) {
- throw new IllegalStateException("Could not instantiate PE instance", e);
- }
- }
-
- private <Type> Class<? extends Type> forName(String className, Class<Type> type) {
- try {
- return Class.forName(className).asSubclass(type);
- } catch (ClassNotFoundException e) {
- throw new IllegalStateException("Could not find class for name: " + className, e);
- }
- }
-
- @Override
- protected void map(LongWritable key, Text value, final Context context)
- throws IOException, InterruptedException {
-
- Status status = new Status() {
- @Override
- public void setStatus(String msg) {
- context.setStatus(msg);
- }
- };
-
- ObjectMapper mapper = new ObjectMapper();
- TestOptions opts = mapper.readValue(value.toString(), TestOptions.class);
- Configuration conf = HBaseConfiguration.create(context.getConfiguration());
- final Connection con = ConnectionFactory.createConnection(conf);
- AsyncConnection asyncCon = null;
- try {
- asyncCon = ConnectionFactory.createAsyncConnection(conf).get();
- } catch (ExecutionException e) {
- throw new IOException(e);
- }
-
- // Evaluation task
- RunResult result = PerformanceEvaluation.runOneClient(this.cmd, conf, con, asyncCon, opts, status);
- // Collect how much time the thing took. Report as map output and
- // to the ELAPSED_TIME counter.
- context.getCounter(Counter.ELAPSED_TIME).increment(result.duration);
- context.getCounter(Counter.ROWS).increment(opts.perClientRunRows);
- context.write(new LongWritable(opts.startRow), new LongWritable(result.duration));
- context.progress();
- }
- }
-
- /*
- * If the table does not already exist, create it. Also recreate the table when
- * {@code opts.presplitRegions} is specified or when the existing table's
- * region replica count doesn't match {@code opts.replicas}.
- */
- static boolean checkTable(Admin admin, TestOptions opts) throws IOException {
- TableName tableName = TableName.valueOf(opts.tableName);
- boolean needsDelete = false, exists = admin.tableExists(tableName);
- boolean isReadCmd = opts.cmdName.toLowerCase(Locale.ROOT).contains("read")
- || opts.cmdName.toLowerCase(Locale.ROOT).contains("scan");
- if (!exists && isReadCmd) {
- throw new IllegalStateException(
- "Must specify an existing table for read commands. Run a write command first.");
- }
- HTableDescriptor desc =
- exists ? admin.getTableDescriptor(TableName.valueOf(opts.tableName)) : null;
- byte[][] splits = getSplits(opts);
-
- // recreate the table when user has requested presplit or when existing
- // {RegionSplitPolicy,replica count} does not match requested.
- if ((exists && opts.presplitRegions != DEFAULT_OPTS.presplitRegions)
- || (!isReadCmd && desc != null &&
- !StringUtils.equals(desc.getRegionSplitPolicyClassName(), opts.splitPolicy))
- || (!isReadCmd && desc != null && desc.getRegionReplication() != opts.replicas)) {
- needsDelete = true;
- // wait, why did it delete my table?!?
- LOG.debug(MoreObjects.toStringHelper("needsDelete")
- .add("needsDelete", needsDelete)
- .add("isReadCmd", isReadCmd)
- .add("exists", exists)
- .add("desc", desc)
- .add("presplit", opts.presplitRegions)
- .add("splitPolicy", opts.splitPolicy)
- .add("replicas", opts.replicas));
- }
-
- // remove an existing table
- if (needsDelete) {
- if (admin.isTableEnabled(tableName)) {
- admin.disableTable(tableName);
- }
- admin.deleteTable(tableName);
- }
-
- // table creation is necessary
- if (!exists || needsDelete) {
- desc = getTableDescriptor(opts);
- if (splits != null) {
- if (LOG.isDebugEnabled()) {
- for (int i = 0; i < splits.length; i++) {
- LOG.debug(" split " + i + ": " + Bytes.toStringBinary(splits[i]));
- }
- }
- }
- admin.createTable(desc, splits);
- LOG.info("Table " + desc + " created");
- }
- return admin.tableExists(tableName);
- }
-
- /**
- * Create an HTableDescriptor from provided TestOptions.
- */
- protected static HTableDescriptor getTableDescriptor(TestOptions opts) {
- HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(opts.tableName));
- HColumnDescriptor family = new HColumnDescriptor(FAMILY_NAME);
- family.setDataBlockEncoding(opts.blockEncoding);
- family.setCompressionType(opts.compression);
- family.setBloomFilterType(opts.bloomType);
- family.setBlocksize(opts.blockSize);
- if (opts.inMemoryCF) {
- family.setInMemory(true);
- }
- family.setInMemoryCompaction(opts.inMemoryCompaction);
- desc.addFamily(family);
- if (opts.replicas != DEFAULT_OPTS.replicas) {
- desc.setRegionReplication(opts.replicas);
- }
- if (opts.splitPolicy != DEFAULT_OPTS.splitPolicy) {
- desc.setRegionSplitPolicyClassName(opts.splitPolicy);
- }
- return desc;
- }
-
- /**
- * Generates splits based on the total number of rows and the requested number of split regions.
- */
- protected static byte[][] getSplits(TestOptions opts) {
- if (opts.presplitRegions == DEFAULT_OPTS.presplitRegions)
- return null;
-
- int numSplitPoints = opts.presplitRegions - 1;
- byte[][] splits = new byte[numSplitPoints][];
- int jump = opts.totalRows / opts.presplitRegions;
- for (int i = 0; i < numSplitPoints; i++) {
- int rowkey = jump * (1 + i);
- splits[i] = format(rowkey);
- }
- return splits;
- }
-
- /*
- * Run all clients in this vm each to its own thread.
- */
- static RunResult[] doLocalClients(final TestOptions opts, final Configuration conf)
- throws IOException, InterruptedException, ExecutionException {
- final Class<? extends TestBase> cmd = determineCommandClass(opts.cmdName);
- assert cmd != null;
- @SuppressWarnings("unchecked")
- Future<RunResult>[] threads = new Future[opts.numClientThreads];
- RunResult[] results = new RunResult[opts.numClientThreads];
- ExecutorService pool = Executors.newFixedThreadPool(opts.numClientThreads,
- new ThreadFactoryBuilder().setNameFormat("TestClient-%s").build());
- final Connection con = ConnectionFactory.createConnection(conf);
- final AsyncConnection asyncCon = ConnectionFactory.createAsyncConnection(conf).get();
- for (int i = 0; i < threads.length; i++) {
- final int index = i;
- threads[i] = pool.submit(new Callable<RunResult>() {
- @Override
- public RunResult call() throws Exception {
- TestOptions threadOpts = new TestOptions(opts);
- if (threadOpts.startRow == 0) threadOpts.startRow = index * threadOpts.perClientRunRows;
- RunResult run = runOneClient(cmd, conf, con, asyncCon, threadOpts, new Status() {
- @Override
- public void setStatus(final String msg) throws IOException {
- LOG.info(msg);
- }
- });
- LOG.info("Finished " + Thread.currentThread().getName() + " in " + run.duration +
- "ms over " + threadOpts.perClientRunRows + " rows");
- return run;
- }
- });
- }
- pool.shutdown();
-
- for (int i = 0; i < threads.length; i++) {
- try {
- results[i] = threads[i].get();
- } catch (ExecutionException e) {
- throw new IOException(e.getCause());
- }
- }
- final String test = cmd.getSimpleName();
- LOG.info("[" + test + "] Summary of timings (ms): "
- + Arrays.toString(results));
- Arrays.sort(results);
- long total = 0;
- for (RunResult result : results) {
- total += result.duration;
- }
- LOG.info("[" + test + "]"
- + "\tMin: " + results[0] + "ms"
- + "\tMax: " + results[results.length - 1] + "ms"
- + "\tAvg: " + (total / results.length) + "ms");
-
- con.close();
- asyncCon.close();
-
- return results;
- }
-
- /*
- * Run a mapreduce job. Run as many maps as asked-for clients.
- * Before we start up the job, write out an input file with instruction
- * per client regards which row they are to start on.
- * @param cmd Command to run.
- * @throws IOException
- */
- static Job doMapReduce(TestOptions opts, final Configuration conf)
- throws IOException, InterruptedException, ClassNotFoundException {
- final Class<? extends TestBase> cmd = determineCommandClass(opts.cmdName);
- assert cmd != null;
- Path inputDir = writeInputFile(conf, opts);
- conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
- conf.set(EvaluationMapTask.PE_KEY, PerformanceEvaluation.class.getName());
- Job job = Job.getInstance(conf);
- job.setJarByClass(PerformanceEvaluation.class);
- job.setJobName("HBase Performance Evaluation - " + opts.cmdName);
-
- job.setInputFormatClass(NLineInputFormat.class);
- NLineInputFormat.setInputPaths(job, inputDir);
- // this is default, but be explicit about it just in case.
- NLineInputFormat.setNumLinesPerSplit(job, 1);
-
- job.setOutputKeyClass(LongWritable.class);
- job.setOutputValueClass(LongWritable.class);
-
- job.setMapperClass(EvaluationMapTask.class);
- job.setReducerClass(LongSumReducer.class);
-
- job.setNumReduceTasks(1);
-
- job.setOutputFormatClass(TextOutputFormat.class);
- TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));
-
- TableMapReduceUtil.addDependencyJars(job);
- TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
- Histogram.class, // yammer metrics
- ObjectMapper.class); // jackson-mapper-asl
-
- TableMapReduceUtil.initCredentials(job);
-
- job.waitForCompletion(true);
- return job;
- }
-
- /**
- * Each client has one mapper to do the work; a single reduce task then collects the per-client results.
- */
-
- static String JOB_INPUT_FILENAME = "input.txt";
-
- /*
- * Write input file of offsets-per-client for the mapreduce job.
- * @param c Configuration
- * @return Directory that contains file written whose name is JOB_INPUT_FILENAME
- * @throws IOException
- */
- static Path writeInputFile(final Configuration c, final TestOptions opts) throws IOException {
- return writeInputFile(c, opts, new Path("."));
- }
-
- static Path writeInputFile(final Configuration c, final TestOptions opts, final Path basedir)
- throws IOException {
- SimpleDateFormat formatter = new SimpleDateFormat("yyyyMMddHHmmss");
- Path jobdir = new Path(new Path(basedir, PERF_EVAL_DIR), formatter.format(new Date()));
- Path inputDir = new Path(jobdir, "inputs");
-
- FileSystem fs = FileSystem.get(c);
- fs.mkdirs(inputDir);
-
- Path inputFile = new Path(inputDir, JOB_INPUT_FILENAME);
- PrintStream out = new PrintStream(fs.create(inputFile));
- // Make input random.
- Map<Integer, String> m = new TreeMap<>();
- Hash h = MurmurHash.getInstance();
- int perClientRows = (opts.totalRows / opts.numClientThreads);
- try {
- for (int j = 0; j < opts.numClientThreads; j++) {
- TestOptions next = new TestOptions(opts);
- next.startRow = j * perClientRows;
- next.perClientRunRows = perClientRows;
- String s = MAPPER.writeValueAsString(next);
- LOG.info("Client=" + j + ", input=" + s);
- byte[] b = Bytes.toBytes(s);
- int hash = h.hash(new ByteArrayHashKey(b, 0, b.length), -1);
- m.put(hash, s);
- }
- for (Map.Entry<Integer, String> e: m.entrySet()) {
- out.println(e.getValue());
- }
- } finally {
- out.close();
- }
- return inputDir;
- }
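As a worked illustration (numbers hypothetical): with opts.totalRows = 1,048,576 and opts.numClientThreads = 4, each client is assigned perClientRows = 1,048,576 / 4 = 262,144 rows, starting at offsets 0, 262,144, 524,288 and 786,432. Each JSON-serialized TestOptions line written here drives exactly one map task, because the job above uses NLineInputFormat with one line per split.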
-
- /**
- * Describes a command.
- */
- static class CmdDescriptor {
- private Class<? extends TestBase> cmdClass;
- private String name;
- private String description;
-
- CmdDescriptor(Class<? extends TestBase> cmdClass, String name, String description) {
- this.cmdClass = cmdClass;
- this.name = name;
- this.description = description;
- }
-
- public Class<? extends TestBase> getCmdClass() {
- return cmdClass;
- }
-
- public String getName() {
- return name;
- }
-
- public String getDescription() {
- return description;
- }
- }
-
- /**
- * Wraps up options passed to {@link org.apache.hadoop.hbase.PerformanceEvaluation}.
- * This makes tracking all these arguments a little easier.
- * NOTE: When ADDING AN OPTION, you need to add a data member, a getter/setter (so that JSON
- * serialization of this TestOptions class behaves), and you need to copy your new option from
- * 'that' to 'this' in the clone constructor below. Look for 'clone' below; a minimal sketch
- * follows this class.
- */
- static class TestOptions {
- String cmdName = null;
- boolean nomapred = false;
- boolean filterAll = false;
- int startRow = 0;
- float size = 1.0f;
- int perClientRunRows = DEFAULT_ROWS_PER_GB;
- int numClientThreads = 1;
- int totalRows = DEFAULT_ROWS_PER_GB;
- int measureAfter = 0;
- float sampleRate = 1.0f;
- double traceRate = 0.0;
- String tableName = TABLE_NAME;
- boolean flushCommits = true;
- boolean writeToWAL = true;
- boolean autoFlush = false;
- boolean oneCon = false;
- boolean useTags = false;
- int noOfTags = 1;
- boolean reportLatency = false;
- int multiGet = 0;
- int randomSleep = 0;
- boolean inMemoryCF = false;
- int presplitRegions = 0;
- int replicas = HTableDescriptor.DEFAULT_REGION_REPLICATION;
- String splitPolicy = null;
- Compression.Algorithm compression = Compression.Algorithm.NONE;
- BloomType bloomType = BloomType.ROW;
- int blockSize = HConstants.DEFAULT_BLOCKSIZE;
- DataBlockEncoding blockEncoding = DataBlockEncoding.NONE;
- boolean valueRandom = false;
- boolean valueZipf = false;
- int valueSize = DEFAULT_VALUE_LENGTH;
- int period = (this.perClientRunRows / 10) == 0? perClientRunRows: perClientRunRows / 10;
- int cycles = 1;
- int columns = 1;
- int caching = 30;
- boolean addColumns = true;
- MemoryCompactionPolicy inMemoryCompaction =
- MemoryCompactionPolicy.valueOf(
- CompactingMemStore.COMPACTING_MEMSTORE_TYPE_DEFAULT);
- boolean asyncPrefetch = false;
- boolean cacheBlocks = true;
- Scan.ReadType scanReadType = Scan.ReadType.DEFAULT;
-
- public TestOptions() {}
-
- /**
- * Clone constructor.
- * @param that Object to copy from.
- */
- public TestOptions(TestOptions that) {
- this.cmdName = that.cmdName;
- this.cycles = that.cycles;
- this.nomapred = that.nomapred;
- this.startRow = that.startRow;
- this.size = that.size;
- this.perClientRunRows = that.perClientRunRows;
- this.numClientThreads = that.numClientThreads;
- this.totalRows = that.totalRows;
- this.sampleRate = that.sampleRate;
- this.traceRate = that.traceRate;
- this.tableName = that.tableName;
- this.flushCommits = that.flushCommits;
- this.writeToWAL = that.writeToWAL;
- this.autoFlush = that.autoFlush;
- this.oneCon = that.oneCon;
- this.useTags = that.useTags;
- this.noOfTags = that.noOfTags;
- this.reportLatency = that.reportLatency;
- this.multiGet = that.multiGet;
- this.inMemoryCF = that.inMemoryCF;
- this.presplitRegions = that.presplitRegions;
- this.replicas = that.replicas;
- this.splitPolicy = that.splitPolicy;
- this.compression = that.compression;
- this.blockEncoding = that.blockEncoding;
- this.filterAll = that.filterAll;
- this.bloomType = that.bloomType;
- this.blockSize = that.blockSize;
- this.valueRandom = that.valueRandom;
- this.valueZipf = that.valueZipf;
- this.valueSize = that.valueSize;
- this.period = that.period;
- this.randomSleep = that.randomSleep;
- this.measureAfter = that.measureAfter;
- this.addColumns = that.addColumns;
- this.columns = that.columns;
- this.caching = that.caching;
- this.inMemoryCompaction = that.inMemoryCompaction;
- this.asyncPrefetch = that.asyncPrefetch;
- this.cacheBlocks = that.cacheBlocks;
- this.scanReadType = that.scanReadType;
- }
-
- public int getCaching() {
- return this.caching;
- }
-
- public void setCaching(final int caching) {
- this.caching = caching;
- }
-
- public int getColumns() {
- return this.columns;
- }
-
- public void setColumns(final int columns) {
- this.columns = columns;
- }
-
- public int getCycles() {
- return this.cycles;
- }
-
- public void setCycles(final int cycles) {
- this.cycles = cycles;
- }
-
- public boolean isValueZipf() {
- return valueZipf;
- }
-
- public void setValueZipf(boolean valueZipf) {
- this.valueZipf = valueZipf;
- }
-
- public String getCmdName() {
- return cmdName;
- }
-
- public void setCmdName(String cmdName) {
- this.cmdName = cmdName;
- }
-
- public int getRandomSleep() {
- return randomSleep;
- }
-
- public void setRandomSleep(int randomSleep) {
- this.randomSleep = randomSleep;
- }
-
- public int getReplicas() {
- return replicas;
- }
-
- public void setReplicas(int replicas) {
- this.replicas = replicas;
- }
-
- public String getSplitPolicy() {
- return splitPolicy;
- }
-
- public void setSplitPolicy(String splitPolicy) {
- this.splitPolicy = splitPolicy;
- }
-
- public void setNomapred(boolean nomapred) {
- this.nomapred = nomapred;
- }
-
- public void setFilterAll(boolean filterAll) {
- this.filterAll = filterAll;
- }
-
- public void setStartRow(int startRow) {
- this.startRow = startRow;
- }
-
- public void setSize(float size) {
- this.size = size;
- }
-
- public void setPerClientRunRows(int perClientRunRows) {
- this.perClientRunRows = perClientRunRows;
- }
-
- public void setNumClientThreads(int numClientThreads) {
- this.numClientThreads = numClientThreads;
- }
-
- public void setTotalRows(int totalRows) {
- this.totalRows = totalRows;
- }
-
- public void setSampleRate(float sampleRate) {
- this.sampleRate = sampleRate;
- }
-
- public void setTraceRate(double traceRate) {
- this.traceRate = traceRate;
- }
-
- public void setTableName(String tableName) {
- this.tableName = tableName;
- }
-
- public void setFlushCommits(boolean flushCommits) {
- this.flushCommits = flushCommits;
- }
-
- public void setWriteToWAL(boolean writeToWAL) {
- this.writeToWAL = writeToWAL;
- }
-
- public void setAutoFlush(boolean autoFlush) {
- this.autoFlush = autoFlush;
- }
-
- public void setOneCon(boolean oneCon) {
- this.oneCon = oneCon;
- }
-
- public void setUseTags(boolean useTags) {
- this.useTags = useTags;
- }
-
- public void setNoOfTags(int noOfTags) {
- this.noOfTags = noOfTags;
- }
-
- public void setReportLatency(boolean reportLatency) {
- this.reportLatency = reportLatency;
- }
-
- public void setMultiGet(int multiGet) {
- this.multiGet = multiGet;
- }
-
- public void setInMemoryCF(boolean inMemoryCF) {
- this.inMemoryCF = inMemoryCF;
- }
-
- public void setPresplitRegions(int presplitRegions) {
- this.presplitRegions = presplitRegions;
- }
-
- public void setCompression(Compression.Algorithm compression) {
- this.compression = compression;
- }
-
- public void setBloomType(BloomType bloomType) {
- this.bloomType = bloomType;
- }
-
- public void setBlockSize(int blockSize) {
- this.blockSize = blockSize;
- }
-
- public void setBlockEncoding(DataBlockEncoding blockEncoding) {
- this.blockEncoding = blockEncoding;
- }
-
- public void setValueRandom(boolean valueRandom) {
- this.valueRandom = valueRandom;
- }
-
- public void setValueSize(int valueSize) {
- this.valueSize = valueSize;
- }
-
- public void setPeriod(int period) {
- this.period = period;
- }
-
- public boolean isNomapred() {
- return nomapred;
- }
-
- public boolean isFilterAll() {
- return filterAll;
- }
-
- public int getStartRow() {
- return startRow;
- }
-
- public float getSize() {
- return size;
- }
-
- public int getPerClientRunRows() {
- return perClientRunRows;
- }
-
- public int getNumClientThreads() {
- return numClientThreads;
- }
-
- public int getTotalRows() {
- return totalRows;
- }
-
- public float getSampleRate() {
- return sampleRate;
- }
-
- public double getTraceRate() {
- return traceRate;
- }
-
- public String getTableName() {
- return tableName;
- }
-
- public boolean isFlushCommits() {
- return flushCommits;
- }
-
- public boolean isWriteToWAL() {
- return writeToWAL;
- }
-
- public boolean isAutoFlush() {
- return autoFlush;
- }
-
- public boolean isUseTags() {
- return useTags;
- }
-
- public int getNoOfTags() {
- return noOfTags;
- }
-
- public boolean isReportLatency() {
- return reportLatency;
- }
-
- public int getMultiGet() {
- return multiGet;
- }
-
- public boolean isInMemoryCF() {
- return inMemoryCF;
- }
-
- public int getPresplitRegions() {
- return presplitRegions;
- }
-
- public Compression.Algorithm getCompression() {
- return compression;
- }
-
- public DataBlockEncoding getBlockEncoding() {
- return blockEncoding;
- }
-
- public boolean isValueRandom() {
- return valueRandom;
- }
-
- public int getValueSize() {
- return valueSize;
- }
-
- public int getPeriod() {
- return period;
- }
-
- public BloomType getBloomType() {
- return bloomType;
- }
-
- public int getBlockSize() {
- return blockSize;
- }
-
- public boolean isOneCon() {
- return oneCon;
- }
-
- public int getMeasureAfter() {
- return measureAfter;
- }
-
- public void setMeasureAfter(int measureAfter) {
- this.measureAfter = measureAfter;
- }
-
- public boolean getAddColumns() {
- return addColumns;
- }
-
- public void setAddColumns(boolean addColumns) {
- this.addColumns = addColumns;
- }
-
- public void setInMemoryCompaction(MemoryCompactionPolicy inMemoryCompaction) {
- this.inMemoryCompaction = inMemoryCompaction;
- }
-
- public MemoryCompactionPolicy getInMemoryCompaction() {
- return this.inMemoryCompaction;
- }
- }
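As a minimal sketch of the NOTE above (the option name "batchSize" is hypothetical and not part of the original class), adding an option would touch three places inside TestOptions:

    // Hypothetical option "batchSize", for illustration only.
    // 1) data member, with its default
    int batchSize = 1;

    // 2) getter/setter so JSON (de)serialization of TestOptions keeps working
    public int getBatchSize() {
      return batchSize;
    }
    public void setBatchSize(int batchSize) {
      this.batchSize = batchSize;
    }

    // 3) one line in the clone constructor: this.batchSize = that.batchSize;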
-
- /*
- * A test.
- * Subclass to particularize what happens per row.
- */
- static abstract class TestBase {
- // The below makes it so that when Tests are all running in the one
- // JVM, each has a differently seeded Random.
- private static final Random randomSeed = new Random(System.currentTimeMillis());
-
- private static long nextRandomSeed() {
- return randomSeed.nextLong();
- }
- private final int everyN;
-
- protected final Random rand = new Random(nextRandomSeed());
- protected final Configuration conf;
- protected final TestOptions opts;
-
- private final Status status;
- private final Sampler<?> traceSampler;
- private final SpanReceiverHost receiverHost;
-
- private String testName;
- private Histogram latencyHistogram;
- private Histogram valueSizeHistogram;
- private RandomDistribution.Zipf zipf;
-
- /**
- * Note that all subclasses of this class must provide a public constructor
- * that has the exact same list of arguments.
- */
- TestBase(final Configuration conf, final TestOptions options, final Status status) {
- this.conf = conf;
- this.receiverHost = this.conf == null? null: SpanReceiverHost.getInstance(conf);
- this.opts = options;
- this.status = status;
- this.testName = this.getClass().getSimpleName();
- if (options.traceRate >= 1.0) {
- this.traceSampler = Sampler.ALWAYS;
- } else if (options.traceRate > 0.0) {
- conf.setDouble("hbase.sampler.fraction", options.traceRate);
- this.traceSampler = new ProbabilitySampler(new HBaseHTraceConfiguration(conf));
- } else {
- this.traceSampler = Sampler.NEVER;
- }
- everyN = (int) (opts.totalRows / (opts.totalRows * opts.sampleRate));
- if (options.isValueZipf()) {
- this.zipf = new RandomDistribution.Zipf(this.rand, 1, options.getValueSize(), 1.2);
- }
- LOG.info("Sampling 1 every " + everyN + " out of " + opts.perClientRunRows + " total rows.");
- }
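For example (illustrative numbers): with sampleRate = 0.01, totalRows / (totalRows * sampleRate) reduces to roughly 1 / sampleRate, so everyN = 100 and testTimed() below only issues a request for every 100th row index, i.e. about 1% of the assigned rows.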
-
- int getValueLength(final Random r) {
- if (this.opts.isValueRandom()) return Math.abs(r.nextInt() % opts.valueSize);
- else if (this.opts.isValueZipf()) return Math.abs(this.zipf.nextInt());
- else return opts.valueSize;
- }
-
- void updateValueSize(final Result [] rs) throws IOException {
- if (rs == null || !isRandomValueSize()) return;
- for (Result r: rs) updateValueSize(r);
- }
-
- void updateValueSize(final Result r) throws IOException {
- if (r == null || !isRandomValueSize()) return;
- int size = 0;
- for (CellScanner scanner = r.cellScanner(); scanner.advance();) {
- size += scanner.current().getValueLength();
- }
- updateValueSize(size);
- }
-
- void updateValueSize(final int valueSize) {
- if (!isRandomValueSize()) return;
- this.valueSizeHistogram.update(valueSize);
- }
-
- String generateStatus(final int sr, final int i, final int lr) {
- return sr + "/" + i + "/" + lr + ", latency " + getShortLatencyReport() +
- (!isRandomValueSize()? "": ", value size " + getShortValueSizeReport());
- }
-
- boolean isRandomValueSize() {
- return opts.valueRandom;
- }
-
- protected int getReportingPeriod() {
- return opts.period;
- }
-
- /**
- * Populated by testTakedown. Only implemented by RandomReadTest at the moment.
- */
- public Histogram getLatencyHistogram() {
- return latencyHistogram;
- }
-
- void testSetup() throws IOException {
- createConnection();
- onStartup();
- latencyHistogram = YammerHistogramUtils.newHistogram(new UniformReservoir(1024 * 500));
- valueSizeHistogram = YammerHistogramUtils.newHistogram(new UniformReservoir(1024 * 500));
- }
-
- abstract void createConnection() throws IOException;
-
- abstract void onStartup() throws IOException;
-
- void testTakedown() throws IOException {
- onTakedown();
- // Print all stats for this thread continuously.
- // Synchronize on Test.class so different threads don't intermingle the
- // output. We can't use 'this' here because each thread has its own instance of Test class.
- synchronized (Test.class) {
- status.setStatus("Test : " + testName + ", Thread : " + Thread.currentThread().getName());
- status.setStatus("Latency (us) : " + YammerHistogramUtils.getHistogramReport(
- latencyHistogram));
- status.setStatus("Num measures (latency) : " + latencyHistogram.getCount());
- status.setStatus(YammerHistogramUtils.getPrettyHistogramReport(latencyHistogram));
- status.setStatus("ValueSize (bytes) : "
- + YammerHistogramUtils.getHistogramReport(valueSizeHistogram));
- status.setStatus("Num measures (ValueSize): " + valueSizeHistogram.getCount());
- status.setStatus(YammerHistogramUtils.getPrettyHistogramReport(valueSizeHistogram));
- }
- closeConnection();
- receiverHost.closeReceivers();
- }
-
- abstract void onTakedown() throws IOException;
-
- abstract void closeConnection() throws IOException;
-
- /*
- * Run test
- * @return Elapsed time.
- * @throws IOException
- */
- long test() throws IOException, InterruptedException {
- testSetup();
- LOG.info("Timed test starting in thread " + Thread.currentThread().getName());
- final long startTime = System.nanoTime();
- try {
- testTimed();
- } finally {
- testTakedown();
- }
- return (System.nanoTime() - startTime) / 1000000;
- }
-
- int getStartRow() {
- return opts.startRow;
- }
-
- int getLastRow() {
- return getStartRow() + opts.perClientRunRows;
- }
-
- /**
- * Provides an extension point for tests that don't want a per row invocation.
- */
- void testTimed() throws IOException, InterruptedException {
- int startRow = getStartRow();
- int lastRow = getLastRow();
- // Report on completion of 1/10th of total.
- for (int ii = 0; ii < opts.cycles; ii++) {
- if (opts.cycles > 1) LOG.info("Cycle=" + ii + " of " + opts.cycles);
- for (int i = startRow; i < lastRow; i++) {
- if (i % everyN != 0) continue;
- long startTime = System.nanoTime();
- TraceScope scope = Trace.startSpan("test row", traceSampler);
- try {
- testRow(i);
- } finally {
- scope.close();
- }
- if ( (i - startRow) > opts.measureAfter) {
- // If multiget is enabled, say set to 10, testRow() returns immediately first 9 times
- // and sends the actual get request in the 10th iteration. We should only set latency
- // when actual request is sent because otherwise it turns out to be 0.
- if (opts.multiGet == 0 || (i - startRow + 1) % opts.multiGet == 0) {
- latencyHistogram.update((System.nanoTime() - startTime) / 1000);
- }
- if (status != null && i > 0 && (i % getReportingPeriod()) == 0) {
- status.setStatus(generateStatus(startRow, i, lastRow));
- }
- }
- }
- }
- }
-
- /**
- * @return Subset of the histograms' calculation.
- */
- public String getShortLatencyReport() {
- return YammerHistogramUtils.getShortHistogramReport(this.latencyHistogram);
- }
-
- /**
- * @return Subset of the histograms' calculation.
- */
- public String getShortValueSizeReport() {
- return YammerHistogramUtils.getShortHistogramReport(this.valueSizeHistogram);
- }
-
- /*
- * Test for individual row.
- * @param i Row index.
- */
- abstract void testRow(final int i) throws IOException, InterruptedException;
- }
-
- static abstract class Test extends TestBase {
- protected Connection connection;
-
- Test(final Connection con, final TestOptions options, final Status status) {
- super(con == null ? HBaseConfiguration.create() : con.getConfiguration(), options, status);
- this.connection = con;
- }
-
- @Override
- void createConnection() throws IOException {
- if (!opts.isOneCon()) {
- this.connection = ConnectionFactory.createConnection(conf);
- }
- }
-
- @Override
- void closeConnection() throws IOException {
- if (!opts.isOneCon()) {
- this.connection.close();
- }
- }
- }
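A hypothetical minimal subclass (not part of the original file; real tests such as RandomReadTest below follow the same shape) shows the constructor signature that runOneClient() looks up reflectively:

    // Hypothetical skeleton only; issues no requests.
    static class NoOpTest extends Test {
      NoOpTest(Connection con, TestOptions options, Status status) {
        super(con, options, status); // the exact argument list every Test subclass must accept
      }
      @Override void onStartup() throws IOException {}
      @Override void onTakedown() throws IOException {}
      @Override void testRow(final int i) throws IOException {
        // a real test would issue a Get/Put/Scan for row i here
      }
    }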
-
- static abstract class AsyncTest extends TestBase {
- protected AsyncConnection connection;
-
- AsyncTest(final AsyncConnection con, final TestOptions options, final Status status) {
- super(con == null ? HBaseConfiguration.create() : con.getConfiguration(), options, status);
- this.connection = con;
- }
-
- @Override
- void createConnection() {
- if (!opts.isOneCon()) {
- try {
- this.connection = ConnectionFactory.createAsyncConnection(conf).get();
- } catch (InterruptedException | ExecutionException e) {
- LOG.error("Failed to create async connection", e);
- }
- }
- }
-
- @Override
- void closeConnection() throws IOException {
- if (!opts.isOneCon()) {
- this.connection.close();
- }
- }
- }
-
- static abstract class TableTest extends Test {
- protected Table table;
-
- TableTest(Connection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- void onStartup() throws IOException {
- this.table = connection.getTable(TableName.valueOf(opts.tableName));
- }
-
- @Override
- void onTakedown() throws IOException {
- table.close();
- }
- }
-
- static abstract class AsyncTableTest extends AsyncTest {
- protected RawAsyncTable table;
-
- AsyncTableTest(AsyncConnection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- void onStartup() throws IOException {
- this.table = connection.getRawTable(TableName.valueOf(opts.tableName));
- }
-
- @Override
- void onTakedown() throws IOException {
- }
- }
-
- static class AsyncRandomReadTest extends AsyncTableTest {
- private final Consistency consistency;
- private ArrayList<Get> gets;
- private Random rd = new Random();
-
- AsyncRandomReadTest(AsyncConnection con, TestOptions options, Status status) {
- super(con, options, status);
- consistency = options.replicas == DEFAULT_OPTS.replicas ? null : Consistency.TIMELINE;
- if (opts.multiGet > 0) {
- LOG.info("MultiGet enabled. Sending GETs in batches of " + opts.multiGet + ".");
- this.gets = new ArrayList<>(opts.multiGet);
- }
- }
-
- @Override
- void testRow(final int i) throws IOException, InterruptedException {
- if (opts.randomSleep > 0) {
- Thread.sleep(rd.nextInt(opts.randomSleep));
- }
- Get get = new Get(getRandomRow(this.rand, opts.totalRows));
- if (opts.addColumns) {
- get.addColumn(FAMILY_NAME, QUALIFIER_NAME);
- } else {
- get.addFamily(FAMILY_NAME);
- }
- if (opts.filterAll) {
- get.setFilter(new FilterAllFilter());
- }
- get.setConsistency(consistency);
- if (LOG.isTraceEnabled()) LOG.trace(get.toString());
- try {
- if (opts.multiGet > 0) {
- this.gets.add(get);
- if (this.gets.size() == opts.multiGet) {
- Result[] rs =
- this.table.get(this.gets).stream().map(f -> propagate(f::get)).toArray(Result[]::new);
- updateValueSize(rs);
- this.gets.clear();
- }
- } else {
- updateValueSize(this.table.get(get).get());
- }
- } catch (ExecutionException e) {
- throw new IOException(e);
- }
- }
-
- public static RuntimeException runtime(Throwable e) {
- if (e instanceof RuntimeException) {
- return (RuntimeException) e;
- }
- return new RuntimeException(e);
- }
-
- public static <V> V propagate(Callable<V> callable) {
- try {
- return callable.call();
- } catch (Exception e) {
- throw runtime(e);
- }
- }
-
- @Override
- protected int getReportingPeriod() {
- int period = opts.perClientRunRows / 10;
- return period == 0 ? opts.perClientRunRows : period;
- }
-
- @Override
- protected void testTakedown() throws IOException {
- if (this.gets != null && this.gets.size() > 0) {
- this.table.get(gets);
- this.gets.clear();
- }
- super.testTakedown();
- }
- }
-
- static class AsyncRandomWriteTest extends AsyncTableTest {
- AsyncRandomWriteTest(AsyncConnection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- void testRow(final int i) throws IOException, InterruptedException {
- byte[] row = getRandomRow(this.rand, opts.totalRows);
- Put put = new Put(row);
- for (int column = 0; column < opts.columns; column++) {
- byte[] qualifier = column == 0 ? COLUMN_ZERO : Bytes.toBytes("" + column);
- byte[] value = generateData(this.rand, getValueLength(this.rand));
- if (opts.useTags) {
- byte[] tag = generateData(this.rand, TAG_LENGTH);
- Tag[] tags = new Tag[opts.noOfTags];
- for (int n = 0; n < opts.noOfTags; n++) {
- Tag t = new ArrayBackedTag((byte) n, tag);
- tags[n] = t;
- }
- KeyValue kv =
- new KeyValue(row, FAMILY_NAME, qualifier, HConstants.LATEST_TIMESTAMP, value, tags);
- put.add(kv);
- updateValueSize(kv.getValueLength());
- } else {
- put.addColumn(FAMILY_NAME, qualifier, value);
- updateValueSize(value.length);
- }
- }
- put.setDurability(opts.writeToWAL ? Durability.SYNC_WAL : Durability.SKIP_WAL);
- try {
- table.put(put).get();
- } catch (ExecutionException e) {
- throw new IOException(e);
- }
- }
- }
-
- static class AsyncScanTest extends AsyncTableTest {
- private ResultScanner testScanner;
- private AsyncTable asyncTable;
-
- AsyncScanTest(AsyncConnection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- void onStartup() throws IOException {
- this.asyncTable =
- connection.getTable(TableName.valueOf(opts.tableName),
- Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()));
- }
-
- @Override
- void testTakedown() throws IOException {
- if (this.testScanner != null) {
- this.testScanner.close();
- }
- super.testTakedown();
- }
-
- @Override
- void testRow(final int i) throws IOException {
- if (this.testScanner == null) {
- Scan scan =
- new Scan().withStartRow(format(opts.startRow)).setCaching(opts.caching)
- .setCacheBlocks(opts.cacheBlocks).setAsyncPrefetch(opts.asyncPrefetch)
- .setReadType(opts.scanReadType);
- if (opts.addColumns) {
- scan.addColumn(FAMILY_NAME, QUALIFIER_NAME);
- } else {
- scan.addFamily(FAMILY_NAME);
- }
- if (opts.filterAll) {
- scan.setFilter(new FilterAllFilter());
- }
- this.testScanner = asyncTable.getScanner(scan);
- }
- Result r = testScanner.next();
- updateValueSize(r);
- }
- }
-
- static class AsyncSequentialReadTest extends AsyncTableTest {
- AsyncSequentialReadTest(AsyncConnection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- void testRow(final int i) throws IOException, InterruptedException {
- Get get = new Get(format(i));
- if (opts.addColumns) {
- get.addColumn(FAMILY_NAME, QUALIFIER_NAME);
- }
- if (opts.filterAll) {
- get.setFilter(new FilterAllFilter());
- }
- try {
- updateValueSize(table.get(get).get());
- } catch (ExecutionException e) {
- throw new IOException(e);
- }
- }
- }
-
- static class AsyncSequentialWriteTest extends AsyncTableTest {
- AsyncSequentialWriteTest(AsyncConnection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- void testRow(final int i) throws IOException, InterruptedException {
- byte[] row = format(i);
- Put put = new Put(row);
- for (int column = 0; column < opts.columns; column++) {
- byte [] qualifier = column == 0? COLUMN_ZERO: Bytes.toBytes("" + column);
- byte[] value = generateData(this.rand, getValueLength(this.rand));
- if (opts.useTags) {
- byte[] tag = generateData(this.rand, TAG_LENGTH);
- Tag[] tags = new Tag[opts.noOfTags];
- for (int n = 0; n < opts.noOfTags; n++) {
- Tag t = new ArrayBackedTag((byte) n, tag);
- tags[n] = t;
- }
- KeyValue kv = new KeyValue(row, FAMILY_NAME, qualifier, HConstants.LATEST_TIMESTAMP,
- value, tags);
- put.add(kv);
- updateValueSize(kv.getValueLength());
- } else {
- put.addColumn(FAMILY_NAME, qualifier, value);
- updateValueSize(value.length);
- }
- }
- put.setDurability(opts.writeToWAL ? Durability.SYNC_WAL : Durability.SKIP_WAL);
- try {
- table.put(put).get();
- } catch (ExecutionException e) {
- throw new IOException(e);
- }
- }
- }
-
- static abstract class BufferedMutatorTest extends Test {
- protected BufferedMutator mutator;
- protected Table table;
-
- BufferedMutatorTest(Connection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- void onStartup() throws IOException {
- this.mutator = connection.getBufferedMutator(TableName.valueOf(opts.tableName));
- this.table = connection.getTable(TableName.valueOf(opts.tableName));
- }
-
- @Override
- void onTakedown() throws IOException {
- mutator.close();
- table.close();
- }
- }
-
- static class RandomSeekScanTest extends TableTest {
- RandomSeekScanTest(Connection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- void testRow(final int i) throws IOException {
- Scan scan = new Scan().withStartRow(getRandomRow(this.rand, opts.totalRows))
- .setCaching(opts.caching).setCacheBlocks(opts.cacheBlocks)
- .setAsyncPrefetch(opts.asyncPrefetch).setReadType(opts.scanReadType);
- FilterList list = new FilterList();
- if (opts.addColumns) {
- scan.addColumn(FAMILY_NAME, QUALIFIER_NAME);
- } else {
- scan.addFamily(FAMILY_NAME);
- }
- if (opts.filterAll) {
- list.addFilter(new FilterAllFilter());
- }
- list.addFilter(new WhileMatchFilter(new PageFilter(120)));
- scan.setFilter(list);
- ResultScanner s = this.table.getScanner(scan);
- for (Result rr; (rr = s.next()) != null;) {
- updateValueSize(rr);
- }
- s.close();
- }
-
- @Override
- protected int getReportingPeriod() {
- int period = opts.perClientRunRows / 100;
- return period == 0 ? opts.perClientRunRows : period;
- }
-
- }
-
- static abstract class RandomScanWithRangeTest extends TableTest {
- RandomScanWithRangeTest(Connection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- void testRow(final int i) throws IOException {
- Pair<byte[], byte[]> startAndStopRow = getStartAndStopRow();
- Scan scan = new Scan().withStartRow(startAndStopRow.getFirst())
- .withStopRow(startAndStopRow.getSecond()).setCaching(opts.caching)
- .setCacheBlocks(opts.cacheBlocks).setAsyncPrefetch(opts.asyncPrefetch)
- .setReadType(opts.scanReadType);
- if (opts.filterAll) {
- scan.setFilter(new FilterAllFilter());
- }
- if (opts.addColumns) {
- scan.addColumn(FAMILY_NAME, QUALIFIER_NAME);
- } else {
- scan.addFamily(FAMILY_NAME);
- }
- Result r = null;
- int count = 0;
- ResultScanner s = this.table.getScanner(scan);
- for (; (r = s.next()) != null;) {
- updateValueSize(r);
- count++;
- }
- if (i % 100 == 0) {
- LOG.info(String.format("Scan for key range %s - %s returned %s rows",
- Bytes.toString(startAndStopRow.getFirst()),
- Bytes.toString(startAndStopRow.getSecond()), count));
- }
-
- s.close();
- }
-
- protected abstract Pair<byte[],byte[]> getStartAndStopRow();
-
- protected Pair<byte[], byte[]> generateStartAndStopRows(int maxRange) {
- int start = this.rand.nextInt(Integer.MAX_VALUE) % opts.totalRows;
- int stop = start + maxRange;
- return new Pair<>(format(start), format(stop));
- }
-
- @Override
- protected int getReportingPeriod() {
- int period = opts.perClientRunRows / 100;
- return period == 0? opts.perClientRunRows: period;
- }
- }
-
- static class RandomScanWithRange10Test extends RandomScanWithRangeTest {
- RandomScanWithRange10Test(Connection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- protected Pair<byte[], byte[]> getStartAndStopRow() {
- return generateStartAndStopRows(10);
- }
- }
-
- static class RandomScanWithRange100Test extends RandomScanWithRangeTest {
- RandomScanWithRange100Test(Connection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- protected Pair<byte[], byte[]> getStartAndStopRow() {
- return generateStartAndStopRows(100);
- }
- }
-
- static class RandomScanWithRange1000Test extends RandomScanWithRangeTest {
- RandomScanWithRange1000Test(Connection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- protected Pair<byte[], byte[]> getStartAndStopRow() {
- return generateStartAndStopRows(1000);
- }
- }
-
- static class RandomScanWithRange10000Test extends RandomScanWithRangeTest {
- RandomScanWithRange10000Test(Connection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- protected Pair<byte[], byte[]> getStartAndStopRow() {
- return generateStartAndStopRows(10000);
- }
- }
-
- static class RandomReadTest extends TableTest {
- private final Consistency consistency;
- private ArrayList<Get> gets;
- private Random rd = new Random();
-
- RandomReadTest(Connection con, TestOptions options, Status status) {
- super(con, options, status);
- consistency = options.replicas == DEFAULT_OPTS.replicas ? null : Consistency.TIMELINE;
- if (opts.multiGet > 0) {
- LOG.info("MultiGet enabled. Sending GETs in batches of " + opts.multiGet + ".");
- this.gets = new ArrayList<>(opts.multiGet);
- }
- }
-
- @Override
- void testRow(final int i) throws IOException, InterruptedException {
- if (opts.randomSleep > 0) {
- Thread.sleep(rd.nextInt(opts.randomSleep));
- }
- Get get = new Get(getRandomRow(this.rand, opts.totalRows));
- if (opts.addColumns) {
- get.addColumn(FAMILY_NAME, QUALIFIER_NAME);
- } else {
- get.addFamily(FAMILY_NAME);
- }
- if (opts.filterAll) {
- get.setFilter(new FilterAllFilter());
- }
- get.setConsistency(consistency);
- if (LOG.isTraceEnabled()) LOG.trace(get.toString());
- if (opts.multiGet > 0) {
- this.gets.add(get);
- if (this.gets.size() == opts.multiGet) {
- Result [] rs = this.table.get(this.gets);
- updateValueSize(rs);
- this.gets.clear();
- }
- } else {
- updateValueSize(this.table.get(get));
- }
- }
-
- @Override
- protected int getReportingPeriod() {
- int period = opts.perClientRunRows / 10;
- return period == 0 ? opts.perClientRunRows : period;
- }
-
- @Override
- protected void testTakedown() throws IOException {
- if (this.gets != null && this.gets.size() > 0) {
- this.table.get(gets);
- this.gets.clear();
- }
- super.testTakedown();
- }
- }
-
- static class RandomWriteTest extends BufferedMutatorTest {
- RandomWriteTest(Connection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- void testRow(final int i) throws IOException {
- byte[] row = getRandomRow(this.rand, opts.totalRows);
- Put put = new Put(row);
- for (int column = 0; column < opts.columns; column++) {
- byte [] qualifier = column == 0? COLUMN_ZERO: Bytes.toBytes("" + column);
- byte[] value = generateData(this.rand, getValueLength(this.rand));
- if (opts.useTags) {
- byte[] tag = generateData(this.rand, TAG_LENGTH);
- Tag[] tags = new Tag[opts.noOfTags];
- for (int n = 0; n < opts.noOfTags; n++) {
- Tag t = new ArrayBackedTag((byte) n, tag);
- tags[n] = t;
- }
- KeyValue kv = new KeyValue(row, FAMILY_NAME, qualifier, HConstants.LATEST_TIMESTAMP,
- value, tags);
- put.add(kv);
- updateValueSize(kv.getValueLength());
- } else {
- put.addColumn(FAMILY_NAME, qualifier, value);
- updateValueSize(value.length);
- }
- }
- put.setDurability(opts.writeToWAL ? Durability.SYNC_WAL : Durability.SKIP_WAL);
- if (opts.autoFlush) {
- table.put(put);
- } else {
- mutator.mutate(put);
- }
- }
- }
-
- static class ScanTest extends TableTest {
- private ResultScanner testScanner;
-
- ScanTest(Connection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- void testTakedown() throws IOException {
- if (this.testScanner != null) {
- this.testScanner.close();
- }
- super.testTakedown();
- }
-
-
- @Override
- void testRow(final int i) throws IOException {
- if (this.testScanner == null) {
- Scan scan = new Scan().withStartRow(format(opts.startRow)).setCaching(opts.caching)
- .setCacheBlocks(opts.cacheBlocks).setAsyncPrefetch(opts.asyncPrefetch)
- .setReadType(opts.scanReadType);
- if (opts.addColumns) {
- scan.addColumn(FAMILY_NAME, QUALIFIER_NAME);
- } else {
- scan.addFamily(FAMILY_NAME);
- }
- if (opts.filterAll) {
- scan.setFilter(new FilterAllFilter());
- }
- this.testScanner = table.getScanner(scan);
- }
- Result r = testScanner.next();
- updateValueSize(r);
- }
- }
-
- /**
- * Base class for operations that are CAS-like: they read a value and then set it based on what
- * they read. In this category are increment, append, checkAndPut, etc.
- *
- * <p>These operations also want some concurrency going on. Usually when these tests run, they
- * operate in their own part of the key range. In CASTest, we will have them all overlap on the
- * same key space. We do this with our getStartRow and getLastRow overrides.
- */
- static abstract class CASTableTest extends TableTest {
- private final byte [] qualifier;
- CASTableTest(Connection con, TestOptions options, Status status) {
- super(con, options, status);
- qualifier = Bytes.toBytes(this.getClass().getSimpleName());
- }
-
- byte [] getQualifier() {
- return this.qualifier;
- }
-
- @Override
- int getStartRow() {
- return 0;
- }
-
- @Override
- int getLastRow() {
- return opts.perClientRunRows;
- }
- }
-
- static class IncrementTest extends CASTableTest {
- IncrementTest(Connection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- void testRow(final int i) throws IOException {
- Increment increment = new Increment(format(i));
- increment.addColumn(FAMILY_NAME, getQualifier(), 1L);
- updateValueSize(this.table.increment(increment));
- }
- }
-
- static class AppendTest extends CASTableTest {
- AppendTest(Connection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- void testRow(final int i) throws IOException {
- byte [] bytes = format(i);
- Append append = new Append(bytes);
- append.addColumn(FAMILY_NAME, getQualifier(), bytes);
- updateValueSize(this.table.append(append));
- }
- }
-
- static class CheckAndMutateTest extends CASTableTest {
- CheckAndMutateTest(Connection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- void testRow(final int i) throws IOException {
- byte [] bytes = format(i);
- // Put a known value so when we go to check it, it is there.
- Put put = new Put(bytes);
- put.addColumn(FAMILY_NAME, getQualifier(), bytes);
- this.table.put(put);
- RowMutations mutations = new RowMutations(bytes);
- mutations.add(put);
- this.table.checkAndMutate(bytes, FAMILY_NAME, getQualifier(), CompareOp.EQUAL, bytes,
- mutations);
- }
- }
-
- static class CheckAndPutTest extends CASTableTest {
- CheckAndPutTest(Connection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- void testRow(final int i) throws IOException {
- byte [] bytes = format(i);
- // Put a known value so when we go to check it, it is there.
- Put put = new Put(bytes);
- put.addColumn(FAMILY_NAME, getQualifier(), bytes);
- this.table.put(put);
- this.table.checkAndPut(bytes, FAMILY_NAME, getQualifier(), CompareOp.EQUAL, bytes, put);
- }
- }
-
- static class CheckAndDeleteTest extends CASTableTest {
- CheckAndDeleteTest(Connection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- void testRow(final int i) throws IOException {
- byte [] bytes = format(i);
- // Put a known value so when we go to check it, it is there.
- Put put = new Put(bytes);
- put.addColumn(FAMILY_NAME, getQualifier(), bytes);
- this.table.put(put);
- Delete delete = new Delete(put.getRow());
- delete.addColumn(FAMILY_NAME, getQualifier());
- this.table.checkAndDelete(bytes, FAMILY_NAME, getQualifier(), CompareOp.EQUAL, bytes, delete);
- }
- }
-
- static class SequentialReadTest extends TableTest {
- SequentialReadTest(Connection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- void testRow(final int i) throws IOException {
- Get get = new Get(format(i));
- if (opts.addColumns) {
- get.addColumn(FAMILY_NAME, QUALIFIER_NAME);
- }
- if (opts.filterAll) {
- get.setFilter(new FilterAllFilter());
- }
- updateValueSize(table.get(get));
- }
- }
-
- static class SequentialWriteTest extends BufferedMutatorTest {
- SequentialWriteTest(Connection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- void testRow(final int i) throws IOException {
- byte[] row = format(i);
- Put put = new Put(row);
- for (int column = 0; column < opts.columns; column++) {
- byte [] qualifier = column == 0? COLUMN_ZERO: Bytes.toBytes("" + column);
- byte[] value = generateData(this.rand, getValueLength(this.rand));
- if (opts.useTags) {
- byte[] tag = generateData(this.rand, TAG_LENGTH);
- Tag[] tags = new Tag[opts.noOfTags];
- for (int n = 0; n < opts.noOfTags; n++) {
- Tag t = new ArrayBackedTag((byte) n, tag);
- tags[n] = t;
- }
- KeyValue kv = new KeyValue(row, FAMILY_NAME, qualifier, HConstants.LATEST_TIMESTAMP,
- value, tags);
- put.add(kv);
- updateValueSize(kv.getValueLength());
- } else {
- put.addColumn(FAMILY_NAME, qualifier, value);
- updateValueSize(value.length);
- }
- }
- put.setDurability(opts.writeToWAL ? Durability.SYNC_WAL : Durability.SKIP_WAL);
- if (opts.autoFlush) {
- table.put(put);
- } else {
- mutator.mutate(put);
- }
- }
- }
-
- static class FilteredScanTest extends TableTest {
- protected static final Log LOG = LogFactory.getLog(FilteredScanTest.class.getName());
-
- FilteredScanTest(Connection con, TestOptions options, Status status) {
- super(con, options, status);
- }
-
- @Override
- void testRow(int i) throws IOException {
- byte[] value = generateData(this.rand, getValueLength(this.rand));
- Scan scan = constructScan(value);
- ResultScanner scanner = null;
- try {
- scanner = this.table.getScanner(scan);
- for (Result r = null; (r = scanner.next()) != null;) {
- updateValueSize(r);
- }
- } finally {
- if (scanner != null) scanner.close();
- }
- }
-
- protected Scan constructScan(byte[] valuePrefix) throws IOException {
- FilterList list = new FilterList();
- Filter filter = new SingleColumnValueFilter(
- FAMILY_NAME, COLUMN_ZERO, CompareFilter.CompareOp.EQUAL,
- new BinaryComparator(valuePrefix)
- );
- list.addFilter(filter);
- if(opts.filterAll) {
- list.addFilter(new FilterAllFilter());
- }
- Scan scan = new Scan().setCaching(opts.caching).setCacheBlocks(opts.cacheBlocks)
- .setAsyncPrefetch(opts.asyncPrefetch).setReadType(opts.scanReadType);
- if (opts.addColumns) {
- scan.addColumn(FAMILY_NAME, QUALIFIER_NAME);
- } else {
- scan.addFamily(FAMILY_NAME);
- }
- scan.setFilter(list);
- return scan;
- }
- }
-
- /**
- * Compute a throughput rate in MB/s.
- * @param rows Number of records consumed.
- * @param timeMs Time taken in milliseconds.
- * @param valueSize Average value length in bytes.
- * @param columns Number of columns written per row.
- * @return String value with label, e.g. '123.76 MB/s'
- */
- private static String calculateMbps(int rows, long timeMs, final int valueSize, int columns) {
- BigDecimal rowSize = BigDecimal.valueOf(ROW_LENGTH +
- ((valueSize + FAMILY_NAME.length + COLUMN_ZERO.length) * columns));
- BigDecimal mbps = BigDecimal.valueOf(rows).multiply(rowSize, CXT)
- .divide(BigDecimal.valueOf(timeMs), CXT).multiply(MS_PER_SEC, CXT)
- .divide(BYTES_PER_MB, CXT);
- return FMT.format(mbps) + " MB/s";
- }
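Illustrative check (sizes hypothetical): at roughly 1 KiB per row (row key plus one column's family, qualifier and value), 1,048,576 rows consumed in 100,000 ms is about 1 GiB over 100 s, which this method would report as roughly 10 MB/s.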
-
- /*
- * Format passed integer.
- * @param number Number to format.
- * @return Zero-prefixed, ROW_LENGTH-byte-wide decimal version of the passed
- * number (the absolute value is used if the number is negative).
- */
- public static byte [] format(final int number) {
- byte [] b = new byte[ROW_LENGTH];
- int d = Math.abs(number);
- for (int i = b.length - 1; i >= 0; i--) {
- b[i] = (byte)((d % 10) + '0');
- d /= 10;
- }
- return b;
- }
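For example, if ROW_LENGTH were 10 (a value chosen only for illustration), format(127) would return the bytes of "0000000127", and format(-127) would return the same, since the absolute value is taken.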
-
- /*
- * This method takes some time and is called inline while uploading data. For
- * example, in the mapfile test, generating the key and value
- * consumes about 30% of CPU time.
- * @return Generated random value to insert into a table cell.
- */
- public static byte[] generateData(final Random r, int length) {
- byte [] b = new byte [length];
- int i;
-
- for(i = 0; i < (length-8); i += 8) {
- b[i] = (byte) (65 + r.nextInt(26));
- b[i+1] = b[i];
- b[i+2] = b[i];
- b[i+3] = b[i];
- b[i+4] = b[i];
- b[i+5] = b[i];
- b[i+6] = b[i];
- b[i+7] = b[i];
- }
-
- byte a = (byte) (65 + r.nextInt(26));
- for(; i < length; i++) {
- b[i] = a;
- }
- return b;
- }
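As an illustration, a call with length = 20 might return the bytes of something like "QQQQQQQQBBBBBBBBKKKK" (the letters vary randomly per call): full blocks of eight identical ASCII letters, with the trailing partial block filled by a single letter.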
-
- static byte [] getRandomRow(final Random random, final int totalRows) {
- return format(generateRandomRow(random, totalRows));
- }
-
- static int generateRandomRow(final Random random, final int totalRows) {
- return random.nextInt(Integer.MAX_VALUE) % totalRows;
- }
-
- static RunResult runOneClient(final Class<? extends TestBase> cmd, Configuration conf,
- Connection con, AsyncConnection asyncCon, TestOptions opts, final Status status)
- throws IOException, InterruptedException {
- status.setStatus("Start " + cmd + " at offset " + opts.startRow + " for "
- + opts.perClientRunRows + " rows");
- long totalElapsedTime;
-
- final TestBase t;
- try {
- if (AsyncTest.class.isAssignableFrom(cmd)) {
- Class<? extends AsyncTest> newCmd = (Class<? extends AsyncTest>) cmd;
- Constructor<? extends AsyncTest> constructor =
- newCmd.getDeclaredConstructor(AsyncConnection.class, TestOptions.class, Status.class);
- t = constructor.newInstance(asyncCon, opts, status);
- } else {
- Class<? extends Test> newCmd = (Class<? extends Test>) cmd;
- Constructor<? extends Test> constructor =
- newCmd.getDeclaredConstructor(Connection.class, TestOptions.class, Status.class);
- t = constructor.newInstance(con, opts, status);
- }
- } catch (NoSuchMethodException e) {
- throw new IllegalArgumentException("Invalid command class: " + cmd.getName()
- + ". It does not provide a constructor as described by "
- + "the javadoc comment. Available constructors are: "
- + Arrays.toString(cmd.getConstructors()));
- } catch (Exception e) {
- throw new IllegalStateException("Failed to construct command class", e);
- }
- totalElapsedTime = t.test();
-
- status.setStatus("Finished " + cmd + " in " + totalElapsedTime +
- "ms at offset " + opts.startRow + " for " + opts.perClientRunRows + " rows" +
- " (" + calculateMbps((int)(opts.perClientRunRows * opts.sampleRate), totalElapsedTime,
- getAverageValueLength(opts), opts.columns) + ")");
-
- return new RunResult(totalElapsedTime, t.getLatencyHistogram());
- }
-
- private static int getAverageValueLength(final TestOptions opts) {
- return opts.valueRandom? opts.valueSize/2: opts.valueSize;
- }
-
- private void runTest(final Class<? extends TestBase> cmd, TestOptions opts) throws IOException,
- InterruptedException, ClassNotFoundException, ExecutionException {
- // Log the configuration we're going to run with. Uses JSON mapper because lazy. It'll do
- // the TestOptions introspection for us and dump the output in a readable format.
- LOG.info(cmd.getSimpleName() + " test run options=" + MAPPER.writeValueAsString(opts));
- Admin admin = null;
- Connection connection = null;
- try {
- connection = ConnectionFactory.createConnection(getConf());
- admin = connection.getAdmin();
- checkTable(admin, opts);
- } finally {
- if (admin != null) admin.close();
- if (connection != null) connection.close();
- }
- if (opts.nomapred) {
- doLocalClients(opts, getConf());
- } else {
- doMapReduce(opts, getConf());
- }
- }
-
- protected void printUsage() {
- printUsage(this.getClass().getName(), null);
- }
-
- protected static void printUsage(final String message) {
- printUsage(PerformanceEvaluation.class.getName(), message);
- }
-
- protected static void printUsageAndExit(final String message, final int exitCode) {
- printUsage(message);
- System.exit(exitCode);
- }
-
- protected static void printUsage(final String className, final String message) {
- if (message != null && message.length() > 0) {
- System.err.println(message);
- }
- System.err.println("Usage: java " + className + " \\");
- System.err.println(" <OPTIONS> [-D<property=value>]* <command> <nclients>");
- System.err.println();
- System.err.println("General Options:");
- System.err.println(" nomapred Run multiple clients using threads " +
- "(rather than use mapreduce)");
- System.err.println(" oneCon all the threads share the same connection. Default: False");
- System.err.println(" sampleRate Execute test on a sample of total " +
- "rows. Only supported by randomRead. Default: 1.0");
- System.err.println(" period Report every 'period' rows: " +
- "Default: opts.perClientRunRows / 10 = " + DEFAULT_OPTS.getPerClientRunRows()/10);
- System.err.println(" cycles How many times to cycle the test. Defaults: 1.");
- System.err.println(" traceRate Enable HTrace spans. Initiate tracing every N rows. " +
- "Default: 0");
- System.err.println(" latency Set to report operation latencies. Default: False");
- System.err.println(" measureAfter Start to measure the latency once 'measureAfter'" +
- " rows have been treated. Default: 0");
- System.err.println(" valueSize Pass value size to use: Default: "
- + DEFAULT_OPTS.getValueSize());
- System.err.println(" valueRandom Set if we should vary value size between 0 and " +
- "'valueSize'; set on read for stats on size: Default: Not set.");
- System.err.println(" blockEncoding Block encoding to use. Value should be one of "
- + Arrays.toString(DataBlockEncoding.values()) + ". Default: NONE");
- System.err.println();
- System.err.println("Table Creation / Write Tests:");
- System.err.println(" table Alternate table name. Default: 'TestTable'");
- System.err.println(" rows Rows each client runs. Default: "
- + DEFAULT_OPTS.getPerClientRunRows()
- + ". In case of randomReads and randomSeekScans this could"
- + " be specified along with --size to specify the number of rows to be scanned within"
- + " the total range specified by the size.");
- System.err.println(
- " size Total size in GiB. Mutually exclusive with --rows for writes and scans"
- + ". But for randomReads and randomSeekScans when you use size with --rows you could"
- + " use size to specify the end range and --rows"
- + " specifies the number of rows within that range. " + "Default: 1.0.");
- System.err.println(" compress Compression type to use (GZ, LZO, ...). Default: 'NONE'");
- System.err.println(" flushCommits Used to determine if the test should flush the table. " +
- "Default: false");
- System.err.println(" valueZipf Set if we should vary value size between 0 and " +
- "'valueSize' in zipf form: Default: Not set.");
- System.err.println(" writeToWAL Set writeToWAL on puts. Default: True");
- System.err.println(" autoFlush Set autoFlush on htable. Default: False");
- System.err.println(" presplit Create presplit table. If a table with same name exists,"
- + " it'll be deleted and recreated (instead of verifying count of its existing regions). "
- + "Recommended for accurate perf analysis (see guide). Default: disabled");
- System.err.println(" usetags Writes tags along with KVs. Use with HFile V3. " +
- "Default: false");
- System.err.println(" numoftags Specify the no of tags that would be needed. " +
- "This works only if usetags is true. Default: " + DEFAULT_OPTS.noOfTags);
- System.err.println(" splitPolicy Specify a custom RegionSplitPolicy for the table.");
- System.err.println(" columns Columns to write per row. Default: 1");
- System.err.println();
- System.err.println("Read Tests:");
- System.err.println(" filterAll Helps to filter out all the rows on the server side"
- + " there by not returning any thing back to the client. Helps to check the server side"
- + " performance. Uses FilterAllFilter internally. ");
- System.err.println(" multiGet Batch gets together into groups of N. Only supported " +
- "by randomRead. Default: disabled");
- System.err.println(" inmemory Tries to keep the HFiles of the CF " +
- "inmemory as far as possible. Not guaranteed that reads are always served " +
- "from memory. Default: false");
- System.err.println(" bloomFilter Bloom filter type, one of "
- + Arrays.toString(BloomType.values()));
- System.err.println(" blockSize Blocksize to use when writing out hfiles. ");
- System.err.println(" inmemoryCompaction Makes the column family to do inmemory flushes/compactions. "
- + "Uses the CompactingMemstore");
- System.err.println(" addColumns Adds columns to scans/gets explicitly. Default: true");
- System.err.println(" replicas Enable region replica testing. Defaults: 1.");
- System.err.println(" randomSleep Do a random sleep before each get between 0 and entered value. Defaults: 0");
- System.err.println(" caching Scan caching to use. Default: 30");
- System.err.println(" asyncPrefetch Enable asyncPrefetch for scan");
- System.err.println(" cacheBlocks Set the cacheBlocks option for scan. Default: true");
- System.err.println(" scanReadType Set the readType option for scan, stream/pread/default. Default: default");
- System.err.println();
- System.err.println(" Note: -D properties will be applied to the conf used. ");
- System.err.println(" For example: ");
- System.err.println(" -Dmapreduce.output.fileoutputformat.compress=true");
- System.err.println(" -Dmapreduce.task.timeout=60000");
- System.err.println();
- System.err.println("Command:");
- for (CmdDescriptor command : COMMANDS.values()) {
- System.err.println(String.format(" %-20s %s", command.getName(), command.getDescription()));
- }
- System.err.println();
- System.err.println("Args:");
- System.err.println(" nclients Integer. Required. Total number of clients "
- + "(and HRegionServers) running. 1 <= value <= 500");
- System.err.println("Examples:");
- System.err.println(" To run a single client doing the default 1M sequentialWrites:");
- System.err.println(" $ hbase " + className + " sequentialWrite 1");
- System.err.println(" To run 10 clients doing increments over ten rows:");
- System.err.println(" $ hbase " + className + " --rows=10 --nomapred increment 10");
- }
-
- /**
- * Parse options passed in via an arguments array. Assumes that array has been split
- * on white-space and placed into a {@code Queue}. Any unknown arguments will remain
- * in the queue at the conclusion of this method call. It's up to the caller to deal
- * with these unrecognized arguments.
- */
- static TestOptions parseOpts(Queue<String> args) {
- TestOptions opts = new TestOptions();
-
- String cmd = null;
- while ((cmd = args.poll()) != null) {
- if (cmd.equals("-h") || cmd.startsWith("--h")) {
- // place item back onto queue so that caller knows parsing was incomplete
- args.add(cmd);
- break;
- }
-
- final String nmr = "--nomapred";
- if (cmd.startsWith(nmr)) {
- opts.nomapred = true;
- continue;
- }
-
- final String rows = "--rows=";
- if (cmd.startsWith(rows)) {
- opts.perClientRunRows = Integer.parseInt(cmd.substring(rows.length()));
- continue;
- }
-
- final String cycles = "--cycles=";
- if (cmd.startsWith(cycles)) {
- opts.cycles = Integer.parseInt(cmd.substring(cycles.length()));
- continue;
- }
-
- final String sampleRate = "--sampleRate=";
- if (cmd.startsWith(sampleRate)) {
- opts.sampleRate = Float.parseFloat(cmd.substring(sampleRate.length()));
- continue;
- }
-
- final String table = "--table=";
- if (cmd.startsWith(table)) {
- opts.tableName = cmd.substring(table.length());
- continue;
- }
-
- final String startRow = "--startRow=";
- if (cmd.startsWith(startRow)) {
- opts.startRow = Integer.parseInt(cmd.substring(startRow.length()));
- continue;
- }
-
- final String compress = "--compress=";
- if (cmd.startsWith(compress)) {
- opts.compression = Compression.Algorithm.valueOf(cmd.substring(compress.length()));
- continue;
- }
-
- final String traceRate = "--traceRate=";
- if (cmd.startsWith(traceRate)) {
- opts.traceRate = Double.parseDouble(cmd.substring(traceRate.length()));
- continue;
- }
-
- final String blockEncoding = "--blockEncoding=";
- if (cmd.startsWith(blockEncoding)) {
- opts.blockEncoding = DataBlockEncoding.valueOf(cmd.substring(blockEncoding.length()));
- continue;
- }
-
- final String flushCommits = "--flushCommits=";
- if (cmd.startsWith(flushCommits)) {
- opts.flushCommits = Boolean.parseBoolean(cmd.substring(flushCommits.length()));
- continue;
- }
-
- final String writeToWAL = "--writeToWAL=";
- if (cmd.startsWith(writeToWAL)) {
- opts.writeToWAL = Boolean.parseBoolean(cmd.substring(writeToWAL.length()));
- continue;
- }
-
- final String presplit = "--presplit=";
- if (cmd.startsWith(presplit)) {
- opts.presplitRegions = Integer.parseInt(cmd.substring(presplit.length()));
- continue;
- }
-
- final String inMemory = "--inmemory=";
- if (cmd.startsWith(inMemory)) {
- opts.inMemoryCF = Boolean.parseBoolean(cmd.substring(inMemory.length()));
- continue;
- }
-
- final String autoFlush = "--autoFlush=";
- if (cmd.startsWith(autoFlush)) {
- opts.autoFlush = Boolean.parseBoolean(cmd.substring(autoFlush.length()));
- continue;
- }
-
- final String onceCon = "--oneCon=";
- if (cmd.startsWith(onceCon)) {
- opts.oneCon = Boolean.parseBoolean(cmd.substring(onceCon.length()));
- continue;
- }
-
- final String latency = "--latency";
- if (cmd.startsWith(latency)) {
- opts.reportLatency = true;
- continue;
- }
-
- final String multiGet = "--multiGet=";
- if (cmd.startsWith(multiGet)) {
- opts.multiGet = Integer.parseInt(cmd.substring(multiGet.length()));
- continue;
- }
-
- final String useTags = "--usetags=";
- if (cmd.startsWith(useTags)) {
- opts.useTags = Boolean.parseBoolean(cmd.substring(useTags.length()));
- continue;
- }
-
- final String noOfTags = "--numoftags=";
- if (cmd.startsWith(noOfTags)) {
- opts.noOfTags = Integer.parseInt(cmd.substring(noOfTags.length()));
- continue;
- }
-
- final String replicas = "--replicas=";
- if (cmd.startsWith(replicas)) {
- opts.replicas = Integer.parseInt(cmd.substring(replicas.length()));
- continue;
- }
-
- final String filterOutAll = "--filterAll";
- if (cmd.startsWith(filterOutAll)) {
- opts.filterAll = true;
- continue;
- }
-
- final String size = "--size=";
- if (cmd.startsWith(size)) {
- opts.size = Float.parseFloat(cmd.substring(size.length()));
- if (opts.size <= 1.0f) throw new IllegalStateException("Size must be > 1; i.e. 1GB");
- continue;
- }
-
- final String splitPolicy = "--splitPolicy=";
- if (cmd.startsWith(splitPolicy)) {
- opts.splitPolicy = cmd.substring(splitPolicy.length());
- continue;
- }
-
- final String randomSleep = "--randomSleep=";
- if (cmd.startsWith(randomSleep)) {
- opts.randomSleep = Integer.parseInt(cmd.substring(randomSleep.length()));
- continue;
- }
-
- final String measureAfter = "--measureAfter=";
- if (cmd.startsWith(measureAfter)) {
- opts.measureAfter = Integer.parseInt(cmd.substring(measureAfter.length()));
- continue;
- }
-
- final String bloomFilter = "--bloomFilter=";
- if (cmd.startsWith(bloomFilter)) {
- opts.bloomType = BloomType.valueOf(cmd.substring(bloomFilter.length()));
- continue;
- }
-
- final String blockSize = "--blockSize=";
- if (cmd.startsWith(blockSize)) {
- opts.blockSize = Integer.parseInt(cmd.substring(blockSize.length()));
- continue;
- }
-
- final String valueSize = "--valueSize=";
- if (cmd.startsWith(valueSize)) {
- opts.valueSize = Integer.parseInt(cmd.substring(valueSize.length()));
- continue;
- }
-
- final String valueRandom = "--valueRandom";
- if (cmd.startsWith(valueRandom)) {
- opts.valueRandom = true;
- if (opts.valueZipf) {
- throw new IllegalStateException("Either valueZipf or valueRandom but not both");
- }
- continue;
- }
-
- final String valueZipf = "--valueZipf";
- if (cmd.startsWith(valueZipf)) {
- opts.valueZipf = true;
- if (opts.valueRandom) {
- throw new IllegalStateException("Either valueZipf or valueRandom but not both");
- }
- continue;
- }
-
- final String period = "--period=";
- if (cmd.startsWith(period)) {
- opts.period = Integer.parseInt(cmd.substring(period.length()));
- continue;
- }
-
- final String addColumns = "--addColumns=";
- if (cmd.startsWith(addColumns)) {
- opts.addColumns = Boolean.parseBoolean(cmd.substring(addColumns.length
<TRUNCATED>
[03/41] hbase git commit: HBASE-18640 Move mapreduce out of
hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScanBase.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScanBase.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScanBase.java
deleted file mode 100644
index 0f49333..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScanBase.java
+++ /dev/null
@@ -1,287 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.NavigableMap;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.junit.AfterClass;
-import org.junit.Assert;
-import org.junit.BeforeClass;
-
-
-/**
- * <p>
- * Tests various scan start and stop row scenarios. The boundaries are set on a
- * Scan and then verified in a MapReduce job to confirm they are handed over and
- * honored properly.
- * </p>
- * <p>
- * This test is broken into two parts in order to side-step the 900-second test
- * timeout period documented in HBASE-8326.
- * </p>
- */
-public abstract class TestTableInputFormatScanBase {
-
- private static final Log LOG = LogFactory.getLog(TestTableInputFormatScanBase.class);
- static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
-
- static final TableName TABLE_NAME = TableName.valueOf("scantest");
- static final byte[][] INPUT_FAMILYS = {Bytes.toBytes("content1"), Bytes.toBytes("content2")};
- static final String KEY_STARTROW = "startRow";
- static final String KEY_LASTROW = "stpRow";
-
- private static Table table = null;
-
- @BeforeClass
- public static void setUpBeforeClass() throws Exception {
- // This test intermittently fails under hadoop2 (2.0.2-alpha) if shortcircuit-read (scr) is on.
- // This turns it off for this test. TODO: Figure out why scr breaks recovery.
- System.setProperty("hbase.tests.use.shortcircuit.reads", "false");
-
- // switch TIF to log at DEBUG level
- TEST_UTIL.enableDebug(TableInputFormat.class);
- TEST_UTIL.enableDebug(TableInputFormatBase.class);
- // start mini hbase cluster
- TEST_UTIL.startMiniCluster(3);
- // create and fill table
- table = TEST_UTIL.createMultiRegionTable(TABLE_NAME, INPUT_FAMILYS);
- TEST_UTIL.loadTable(table, INPUT_FAMILYS, null, false);
- }
-
- @AfterClass
- public static void tearDownAfterClass() throws Exception {
- TEST_UTIL.shutdownMiniCluster();
- }
-
- /**
- * Pass the key and value to reduce.
- */
- public static class ScanMapper
- extends TableMapper<ImmutableBytesWritable, ImmutableBytesWritable> {
-
- /**
- * Pass the key and value to reduce.
- *
- * @param key The key, here "aaa", "aab" etc.
- * @param value The value is the same as the key.
- * @param context The task context.
- * @throws IOException When reading the rows fails.
- */
- @Override
- public void map(ImmutableBytesWritable key, Result value,
- Context context)
- throws IOException, InterruptedException {
- if (value.size() != 2) {
- throw new IOException("There should be two input columns");
- }
- Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>>
- cfMap = value.getMap();
-
- if (!cfMap.containsKey(INPUT_FAMILYS[0]) || !cfMap.containsKey(INPUT_FAMILYS[1])) {
- throw new IOException("Wrong input columns. Missing: '" +
- Bytes.toString(INPUT_FAMILYS[0]) + "' or '" + Bytes.toString(INPUT_FAMILYS[1]) + "'.");
- }
-
- String val0 = Bytes.toStringBinary(value.getValue(INPUT_FAMILYS[0], null));
- String val1 = Bytes.toStringBinary(value.getValue(INPUT_FAMILYS[1], null));
- LOG.info("map: key -> " + Bytes.toStringBinary(key.get()) +
- ", value -> (" + val0 + ", " + val1 + ")");
- context.write(key, key);
- }
- }
-
- /**
- * Checks the last and first key seen against the scanner boundaries.
- */
- public static class ScanReducer
- extends Reducer<ImmutableBytesWritable, ImmutableBytesWritable,
- NullWritable, NullWritable> {
-
- private String first = null;
- private String last = null;
-
- protected void reduce(ImmutableBytesWritable key,
- Iterable<ImmutableBytesWritable> values, Context context)
- throws IOException, InterruptedException {
- int count = 0;
- for (ImmutableBytesWritable value : values) {
- String val = Bytes.toStringBinary(value.get());
- LOG.info("reduce: key[" + count + "] -> " +
- Bytes.toStringBinary(key.get()) + ", value -> " + val);
- if (first == null) first = val;
- last = val;
- count++;
- }
- }
-
- protected void cleanup(Context context)
- throws IOException, InterruptedException {
- Configuration c = context.getConfiguration();
- String startRow = c.get(KEY_STARTROW);
- String lastRow = c.get(KEY_LASTROW);
- LOG.info("cleanup: first -> \"" + first + "\", start row -> \"" + startRow + "\"");
- LOG.info("cleanup: last -> \"" + last + "\", last row -> \"" + lastRow + "\"");
- if (startRow != null && startRow.length() > 0) {
- assertEquals(startRow, first);
- }
- if (lastRow != null && lastRow.length() > 0) {
- assertEquals(lastRow, last);
- }
- }
-
- }
-
- /**
- * Tests an MR Scan initialized from properties set in the Configuration.
- *
- * @throws IOException
- * @throws ClassNotFoundException
- * @throws InterruptedException
- */
- protected void testScanFromConfiguration(String start, String stop, String last)
- throws IOException, InterruptedException, ClassNotFoundException {
- String jobName = "ScanFromConfig" + (start != null ? start.toUpperCase(Locale.ROOT) : "Empty") +
- "To" + (stop != null ? stop.toUpperCase(Locale.ROOT) : "Empty");
- Configuration c = new Configuration(TEST_UTIL.getConfiguration());
- c.set(TableInputFormat.INPUT_TABLE, TABLE_NAME.getNameAsString());
- c.set(TableInputFormat.SCAN_COLUMN_FAMILY, Bytes.toString(INPUT_FAMILYS[0]) + ", "
- + Bytes.toString(INPUT_FAMILYS[1]));
- c.set(KEY_STARTROW, start != null ? start : "");
- c.set(KEY_LASTROW, last != null ? last : "");
-
- if (start != null) {
- c.set(TableInputFormat.SCAN_ROW_START, start);
- }
-
- if (stop != null) {
- c.set(TableInputFormat.SCAN_ROW_STOP, stop);
- }
-
- Job job = new Job(c, jobName);
- job.setMapperClass(ScanMapper.class);
- job.setReducerClass(ScanReducer.class);
- job.setMapOutputKeyClass(ImmutableBytesWritable.class);
- job.setMapOutputValueClass(ImmutableBytesWritable.class);
- job.setInputFormatClass(TableInputFormat.class);
- job.setNumReduceTasks(1);
- FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
- TableMapReduceUtil.addDependencyJars(job);
- assertTrue(job.waitForCompletion(true));
- }
-
- /**
- * Tests an MR scan using specific start and stop rows.
- *
- * @throws IOException
- * @throws ClassNotFoundException
- * @throws InterruptedException
- */
- protected void testScan(String start, String stop, String last)
- throws IOException, InterruptedException, ClassNotFoundException {
- String jobName = "Scan" + (start != null ? start.toUpperCase(Locale.ROOT) : "Empty") +
- "To" + (stop != null ? stop.toUpperCase(Locale.ROOT) : "Empty");
- LOG.info("Before map/reduce startup - job " + jobName);
- Configuration c = new Configuration(TEST_UTIL.getConfiguration());
- Scan scan = new Scan();
- scan.addFamily(INPUT_FAMILYS[0]);
- scan.addFamily(INPUT_FAMILYS[1]);
- if (start != null) {
- scan.setStartRow(Bytes.toBytes(start));
- }
- c.set(KEY_STARTROW, start != null ? start : "");
- if (stop != null) {
- scan.setStopRow(Bytes.toBytes(stop));
- }
- c.set(KEY_LASTROW, last != null ? last : "");
- LOG.info("scan before: " + scan);
- Job job = new Job(c, jobName);
- TableMapReduceUtil.initTableMapperJob(
- TABLE_NAME, scan, ScanMapper.class,
- ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
- job.setReducerClass(ScanReducer.class);
- job.setNumReduceTasks(1); // one to get final "first" and "last" key
- FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
- LOG.info("Started " + job.getJobName());
- assertTrue(job.waitForCompletion(true));
- LOG.info("After map/reduce completion - job " + jobName);
- }
-
-
- /**
- * Tests an MR scan using data skew auto-balance.
- *
- * @throws IOException
- * @throws ClassNotFoundException
- * @throws InterruptedException
- */
- public void testNumOfSplits(String ratio, int expectedNumOfSplits) throws IOException,
- InterruptedException,
- ClassNotFoundException {
- String jobName = "TestJobForNumOfSplits";
- LOG.info("Before map/reduce startup - job " + jobName);
- Configuration c = new Configuration(TEST_UTIL.getConfiguration());
- Scan scan = new Scan();
- scan.addFamily(INPUT_FAMILYS[0]);
- scan.addFamily(INPUT_FAMILYS[1]);
- c.set("hbase.mapreduce.input.autobalance", "true");
- c.set("hbase.mapreduce.input.autobalance.maxskewratio", ratio);
- c.set(KEY_STARTROW, "");
- c.set(KEY_LASTROW, "");
- Job job = new Job(c, jobName);
- TableMapReduceUtil.initTableMapperJob(TABLE_NAME.getNameAsString(), scan, ScanMapper.class,
- ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
- TableInputFormat tif = new TableInputFormat();
- tif.setConf(job.getConfiguration());
- Assert.assertEquals(TABLE_NAME, table.getName());
- List<InputSplit> splits = tif.getSplits(job);
- Assert.assertEquals(expectedNumOfSplits, splits.size());
- }
-
- /**
- * Tests for the getSplitKey() method in TableInputFormatBase.java
- */
- public void testGetSplitKey(byte[] startKey, byte[] endKey, byte[] splitKey, boolean isText) {
- byte[] result = TableInputFormatBase.getSplitKey(startKey, endKey, isText);
- Assert.assertArrayEquals(splitKey, result);
- }
-}
-
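
For orientation, a minimal sketch (not part of this commit) of the two setup paths the removed TestTableInputFormatScanBase exercises: scan boundaries supplied through TableInputFormat configuration keys, and an explicit Scan handed to TableMapReduceUtil.initTableMapperJob. The table name "scantest" and family "content1" mirror the test constants; the row keys, job names, and use of IdentityTableMapper are illustrative assumptions.

// Sketch only: placeholders are the row keys "aaa"/"kkk" and the job names;
// "scantest"/"content1" mirror the test constants above.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.IdentityTableMapper;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Job;

public class ScanBoundarySketch {

  // Path 1: TableInputFormat builds the Scan itself from configuration keys.
  static Job fromConfiguration(Configuration base) throws Exception {
    Configuration c = HBaseConfiguration.create(base);
    c.set(TableInputFormat.INPUT_TABLE, "scantest");
    c.set(TableInputFormat.SCAN_COLUMN_FAMILY, "content1");
    c.set(TableInputFormat.SCAN_ROW_START, "aaa");   // inclusive
    c.set(TableInputFormat.SCAN_ROW_STOP, "kkk");    // exclusive
    Job job = Job.getInstance(c, "ScanFromConfigSketch");
    job.setInputFormatClass(TableInputFormat.class);
    return job;
  }

  // Path 2: an explicit Scan handed to TableMapReduceUtil, as testScan() does.
  static Job fromScan(Configuration base) throws Exception {
    Scan scan = new Scan();
    scan.addFamily(Bytes.toBytes("content1"));
    scan.setStartRow(Bytes.toBytes("aaa"));          // inclusive
    scan.setStopRow(Bytes.toBytes("kkk"));           // exclusive
    Job job = Job.getInstance(HBaseConfiguration.create(base), "ScanFromApiSketch");
    TableMapReduceUtil.initTableMapperJob("scantest", scan, IdentityTableMapper.class,
        ImmutableBytesWritable.class, Result.class, job);
    return job;
  }
}
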
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduce.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduce.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduce.java
deleted file mode 100644
index d702e0d..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduce.java
+++ /dev/null
@@ -1,174 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.Map;
-import java.util.NavigableMap;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.fs.FileUtil;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.TableNotEnabledException;
-import org.apache.hadoop.hbase.TableNotFoundException;
-import org.apache.hadoop.hbase.client.Admin;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapreduce.Counter;
-import org.apache.hadoop.mapreduce.Counters;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-/**
- * Test Map/Reduce job over HBase tables. The map/reduce process we're testing
- * on our tables is simple - take every row in the table, reverse the value of
- * a particular cell, and write it back to the table.
- */
-
-@Category({VerySlowMapReduceTests.class, LargeTests.class})
-public class TestTableMapReduce extends TestTableMapReduceBase {
- private static final Log LOG = LogFactory.getLog(TestTableMapReduce.class);
-
- @Override
- protected Log getLog() { return LOG; }
-
- /**
- * Pass the given key and processed record to reduce.
- */
- static class ProcessContentsMapper extends TableMapper<ImmutableBytesWritable, Put> {
-
- /**
- * Pass the key, and reversed value to reduce
- *
- * @param key
- * @param value
- * @param context
- * @throws IOException
- */
- @Override
- public void map(ImmutableBytesWritable key, Result value,
- Context context)
- throws IOException, InterruptedException {
- if (value.size() != 1) {
- throw new IOException("There should only be one input column");
- }
- Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>>
- cf = value.getMap();
- if(!cf.containsKey(INPUT_FAMILY)) {
- throw new IOException("Wrong input columns. Missing: '" +
- Bytes.toString(INPUT_FAMILY) + "'.");
- }
-
- // Get the original value and reverse it
- String originalValue = Bytes.toString(value.getValue(INPUT_FAMILY, INPUT_FAMILY));
- StringBuilder newValue = new StringBuilder(originalValue);
- newValue.reverse();
- // Now set the value to be collected
- Put outval = new Put(key.get());
- outval.addColumn(OUTPUT_FAMILY, null, Bytes.toBytes(newValue.toString()));
- context.write(key, outval);
- }
- }
-
- @Override
- protected void runTestOnTable(Table table) throws IOException {
- Job job = null;
- try {
- LOG.info("Before map/reduce startup");
- job = new Job(table.getConfiguration(), "process column contents");
- job.setNumReduceTasks(1);
- Scan scan = new Scan();
- scan.addFamily(INPUT_FAMILY);
- TableMapReduceUtil.initTableMapperJob(
- table.getName().getNameAsString(), scan,
- ProcessContentsMapper.class, ImmutableBytesWritable.class,
- Put.class, job);
- TableMapReduceUtil.initTableReducerJob(
- table.getName().getNameAsString(),
- IdentityTableReducer.class, job);
- FileOutputFormat.setOutputPath(job, new Path("test"));
- LOG.info("Started " + table.getName().getNameAsString());
- assertTrue(job.waitForCompletion(true));
- LOG.info("After map/reduce completion");
-
- // verify map-reduce results
- verify(table.getName());
-
- verifyJobCountersAreEmitted(job);
- } catch (InterruptedException e) {
- throw new IOException(e);
- } catch (ClassNotFoundException e) {
- throw new IOException(e);
- } finally {
- table.close();
- if (job != null) {
- FileUtil.fullyDelete(
- new File(job.getConfiguration().get("hadoop.tmp.dir")));
- }
- }
- }
-
- /**
- * Verify scan counters are emitted from the job
- * @param job
- * @throws IOException
- */
- private void verifyJobCountersAreEmitted(Job job) throws IOException {
- Counters counters = job.getCounters();
- Counter counter
- = counters.findCounter(TableRecordReaderImpl.HBASE_COUNTER_GROUP_NAME, "RPC_CALLS");
- assertNotNull("Unable to find Job counter for HBase scan metrics, RPC_CALLS", counter);
- assertTrue("Counter value for RPC_CALLS should be larger than 0", counter.getValue() > 0);
- }
-
- @Test(expected = TableNotEnabledException.class)
- public void testWritingToDisabledTable() throws IOException {
-
- try (Admin admin = UTIL.getConnection().getAdmin();
- Table table = UTIL.getConnection().getTable(TABLE_FOR_NEGATIVE_TESTS)) {
- admin.disableTable(table.getName());
- runTestOnTable(table);
- fail("Should not have reached here, should have thrown an exception");
- }
- }
-
- @Test(expected = TableNotFoundException.class)
- public void testWritingToNonExistentTable() throws IOException {
-
- try (Table table = UTIL.getConnection().getTable(TableName.valueOf("table-does-not-exist"))) {
- runTestOnTable(table);
- fail("Should not have reached here, should have thrown an exception");
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduceBase.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduceBase.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduceBase.java
deleted file mode 100644
index 27bf063..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduceBase.java
+++ /dev/null
@@ -1,233 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import java.io.IOException;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.NavigableMap;
-
-import org.apache.commons.logging.Log;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.CategoryBasedTimeout;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TestRule;
-
-/**
- * A base class for a test Map/Reduce job over HBase tables. The map/reduce process we're testing
- * on our tables is simple - take every row in the table, reverse the value of a particular cell,
- * and write it back to the table. Implements common components between mapred and mapreduce
- * implementations.
- */
-public abstract class TestTableMapReduceBase {
- @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
- withTimeout(this.getClass()).withLookingForStuckThread(true).build();
- protected static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
- protected static final TableName MULTI_REGION_TABLE_NAME = TableName.valueOf("mrtest");
- protected static final TableName TABLE_FOR_NEGATIVE_TESTS = TableName.valueOf("testfailuretable");
- protected static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
- protected static final byte[] OUTPUT_FAMILY = Bytes.toBytes("text");
-
- protected static final byte[][] columns = new byte[][] {
- INPUT_FAMILY,
- OUTPUT_FAMILY
- };
-
- /**
- * Retrieve my logger instance.
- */
- protected abstract Log getLog();
-
- /**
- * Handles API-specifics for setting up and executing the job.
- */
- protected abstract void runTestOnTable(Table table) throws IOException;
-
- @BeforeClass
- public static void beforeClass() throws Exception {
- UTIL.startMiniCluster();
- Table table =
- UTIL.createMultiRegionTable(MULTI_REGION_TABLE_NAME, new byte[][] { INPUT_FAMILY,
- OUTPUT_FAMILY });
- UTIL.loadTable(table, INPUT_FAMILY, false);
- UTIL.createTable(TABLE_FOR_NEGATIVE_TESTS, new byte[][] { INPUT_FAMILY, OUTPUT_FAMILY });
- }
-
- @AfterClass
- public static void afterClass() throws Exception {
- UTIL.deleteTable(TABLE_FOR_NEGATIVE_TESTS);
- UTIL.shutdownMiniCluster();
- }
-
- /**
- * Test a map/reduce against a multi-region table
- * @throws IOException
- */
- @Test
- public void testMultiRegionTable() throws IOException {
- runTestOnTable(UTIL.getConnection().getTable(MULTI_REGION_TABLE_NAME));
- }
-
- @Test
- public void testCombiner() throws IOException {
- Configuration conf = new Configuration(UTIL.getConfiguration());
- // force use of combiner for testing purposes
- conf.setInt("mapreduce.map.combine.minspills", 1);
- runTestOnTable(UTIL.getConnection().getTable(MULTI_REGION_TABLE_NAME));
- }
-
- /**
- * Implements mapper logic for use across APIs.
- */
- protected static Put map(ImmutableBytesWritable key, Result value) throws IOException {
- if (value.size() != 1) {
- throw new IOException("There should only be one input column");
- }
- Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>>
- cf = value.getMap();
- if(!cf.containsKey(INPUT_FAMILY)) {
- throw new IOException("Wrong input columns. Missing: '" +
- Bytes.toString(INPUT_FAMILY) + "'.");
- }
-
- // Get the original value and reverse it
-
- String originalValue = Bytes.toString(value.getValue(INPUT_FAMILY, INPUT_FAMILY));
- StringBuilder newValue = new StringBuilder(originalValue);
- newValue.reverse();
-
- // Now set the value to be collected
-
- Put outval = new Put(key.get());
- outval.addColumn(OUTPUT_FAMILY, null, Bytes.toBytes(newValue.toString()));
- return outval;
- }
-
- protected void verify(TableName tableName) throws IOException {
- Table table = UTIL.getConnection().getTable(tableName);
- boolean verified = false;
- long pause = UTIL.getConfiguration().getLong("hbase.client.pause", 5 * 1000);
- int numRetries = UTIL.getConfiguration().getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 5);
- for (int i = 0; i < numRetries; i++) {
- try {
- getLog().info("Verification attempt #" + i);
- verifyAttempt(table);
- verified = true;
- break;
- } catch (NullPointerException e) {
- // If here, a cell was empty. Presume it's because updates came in
- // after the scanner had been opened. Wait a while and retry.
- getLog().debug("Verification attempt failed: " + e.getMessage());
- }
- try {
- Thread.sleep(pause);
- } catch (InterruptedException e) {
- // continue
- }
- }
- assertTrue(verified);
- }
-
- /**
- * Looks at every value of the mapreduce output and verifies that indeed
- * the values have been reversed.
- * @param table Table to scan.
- * @throws IOException
- * @throws NullPointerException if we failed to find a cell value
- */
- private void verifyAttempt(final Table table) throws IOException, NullPointerException {
- Scan scan = new Scan();
- TableInputFormat.addColumns(scan, columns);
- ResultScanner scanner = table.getScanner(scan);
- try {
- Iterator<Result> itr = scanner.iterator();
- assertTrue(itr.hasNext());
- while(itr.hasNext()) {
- Result r = itr.next();
- if (getLog().isDebugEnabled()) {
- if (r.size() > 2 ) {
- throw new IOException("Too many results, expected 2 got " +
- r.size());
- }
- }
- byte[] firstValue = null;
- byte[] secondValue = null;
- int count = 0;
- for(Cell kv : r.listCells()) {
- if (count == 0) {
- firstValue = CellUtil.cloneValue(kv);
- }
- if (count == 1) {
- secondValue = CellUtil.cloneValue(kv);
- }
- count++;
- if (count == 2) {
- break;
- }
- }
-
-
- if (firstValue == null) {
- throw new NullPointerException(Bytes.toString(r.getRow()) +
- ": first value is null");
- }
- String first = Bytes.toString(firstValue);
-
- if (secondValue == null) {
- throw new NullPointerException(Bytes.toString(r.getRow()) +
- ": second value is null");
- }
- byte[] secondReversed = new byte[secondValue.length];
- for (int i = 0, j = secondValue.length - 1; j >= 0; j--, i++) {
- secondReversed[i] = secondValue[j];
- }
- String second = Bytes.toString(secondReversed);
-
- if (first.compareTo(second) != 0) {
- if (getLog().isDebugEnabled()) {
- getLog().debug("second key is not the reverse of first. row=" +
- Bytes.toStringBinary(r.getRow()) + ", first value=" + first +
- ", second value=" + second);
- }
- fail();
- }
- }
- } finally {
- scanner.close();
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduceUtil.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduceUtil.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduceUtil.java
deleted file mode 100644
index 303a144..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduceUtil.java
+++ /dev/null
@@ -1,99 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
- * agreements. See the NOTICE file distributed with this work for additional information regarding
- * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with the License. You may
- * obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software distributed under the
- * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
- * either express or implied. See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.testclassification.SmallTests;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-/**
- * Test different variants of initTableMapperJob method
- */
-@Category({MapReduceTests.class, SmallTests.class})
-public class TestTableMapReduceUtil {
-
- /*
- * initTableSnapshotMapperJob is tested in {@link TestTableSnapshotInputFormat} because
- * the method depends on an online cluster.
- */
-
- @Test
- public void testInitTableMapperJob1() throws Exception {
- Configuration configuration = new Configuration();
- Job job = new Job(configuration, "tableName");
- // test
- TableMapReduceUtil.initTableMapperJob("Table", new Scan(), Import.Importer.class, Text.class,
- Text.class, job, false, WALInputFormat.class);
- assertEquals(WALInputFormat.class, job.getInputFormatClass());
- assertEquals(Import.Importer.class, job.getMapperClass());
- assertEquals(LongWritable.class, job.getOutputKeyClass());
- assertEquals(Text.class, job.getOutputValueClass());
- assertNull(job.getCombinerClass());
- assertEquals("Table", job.getConfiguration().get(TableInputFormat.INPUT_TABLE));
- }
-
- @Test
- public void testInitTableMapperJob2() throws Exception {
- Configuration configuration = new Configuration();
- Job job = new Job(configuration, "tableName");
- TableMapReduceUtil.initTableMapperJob(Bytes.toBytes("Table"), new Scan(),
- Import.Importer.class, Text.class, Text.class, job, false, WALInputFormat.class);
- assertEquals(WALInputFormat.class, job.getInputFormatClass());
- assertEquals(Import.Importer.class, job.getMapperClass());
- assertEquals(LongWritable.class, job.getOutputKeyClass());
- assertEquals(Text.class, job.getOutputValueClass());
- assertNull(job.getCombinerClass());
- assertEquals("Table", job.getConfiguration().get(TableInputFormat.INPUT_TABLE));
- }
-
- @Test
- public void testInitTableMapperJob3() throws Exception {
- Configuration configuration = new Configuration();
- Job job = new Job(configuration, "tableName");
- TableMapReduceUtil.initTableMapperJob(Bytes.toBytes("Table"), new Scan(),
- Import.Importer.class, Text.class, Text.class, job);
- assertEquals(TableInputFormat.class, job.getInputFormatClass());
- assertEquals(Import.Importer.class, job.getMapperClass());
- assertEquals(LongWritable.class, job.getOutputKeyClass());
- assertEquals(Text.class, job.getOutputValueClass());
- assertNull(job.getCombinerClass());
- assertEquals("Table", job.getConfiguration().get(TableInputFormat.INPUT_TABLE));
- }
-
- @Test
- public void testInitTableMapperJob4() throws Exception {
- Configuration configuration = new Configuration();
- Job job = new Job(configuration, "tableName");
- TableMapReduceUtil.initTableMapperJob(Bytes.toBytes("Table"), new Scan(),
- Import.Importer.class, Text.class, Text.class, job, false);
- assertEquals(TableInputFormat.class, job.getInputFormatClass());
- assertEquals(Import.Importer.class, job.getMapperClass());
- assertEquals(LongWritable.class, job.getOutputKeyClass());
- assertEquals(Text.class, job.getOutputValueClass());
- assertNull(job.getCombinerClass());
- assertEquals("Table", job.getConfiguration().get(TableInputFormat.INPUT_TABLE));
- }
-}
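
As a companion to the removed test, a sketch (not part of this commit) contrasting the initTableMapperJob overloads it verifies: the short overload leaves the job on the default TableInputFormat, while the overload taking an explicit input format class switches it, here to WALInputFormat. As in the test, the pairing is only meant to show the resulting job configuration, not to be executed; "SomeTable" and the job names are placeholders.

// Sketch only: mirrors the configuration checks in the deleted TestTableMapReduceUtil.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.Import;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.WALInputFormat;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

public class InitTableMapperJobSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();

    // Short overload: input defaults to TableInputFormat, reading the named table.
    Job scanJob = Job.getInstance(conf, "scan-backed");
    TableMapReduceUtil.initTableMapperJob("SomeTable", new Scan(),
        Import.Importer.class, Text.class, Text.class, scanJob);
    System.out.println(scanJob.getInputFormatClass());   // TableInputFormat

    // Long overload: an explicit input format class is set on the job instead.
    Job walJob = Job.getInstance(conf, "wal-backed");
    TableMapReduceUtil.initTableMapperJob("SomeTable", new Scan(),
        Import.Importer.class, Text.class, Text.class, walJob,
        false /* addDependencyJars */, WALInputFormat.class);
    System.out.println(walJob.getInputFormatClass());    // WALInputFormat
  }
}
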
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java
deleted file mode 100644
index 5e63082..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java
+++ /dev/null
@@ -1,384 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.when;
-
-import java.io.IOException;
-import java.util.List;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.CategoryBasedTimeout;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.HDFSBlocksDistribution;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat.TableSnapshotRegionSplit;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
-import org.junit.After;
-import org.junit.Assert;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
-import org.junit.rules.TestRule;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
-
-import java.util.Arrays;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.hbase.client.Admin;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
-import org.apache.hadoop.hbase.util.FSUtils;
-
-@Category({VerySlowMapReduceTests.class, LargeTests.class})
-public class TestTableSnapshotInputFormat extends TableSnapshotInputFormatTestBase {
- private static final Log LOG = LogFactory.getLog(TestTableSnapshotInputFormat.class);
- @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
- withTimeout(this.getClass()).withLookingForStuckThread(true).build();
-
- private static final byte[] bbb = Bytes.toBytes("bbb");
- private static final byte[] yyy = Bytes.toBytes("yyy");
-
- @Rule
- public TestName name = new TestName();
-
- @Override
- protected byte[] getStartRow() {
- return bbb;
- }
-
- @Override
- protected byte[] getEndRow() {
- return yyy;
- }
-
- @After
- public void tearDown() throws Exception {
- }
-
- @Test
- public void testGetBestLocations() throws IOException {
- TableSnapshotInputFormatImpl tsif = new TableSnapshotInputFormatImpl();
- Configuration conf = UTIL.getConfiguration();
-
- HDFSBlocksDistribution blockDistribution = new HDFSBlocksDistribution();
- Assert.assertEquals(Lists.newArrayList(),
- TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution));
-
- blockDistribution.addHostsAndBlockWeight(new String[] {"h1"}, 1);
- Assert.assertEquals(Lists.newArrayList("h1"),
- TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution));
-
- blockDistribution.addHostsAndBlockWeight(new String[] {"h1"}, 1);
- Assert.assertEquals(Lists.newArrayList("h1"),
- TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution));
-
- blockDistribution.addHostsAndBlockWeight(new String[] {"h2"}, 1);
- Assert.assertEquals(Lists.newArrayList("h1"),
- TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution));
-
- blockDistribution = new HDFSBlocksDistribution();
- blockDistribution.addHostsAndBlockWeight(new String[] {"h1"}, 10);
- blockDistribution.addHostsAndBlockWeight(new String[] {"h2"}, 7);
- blockDistribution.addHostsAndBlockWeight(new String[] {"h3"}, 5);
- blockDistribution.addHostsAndBlockWeight(new String[] {"h4"}, 1);
- Assert.assertEquals(Lists.newArrayList("h1"),
- TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution));
-
- blockDistribution.addHostsAndBlockWeight(new String[] {"h2"}, 2);
- Assert.assertEquals(Lists.newArrayList("h1", "h2"),
- TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution));
-
- blockDistribution.addHostsAndBlockWeight(new String[] {"h2"}, 3);
- Assert.assertEquals(Lists.newArrayList("h2", "h1"),
- TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution));
-
- blockDistribution.addHostsAndBlockWeight(new String[] {"h3"}, 6);
- blockDistribution.addHostsAndBlockWeight(new String[] {"h4"}, 9);
-
- Assert.assertEquals(Lists.newArrayList("h2", "h3", "h4", "h1"),
- TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution));
- }
-
- public static enum TestTableSnapshotCounters {
- VALIDATION_ERROR
- }
-
- public static class TestTableSnapshotMapper
- extends TableMapper<ImmutableBytesWritable, NullWritable> {
- @Override
- protected void map(ImmutableBytesWritable key, Result value,
- Context context) throws IOException, InterruptedException {
- // Validate a single row coming from the snapshot, and emit the row key
- verifyRowFromMap(key, value);
- context.write(key, NullWritable.get());
- }
- }
-
- public static class TestTableSnapshotReducer
- extends Reducer<ImmutableBytesWritable, NullWritable, NullWritable, NullWritable> {
- HBaseTestingUtility.SeenRowTracker rowTracker =
- new HBaseTestingUtility.SeenRowTracker(bbb, yyy);
- @Override
- protected void reduce(ImmutableBytesWritable key, Iterable<NullWritable> values,
- Context context) throws IOException, InterruptedException {
- rowTracker.addRow(key.get());
- }
-
- @Override
- protected void cleanup(Context context) throws IOException,
- InterruptedException {
- rowTracker.validate();
- }
- }
-
- @Test
- public void testInitTableSnapshotMapperJobConfig() throws Exception {
- setupCluster();
- final TableName tableName = TableName.valueOf(name.getMethodName());
- String snapshotName = "foo";
-
- try {
- createTableAndSnapshot(UTIL, tableName, snapshotName, getStartRow(), getEndRow(), 1);
- Job job = new Job(UTIL.getConfiguration());
- Path tmpTableDir = UTIL.getDataTestDirOnTestFS(snapshotName);
-
- TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
- new Scan(), TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
- NullWritable.class, job, false, tmpTableDir);
-
- // TODO: would be better to examine directly the cache instance that results from this
- // config. Currently this is not possible because BlockCache initialization is static.
- Assert.assertEquals(
- "Snapshot job should be configured for default LruBlockCache.",
- HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT,
- job.getConfiguration().getFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, -1), 0.01);
- Assert.assertEquals(
- "Snapshot job should not use BucketCache.",
- 0, job.getConfiguration().getFloat("hbase.bucketcache.size", -1), 0.01);
- } finally {
- UTIL.getAdmin().deleteSnapshot(snapshotName);
- UTIL.deleteTable(tableName);
- tearDownCluster();
- }
- }
-
- @Override
- public void testRestoreSnapshotDoesNotCreateBackRefLinksInit(TableName tableName,
- String snapshotName, Path tmpTableDir) throws Exception {
- Job job = new Job(UTIL.getConfiguration());
- TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
- new Scan(), TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
- NullWritable.class, job, false, tmpTableDir);
- }
-
- @Override
- public void testWithMockedMapReduce(HBaseTestingUtility util, String snapshotName,
- int numRegions, int expectedNumSplits) throws Exception {
- setupCluster();
- final TableName tableName = TableName.valueOf(name.getMethodName());
- try {
- createTableAndSnapshot(
- util, tableName, snapshotName, getStartRow(), getEndRow(), numRegions);
-
- Job job = new Job(util.getConfiguration());
- Path tmpTableDir = util.getDataTestDirOnTestFS(snapshotName);
- Scan scan = new Scan(getStartRow(), getEndRow()); // limit the scan
-
- TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
- scan, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
- NullWritable.class, job, false, tmpTableDir);
-
- verifyWithMockedMapReduce(job, numRegions, expectedNumSplits, getStartRow(), getEndRow());
-
- } finally {
- util.getAdmin().deleteSnapshot(snapshotName);
- util.deleteTable(tableName);
- tearDownCluster();
- }
- }
-
- public static void blockUntilSplitFinished(HBaseTestingUtility util, TableName tableName,
- int expectedRegionSize) throws Exception {
- for (int i = 0; i < 100; i++) {
- List<HRegionInfo> hRegionInfoList = util.getAdmin().getTableRegions(tableName);
- if (hRegionInfoList.size() >= expectedRegionSize) {
- break;
- }
- Thread.sleep(1000);
- }
- }
-
- @Test
- public void testNoDuplicateResultsWhenSplitting() throws Exception {
- setupCluster();
- TableName tableName = TableName.valueOf("testNoDuplicateResultsWhenSplitting");
- String snapshotName = "testSnapshotBug";
- try {
- if (UTIL.getAdmin().tableExists(tableName)) {
- UTIL.deleteTable(tableName);
- }
-
- UTIL.createTable(tableName, FAMILIES);
- Admin admin = UTIL.getAdmin();
-
- // put some stuff in the table
- Table table = UTIL.getConnection().getTable(tableName);
- UTIL.loadTable(table, FAMILIES);
-
- // split to 2 regions
- admin.split(tableName, Bytes.toBytes("eee"));
- blockUntilSplitFinished(UTIL, tableName, 2);
-
- Path rootDir = FSUtils.getRootDir(UTIL.getConfiguration());
- FileSystem fs = rootDir.getFileSystem(UTIL.getConfiguration());
-
- SnapshotTestingUtils.createSnapshotAndValidate(admin, tableName, Arrays.asList(FAMILIES),
- null, snapshotName, rootDir, fs, true);
-
- // load different values
- byte[] value = Bytes.toBytes("after_snapshot_value");
- UTIL.loadTable(table, FAMILIES, value);
-
- // cause flush to create new files in the region
- admin.flush(tableName);
- table.close();
-
- Job job = new Job(UTIL.getConfiguration());
- Path tmpTableDir = UTIL.getDataTestDirOnTestFS(snapshotName);
- // limit the scan
- Scan scan = new Scan().withStartRow(getStartRow()).withStopRow(getEndRow());
-
- TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName, scan,
- TestTableSnapshotMapper.class, ImmutableBytesWritable.class, NullWritable.class, job, false,
- tmpTableDir);
-
- verifyWithMockedMapReduce(job, 2, 2, getStartRow(), getEndRow());
- } finally {
- UTIL.getAdmin().deleteSnapshot(snapshotName);
- UTIL.deleteTable(tableName);
- tearDownCluster();
- }
- }
-
- private void verifyWithMockedMapReduce(Job job, int numRegions, int expectedNumSplits,
- byte[] startRow, byte[] stopRow)
- throws IOException, InterruptedException {
- TableSnapshotInputFormat tsif = new TableSnapshotInputFormat();
- List<InputSplit> splits = tsif.getSplits(job);
-
- Assert.assertEquals(expectedNumSplits, splits.size());
-
- HBaseTestingUtility.SeenRowTracker rowTracker =
- new HBaseTestingUtility.SeenRowTracker(startRow, stopRow);
-
- for (int i = 0; i < splits.size(); i++) {
- // validate input split
- InputSplit split = splits.get(i);
- Assert.assertTrue(split instanceof TableSnapshotRegionSplit);
-
- // validate record reader
- TaskAttemptContext taskAttemptContext = mock(TaskAttemptContext.class);
- when(taskAttemptContext.getConfiguration()).thenReturn(job.getConfiguration());
- RecordReader<ImmutableBytesWritable, Result> rr =
- tsif.createRecordReader(split, taskAttemptContext);
- rr.initialize(split, taskAttemptContext);
-
- // validate we can read all the data back
- while (rr.nextKeyValue()) {
- byte[] row = rr.getCurrentKey().get();
- verifyRowFromMap(rr.getCurrentKey(), rr.getCurrentValue());
- rowTracker.addRow(row);
- }
-
- rr.close();
- }
-
- // validate all rows are seen
- rowTracker.validate();
- }
-
- @Override
- protected void testWithMapReduceImpl(HBaseTestingUtility util, TableName tableName,
- String snapshotName, Path tableDir, int numRegions, int expectedNumSplits,
- boolean shutdownCluster) throws Exception {
- doTestWithMapReduce(util, tableName, snapshotName, getStartRow(), getEndRow(), tableDir,
- numRegions, expectedNumSplits, shutdownCluster);
- }
-
- // this is also called by the IntegrationTestTableSnapshotInputFormat
- public static void doTestWithMapReduce(HBaseTestingUtility util, TableName tableName,
- String snapshotName, byte[] startRow, byte[] endRow, Path tableDir, int numRegions,
- int expectedNumSplits, boolean shutdownCluster) throws Exception {
-
- LOG.info("testing with MapReduce");
-
- LOG.info("create the table and snapshot");
- createTableAndSnapshot(util, tableName, snapshotName, startRow, endRow, numRegions);
-
- if (shutdownCluster) {
- LOG.info("shutting down hbase cluster.");
- util.shutdownMiniHBaseCluster();
- }
-
- try {
- // create the job
- Job job = new Job(util.getConfiguration());
- Scan scan = new Scan(startRow, endRow); // limit the scan
-
- job.setJarByClass(util.getClass());
- TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
- TestTableSnapshotInputFormat.class);
-
- TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
- scan, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
- NullWritable.class, job, true, tableDir);
-
- job.setReducerClass(TestTableSnapshotInputFormat.TestTableSnapshotReducer.class);
- job.setNumReduceTasks(1);
- job.setOutputFormatClass(NullOutputFormat.class);
-
- Assert.assertTrue(job.waitForCompletion(true));
- } finally {
- if (!shutdownCluster) {
- util.getAdmin().deleteSnapshot(snapshotName);
- util.deleteTable(tableName);
- }
- }
- }
-}
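
A brief sketch (not part of this commit) of the snapshot-backed job setup the removed test repeats in several places: initTableSnapshotMapperJob restores the named snapshot under a temporary directory and builds splits from those files rather than from the live region servers. The snapshot name, restore path, row range, and output format choice below are illustrative assumptions.

// Sketch only: the mapper class is assumed to be a TableMapper such as the
// TestTableSnapshotMapper shown above.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;

public class SnapshotScanJobSketch {
  static Job build(Configuration conf,
      Class<? extends TableMapper<ImmutableBytesWritable, NullWritable>> mapper)
      throws Exception {
    Job job = Job.getInstance(HBaseConfiguration.create(conf), "snapshot-scan");
    // Splits come from restored snapshot files under tmpDir, not from region servers,
    // so the scan range only limits which snapshot regions become input splits.
    Scan scan = new Scan(Bytes.toBytes("bbb"), Bytes.toBytes("yyy"));
    Path tmpDir = new Path("/tmp/snapshot-restore");   // placeholder restore dir
    TableMapReduceUtil.initTableSnapshotMapperJob("someSnapshot", scan, mapper,
        ImmutableBytesWritable.class, NullWritable.class, job,
        true /* addDependencyJars */, tmpDir);
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setNumReduceTasks(0);
    return job;
  }
}
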
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSplit.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSplit.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSplit.java
deleted file mode 100644
index 4382c9c..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSplit.java
+++ /dev/null
@@ -1,129 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.testclassification.SmallTests;
-import org.apache.hadoop.util.ReflectionUtils;
-import org.junit.Assert;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
-
-import java.util.HashSet;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-@Category({MapReduceTests.class, SmallTests.class})
-public class TestTableSplit {
- @Rule
- public TestName name = new TestName();
-
- @Test
- public void testHashCode() {
- TableSplit split1 = new TableSplit(TableName.valueOf(name.getMethodName()),
- "row-start".getBytes(),
- "row-end".getBytes(), "location");
- TableSplit split2 = new TableSplit(TableName.valueOf(name.getMethodName()),
- "row-start".getBytes(),
- "row-end".getBytes(), "location");
- assertEquals (split1, split2);
- assertTrue (split1.hashCode() == split2.hashCode());
- HashSet<TableSplit> set = new HashSet<>(2);
- set.add(split1);
- set.add(split2);
- assertTrue(set.size() == 1);
- }
-
- /**
- * Length of region should not influence hashcode.
- * */
- @Test
- public void testHashCode_length() {
- TableSplit split1 = new TableSplit(TableName.valueOf(name.getMethodName()),
- "row-start".getBytes(),
- "row-end".getBytes(), "location", 1984);
- TableSplit split2 = new TableSplit(TableName.valueOf(name.getMethodName()),
- "row-start".getBytes(),
- "row-end".getBytes(), "location", 1982);
-
- assertEquals (split1, split2);
- assertTrue (split1.hashCode() == split2.hashCode());
- HashSet<TableSplit> set = new HashSet<>(2);
- set.add(split1);
- set.add(split2);
- assertTrue(set.size() == 1);
- }
-
- /**
- * Length of region needs to be properly serialized.
- * */
- @Test
- public void testLengthIsSerialized() throws Exception {
- TableSplit split1 = new TableSplit(TableName.valueOf(name.getMethodName()),
- "row-start".getBytes(),
- "row-end".getBytes(), "location", 666);
-
- TableSplit deserialized = new TableSplit(TableName.valueOf(name.getMethodName()),
- "row-start2".getBytes(),
- "row-end2".getBytes(), "location1");
- ReflectionUtils.copy(new Configuration(), split1, deserialized);
-
- Assert.assertEquals(666, deserialized.getLength());
- }
-
- @Test
- public void testToString() {
- TableSplit split =
- new TableSplit(TableName.valueOf(name.getMethodName()), "row-start".getBytes(), "row-end".getBytes(),
- "location");
- String str =
- "HBase table split(table name: " + name.getMethodName() + ", scan: , start row: row-start, "
- + "end row: row-end, region location: location, "
- + "encoded region name: )";
- Assert.assertEquals(str, split.toString());
-
- split =
- new TableSplit(TableName.valueOf(name.getMethodName()), null, "row-start".getBytes(),
- "row-end".getBytes(), "location", "encoded-region-name", 1000L);
- str =
- "HBase table split(table name: " + name.getMethodName() + ", scan: , start row: row-start, "
- + "end row: row-end, region location: location, "
- + "encoded region name: encoded-region-name)";
- Assert.assertEquals(str, split.toString());
-
- split = new TableSplit((TableName) null, null, null, null);
- str =
- "HBase table split(table name: null, scan: , start row: null, "
- + "end row: null, region location: null, "
- + "encoded region name: )";
- Assert.assertEquals(str, split.toString());
-
- split = new TableSplit((TableName) null, null, null, null, null, null, 1000L);
- str =
- "HBase table split(table name: null, scan: , start row: null, "
- + "end row: null, region location: null, "
- + "encoded region name: null)";
- Assert.assertEquals(str, split.toString());
- }
-}
-
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTimeRangeMapRed.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTimeRangeMapRed.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTimeRangeMapRed.java
deleted file mode 100644
index 6796c94..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTimeRangeMapRed.java
+++ /dev/null
@@ -1,211 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileUtil;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.HColumnDescriptor;
-import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Admin;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Durability;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.MapWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
-import org.junit.AfterClass;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.NavigableMap;
-import java.util.TreeMap;
-
-@Category({MapReduceTests.class, LargeTests.class})
-public class TestTimeRangeMapRed {
- private final static Log log = LogFactory.getLog(TestTimeRangeMapRed.class);
- private static final HBaseTestingUtility UTIL =
- new HBaseTestingUtility();
- private Admin admin;
-
- private static final byte [] KEY = Bytes.toBytes("row1");
- private static final NavigableMap<Long, Boolean> TIMESTAMP = new TreeMap<>();
- static {
- TIMESTAMP.put((long)1245620000, false);
- TIMESTAMP.put((long)1245620005, true); // include
- TIMESTAMP.put((long)1245620010, true); // include
- TIMESTAMP.put((long)1245620055, true); // include
- TIMESTAMP.put((long)1245620100, true); // include
- TIMESTAMP.put((long)1245620150, false);
- TIMESTAMP.put((long)1245620250, false);
- }
- static final long MINSTAMP = 1245620005;
- static final long MAXSTAMP = 1245620100 + 1; // maxStamp itself is excluded, so increment it.
-
- static final TableName TABLE_NAME = TableName.valueOf("table123");
- static final byte[] FAMILY_NAME = Bytes.toBytes("text");
- static final byte[] COLUMN_NAME = Bytes.toBytes("input");
-
- @BeforeClass
- public static void beforeClass() throws Exception {
- UTIL.startMiniCluster();
- }
-
- @AfterClass
- public static void afterClass() throws Exception {
- UTIL.shutdownMiniCluster();
- }
-
- @Before
- public void before() throws Exception {
- this.admin = UTIL.getAdmin();
- }
-
- private static class ProcessTimeRangeMapper
- extends TableMapper<ImmutableBytesWritable, MapWritable>
- implements Configurable {
-
- private Configuration conf = null;
- private Table table = null;
-
- @Override
- public void map(ImmutableBytesWritable key, Result result,
- Context context)
- throws IOException {
- List<Long> tsList = new ArrayList<>();
- for (Cell kv : result.listCells()) {
- tsList.add(kv.getTimestamp());
- }
-
- List<Put> puts = new ArrayList<>();
- for (Long ts : tsList) {
- Put put = new Put(key.get());
- put.setDurability(Durability.SKIP_WAL);
- put.addColumn(FAMILY_NAME, COLUMN_NAME, ts, Bytes.toBytes(true));
- puts.add(put);
- }
- table.put(puts);
- }
-
- @Override
- public Configuration getConf() {
- return conf;
- }
-
- @Override
- public void setConf(Configuration configuration) {
- this.conf = configuration;
- try {
- Connection connection = ConnectionFactory.createConnection(conf);
- table = connection.getTable(TABLE_NAME);
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- }
-
- @Test
- public void testTimeRangeMapRed()
- throws IOException, InterruptedException, ClassNotFoundException {
- final HTableDescriptor desc = new HTableDescriptor(TABLE_NAME);
- final HColumnDescriptor col = new HColumnDescriptor(FAMILY_NAME);
- col.setMaxVersions(Integer.MAX_VALUE);
- desc.addFamily(col);
- admin.createTable(desc);
- List<Put> puts = new ArrayList<>();
- for (Map.Entry<Long, Boolean> entry : TIMESTAMP.entrySet()) {
- Put put = new Put(KEY);
- put.setDurability(Durability.SKIP_WAL);
- put.addColumn(FAMILY_NAME, COLUMN_NAME, entry.getKey(), Bytes.toBytes(false));
- puts.add(put);
- }
- Table table = UTIL.getConnection().getTable(desc.getTableName());
- table.put(puts);
- runTestOnTable();
- verify(table);
- table.close();
- }
-
- private void runTestOnTable()
- throws IOException, InterruptedException, ClassNotFoundException {
- Job job = null;
- try {
- job = Job.getInstance(UTIL.getConfiguration(), "test123");
- job.setOutputFormatClass(NullOutputFormat.class);
- job.setNumReduceTasks(0);
- Scan scan = new Scan();
- scan.addColumn(FAMILY_NAME, COLUMN_NAME);
- scan.setTimeRange(MINSTAMP, MAXSTAMP);
- scan.setMaxVersions();
- TableMapReduceUtil.initTableMapperJob(TABLE_NAME,
- scan, ProcessTimeRangeMapper.class, Text.class, Text.class, job);
- job.waitForCompletion(true);
- } catch (IOException e) {
- // surface the failure; the finally block below still cleans up the temp dir
- log.error("time-range MR job failed", e);
- } finally {
- if (job != null) {
- FileUtil.fullyDelete(
- new File(job.getConfiguration().get("hadoop.tmp.dir")));
- }
- }
- }
-
- private void verify(final Table table) throws IOException {
- Scan scan = new Scan();
- scan.addColumn(FAMILY_NAME, COLUMN_NAME);
- scan.setMaxVersions(1);
- ResultScanner scanner = table.getScanner(scan);
- for (Result r: scanner) {
- for (Cell kv : r.listCells()) {
- log.debug(Bytes.toString(r.getRow()) + "\t" + Bytes.toString(CellUtil.cloneFamily(kv))
- + "\t" + Bytes.toString(CellUtil.cloneQualifier(kv))
- + "\t" + kv.getTimestamp() + "\t" + Bytes.toBoolean(CellUtil.cloneValue(kv)));
- org.junit.Assert.assertEquals(TIMESTAMP.get(kv.getTimestamp()),
- Bytes.toBoolean(CellUtil.cloneValue(kv)));
- }
- }
- scanner.close();
- }
-
-}
-
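An aside on the time-range arithmetic in this test: Scan.setTimeRange(min, max) is inclusive of min and exclusive of max, which is why MAXSTAMP is defined as the last wanted timestamp plus one. A minimal sketch, reusing this test's constants:

    Scan scan = new Scan();
    scan.addColumn(FAMILY_NAME, COLUMN_NAME);
    scan.setTimeRange(MINSTAMP, MAXSTAMP); // selects timestamps in [1245620005, 1245620101)
    scan.setMaxVersions();                 // keep every version inside that range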
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestWALPlayer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestWALPlayer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestWALPlayer.java
deleted file mode 100644
index 427c5cc..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestWALPlayer.java
+++ /dev/null
@@ -1,231 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-import static org.mockito.Matchers.any;
-import static org.mockito.Mockito.doAnswer;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.when;
-
-import java.io.ByteArrayOutputStream;
-import java.io.PrintStream;
-import java.util.ArrayList;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.MiniHBaseCluster;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Delete;
-import org.apache.hadoop.hbase.client.Get;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.mapreduce.WALPlayer.WALKeyValueMapper;
-import org.apache.hadoop.hbase.util.FSUtils;
-import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.LauncherSecurityManager;
-import org.apache.hadoop.hbase.wal.WAL;
-import org.apache.hadoop.hbase.wal.WALKey;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.Mapper.Context;
-import org.apache.hadoop.util.ToolRunner;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
-import org.mockito.invocation.InvocationOnMock;
-import org.mockito.stubbing.Answer;
-
-/**
- * Basic test for the WALPlayer M/R tool
- */
-@Category({MapReduceTests.class, LargeTests.class})
-public class TestWALPlayer {
- private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
- private static MiniHBaseCluster cluster;
- private static Path rootDir;
- private static Path walRootDir;
- private static FileSystem fs;
- private static FileSystem logFs;
- private static Configuration conf;
-
- @Rule
- public TestName name = new TestName();
-
- @BeforeClass
- public static void beforeClass() throws Exception {
- conf= TEST_UTIL.getConfiguration();
- rootDir = TEST_UTIL.createRootDir();
- walRootDir = TEST_UTIL.createWALRootDir();
- fs = FSUtils.getRootDirFileSystem(conf);
- logFs = FSUtils.getWALFileSystem(conf);
- cluster = TEST_UTIL.startMiniCluster();
- }
-
- @AfterClass
- public static void afterClass() throws Exception {
- TEST_UTIL.shutdownMiniCluster();
- fs.delete(rootDir, true);
- logFs.delete(walRootDir, true);
- }
-
- /**
- * Simple end-to-end test
- * @throws Exception
- */
- @Test
- public void testWALPlayer() throws Exception {
- final TableName tableName1 = TableName.valueOf(name.getMethodName() + "1");
- final TableName tableName2 = TableName.valueOf(name.getMethodName() + "2");
- final byte[] FAMILY = Bytes.toBytes("family");
- final byte[] COLUMN1 = Bytes.toBytes("c1");
- final byte[] COLUMN2 = Bytes.toBytes("c2");
- final byte[] ROW = Bytes.toBytes("row");
- Table t1 = TEST_UTIL.createTable(tableName1, FAMILY);
- Table t2 = TEST_UTIL.createTable(tableName2, FAMILY);
-
- // put a row into the first table
- Put p = new Put(ROW);
- p.addColumn(FAMILY, COLUMN1, COLUMN1);
- p.addColumn(FAMILY, COLUMN2, COLUMN2);
- t1.put(p);
- // delete one column
- Delete d = new Delete(ROW);
- d.addColumns(FAMILY, COLUMN1);
- t1.delete(d);
-
- // replay the WAL, map table 1 to table 2
- WAL log = cluster.getRegionServer(0).getWAL(null);
- log.rollWriter();
- String walInputDir = new Path(cluster.getMaster().getMasterFileSystem()
- .getWALRootDir(), HConstants.HREGION_LOGDIR_NAME).toString();
-
- Configuration configuration= TEST_UTIL.getConfiguration();
- WALPlayer player = new WALPlayer(configuration);
- String optionName="_test_.name";
- configuration.set(optionName, "1000");
- player.setupTime(configuration, optionName);
- assertEquals(1000,configuration.getLong(optionName,0));
- assertEquals(0, ToolRunner.run(configuration, player,
- new String[] {walInputDir, tableName1.getNameAsString(),
- tableName2.getNameAsString() }));
-
-
- // verify the WAL was replayed into table 2
- Get g = new Get(ROW);
- Result r = t2.get(g);
- assertEquals(1, r.size());
- assertTrue(CellUtil.matchingQualifier(r.rawCells()[0], COLUMN2));
- }
-
- /**
- * Test WALKeyValueMapper setup and map
- */
- @Test
- public void testWALKeyValueMapper() throws Exception {
- testWALKeyValueMapper(WALPlayer.TABLES_KEY);
- }
-
- @Test
- public void testWALKeyValueMapperWithDeprecatedConfig() throws Exception {
- testWALKeyValueMapper("hlog.input.tables");
- }
-
- private void testWALKeyValueMapper(final String tableConfigKey) throws Exception {
- Configuration configuration = new Configuration();
- configuration.set(tableConfigKey, "table");
- WALKeyValueMapper mapper = new WALKeyValueMapper();
- WALKey key = mock(WALKey.class);
- when(key.getTablename()).thenReturn(TableName.valueOf("table"));
- @SuppressWarnings("unchecked")
- Mapper<WALKey, WALEdit, ImmutableBytesWritable, KeyValue>.Context context = mock(Context.class);
- when(context.getConfiguration()).thenReturn(configuration);
-
- WALEdit value = mock(WALEdit.class);
- ArrayList<Cell> values = new ArrayList<>();
- KeyValue kv1 = new KeyValue(Bytes.toBytes("row"), Bytes.toBytes("family"), null);
-
- values.add(kv1);
- when(value.getCells()).thenReturn(values);
- mapper.setup(context);
-
- doAnswer(new Answer<Void>() {
-
- @Override
- public Void answer(InvocationOnMock invocation) throws Throwable {
- ImmutableBytesWritable writer = (ImmutableBytesWritable) invocation.getArguments()[0];
- KeyValue key = (KeyValue) invocation.getArguments()[1];
- assertEquals("row", Bytes.toString(writer.get()));
- assertEquals("row", Bytes.toString(CellUtil.cloneRow(key)));
- return null;
- }
- }).when(context).write(any(ImmutableBytesWritable.class), any(KeyValue.class));
-
- mapper.map(key, value, context);
-
- }
-
- /**
- * Test main method
- */
- @Test
- public void testMainMethod() throws Exception {
-
- PrintStream oldPrintStream = System.err;
- SecurityManager SECURITY_MANAGER = System.getSecurityManager();
- LauncherSecurityManager newSecurityManager= new LauncherSecurityManager();
- System.setSecurityManager(newSecurityManager);
- ByteArrayOutputStream data = new ByteArrayOutputStream();
- String[] args = {};
- System.setErr(new PrintStream(data));
- try {
- try {
- WALPlayer.main(args);
- fail("should be SecurityException");
- } catch (SecurityException e) {
- assertEquals(-1, newSecurityManager.getExitCode());
- assertTrue(data.toString().contains("ERROR: Wrong number of arguments:"));
- assertTrue(data.toString().contains("Usage: WALPlayer [options] <wal inputdir>" +
- " <tables> [<tableMappings>]"));
- assertTrue(data.toString().contains("-Dwal.bulk.output=/path/for/output"));
- }
-
- } finally {
- System.setErr(oldPrintStream);
- System.setSecurityManager(SECURITY_MANAGER);
- }
-
- }
-
-}
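For context, a hedged sketch of how WALPlayer would be driven outside this test, mirroring the ToolRunner call in testWALPlayer(); the WAL directory and table names below are placeholders:

    Configuration conf = HBaseConfiguration.create();
    int exitCode = ToolRunner.run(conf, new WALPlayer(conf),
        new String[] { "/hbase/WALs", "sourceTable", "targetTable" });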
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestWALRecordReader.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestWALRecordReader.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestWALRecordReader.java
deleted file mode 100644
index 34725b4..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestWALRecordReader.java
+++ /dev/null
@@ -1,276 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-
-import java.util.List;
-import java.util.NavigableMap;
-import java.util.TreeMap;
-import java.util.concurrent.atomic.AtomicLong;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.HColumnDescriptor;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.mapreduce.WALInputFormat.WALKeyRecordReader;
-import org.apache.hadoop.hbase.mapreduce.WALInputFormat.WALRecordReader;
-import org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl;
-import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
-import org.apache.hadoop.hbase.util.FSUtils;
-import org.apache.hadoop.hbase.wal.WAL;
-import org.apache.hadoop.hbase.wal.WALFactory;
-import org.apache.hadoop.hbase.wal.WALKey;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.testclassification.MediumTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.MapReduceTestUtil;
-import org.junit.AfterClass;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-/**
- * JUnit tests for the WALRecordReader
- */
-@Category({MapReduceTests.class, MediumTests.class})
-public class TestWALRecordReader {
- private static final Log LOG = LogFactory.getLog(TestWALRecordReader.class);
- private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
- private static Configuration conf;
- private static FileSystem fs;
- private static Path hbaseDir;
- private static FileSystem walFs;
- private static Path walRootDir;
- // visible for TestHLogRecordReader
- static final TableName tableName = TableName.valueOf(getName());
- private static final byte [] rowName = tableName.getName();
- // visible for TestHLogRecordReader
- static final HRegionInfo info = new HRegionInfo(tableName,
- Bytes.toBytes(""), Bytes.toBytes(""), false);
- private static final byte [] family = Bytes.toBytes("column");
- private static final byte [] value = Bytes.toBytes("value");
- private static HTableDescriptor htd;
- private static Path logDir;
- protected MultiVersionConcurrencyControl mvcc;
- protected static NavigableMap<byte[], Integer> scopes = new TreeMap<>(Bytes.BYTES_COMPARATOR);
-
- private static String getName() {
- return "TestWALRecordReader";
- }
-
- @Before
- public void setUp() throws Exception {
- fs.delete(hbaseDir, true);
- walFs.delete(walRootDir, true);
- mvcc = new MultiVersionConcurrencyControl();
- }
- @BeforeClass
- public static void setUpBeforeClass() throws Exception {
- // Make block sizes small.
- conf = TEST_UTIL.getConfiguration();
- conf.setInt("dfs.blocksize", 1024 * 1024);
- conf.setInt("dfs.replication", 1);
- TEST_UTIL.startMiniDFSCluster(1);
-
- fs = TEST_UTIL.getDFSCluster().getFileSystem();
-
- hbaseDir = TEST_UTIL.createRootDir();
- walRootDir = TEST_UTIL.createWALRootDir();
- walFs = FSUtils.getWALFileSystem(conf);
- logDir = new Path(walRootDir, HConstants.HREGION_LOGDIR_NAME);
-
- htd = new HTableDescriptor(tableName);
- htd.addFamily(new HColumnDescriptor(family));
- }
-
- @AfterClass
- public static void tearDownAfterClass() throws Exception {
- fs.delete(hbaseDir, true);
- walFs.delete(walRootDir, true);
- TEST_UTIL.shutdownMiniCluster();
- }
-
- /**
- * Test partial reads from the log based on passed time range
- * @throws Exception
- */
- @Test
- public void testPartialRead() throws Exception {
- final WALFactory walfactory = new WALFactory(conf, null, getName());
- WAL log = walfactory.getWAL(info.getEncodedNameAsBytes(), info.getTable().getNamespace());
- // This test depends on timestamp being millisecond based and the filename of the WAL also
- // being millisecond based.
- long ts = System.currentTimeMillis();
- WALEdit edit = new WALEdit();
- edit.add(new KeyValue(rowName, family, Bytes.toBytes("1"), ts, value));
- log.append(info, getWalKey(ts, scopes), edit, true);
- edit = new WALEdit();
- edit.add(new KeyValue(rowName, family, Bytes.toBytes("2"), ts+1, value));
- log.append(info, getWalKey(ts+1, scopes), edit, true);
- log.sync();
- LOG.info("Before 1st WAL roll " + log.toString());
- log.rollWriter();
- LOG.info("Past 1st WAL roll " + log.toString());
-
- Thread.sleep(1);
- long ts1 = System.currentTimeMillis();
-
- edit = new WALEdit();
- edit.add(new KeyValue(rowName, family, Bytes.toBytes("3"), ts1+1, value));
- log.append(info, getWalKey(ts1+1, scopes), edit, true);
- edit = new WALEdit();
- edit.add(new KeyValue(rowName, family, Bytes.toBytes("4"), ts1+2, value));
- log.append(info, getWalKey(ts1+2, scopes), edit, true);
- log.sync();
- log.shutdown();
- walfactory.shutdown();
- LOG.info("Closed WAL " + log.toString());
-
-
- WALInputFormat input = new WALInputFormat();
- Configuration jobConf = new Configuration(conf);
- jobConf.set("mapreduce.input.fileinputformat.inputdir", logDir.toString());
- jobConf.setLong(WALInputFormat.END_TIME_KEY, ts);
-
- // only 1st file is considered, and only its 1st entry is used
- List<InputSplit> splits = input.getSplits(MapreduceTestingShim.createJobContext(jobConf));
-
- assertEquals(1, splits.size());
- testSplit(splits.get(0), Bytes.toBytes("1"));
-
- jobConf.setLong(WALInputFormat.START_TIME_KEY, ts+1);
- jobConf.setLong(WALInputFormat.END_TIME_KEY, ts1+1);
- splits = input.getSplits(MapreduceTestingShim.createJobContext(jobConf));
- // both files need to be considered
- assertEquals(2, splits.size());
- // only the 2nd entry from the 1st file is used
- testSplit(splits.get(0), Bytes.toBytes("2"));
- // only the 1st entry from the 2nd file is used
- testSplit(splits.get(1), Bytes.toBytes("3"));
- }
-
- /**
- * Test basic functionality
- * @throws Exception
- */
- @Test
- public void testWALRecordReader() throws Exception {
- final WALFactory walfactory = new WALFactory(conf, null, getName());
- WAL log = walfactory.getWAL(info.getEncodedNameAsBytes(), info.getTable().getNamespace());
- byte [] value = Bytes.toBytes("value");
- final AtomicLong sequenceId = new AtomicLong(0);
- WALEdit edit = new WALEdit();
- edit.add(new KeyValue(rowName, family, Bytes.toBytes("1"),
- System.currentTimeMillis(), value));
- long txid = log.append(info, getWalKey(System.currentTimeMillis(), scopes), edit, true);
- log.sync(txid);
-
- Thread.sleep(1); // make sure 2nd log gets a later timestamp
- long secondTs = System.currentTimeMillis();
- log.rollWriter();
-
- edit = new WALEdit();
- edit.add(new KeyValue(rowName, family, Bytes.toBytes("2"),
- System.currentTimeMillis(), value));
- txid = log.append(info, getWalKey(System.currentTimeMillis(), scopes), edit, true);
- log.sync(txid);
- log.shutdown();
- walfactory.shutdown();
- long thirdTs = System.currentTimeMillis();
-
- // should have 2 log files now
- WALInputFormat input = new WALInputFormat();
- Configuration jobConf = new Configuration(conf);
- jobConf.set("mapreduce.input.fileinputformat.inputdir", logDir.toString());
-
- // make sure both logs are found
- List<InputSplit> splits = input.getSplits(MapreduceTestingShim.createJobContext(jobConf));
- assertEquals(2, splits.size());
-
- // should return exactly one KV
- testSplit(splits.get(0), Bytes.toBytes("1"));
- // same for the 2nd split
- testSplit(splits.get(1), Bytes.toBytes("2"));
-
- // now test basic time ranges:
-
- // set an endtime, the 2nd log file can be ignored completely.
- jobConf.setLong(WALInputFormat.END_TIME_KEY, secondTs-1);
- splits = input.getSplits(MapreduceTestingShim.createJobContext(jobConf));
- assertEquals(1, splits.size());
- testSplit(splits.get(0), Bytes.toBytes("1"));
-
- // now set a start time
- jobConf.setLong(WALInputFormat.END_TIME_KEY, Long.MAX_VALUE);
- jobConf.setLong(WALInputFormat.START_TIME_KEY, thirdTs);
- splits = input.getSplits(MapreduceTestingShim.createJobContext(jobConf));
- // both logs need to be considered
- assertEquals(2, splits.size());
- // but both readers skip all edits
- testSplit(splits.get(0));
- testSplit(splits.get(1));
- }
-
- protected WALKey getWalKey(final long time, NavigableMap<byte[], Integer> scopes) {
- return new WALKey(info.getEncodedNameAsBytes(), tableName, time, mvcc, scopes);
- }
-
- protected WALRecordReader getReader() {
- return new WALKeyRecordReader();
- }
-
- /**
- * Create a new reader from the split, and match the edits against the passed columns.
- */
- private void testSplit(InputSplit split, byte[]... columns) throws Exception {
- final WALRecordReader reader = getReader();
- reader.initialize(split, MapReduceTestUtil.createDummyMapTaskAttemptContext(conf));
-
- for (byte[] column : columns) {
- assertTrue(reader.nextKeyValue());
- Cell cell = reader.getCurrentValue().getCells().get(0);
- if (!Bytes.equals(column, 0, column.length, cell.getQualifierArray(),
- cell.getQualifierOffset(), cell.getQualifierLength())) {
- assertTrue(
- "expected ["
- + Bytes.toString(column)
- + "], actual ["
- + Bytes.toString(cell.getQualifierArray(), cell.getQualifierOffset(),
- cell.getQualifierLength()) + "]", false);
- }
- }
- assertFalse(reader.nextKeyValue());
- reader.close();
- }
-
-}
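For reference, a minimal sketch of the job configuration these split tests rely on; walDir, startTs and endTs are placeholders, and the helper names come from this test:

    Configuration jobConf = new Configuration(conf);
    jobConf.set("mapreduce.input.fileinputformat.inputdir", walDir.toString());
    jobConf.setLong(WALInputFormat.START_TIME_KEY, startTs); // edits written before startTs are skipped
    jobConf.setLong(WALInputFormat.END_TIME_KEY, endTs);     // edits and files after endTs are excluded
    List<InputSplit> splits = new WALInputFormat()
        .getSplits(MapreduceTestingShim.createJobContext(jobConf));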
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TsvImporterCustomTestMapper.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TsvImporterCustomTestMapper.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TsvImporterCustomTestMapper.java
deleted file mode 100644
index aea5036..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TsvImporterCustomTestMapper.java
+++ /dev/null
@@ -1,80 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Durability;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.KeyValue;
-
-import java.io.IOException;
-
-/**
- * Dummy mapper used for unit tests to verify that the mapper can be injected.
- * This approach would be used if a custom transformation needed to be done after
- * reading the input data before writing it to HFiles.
- */
-public class TsvImporterCustomTestMapper extends TsvImporterMapper {
-
- @Override
- protected void setup(Context context) {
- doSetup(context);
- }
-
- /**
- * Convert a line of TSV text into an HBase table row after transforming the
- * values by multiplying them by 3.
- */
- @Override
- public void map(LongWritable offset, Text value, Context context)
- throws IOException {
- byte[] family = Bytes.toBytes("FAM");
- final byte[][] qualifiers = { Bytes.toBytes("A"), Bytes.toBytes("B") };
-
- // do some basic line parsing
- byte[] lineBytes = value.getBytes();
- String[] valueTokens = new String(lineBytes, "UTF-8").split("\u001b");
-
- // create the rowKey and Put
- ImmutableBytesWritable rowKey =
- new ImmutableBytesWritable(Bytes.toBytes(valueTokens[0]));
- Put put = new Put(rowKey.copyBytes());
- put.setDurability(Durability.SKIP_WAL);
-
- //The value should look like this: VALUE1 or VALUE2. Let's multiply
- //the integer by 3
- for(int i = 1; i < valueTokens.length; i++) {
- String prefix = valueTokens[i].substring(0, "VALUE".length());
- String suffix = valueTokens[i].substring("VALUE".length());
- String newValue = prefix + Integer.parseInt(suffix) * 3;
-
- KeyValue kv = new KeyValue(rowKey.copyBytes(), family,
- qualifiers[i-1], Bytes.toBytes(newValue));
- put.add(kv);
- }
-
- try {
- context.write(rowKey, put);
- } catch (InterruptedException e) {
- e.printStackTrace();
- }
- }
-}
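A hedged sketch of how such a mapper would be injected into an ImportTsv run; the "importtsv.mapper.class" and "importtsv.columns" property names, and the argument order, are assumptions to be checked against the ImportTsv version in use:

    // hypothetical table name and input path; the property names are assumed ImportTsv keys
    Configuration conf = HBaseConfiguration.create();
    conf.set("importtsv.mapper.class", TsvImporterCustomTestMapper.class.getName());
    conf.set("importtsv.columns", "HBASE_ROW_KEY,FAM:A,FAM:B");
    int exitCode = ToolRunner.run(conf, new ImportTsv(), new String[] { "myTable", "/input/data.tsv" });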
[15/41] hbase git commit: HBASE-18640 Move mapreduce out of hbase-server into separate module.
Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/SyncTable.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/SyncTable.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/SyncTable.java
deleted file mode 100644
index c72a0c3..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/SyncTable.java
+++ /dev/null
@@ -1,786 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-import java.util.Iterator;
-import java.util.Collections;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellComparator;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Delete;
-import org.apache.hadoop.hbase.client.Mutation;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapreduce.Counters;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
-import org.apache.hadoop.util.GenericOptionsParser;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.base.Throwables;
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.Iterators;
-
-public class SyncTable extends Configured implements Tool {
-
- private static final Log LOG = LogFactory.getLog(SyncTable.class);
-
- static final String SOURCE_HASH_DIR_CONF_KEY = "sync.table.source.hash.dir";
- static final String SOURCE_TABLE_CONF_KEY = "sync.table.source.table.name";
- static final String TARGET_TABLE_CONF_KEY = "sync.table.target.table.name";
- static final String SOURCE_ZK_CLUSTER_CONF_KEY = "sync.table.source.zk.cluster";
- static final String TARGET_ZK_CLUSTER_CONF_KEY = "sync.table.target.zk.cluster";
- static final String DRY_RUN_CONF_KEY="sync.table.dry.run";
-
- Path sourceHashDir;
- String sourceTableName;
- String targetTableName;
-
- String sourceZkCluster;
- String targetZkCluster;
- boolean dryRun;
-
- Counters counters;
-
- public SyncTable(Configuration conf) {
- super(conf);
- }
-
- public Job createSubmittableJob(String[] args) throws IOException {
- FileSystem fs = sourceHashDir.getFileSystem(getConf());
- if (!fs.exists(sourceHashDir)) {
- throw new IOException("Source hash dir not found: " + sourceHashDir);
- }
-
- HashTable.TableHash tableHash = HashTable.TableHash.read(getConf(), sourceHashDir);
- LOG.info("Read source hash manifest: " + tableHash);
- LOG.info("Read " + tableHash.partitions.size() + " partition keys");
- if (!tableHash.tableName.equals(sourceTableName)) {
- LOG.warn("Table name mismatch - manifest indicates hash was taken from: "
- + tableHash.tableName + " but job is reading from: " + sourceTableName);
- }
- if (tableHash.numHashFiles != tableHash.partitions.size() + 1) {
- throw new RuntimeException("Hash data appears corrupt. The number of hash files created"
- + " should be 1 more than the number of partition keys. However, the manifest file"
- + " says numHashFiles=" + tableHash.numHashFiles + " but the number of partition keys"
- + " found in the partitions file is " + tableHash.partitions.size());
- }
-
- Path dataDir = new Path(sourceHashDir, HashTable.HASH_DATA_DIR);
- int dataSubdirCount = 0;
- for (FileStatus file : fs.listStatus(dataDir)) {
- if (file.getPath().getName().startsWith(HashTable.OUTPUT_DATA_FILE_PREFIX)) {
- dataSubdirCount++;
- }
- }
-
- if (dataSubdirCount != tableHash.numHashFiles) {
- throw new RuntimeException("Hash data appears corrupt. The number of hash files created"
- + " should be 1 more than the number of partition keys. However, the number of data dirs"
- + " found is " + dataSubdirCount + " but the number of partition keys"
- + " found in the partitions file is " + tableHash.partitions.size());
- }
-
- Job job = Job.getInstance(getConf(),getConf().get("mapreduce.job.name",
- "syncTable_" + sourceTableName + "-" + targetTableName));
- Configuration jobConf = job.getConfiguration();
- job.setJarByClass(HashTable.class);
- jobConf.set(SOURCE_HASH_DIR_CONF_KEY, sourceHashDir.toString());
- jobConf.set(SOURCE_TABLE_CONF_KEY, sourceTableName);
- jobConf.set(TARGET_TABLE_CONF_KEY, targetTableName);
- if (sourceZkCluster != null) {
- jobConf.set(SOURCE_ZK_CLUSTER_CONF_KEY, sourceZkCluster);
- }
- if (targetZkCluster != null) {
- jobConf.set(TARGET_ZK_CLUSTER_CONF_KEY, targetZkCluster);
- }
- jobConf.setBoolean(DRY_RUN_CONF_KEY, dryRun);
-
- TableMapReduceUtil.initTableMapperJob(targetTableName, tableHash.initScan(),
- SyncMapper.class, null, null, job);
-
- job.setNumReduceTasks(0);
-
- if (dryRun) {
- job.setOutputFormatClass(NullOutputFormat.class);
- } else {
- // No reducers. Just write straight to table. Call initTableReducerJob
- // because it sets up the TableOutputFormat.
- TableMapReduceUtil.initTableReducerJob(targetTableName, null, job, null,
- targetZkCluster, null, null);
-
- // would be nice to add an option for bulk load instead
- }
-
- // Obtain an authentication token, for the specified cluster, on behalf of the current user
- if (sourceZkCluster != null) {
- Configuration peerConf =
- HBaseConfiguration.createClusterConf(job.getConfiguration(), sourceZkCluster);
- TableMapReduceUtil.initCredentialsForCluster(job, peerConf);
- }
- return job;
- }
-
- public static class SyncMapper extends TableMapper<ImmutableBytesWritable, Mutation> {
- Path sourceHashDir;
-
- Connection sourceConnection;
- Connection targetConnection;
- Table sourceTable;
- Table targetTable;
- boolean dryRun;
-
- HashTable.TableHash sourceTableHash;
- HashTable.TableHash.Reader sourceHashReader;
- ImmutableBytesWritable currentSourceHash;
- ImmutableBytesWritable nextSourceKey;
- HashTable.ResultHasher targetHasher;
-
- Throwable mapperException;
-
- public static enum Counter {BATCHES, HASHES_MATCHED, HASHES_NOT_MATCHED, SOURCEMISSINGROWS,
- SOURCEMISSINGCELLS, TARGETMISSINGROWS, TARGETMISSINGCELLS, ROWSWITHDIFFS, DIFFERENTCELLVALUES,
- MATCHINGROWS, MATCHINGCELLS, EMPTY_BATCHES, RANGESMATCHED, RANGESNOTMATCHED};
-
- @Override
- protected void setup(Context context) throws IOException {
-
- Configuration conf = context.getConfiguration();
- sourceHashDir = new Path(conf.get(SOURCE_HASH_DIR_CONF_KEY));
- sourceConnection = openConnection(conf, SOURCE_ZK_CLUSTER_CONF_KEY, null);
- targetConnection = openConnection(conf, TARGET_ZK_CLUSTER_CONF_KEY,
- TableOutputFormat.OUTPUT_CONF_PREFIX);
- sourceTable = openTable(sourceConnection, conf, SOURCE_TABLE_CONF_KEY);
- targetTable = openTable(targetConnection, conf, TARGET_TABLE_CONF_KEY);
- dryRun = conf.getBoolean(DRY_RUN_CONF_KEY, false);
-
- sourceTableHash = HashTable.TableHash.read(conf, sourceHashDir);
- LOG.info("Read source hash manifest: " + sourceTableHash);
- LOG.info("Read " + sourceTableHash.partitions.size() + " partition keys");
-
- TableSplit split = (TableSplit) context.getInputSplit();
- ImmutableBytesWritable splitStartKey = new ImmutableBytesWritable(split.getStartRow());
-
- sourceHashReader = sourceTableHash.newReader(conf, splitStartKey);
- findNextKeyHashPair();
-
- // create a hasher, but don't start it right away
- // instead, find the first hash batch at or after the start row
- // and skip any rows that come before. they will be caught by the previous task
- targetHasher = new HashTable.ResultHasher();
- }
-
- private static Connection openConnection(Configuration conf, String zkClusterConfKey,
- String configPrefix)
- throws IOException {
- String zkCluster = conf.get(zkClusterConfKey);
- Configuration clusterConf = HBaseConfiguration.createClusterConf(conf,
- zkCluster, configPrefix);
- return ConnectionFactory.createConnection(clusterConf);
- }
-
- private static Table openTable(Connection connection, Configuration conf,
- String tableNameConfKey) throws IOException {
- return connection.getTable(TableName.valueOf(conf.get(tableNameConfKey)));
- }
-
- /**
- * Attempt to read the next source key/hash pair.
- * If there are no more, set nextSourceKey to null
- */
- private void findNextKeyHashPair() throws IOException {
- boolean hasNext = sourceHashReader.next();
- if (hasNext) {
- nextSourceKey = sourceHashReader.getCurrentKey();
- } else {
- // no more keys - last hash goes to the end
- nextSourceKey = null;
- }
- }
-
- @Override
- protected void map(ImmutableBytesWritable key, Result value, Context context)
- throws IOException, InterruptedException {
- try {
- // first, finish any hash batches that end before the scanned row
- while (nextSourceKey != null && key.compareTo(nextSourceKey) >= 0) {
- moveToNextBatch(context);
- }
-
- // next, add the scanned row (as long as we've reached the first batch)
- if (targetHasher.isBatchStarted()) {
- targetHasher.hashResult(value);
- }
- } catch (Throwable t) {
- mapperException = t;
- Throwables.propagateIfInstanceOf(t, IOException.class);
- Throwables.propagateIfInstanceOf(t, InterruptedException.class);
- Throwables.propagate(t);
- }
- }
-
- /**
- * If there is an open hash batch, complete it and sync if there are diffs.
- * Start a new batch, and seek to read the
- */
- private void moveToNextBatch(Context context) throws IOException, InterruptedException {
- if (targetHasher.isBatchStarted()) {
- finishBatchAndCompareHashes(context);
- }
- targetHasher.startBatch(nextSourceKey);
- currentSourceHash = sourceHashReader.getCurrentHash();
-
- findNextKeyHashPair();
- }
-
- /**
- * Finish the currently open hash batch.
- * Compare the target hash to the given source hash.
- * If they do not match, then sync the covered key range.
- */
- private void finishBatchAndCompareHashes(Context context)
- throws IOException, InterruptedException {
- targetHasher.finishBatch();
- context.getCounter(Counter.BATCHES).increment(1);
- if (targetHasher.getBatchSize() == 0) {
- context.getCounter(Counter.EMPTY_BATCHES).increment(1);
- }
- ImmutableBytesWritable targetHash = targetHasher.getBatchHash();
- if (targetHash.equals(currentSourceHash)) {
- context.getCounter(Counter.HASHES_MATCHED).increment(1);
- } else {
- context.getCounter(Counter.HASHES_NOT_MATCHED).increment(1);
-
- ImmutableBytesWritable stopRow = nextSourceKey == null
- ? new ImmutableBytesWritable(sourceTableHash.stopRow)
- : nextSourceKey;
-
- if (LOG.isDebugEnabled()) {
- LOG.debug("Hash mismatch. Key range: " + toHex(targetHasher.getBatchStartKey())
- + " to " + toHex(stopRow)
- + " sourceHash: " + toHex(currentSourceHash)
- + " targetHash: " + toHex(targetHash));
- }
-
- syncRange(context, targetHasher.getBatchStartKey(), stopRow);
- }
- }
- private static String toHex(ImmutableBytesWritable bytes) {
- return Bytes.toHex(bytes.get(), bytes.getOffset(), bytes.getLength());
- }
-
- private static final CellScanner EMPTY_CELL_SCANNER
- = new CellScanner(Collections.<Result>emptyIterator());
-
- /**
- * Rescan the given range directly from the source and target tables.
- * Count and log differences, and if this is not a dry run, output Puts and Deletes
- * to make the target table match the source table for this range
- */
- private void syncRange(Context context, ImmutableBytesWritable startRow,
- ImmutableBytesWritable stopRow) throws IOException, InterruptedException {
- Scan scan = sourceTableHash.initScan();
- scan.setStartRow(startRow.copyBytes());
- scan.setStopRow(stopRow.copyBytes());
-
- ResultScanner sourceScanner = sourceTable.getScanner(scan);
- CellScanner sourceCells = new CellScanner(sourceScanner.iterator());
-
- ResultScanner targetScanner = targetTable.getScanner(new Scan(scan));
- CellScanner targetCells = new CellScanner(targetScanner.iterator());
-
- boolean rangeMatched = true;
- byte[] nextSourceRow = sourceCells.nextRow();
- byte[] nextTargetRow = targetCells.nextRow();
- while(nextSourceRow != null || nextTargetRow != null) {
- boolean rowMatched;
- int rowComparison = compareRowKeys(nextSourceRow, nextTargetRow);
- if (rowComparison < 0) {
- if (LOG.isInfoEnabled()) {
- LOG.info("Target missing row: " + Bytes.toHex(nextSourceRow));
- }
- context.getCounter(Counter.TARGETMISSINGROWS).increment(1);
-
- rowMatched = syncRowCells(context, nextSourceRow, sourceCells, EMPTY_CELL_SCANNER);
- nextSourceRow = sourceCells.nextRow(); // advance only source to next row
- } else if (rowComparison > 0) {
- if (LOG.isInfoEnabled()) {
- LOG.info("Source missing row: " + Bytes.toHex(nextTargetRow));
- }
- context.getCounter(Counter.SOURCEMISSINGROWS).increment(1);
-
- rowMatched = syncRowCells(context, nextTargetRow, EMPTY_CELL_SCANNER, targetCells);
- nextTargetRow = targetCells.nextRow(); // advance only target to next row
- } else {
- // current row is the same on both sides, compare cell by cell
- rowMatched = syncRowCells(context, nextSourceRow, sourceCells, targetCells);
- nextSourceRow = sourceCells.nextRow();
- nextTargetRow = targetCells.nextRow();
- }
-
- if (!rowMatched) {
- rangeMatched = false;
- }
- }
-
- sourceScanner.close();
- targetScanner.close();
-
- context.getCounter(rangeMatched ? Counter.RANGESMATCHED : Counter.RANGESNOTMATCHED)
- .increment(1);
- }
-
- private static class CellScanner {
- private final Iterator<Result> results;
-
- private byte[] currentRow;
- private Result currentRowResult;
- private int nextCellInRow;
-
- private Result nextRowResult;
-
- public CellScanner(Iterator<Result> results) {
- this.results = results;
- }
-
- /**
- * Advance to the next row and return its row key.
- * Returns null iff there are no more rows.
- */
- public byte[] nextRow() {
- if (nextRowResult == null) {
- // no cached row - check scanner for more
- while (results.hasNext()) {
- nextRowResult = results.next();
- Cell nextCell = nextRowResult.rawCells()[0];
- if (currentRow == null
- || !Bytes.equals(currentRow, 0, currentRow.length, nextCell.getRowArray(),
- nextCell.getRowOffset(), nextCell.getRowLength())) {
- // found next row
- break;
- } else {
- // found another result from current row, keep scanning
- nextRowResult = null;
- }
- }
-
- if (nextRowResult == null) {
- // end of data, no more rows
- currentRowResult = null;
- currentRow = null;
- return null;
- }
- }
-
- // advance to cached result for next row
- currentRowResult = nextRowResult;
- nextCellInRow = 0;
- currentRow = currentRowResult.getRow();
- nextRowResult = null;
- return currentRow;
- }
-
- /**
- * Returns the next Cell in the current row or null iff none remain.
- */
- public Cell nextCellInRow() {
- if (currentRowResult == null) {
- // nothing left in current row
- return null;
- }
-
- Cell nextCell = currentRowResult.rawCells()[nextCellInRow];
- nextCellInRow++;
- if (nextCellInRow == currentRowResult.size()) {
- if (results.hasNext()) {
- Result result = results.next();
- Cell cell = result.rawCells()[0];
- if (Bytes.equals(currentRow, 0, currentRow.length, cell.getRowArray(),
- cell.getRowOffset(), cell.getRowLength())) {
- // result is part of current row
- currentRowResult = result;
- nextCellInRow = 0;
- } else {
- // result is part of next row, cache it
- nextRowResult = result;
- // current row is complete
- currentRowResult = null;
- }
- } else {
- // end of data
- currentRowResult = null;
- }
- }
- return nextCell;
- }
- }
-
- /**
- * Compare the cells for the given row from the source and target tables.
- * Count and log any differences.
- * If not a dry run, output a Put and/or Delete needed to sync the target table
- * to match the source table.
- */
- private boolean syncRowCells(Context context, byte[] rowKey, CellScanner sourceCells,
- CellScanner targetCells) throws IOException, InterruptedException {
- Put put = null;
- Delete delete = null;
- long matchingCells = 0;
- boolean matchingRow = true;
- Cell sourceCell = sourceCells.nextCellInRow();
- Cell targetCell = targetCells.nextCellInRow();
- while (sourceCell != null || targetCell != null) {
-
- int cellKeyComparison = compareCellKeysWithinRow(sourceCell, targetCell);
- if (cellKeyComparison < 0) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("Target missing cell: " + sourceCell);
- }
- context.getCounter(Counter.TARGETMISSINGCELLS).increment(1);
- matchingRow = false;
-
- if (!dryRun) {
- if (put == null) {
- put = new Put(rowKey);
- }
- put.add(sourceCell);
- }
-
- sourceCell = sourceCells.nextCellInRow();
- } else if (cellKeyComparison > 0) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("Source missing cell: " + targetCell);
- }
- context.getCounter(Counter.SOURCEMISSINGCELLS).increment(1);
- matchingRow = false;
-
- if (!dryRun) {
- if (delete == null) {
- delete = new Delete(rowKey);
- }
- // add a tombstone to exactly match the target cell that is missing on the source
- delete.addColumn(CellUtil.cloneFamily(targetCell),
- CellUtil.cloneQualifier(targetCell), targetCell.getTimestamp());
- }
-
- targetCell = targetCells.nextCellInRow();
- } else {
- // the cell keys are equal, now check values
- if (CellUtil.matchingValue(sourceCell, targetCell)) {
- matchingCells++;
- } else {
- if (LOG.isDebugEnabled()) {
- LOG.debug("Different values: ");
- LOG.debug(" source cell: " + sourceCell
- + " value: " + Bytes.toHex(sourceCell.getValueArray(),
- sourceCell.getValueOffset(), sourceCell.getValueLength()));
- LOG.debug(" target cell: " + targetCell
- + " value: " + Bytes.toHex(targetCell.getValueArray(),
- targetCell.getValueOffset(), targetCell.getValueLength()));
- }
- context.getCounter(Counter.DIFFERENTCELLVALUES).increment(1);
- matchingRow = false;
-
- if (!dryRun) {
- // overwrite target cell
- if (put == null) {
- put = new Put(rowKey);
- }
- put.add(sourceCell);
- }
- }
- sourceCell = sourceCells.nextCellInRow();
- targetCell = targetCells.nextCellInRow();
- }
-
- if (!dryRun && sourceTableHash.scanBatch > 0) {
- if (put != null && put.size() >= sourceTableHash.scanBatch) {
- context.write(new ImmutableBytesWritable(rowKey), put);
- put = null;
- }
- if (delete != null && delete.size() >= sourceTableHash.scanBatch) {
- context.write(new ImmutableBytesWritable(rowKey), delete);
- delete = null;
- }
- }
- }
-
- if (!dryRun) {
- if (put != null) {
- context.write(new ImmutableBytesWritable(rowKey), put);
- }
- if (delete != null) {
- context.write(new ImmutableBytesWritable(rowKey), delete);
- }
- }
-
- if (matchingCells > 0) {
- context.getCounter(Counter.MATCHINGCELLS).increment(matchingCells);
- }
- if (matchingRow) {
- context.getCounter(Counter.MATCHINGROWS).increment(1);
- return true;
- } else {
- context.getCounter(Counter.ROWSWITHDIFFS).increment(1);
- return false;
- }
- }
-
- /**
- * Compare row keys of the given Result objects.
- * Nulls are after non-nulls
- */
- private static int compareRowKeys(byte[] r1, byte[] r2) {
- if (r1 == null) {
- return 1; // source missing row
- } else if (r2 == null) {
- return -1; // target missing row
- } else {
- // SyncTable is only ever run against non-META tables, so a raw byte comparison here
- // matches what CellComparator does for row keys; the MetaCellComparator path never applies.
- return Bytes.compareTo(r1, 0, r1.length, r2, 0, r2.length);
- }
- }
-
- /**
- * Compare families, qualifiers, and timestamps of the given Cells.
- * They are assumed to be of the same row.
- * Nulls are after non-nulls.
- */
- private static int compareCellKeysWithinRow(Cell c1, Cell c2) {
- if (c1 == null) {
- return 1; // source missing cell
- }
- if (c2 == null) {
- return -1; // target missing cell
- }
-
- int result = CellComparator.compareFamilies(c1, c2);
- if (result != 0) {
- return result;
- }
-
- result = CellComparator.compareQualifiers(c1, c2);
- if (result != 0) {
- return result;
- }
-
- // note timestamp comparison is inverted - more recent cells first
- return CellComparator.compareTimestamps(c1, c2);
- }
-
- @Override
- protected void cleanup(Context context)
- throws IOException, InterruptedException {
- if (mapperException == null) {
- try {
- finishRemainingHashRanges(context);
- } catch (Throwable t) {
- mapperException = t;
- }
- }
-
- try {
- sourceTable.close();
- targetTable.close();
- sourceConnection.close();
- targetConnection.close();
- } catch (Throwable t) {
- if (mapperException == null) {
- mapperException = t;
- } else {
- LOG.error("Suppressing exception from closing tables", t);
- }
- }
-
- // propagate first exception
- if (mapperException != null) {
- Throwables.propagateIfInstanceOf(mapperException, IOException.class);
- Throwables.propagateIfInstanceOf(mapperException, InterruptedException.class);
- Throwables.propagate(mapperException);
- }
- }
-
- private void finishRemainingHashRanges(Context context) throws IOException,
- InterruptedException {
- TableSplit split = (TableSplit) context.getInputSplit();
- byte[] splitEndRow = split.getEndRow();
- boolean reachedEndOfTable = HashTable.isTableEndRow(splitEndRow);
-
- // if there are more hash batches that begin before the end of this split move to them
- while (nextSourceKey != null
- && (nextSourceKey.compareTo(splitEndRow) < 0 || reachedEndOfTable)) {
- moveToNextBatch(context);
- }
-
- if (targetHasher.isBatchStarted()) {
- // need to complete the final open hash batch
-
- if ((nextSourceKey != null && nextSourceKey.compareTo(splitEndRow) > 0)
- || (nextSourceKey == null && !Bytes.equals(splitEndRow, sourceTableHash.stopRow))) {
- // the open hash range continues past the end of this region
- // add a scan to complete the current hash range
- Scan scan = sourceTableHash.initScan();
- scan.setStartRow(splitEndRow);
- if (nextSourceKey == null) {
- scan.setStopRow(sourceTableHash.stopRow);
- } else {
- scan.setStopRow(nextSourceKey.copyBytes());
- }
-
- ResultScanner targetScanner = null;
- try {
- targetScanner = targetTable.getScanner(scan);
- for (Result row : targetScanner) {
- targetHasher.hashResult(row);
- }
- } finally {
- if (targetScanner != null) {
- targetScanner.close();
- }
- }
- } // else current batch ends exactly at split end row
-
- finishBatchAndCompareHashes(context);
- }
- }
- }
-
- private static final int NUM_ARGS = 3;
- private static void printUsage(final String errorMsg) {
- if (errorMsg != null && errorMsg.length() > 0) {
- System.err.println("ERROR: " + errorMsg);
- System.err.println();
- }
- System.err.println("Usage: SyncTable [options] <sourcehashdir> <sourcetable> <targettable>");
- System.err.println();
- System.err.println("Options:");
-
- System.err.println(" sourcezkcluster ZK cluster key of the source table");
- System.err.println(" (defaults to cluster in classpath's config)");
- System.err.println(" targetzkcluster ZK cluster key of the target table");
- System.err.println(" (defaults to cluster in classpath's config)");
- System.err.println(" dryrun if true, output counters but no writes");
- System.err.println(" (defaults to false)");
- System.err.println();
- System.err.println("Args:");
- System.err.println(" sourcehashdir path to HashTable output dir for source table");
- System.err.println(" (see org.apache.hadoop.hbase.mapreduce.HashTable)");
- System.err.println(" sourcetable Name of the source table to sync from");
- System.err.println(" targettable Name of the target table to sync to");
- System.err.println();
- System.err.println("Examples:");
- System.err.println(" For a dry run SyncTable of tableA from a remote source cluster");
- System.err.println(" to a local target cluster:");
- System.err.println(" $ hbase " +
- "org.apache.hadoop.hbase.mapreduce.SyncTable --dryrun=true"
- + " --sourcezkcluster=zk1.example.com,zk2.example.com,zk3.example.com:2181:/hbase"
- + " hdfs://nn:9000/hashes/tableA tableA tableA");
- }
-
- private boolean doCommandLine(final String[] args) {
- if (args.length < NUM_ARGS) {
- printUsage(null);
- return false;
- }
- try {
- sourceHashDir = new Path(args[args.length - 3]);
- sourceTableName = args[args.length - 2];
- targetTableName = args[args.length - 1];
-
- for (int i = 0; i < args.length - NUM_ARGS; i++) {
- String cmd = args[i];
- if (cmd.equals("-h") || cmd.startsWith("--h")) {
- printUsage(null);
- return false;
- }
-
- final String sourceZkClusterKey = "--sourcezkcluster=";
- if (cmd.startsWith(sourceZkClusterKey)) {
- sourceZkCluster = cmd.substring(sourceZkClusterKey.length());
- continue;
- }
-
- final String targetZkClusterKey = "--targetzkcluster=";
- if (cmd.startsWith(targetZkClusterKey)) {
- targetZkCluster = cmd.substring(targetZkClusterKey.length());
- continue;
- }
-
- final String dryRunKey = "--dryrun=";
- if (cmd.startsWith(dryRunKey)) {
- dryRun = Boolean.parseBoolean(cmd.substring(dryRunKey.length()));
- continue;
- }
-
- printUsage("Invalid argument '" + cmd + "'");
- return false;
- }
-
-
- } catch (Exception e) {
- e.printStackTrace();
- printUsage("Can't start because " + e.getMessage());
- return false;
- }
- return true;
- }
-
- /**
- * Main entry point.
- */
- public static void main(String[] args) throws Exception {
- int ret = ToolRunner.run(new SyncTable(HBaseConfiguration.create()), args);
- System.exit(ret);
- }
-
- @Override
- public int run(String[] args) throws Exception {
- String[] otherArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs();
- if (!doCommandLine(otherArgs)) {
- return 1;
- }
-
- Job job = createSubmittableJob(otherArgs);
- if (!job.waitForCompletion(true)) {
- LOG.info("Map-reduce job failed!");
- return 1;
- }
- counters = job.getCounters();
- return 0;
- }
-
-}
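Taken together with HashTable, the usage text above implies a two-step flow: HashTable writes the hash manifest for the source table, then SyncTable compares it against the target and, unless --dryrun=true, writes the repairs. A hedged sketch with placeholder paths and table names, driving both tools through ToolRunner (constructor and argument order per each tool's own usage text):

    Configuration conf = HBaseConfiguration.create();
    // step 1: hash the source table into an output directory
    ToolRunner.run(conf, new HashTable(conf),
        new String[] { "tableA", "hdfs://nn:9000/hashes/tableA" });
    // step 2: compare/sync the target table against that manifest
    ToolRunner.run(conf, new SyncTable(conf),
        new String[] { "--dryrun=true", "hdfs://nn:9000/hashes/tableA", "tableA", "tableA" });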
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormat.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormat.java
deleted file mode 100644
index 63868da..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormat.java
+++ /dev/null
@@ -1,294 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-import java.util.Collections;
-import java.util.List;
-import java.util.Locale;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.RegionLocator;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.hbase.util.Pair;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.util.StringUtils;
-
-/**
- * Convert HBase tabular data into a format that is consumable by Map/Reduce.
- */
-@InterfaceAudience.Public
-public class TableInputFormat extends TableInputFormatBase
-implements Configurable {
-
- @SuppressWarnings("hiding")
- private static final Log LOG = LogFactory.getLog(TableInputFormat.class);
-
- /** Job parameter that specifies the input table. */
- public static final String INPUT_TABLE = "hbase.mapreduce.inputtable";
- /**
- * If specified, use start keys of this table to split.
- * This is useful when you are preparing data for bulkload.
- */
- private static final String SPLIT_TABLE = "hbase.mapreduce.splittable";
- /** Base-64 encoded scanner. All other SCAN_ confs are ignored if this is specified.
- * See {@link TableMapReduceUtil#convertScanToString(Scan)} for more details.
- */
- public static final String SCAN = "hbase.mapreduce.scan";
- /** Scan start row */
- public static final String SCAN_ROW_START = "hbase.mapreduce.scan.row.start";
- /** Scan stop row */
- public static final String SCAN_ROW_STOP = "hbase.mapreduce.scan.row.stop";
- /** Column Family to Scan */
- public static final String SCAN_COLUMN_FAMILY = "hbase.mapreduce.scan.column.family";
- /** Space delimited list of columns and column families to scan. */
- public static final String SCAN_COLUMNS = "hbase.mapreduce.scan.columns";
- /** The timestamp used to filter columns with a specific timestamp. */
- public static final String SCAN_TIMESTAMP = "hbase.mapreduce.scan.timestamp";
- /** The starting timestamp used to filter columns with a specific range of versions. */
- public static final String SCAN_TIMERANGE_START = "hbase.mapreduce.scan.timerange.start";
- /** The ending timestamp used to filter columns with a specific range of versions. */
- public static final String SCAN_TIMERANGE_END = "hbase.mapreduce.scan.timerange.end";
- /** The maximum number of version to return. */
- public static final String SCAN_MAXVERSIONS = "hbase.mapreduce.scan.maxversions";
- /** Set to false to disable server-side caching of blocks for this scan. */
- public static final String SCAN_CACHEBLOCKS = "hbase.mapreduce.scan.cacheblocks";
- /** The number of rows for caching that will be passed to scanners. */
- public static final String SCAN_CACHEDROWS = "hbase.mapreduce.scan.cachedrows";
- /** Set the maximum number of values to return for each call to next(). */
- public static final String SCAN_BATCHSIZE = "hbase.mapreduce.scan.batchsize";
- /** Specify if we have to shuffle the map tasks. */
- public static final String SHUFFLE_MAPS = "hbase.mapreduce.inputtable.shufflemaps";
-
- /** The configuration. */
- private Configuration conf = null;
-
- /**
- * Returns the current configuration.
- *
- * @return The current configuration.
- * @see org.apache.hadoop.conf.Configurable#getConf()
- */
- @Override
- public Configuration getConf() {
- return conf;
- }
-
- /**
- * Sets the configuration. This is used to set the details for the table to
- * be scanned.
- *
- * @param configuration The configuration to set.
- * @see org.apache.hadoop.conf.Configurable#setConf(
- * org.apache.hadoop.conf.Configuration)
- */
- @Override
- @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="REC_CATCH_EXCEPTION",
- justification="Intentional")
- public void setConf(Configuration configuration) {
- this.conf = configuration;
-
- Scan scan = null;
-
- if (conf.get(SCAN) != null) {
- try {
- scan = TableMapReduceUtil.convertStringToScan(conf.get(SCAN));
- } catch (IOException e) {
- LOG.error("An error occurred.", e);
- }
- } else {
- try {
- scan = createScanFromConfiguration(conf);
- } catch (Exception e) {
- LOG.error(StringUtils.stringifyException(e));
- }
- }
-
- setScan(scan);
- }
-
- /**
- * Sets up a {@link Scan} instance, applying settings from the configuration property
- * constants defined in {@code TableInputFormat}. This allows specifying things such as:
- * <ul>
- * <li>start and stop rows</li>
- * <li>column qualifiers or families</li>
- * <li>timestamps or timerange</li>
- * <li>scanner caching and batch size</li>
- * </ul>
- */
- public static Scan createScanFromConfiguration(Configuration conf) throws IOException {
- Scan scan = new Scan();
-
- if (conf.get(SCAN_ROW_START) != null) {
- scan.setStartRow(Bytes.toBytesBinary(conf.get(SCAN_ROW_START)));
- }
-
- if (conf.get(SCAN_ROW_STOP) != null) {
- scan.setStopRow(Bytes.toBytesBinary(conf.get(SCAN_ROW_STOP)));
- }
-
- if (conf.get(SCAN_COLUMNS) != null) {
- addColumns(scan, conf.get(SCAN_COLUMNS));
- }
-
- for (String columnFamily : conf.getTrimmedStrings(SCAN_COLUMN_FAMILY)) {
- scan.addFamily(Bytes.toBytes(columnFamily));
- }
-
- if (conf.get(SCAN_TIMESTAMP) != null) {
- scan.setTimeStamp(Long.parseLong(conf.get(SCAN_TIMESTAMP)));
- }
-
- if (conf.get(SCAN_TIMERANGE_START) != null && conf.get(SCAN_TIMERANGE_END) != null) {
- scan.setTimeRange(
- Long.parseLong(conf.get(SCAN_TIMERANGE_START)),
- Long.parseLong(conf.get(SCAN_TIMERANGE_END)));
- }
-
- if (conf.get(SCAN_MAXVERSIONS) != null) {
- scan.setMaxVersions(Integer.parseInt(conf.get(SCAN_MAXVERSIONS)));
- }
-
- if (conf.get(SCAN_CACHEDROWS) != null) {
- scan.setCaching(Integer.parseInt(conf.get(SCAN_CACHEDROWS)));
- }
-
- if (conf.get(SCAN_BATCHSIZE) != null) {
- scan.setBatch(Integer.parseInt(conf.get(SCAN_BATCHSIZE)));
- }
-
- // false by default, full table scans generate too much BC churn
- scan.setCacheBlocks((conf.getBoolean(SCAN_CACHEBLOCKS, false)));
-
- return scan;
- }
-
- @Override
- protected void initialize(JobContext context) throws IOException {
- // Do we have to worry about mis-matches between the Configuration from setConf and the one
- // in this context?
- TableName tableName = TableName.valueOf(conf.get(INPUT_TABLE));
- try {
- initializeTable(ConnectionFactory.createConnection(new Configuration(conf)), tableName);
- } catch (Exception e) {
- LOG.error(StringUtils.stringifyException(e));
- }
- }
-
- /**
- * Parses a combined family and qualifier and adds either both or just the
- * family in case there is no qualifier. This assumes the older colon
- * divided notation, e.g. "family:qualifier".
- *
- * @param scan The Scan to update.
- * @param familyAndQualifier family and qualifier
- * @throws IllegalArgumentException When familyAndQualifier is invalid.
- */
- private static void addColumn(Scan scan, byte[] familyAndQualifier) {
- byte [][] fq = KeyValue.parseColumn(familyAndQualifier);
- if (fq.length == 1) {
- scan.addFamily(fq[0]);
- } else if (fq.length == 2) {
- scan.addColumn(fq[0], fq[1]);
- } else {
- throw new IllegalArgumentException("Invalid familyAndQualifier provided.");
- }
- }
-
- /**
- * Adds an array of columns specified using old format, family:qualifier.
- * <p>
- * Overrides previous calls to {@link Scan#addColumn(byte[], byte[])} for any families in the
- * input.
- *
- * @param scan The Scan to update.
- * @param columns array of columns, formatted as <code>family:qualifier</code>
- * @see Scan#addColumn(byte[], byte[])
- */
- public static void addColumns(Scan scan, byte [][] columns) {
- for (byte[] column : columns) {
- addColumn(scan, column);
- }
- }
-
- /**
- * Calculates the splits that will serve as input for the map tasks. The
- * number of splits matches the number of regions in a table. Splits are shuffled if
- * required.
- * @param context The current job context.
- * @return The list of input splits.
- * @throws IOException When creating the list of splits fails.
- * @see org.apache.hadoop.mapreduce.InputFormat#getSplits(
- * org.apache.hadoop.mapreduce.JobContext)
- */
- @Override
- public List<InputSplit> getSplits(JobContext context) throws IOException {
- List<InputSplit> splits = super.getSplits(context);
- if ((conf.get(SHUFFLE_MAPS) != null) && "true".equals(conf.get(SHUFFLE_MAPS).toLowerCase(Locale.ROOT))) {
- Collections.shuffle(splits);
- }
- return splits;
- }
-
- /**
- * Convenience method to parse a string representation of an array of column specifiers.
- *
- * @param scan The Scan to update.
- * @param columns The columns to parse.
- */
- private static void addColumns(Scan scan, String columns) {
- String[] cols = columns.split(" ");
- for (String col : cols) {
- addColumn(scan, Bytes.toBytes(col));
- }
- }
-
- @Override
- protected Pair<byte[][], byte[][]> getStartEndKeys() throws IOException {
- if (conf.get(SPLIT_TABLE) != null) {
- TableName splitTableName = TableName.valueOf(conf.get(SPLIT_TABLE));
- try (Connection conn = ConnectionFactory.createConnection(getConf())) {
- try (RegionLocator rl = conn.getRegionLocator(splitTableName)) {
- return rl.getStartEndKeys();
- }
- }
- }
-
- return super.getStartEndKeys();
- }
-
- /**
- * Sets split table in map-reduce job.
- */
- public static void configureSplitTable(Job job, TableName tableName) {
- job.getConfiguration().set(SPLIT_TABLE, tableName.getNameAsString());
- }
-}
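For orientation, here is a minimal sketch of how the INPUT_TABLE and SCAN_* properties defined above are typically wired into a job. The table name "exampleTable", the column family "cf", and the property values are illustrative assumptions, not taken from this commit:

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.hbase.HBaseConfiguration;
  import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
  import org.apache.hadoop.mapreduce.Job;

  public class ExampleScanJobSetup {
    public static void main(String[] args) throws Exception {
      Configuration conf = HBaseConfiguration.create();
      conf.set(TableInputFormat.INPUT_TABLE, "exampleTable");     // table to read (illustrative name)
      conf.set(TableInputFormat.SCAN_COLUMN_FAMILY, "cf");        // limit the scan to one family
      conf.set(TableInputFormat.SCAN_CACHEDROWS, "500");          // scanner caching
      conf.setBoolean(TableInputFormat.SHUFFLE_MAPS, true);       // optionally shuffle the splits
      Job job = Job.getInstance(conf, "scan-exampleTable");
      job.setInputFormatClass(TableInputFormat.class);
      // Mapper, reducer and output configuration are omitted from this sketch.
    }
  }

When the hbase.mapreduce.scan property is not set, setConf above falls back to createScanFromConfiguration to build the Scan from these individual properties.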
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java
deleted file mode 100644
index ce1928e6..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java
+++ /dev/null
@@ -1,653 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.Closeable;
-import java.io.IOException;
-import java.net.InetAddress;
-import java.net.InetSocketAddress;
-import java.net.UnknownHostException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.HRegionLocation;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Admin;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.RegionLocator;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Addressing;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.Pair;
-import org.apache.hadoop.hbase.util.RegionSizeCalculator;
-import org.apache.hadoop.hbase.util.Strings;
-import org.apache.hadoop.mapreduce.InputFormat;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.net.DNS;
-import org.apache.hadoop.util.StringUtils;
-
-/**
- * A base for {@link TableInputFormat}s. Receives a {@link Connection}, a {@link TableName},
- * a {@link Scan} instance that defines the input columns, etc. Subclasses may use
- * other TableRecordReader implementations.
- *
- * Subclasses MUST ensure initializeTable(Connection, TableName) is called for an instance to
- * function properly. Each of the entry points to this class used by the MapReduce framework,
- * {@link #createRecordReader(InputSplit, TaskAttemptContext)} and {@link #getSplits(JobContext)},
- * will call {@link #initialize(JobContext)} as a convenient centralized location to handle
- * retrieving the necessary configuration information. If your subclass overrides either of these
- * methods, either call the parent version or call initialize yourself.
- *
- * <p>
- * An example of a subclass:
- * <pre>
- * class ExampleTIF extends TableInputFormatBase {
- *
- * {@literal @}Override
- * protected void initialize(JobContext context) throws IOException {
- * // We are responsible for the lifecycle of this connection until we hand it over in
- * // initializeTable.
- * Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create(
- * job.getConfiguration()));
- * TableName tableName = TableName.valueOf("exampleTable");
- * // mandatory. once passed here, TableInputFormatBase will handle closing the connection.
- * initializeTable(connection, tableName);
- * byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
- * Bytes.toBytes("columnB") };
- * // optional, by default we'll get everything for the table.
- * Scan scan = new Scan();
- * for (byte[] family : inputColumns) {
- * scan.addFamily(family);
- * }
- * Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*"));
- * scan.setFilter(exampleFilter);
- * setScan(scan);
- * }
- * }
- * </pre>
- */
-@InterfaceAudience.Public
-public abstract class TableInputFormatBase
-extends InputFormat<ImmutableBytesWritable, Result> {
-
- /** Specify whether auto-balance is enabled for input in M/R jobs. */
- public static final String MAPREDUCE_INPUT_AUTOBALANCE = "hbase.mapreduce.input.autobalance";
- /** The maximum tolerable data skew ratio for M/R jobs; used together with the
- * hbase.mapreduce.input.autobalance property. */
- public static final String INPUT_AUTOBALANCE_MAXSKEWRATIO = "hbase.mapreduce.input.autobalance" +
- ".maxskewratio";
- /** Specify whether the row keys in the table are text (ASCII between 32 and 126);
- * default is true. False means the table uses binary row keys. */
- public static final String TABLE_ROW_TEXTKEY = "hbase.table.row.textkey";
-
- private static final Log LOG = LogFactory.getLog(TableInputFormatBase.class);
-
- private static final String NOT_INITIALIZED = "The input format instance has not been properly " +
- "initialized. Ensure you call initializeTable either in your constructor or initialize " +
- "method";
- private static final String INITIALIZATION_ERROR = "Cannot create a record reader because of a" +
- " previous error. Please look at the previous logs lines from" +
- " the task's full log for more details.";
-
- /** Holds the details for the internal scanner.
- *
- * @see Scan */
- private Scan scan = null;
- /** The {@link Admin}. */
- private Admin admin;
- /** The {@link Table} to scan. */
- private Table table;
- /** The {@link RegionLocator} of the table. */
- private RegionLocator regionLocator;
- /** The reader scanning the table, can be a custom one. */
- private TableRecordReader tableRecordReader = null;
- /** The underlying {@link Connection} of the table. */
- private Connection connection;
-
-
- /** The reverse DNS lookup cache mapping: IPAddress => HostName */
- private HashMap<InetAddress, String> reverseDNSCacheMap = new HashMap<>();
-
- /**
- * Builds a {@link TableRecordReader}. If no {@link TableRecordReader} was provided, uses
- * the default.
- *
- * @param split The split to work with.
- * @param context The current context.
- * @return The newly created record reader.
- * @throws IOException When creating the reader fails.
- * @see org.apache.hadoop.mapreduce.InputFormat#createRecordReader(
- * org.apache.hadoop.mapreduce.InputSplit,
- * org.apache.hadoop.mapreduce.TaskAttemptContext)
- */
- @Override
- public RecordReader<ImmutableBytesWritable, Result> createRecordReader(
- InputSplit split, TaskAttemptContext context)
- throws IOException {
- // Just in case a subclass is relying on JobConfigurable magic.
- if (table == null) {
- initialize(context);
- }
- // null check in case our child overrides getTable to not throw.
- try {
- if (getTable() == null) {
- // initialize() must not have been implemented in the subclass.
- throw new IOException(INITIALIZATION_ERROR);
- }
- } catch (IllegalStateException exception) {
- throw new IOException(INITIALIZATION_ERROR, exception);
- }
- TableSplit tSplit = (TableSplit) split;
- LOG.info("Input split length: " + StringUtils.humanReadableInt(tSplit.getLength()) + " bytes.");
- final TableRecordReader trr =
- this.tableRecordReader != null ? this.tableRecordReader : new TableRecordReader();
- Scan sc = new Scan(this.scan);
- sc.setStartRow(tSplit.getStartRow());
- sc.setStopRow(tSplit.getEndRow());
- trr.setScan(sc);
- trr.setTable(getTable());
- return new RecordReader<ImmutableBytesWritable, Result>() {
-
- @Override
- public void close() throws IOException {
- trr.close();
- closeTable();
- }
-
- @Override
- public ImmutableBytesWritable getCurrentKey() throws IOException, InterruptedException {
- return trr.getCurrentKey();
- }
-
- @Override
- public Result getCurrentValue() throws IOException, InterruptedException {
- return trr.getCurrentValue();
- }
-
- @Override
- public float getProgress() throws IOException, InterruptedException {
- return trr.getProgress();
- }
-
- @Override
- public void initialize(InputSplit inputsplit, TaskAttemptContext context) throws IOException,
- InterruptedException {
- trr.initialize(inputsplit, context);
- }
-
- @Override
- public boolean nextKeyValue() throws IOException, InterruptedException {
- return trr.nextKeyValue();
- }
- };
- }
-
- protected Pair<byte[][],byte[][]> getStartEndKeys() throws IOException {
- return getRegionLocator().getStartEndKeys();
- }
-
- /**
- * Calculates the splits that will serve as input for the map tasks. The
- * number of splits matches the number of regions in a table.
- *
- * @param context The current job context.
- * @return The list of input splits.
- * @throws IOException When creating the list of splits fails.
- * @see org.apache.hadoop.mapreduce.InputFormat#getSplits(
- * org.apache.hadoop.mapreduce.JobContext)
- */
- @Override
- public List<InputSplit> getSplits(JobContext context) throws IOException {
- boolean closeOnFinish = false;
-
- // Just in case a subclass is relying on JobConfigurable magic.
- if (table == null) {
- initialize(context);
- closeOnFinish = true;
- }
-
- // null check in case our child overrides getTable to not throw.
- try {
- if (getTable() == null) {
- // initialize() must not have been implemented in the subclass.
- throw new IOException(INITIALIZATION_ERROR);
- }
- } catch (IllegalStateException exception) {
- throw new IOException(INITIALIZATION_ERROR, exception);
- }
-
- try {
- RegionSizeCalculator sizeCalculator =
- new RegionSizeCalculator(getRegionLocator(), getAdmin());
-
- TableName tableName = getTable().getName();
-
- Pair<byte[][], byte[][]> keys = getStartEndKeys();
- if (keys == null || keys.getFirst() == null ||
- keys.getFirst().length == 0) {
- HRegionLocation regLoc =
- getRegionLocator().getRegionLocation(HConstants.EMPTY_BYTE_ARRAY, false);
- if (null == regLoc) {
- throw new IOException("Expecting at least one region.");
- }
- List<InputSplit> splits = new ArrayList<>(1);
- long regionSize = sizeCalculator.getRegionSize(regLoc.getRegionInfo().getRegionName());
- TableSplit split = new TableSplit(tableName, scan,
- HConstants.EMPTY_BYTE_ARRAY, HConstants.EMPTY_BYTE_ARRAY, regLoc
- .getHostnamePort().split(Addressing.HOSTNAME_PORT_SEPARATOR)[0], regionSize);
- splits.add(split);
- return splits;
- }
- List<InputSplit> splits = new ArrayList<>(keys.getFirst().length);
- for (int i = 0; i < keys.getFirst().length; i++) {
- if (!includeRegionInSplit(keys.getFirst()[i], keys.getSecond()[i])) {
- continue;
- }
-
- byte[] startRow = scan.getStartRow();
- byte[] stopRow = scan.getStopRow();
- // determine if the given start and stop keys fall into the region
- if ((startRow.length == 0 || keys.getSecond()[i].length == 0 ||
- Bytes.compareTo(startRow, keys.getSecond()[i]) < 0) &&
- (stopRow.length == 0 ||
- Bytes.compareTo(stopRow, keys.getFirst()[i]) > 0)) {
- byte[] splitStart = startRow.length == 0 ||
- Bytes.compareTo(keys.getFirst()[i], startRow) >= 0 ?
- keys.getFirst()[i] : startRow;
- byte[] splitStop = (stopRow.length == 0 ||
- Bytes.compareTo(keys.getSecond()[i], stopRow) <= 0) &&
- keys.getSecond()[i].length > 0 ?
- keys.getSecond()[i] : stopRow;
-
- HRegionLocation location = getRegionLocator().getRegionLocation(keys.getFirst()[i], false);
- // The below InetSocketAddress creation does a name resolution.
- InetSocketAddress isa = new InetSocketAddress(location.getHostname(), location.getPort());
- if (isa.isUnresolved()) {
- LOG.warn("Failed resolve " + isa);
- }
- InetAddress regionAddress = isa.getAddress();
- String regionLocation;
- regionLocation = reverseDNS(regionAddress);
-
- byte[] regionName = location.getRegionInfo().getRegionName();
- String encodedRegionName = location.getRegionInfo().getEncodedName();
- long regionSize = sizeCalculator.getRegionSize(regionName);
- TableSplit split = new TableSplit(tableName, scan,
- splitStart, splitStop, regionLocation, encodedRegionName, regionSize);
- splits.add(split);
- if (LOG.isDebugEnabled()) {
- LOG.debug("getSplits: split -> " + i + " -> " + split);
- }
- }
- }
- //The default value of "hbase.mapreduce.input.autobalance" is false, which means not enabled.
- boolean enableAutoBalance = context.getConfiguration()
- .getBoolean(MAPREDUCE_INPUT_AUTOBALANCE, false);
- if (enableAutoBalance) {
- long totalRegionSize=0;
- for (int i = 0; i < splits.size(); i++){
- TableSplit ts = (TableSplit)splits.get(i);
- totalRegionSize += ts.getLength();
- }
- long averageRegionSize = totalRegionSize / splits.size();
- // the averageRegionSize must be positive.
- if (averageRegionSize <= 0) {
- LOG.warn("The averageRegionSize is not positive: "+ averageRegionSize + ", " +
- "set it to 1.");
- averageRegionSize = 1;
- }
- return calculateRebalancedSplits(splits, context, averageRegionSize);
- } else {
- return splits;
- }
- } finally {
- if (closeOnFinish) {
- closeTable();
- }
- }
- }
-
- String reverseDNS(InetAddress ipAddress) throws UnknownHostException {
- String hostName = this.reverseDNSCacheMap.get(ipAddress);
- if (hostName == null) {
- String ipAddressString = null;
- try {
- ipAddressString = DNS.reverseDns(ipAddress, null);
- } catch (Exception e) {
- // We can use InetAddress in case the jndi failed to pull up the reverse DNS entry from the
- // name service. Also, in case of ipv6, we need to use the InetAddress since resolving
- // reverse DNS using jndi doesn't work well with ipv6 addresses.
- ipAddressString = InetAddress.getByName(ipAddress.getHostAddress()).getHostName();
- }
- if (ipAddressString == null) throw new UnknownHostException("No host found for " + ipAddress);
- hostName = Strings.domainNamePointerToHostName(ipAddressString);
- this.reverseDNSCacheMap.put(ipAddress, hostName);
- }
- return hostName;
- }
-
- /**
- * Calculates the MapReduce input splits for the map tasks. The number of input splits depends
- * on the average region size and the data skew ratio set by the user in the configuration.
- *
- * @param list The list of input splits before balance.
- * @param context The current job context.
- * @param average The average size of all regions .
- * @return The list of input splits.
- * @throws IOException When creating the list of splits fails.
- * @see org.apache.hadoop.mapreduce.InputFormat#getSplits(
- * org.apache.hadoop.mapreduce.JobContext)
- */
- private List<InputSplit> calculateRebalancedSplits(List<InputSplit> list, JobContext context,
- long average) throws IOException {
- List<InputSplit> resultList = new ArrayList<>();
- Configuration conf = context.getConfiguration();
- //The default data skew ratio is 3
- long dataSkewRatio = conf.getLong(INPUT_AUTOBALANCE_MAXSKEWRATIO, 3);
- //It determines which mode to use: text key mode or binary key mode. The default is text mode.
- boolean isTextKey = context.getConfiguration().getBoolean(TABLE_ROW_TEXTKEY, true);
- long dataSkewThreshold = dataSkewRatio * average;
- int count = 0;
- while (count < list.size()) {
- TableSplit ts = (TableSplit)list.get(count);
- TableName tableName = ts.getTable();
- String regionLocation = ts.getRegionLocation();
- String encodedRegionName = ts.getEncodedRegionName();
- long regionSize = ts.getLength();
- if (regionSize >= dataSkewThreshold) {
- // if the current region size is larger than the data skew threshold,
- // split the region into two MapReduce input splits.
- byte[] splitKey = getSplitKey(ts.getStartRow(), ts.getEndRow(), isTextKey);
- if (Arrays.equals(ts.getEndRow(), splitKey)) {
- // Not splitting since the end key is the same as the split key
- resultList.add(ts);
- } else {
- //Set the size of child TableSplit as 1/2 of the region size. The exact size of the
- // MapReduce input splits is not far off.
- TableSplit t1 = new TableSplit(tableName, scan, ts.getStartRow(), splitKey,
- regionLocation, regionSize / 2);
- TableSplit t2 = new TableSplit(tableName, scan, splitKey, ts.getEndRow(), regionLocation,
- regionSize - regionSize / 2);
- resultList.add(t1);
- resultList.add(t2);
- }
- count++;
- } else if (regionSize >= average) {
- // if the region size between average size and data skew threshold size,
- // make this region as one MapReduce input split.
- resultList.add(ts);
- count++;
- } else {
- // if the total size of several small contiguous regions is less than the average region size,
- // combine them into one MapReduce input split.
- long totalSize = regionSize;
- byte[] splitStartKey = ts.getStartRow();
- byte[] splitEndKey = ts.getEndRow();
- count++;
- for (; count < list.size(); count++) {
- TableSplit nextRegion = (TableSplit)list.get(count);
- long nextRegionSize = nextRegion.getLength();
- if (totalSize + nextRegionSize <= dataSkewThreshold) {
- totalSize = totalSize + nextRegionSize;
- splitEndKey = nextRegion.getEndRow();
- } else {
- break;
- }
- }
- TableSplit t = new TableSplit(tableName, scan, splitStartKey, splitEndKey,
- regionLocation, encodedRegionName, totalSize);
- resultList.add(t);
- }
- }
- return resultList;
- }
-
- /**
- * Selects a split point in the region. The selection of the split point is based on a uniform
- * distribution assumption for the keys in the region.
- * Here are some examples:
- *
- * <table>
- * <tr>
- * <th>start key</th>
- * <th>end key</th>
- * <th>is text</th>
- * <th>split point</th>
- * </tr>
- * <tr>
- * <td>'a', 'a', 'a', 'b', 'c', 'd', 'e', 'f', 'g'</td>
- * <td>'a', 'a', 'a', 'f', 'f', 'f'</td>
- * <td>true</td>
- * <td>'a', 'a', 'a', 'd', 'd', -78, 50, -77, 51</td>
- * </tr>
- * <tr>
- * <td>'1', '1', '1', '0', '0', '0'</td>
- * <td>'1', '1', '2', '5', '7', '9', '0'</td>
- * <td>true</td>
- * <td>'1', '1', '1', -78, -77, -76, -104</td>
- * </tr>
- * <tr>
- * <td>'1', '1', '1', '0'</td>
- * <td>'1', '1', '2', '0'</td>
- * <td>true</td>
- * <td>'1', '1', '1', -80</td>
- * </tr>
- * <tr>
- * <td>13, -19, 126, 127</td>
- * <td>13, -19, 127, 0</td>
- * <td>false</td>
- * <td>13, -19, 126, -65</td>
- * </tr>
- * </table>
- *
- * Set this function as "public static", make it easier for test.
- *
- * @param start Start key of the region
- * @param end End key of the region
- * @param isText It determines to use text key mode or binary key mode
- * @return The split point in the region.
- */
- @InterfaceAudience.Private
- public static byte[] getSplitKey(byte[] start, byte[] end, boolean isText) {
- byte upperLimitByte;
- byte lowerLimitByte;
- //Use text mode or binary mode.
- if (isText) {
- //The range of text char set in ASCII is [32,126], the lower limit is space and the upper
- // limit is '~'.
- upperLimitByte = '~';
- lowerLimitByte = ' ';
- } else {
- upperLimitByte = -1;
- lowerLimitByte = 0;
- }
- // For special case
- // Example 1 : startkey=null, endkey="hhhqqqwww", splitKey="h"
- // Example 2 (text key mode): startKey="ffffaaa", endKey=null, splitkey="f~~~~~~"
- if (start.length == 0 && end.length == 0){
- return new byte[]{(byte) ((lowerLimitByte + upperLimitByte) / 2)};
- }
- if (start.length == 0 && end.length != 0){
- return new byte[]{ end[0] };
- }
- if (start.length != 0 && end.length == 0){
- byte[] result =new byte[start.length];
- result[0]=start[0];
- for (int k = 1; k < start.length; k++){
- result[k] = upperLimitByte;
- }
- return result;
- }
- return Bytes.split(start, end, false, 1)[1];
- }
-
- /**
- * Test if the given region is to be included in the InputSplit while splitting
- * the regions of a table.
- * <p>
- * This optimization is effective when there is a specific reason to exclude an entire region from the M-R job
- * (and hence not contribute an InputSplit for it), given the start and end keys of that region. <br>
- * It is useful when we need to remember the last-processed top record and continuously revisit the
- * [last, current) interval for M-R processing. In addition to reducing the number of InputSplits, it also
- * reduces the load on the region server, due to the ordering of the keys.
- * <br>
- * <br>
- * Note: It is possible that <code>endKey.length() == 0</code> for the last (most recent) region.
- * <br>
- * Override this method if you want to exclude regions from the M-R job altogether. By default, no region is
- * excluded (i.e. all regions are included).
- *
- *
- * @param startKey Start key of the region
- * @param endKey End key of the region
- * @return true, if this region needs to be included as part of the input (default).
- *
- */
- protected boolean includeRegionInSplit(final byte[] startKey, final byte [] endKey) {
- return true;
- }
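As the javadoc above notes, excluding regions is done by overriding this hook in a subclass. A minimal sketch follows; the subclass name and the cut-off row key are illustrative assumptions:

  import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
  import org.apache.hadoop.hbase.util.Bytes;

  public class BoundedTableInputFormat extends TableInputFormat {
    @Override
    protected boolean includeRegionInSplit(byte[] startKey, byte[] endKey) {
      // Keep only regions whose start key falls below an illustrative upper bound.
      return Bytes.compareTo(startKey, Bytes.toBytes("row-99999")) < 0;
    }
  }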
-
- /**
- * Allows subclasses to get the {@link RegionLocator}.
- */
- protected RegionLocator getRegionLocator() {
- if (regionLocator == null) {
- throw new IllegalStateException(NOT_INITIALIZED);
- }
- return regionLocator;
- }
-
- /**
- * Allows subclasses to get the {@link Table}.
- */
- protected Table getTable() {
- if (table == null) {
- throw new IllegalStateException(NOT_INITIALIZED);
- }
- return table;
- }
-
- /**
- * Allows subclasses to get the {@link Admin}.
- */
- protected Admin getAdmin() {
- if (admin == null) {
- throw new IllegalStateException(NOT_INITIALIZED);
- }
- return admin;
- }
-
- /**
- * Allows subclasses to initialize the table information.
- *
- * @param connection The Connection to the HBase cluster. MUST be unmanaged. We will close.
- * @param tableName The {@link TableName} of the table to process.
- * @throws IOException
- */
- protected void initializeTable(Connection connection, TableName tableName) throws IOException {
- if (this.table != null || this.connection != null) {
- LOG.warn("initializeTable called multiple times. Overwriting connection and table " +
- "reference; TableInputFormatBase will not close these old references when done.");
- }
- this.table = connection.getTable(tableName);
- this.regionLocator = connection.getRegionLocator(tableName);
- this.admin = connection.getAdmin();
- this.connection = connection;
- }
-
- /**
- * Gets the scan defining the actual details like columns etc.
- *
- * @return The internal scan instance.
- */
- public Scan getScan() {
- if (this.scan == null) this.scan = new Scan();
- return scan;
- }
-
- /**
- * Sets the scan defining the actual details like columns etc.
- *
- * @param scan The scan to set.
- */
- public void setScan(Scan scan) {
- this.scan = scan;
- }
-
- /**
- * Allows subclasses to set the {@link TableRecordReader}.
- *
- * @param tableRecordReader A different {@link TableRecordReader}
- * implementation.
- */
- protected void setTableRecordReader(TableRecordReader tableRecordReader) {
- this.tableRecordReader = tableRecordReader;
- }
-
- /**
- * Handle subclass specific set up.
- * Each of the entry points used by the MapReduce framework,
- * {@link #createRecordReader(InputSplit, TaskAttemptContext)} and {@link #getSplits(JobContext)},
- * will call {@link #initialize(JobContext)} as a convenient centralized location to handle
- * retrieving the necessary configuration information and calling
- * {@link #initializeTable(Connection, TableName)}.
- *
- * Subclasses should implement their initialize call such that it is safe to call multiple times.
- * The current TableInputFormatBase implementation relies on a non-null table reference to decide
- * if an initialize call is needed, but this behavior may change in the future. In particular,
- * it is critical that initializeTable not be called multiple times since this will leak
- * Connection instances.
- *
- */
- protected void initialize(JobContext context) throws IOException {
- }
-
- /**
- * Close the Table and related objects that were initialized via
- * {@link #initializeTable(Connection, TableName)}.
- *
- * @throws IOException
- */
- protected void closeTable() throws IOException {
- close(admin, table, regionLocator, connection);
- admin = null;
- table = null;
- regionLocator = null;
- connection = null;
- }
-
- private void close(Closeable... closables) throws IOException {
- for (Closeable c : closables) {
- if(c != null) { c.close(); }
- }
- }
-
-}
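Since getSplitKey above is public static, the split-point selection can be exercised directly. A small sketch using the first example from its javadoc table (class name is an illustrative placeholder):

  import org.apache.hadoop.hbase.mapreduce.TableInputFormatBase;
  import org.apache.hadoop.hbase.util.Bytes;

  public class SplitKeyDemo {
    public static void main(String[] args) {
      byte[] start = Bytes.toBytes("aaabcdefg");
      byte[] end = Bytes.toBytes("aaafff");
      // Text-key mode: picks a point roughly midway, assuming uniformly distributed keys.
      byte[] splitPoint = TableInputFormatBase.getSplitKey(start, end, true);
      System.out.println(Bytes.toStringBinary(splitPoint));
    }
  }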
[18/41] hbase git commit: HBASE-18640 Move mapreduce out of hbase-server into separate module. Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat2.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat2.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat2.java
deleted file mode 100644
index 7fea254..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat2.java
+++ /dev/null
@@ -1,902 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-import java.io.UnsupportedEncodingException;
-import java.net.InetSocketAddress;
-import java.net.URLDecoder;
-import java.net.URLEncoder;
-import java.nio.charset.StandardCharsets;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.TreeMap;
-import java.util.TreeSet;
-import java.util.UUID;
-import java.util.function.Function;
-import java.util.stream.Collectors;
-
-import org.apache.commons.lang.StringUtils;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellComparator;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.TableDescriptor;
-import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
-import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.RegionLocator;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.fs.HFileSystem;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.HRegionLocation;
-import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.io.compress.Compression;
-import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
-import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
-import org.apache.hadoop.hbase.io.hfile.CacheConfig;
-import org.apache.hadoop.hbase.io.hfile.HFile;
-import org.apache.hadoop.hbase.io.hfile.HFileContext;
-import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
-import org.apache.hadoop.hbase.io.hfile.HFileWriterImpl;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.KeyValueUtil;
-import org.apache.hadoop.hbase.regionserver.BloomType;
-import org.apache.hadoop.hbase.regionserver.HStore;
-import org.apache.hadoop.hbase.regionserver.StoreFile;
-import org.apache.hadoop.hbase.regionserver.StoreFileWriter;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
-import org.apache.hadoop.hbase.util.FSUtils;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.OutputFormat;
-import org.apache.hadoop.mapreduce.RecordWriter;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.annotations.VisibleForTesting;
-
-/**
- * Writes HFiles. Passed Cells must arrive in order.
- * Writes current time as the sequence id for the file. Sets the major compacted
- * attribute on created {@link HFile}s. Calling write(null, null) will forcibly roll
- * all HFiles being written.
- * <p>
- * Using this class as part of a MapReduce job is best done
- * using {@link #configureIncrementalLoad(Job, TableDescriptor, RegionLocator)}.
- */
-@InterfaceAudience.Public
-public class HFileOutputFormat2
- extends FileOutputFormat<ImmutableBytesWritable, Cell> {
- private static final Log LOG = LogFactory.getLog(HFileOutputFormat2.class);
- static class TableInfo {
- private TableDescriptor tableDesctiptor;
- private RegionLocator regionLocator;
-
- public TableInfo(TableDescriptor tableDesctiptor, RegionLocator regionLocator) {
- this.tableDesctiptor = tableDesctiptor;
- this.regionLocator = regionLocator;
- }
-
- /**
- * Modifications to the returned HTD do not affect the inner TD.
- * @return A clone of the inner table descriptor
- * @deprecated use {@link #getTableDescriptor}
- */
- @Deprecated
- public HTableDescriptor getHTableDescriptor() {
- return new HTableDescriptor(tableDesctiptor);
- }
-
- public TableDescriptor getTableDescriptor() {
- return tableDesctiptor;
- }
-
- public RegionLocator getRegionLocator() {
- return regionLocator;
- }
- }
-
- protected static final byte[] tableSeparator = ";".getBytes(StandardCharsets.UTF_8);
-
- protected static byte[] combineTableNameSuffix(byte[] tableName,
- byte[] suffix ) {
- return Bytes.add(tableName, tableSeparator, suffix);
- }
-
- // The following constants are private since these are used by
- // HFileOutputFormat2 to internally transfer data between job setup and
- // reducer run using conf.
- // These should not be changed by the client.
- static final String COMPRESSION_FAMILIES_CONF_KEY =
- "hbase.hfileoutputformat.families.compression";
- static final String BLOOM_TYPE_FAMILIES_CONF_KEY =
- "hbase.hfileoutputformat.families.bloomtype";
- static final String BLOCK_SIZE_FAMILIES_CONF_KEY =
- "hbase.mapreduce.hfileoutputformat.blocksize";
- static final String DATABLOCK_ENCODING_FAMILIES_CONF_KEY =
- "hbase.mapreduce.hfileoutputformat.families.datablock.encoding";
-
- // This constant is public since the client can modify this when setting
- // up their conf object and thus refer to this symbol.
- // It is present for backwards compatibility reasons. Use it only to
- // override the auto-detection of datablock encoding.
- public static final String DATABLOCK_ENCODING_OVERRIDE_CONF_KEY =
- "hbase.mapreduce.hfileoutputformat.datablock.encoding";
-
- /**
- * Keep locality while generating HFiles for bulkload. See HBASE-12596
- */
- public static final String LOCALITY_SENSITIVE_CONF_KEY =
- "hbase.bulkload.locality.sensitive.enabled";
- private static final boolean DEFAULT_LOCALITY_SENSITIVE = true;
- static final String OUTPUT_TABLE_NAME_CONF_KEY =
- "hbase.mapreduce.hfileoutputformat.table.name";
- static final String MULTI_TABLE_HFILEOUTPUTFORMAT_CONF_KEY =
- "hbase.mapreduce.use.multi.table.hfileoutputformat";
-
- public static final String STORAGE_POLICY_PROPERTY = "hbase.hstore.storagepolicy";
- public static final String STORAGE_POLICY_PROPERTY_CF_PREFIX = STORAGE_POLICY_PROPERTY + ".";
-
- @Override
- public RecordWriter<ImmutableBytesWritable, Cell> getRecordWriter(
- final TaskAttemptContext context) throws IOException, InterruptedException {
- return createRecordWriter(context);
- }
-
- protected static byte[] getTableNameSuffixedWithFamily(byte[] tableName, byte[] family) {
- return combineTableNameSuffix(tableName, family);
- }
-
- static <V extends Cell> RecordWriter<ImmutableBytesWritable, V>
- createRecordWriter(final TaskAttemptContext context)
- throws IOException {
-
- // Get the path of the temporary output file
- final Path outputPath = FileOutputFormat.getOutputPath(context);
- final Path outputDir = new FileOutputCommitter(outputPath, context).getWorkPath();
- final Configuration conf = context.getConfiguration();
- final boolean writeMultipleTables = conf.getBoolean(MULTI_TABLE_HFILEOUTPUTFORMAT_CONF_KEY, false) ;
- final String writeTableNames = conf.get(OUTPUT_TABLE_NAME_CONF_KEY);
- if (writeTableNames==null || writeTableNames.isEmpty()) {
- throw new IllegalArgumentException("Configuration parameter " + OUTPUT_TABLE_NAME_CONF_KEY
- + " cannot be empty");
- }
- final FileSystem fs = outputDir.getFileSystem(conf);
- // These configs. are from hbase-*.xml
- final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE,
- HConstants.DEFAULT_MAX_FILE_SIZE);
- // Invented config. Add to hbase-*.xml if other than default compression.
- final String defaultCompressionStr = conf.get("hfile.compression",
- Compression.Algorithm.NONE.getName());
- final Algorithm defaultCompression = HFileWriterImpl
- .compressionByName(defaultCompressionStr);
- final boolean compactionExclude = conf.getBoolean(
- "hbase.mapreduce.hfileoutputformat.compaction.exclude", false);
-
- final Set<String> allTableNames = Arrays.stream(writeTableNames.split(
- Bytes.toString(tableSeparator))).collect(Collectors.toSet());
-
- // create a map from column family to the compression algorithm
- final Map<byte[], Algorithm> compressionMap = createFamilyCompressionMap(conf);
- final Map<byte[], BloomType> bloomTypeMap = createFamilyBloomTypeMap(conf);
- final Map<byte[], Integer> blockSizeMap = createFamilyBlockSizeMap(conf);
-
- String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_OVERRIDE_CONF_KEY);
- final Map<byte[], DataBlockEncoding> datablockEncodingMap
- = createFamilyDataBlockEncodingMap(conf);
- final DataBlockEncoding overriddenEncoding;
- if (dataBlockEncodingStr != null) {
- overriddenEncoding = DataBlockEncoding.valueOf(dataBlockEncodingStr);
- } else {
- overriddenEncoding = null;
- }
-
- return new RecordWriter<ImmutableBytesWritable, V>() {
- // Map of families to writers and how much has been output on the writer.
- private final Map<byte[], WriterLength> writers =
- new TreeMap<>(Bytes.BYTES_COMPARATOR);
- private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
- private final byte[] now = Bytes.toBytes(EnvironmentEdgeManager.currentTime());
- private boolean rollRequested = false;
-
- @Override
- public void write(ImmutableBytesWritable row, V cell)
- throws IOException {
- KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
-
- // null input == user explicitly wants to flush
- if (row == null && kv == null) {
- rollWriters();
- return;
- }
-
- byte[] rowKey = CellUtil.cloneRow(kv);
- long length = kv.getLength();
- byte[] family = CellUtil.cloneFamily(kv);
- byte[] tableNameBytes = null;
- if (writeMultipleTables) {
- tableNameBytes = MultiTableHFileOutputFormat.getTableName(row.get());
- if (!allTableNames.contains(Bytes.toString(tableNameBytes))) {
- throw new IllegalArgumentException("TableName '" + Bytes.toString(tableNameBytes) +
- "' not" + " expected");
- }
- } else {
- tableNameBytes = writeTableNames.getBytes(StandardCharsets.UTF_8);
- }
- byte[] tableAndFamily = getTableNameSuffixedWithFamily(tableNameBytes, family);
- WriterLength wl = this.writers.get(tableAndFamily);
-
- // If this is a new column family, verify that the directory exists
- if (wl == null) {
- Path writerPath = null;
- if (writeMultipleTables) {
- writerPath = new Path(outputDir, new Path(Bytes.toString(tableNameBytes), Bytes
- .toString(family)));
- }
- else {
- writerPath = new Path(outputDir, Bytes.toString(family));
- }
- fs.mkdirs(writerPath);
- configureStoragePolicy(conf, fs, tableAndFamily, writerPath);
- }
-
- // If any of the HFiles for the column families has reached
- // maxsize, we need to roll all the writers
- if (wl != null && wl.written + length >= maxsize) {
- this.rollRequested = true;
- }
-
- // This can only happen once a row is finished though
- if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
- rollWriters();
- }
-
- // create a new WAL writer, if necessary
- if (wl == null || wl.writer == null) {
- if (conf.getBoolean(LOCALITY_SENSITIVE_CONF_KEY, DEFAULT_LOCALITY_SENSITIVE)) {
- HRegionLocation loc = null;
-
- String tableName = Bytes.toString(tableNameBytes);
- if (tableName != null) {
- try (Connection connection = ConnectionFactory.createConnection(conf);
- RegionLocator locator =
- connection.getRegionLocator(TableName.valueOf(tableName))) {
- loc = locator.getRegionLocation(rowKey);
- } catch (Throwable e) {
- LOG.warn("There's something wrong when locating rowkey: " +
- Bytes.toString(rowKey) + " for tablename: " + tableName, e);
- loc = null;
- } }
-
- if (null == loc) {
- if (LOG.isTraceEnabled()) {
- LOG.trace("failed to get region location, so use default writer for rowkey: " +
- Bytes.toString(rowKey));
- }
- wl = getNewWriter(tableNameBytes, family, conf, null);
- } else {
- if (LOG.isDebugEnabled()) {
- LOG.debug("first rowkey: [" + Bytes.toString(rowKey) + "]");
- }
- InetSocketAddress initialIsa =
- new InetSocketAddress(loc.getHostname(), loc.getPort());
- if (initialIsa.isUnresolved()) {
- if (LOG.isTraceEnabled()) {
- LOG.trace("failed to resolve bind address: " + loc.getHostname() + ":"
- + loc.getPort() + ", so use default writer");
- }
- wl = getNewWriter(tableNameBytes, family, conf, null);
- } else {
- if (LOG.isDebugEnabled()) {
- LOG.debug("use favored nodes writer: " + initialIsa.getHostString());
- }
- wl = getNewWriter(tableNameBytes, family, conf, new InetSocketAddress[] { initialIsa
- });
- }
- }
- } else {
- wl = getNewWriter(tableNameBytes, family, conf, null);
- }
- }
-
- // we now have the proper WAL writer. full steam ahead
- kv.updateLatestStamp(this.now);
- wl.writer.append(kv);
- wl.written += length;
-
- // Copy the row so we can detect row transitions.
- this.previousRow = rowKey;
- }
-
- private void rollWriters() throws IOException {
- for (WriterLength wl : this.writers.values()) {
- if (wl.writer != null) {
- LOG.info(
- "Writer=" + wl.writer.getPath() + ((wl.written == 0)? "": ", wrote=" + wl.written));
- close(wl.writer);
- }
- wl.writer = null;
- wl.written = 0;
- }
- this.rollRequested = false;
- }
-
- /*
- * Create a new StoreFile.Writer.
- * @param family
- * @return A WriterLength, containing a new StoreFile.Writer.
- * @throws IOException
- */
- @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="BX_UNBOXING_IMMEDIATELY_REBOXED",
- justification="Not important")
- private WriterLength getNewWriter(byte[] tableName, byte[] family, Configuration
- conf, InetSocketAddress[] favoredNodes) throws IOException {
- byte[] tableAndFamily = getTableNameSuffixedWithFamily(tableName, family);
- Path familydir = new Path(outputDir, Bytes.toString(family));
- if (writeMultipleTables) {
- familydir = new Path(outputDir,
- new Path(Bytes.toString(tableName), Bytes.toString(family)));
- }
- WriterLength wl = new WriterLength();
- Algorithm compression = compressionMap.get(tableAndFamily);
- compression = compression == null ? defaultCompression : compression;
- BloomType bloomType = bloomTypeMap.get(tableAndFamily);
- bloomType = bloomType == null ? BloomType.NONE : bloomType;
- Integer blockSize = blockSizeMap.get(tableAndFamily);
- blockSize = blockSize == null ? HConstants.DEFAULT_BLOCKSIZE : blockSize;
- DataBlockEncoding encoding = overriddenEncoding;
- encoding = encoding == null ? datablockEncodingMap.get(tableAndFamily) : encoding;
- encoding = encoding == null ? DataBlockEncoding.NONE : encoding;
- Configuration tempConf = new Configuration(conf);
- tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
- HFileContextBuilder contextBuilder = new HFileContextBuilder()
- .withCompression(compression)
- .withChecksumType(HStore.getChecksumType(conf))
- .withBytesPerCheckSum(HStore.getBytesPerChecksum(conf))
- .withBlockSize(blockSize);
-
- if (HFile.getFormatVersion(conf) >= HFile.MIN_FORMAT_VERSION_WITH_TAGS) {
- contextBuilder.withIncludesTags(true);
- }
-
- contextBuilder.withDataBlockEncoding(encoding);
- HFileContext hFileContext = contextBuilder.build();
- if (null == favoredNodes) {
- wl.writer =
- new StoreFileWriter.Builder(conf, new CacheConfig(tempConf), fs)
- .withOutputDir(familydir).withBloomType(bloomType)
- .withComparator(CellComparator.COMPARATOR).withFileContext(hFileContext).build();
- } else {
- wl.writer =
- new StoreFileWriter.Builder(conf, new CacheConfig(tempConf), new HFileSystem(fs))
- .withOutputDir(familydir).withBloomType(bloomType)
- .withComparator(CellComparator.COMPARATOR).withFileContext(hFileContext)
- .withFavoredNodes(favoredNodes).build();
- }
-
- this.writers.put(tableAndFamily, wl);
- return wl;
- }
-
- private void close(final StoreFileWriter w) throws IOException {
- if (w != null) {
- w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY,
- Bytes.toBytes(System.currentTimeMillis()));
- w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
- Bytes.toBytes(context.getTaskAttemptID().toString()));
- w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY,
- Bytes.toBytes(true));
- w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY,
- Bytes.toBytes(compactionExclude));
- w.appendTrackedTimestampsToMetadata();
- w.close();
- }
- }
-
- @Override
- public void close(TaskAttemptContext c)
- throws IOException, InterruptedException {
- for (WriterLength wl: this.writers.values()) {
- close(wl.writer);
- }
- }
- };
- }
-
- /**
- * Configure block storage policy for CF after the directory is created.
- */
- static void configureStoragePolicy(final Configuration conf, final FileSystem fs,
- byte[] tableAndFamily, Path cfPath) {
- if (null == conf || null == fs || null == tableAndFamily || null == cfPath) {
- return;
- }
-
- String policy =
- conf.get(STORAGE_POLICY_PROPERTY_CF_PREFIX + Bytes.toString(tableAndFamily),
- conf.get(STORAGE_POLICY_PROPERTY));
- FSUtils.setStoragePolicy(fs, cfPath, policy);
- }
-
- /*
- * Data structure to hold a Writer and amount of data written on it.
- */
- static class WriterLength {
- long written = 0;
- StoreFileWriter writer = null;
- }
-
- /**
- * Return the start keys of all of the regions in this table,
- * as a list of ImmutableBytesWritable.
- */
- private static List<ImmutableBytesWritable> getRegionStartKeys(List<RegionLocator> regionLocators,
- boolean writeMultipleTables)
- throws IOException {
-
- ArrayList<ImmutableBytesWritable> ret = new ArrayList<>();
- for(RegionLocator regionLocator : regionLocators)
- {
- TableName tableName = regionLocator.getName();
- LOG.info("Looking up current regions for table " + tableName);
- byte[][] byteKeys = regionLocator.getStartKeys();
- for (byte[] byteKey : byteKeys) {
- byte[] fullKey = byteKey; //HFileOutputFormat2 use case
- if (writeMultipleTables)
- {
- //MultiTableHFileOutputFormat use case
- fullKey = combineTableNameSuffix(tableName.getName(), byteKey);
- }
- if (LOG.isDebugEnabled()) {
- LOG.debug("SplitPoint startkey for table [" + tableName + "]: [" + Bytes.toStringBinary
- (fullKey) + "]");
- }
- ret.add(new ImmutableBytesWritable(fullKey));
- }
- }
- return ret;
- }
-
- /**
- * Write out a {@link SequenceFile} that can be read by
- * {@link TotalOrderPartitioner} that contains the split points in startKeys.
- */
- @SuppressWarnings("deprecation")
- private static void writePartitions(Configuration conf, Path partitionsPath,
- List<ImmutableBytesWritable> startKeys, boolean writeMultipleTables) throws IOException {
- LOG.info("Writing partition information to " + partitionsPath);
- if (startKeys.isEmpty()) {
- throw new IllegalArgumentException("No regions passed");
- }
-
- // We're generating a list of split points, and we don't ever
- // have keys < the first region (which has an empty start key)
- // so we need to remove it. Otherwise we would end up with an
- // empty reducer with index 0
- TreeSet<ImmutableBytesWritable> sorted = new TreeSet<>(startKeys);
- ImmutableBytesWritable first = sorted.first();
- if (writeMultipleTables) {
- first = new ImmutableBytesWritable(MultiTableHFileOutputFormat.getSuffix(sorted.first
- ().get()));
- }
- if (!first.equals(HConstants.EMPTY_BYTE_ARRAY)) {
- throw new IllegalArgumentException(
- "First region of table should have empty start key. Instead has: "
- + Bytes.toStringBinary(first.get()));
- }
- sorted.remove(sorted.first());
-
- // Write the actual file
- FileSystem fs = partitionsPath.getFileSystem(conf);
- SequenceFile.Writer writer = SequenceFile.createWriter(
- fs, conf, partitionsPath, ImmutableBytesWritable.class,
- NullWritable.class);
-
- try {
- for (ImmutableBytesWritable startKey : sorted) {
- writer.append(startKey, NullWritable.get());
- }
- } finally {
- writer.close();
- }
- }
-
- /**
- * Configure a MapReduce Job to perform an incremental load into the given
- * table. This
- * <ul>
- * <li>Inspects the table to configure a total order partitioner</li>
- * <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
- * <li>Sets the number of reduce tasks to match the current number of regions</li>
- * <li>Sets the output key/value class to match HFileOutputFormat2's requirements</li>
- * <li>Sets the reducer up to perform the appropriate sorting (either KeyValueSortReducer or
- * PutSortReducer)</li>
- * </ul>
- * The user should be sure to set the map output value class to either KeyValue or Put before
- * running this function.
- */
- public static void configureIncrementalLoad(Job job, Table table, RegionLocator regionLocator)
- throws IOException {
- configureIncrementalLoad(job, table.getDescriptor(), regionLocator);
- }
-
- /**
- * Configure a MapReduce Job to perform an incremental load into the given
- * table. This
- * <ul>
- * <li>Inspects the table to configure a total order partitioner</li>
- * <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
- * <li>Sets the number of reduce tasks to match the current number of regions</li>
- * <li>Sets the output key/value class to match HFileOutputFormat2's requirements</li>
- * <li>Sets the reducer up to perform the appropriate sorting (either KeyValueSortReducer or
- * PutSortReducer)</li>
- * </ul>
- * The user should be sure to set the map output value class to either KeyValue or Put before
- * running this function.
- */
- public static void configureIncrementalLoad(Job job, TableDescriptor tableDescriptor,
- RegionLocator regionLocator) throws IOException {
- ArrayList<TableInfo> singleTableInfo = new ArrayList<>();
- singleTableInfo.add(new TableInfo(tableDescriptor, regionLocator));
- configureIncrementalLoad(job, singleTableInfo, HFileOutputFormat2.class);
- }
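A minimal driver sketch showing how configureIncrementalLoad above is intended to be used for a single-table bulk load. The driver and mapper classes, the table name "exampleTable", the input line format, and the HDFS paths are illustrative assumptions, not part of this code:

  import java.io.IOException;
  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.hbase.HBaseConfiguration;
  import org.apache.hadoop.hbase.TableName;
  import org.apache.hadoop.hbase.client.Connection;
  import org.apache.hadoop.hbase.client.ConnectionFactory;
  import org.apache.hadoop.hbase.client.Put;
  import org.apache.hadoop.hbase.client.RegionLocator;
  import org.apache.hadoop.hbase.client.Table;
  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
  import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
  import org.apache.hadoop.hbase.util.Bytes;
  import org.apache.hadoop.io.LongWritable;
  import org.apache.hadoop.io.Text;
  import org.apache.hadoop.mapreduce.Job;
  import org.apache.hadoop.mapreduce.Mapper;
  import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
  import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

  public class ExampleBulkLoadDriver {

    // Illustrative mapper: parses "row,family,qualifier,value" text lines into Puts.
    public static class ExamplePutMapper
        extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
      @Override
      protected void map(LongWritable key, Text line, Context context)
          throws IOException, InterruptedException {
        String[] f = line.toString().split(",");
        Put put = new Put(Bytes.toBytes(f[0]));
        put.addColumn(Bytes.toBytes(f[1]), Bytes.toBytes(f[2]), Bytes.toBytes(f[3]));
        context.write(new ImmutableBytesWritable(put.getRow()), put);
      }
    }

    public static void main(String[] args) throws Exception {
      Configuration conf = HBaseConfiguration.create();
      Job job = Job.getInstance(conf, "prepare-bulkload");
      job.setJarByClass(ExampleBulkLoadDriver.class);
      job.setMapperClass(ExamplePutMapper.class);
      job.setMapOutputKeyClass(ImmutableBytesWritable.class);
      job.setMapOutputValueClass(Put.class);                // PutSortReducer is then selected automatically
      FileInputFormat.addInputPath(job, new Path("/example/input"));
      FileOutputFormat.setOutputPath(job, new Path("/example/hfile-output"));
      TableName tn = TableName.valueOf("exampleTable");
      try (Connection conn = ConnectionFactory.createConnection(conf);
           Table table = conn.getTable(tn);
           RegionLocator locator = conn.getRegionLocator(tn)) {
        HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(), locator);
        job.waitForCompletion(true);
      }
    }
  }

Loading the generated HFiles into the live table is a separate step, for example with the LoadIncrementalHFiles tool.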
-
- static void configureIncrementalLoad(Job job, List<TableInfo> multiTableInfo, Class<? extends OutputFormat<?, ?>> cls) throws IOException {
- Configuration conf = job.getConfiguration();
- job.setOutputKeyClass(ImmutableBytesWritable.class);
- job.setOutputValueClass(KeyValue.class);
- job.setOutputFormatClass(cls);
-
- if (multiTableInfo.stream().distinct().count() != multiTableInfo.size()) {
- throw new IllegalArgumentException("Duplicate entries found in TableInfo argument");
- }
- boolean writeMultipleTables = false;
- if (MultiTableHFileOutputFormat.class.equals(cls)) {
- writeMultipleTables = true;
- conf.setBoolean(MULTI_TABLE_HFILEOUTPUTFORMAT_CONF_KEY, true);
- }
- // Based on the configured map output class, set the correct reducer to properly
- // sort the incoming values.
- // TODO it would be nice to pick one or the other of these formats.
- if (KeyValue.class.equals(job.getMapOutputValueClass())) {
- job.setReducerClass(KeyValueSortReducer.class);
- } else if (Put.class.equals(job.getMapOutputValueClass())) {
- job.setReducerClass(PutSortReducer.class);
- } else if (Text.class.equals(job.getMapOutputValueClass())) {
- job.setReducerClass(TextSortReducer.class);
- } else {
- LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
- }
-
- conf.setStrings("io.serializations", conf.get("io.serializations"),
- MutationSerialization.class.getName(), ResultSerialization.class.getName(),
- KeyValueSerialization.class.getName());
-
- if (conf.getBoolean(LOCALITY_SENSITIVE_CONF_KEY, DEFAULT_LOCALITY_SENSITIVE)) {
- LOG.info("bulkload locality sensitive enabled");
- }
-
- /* Now get the region start keys for every table required */
- List<String> allTableNames = new ArrayList<>(multiTableInfo.size());
- List<RegionLocator> regionLocators = new ArrayList<>( multiTableInfo.size());
- List<TableDescriptor> tableDescriptors = new ArrayList<>( multiTableInfo.size());
-
- for( TableInfo tableInfo : multiTableInfo )
- {
- regionLocators.add(tableInfo.getRegionLocator());
- allTableNames.add(tableInfo.getRegionLocator().getName().getNameAsString());
- tableDescriptors.add(tableInfo.getTableDescriptor());
- }
-    // Record table names so the writer can be created with favored nodes, and so per-table
-    // column family attributes (compression, block size, etc.) can be decoded
- conf.set(OUTPUT_TABLE_NAME_CONF_KEY, StringUtils.join(allTableNames, Bytes
- .toString(tableSeparator)));
- List<ImmutableBytesWritable> startKeys = getRegionStartKeys(regionLocators, writeMultipleTables);
- // Use table's region boundaries for TOP split points.
- LOG.info("Configuring " + startKeys.size() + " reduce partitions " +
- "to match current region count for all tables");
- job.setNumReduceTasks(startKeys.size());
-
- configurePartitioner(job, startKeys, writeMultipleTables);
- // Set compression algorithms based on column families
-
- conf.set(COMPRESSION_FAMILIES_CONF_KEY, serializeColumnFamilyAttribute(compressionDetails,
- tableDescriptors));
- conf.set(BLOCK_SIZE_FAMILIES_CONF_KEY, serializeColumnFamilyAttribute(blockSizeDetails,
- tableDescriptors));
- conf.set(BLOOM_TYPE_FAMILIES_CONF_KEY, serializeColumnFamilyAttribute(bloomTypeDetails,
- tableDescriptors));
- conf.set(DATABLOCK_ENCODING_FAMILIES_CONF_KEY,
- serializeColumnFamilyAttribute(dataBlockEncodingDetails, tableDescriptors));
-
- TableMapReduceUtil.addDependencyJars(job);
- TableMapReduceUtil.initCredentials(job);
- LOG.info("Incremental output configured for tables: " + StringUtils.join(allTableNames, ","));
- }
-
- public static void configureIncrementalLoadMap(Job job, TableDescriptor tableDescriptor) throws
- IOException {
- Configuration conf = job.getConfiguration();
-
- job.setOutputKeyClass(ImmutableBytesWritable.class);
- job.setOutputValueClass(KeyValue.class);
- job.setOutputFormatClass(HFileOutputFormat2.class);
-
- ArrayList<TableDescriptor> singleTableDescriptor = new ArrayList<>(1);
- singleTableDescriptor.add(tableDescriptor);
-
- conf.set(OUTPUT_TABLE_NAME_CONF_KEY, tableDescriptor.getTableName().getNameAsString());
- // Set compression algorithms based on column families
- conf.set(COMPRESSION_FAMILIES_CONF_KEY,
- serializeColumnFamilyAttribute(compressionDetails, singleTableDescriptor));
- conf.set(BLOCK_SIZE_FAMILIES_CONF_KEY,
- serializeColumnFamilyAttribute(blockSizeDetails, singleTableDescriptor));
- conf.set(BLOOM_TYPE_FAMILIES_CONF_KEY,
- serializeColumnFamilyAttribute(bloomTypeDetails, singleTableDescriptor));
- conf.set(DATABLOCK_ENCODING_FAMILIES_CONF_KEY,
- serializeColumnFamilyAttribute(dataBlockEncodingDetails, singleTableDescriptor));
-
- TableMapReduceUtil.addDependencyJars(job);
- TableMapReduceUtil.initCredentials(job);
- LOG.info("Incremental table " + tableDescriptor.getTableName() + " output configured.");
- }
-
- /**
- * Runs inside the task to deserialize column family to compression algorithm
- * map from the configuration.
- *
- * @param conf to read the serialized values from
- * @return a map from column family to the configured compression algorithm
- */
- @VisibleForTesting
- static Map<byte[], Algorithm> createFamilyCompressionMap(Configuration
- conf) {
- Map<byte[], String> stringMap = createFamilyConfValueMap(conf,
- COMPRESSION_FAMILIES_CONF_KEY);
- Map<byte[], Algorithm> compressionMap = new TreeMap<>(Bytes.BYTES_COMPARATOR);
- for (Map.Entry<byte[], String> e : stringMap.entrySet()) {
- Algorithm algorithm = HFileWriterImpl.compressionByName(e.getValue());
- compressionMap.put(e.getKey(), algorithm);
- }
- return compressionMap;
- }
-
- /**
- * Runs inside the task to deserialize column family to bloom filter type
- * map from the configuration.
- *
- * @param conf to read the serialized values from
- * @return a map from column family to the configured bloom filter type
- */
- @VisibleForTesting
- static Map<byte[], BloomType> createFamilyBloomTypeMap(Configuration conf) {
- Map<byte[], String> stringMap = createFamilyConfValueMap(conf,
- BLOOM_TYPE_FAMILIES_CONF_KEY);
- Map<byte[], BloomType> bloomTypeMap = new TreeMap<>(Bytes.BYTES_COMPARATOR);
- for (Map.Entry<byte[], String> e : stringMap.entrySet()) {
- BloomType bloomType = BloomType.valueOf(e.getValue());
- bloomTypeMap.put(e.getKey(), bloomType);
- }
- return bloomTypeMap;
- }
-
- /**
- * Runs inside the task to deserialize column family to block size
- * map from the configuration.
- *
- * @param conf to read the serialized values from
- * @return a map from column family to the configured block size
- */
- @VisibleForTesting
- static Map<byte[], Integer> createFamilyBlockSizeMap(Configuration conf) {
- Map<byte[], String> stringMap = createFamilyConfValueMap(conf,
- BLOCK_SIZE_FAMILIES_CONF_KEY);
- Map<byte[], Integer> blockSizeMap = new TreeMap<>(Bytes.BYTES_COMPARATOR);
- for (Map.Entry<byte[], String> e : stringMap.entrySet()) {
- Integer blockSize = Integer.parseInt(e.getValue());
- blockSizeMap.put(e.getKey(), blockSize);
- }
- return blockSizeMap;
- }
-
- /**
- * Runs inside the task to deserialize column family to data block encoding
- * type map from the configuration.
- *
- * @param conf to read the serialized values from
- * @return a map from column family to the configured data block encoding
- * (DataBlockEncoding) for the family
- */
- @VisibleForTesting
- static Map<byte[], DataBlockEncoding> createFamilyDataBlockEncodingMap(
- Configuration conf) {
- Map<byte[], String> stringMap = createFamilyConfValueMap(conf,
- DATABLOCK_ENCODING_FAMILIES_CONF_KEY);
- Map<byte[], DataBlockEncoding> encoderMap = new TreeMap<>(Bytes.BYTES_COMPARATOR);
- for (Map.Entry<byte[], String> e : stringMap.entrySet()) {
- encoderMap.put(e.getKey(), DataBlockEncoding.valueOf((e.getValue())));
- }
- return encoderMap;
- }
-
-
- /**
- * Run inside the task to deserialize column family to given conf value map.
- *
- * @param conf to read the serialized values from
- * @param confName conf key to read from the configuration
- * @return a map of column family to the given configuration value
- */
- private static Map<byte[], String> createFamilyConfValueMap(
- Configuration conf, String confName) {
- Map<byte[], String> confValMap = new TreeMap<>(Bytes.BYTES_COMPARATOR);
- String confVal = conf.get(confName, "");
- for (String familyConf : confVal.split("&")) {
- String[] familySplit = familyConf.split("=");
- if (familySplit.length != 2) {
- continue;
- }
- try {
- confValMap.put(URLDecoder.decode(familySplit[0], "UTF-8").getBytes(StandardCharsets.UTF_8),
- URLDecoder.decode(familySplit[1], "UTF-8"));
- } catch (UnsupportedEncodingException e) {
- // will not happen with UTF-8 encoding
- throw new AssertionError(e);
- }
- }
- return confValMap;
- }
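-
To illustrate the wire format parsed above, here is a small self-contained restatement of the same split/URL-decode logic with a made-up value (the real keys written by serializeColumnFamilyAttribute below additionally carry the table name prefix):

    import java.net.URLDecoder;
    import java.util.Map;
    import java.util.TreeMap;

    public class FamilyConfValueDemo {
      public static void main(String[] args) throws Exception {
        // Hypothetical serialized value in the "key=value&key=value" form used above;
        // both sides of each pair are URL-encoded when written.
        String confVal = "f1=GZ&f2=NONE";
        Map<String, String> parsed = new TreeMap<>();
        for (String familyConf : confVal.split("&")) {
          String[] kv = familyConf.split("=");
          if (kv.length != 2) {
            continue; // ignore malformed entries, as the method above does
          }
          parsed.put(URLDecoder.decode(kv[0], "UTF-8"), URLDecoder.decode(kv[1], "UTF-8"));
        }
        System.out.println(parsed); // prints {f1=GZ, f2=NONE}
      }
    }
-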
-
- /**
- * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
- * <code>splitPoints</code>. Cleans up the partitions file after the job exits.
- */
- static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints, boolean
- writeMultipleTables)
- throws IOException {
- Configuration conf = job.getConfiguration();
- // create the partitions file
- FileSystem fs = FileSystem.get(conf);
- String hbaseTmpFsDir =
- conf.get(HConstants.TEMPORARY_FS_DIRECTORY_KEY,
- HConstants.DEFAULT_TEMPORARY_HDFS_DIRECTORY);
- Path partitionsPath = new Path(hbaseTmpFsDir, "partitions_" + UUID.randomUUID());
- fs.makeQualified(partitionsPath);
- writePartitions(conf, partitionsPath, splitPoints, writeMultipleTables);
- fs.deleteOnExit(partitionsPath);
-
- // configure job to use it
- job.setPartitionerClass(TotalOrderPartitioner.class);
- TotalOrderPartitioner.setPartitionFile(conf, partitionsPath);
- }
-
- @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "RCN_REDUNDANT_NULLCHECK_OF_NONNULL_VALUE")
- @VisibleForTesting
- static String serializeColumnFamilyAttribute(Function<ColumnFamilyDescriptor, String> fn, List<TableDescriptor> allTables)
- throws UnsupportedEncodingException {
- StringBuilder attributeValue = new StringBuilder();
- int i = 0;
- for (TableDescriptor tableDescriptor : allTables) {
- if (tableDescriptor == null) {
- // could happen with mock table instance
- // CODEREVIEW: Can I set an empty string in conf if mock table instance?
- return "";
- }
- for (ColumnFamilyDescriptor familyDescriptor : tableDescriptor.getColumnFamilies()) {
- if (i++ > 0) {
- attributeValue.append('&');
- }
- attributeValue.append(URLEncoder.encode(
- Bytes.toString(combineTableNameSuffix(tableDescriptor.getTableName().getName(), familyDescriptor.getName())),
- "UTF-8"));
- attributeValue.append('=');
- attributeValue.append(URLEncoder.encode(fn.apply(familyDescriptor), "UTF-8"));
- }
- }
-    // Note: no trailing ampersand to strip; separators are only added between entries
- return attributeValue.toString();
- }
-
- /**
- * Maps a column family descriptor to the name of its configured compression
- * algorithm. Used to serialize the per-family compression map into the
- * configuration while setting up the MR job for incremental load.
- */
- @VisibleForTesting
- static Function<ColumnFamilyDescriptor, String> compressionDetails = familyDescriptor ->
- familyDescriptor.getCompressionType().getName();
-
- /**
- * Maps a column family descriptor to its configured block size. Used to
- * serialize the per-family block size map into the configuration while
- * setting up the MR job for incremental load.
- */
- @VisibleForTesting
- static Function<ColumnFamilyDescriptor, String> blockSizeDetails = familyDescriptor -> String
- .valueOf(familyDescriptor.getBlocksize());
-
- /**
- * Maps a column family descriptor to its configured bloom filter type,
- * falling back to the default when unset. Used to serialize the per-family
- * bloom type map into the configuration while setting up the MR job for
- * incremental load.
- */
- @VisibleForTesting
- static Function<ColumnFamilyDescriptor, String> bloomTypeDetails = familyDescriptor -> {
- String bloomType = familyDescriptor.getBloomFilterType().toString();
- if (bloomType == null) {
- bloomType = ColumnFamilyDescriptorBuilder.DEFAULT_BLOOMFILTER.name();
- }
- return bloomType;
- };
-
- /**
- * Maps a column family descriptor to its configured data block encoding
- * (NONE when unset). Used to serialize the per-family encoding map into the
- * configuration while setting up the MR job for incremental load.
- */
- @VisibleForTesting
- static Function<ColumnFamilyDescriptor, String> dataBlockEncodingDetails = familyDescriptor -> {
- DataBlockEncoding encoding = familyDescriptor.getDataBlockEncoding();
- if (encoding == null) {
- encoding = DataBlockEncoding.NONE;
- }
- return encoding.toString();
- };
-
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/HRegionPartitioner.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/HRegionPartitioner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/HRegionPartitioner.java
deleted file mode 100644
index 3475a48..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/HRegionPartitioner.java
+++ /dev/null
@@ -1,140 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.RegionLocator;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.mapred.TableOutputFormat;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.mapreduce.Partitioner;
-
-/**
- * This is used to partition the output keys into groups of keys.
- * Keys are grouped according to the regions that currently exist
- * so that each reducer fills a single region and the load is distributed.
- *
- * <p>This class is not suitable as a partitioner for creating hfiles
- * for incremental bulk loads as region spread will likely change between time of
- * hfile creation and load time. See {@link LoadIncrementalHFiles}
- * and <a href="http://hbase.apache.org/book.html#arch.bulk.load">Bulk Load</a>.
- *
- * @param <KEY> The type of the key.
- * @param <VALUE> The type of the value.
- */
-@InterfaceAudience.Public
-public class HRegionPartitioner<KEY, VALUE>
-extends Partitioner<ImmutableBytesWritable, VALUE>
-implements Configurable {
-
- private static final Log LOG = LogFactory.getLog(HRegionPartitioner.class);
- private Configuration conf = null;
- // Connection and locator are not cleaned up; they just die when partitioner is done.
- private Connection connection;
- private RegionLocator locator;
- private byte[][] startKeys;
-
- /**
- * Gets the partition number for a given key (hence record) given the total
- * number of partitions i.e. number of reduce-tasks for the job.
- *
- * <p>Typically a hash function on all or a subset of the key.</p>
- *
- * @param key The key to be partitioned.
- * @param value The entry value.
- * @param numPartitions The total number of partitions.
- * @return The partition number for the <code>key</code>.
- * @see org.apache.hadoop.mapreduce.Partitioner#getPartition(
- * java.lang.Object, java.lang.Object, int)
- */
- @Override
- public int getPartition(ImmutableBytesWritable key,
- VALUE value, int numPartitions) {
- byte[] region = null;
- // Only one region return 0
- if (this.startKeys.length == 1){
- return 0;
- }
- try {
- // Not sure if this is cached after a split so we could have problems
- // here if a region splits while mapping
- region = this.locator.getRegionLocation(key.get()).getRegionInfo().getStartKey();
- } catch (IOException e) {
- LOG.error(e);
- }
- for (int i = 0; i < this.startKeys.length; i++){
- if (Bytes.compareTo(region, this.startKeys[i]) == 0 ){
- if (i >= numPartitions-1){
- // cover the case where we have fewer reducers than regions.
- return (Integer.toString(i).hashCode()
- & Integer.MAX_VALUE) % numPartitions;
- }
- return i;
- }
- }
- // if above fails to find start key that match we need to return something
- return 0;
- }
-
- /**
- * Returns the current configuration.
- *
- * @return The current configuration.
- * @see org.apache.hadoop.conf.Configurable#getConf()
- */
- @Override
- public Configuration getConf() {
- return conf;
- }
-
- /**
- * Sets the configuration. This is used to determine the start keys for the
- * given table.
- *
- * @param configuration The configuration to set.
- * @see org.apache.hadoop.conf.Configurable#setConf(
- * org.apache.hadoop.conf.Configuration)
- */
- @Override
- public void setConf(Configuration configuration) {
- this.conf = HBaseConfiguration.create(configuration);
- try {
- this.connection = ConnectionFactory.createConnection(HBaseConfiguration.create(conf));
- TableName tableName = TableName.valueOf(conf.get(TableOutputFormat.OUTPUT_TABLE));
- this.locator = this.connection.getRegionLocator(tableName);
- } catch (IOException e) {
- LOG.error(e);
- }
- try {
- this.startKeys = this.locator.getStartKeys();
- } catch (IOException e) {
- LOG.error(e);
- }
- }
-}
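A minimal sketch (hypothetical table name, mapper omitted) showing one way to plug this partitioner into a reduce-side job via TableMapReduceUtil:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.mapreduce.HRegionPartitioner;
    import org.apache.hadoop.hbase.mapreduce.IdentityTableReducer;
    import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
    import org.apache.hadoop.mapreduce.Job;

    public class RegionAlignedWriteJob {
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        Job job = Job.getInstance(conf, "write-by-region");
        // Mapper setup omitted; it must emit row keys (ImmutableBytesWritable) and
        // Put/Delete values for the reducer below.
        // Passing HRegionPartitioner groups keys by the existing regions of the
        // hypothetical "usertable", so each reducer writes into a single region.
        TableMapReduceUtil.initTableReducerJob(
            "usertable", IdentityTableReducer.class, job, HRegionPartitioner.class);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
      }
    }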
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/HashTable.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/HashTable.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/HashTable.java
deleted file mode 100644
index dfac471..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/HashTable.java
+++ /dev/null
@@ -1,747 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.io.OutputStreamWriter;
-import java.security.MessageDigest;
-import java.security.NoSuchAlgorithmException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-import java.util.Properties;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.Pair;
-import org.apache.hadoop.io.MapFile;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat;
-import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;
-import org.apache.hadoop.util.GenericOptionsParser;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.base.Charsets;
-import org.apache.hadoop.hbase.shaded.com.google.common.base.Throwables;
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.Ordering;
-
-public class HashTable extends Configured implements Tool {
-
- private static final Log LOG = LogFactory.getLog(HashTable.class);
-
- private static final int DEFAULT_BATCH_SIZE = 8000;
-
- private final static String HASH_BATCH_SIZE_CONF_KEY = "hash.batch.size";
- final static String PARTITIONS_FILE_NAME = "partitions";
- final static String MANIFEST_FILE_NAME = "manifest";
- final static String HASH_DATA_DIR = "hashes";
- final static String OUTPUT_DATA_FILE_PREFIX = "part-r-";
- private final static String TMP_MANIFEST_FILE_NAME = "manifest.tmp";
-
- TableHash tableHash = new TableHash();
- Path destPath;
-
- public HashTable(Configuration conf) {
- super(conf);
- }
-
- public static class TableHash {
-
- Path hashDir;
-
- String tableName;
- String families = null;
- long batchSize = DEFAULT_BATCH_SIZE;
- int numHashFiles = 0;
- byte[] startRow = HConstants.EMPTY_START_ROW;
- byte[] stopRow = HConstants.EMPTY_END_ROW;
- int scanBatch = 0;
- int versions = -1;
- long startTime = 0;
- long endTime = 0;
-
- List<ImmutableBytesWritable> partitions;
-
- public static TableHash read(Configuration conf, Path hashDir) throws IOException {
- TableHash tableHash = new TableHash();
- FileSystem fs = hashDir.getFileSystem(conf);
- tableHash.hashDir = hashDir;
- tableHash.readPropertiesFile(fs, new Path(hashDir, MANIFEST_FILE_NAME));
- tableHash.readPartitionFile(fs, conf, new Path(hashDir, PARTITIONS_FILE_NAME));
- return tableHash;
- }
-
- void writePropertiesFile(FileSystem fs, Path path) throws IOException {
- Properties p = new Properties();
- p.setProperty("table", tableName);
- if (families != null) {
- p.setProperty("columnFamilies", families);
- }
- p.setProperty("targetBatchSize", Long.toString(batchSize));
- p.setProperty("numHashFiles", Integer.toString(numHashFiles));
- if (!isTableStartRow(startRow)) {
- p.setProperty("startRowHex", Bytes.toHex(startRow));
- }
- if (!isTableEndRow(stopRow)) {
- p.setProperty("stopRowHex", Bytes.toHex(stopRow));
- }
- if (scanBatch > 0) {
- p.setProperty("scanBatch", Integer.toString(scanBatch));
- }
- if (versions >= 0) {
- p.setProperty("versions", Integer.toString(versions));
- }
- if (startTime != 0) {
- p.setProperty("startTimestamp", Long.toString(startTime));
- }
- if (endTime != 0) {
- p.setProperty("endTimestamp", Long.toString(endTime));
- }
-
- try (OutputStreamWriter osw = new OutputStreamWriter(fs.create(path), Charsets.UTF_8)) {
- p.store(osw, null);
- }
- }
-
- void readPropertiesFile(FileSystem fs, Path path) throws IOException {
- Properties p = new Properties();
- try (FSDataInputStream in = fs.open(path)) {
- try (InputStreamReader isr = new InputStreamReader(in, Charsets.UTF_8)) {
- p.load(isr);
- }
- }
- tableName = p.getProperty("table");
- families = p.getProperty("columnFamilies");
- batchSize = Long.parseLong(p.getProperty("targetBatchSize"));
- numHashFiles = Integer.parseInt(p.getProperty("numHashFiles"));
-
- String startRowHex = p.getProperty("startRowHex");
- if (startRowHex != null) {
- startRow = Bytes.fromHex(startRowHex);
- }
- String stopRowHex = p.getProperty("stopRowHex");
- if (stopRowHex != null) {
- stopRow = Bytes.fromHex(stopRowHex);
- }
-
- String scanBatchString = p.getProperty("scanBatch");
- if (scanBatchString != null) {
- scanBatch = Integer.parseInt(scanBatchString);
- }
-
- String versionString = p.getProperty("versions");
- if (versionString != null) {
- versions = Integer.parseInt(versionString);
- }
-
- String startTimeString = p.getProperty("startTimestamp");
- if (startTimeString != null) {
- startTime = Long.parseLong(startTimeString);
- }
-
- String endTimeString = p.getProperty("endTimestamp");
- if (endTimeString != null) {
- endTime = Long.parseLong(endTimeString);
- }
- }
-
- Scan initScan() throws IOException {
- Scan scan = new Scan();
- scan.setCacheBlocks(false);
- if (startTime != 0 || endTime != 0) {
- scan.setTimeRange(startTime, endTime == 0 ? HConstants.LATEST_TIMESTAMP : endTime);
- }
- if (scanBatch > 0) {
- scan.setBatch(scanBatch);
- }
- if (versions >= 0) {
- scan.setMaxVersions(versions);
- }
- if (!isTableStartRow(startRow)) {
- scan.setStartRow(startRow);
- }
- if (!isTableEndRow(stopRow)) {
- scan.setStopRow(stopRow);
- }
- if(families != null) {
- for(String fam : families.split(",")) {
- scan.addFamily(Bytes.toBytes(fam));
- }
- }
- return scan;
- }
-
- /**
-     * Choose partitions between row ranges to hash to a single output file.
-     * Selects region boundaries that fall within the scan range and groups them
-     * into the desired number of partitions.
- */
- void selectPartitions(Pair<byte[][], byte[][]> regionStartEndKeys) {
- List<byte[]> startKeys = new ArrayList<>();
- for (int i = 0; i < regionStartEndKeys.getFirst().length; i++) {
- byte[] regionStartKey = regionStartEndKeys.getFirst()[i];
- byte[] regionEndKey = regionStartEndKeys.getSecond()[i];
-
- // if the scan begins after this region ends, or ends before this region starts,
- // then drop this region; in other words:
- // IF (scan begins before the end of this region
- // AND scan ends after the start of this region)
- // THEN include this region
- if ((isTableStartRow(startRow) || isTableEndRow(regionEndKey)
- || Bytes.compareTo(startRow, regionEndKey) < 0)
- && (isTableEndRow(stopRow) || isTableStartRow(regionStartKey)
- || Bytes.compareTo(stopRow, regionStartKey) > 0)) {
- startKeys.add(regionStartKey);
- }
- }
-
- int numRegions = startKeys.size();
- if (numHashFiles == 0) {
- numHashFiles = numRegions / 100;
- }
- if (numHashFiles == 0) {
- numHashFiles = 1;
- }
- if (numHashFiles > numRegions) {
- // can't partition within regions
- numHashFiles = numRegions;
- }
-
- // choose a subset of start keys to group regions into ranges
- partitions = new ArrayList<>(numHashFiles - 1);
- // skip the first start key as it is not a partition between ranges.
- for (long i = 1; i < numHashFiles; i++) {
- int splitIndex = (int) (numRegions * i / numHashFiles);
- partitions.add(new ImmutableBytesWritable(startKeys.get(splitIndex)));
- }
- }
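-
For instance (an illustrative calculation, not from the patch): if 250 region start keys fall inside the scan range and numhashfiles is left at its default of 0, numHashFiles becomes 250 / 100 = 2, and a single partition key is chosen at start-key index (int)(250 * 1 / 2) = 125, so the table is hashed into two output files split at that region boundary.
-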
-
- void writePartitionFile(Configuration conf, Path path) throws IOException {
- FileSystem fs = path.getFileSystem(conf);
- @SuppressWarnings("deprecation")
- SequenceFile.Writer writer = SequenceFile.createWriter(
- fs, conf, path, ImmutableBytesWritable.class, NullWritable.class);
-
- for (int i = 0; i < partitions.size(); i++) {
- writer.append(partitions.get(i), NullWritable.get());
- }
- writer.close();
- }
-
- private void readPartitionFile(FileSystem fs, Configuration conf, Path path)
- throws IOException {
- @SuppressWarnings("deprecation")
- SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
- ImmutableBytesWritable key = new ImmutableBytesWritable();
- partitions = new ArrayList<>();
- while (reader.next(key)) {
- partitions.add(new ImmutableBytesWritable(key.copyBytes()));
- }
- reader.close();
-
- if (!Ordering.natural().isOrdered(partitions)) {
- throw new IOException("Partitions are not ordered!");
- }
- }
-
- @Override
- public String toString() {
- StringBuilder sb = new StringBuilder();
- sb.append("tableName=").append(tableName);
- if (families != null) {
- sb.append(", families=").append(families);
- }
- sb.append(", batchSize=").append(batchSize);
- sb.append(", numHashFiles=").append(numHashFiles);
- if (!isTableStartRow(startRow)) {
- sb.append(", startRowHex=").append(Bytes.toHex(startRow));
- }
- if (!isTableEndRow(stopRow)) {
- sb.append(", stopRowHex=").append(Bytes.toHex(stopRow));
- }
- if (scanBatch >= 0) {
- sb.append(", scanBatch=").append(scanBatch);
- }
- if (versions >= 0) {
- sb.append(", versions=").append(versions);
- }
- if (startTime != 0) {
- sb.append("startTime=").append(startTime);
- }
- if (endTime != 0) {
- sb.append("endTime=").append(endTime);
- }
- return sb.toString();
- }
-
- static String getDataFileName(int hashFileIndex) {
- return String.format(HashTable.OUTPUT_DATA_FILE_PREFIX + "%05d", hashFileIndex);
- }
-
- /**
- * Open a TableHash.Reader starting at the first hash at or after the given key.
- * @throws IOException
- */
- public Reader newReader(Configuration conf, ImmutableBytesWritable startKey)
- throws IOException {
- return new Reader(conf, startKey);
- }
-
- public class Reader implements java.io.Closeable {
- private final Configuration conf;
-
- private int hashFileIndex;
- private MapFile.Reader mapFileReader;
-
- private boolean cachedNext;
- private ImmutableBytesWritable key;
- private ImmutableBytesWritable hash;
-
- Reader(Configuration conf, ImmutableBytesWritable startKey) throws IOException {
- this.conf = conf;
- int partitionIndex = Collections.binarySearch(partitions, startKey);
- if (partitionIndex >= 0) {
- // if the key is equal to a partition, then go to the file after that partition
- hashFileIndex = partitionIndex+1;
- } else {
- // if the key is between partitions, then go to the file between those partitions
- hashFileIndex = -1-partitionIndex;
- }
- openHashFile();
-
- // MapFiles don't make it easy to seek() so that the subsequent next() returns
- // the desired key/value pair. So we cache it for the first call of next().
- hash = new ImmutableBytesWritable();
- key = (ImmutableBytesWritable) mapFileReader.getClosest(startKey, hash);
- if (key == null) {
- cachedNext = false;
- hash = null;
- } else {
- cachedNext = true;
- }
- }
-
- /**
- * Read the next key/hash pair.
- * Returns true if such a pair exists and false when at the end of the data.
- */
- public boolean next() throws IOException {
- if (cachedNext) {
- cachedNext = false;
- return true;
- }
- key = new ImmutableBytesWritable();
- hash = new ImmutableBytesWritable();
- while (true) {
- boolean hasNext = mapFileReader.next(key, hash);
- if (hasNext) {
- return true;
- }
- hashFileIndex++;
- if (hashFileIndex < TableHash.this.numHashFiles) {
- mapFileReader.close();
- openHashFile();
- } else {
- key = null;
- hash = null;
- return false;
- }
- }
- }
-
- /**
- * Get the current key
- * @return the current key or null if there is no current key
- */
- public ImmutableBytesWritable getCurrentKey() {
- return key;
- }
-
- /**
- * Get the current hash
- * @return the current hash or null if there is no current hash
- */
- public ImmutableBytesWritable getCurrentHash() {
- return hash;
- }
-
- private void openHashFile() throws IOException {
- if (mapFileReader != null) {
- mapFileReader.close();
- }
- Path dataDir = new Path(TableHash.this.hashDir, HASH_DATA_DIR);
- Path dataFile = new Path(dataDir, getDataFileName(hashFileIndex));
- mapFileReader = new MapFile.Reader(dataFile, conf);
- }
-
- @Override
- public void close() throws IOException {
- mapFileReader.close();
- }
- }
- }
-
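For reference, a small sketch of reading the emitted key/hash pairs back through the Reader above (the hash directory is a hypothetical placeholder; SyncTable is the usual consumer of this data):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.HConstants;
    import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
    import org.apache.hadoop.hbase.mapreduce.HashTable;
    import org.apache.hadoop.hbase.util.Bytes;

    public class DumpHashes {
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // Hypothetical output directory of a previous HashTable run.
        HashTable.TableHash tableHash =
            HashTable.TableHash.read(conf, new Path("/hashes/testTable"));
        // Start from the beginning of the table and walk every (start key, hash) pair.
        try (HashTable.TableHash.Reader reader =
            tableHash.newReader(conf, new ImmutableBytesWritable(HConstants.EMPTY_START_ROW))) {
          while (reader.next()) {
            System.out.println(Bytes.toStringBinary(reader.getCurrentKey().copyBytes())
                + " -> " + Bytes.toStringBinary(reader.getCurrentHash().copyBytes()));
          }
        }
      }
    }
-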
- static boolean isTableStartRow(byte[] row) {
- return Bytes.equals(HConstants.EMPTY_START_ROW, row);
- }
-
- static boolean isTableEndRow(byte[] row) {
- return Bytes.equals(HConstants.EMPTY_END_ROW, row);
- }
-
- public Job createSubmittableJob(String[] args) throws IOException {
- Path partitionsPath = new Path(destPath, PARTITIONS_FILE_NAME);
- generatePartitions(partitionsPath);
-
- Job job = Job.getInstance(getConf(),
- getConf().get("mapreduce.job.name", "hashTable_" + tableHash.tableName));
- Configuration jobConf = job.getConfiguration();
- jobConf.setLong(HASH_BATCH_SIZE_CONF_KEY, tableHash.batchSize);
- job.setJarByClass(HashTable.class);
-
- TableMapReduceUtil.initTableMapperJob(tableHash.tableName, tableHash.initScan(),
- HashMapper.class, ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
-
- // use a TotalOrderPartitioner and reducers to group region output into hash files
- job.setPartitionerClass(TotalOrderPartitioner.class);
- TotalOrderPartitioner.setPartitionFile(jobConf, partitionsPath);
- job.setReducerClass(Reducer.class); // identity reducer
- job.setNumReduceTasks(tableHash.numHashFiles);
- job.setOutputKeyClass(ImmutableBytesWritable.class);
- job.setOutputValueClass(ImmutableBytesWritable.class);
- job.setOutputFormatClass(MapFileOutputFormat.class);
- FileOutputFormat.setOutputPath(job, new Path(destPath, HASH_DATA_DIR));
-
- return job;
- }
-
- private void generatePartitions(Path partitionsPath) throws IOException {
- Connection connection = ConnectionFactory.createConnection(getConf());
- Pair<byte[][], byte[][]> regionKeys
- = connection.getRegionLocator(TableName.valueOf(tableHash.tableName)).getStartEndKeys();
- connection.close();
-
- tableHash.selectPartitions(regionKeys);
- LOG.info("Writing " + tableHash.partitions.size() + " partition keys to " + partitionsPath);
-
- tableHash.writePartitionFile(getConf(), partitionsPath);
- }
-
- static class ResultHasher {
- private MessageDigest digest;
-
- private boolean batchStarted = false;
- private ImmutableBytesWritable batchStartKey;
- private ImmutableBytesWritable batchHash;
- private long batchSize = 0;
-
-
- public ResultHasher() {
- try {
- digest = MessageDigest.getInstance("MD5");
- } catch (NoSuchAlgorithmException e) {
- Throwables.propagate(e);
- }
- }
-
- public void startBatch(ImmutableBytesWritable row) {
- if (batchStarted) {
- throw new RuntimeException("Cannot start new batch without finishing existing one.");
- }
- batchStarted = true;
- batchSize = 0;
- batchStartKey = row;
- batchHash = null;
- }
-
- public void hashResult(Result result) {
- if (!batchStarted) {
- throw new RuntimeException("Cannot add to batch that has not been started.");
- }
- for (Cell cell : result.rawCells()) {
- int rowLength = cell.getRowLength();
- int familyLength = cell.getFamilyLength();
- int qualifierLength = cell.getQualifierLength();
- int valueLength = cell.getValueLength();
- digest.update(cell.getRowArray(), cell.getRowOffset(), rowLength);
- digest.update(cell.getFamilyArray(), cell.getFamilyOffset(), familyLength);
- digest.update(cell.getQualifierArray(), cell.getQualifierOffset(), qualifierLength);
- long ts = cell.getTimestamp();
- for (int i = 8; i > 0; i--) {
- digest.update((byte) ts);
- ts >>>= 8;
- }
- digest.update(cell.getValueArray(), cell.getValueOffset(), valueLength);
-
- batchSize += rowLength + familyLength + qualifierLength + 8 + valueLength;
- }
- }
-
- public void finishBatch() {
- if (!batchStarted) {
- throw new RuntimeException("Cannot finish batch that has not started.");
- }
- batchStarted = false;
- batchHash = new ImmutableBytesWritable(digest.digest());
- }
-
- public boolean isBatchStarted() {
- return batchStarted;
- }
-
- public ImmutableBytesWritable getBatchStartKey() {
- return batchStartKey;
- }
-
- public ImmutableBytesWritable getBatchHash() {
- return batchHash;
- }
-
- public long getBatchSize() {
- return batchSize;
- }
- }
-
- public static class HashMapper
- extends TableMapper<ImmutableBytesWritable, ImmutableBytesWritable> {
-
- private ResultHasher hasher;
- private long targetBatchSize;
-
- private ImmutableBytesWritable currentRow;
-
- @Override
- protected void setup(Context context) throws IOException, InterruptedException {
- targetBatchSize = context.getConfiguration()
- .getLong(HASH_BATCH_SIZE_CONF_KEY, DEFAULT_BATCH_SIZE);
- hasher = new ResultHasher();
-
- TableSplit split = (TableSplit) context.getInputSplit();
- hasher.startBatch(new ImmutableBytesWritable(split.getStartRow()));
- }
-
- @Override
- protected void map(ImmutableBytesWritable key, Result value, Context context)
- throws IOException, InterruptedException {
-
- if (currentRow == null || !currentRow.equals(key)) {
- currentRow = new ImmutableBytesWritable(key); // not immutable
-
- if (hasher.getBatchSize() >= targetBatchSize) {
- hasher.finishBatch();
- context.write(hasher.getBatchStartKey(), hasher.getBatchHash());
- hasher.startBatch(currentRow);
- }
- }
-
- hasher.hashResult(value);
- }
-
- @Override
- protected void cleanup(Context context) throws IOException, InterruptedException {
- hasher.finishBatch();
- context.write(hasher.getBatchStartKey(), hasher.getBatchHash());
- }
- }
-
- private void writeTempManifestFile() throws IOException {
- Path tempManifestPath = new Path(destPath, TMP_MANIFEST_FILE_NAME);
- FileSystem fs = tempManifestPath.getFileSystem(getConf());
- tableHash.writePropertiesFile(fs, tempManifestPath);
- }
-
- private void completeManifest() throws IOException {
- Path tempManifestPath = new Path(destPath, TMP_MANIFEST_FILE_NAME);
- Path manifestPath = new Path(destPath, MANIFEST_FILE_NAME);
- FileSystem fs = tempManifestPath.getFileSystem(getConf());
- fs.rename(tempManifestPath, manifestPath);
- }
-
- private static final int NUM_ARGS = 2;
- private static void printUsage(final String errorMsg) {
- if (errorMsg != null && errorMsg.length() > 0) {
- System.err.println("ERROR: " + errorMsg);
- System.err.println();
- }
- System.err.println("Usage: HashTable [options] <tablename> <outputpath>");
- System.err.println();
- System.err.println("Options:");
- System.err.println(" batchsize the target amount of bytes to hash in each batch");
- System.err.println(" rows are added to the batch until this size is reached");
- System.err.println(" (defaults to " + DEFAULT_BATCH_SIZE + " bytes)");
- System.err.println(" numhashfiles the number of hash files to create");
- System.err.println(" if set to fewer than number of regions then");
- System.err.println(" the job will create this number of reducers");
- System.err.println(" (defaults to 1/100 of regions -- at least 1)");
- System.err.println(" startrow the start row");
- System.err.println(" stoprow the stop row");
- System.err.println(" starttime beginning of the time range (unixtime in millis)");
- System.err.println(" without endtime means from starttime to forever");
- System.err.println(" endtime end of the time range. Ignored if no starttime specified.");
- System.err.println(" scanbatch scanner batch size to support intra row scans");
- System.err.println(" versions number of cell versions to include");
- System.err.println(" families comma-separated list of families to include");
- System.err.println();
- System.err.println("Args:");
- System.err.println(" tablename Name of the table to hash");
- System.err.println(" outputpath Filesystem path to put the output data");
- System.err.println();
- System.err.println("Examples:");
- System.err.println(" To hash 'TestTable' in 32kB batches for a 1 hour window into 50 files:");
- System.err.println(" $ hbase " +
- "org.apache.hadoop.hbase.mapreduce.HashTable --batchsize=32000 --numhashfiles=50"
- + " --starttime=1265875194289 --endtime=1265878794289 --families=cf2,cf3"
- + " TestTable /hashes/testTable");
- }
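-
Given the constants above, the output directory of a run like the example (hypothetical paths, assuming --numhashfiles=50) ends up looking roughly like:

    /hashes/testTable/
      manifest           properties written by writePropertiesFile (table, families,
                         batch size, key/time range, numHashFiles)
      partitions         SequenceFile of the chosen partition start keys
      hashes/
        part-r-00000/    one MapFile (data + index) per hash file
        ...
        part-r-00049/
-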
-
- private boolean doCommandLine(final String[] args) {
- if (args.length < NUM_ARGS) {
- printUsage(null);
- return false;
- }
- try {
-
- tableHash.tableName = args[args.length-2];
- destPath = new Path(args[args.length-1]);
-
- for (int i = 0; i < args.length - NUM_ARGS; i++) {
- String cmd = args[i];
- if (cmd.equals("-h") || cmd.startsWith("--h")) {
- printUsage(null);
- return false;
- }
-
- final String batchSizeArgKey = "--batchsize=";
- if (cmd.startsWith(batchSizeArgKey)) {
- tableHash.batchSize = Long.parseLong(cmd.substring(batchSizeArgKey.length()));
- continue;
- }
-
- final String numHashFilesArgKey = "--numhashfiles=";
- if (cmd.startsWith(numHashFilesArgKey)) {
- tableHash.numHashFiles = Integer.parseInt(cmd.substring(numHashFilesArgKey.length()));
- continue;
- }
-
- final String startRowArgKey = "--startrow=";
- if (cmd.startsWith(startRowArgKey)) {
- tableHash.startRow = Bytes.fromHex(cmd.substring(startRowArgKey.length()));
- continue;
- }
-
- final String stopRowArgKey = "--stoprow=";
- if (cmd.startsWith(stopRowArgKey)) {
- tableHash.stopRow = Bytes.fromHex(cmd.substring(stopRowArgKey.length()));
- continue;
- }
-
- final String startTimeArgKey = "--starttime=";
- if (cmd.startsWith(startTimeArgKey)) {
- tableHash.startTime = Long.parseLong(cmd.substring(startTimeArgKey.length()));
- continue;
- }
-
- final String endTimeArgKey = "--endtime=";
- if (cmd.startsWith(endTimeArgKey)) {
- tableHash.endTime = Long.parseLong(cmd.substring(endTimeArgKey.length()));
- continue;
- }
-
- final String scanBatchArgKey = "--scanbatch=";
- if (cmd.startsWith(scanBatchArgKey)) {
- tableHash.scanBatch = Integer.parseInt(cmd.substring(scanBatchArgKey.length()));
- continue;
- }
-
- final String versionsArgKey = "--versions=";
- if (cmd.startsWith(versionsArgKey)) {
- tableHash.versions = Integer.parseInt(cmd.substring(versionsArgKey.length()));
- continue;
- }
-
- final String familiesArgKey = "--families=";
- if (cmd.startsWith(familiesArgKey)) {
- tableHash.families = cmd.substring(familiesArgKey.length());
- continue;
- }
-
- printUsage("Invalid argument '" + cmd + "'");
- return false;
- }
- if ((tableHash.startTime != 0 || tableHash.endTime != 0)
- && (tableHash.startTime >= tableHash.endTime)) {
- printUsage("Invalid time range filter: starttime="
- + tableHash.startTime + " >= endtime=" + tableHash.endTime);
- return false;
- }
-
- } catch (Exception e) {
- e.printStackTrace();
- printUsage("Can't start because " + e.getMessage());
- return false;
- }
- return true;
- }
-
- /**
- * Main entry point.
- */
- public static void main(String[] args) throws Exception {
- int ret = ToolRunner.run(new HashTable(HBaseConfiguration.create()), args);
- System.exit(ret);
- }
-
- @Override
- public int run(String[] args) throws Exception {
- String[] otherArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs();
- if (!doCommandLine(otherArgs)) {
- return 1;
- }
-
- Job job = createSubmittableJob(otherArgs);
- writeTempManifestFile();
- if (!job.waitForCompletion(true)) {
- LOG.info("Map-reduce job failed!");
- return 1;
- }
- completeManifest();
- return 0;
- }
-
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/IdentityTableMapper.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/IdentityTableMapper.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/IdentityTableMapper.java
deleted file mode 100644
index 7103ef8..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/IdentityTableMapper.java
+++ /dev/null
@@ -1,67 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.mapreduce.Job;
-
-/**
- * Pass the given key and record as-is to the reduce phase.
- */
-@InterfaceAudience.Public
-public class IdentityTableMapper
-extends TableMapper<ImmutableBytesWritable, Result> {
-
- /**
- * Use this before submitting a TableMap job. It will appropriately set up
- * the job.
- *
- * @param table The table name.
- * @param scan The scan with the columns to scan.
- * @param mapper The mapper class.
- * @param job The job configuration.
- * @throws IOException When setting up the job fails.
- */
- @SuppressWarnings("rawtypes")
- public static void initJob(String table, Scan scan,
- Class<? extends TableMapper> mapper, Job job) throws IOException {
- TableMapReduceUtil.initTableMapperJob(table, scan, mapper,
- ImmutableBytesWritable.class, Result.class, job);
- }
-
- /**
- * Pass the key, value to reduce.
- *
- * @param key The current key.
- * @param value The current value.
- * @param context The current context.
- * @throws IOException When writing the record fails.
- * @throws InterruptedException When the job is aborted.
- */
- public void map(ImmutableBytesWritable key, Result value, Context context)
- throws IOException, InterruptedException {
- context.write(key, value);
- }
-
-}
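A minimal map-only sketch (hypothetical table name; output is discarded) showing initJob in use; a real job would normally add a reducer or a concrete output format instead:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.Scan;
    import org.apache.hadoop.hbase.mapreduce.IdentityTableMapper;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;

    public class ScanPassThroughJob {
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        Job job = Job.getInstance(conf, "identity-scan");
        job.setJarByClass(ScanPassThroughJob.class);
        Scan scan = new Scan();
        scan.setCacheBlocks(false); // typical for MapReduce scans
        // Wires TableInputFormat, the scan and the identity mapper onto the job.
        IdentityTableMapper.initJob("sourcetable", scan, IdentityTableMapper.class, job);
        job.setNumReduceTasks(0);                         // map-only in this sketch
        job.setOutputFormatClass(NullOutputFormat.class); // discard the pass-through output
        System.exit(job.waitForCompletion(true) ? 0 : 1);
      }
    }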
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/IdentityTableReducer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/IdentityTableReducer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/IdentityTableReducer.java
deleted file mode 100644
index 5289f46..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/IdentityTableReducer.java
+++ /dev/null
@@ -1,79 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import java.io.IOException;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Mutation;
-import org.apache.hadoop.io.Writable;
-
-/**
- * Convenience class that simply writes all values (which must be
- * {@link org.apache.hadoop.hbase.client.Put Put} or
- * {@link org.apache.hadoop.hbase.client.Delete Delete} instances)
- * passed to it out to the configured HBase table. This works in combination
- * with {@link TableOutputFormat} which actually does the writing to HBase.<p>
- *
- * Keys are passed along but ignored in TableOutputFormat. However, they can
- * be used to control how your values will be divided up amongst the specified
- * number of reducers. <p>
- *
- * You can also use the {@link TableMapReduceUtil} class to set up the two
- * classes in one step:
- * <blockquote><code>
- * TableMapReduceUtil.initTableReducerJob("table", IdentityTableReducer.class, job);
- * </code></blockquote>
- * This will also set the proper {@link TableOutputFormat} which is given the
- * <code>table</code> parameter. The
- * {@link org.apache.hadoop.hbase.client.Put Put} or
- * {@link org.apache.hadoop.hbase.client.Delete Delete} define the
- * row and columns implicitly.
- */
-@InterfaceAudience.Public
-public class IdentityTableReducer
-extends TableReducer<Writable, Mutation, Writable> {
-
- @SuppressWarnings("unused")
- private static final Log LOG = LogFactory.getLog(IdentityTableReducer.class);
-
- /**
- * Writes each given record, consisting of the row key and the given values,
- * to the configured {@link org.apache.hadoop.mapreduce.OutputFormat}.
- * It emits the row key and each {@link org.apache.hadoop.hbase.client.Put Put}
- * or {@link org.apache.hadoop.hbase.client.Delete Delete} as separate pairs.
- *
- * @param key The current row key.
- * @param values The {@link org.apache.hadoop.hbase.client.Put Put} or
- * {@link org.apache.hadoop.hbase.client.Delete Delete} list for the given
- * row.
- * @param context The context of the reduce.
- * @throws IOException When writing the record fails.
- * @throws InterruptedException When the job gets interrupted.
- */
- @Override
- public void reduce(Writable key, Iterable<Mutation> values, Context context)
- throws IOException, InterruptedException {
- for(Mutation putOrDelete : values) {
- context.write(key, putOrDelete);
- }
- }
-}
[41/41] hbase git commit: HBASE-18640 Move mapreduce out of
hbase-server into separate module.
Posted by ap...@apache.org.
HBASE-18640 Move mapreduce out of hbase-server into separate module.
- Moves out o.a.h.h.{mapred, mapreduce} to new hbase-mapreduce module which depends
on hbase-server because of classes like *Snapshot{Input,Output}Format.java, WALs, replication, etc
- hbase-backup depends on it for WALPlayer and MR job stuff
- A bunch of tools needed to be pulled into hbase-mapreduce because of their dependencies on MR.
These are: CompactionTool, LoadTestTool, PerformanceEvaluation, ExportSnapshot
This is a better place for them than hbase-server, but the ideal place would be a separate hbase-tools module.
- There were some tests in hbase-server which were digging into these tools for static util functions or
confs. Moved these to a better, easily shared place; for example, security-related stuff went to HBaseKerberosUtils.
- Note that hbase-mapreduce has secondPartExecution tests. On my machine they took like 20 min, so maybe
more on apache jenkins. That's basically an equal reduction in the runtime of the hbase-server tests, which is a
big win!
Change-Id: Ieeb7235014717ca83ee5cb13b2a27fddfa6838e8
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/664b6be0
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/664b6be0
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/664b6be0
Branch: refs/heads/master
Commit: 664b6be0ef65218328847ea501fa88cb877e6759
Parents: 8d33949
Author: Apekshit Sharma <ap...@apache.org>
Authored: Sun Aug 20 14:34:16 2017 -0700
Committer: Apekshit Sharma <ap...@apache.org>
Committed: Fri Aug 25 18:38:48 2017 -0700
----------------------------------------------------------------------
hbase-assembly/pom.xml | 4 +
.../src/main/assembly/hadoop-two-compat.xml | 1 +
hbase-assembly/src/main/assembly/src.xml | 1 +
hbase-backup/pom.xml | 10 +
hbase-examples/pom.xml | 4 +
hbase-it/pom.xml | 16 +
.../hadoop/hbase/IntegrationTestIngest.java | 5 +-
.../IntegrationTestIngestStripeCompactions.java | 4 +-
.../hbase/IntegrationTestIngestWithMOB.java | 5 +-
.../hbase/IntegrationTestRegionReplicaPerf.java | 3 +-
.../mapreduce/IntegrationTestImportTsv.java | 1 -
.../test/IntegrationTestLoadAndVerify.java | 2 +-
hbase-mapreduce/pom.xml | 316 +++
.../org/apache/hadoop/hbase/mapred/Driver.java | 52 +
.../hadoop/hbase/mapred/GroupingTableMap.java | 157 ++
.../hadoop/hbase/mapred/HRegionPartitioner.java | 95 +
.../hadoop/hbase/mapred/IdentityTableMap.java | 76 +
.../hbase/mapred/IdentityTableReduce.java | 61 +
.../mapred/MultiTableSnapshotInputFormat.java | 128 +
.../apache/hadoop/hbase/mapred/RowCounter.java | 121 +
.../hadoop/hbase/mapred/TableInputFormat.java | 90 +
.../hbase/mapred/TableInputFormatBase.java | 313 +++
.../apache/hadoop/hbase/mapred/TableMap.java | 38 +
.../hadoop/hbase/mapred/TableMapReduceUtil.java | 376 +++
.../hadoop/hbase/mapred/TableOutputFormat.java | 134 +
.../hadoop/hbase/mapred/TableRecordReader.java | 139 +
.../hbase/mapred/TableRecordReaderImpl.java | 259 ++
.../apache/hadoop/hbase/mapred/TableReduce.java | 38 +
.../hbase/mapred/TableSnapshotInputFormat.java | 166 ++
.../apache/hadoop/hbase/mapred/TableSplit.java | 154 +
.../hadoop/hbase/mapred/package-info.java | 26 +
.../hadoop/hbase/mapreduce/CellCounter.java | 333 +++
.../hadoop/hbase/mapreduce/CellCreator.java | 134 +
.../hadoop/hbase/mapreduce/CopyTable.java | 386 +++
.../DefaultVisibilityExpressionResolver.java | 144 +
.../apache/hadoop/hbase/mapreduce/Driver.java | 64 +
.../apache/hadoop/hbase/mapreduce/Export.java | 197 ++
.../hbase/mapreduce/GroupingTableMapper.java | 177 ++
.../hbase/mapreduce/HFileInputFormat.java | 174 ++
.../hbase/mapreduce/HFileOutputFormat2.java | 902 ++++++
.../hbase/mapreduce/HRegionPartitioner.java | 140 +
.../hadoop/hbase/mapreduce/HashTable.java | 747 +++++
.../hbase/mapreduce/IdentityTableMapper.java | 67 +
.../hbase/mapreduce/IdentityTableReducer.java | 79 +
.../apache/hadoop/hbase/mapreduce/Import.java | 780 ++++++
.../hadoop/hbase/mapreduce/ImportTsv.java | 793 ++++++
.../hadoop/hbase/mapreduce/JarFinder.java | 186 ++
.../hbase/mapreduce/KeyValueSerialization.java | 88 +
.../hbase/mapreduce/KeyValueSortReducer.java | 57 +
.../mapreduce/MultiTableHFileOutputFormat.java | 122 +
.../hbase/mapreduce/MultiTableInputFormat.java | 104 +
.../mapreduce/MultiTableInputFormatBase.java | 296 ++
.../hbase/mapreduce/MultiTableOutputFormat.java | 176 ++
.../MultiTableSnapshotInputFormat.java | 106 +
.../MultiTableSnapshotInputFormatImpl.java | 252 ++
.../mapreduce/MultithreadedTableMapper.java | 301 ++
.../hbase/mapreduce/MutationSerialization.java | 98 +
.../hadoop/hbase/mapreduce/PutCombiner.java | 98 +
.../hadoop/hbase/mapreduce/PutSortReducer.java | 147 +
.../hbase/mapreduce/RegionSizeCalculator.java | 127 +
.../hbase/mapreduce/ResultSerialization.java | 158 ++
.../hadoop/hbase/mapreduce/RowCounter.java | 265 ++
.../mapreduce/SimpleTotalOrderPartitioner.java | 143 +
.../hadoop/hbase/mapreduce/SyncTable.java | 786 ++++++
.../hbase/mapreduce/TableInputFormat.java | 294 ++
.../hbase/mapreduce/TableInputFormatBase.java | 652 +++++
.../hbase/mapreduce/TableMapReduceUtil.java | 1027 +++++++
.../hadoop/hbase/mapreduce/TableMapper.java | 38 +
.../hbase/mapreduce/TableOutputCommitter.java | 67 +
.../hbase/mapreduce/TableOutputFormat.java | 239 ++
.../hbase/mapreduce/TableRecordReader.java | 147 +
.../hbase/mapreduce/TableRecordReaderImpl.java | 315 +++
.../hadoop/hbase/mapreduce/TableReducer.java | 45 +
.../mapreduce/TableSnapshotInputFormat.java | 209 ++
.../mapreduce/TableSnapshotInputFormatImpl.java | 410 +++
.../hadoop/hbase/mapreduce/TableSplit.java | 395 +++
.../hadoop/hbase/mapreduce/TextSortReducer.java | 213 ++
.../hbase/mapreduce/TsvImporterMapper.java | 232 ++
.../hbase/mapreduce/TsvImporterTextMapper.java | 128 +
.../mapreduce/VisibilityExpressionResolver.java | 45 +
.../hadoop/hbase/mapreduce/WALInputFormat.java | 344 +++
.../hadoop/hbase/mapreduce/WALPlayer.java | 384 +++
.../hadoop/hbase/mapreduce/package-info.java | 26 +
.../replication/VerifyReplication.java | 700 +++++
.../hbase/regionserver/CompactionTool.java | 470 ++++
.../hadoop/hbase/snapshot/ExportSnapshot.java | 1111 ++++++++
.../util/MapreduceDependencyClasspathTool.java | 73 +
.../hadoop/hbase/PerformanceEvaluation.java | 2627 ++++++++++++++++++
.../hadoop/hbase/ScanPerformanceEvaluation.java | 406 +++
.../hadoop/hbase/TestPerformanceEvaluation.java | 218 ++
.../apache/hadoop/hbase/mapred/TestDriver.java | 41 +
.../hbase/mapred/TestGroupingTableMap.java | 181 ++
.../hbase/mapred/TestIdentityTableMap.java | 64 +
.../TestMultiTableSnapshotInputFormat.java | 135 +
.../hadoop/hbase/mapred/TestRowCounter.java | 163 ++
.../hadoop/hbase/mapred/TestSplitTable.java | 116 +
.../hbase/mapred/TestTableInputFormat.java | 460 +++
.../hadoop/hbase/mapred/TestTableMapReduce.java | 103 +
.../hbase/mapred/TestTableMapReduceUtil.java | 272 ++
.../TestTableOutputFormatConnectionExhaust.java | 104 +
.../mapred/TestTableSnapshotInputFormat.java | 271 ++
...opSecurityEnabledUserProviderForTesting.java | 41 +
.../MultiTableInputFormatTestBase.java | 277 ++
.../hadoop/hbase/mapreduce/NMapInputFormat.java | 134 +
.../TableSnapshotInputFormatTestBase.java | 231 ++
.../hadoop/hbase/mapreduce/TestCellCounter.java | 376 +++
.../hadoop/hbase/mapreduce/TestCopyTable.java | 262 ++
.../mapreduce/TestGroupingTableMapper.java | 68 +
.../hbase/mapreduce/TestHFileOutputFormat2.java | 1496 ++++++++++
.../hbase/mapreduce/TestHRegionPartitioner.java | 71 +
.../hadoop/hbase/mapreduce/TestHashTable.java | 194 ++
.../hbase/mapreduce/TestImportExport.java | 726 +++++
.../TestImportTSVWithOperationAttributes.java | 266 ++
.../hbase/mapreduce/TestImportTSVWithTTLs.java | 175 ++
.../TestImportTSVWithVisibilityLabels.java | 495 ++++
.../hadoop/hbase/mapreduce/TestImportTsv.java | 571 ++++
.../hbase/mapreduce/TestImportTsvParser.java | 314 +++
.../hadoop/hbase/mapreduce/TestJarFinder.java | 132 +
.../TestLoadIncrementalHFilesSplitRecovery.java | 669 +++++
.../mapreduce/TestMultiTableInputFormat.java | 49 +
.../TestMultiTableSnapshotInputFormat.java | 92 +
.../TestMultiTableSnapshotInputFormatImpl.java | 186 ++
.../mapreduce/TestMultithreadedTableMapper.java | 264 ++
.../mapreduce/TestRegionSizeCalculator.java | 160 ++
.../hadoop/hbase/mapreduce/TestRowCounter.java | 400 +++
.../TestSecureLoadIncrementalHFiles.java | 70 +
...ecureLoadIncrementalHFilesSplitRecovery.java | 69 +
.../TestSimpleTotalOrderPartitioner.java | 81 +
.../hadoop/hbase/mapreduce/TestSyncTable.java | 339 +++
.../hbase/mapreduce/TestTableInputFormat.java | 481 ++++
.../mapreduce/TestTableInputFormatBase.java | 53 +
.../mapreduce/TestTableInputFormatScan1.java | 200 ++
.../mapreduce/TestTableInputFormatScan2.java | 118 +
.../mapreduce/TestTableInputFormatScanBase.java | 287 ++
.../hbase/mapreduce/TestTableMapReduce.java | 174 ++
.../hbase/mapreduce/TestTableMapReduceBase.java | 233 ++
.../hbase/mapreduce/TestTableMapReduceUtil.java | 99 +
.../mapreduce/TestTableSnapshotInputFormat.java | 373 +++
.../hadoop/hbase/mapreduce/TestTableSplit.java | 129 +
.../hbase/mapreduce/TestTimeRangeMapRed.java | 211 ++
.../hadoop/hbase/mapreduce/TestWALPlayer.java | 231 ++
.../hbase/mapreduce/TestWALRecordReader.java | 276 ++
.../mapreduce/TsvImporterCustomTestMapper.java | 80 +
.../TsvImporterCustomTestMapperForOprAttr.java | 57 +
.../replication/TestReplicationSmallTests.java | 1059 +++++++
.../hbase/snapshot/TestExportSnapshot.java | 381 +++
.../snapshot/TestExportSnapshotHelpers.java | 91 +
.../snapshot/TestExportSnapshotNoCluster.java | 112 +
.../hbase/snapshot/TestMobExportSnapshot.java | 65 +
.../snapshot/TestMobSecureExportSnapshot.java | 59 +
.../snapshot/TestSecureExportSnapshot.java | 64 +
.../apache/hadoop/hbase/util/LoadTestTool.java | 915 ++++++
.../src/test/resources/hbase-site.xml | 161 ++
.../src/test/resources/hbase-site2.xml | 146 +
.../src/test/resources/hdfs-site.xml | 32 +
.../src/test/resources/log4j.properties | 68 +
.../src/test/resources/mapred-queues.xml | 75 +
.../src/test/resources/mapred-site.xml | 34 +
.../PerformanceEvaluation_Counter.properties | 28 +
.../hbase/mapreduce/exportedTableIn94Format | Bin 0 -> 374 bytes
hbase-rest/pom.xml | 10 +
.../hbase/rest/PerformanceEvaluation.java | 6 +-
.../hbase/client/TableSnapshotScanner.java | 4 +-
.../org/apache/hadoop/hbase/mapred/Driver.java | 52 -
.../hadoop/hbase/mapred/GroupingTableMap.java | 157 --
.../hadoop/hbase/mapred/HRegionPartitioner.java | 96 -
.../hadoop/hbase/mapred/IdentityTableMap.java | 76 -
.../hbase/mapred/IdentityTableReduce.java | 61 -
.../mapred/MultiTableSnapshotInputFormat.java | 128 -
.../apache/hadoop/hbase/mapred/RowCounter.java | 121 -
.../hadoop/hbase/mapred/TableInputFormat.java | 90 -
.../hbase/mapred/TableInputFormatBase.java | 313 ---
.../apache/hadoop/hbase/mapred/TableMap.java | 38 -
.../hadoop/hbase/mapred/TableMapReduceUtil.java | 376 ---
.../hadoop/hbase/mapred/TableOutputFormat.java | 134 -
.../hadoop/hbase/mapred/TableRecordReader.java | 139 -
.../hbase/mapred/TableRecordReaderImpl.java | 259 --
.../apache/hadoop/hbase/mapred/TableReduce.java | 38 -
.../hbase/mapred/TableSnapshotInputFormat.java | 166 --
.../apache/hadoop/hbase/mapred/TableSplit.java | 154 -
.../hadoop/hbase/mapred/package-info.java | 26 -
.../hadoop/hbase/mapreduce/CellCounter.java | 333 ---
.../hadoop/hbase/mapreduce/CellCreator.java | 134 -
.../hadoop/hbase/mapreduce/CopyTable.java | 386 ---
.../DefaultVisibilityExpressionResolver.java | 144 -
.../apache/hadoop/hbase/mapreduce/Driver.java | 64 -
.../apache/hadoop/hbase/mapreduce/Export.java | 197 --
.../hbase/mapreduce/GroupingTableMapper.java | 177 --
.../hbase/mapreduce/HFileInputFormat.java | 174 --
.../hbase/mapreduce/HFileOutputFormat2.java | 902 ------
.../hbase/mapreduce/HRegionPartitioner.java | 140 -
.../hadoop/hbase/mapreduce/HashTable.java | 747 -----
.../hbase/mapreduce/IdentityTableMapper.java | 67 -
.../hbase/mapreduce/IdentityTableReducer.java | 79 -
.../apache/hadoop/hbase/mapreduce/Import.java | 780 ------
.../hadoop/hbase/mapreduce/ImportTsv.java | 793 ------
.../hadoop/hbase/mapreduce/JarFinder.java | 186 --
.../hbase/mapreduce/KeyValueSerialization.java | 88 -
.../hbase/mapreduce/KeyValueSortReducer.java | 56 -
.../mapreduce/MultiTableHFileOutputFormat.java | 122 -
.../hbase/mapreduce/MultiTableInputFormat.java | 104 -
.../mapreduce/MultiTableInputFormatBase.java | 297 --
.../hbase/mapreduce/MultiTableOutputFormat.java | 176 --
.../MultiTableSnapshotInputFormat.java | 106 -
.../MultiTableSnapshotInputFormatImpl.java | 252 --
.../mapreduce/MultithreadedTableMapper.java | 301 --
.../hbase/mapreduce/MutationSerialization.java | 98 -
.../hadoop/hbase/mapreduce/PutCombiner.java | 98 -
.../hadoop/hbase/mapreduce/PutSortReducer.java | 147 -
.../hbase/mapreduce/ResultSerialization.java | 158 --
.../hadoop/hbase/mapreduce/RowCounter.java | 265 --
.../mapreduce/SimpleTotalOrderPartitioner.java | 143 -
.../hadoop/hbase/mapreduce/SyncTable.java | 786 ------
.../hbase/mapreduce/TableInputFormat.java | 294 --
.../hbase/mapreduce/TableInputFormatBase.java | 653 -----
.../hbase/mapreduce/TableMapReduceUtil.java | 1027 -------
.../hadoop/hbase/mapreduce/TableMapper.java | 38 -
.../hbase/mapreduce/TableOutputCommitter.java | 67 -
.../hbase/mapreduce/TableOutputFormat.java | 239 --
.../hbase/mapreduce/TableRecordReader.java | 147 -
.../hbase/mapreduce/TableRecordReaderImpl.java | 315 ---
.../hadoop/hbase/mapreduce/TableReducer.java | 45 -
.../mapreduce/TableSnapshotInputFormat.java | 210 --
.../mapreduce/TableSnapshotInputFormatImpl.java | 412 ---
.../hadoop/hbase/mapreduce/TableSplit.java | 395 ---
.../hadoop/hbase/mapreduce/TextSortReducer.java | 213 --
.../hbase/mapreduce/TsvImporterMapper.java | 232 --
.../hbase/mapreduce/TsvImporterTextMapper.java | 128 -
.../mapreduce/VisibilityExpressionResolver.java | 45 -
.../hadoop/hbase/mapreduce/WALInputFormat.java | 344 ---
.../hadoop/hbase/mapreduce/WALPlayer.java | 384 ---
.../hadoop/hbase/mapreduce/package-info.java | 26 -
.../replication/VerifyReplication.java | 700 -----
.../hbase/regionserver/CompactionTool.java | 470 ----
.../hadoop/hbase/snapshot/ExportSnapshot.java | 1111 --------
.../util/MapreduceDependencyClasspathTool.java | 73 -
.../hadoop/hbase/util/RegionSizeCalculator.java | 146 -
.../hadoop/hbase/PerformanceEvaluation.java | 2626 -----------------
.../hadoop/hbase/ScanPerformanceEvaluation.java | 406 ---
.../hadoop/hbase/TestPerformanceEvaluation.java | 218 --
.../hbase/client/TestTableSnapshotScanner.java | 18 +-
.../apache/hadoop/hbase/mapred/TestDriver.java | 41 -
.../hbase/mapred/TestGroupingTableMap.java | 181 --
.../hbase/mapred/TestIdentityTableMap.java | 64 -
.../TestMultiTableSnapshotInputFormat.java | 135 -
.../hadoop/hbase/mapred/TestRowCounter.java | 163 --
.../hadoop/hbase/mapred/TestSplitTable.java | 116 -
.../hbase/mapred/TestTableInputFormat.java | 461 ---
.../hadoop/hbase/mapred/TestTableMapReduce.java | 103 -
.../hbase/mapred/TestTableMapReduceUtil.java | 272 --
.../TestTableOutputFormatConnectionExhaust.java | 104 -
.../mapred/TestTableSnapshotInputFormat.java | 271 --
...opSecurityEnabledUserProviderForTesting.java | 41 -
.../MultiTableInputFormatTestBase.java | 277 --
.../hadoop/hbase/mapreduce/NMapInputFormat.java | 134 -
.../TableSnapshotInputFormatTestBase.java | 231 --
.../hadoop/hbase/mapreduce/TestCellCounter.java | 376 ---
.../hadoop/hbase/mapreduce/TestCopyTable.java | 262 --
.../mapreduce/TestGroupingTableMapper.java | 68 -
.../hbase/mapreduce/TestHFileOutputFormat2.java | 1495 ----------
.../hbase/mapreduce/TestHRegionPartitioner.java | 71 -
.../hadoop/hbase/mapreduce/TestHashTable.java | 194 --
.../hbase/mapreduce/TestImportExport.java | 727 -----
.../TestImportTSVWithOperationAttributes.java | 266 --
.../hbase/mapreduce/TestImportTSVWithTTLs.java | 175 --
.../TestImportTSVWithVisibilityLabels.java | 495 ----
.../hadoop/hbase/mapreduce/TestImportTsv.java | 571 ----
.../hbase/mapreduce/TestImportTsvParser.java | 314 ---
.../hadoop/hbase/mapreduce/TestJarFinder.java | 132 -
.../mapreduce/TestLoadIncrementalHFiles.java | 1 -
.../TestLoadIncrementalHFilesSplitRecovery.java | 669 -----
.../mapreduce/TestMultiTableInputFormat.java | 49 -
.../TestMultiTableSnapshotInputFormat.java | 92 -
.../TestMultiTableSnapshotInputFormatImpl.java | 186 --
.../mapreduce/TestMultithreadedTableMapper.java | 264 --
.../hadoop/hbase/mapreduce/TestRowCounter.java | 400 ---
.../TestSecureLoadIncrementalHFiles.java | 70 -
...ecureLoadIncrementalHFilesSplitRecovery.java | 69 -
.../TestSimpleTotalOrderPartitioner.java | 81 -
.../hadoop/hbase/mapreduce/TestSyncTable.java | 339 ---
.../hbase/mapreduce/TestTableInputFormat.java | 481 ----
.../mapreduce/TestTableInputFormatBase.java | 53 -
.../mapreduce/TestTableInputFormatScan1.java | 200 --
.../mapreduce/TestTableInputFormatScan2.java | 118 -
.../mapreduce/TestTableInputFormatScanBase.java | 287 --
.../hbase/mapreduce/TestTableMapReduce.java | 174 --
.../hbase/mapreduce/TestTableMapReduceBase.java | 233 --
.../hbase/mapreduce/TestTableMapReduceUtil.java | 99 -
.../mapreduce/TestTableSnapshotInputFormat.java | 384 ---
.../hadoop/hbase/mapreduce/TestTableSplit.java | 129 -
.../hbase/mapreduce/TestTimeRangeMapRed.java | 211 --
.../hadoop/hbase/mapreduce/TestWALPlayer.java | 231 --
.../hbase/mapreduce/TestWALRecordReader.java | 276 --
.../mapreduce/TsvImporterCustomTestMapper.java | 80 -
.../TsvImporterCustomTestMapperForOprAttr.java | 58 -
.../hbase/namespace/TestNamespaceAuditor.java | 8 +-
.../regionserver/TestHRegionFileSystem.java | 7 +-
.../replication/TestReplicationSmallTests.java | 1059 -------
.../hbase/security/HBaseKerberosUtils.java | 26 +-
.../hbase/snapshot/TestExportSnapshot.java | 381 ---
.../snapshot/TestExportSnapshotHelpers.java | 91 -
.../snapshot/TestExportSnapshotNoCluster.java | 112 -
.../hbase/snapshot/TestMobExportSnapshot.java | 65 -
.../snapshot/TestMobSecureExportSnapshot.java | 59 -
.../snapshot/TestSecureExportSnapshot.java | 64 -
.../apache/hadoop/hbase/util/HFileTestUtil.java | 14 +
.../util/LoadTestDataGeneratorWithTags.java | 3 +-
.../apache/hadoop/hbase/util/LoadTestTool.java | 968 -------
.../hadoop/hbase/util/MultiThreadedAction.java | 2 +-
.../hbase/util/MultiThreadedReaderWithACL.java | 3 +-
.../hbase/util/MultiThreadedUpdaterWithACL.java | 3 +-
.../hadoop/hbase/util/RestartMetaTest.java | 8 +-
.../hbase/util/TestRegionSizeCalculator.java | 159 --
.../hbase/util/test/LoadTestDataGenerator.java | 23 +
.../PerformanceEvaluation_Counter.properties | 28 -
.../hbase/mapreduce/exportedTableIn94Format | Bin 374 -> 0 bytes
hbase-spark/pom.xml | 4 +
.../hbase/spark/TestJavaHBaseContext.java | 1 -
pom.xml | 13 +
src/main/asciidoc/_chapters/ops_mgt.adoc | 32 +-
320 files changed, 38781 insertions(+), 37899 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-assembly/pom.xml
----------------------------------------------------------------------
diff --git a/hbase-assembly/pom.xml b/hbase-assembly/pom.xml
index c9488ca..18b63b5 100644
--- a/hbase-assembly/pom.xml
+++ b/hbase-assembly/pom.xml
@@ -195,6 +195,10 @@
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
</dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-mapreduce</artifactId>
+ </dependency>
<!-- To dump tools in hbase-procedure into cached_classpath.txt. -->
<dependency>
<groupId>org.apache.hbase</groupId>
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-assembly/src/main/assembly/hadoop-two-compat.xml
----------------------------------------------------------------------
diff --git a/hbase-assembly/src/main/assembly/hadoop-two-compat.xml b/hbase-assembly/src/main/assembly/hadoop-two-compat.xml
index 1592a3b..a66237b 100644
--- a/hbase-assembly/src/main/assembly/hadoop-two-compat.xml
+++ b/hbase-assembly/src/main/assembly/hadoop-two-compat.xml
@@ -50,6 +50,7 @@
<include>org.apache.hbase:hbase-thrift</include>
<include>org.apache.hbase:hbase-external-blockcache</include>
<include>org.apache.hbase:hbase-backup</include>
+ <include>org.apache.hbase:hbase-mapreduce</include>
</includes>
<!-- Binaries for the dependencies also go in the hbase-jars directory -->
<binaries>
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-assembly/src/main/assembly/src.xml
----------------------------------------------------------------------
diff --git a/hbase-assembly/src/main/assembly/src.xml b/hbase-assembly/src/main/assembly/src.xml
index b00f05f..a0b700c 100644
--- a/hbase-assembly/src/main/assembly/src.xml
+++ b/hbase-assembly/src/main/assembly/src.xml
@@ -62,6 +62,7 @@
<include>org.apache.hbase:hbase-testing-util</include>
<include>org.apache.hbase:hbase-thrift</include>
<include>org.apache.hbase:hbase-backup</include>
+ <include>org.apache.hbase:hbase-mapreduce</include>
</includes>
<!-- Include all the sources in the top directory -->
<sources>
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-backup/pom.xml
----------------------------------------------------------------------
diff --git a/hbase-backup/pom.xml b/hbase-backup/pom.xml
index 7c7d8b5..9b3aac6 100644
--- a/hbase-backup/pom.xml
+++ b/hbase-backup/pom.xml
@@ -109,6 +109,16 @@
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-mapreduce</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-mapreduce</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
<artifactId>hbase-common</artifactId>
</dependency>
<dependency>
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-examples/pom.xml
----------------------------------------------------------------------
diff --git a/hbase-examples/pom.xml b/hbase-examples/pom.xml
index 7a6a51a..422b28e 100644
--- a/hbase-examples/pom.xml
+++ b/hbase-examples/pom.xml
@@ -146,6 +146,10 @@
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-mapreduce</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
<artifactId>hbase-endpoint</artifactId>
</dependency>
<dependency>
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-it/pom.xml
----------------------------------------------------------------------
diff --git a/hbase-it/pom.xml b/hbase-it/pom.xml
index b9b12a4..5dc4d4f 100644
--- a/hbase-it/pom.xml
+++ b/hbase-it/pom.xml
@@ -200,6 +200,22 @@
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-mapreduce</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-mapreduce</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-mapreduce</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
<artifactId>hbase-rsgroup</artifactId>
</dependency>
<dependency>
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngest.java
----------------------------------------------------------------------
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngest.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngest.java
index 76be4e8a..9bc3131 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngest.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngest.java
@@ -28,6 +28,7 @@ import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.testclassification.IntegrationTests;
import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.HFileTestUtil;
import org.apache.hadoop.hbase.util.LoadTestTool;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.util.StringUtils;
@@ -70,7 +71,7 @@ public class IntegrationTestIngest extends IntegrationTestBase {
protected String[] LOAD_TEST_TOOL_INIT_ARGS = {
LoadTestTool.OPT_COLUMN_FAMILIES,
LoadTestTool.OPT_COMPRESSION,
- LoadTestTool.OPT_DATA_BLOCK_ENCODING,
+ HFileTestUtil.OPT_DATA_BLOCK_ENCODING,
LoadTestTool.OPT_INMEMORY,
LoadTestTool.OPT_ENCRYPTION,
LoadTestTool.OPT_NUM_REGIONS_PER_SERVER,
@@ -138,7 +139,7 @@ public class IntegrationTestIngest extends IntegrationTestBase {
String familiesString = getConf().get(
String.format("%s.%s", clazz, LoadTestTool.OPT_COLUMN_FAMILIES));
if (familiesString == null) {
- for (byte[] family : LoadTestTool.DEFAULT_COLUMN_FAMILIES) {
+ for (byte[] family : HFileTestUtil.DEFAULT_COLUMN_FAMILIES) {
families.add(Bytes.toString(family));
}
} else {
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngestStripeCompactions.java
----------------------------------------------------------------------
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngestStripeCompactions.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngestStripeCompactions.java
index d64fbb0..fc79abb 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngestStripeCompactions.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngestStripeCompactions.java
@@ -25,7 +25,7 @@ import org.apache.hadoop.hbase.regionserver.HStore;
import org.apache.hadoop.hbase.regionserver.StoreEngine;
import org.apache.hadoop.hbase.regionserver.StripeStoreEngine;
import org.apache.hadoop.hbase.testclassification.IntegrationTests;
-import org.apache.hadoop.hbase.util.LoadTestTool;
+import org.apache.hadoop.hbase.util.HFileTestUtil;
import org.apache.hadoop.util.ToolRunner;
import org.junit.experimental.categories.Category;
@@ -41,7 +41,7 @@ public class IntegrationTestIngestStripeCompactions extends IntegrationTestInges
HTableDescriptor htd = new HTableDescriptor(getTablename());
htd.setConfiguration(StoreEngine.STORE_ENGINE_CLASS_KEY, StripeStoreEngine.class.getName());
htd.setConfiguration(HStore.BLOCKING_STOREFILES_KEY, "100");
- HColumnDescriptor hcd = new HColumnDescriptor(LoadTestTool.DEFAULT_COLUMN_FAMILY);
+ HColumnDescriptor hcd = new HColumnDescriptor(HFileTestUtil.DEFAULT_COLUMN_FAMILY);
HBaseTestingUtility.createPreSplitLoadTestTable(util.getConfiguration(), htd, hcd);
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngestWithMOB.java
----------------------------------------------------------------------
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngestWithMOB.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngestWithMOB.java
index 5bbb12b..010e4b9 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngestWithMOB.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngestWithMOB.java
@@ -31,6 +31,7 @@ import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.testclassification.IntegrationTests;
import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.HFileTestUtil;
import org.apache.hadoop.hbase.util.LoadTestDataGeneratorWithMOB;
import org.apache.hadoop.hbase.util.LoadTestTool;
import org.apache.hadoop.util.ToolRunner;
@@ -44,7 +45,7 @@ import org.junit.experimental.categories.Category;
public class IntegrationTestIngestWithMOB extends IntegrationTestIngest {
private static final char COLON = ':';
- private byte[] mobColumnFamily = LoadTestTool.DEFAULT_COLUMN_FAMILY;
+ private byte[] mobColumnFamily = HFileTestUtil.DEFAULT_COLUMN_FAMILY;
public static final String THRESHOLD = "threshold";
public static final String MIN_MOB_DATA_SIZE = "minMobDataSize";
public static final String MAX_MOB_DATA_SIZE = "maxMobDataSize";
@@ -56,7 +57,7 @@ public class IntegrationTestIngestWithMOB extends IntegrationTestIngest {
//similar to LOAD_TEST_TOOL_INIT_ARGS except OPT_IN_MEMORY is removed
protected String[] LOAD_TEST_TOOL_MOB_INIT_ARGS = {
LoadTestTool.OPT_COMPRESSION,
- LoadTestTool.OPT_DATA_BLOCK_ENCODING,
+ HFileTestUtil.OPT_DATA_BLOCK_ENCODING,
LoadTestTool.OPT_ENCRYPTION,
LoadTestTool.OPT_NUM_REGIONS_PER_SERVER,
LoadTestTool.OPT_REGION_REPLICATION,
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestRegionReplicaPerf.java
----------------------------------------------------------------------
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestRegionReplicaPerf.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestRegionReplicaPerf.java
index d649bdb..3135bd0 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestRegionReplicaPerf.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestRegionReplicaPerf.java
@@ -72,6 +72,7 @@ public class IntegrationTestRegionReplicaPerf extends IntegrationTestBase {
private static final String PRIMARY_TIMEOUT_DEFAULT = "" + 10 * 1000; // 10 ms
private static final String NUM_RS_KEY = "numRs";
private static final String NUM_RS_DEFAULT = "" + 3;
+ public static final String FAMILY_NAME = "info";
/** Extract a descriptive statistic from a {@link com.codahale.metrics.Histogram}. */
private enum Stat {
@@ -236,7 +237,7 @@ public class IntegrationTestRegionReplicaPerf extends IntegrationTestBase {
@Override
protected Set<String> getColumnFamilies() {
- return Sets.newHashSet(Bytes.toString(PerformanceEvaluation.FAMILY_NAME));
+ return Sets.newHashSet(FAMILY_NAME);
}
/** Compute the mean of the given {@code stat} from a timing results. */
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-it/src/test/java/org/apache/hadoop/hbase/mapreduce/IntegrationTestImportTsv.java
----------------------------------------------------------------------
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/mapreduce/IntegrationTestImportTsv.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/mapreduce/IntegrationTestImportTsv.java
index 9d04bf9..fb7acf4 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/mapreduce/IntegrationTestImportTsv.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/mapreduce/IntegrationTestImportTsv.java
@@ -29,7 +29,6 @@ import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
-import java.util.UUID;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestLoadAndVerify.java
----------------------------------------------------------------------
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestLoadAndVerify.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestLoadAndVerify.java
index f042521..b9d16a1 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestLoadAndVerify.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestLoadAndVerify.java
@@ -44,6 +44,7 @@ import org.apache.hadoop.hbase.IntegrationTestBase;
import org.apache.hadoop.hbase.IntegrationTestingUtility;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.testclassification.IntegrationTests;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
@@ -55,7 +56,6 @@ import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.ScannerCallable;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.NMapInputFormat;
-import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl;
import org.apache.hadoop.hbase.util.AbstractHBaseTool;
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/pom.xml
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/pom.xml b/hbase-mapreduce/pom.xml
new file mode 100644
index 0000000..f75c9f9
--- /dev/null
+++ b/hbase-mapreduce/pom.xml
@@ -0,0 +1,316 @@
+<?xml version="1.0"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <!--
+ /**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ -->
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <artifactId>hbase-build-configuration</artifactId>
+ <groupId>org.apache.hbase</groupId>
+ <version>3.0.0-SNAPSHOT</version>
+ <relativePath>../hbase-build-configuration</relativePath>
+ </parent>
+ <artifactId>hbase-mapreduce</artifactId>
+ <name>Apache HBase - MapReduce</name>
+ <description>
+ This module contains implementations of InputFormat, OutputFormat, Mapper, Reducer, etc., which
+ are needed for running MapReduce jobs on tables, WALs, HFiles and other HBase-specific constructs.
+ It also contains a number of tools: RowCounter, ImportTsv, Import, Export, CompactionTool,
+ ExportSnapshot, WALPlayer, etc.
+ </description>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-site-plugin</artifactId>
+ <configuration>
+ <skip>true</skip>
+ </configuration>
+ </plugin>
+ <plugin>
+ <!--Make it so assembly:single does nothing in here-->
+ <artifactId>maven-assembly-plugin</artifactId>
+ <configuration>
+ <skipAssembly>true</skipAssembly>
+ </configuration>
+ </plugin>
+ <!-- Testing plugins -->
+ <plugin>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <configuration>
+ <properties>
+ <property>
+ <name>listener</name>
+ <value>org.apache.hadoop.hbase.ServerResourceCheckerJUnitListener</value>
+ </property>
+ </properties>
+ <systemPropertyVariables>
+ <org.apache.hadoop.hbase.shaded.io.netty.packagePrefix>org.apache.hadoop.hbase.shaded.</org.apache.hadoop.hbase.shaded.io.netty.packagePrefix>
+ </systemPropertyVariables>
+ </configuration>
+ </plugin>
+ <!-- Make a jar and put the sources in the jar -->
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-source-plugin</artifactId>
+ </plugin>
+ </plugins>
+ <pluginManagement>
+ <plugins>
+ <!--This plugin's configuration is used to store Eclipse m2e settings
+ only. It has no influence on the Maven build itself.-->
+ <plugin>
+ <groupId>org.eclipse.m2e</groupId>
+ <artifactId>lifecycle-mapping</artifactId>
+ <version>1.0.0</version>
+ <configuration>
+ <lifecycleMappingMetadata>
+ <pluginExecutions>
+ <pluginExecution>
+ <pluginExecutionFilter>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <versionRange>[3.2,)</versionRange>
+ <goals>
+ <goal>compile</goal>
+ </goals>
+ </pluginExecutionFilter>
+ <action>
+ <ignore></ignore>
+ </action>
+ </pluginExecution>
+ </pluginExecutions>
+ </lifecycleMappingMetadata>
+ </configuration>
+ </plugin>
+ </plugins>
+ </pluginManagement>
+ </build>
+
+ <dependencies>
+ <!-- Intra-project dependencies -->
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-annotations</artifactId>
+ <exclusions>
+ <exclusion>
+ <groupId>jdk.tools</groupId>
+ <artifactId>jdk.tools</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-annotations</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-common</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-common</artifactId>
+ <type>test-jar</type>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-client</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-hadoop-compat</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-hadoop2-compat</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-server</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-server</artifactId>
+ <type>test-jar</type>
+ </dependency>
+ <!-- General dependencies -->
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-hdfs</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.mockito</groupId>
+ <artifactId>mockito-all</artifactId>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+ <profiles>
+ <!-- Skip the tests in this module -->
+ <profile>
+ <id>skipMapReduceTests</id>
+ <activation>
+ <property>
+ <name>skipMapReduceTests</name>
+ </property>
+ </activation>
+ <properties>
+ <surefire.skipFirstPart>true</surefire.skipFirstPart>
+ <surefire.skipSecondPart>true</surefire.skipSecondPart>
+ </properties>
+ </profile>
+ <!-- profile against Hadoop 2.x: This is the default. -->
+ <profile>
+ <id>hadoop-2.0</id>
+ <activation>
+ <property>
+ <!--Below formatting for dev-support/generate-hadoopX-poms.sh-->
+ <!--h2--><name>!hadoop.profile</name>
+ </property>
+ </activation>
+ <dependencies>
+ <dependency>
+ <groupId>com.github.stephenc.findbugs</groupId>
+ <artifactId>findbugs-annotations</artifactId>
+ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ <exclusions>
+ <exclusion>
+ <groupId>net.java.dev.jets3t</groupId>
+ <artifactId>jets3t</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>javax.servlet.jsp</groupId>
+ <artifactId>jsp-api</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.mortbay.jetty</groupId>
+ <artifactId>jetty</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.sun.jersey</groupId>
+ <artifactId>jersey-server</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.sun.jersey</groupId>
+ <artifactId>jersey-core</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.sun.jersey</groupId>
+ <artifactId>jersey-json</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>javax.servlet</groupId>
+ <artifactId>servlet-api</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>tomcat</groupId>
+ <artifactId>jasper-compiler</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>tomcat</groupId>
+ <artifactId>jasper-runtime</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.google.code.findbugs</groupId>
+ <artifactId>jsr305</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-client</artifactId>
+ <exclusions>
+ <exclusion>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-minicluster</artifactId>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+
+ </profile>
+
+ <!--
+ profile for building against Hadoop 3.0.x. Activate using:
+ mvn -Dhadoop.profile=3.0
+ -->
+ <profile>
+ <id>hadoop-3.0</id>
+ <activation>
+ <property>
+ <name>hadoop.profile</name>
+ <value>3.0</value>
+ </property>
+ </activation>
+ <properties>
+ <hadoop.version>${hadoop-three.version}</hadoop.version>
+ </properties>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-minicluster</artifactId>
+ </dependency>
+ </dependencies>
+ </profile>
+ </profiles>
+</project>
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/Driver.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/Driver.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/Driver.java
new file mode 100644
index 0000000..618c14a
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/Driver.java
@@ -0,0 +1,52 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import org.apache.hadoop.hbase.HBaseInterfaceAudience;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.classification.InterfaceStability;
+import org.apache.hadoop.util.ProgramDriver;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.annotations.VisibleForTesting;
+
+/**
+ * Driver for HBase mapreduce jobs. Select which job to run by passing its
+ * registered name as the first argument to this main method.
+ */
+@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
+@InterfaceStability.Stable
+public class Driver {
+
+ private static ProgramDriver pgd = new ProgramDriver();
+
+ @VisibleForTesting
+ static void setProgramDriver(ProgramDriver pgd0) {
+ pgd = pgd0;
+ }
+
+ /**
+ * @param args
+ * @throws Throwable
+ */
+ public static void main(String[] args) throws Throwable {
+ pgd.addClass(RowCounter.NAME, RowCounter.class, "Count rows in HBase table");
+ ProgramDriver.class.getMethod("driver", new Class[] { String[].class })
+ .invoke(pgd, new Object[] { args });
+ }
+}
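
As a usage illustration for the Driver above: a minimal sketch of invoking the registered
rowcounter program; the output directory, table name and column are hypothetical placeholders,
not values taken from this change.

    // Hypothetical invocation of the mapred Driver; arguments follow RowCounter's usage:
    //   rowcounter <outputdir> <tablename> <column1> [<column2>...]
    public class DriverUsageSketch {
      public static void main(String[] args) throws Throwable {
        org.apache.hadoop.hbase.mapred.Driver.main(
            new String[] { "rowcounter", "/tmp/rowcounter-out", "mytable", "cf:col1" });
      }
    }
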
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/GroupingTableMap.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/GroupingTableMap.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/GroupingTableMap.java
new file mode 100644
index 0000000..a534224
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/GroupingTableMap.java
@@ -0,0 +1,157 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+
+
+/**
+ * Extract grouping columns from input record
+ */
+@InterfaceAudience.Public
+public class GroupingTableMap
+extends MapReduceBase
+implements TableMap<ImmutableBytesWritable,Result> {
+
+ /**
+ * JobConf parameter to specify the columns used to produce the key passed to
+ * collect from the map phase
+ */
+ public static final String GROUP_COLUMNS =
+ "hbase.mapred.groupingtablemap.columns";
+
+ protected byte [][] columns;
+
+ /**
+ * Use this before submitting a TableMap job. It will appropriately set up the
+ * JobConf.
+ *
+ * @param table table to be processed
+ * @param columns space separated list of columns to fetch
+ * @param groupColumns space separated list of columns used to form the key
+ * used in collect
+ * @param mapper map class
+ * @param job job configuration object
+ */
+ @SuppressWarnings("unchecked")
+ public static void initJob(String table, String columns, String groupColumns,
+ Class<? extends TableMap> mapper, JobConf job) {
+
+ TableMapReduceUtil.initTableMapJob(table, columns, mapper,
+ ImmutableBytesWritable.class, Result.class, job);
+ job.set(GROUP_COLUMNS, groupColumns);
+ }
+
+ @Override
+ public void configure(JobConf job) {
+ super.configure(job);
+ String[] cols = job.get(GROUP_COLUMNS, "").split(" ");
+ columns = new byte[cols.length][];
+ for(int i = 0; i < cols.length; i++) {
+ columns[i] = Bytes.toBytes(cols[i]);
+ }
+ }
+
+ /**
+ * Extract the grouping columns from value to construct a new key.
+ *
+ * Pass the new key and value to reduce.
+ * If any of the grouping columns are not found in the value, the record is skipped.
+ * @param key
+ * @param value
+ * @param output
+ * @param reporter
+ * @throws IOException
+ */
+ public void map(ImmutableBytesWritable key, Result value,
+ OutputCollector<ImmutableBytesWritable,Result> output,
+ Reporter reporter) throws IOException {
+
+ byte[][] keyVals = extractKeyValues(value);
+ if(keyVals != null) {
+ ImmutableBytesWritable tKey = createGroupKey(keyVals);
+ output.collect(tKey, value);
+ }
+ }
+
+ /**
+ * Extract column values from the current record. This method returns
+ * null if any of the columns are not found.
+ *
+ * Override this method if you want to deal with nulls differently.
+ *
+ * @param r
+ * @return array of byte values
+ */
+ protected byte[][] extractKeyValues(Result r) {
+ byte[][] keyVals = null;
+ ArrayList<byte[]> foundList = new ArrayList<>();
+ int numCols = columns.length;
+ if (numCols > 0) {
+ for (Cell value: r.listCells()) {
+ byte [] column = KeyValue.makeColumn(CellUtil.cloneFamily(value),
+ CellUtil.cloneQualifier(value));
+ for (int i = 0; i < numCols; i++) {
+ if (Bytes.equals(column, columns[i])) {
+ foundList.add(CellUtil.cloneValue(value));
+ break;
+ }
+ }
+ }
+ if(foundList.size() == numCols) {
+ keyVals = foundList.toArray(new byte[numCols][]);
+ }
+ }
+ return keyVals;
+ }
+
+ /**
+ * Create a key by concatenating multiple column values.
+ * Override this function in order to produce different types of keys.
+ *
+ * @param vals
+ * @return key generated by concatenating multiple column values
+ */
+ protected ImmutableBytesWritable createGroupKey(byte[][] vals) {
+ if(vals == null) {
+ return null;
+ }
+ StringBuilder sb = new StringBuilder();
+ for(int i = 0; i < vals.length; i++) {
+ if(i > 0) {
+ sb.append(" ");
+ }
+ sb.append(Bytes.toString(vals[i]));
+ }
+ return new ImmutableBytesWritable(Bytes.toBytesBinary(sb.toString()));
+ }
+}
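
A minimal sketch of wiring the mapper above into a JobConf via its initJob helper, assuming a
hypothetical table and column names; both column lists are space separated, and here the fetched
columns double as the grouping columns.

    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.mapred.GroupingTableMap;
    import org.apache.hadoop.mapred.JobConf;

    public class GroupingTableMapUsageSketch {
      public static void main(String[] args) {
        // "mytable", "cf:a" and "cf:b" are hypothetical placeholders.
        JobConf job = new JobConf(HBaseConfiguration.create(), GroupingTableMapUsageSketch.class);
        // Fetch cf:a and cf:b, and concatenate their values to form the key passed to reduce.
        GroupingTableMap.initJob("mytable", "cf:a cf:b", "cf:a cf:b", GroupingTableMap.class, job);
      }
    }
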
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/HRegionPartitioner.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/HRegionPartitioner.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/HRegionPartitioner.java
new file mode 100644
index 0000000..4f5323a
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/HRegionPartitioner.java
@@ -0,0 +1,95 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.Partitioner;
+
+/**
+ * This is used to partition the output keys into groups of keys.
+ * Keys are grouped according to the regions that currently exist
+ * so that each reducer fills a single region and the load is evenly distributed.
+ *
+ * @param <K2>
+ * @param <V2>
+ */
+@InterfaceAudience.Public
+public class HRegionPartitioner<K2,V2>
+implements Partitioner<ImmutableBytesWritable, V2> {
+ private static final Log LOG = LogFactory.getLog(HRegionPartitioner.class);
+ // Connection and locator are not cleaned up; they just die when partitioner is done.
+ private Connection connection;
+ private RegionLocator locator;
+ private byte[][] startKeys;
+
+ public void configure(JobConf job) {
+ try {
+ this.connection = ConnectionFactory.createConnection(HBaseConfiguration.create(job));
+ TableName tableName = TableName.valueOf(job.get(TableOutputFormat.OUTPUT_TABLE));
+ this.locator = this.connection.getRegionLocator(tableName);
+ } catch (IOException e) {
+ LOG.error(e);
+ }
+
+ try {
+ this.startKeys = this.locator.getStartKeys();
+ } catch (IOException e) {
+ LOG.error(e);
+ }
+ }
+
+ public int getPartition(ImmutableBytesWritable key, V2 value, int numPartitions) {
+ byte[] region = null;
+ // Only one region; every key goes to partition 0.
+ if (this.startKeys.length == 1){
+ return 0;
+ }
+ try {
+ // Not sure if this is cached after a split so we could have problems
+ // here if a region splits while mapping
+ region = locator.getRegionLocation(key.get()).getRegionInfo().getStartKey();
+ } catch (IOException e) {
+ LOG.error(e);
+ }
+ for (int i = 0; i < this.startKeys.length; i++){
+ if (Bytes.compareTo(region, this.startKeys[i]) == 0 ){
+ if (i >= numPartitions-1){
+ // cover the case where we have fewer reducers than regions.
+ return (Integer.toString(i).hashCode()
+ & Integer.MAX_VALUE) % numPartitions;
+ }
+ return i;
+ }
+ }
+ // if the above fails to find a matching start key, we still need to return something
+ return 0;
+ }
+}
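
A minimal sketch of attaching the partitioner above to a JobConf-based job that reduces into a
table; the table name is a hypothetical placeholder. The partitioner reads the target table from
the TableOutputFormat.OUTPUT_TABLE property set on the job.

    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.mapred.HRegionPartitioner;
    import org.apache.hadoop.hbase.mapred.TableOutputFormat;
    import org.apache.hadoop.mapred.JobConf;

    public class HRegionPartitionerUsageSketch {
      public static void main(String[] args) {
        JobConf job = new JobConf(HBaseConfiguration.create(), HRegionPartitionerUsageSketch.class);
        // "mytable" is a hypothetical placeholder; the partitioner looks the region start keys up here.
        job.set(TableOutputFormat.OUTPUT_TABLE, "mytable");
        // Route each output key to the reducer that owns the corresponding region.
        job.setPartitionerClass(HRegionPartitioner.class);
      }
    }
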
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/IdentityTableMap.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/IdentityTableMap.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/IdentityTableMap.java
new file mode 100644
index 0000000..dfacff9
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/IdentityTableMap.java
@@ -0,0 +1,76 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import java.io.IOException;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+
+/**
+ * Pass the given key and record as-is to reduce
+ */
+@InterfaceAudience.Public
+public class IdentityTableMap
+extends MapReduceBase
+implements TableMap<ImmutableBytesWritable, Result> {
+
+ /** constructor */
+ public IdentityTableMap() {
+ super();
+ }
+
+ /**
+ * Use this before submitting a TableMap job. It will
+ * appropriately set up the JobConf.
+ *
+ * @param table table name
+ * @param columns columns to scan
+ * @param mapper mapper class
+ * @param job job configuration
+ */
+ @SuppressWarnings("unchecked")
+ public static void initJob(String table, String columns,
+ Class<? extends TableMap> mapper, JobConf job) {
+ TableMapReduceUtil.initTableMapJob(table, columns, mapper,
+ ImmutableBytesWritable.class,
+ Result.class, job);
+ }
+
+ /**
+ * Pass the key, value to reduce
+ * @param key
+ * @param value
+ * @param output
+ * @param reporter
+ * @throws IOException
+ */
+ public void map(ImmutableBytesWritable key, Result value,
+ OutputCollector<ImmutableBytesWritable,Result> output,
+ Reporter reporter) throws IOException {
+
+ // pass the key and value through unchanged
+ output.collect(key, value);
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/IdentityTableReduce.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/IdentityTableReduce.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/IdentityTableReduce.java
new file mode 100644
index 0000000..9c2e604
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/IdentityTableReduce.java
@@ -0,0 +1,61 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+
+/**
+ * Write to table each key, record pair
+ */
+@InterfaceAudience.Public
+public class IdentityTableReduce
+extends MapReduceBase
+implements TableReduce<ImmutableBytesWritable, Put> {
+ @SuppressWarnings("unused")
+ private static final Log LOG =
+ LogFactory.getLog(IdentityTableReduce.class.getName());
+
+ /**
+ * No aggregation, output pairs of (key, record)
+ * @param key
+ * @param values
+ * @param output
+ * @param reporter
+ * @throws IOException
+ */
+ public void reduce(ImmutableBytesWritable key, Iterator<Put> values,
+ OutputCollector<ImmutableBytesWritable, Put> output,
+ Reporter reporter)
+ throws IOException {
+
+ while(values.hasNext()) {
+ output.collect(key, values.next());
+ }
+ }
+}
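
A minimal sketch of using the reducer above to write map output straight to a table via
TableMapReduceUtil.initTableReduceJob; the target table is a hypothetical placeholder, and a
mapper that emits (ImmutableBytesWritable, Put) pairs is assumed to be configured elsewhere on
the same JobConf.

    import java.io.IOException;

    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.mapred.IdentityTableReduce;
    import org.apache.hadoop.hbase.mapred.TableMapReduceUtil;
    import org.apache.hadoop.mapred.JobConf;

    public class IdentityTableReduceUsageSketch {
      public static void main(String[] args) throws IOException {
        JobConf job = new JobConf(HBaseConfiguration.create(), IdentityTableReduceUsageSketch.class);
        // Writes each (row key, Put) pair emitted by the map phase into "targettable";
        // the table name is a hypothetical placeholder.
        TableMapReduceUtil.initTableReduceJob("targettable", IdentityTableReduce.class, job);
      }
    }
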
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/MultiTableSnapshotInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/MultiTableSnapshotInputFormat.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/MultiTableSnapshotInputFormat.java
new file mode 100644
index 0000000..81dbb15
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/MultiTableSnapshotInputFormat.java
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mapred;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.MultiTableSnapshotInputFormatImpl;
+import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormatImpl;
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * MultiTableSnapshotInputFormat generalizes {@link org.apache.hadoop.hbase.mapred
+ * .TableSnapshotInputFormat}
+ * allowing a MapReduce job to run over one or more table snapshots, with one or more scans
+ * configured for each.
+ * Internally, the input format delegates to {@link org.apache.hadoop.hbase.mapreduce
+ * .TableSnapshotInputFormat}
+ * and thus has the same performance advantages; see {@link org.apache.hadoop.hbase.mapreduce
+ * .TableSnapshotInputFormat} for
+ * more details.
+ * Usage is similar to TableSnapshotInputFormat, with the following exception:
+ * initMultiTableSnapshotMapperJob takes in a map
+ * from snapshot name to a collection of scans. For each snapshot in the map, each corresponding
+ * scan will be applied;
+ * the overall dataset for the job is defined by the concatenation of the regions and tables
+ * included in each snapshot/scan
+ * pair.
+ * {@link TableMapReduceUtil#initMultiTableSnapshotMapperJob(Map,
+ * Class, Class, Class, JobConf, boolean, Path)}
+ * can be used to configure the job.
+ * <pre>{@code
+ * JobConf job = new JobConf(conf);
+ * Map<String, Collection<Scan>> snapshotScans = ImmutableMap.of(
+ * "snapshot1", ImmutableList.of(new Scan(Bytes.toBytes("a"), Bytes.toBytes("b"))),
+ * "snapshot2", ImmutableList.of(new Scan(Bytes.toBytes("1"), Bytes.toBytes("2")))
+ * );
+ * Path restoreDir = new Path("/tmp/snapshot_restore_dir");
+ * TableMapReduceUtil.initMultiTableSnapshotMapperJob(
+ * snapshotScans, MyTableMapper.class, MyMapKeyOutput.class,
+ * MyMapOutputValueWritable.class, job, true, restoreDir);
+ * }
+ * </pre>
+ * Internally, this input format restores each snapshot into a subdirectory of the given tmp
+ * directory. Input splits and
+ * record readers are created as described in {@link org.apache.hadoop.hbase.mapreduce
+ * .TableSnapshotInputFormat}
+ * (one per region).
+ * See {@link org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat} for more notes on
+ * permissioning; the
+ * same caveats apply here.
+ *
+ * @see org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat
+ * @see org.apache.hadoop.hbase.client.TableSnapshotScanner
+ */
+@InterfaceAudience.Public
+public class MultiTableSnapshotInputFormat extends TableSnapshotInputFormat
+ implements InputFormat<ImmutableBytesWritable, Result> {
+
+ private final MultiTableSnapshotInputFormatImpl delegate;
+
+ public MultiTableSnapshotInputFormat() {
+ this.delegate = new MultiTableSnapshotInputFormatImpl();
+ }
+
+ @Override
+ public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
+ List<TableSnapshotInputFormatImpl.InputSplit> splits = delegate.getSplits(job);
+ InputSplit[] results = new InputSplit[splits.size()];
+ for (int i = 0; i < splits.size(); i++) {
+ results[i] = new TableSnapshotRegionSplit(splits.get(i));
+ }
+ return results;
+ }
+
+ @Override
+ public RecordReader<ImmutableBytesWritable, Result> getRecordReader(InputSplit split, JobConf job,
+ Reporter reporter) throws IOException {
+ return new TableSnapshotRecordReader((TableSnapshotRegionSplit) split, job);
+ }
+
+ /**
+ * Configure conf to read from snapshotScans, with snapshots restored to a subdirectory of
+ * restoreDir.
+ * Sets: {@link org.apache.hadoop.hbase.mapreduce
+ * .MultiTableSnapshotInputFormatImpl#RESTORE_DIRS_KEY},
+ * {@link org.apache.hadoop.hbase.mapreduce
+ * .MultiTableSnapshotInputFormatImpl#SNAPSHOT_TO_SCANS_KEY}
+ *
+ * @param conf
+ * @param snapshotScans
+ * @param restoreDir
+ * @throws IOException
+ */
+ public static void setInput(Configuration conf, Map<String, Collection<Scan>> snapshotScans,
+ Path restoreDir) throws IOException {
+ new MultiTableSnapshotInputFormatImpl().setInput(conf, snapshotScans, restoreDir);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/RowCounter.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/RowCounter.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/RowCounter.java
new file mode 100644
index 0000000..43560fd
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/RowCounter.java
@@ -0,0 +1,121 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import java.io.IOException;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * A job with a map phase to count rows.
+ * The map increments a counter for every input row that has columns with content;
+ * the job runs with zero reduce tasks.
+ */
+@InterfaceAudience.Public
+public class RowCounter extends Configured implements Tool {
+ // Name of this 'program'
+ static final String NAME = "rowcounter";
+
+ /**
+ * Mapper that runs the count.
+ */
+ static class RowCounterMapper
+ implements TableMap<ImmutableBytesWritable, Result> {
+ private static enum Counters {ROWS}
+
+ public void map(ImmutableBytesWritable row, Result values,
+ OutputCollector<ImmutableBytesWritable, Result> output,
+ Reporter reporter)
+ throws IOException {
+ // Count every row containing data, whether it's in qualifiers or values
+ reporter.incrCounter(Counters.ROWS, 1);
+ }
+
+ public void configure(JobConf jc) {
+ // Nothing to do.
+ }
+
+ public void close() throws IOException {
+ // Nothing to do.
+ }
+ }
+
+ /**
+ * @param args the command line arguments: output directory, table name, then columns
+ * @return the JobConf configured to run the row count
+ * @throws IOException when setting up the job fails
+ */
+ public JobConf createSubmittableJob(String[] args) throws IOException {
+ JobConf c = new JobConf(getConf(), getClass());
+ c.setJobName(NAME);
+ // Columns are space delimited
+ StringBuilder sb = new StringBuilder();
+ final int columnoffset = 2;
+ for (int i = columnoffset; i < args.length; i++) {
+ if (i > columnoffset) {
+ sb.append(" ");
+ }
+ sb.append(args[i]);
+ }
+ // Second argument is the table name.
+ TableMapReduceUtil.initTableMapJob(args[1], sb.toString(),
+ RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, c);
+ c.setNumReduceTasks(0);
+ // First arg is the output directory.
+ FileOutputFormat.setOutputPath(c, new Path(args[0]));
+ return c;
+ }
+
+ static int printUsage() {
+ System.out.println(NAME +
+ " <outputdir> <tablename> <column1> [<column2>...]");
+ return -1;
+ }
+
+ public int run(final String[] args) throws Exception {
+ // Make sure there are at least 3 parameters
+ if (args.length < 3) {
+ System.err.println("ERROR: Wrong number of parameters: " + args.length);
+ return printUsage();
+ }
+ JobClient.runJob(createSubmittableJob(args));
+ return 0;
+ }
+
+ /**
+ * Main entry point.
+ * @param args the command line arguments, passed through to {@link #run(String[])}
+ * @throws Exception when running the job fails
+ */
+ public static void main(String[] args) throws Exception {
+ int errCode = ToolRunner.run(HBaseConfiguration.create(), new RowCounter(), args);
+ System.exit(errCode);
+ }
+}
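
For reference, the tool above can also be driven from code via ToolRunner; the output directory,
table name and column below are hypothetical placeholders, and the argument order matches
printUsage() (output dir, table, then one or more columns).

    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.mapred.RowCounter;
    import org.apache.hadoop.util.ToolRunner;

    public class RowCounterExample {
      public static void main(String[] args) throws Exception {
        // args: <outputdir> <tablename> <column1> [<column2>...]
        int rc = ToolRunner.run(HBaseConfiguration.create(), new RowCounter(),
            new String[] { "/tmp/rowcounter-out", "TestTable", "f:q1" });
        System.exit(rc);
      }
    }
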
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableInputFormat.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableInputFormat.java
new file mode 100644
index 0000000..208849a
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableInputFormat.java
@@ -0,0 +1,90 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.JobConfigurable;
+import org.apache.hadoop.util.StringUtils;
+
+/**
+ * Convert HBase tabular data into a format that is consumable by Map/Reduce.
+ */
+@InterfaceAudience.Public
+public class TableInputFormat extends TableInputFormatBase implements
+ JobConfigurable {
+ private static final Log LOG = LogFactory.getLog(TableInputFormat.class);
+
+ /**
+ * Space-delimited list of columns to scan.
+ */
+ public static final String COLUMN_LIST = "hbase.mapred.tablecolumns";
+
+ public void configure(JobConf job) {
+ try {
+ initialize(job);
+ } catch (Exception e) {
+ LOG.error(StringUtils.stringifyException(e));
+ }
+ }
+
+ @Override
+ protected void initialize(JobConf job) throws IOException {
+ Path[] tableNames = FileInputFormat.getInputPaths(job);
+ String colArg = job.get(COLUMN_LIST);
+ String[] colNames = colArg.split(" ");
+ byte [][] m_cols = new byte[colNames.length][];
+ for (int i = 0; i < m_cols.length; i++) {
+ m_cols[i] = Bytes.toBytes(colNames[i]);
+ }
+ setInputColumns(m_cols);
+ Connection connection = ConnectionFactory.createConnection(job);
+ initializeTable(connection, TableName.valueOf(tableNames[0].getName()));
+ }
+
+ public void validateInput(JobConf job) throws IOException {
+ // expecting exactly one path
+ Path [] tableNames = FileInputFormat.getInputPaths(job);
+ if (tableNames == null || tableNames.length > 1) {
+ throw new IOException("expecting one table name");
+ }
+
+ // connected to table?
+ if (getTable() == null) {
+ throw new IOException("could not connect to table '" +
+ tableNames[0].getName() + "'");
+ }
+
+ // expecting at least one column
+ String colArg = job.get(COLUMN_LIST);
+ if (colArg == null || colArg.length() == 0) {
+ throw new IOException("expecting at least one column");
+ }
+ }
+}
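
A minimal sketch of configuring this input format by hand, roughly what the mapred
TableMapReduceUtil.initTableMapJob helper does for you; the table and column names are
hypothetical, and IdentityTableMap is used only as a stand-in mapper.

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.Result;
    import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
    import org.apache.hadoop.hbase.mapred.IdentityTableMap;
    import org.apache.hadoop.hbase.mapred.TableInputFormat;
    import org.apache.hadoop.mapred.FileInputFormat;
    import org.apache.hadoop.mapred.JobConf;

    public class TableInputFormatConfigExample {
      static JobConf configure() {
        JobConf job = new JobConf(HBaseConfiguration.create());
        job.setInputFormat(TableInputFormat.class);
        // The single "input path" names the table; initialize() uses its last path component.
        FileInputFormat.setInputPaths(job, new Path("TestTable"));  // hypothetical table
        // Space-delimited column list, read from COLUMN_LIST by initialize().
        job.set(TableInputFormat.COLUMN_LIST, "f:q1 f:q2");         // hypothetical columns
        job.setMapperClass(IdentityTableMap.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(Result.class);
        return job;
      }
    }
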
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/replication/VerifyReplication.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/replication/VerifyReplication.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/replication/VerifyReplication.java
new file mode 100644
index 0000000..acf6ff8
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/replication/VerifyReplication.java
@@ -0,0 +1,700 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce.replication;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Abortable;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.client.TableSnapshotScanner;
+import org.apache.hadoop.hbase.filter.Filter;
+import org.apache.hadoop.hbase.filter.FilterList;
+import org.apache.hadoop.hbase.filter.PrefixFilter;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
+import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
+import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat;
+import org.apache.hadoop.hbase.mapreduce.TableMapper;
+import org.apache.hadoop.hbase.mapreduce.TableSplit;
+import org.apache.hadoop.hbase.replication.ReplicationException;
+import org.apache.hadoop.hbase.replication.ReplicationFactory;
+import org.apache.hadoop.hbase.replication.ReplicationPeerConfig;
+import org.apache.hadoop.hbase.replication.ReplicationPeerZKImpl;
+import org.apache.hadoop.hbase.replication.ReplicationPeers;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.hbase.util.Threads;
+import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.MRJobConfig;
+import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.annotations.VisibleForTesting;
+
+/**
+ * This map-only job compares the data from a local table with a remote one.
+ * Every cell is compared and must have exactly the same keys (even timestamp)
+ * as well as the same value. It is possible to restrict the job by time range and
+ * families. The peer id that's provided must match the one given when the
+ * replication stream was set up.
+ * <p>
+ * Two counters are provided, Verifier.Counters.GOODROWS and BADROWS. The reason
+ * why a row is different is shown in the map's log.
+ */
+public class VerifyReplication extends Configured implements Tool {
+
+ private static final Log LOG =
+ LogFactory.getLog(VerifyReplication.class);
+
+ public final static String NAME = "verifyrep";
+ private final static String PEER_CONFIG_PREFIX = NAME + ".peer.";
+ long startTime = 0;
+ long endTime = Long.MAX_VALUE;
+ int batch = -1;
+ int versions = -1;
+ String tableName = null;
+ String families = null;
+ String delimiter = "";
+ String peerId = null;
+ String rowPrefixes = null;
+ int sleepMsBeforeReCompare = 0;
+ boolean verbose = false;
+ boolean includeDeletedCells = false;
+ //Source table snapshot name
+ String sourceSnapshotName = null;
+ //Temp location in source cluster to restore source snapshot
+ String sourceSnapshotTmpDir = null;
+ //Peer table snapshot name
+ String peerSnapshotName = null;
+ //Temp location in peer cluster to restore peer snapshot
+ String peerSnapshotTmpDir = null;
+ //Peer cluster Hadoop FS address
+ String peerFSAddress = null;
+ //Peer cluster HBase root dir location
+ String peerHBaseRootAddress = null;
+
+
+ private final static String JOB_NAME_CONF_KEY = "mapreduce.job.name";
+
+ /**
+ * Map-only comparator for 2 tables
+ */
+ public static class Verifier
+ extends TableMapper<ImmutableBytesWritable, Put> {
+
+
+
+ public static enum Counters {
+ GOODROWS, BADROWS, ONLY_IN_SOURCE_TABLE_ROWS, ONLY_IN_PEER_TABLE_ROWS, CONTENT_DIFFERENT_ROWS}
+
+ private Connection sourceConnection;
+ private Table sourceTable;
+ private Connection replicatedConnection;
+ private Table replicatedTable;
+ private ResultScanner replicatedScanner;
+ private Result currentCompareRowInPeerTable;
+ private int sleepMsBeforeReCompare;
+ private String delimiter = "";
+ private boolean verbose = false;
+ private int batch = -1;
+
+ /**
+ * Map method that compares every scanned row with the equivalent from
+ * a distant cluster.
+ * @param row The current table row key.
+ * @param value The columns.
+ * @param context The current context.
+ * @throws IOException When something is broken with the data.
+ */
+ @Override
+ public void map(ImmutableBytesWritable row, final Result value,
+ Context context)
+ throws IOException {
+ if (replicatedScanner == null) {
+ Configuration conf = context.getConfiguration();
+ sleepMsBeforeReCompare = conf.getInt(NAME +".sleepMsBeforeReCompare", 0);
+ delimiter = conf.get(NAME + ".delimiter", "");
+ verbose = conf.getBoolean(NAME +".verbose", false);
+ batch = conf.getInt(NAME + ".batch", -1);
+ final Scan scan = new Scan();
+ if (batch > 0) {
+ scan.setBatch(batch);
+ }
+ scan.setCacheBlocks(false);
+ scan.setCaching(conf.getInt(TableInputFormat.SCAN_CACHEDROWS, 1));
+ long startTime = conf.getLong(NAME + ".startTime", 0);
+ long endTime = conf.getLong(NAME + ".endTime", Long.MAX_VALUE);
+ String families = conf.get(NAME + ".families", null);
+ if(families != null) {
+ String[] fams = families.split(",");
+ for(String fam : fams) {
+ scan.addFamily(Bytes.toBytes(fam));
+ }
+ }
+ boolean includeDeletedCells = conf.getBoolean(NAME + ".includeDeletedCells", false);
+ scan.setRaw(includeDeletedCells);
+ String rowPrefixes = conf.get(NAME + ".rowPrefixes", null);
+ setRowPrefixFilter(scan, rowPrefixes);
+ scan.setTimeRange(startTime, endTime);
+ int versions = conf.getInt(NAME+".versions", -1);
+ LOG.info("Setting number of version inside map as: " + versions);
+ if (versions >= 0) {
+ scan.setMaxVersions(versions);
+ }
+ TableName tableName = TableName.valueOf(conf.get(NAME + ".tableName"));
+ sourceConnection = ConnectionFactory.createConnection(conf);
+ sourceTable = sourceConnection.getTable(tableName);
+
+ final InputSplit tableSplit = context.getInputSplit();
+
+ String zkClusterKey = conf.get(NAME + ".peerQuorumAddress");
+ Configuration peerConf = HBaseConfiguration.createClusterConf(conf,
+ zkClusterKey, PEER_CONFIG_PREFIX);
+
+ replicatedConnection = ConnectionFactory.createConnection(peerConf);
+ replicatedTable = replicatedConnection.getTable(tableName);
+ scan.setStartRow(value.getRow());
+
+ byte[] endRow = null;
+ if (tableSplit instanceof TableSnapshotInputFormat.TableSnapshotRegionSplit) {
+ endRow = ((TableSnapshotInputFormat.TableSnapshotRegionSplit) tableSplit).getRegionInfo()
+ .getEndKey();
+ } else {
+ endRow = ((TableSplit) tableSplit).getEndRow();
+ }
+
+ scan.setStopRow(endRow);
+
+ String peerSnapshotName = conf.get(NAME + ".peerSnapshotName", null);
+ if (peerSnapshotName != null) {
+ String peerSnapshotTmpDir = conf.get(NAME + ".peerSnapshotTmpDir", null);
+ String peerFSAddress = conf.get(NAME + ".peerFSAddress", null);
+ String peerHBaseRootAddress = conf.get(NAME + ".peerHBaseRootAddress", null);
+ FileSystem.setDefaultUri(peerConf, peerFSAddress);
+ FSUtils.setRootDir(peerConf, new Path(peerHBaseRootAddress));
+ LOG.info("Using peer snapshot:" + peerSnapshotName + " with temp dir:"
+ + peerSnapshotTmpDir + " peer root uri:" + FSUtils.getRootDir(peerConf)
+ + " peerFSAddress:" + peerFSAddress);
+
+ replicatedScanner = new TableSnapshotScanner(peerConf,
+ new Path(peerFSAddress, peerSnapshotTmpDir), peerSnapshotName, scan);
+ } else {
+ replicatedScanner = replicatedTable.getScanner(scan);
+ }
+ currentCompareRowInPeerTable = replicatedScanner.next();
+ }
+ while (true) {
+ if (currentCompareRowInPeerTable == null) {
+ // reach the region end of peer table, row only in source table
+ logFailRowAndIncreaseCounter(context, Counters.ONLY_IN_SOURCE_TABLE_ROWS, value);
+ break;
+ }
+ int rowCmpRet = Bytes.compareTo(value.getRow(), currentCompareRowInPeerTable.getRow());
+ if (rowCmpRet == 0) {
+ // rowkey is same, need to compare the content of the row
+ try {
+ Result.compareResults(value, currentCompareRowInPeerTable);
+ context.getCounter(Counters.GOODROWS).increment(1);
+ if (verbose) {
+ LOG.info("Good row key: " + delimiter
+ + Bytes.toStringBinary(value.getRow()) + delimiter);
+ }
+ } catch (Exception e) {
+ logFailRowAndIncreaseCounter(context, Counters.CONTENT_DIFFERENT_ROWS, value);
+ }
+ currentCompareRowInPeerTable = replicatedScanner.next();
+ break;
+ } else if (rowCmpRet < 0) {
+ // row only exists in source table
+ logFailRowAndIncreaseCounter(context, Counters.ONLY_IN_SOURCE_TABLE_ROWS, value);
+ break;
+ } else {
+ // row only exists in peer table
+ logFailRowAndIncreaseCounter(context, Counters.ONLY_IN_PEER_TABLE_ROWS,
+ currentCompareRowInPeerTable);
+ currentCompareRowInPeerTable = replicatedScanner.next();
+ }
+ }
+ }
+
+ private void logFailRowAndIncreaseCounter(Context context, Counters counter, Result row) {
+ if (sleepMsBeforeReCompare > 0) {
+ Threads.sleep(sleepMsBeforeReCompare);
+ try {
+ Result sourceResult = sourceTable.get(new Get(row.getRow()));
+ Result replicatedResult = replicatedTable.get(new Get(row.getRow()));
+ Result.compareResults(sourceResult, replicatedResult);
+ if (!sourceResult.isEmpty()) {
+ context.getCounter(Counters.GOODROWS).increment(1);
+ if (verbose) {
+ LOG.info("Good row key (with recompare): " + delimiter + Bytes.toStringBinary(row.getRow())
+ + delimiter);
+ }
+ }
+ return;
+ } catch (Exception e) {
+ LOG.error("recompare fail after sleep, rowkey=" + delimiter +
+ Bytes.toStringBinary(row.getRow()) + delimiter);
+ }
+ }
+ context.getCounter(counter).increment(1);
+ context.getCounter(Counters.BADROWS).increment(1);
+ LOG.error(counter.toString() + ", rowkey=" + delimiter + Bytes.toStringBinary(row.getRow()) +
+ delimiter);
+ }
+
+ @Override
+ protected void cleanup(Context context) {
+ if (replicatedScanner != null) {
+ try {
+ while (currentCompareRowInPeerTable != null) {
+ logFailRowAndIncreaseCounter(context, Counters.ONLY_IN_PEER_TABLE_ROWS,
+ currentCompareRowInPeerTable);
+ currentCompareRowInPeerTable = replicatedScanner.next();
+ }
+ } catch (Exception e) {
+ LOG.error("fail to scan peer table in cleanup", e);
+ } finally {
+ replicatedScanner.close();
+ replicatedScanner = null;
+ }
+ }
+
+ if (sourceTable != null) {
+ try {
+ sourceTable.close();
+ } catch (IOException e) {
+ LOG.error("fail to close source table in cleanup", e);
+ }
+ }
+ if(sourceConnection != null){
+ try {
+ sourceConnection.close();
+ } catch (Exception e) {
+ LOG.error("fail to close source connection in cleanup", e);
+ }
+ }
+
+ if(replicatedTable != null){
+ try{
+ replicatedTable.close();
+ } catch (Exception e) {
+ LOG.error("fail to close replicated table in cleanup", e);
+ }
+ }
+ if(replicatedConnection != null){
+ try {
+ replicatedConnection.close();
+ } catch (Exception e) {
+ LOG.error("fail to close replicated connection in cleanup", e);
+ }
+ }
+ }
+ }
+
+ private static Pair<ReplicationPeerConfig, Configuration> getPeerQuorumConfig(
+ final Configuration conf, String peerId) throws IOException {
+ ZooKeeperWatcher localZKW = null;
+ ReplicationPeerZKImpl peer = null;
+ try {
+ localZKW = new ZooKeeperWatcher(conf, "VerifyReplication",
+ new Abortable() {
+ @Override public void abort(String why, Throwable e) {}
+ @Override public boolean isAborted() {return false;}
+ });
+
+ ReplicationPeers rp = ReplicationFactory.getReplicationPeers(localZKW, conf, localZKW);
+ rp.init();
+
+ Pair<ReplicationPeerConfig, Configuration> pair = rp.getPeerConf(peerId);
+ if (pair == null) {
+ throw new IOException("Couldn't get peer conf!");
+ }
+
+ return pair;
+ } catch (ReplicationException e) {
+ throw new IOException(
+ "An error occurred while trying to connect to the remove peer cluster", e);
+ } finally {
+ if (peer != null) {
+ peer.close();
+ }
+ if (localZKW != null) {
+ localZKW.close();
+ }
+ }
+ }
+
+ /**
+ * Sets up the actual job.
+ *
+ * @param conf The current configuration.
+ * @param args The command line parameters.
+ * @return The newly created job.
+ * @throws java.io.IOException When setting up the job fails.
+ */
+ public Job createSubmittableJob(Configuration conf, String[] args)
+ throws IOException {
+ if (!doCommandLine(args)) {
+ return null;
+ }
+ conf.set(NAME+".peerId", peerId);
+ conf.set(NAME+".tableName", tableName);
+ conf.setLong(NAME+".startTime", startTime);
+ conf.setLong(NAME+".endTime", endTime);
+ conf.setInt(NAME +".sleepMsBeforeReCompare", sleepMsBeforeReCompare);
+ conf.set(NAME + ".delimiter", delimiter);
+ conf.setInt(NAME + ".batch", batch);
+ conf.setBoolean(NAME +".verbose", verbose);
+ conf.setBoolean(NAME +".includeDeletedCells", includeDeletedCells);
+ if (families != null) {
+ conf.set(NAME+".families", families);
+ }
+ if (rowPrefixes != null){
+ conf.set(NAME+".rowPrefixes", rowPrefixes);
+ }
+
+ Pair<ReplicationPeerConfig, Configuration> peerConfigPair = getPeerQuorumConfig(conf, peerId);
+ ReplicationPeerConfig peerConfig = peerConfigPair.getFirst();
+ String peerQuorumAddress = peerConfig.getClusterKey();
+ LOG.info("Peer Quorum Address: " + peerQuorumAddress + ", Peer Configuration: " +
+ peerConfig.getConfiguration());
+ conf.set(NAME + ".peerQuorumAddress", peerQuorumAddress);
+ HBaseConfiguration.setWithPrefix(conf, PEER_CONFIG_PREFIX,
+ peerConfig.getConfiguration().entrySet());
+
+ conf.setInt(NAME + ".versions", versions);
+ LOG.info("Number of version: " + versions);
+
+ //Set Snapshot specific parameters
+ if (peerSnapshotName != null) {
+ conf.set(NAME + ".peerSnapshotName", peerSnapshotName);
+ conf.set(NAME + ".peerSnapshotTmpDir", peerSnapshotTmpDir);
+ conf.set(NAME + ".peerFSAddress", peerFSAddress);
+ conf.set(NAME + ".peerHBaseRootAddress", peerHBaseRootAddress);
+
+ // This is to create HDFS delegation token for peer cluster in case of secured
+ conf.setStrings(MRJobConfig.JOB_NAMENODES, peerFSAddress);
+ }
+
+ Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
+ job.setJarByClass(VerifyReplication.class);
+
+ Scan scan = new Scan();
+ scan.setTimeRange(startTime, endTime);
+ scan.setRaw(includeDeletedCells);
+ scan.setCacheBlocks(false);
+ if (batch > 0) {
+ scan.setBatch(batch);
+ }
+ if (versions >= 0) {
+ scan.setMaxVersions(versions);
+ LOG.info("Number of versions set to " + versions);
+ }
+ if(families != null) {
+ String[] fams = families.split(",");
+ for(String fam : fams) {
+ scan.addFamily(Bytes.toBytes(fam));
+ }
+ }
+
+ setRowPrefixFilter(scan, rowPrefixes);
+
+ if (sourceSnapshotName != null) {
+ Path snapshotTempPath = new Path(sourceSnapshotTmpDir);
+ LOG.info(
+ "Using source snapshot-" + sourceSnapshotName + " with temp dir:" + sourceSnapshotTmpDir);
+ TableMapReduceUtil.initTableSnapshotMapperJob(sourceSnapshotName, scan, Verifier.class, null,
+ null, job, true, snapshotTempPath);
+ } else {
+ TableMapReduceUtil.initTableMapperJob(tableName, scan, Verifier.class, null, null, job);
+ }
+ Configuration peerClusterConf = peerConfigPair.getSecond();
+ // Obtain the auth token from peer cluster
+ TableMapReduceUtil.initCredentialsForCluster(job, peerClusterConf);
+
+ job.setOutputFormatClass(NullOutputFormat.class);
+ job.setNumReduceTasks(0);
+ return job;
+ }
+
+ private static void setRowPrefixFilter(Scan scan, String rowPrefixes) {
+ if (rowPrefixes != null && !rowPrefixes.isEmpty()) {
+ String[] rowPrefixArray = rowPrefixes.split(",");
+ Arrays.sort(rowPrefixArray);
+ FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ONE);
+ for (String prefix : rowPrefixArray) {
+ Filter filter = new PrefixFilter(Bytes.toBytes(prefix));
+ filterList.addFilter(filter);
+ }
+ scan.setFilter(filterList);
+ byte[] startPrefixRow = Bytes.toBytes(rowPrefixArray[0]);
+ byte[] lastPrefixRow = Bytes.toBytes(rowPrefixArray[rowPrefixArray.length -1]);
+ setStartAndStopRows(scan, startPrefixRow, lastPrefixRow);
+ }
+ }
+
+ private static void setStartAndStopRows(Scan scan, byte[] startPrefixRow, byte[] lastPrefixRow) {
+ scan.setStartRow(startPrefixRow);
+ byte[] stopRow = Bytes.add(Bytes.head(lastPrefixRow, lastPrefixRow.length - 1),
+ new byte[]{(byte) (lastPrefixRow[lastPrefixRow.length - 1] + 1)});
+ scan.setStopRow(stopRow);
+ }
+
+ @VisibleForTesting
+ public boolean doCommandLine(final String[] args) {
+ if (args.length < 2) {
+ printUsage(null);
+ return false;
+ }
+ try {
+ for (int i = 0; i < args.length; i++) {
+ String cmd = args[i];
+ if (cmd.equals("-h") || cmd.startsWith("--h")) {
+ printUsage(null);
+ return false;
+ }
+
+ final String startTimeArgKey = "--starttime=";
+ if (cmd.startsWith(startTimeArgKey)) {
+ startTime = Long.parseLong(cmd.substring(startTimeArgKey.length()));
+ continue;
+ }
+
+ final String endTimeArgKey = "--endtime=";
+ if (cmd.startsWith(endTimeArgKey)) {
+ endTime = Long.parseLong(cmd.substring(endTimeArgKey.length()));
+ continue;
+ }
+
+ final String includeDeletedCellsArgKey = "--raw";
+ if (cmd.equals(includeDeletedCellsArgKey)) {
+ includeDeletedCells = true;
+ continue;
+ }
+
+ final String versionsArgKey = "--versions=";
+ if (cmd.startsWith(versionsArgKey)) {
+ versions = Integer.parseInt(cmd.substring(versionsArgKey.length()));
+ continue;
+ }
+
+ final String batchArgKey = "--batch=";
+ if (cmd.startsWith(batchArgKey)) {
+ batch = Integer.parseInt(cmd.substring(batchArgKey.length()));
+ continue;
+ }
+
+ final String familiesArgKey = "--families=";
+ if (cmd.startsWith(familiesArgKey)) {
+ families = cmd.substring(familiesArgKey.length());
+ continue;
+ }
+
+ final String rowPrefixesKey = "--row-prefixes=";
+ if (cmd.startsWith(rowPrefixesKey)){
+ rowPrefixes = cmd.substring(rowPrefixesKey.length());
+ continue;
+ }
+
+ final String delimiterArgKey = "--delimiter=";
+ if (cmd.startsWith(delimiterArgKey)) {
+ delimiter = cmd.substring(delimiterArgKey.length());
+ continue;
+ }
+
+ final String sleepToReCompareKey = "--recomparesleep=";
+ if (cmd.startsWith(sleepToReCompareKey)) {
+ sleepMsBeforeReCompare = Integer.parseInt(cmd.substring(sleepToReCompareKey.length()));
+ continue;
+ }
+ final String verboseKey = "--verbose";
+ if (cmd.startsWith(verboseKey)) {
+ verbose = true;
+ continue;
+ }
+
+ final String sourceSnapshotNameArgKey = "--sourceSnapshotName=";
+ if (cmd.startsWith(sourceSnapshotNameArgKey)) {
+ sourceSnapshotName = cmd.substring(sourceSnapshotNameArgKey.length());
+ continue;
+ }
+
+ final String sourceSnapshotTmpDirArgKey = "--sourceSnapshotTmpDir=";
+ if (cmd.startsWith(sourceSnapshotTmpDirArgKey)) {
+ sourceSnapshotTmpDir = cmd.substring(sourceSnapshotTmpDirArgKey.length());
+ continue;
+ }
+
+ final String peerSnapshotNameArgKey = "--peerSnapshotName=";
+ if (cmd.startsWith(peerSnapshotNameArgKey)) {
+ peerSnapshotName = cmd.substring(peerSnapshotNameArgKey.length());
+ continue;
+ }
+
+ final String peerSnapshotTmpDirArgKey = "--peerSnapshotTmpDir=";
+ if (cmd.startsWith(peerSnapshotTmpDirArgKey)) {
+ peerSnapshotTmpDir = cmd.substring(peerSnapshotTmpDirArgKey.length());
+ continue;
+ }
+
+ final String peerFSAddressArgKey = "--peerFSAddress=";
+ if (cmd.startsWith(peerFSAddressArgKey)) {
+ peerFSAddress = cmd.substring(peerFSAddressArgKey.length());
+ continue;
+ }
+
+ final String peerHBaseRootAddressArgKey = "--peerHBaseRootAddress=";
+ if (cmd.startsWith(peerHBaseRootAddressArgKey)) {
+ peerHBaseRootAddress = cmd.substring(peerHBaseRootAddressArgKey.length());
+ continue;
+ }
+
+ if (cmd.startsWith("--")) {
+ printUsage("Invalid argument '" + cmd + "'");
+ return false;
+ }
+
+ if (i == args.length-2) {
+ peerId = cmd;
+ }
+
+ if (i == args.length-1) {
+ tableName = cmd;
+ }
+ }
+
+ if ((sourceSnapshotName != null && sourceSnapshotTmpDir == null)
+ || (sourceSnapshotName == null && sourceSnapshotTmpDir != null)) {
+ printUsage("Source snapshot name and snapshot temp location should be provided"
+ + " to use snapshots in source cluster");
+ return false;
+ }
+
+ if (peerSnapshotName != null || peerSnapshotTmpDir != null || peerFSAddress != null
+ || peerHBaseRootAddress != null) {
+ if (peerSnapshotName == null || peerSnapshotTmpDir == null || peerFSAddress == null
+ || peerHBaseRootAddress == null) {
+ printUsage(
+ "Peer snapshot name, peer snapshot temp location, Peer HBase root address and "
+ + "peer FSAddress should be provided to use snapshots in peer cluster");
+ return false;
+ }
+ }
+
+ // This is to avoid making recompare calls to source/peer tables when snapshots are used
+ if ((sourceSnapshotName != null || peerSnapshotName != null) && sleepMsBeforeReCompare > 0) {
+ printUsage(
+ "Using sleepMsBeforeReCompare along with snapshots is not allowed as snapshots are immutable");
+ return false;
+ }
+
+ } catch (Exception e) {
+ e.printStackTrace();
+ printUsage("Can't start because " + e.getMessage());
+ return false;
+ }
+ return true;
+ }
+
+ /*
+ * @param errorMsg Error message. Can be null.
+ */
+ private static void printUsage(final String errorMsg) {
+ if (errorMsg != null && errorMsg.length() > 0) {
+ System.err.println("ERROR: " + errorMsg);
+ }
+ System.err.println("Usage: verifyrep [--starttime=X]" +
+ " [--endtime=Y] [--families=A] [--row-prefixes=B] [--delimiter=] [--recomparesleep=] " +
+ "[--batch=] [--verbose] [--sourceSnapshotName=P] [--sourceSnapshotTmpDir=Q] [--peerSnapshotName=R] "
+ + "[--peerSnapshotTmpDir=S] [--peerFSAddress=T] [--peerHBaseRootAddress=U] <peerid> <tablename>");
+ System.err.println();
+ System.err.println("Options:");
+ System.err.println(" starttime beginning of the time range");
+ System.err.println(" without endtime means from starttime to forever");
+ System.err.println(" endtime end of the time range");
+ System.err.println(" versions number of cell versions to verify");
+ System.err.println(" batch batch count for scan, " +
+ "note that result row counts will no longer be actual number of rows when you use this option");
+ System.err.println(" raw includes raw scan if given in options");
+ System.err.println(" families comma-separated list of families to copy");
+ System.err.println(" row-prefixes comma-separated list of row key prefixes to filter on ");
+ System.err.println(" delimiter the delimiter used in display around rowkey");
+ System.err.println(" recomparesleep milliseconds to sleep before recompare row, " +
+ "default value is 0 which disables the recompare.");
+ System.err.println(" verbose logs row keys of good rows");
+ System.err.println(" sourceSnapshotName Source Snapshot Name");
+ System.err.println(" sourceSnapshotTmpDir Tmp location to restore source table snapshot");
+ System.err.println(" peerSnapshotName Peer Snapshot Name");
+ System.err.println(" peerSnapshotTmpDir Tmp location to restore peer table snapshot");
+ System.err.println(" peerFSAddress Peer cluster Hadoop FS address");
+ System.err.println(" peerHBaseRootAddress Peer cluster HBase root location");
+ System.err.println();
+ System.err.println("Args:");
+ System.err.println(" peerid Id of the peer used for verification, must match the one given for replication");
+ System.err.println(" tablename Name of the table to verify");
+ System.err.println();
+ System.err.println("Examples:");
+ System.err.println(" To verify the data replicated from TestTable for a 1 hour window with peer #5 ");
+ System.err.println(" $ hbase " +
+ "org.apache.hadoop.hbase.mapreduce.replication.VerifyReplication" +
+ " --starttime=1265875194289 --endtime=1265878794289 5 TestTable ");
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+ Configuration conf = this.getConf();
+ Job job = createSubmittableJob(conf, args);
+ if (job != null) {
+ return job.waitForCompletion(true) ? 0 : 1;
+ }
+ return 1;
+ }
+
+ /**
+ * Main entry point.
+ *
+ * @param args The command line parameters.
+ * @throws Exception When running the job fails.
+ */
+ public static void main(String[] args) throws Exception {
+ int res = ToolRunner.run(HBaseConfiguration.create(), new VerifyReplication(), args);
+ System.exit(res);
+ }
+}
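
For reference, a sketch of launching the tool above via ToolRunner, mirroring the command-line
example printed by printUsage(); the time range, peer id 5 and table 'TestTable' come from that
example, and the family name 'f' is a hypothetical placeholder.

    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.mapreduce.replication.VerifyReplication;
    import org.apache.hadoop.util.ToolRunner;

    public class VerifyReplicationExample {
      public static void main(String[] args) throws Exception {
        // Compare a one-hour window of 'TestTable' against replication peer 5.
        int rc = ToolRunner.run(HBaseConfiguration.create(), new VerifyReplication(),
            new String[] { "--starttime=1265875194289", "--endtime=1265878794289",
                "--families=f", "5", "TestTable" });
        System.exit(rc);
      }
    }
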
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/regionserver/CompactionTool.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/regionserver/CompactionTool.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/regionserver/CompactionTool.java
new file mode 100644
index 0000000..eb9a5f7
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/regionserver/CompactionTool.java
@@ -0,0 +1,470 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.regionserver;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HBaseInterfaceAudience;
+import org.apache.hadoop.hbase.HDFSBlocksDistribution;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.mapreduce.JobUtil;
+import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
+import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
+import org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+import org.apache.hadoop.hbase.util.FSTableDescriptors;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.lib.input.FileSplit;
+import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
+import org.apache.hadoop.util.LineReader;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * The CompactionTool allows you to run a compaction on a:
+ * <ul>
+ * <li>table folder (all regions and families will be compacted)
+ * <li>region folder (all families in the region will be compacted)
+ * <li>family folder (the store files will be compacted)
+ * </ul>
+ */
+@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
+public class CompactionTool extends Configured implements Tool {
+ private static final Log LOG = LogFactory.getLog(CompactionTool.class);
+
+ private final static String CONF_TMP_DIR = "hbase.tmp.dir";
+ private final static String CONF_COMPACT_ONCE = "hbase.compactiontool.compact.once";
+ private final static String CONF_COMPACT_MAJOR = "hbase.compactiontool.compact.major";
+ private final static String CONF_DELETE_COMPACTED = "hbase.compactiontool.delete";
+ private final static String CONF_COMPLETE_COMPACTION = "hbase.hstore.compaction.complete";
+
+ /**
+ * Class responsible for executing the compaction on the specified path.
+ * The path can be a table, region or family directory.
+ */
+ private static class CompactionWorker {
+ private final boolean keepCompactedFiles;
+ private final boolean deleteCompacted;
+ private final Configuration conf;
+ private final FileSystem fs;
+ private final Path tmpDir;
+
+ public CompactionWorker(final FileSystem fs, final Configuration conf) {
+ this.conf = conf;
+ this.keepCompactedFiles = !conf.getBoolean(CONF_COMPLETE_COMPACTION, true);
+ this.deleteCompacted = conf.getBoolean(CONF_DELETE_COMPACTED, false);
+ this.tmpDir = new Path(conf.get(CONF_TMP_DIR));
+ this.fs = fs;
+ }
+
+ /**
+ * Execute the compaction on the specified path.
+ *
+ * @param path Directory path on which to run compaction.
+ * @param compactOnce Execute just a single step of compaction.
+ * @param major Request major compaction.
+ */
+ public void compact(final Path path, final boolean compactOnce, final boolean major) throws IOException {
+ if (isFamilyDir(fs, path)) {
+ Path regionDir = path.getParent();
+ Path tableDir = regionDir.getParent();
+ TableDescriptor htd = FSTableDescriptors.getTableDescriptorFromFs(fs, tableDir);
+ HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
+ compactStoreFiles(tableDir, htd, hri,
+ path.getName(), compactOnce, major);
+ } else if (isRegionDir(fs, path)) {
+ Path tableDir = path.getParent();
+ TableDescriptor htd = FSTableDescriptors.getTableDescriptorFromFs(fs, tableDir);
+ compactRegion(tableDir, htd, path, compactOnce, major);
+ } else if (isTableDir(fs, path)) {
+ compactTable(path, compactOnce, major);
+ } else {
+ throw new IOException(
+ "Specified path is not a table, region or family directory. path=" + path);
+ }
+ }
+
+ private void compactTable(final Path tableDir, final boolean compactOnce, final boolean major)
+ throws IOException {
+ TableDescriptor htd = FSTableDescriptors.getTableDescriptorFromFs(fs, tableDir);
+ for (Path regionDir: FSUtils.getRegionDirs(fs, tableDir)) {
+ compactRegion(tableDir, htd, regionDir, compactOnce, major);
+ }
+ }
+
+ private void compactRegion(final Path tableDir, final TableDescriptor htd,
+ final Path regionDir, final boolean compactOnce, final boolean major)
+ throws IOException {
+ HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
+ for (Path familyDir: FSUtils.getFamilyDirs(fs, regionDir)) {
+ compactStoreFiles(tableDir, htd, hri, familyDir.getName(), compactOnce, major);
+ }
+ }
+
+ /**
+ * Execute the actual compaction job.
+ * If the compact once flag is not specified, execute the compaction until
+ * no more compactions are needed. Uses the Configuration settings provided.
+ */
+ private void compactStoreFiles(final Path tableDir, final TableDescriptor htd,
+ final HRegionInfo hri, final String familyName, final boolean compactOnce,
+ final boolean major) throws IOException {
+ HStore store = getStore(conf, fs, tableDir, htd, hri, familyName, tmpDir);
+ LOG.info("Compact table=" + htd.getTableName() +
+ " region=" + hri.getRegionNameAsString() +
+ " family=" + familyName);
+ if (major) {
+ store.triggerMajorCompaction();
+ }
+ do {
+ CompactionContext compaction = store.requestCompaction(Store.PRIORITY_USER, null);
+ if (compaction == null) break;
+ List<StoreFile> storeFiles =
+ store.compact(compaction, NoLimitThroughputController.INSTANCE);
+ if (storeFiles != null && !storeFiles.isEmpty()) {
+ if (keepCompactedFiles && deleteCompacted) {
+ for (StoreFile storeFile: storeFiles) {
+ fs.delete(storeFile.getPath(), false);
+ }
+ }
+ }
+ } while (store.needsCompaction() && !compactOnce);
+ }
+
+ /**
+ * Create a "mock" HStore that uses the tmpDir specified by the user and
+ * the store dir to compact as source.
+ */
+ private static HStore getStore(final Configuration conf, final FileSystem fs,
+ final Path tableDir, final TableDescriptor htd, final HRegionInfo hri,
+ final String familyName, final Path tempDir) throws IOException {
+ HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, hri) {
+ @Override
+ public Path getTempDir() {
+ return tempDir;
+ }
+ };
+ HRegion region = new HRegion(regionFs, null, conf, htd, null);
+ return new HStore(region, htd.getColumnFamily(Bytes.toBytes(familyName)), conf);
+ }
+ }
+
+ private static boolean isRegionDir(final FileSystem fs, final Path path) throws IOException {
+ Path regionInfo = new Path(path, HRegionFileSystem.REGION_INFO_FILE);
+ return fs.exists(regionInfo);
+ }
+
+ private static boolean isTableDir(final FileSystem fs, final Path path) throws IOException {
+ return FSTableDescriptors.getTableInfoPath(fs, path) != null;
+ }
+
+ private static boolean isFamilyDir(final FileSystem fs, final Path path) throws IOException {
+ return isRegionDir(fs, path.getParent());
+ }
+
+ private static class CompactionMapper
+ extends Mapper<LongWritable, Text, NullWritable, NullWritable> {
+ private CompactionWorker compactor = null;
+ private boolean compactOnce = false;
+ private boolean major = false;
+
+ @Override
+ public void setup(Context context) {
+ Configuration conf = context.getConfiguration();
+ compactOnce = conf.getBoolean(CONF_COMPACT_ONCE, false);
+ major = conf.getBoolean(CONF_COMPACT_MAJOR, false);
+
+ try {
+ FileSystem fs = FileSystem.get(conf);
+ this.compactor = new CompactionWorker(fs, conf);
+ } catch (IOException e) {
+ throw new RuntimeException("Could not get the input FileSystem", e);
+ }
+ }
+
+ @Override
+ public void map(LongWritable key, Text value, Context context)
+ throws InterruptedException, IOException {
+ Path path = new Path(value.toString());
+ this.compactor.compact(path, compactOnce, major);
+ }
+ }
+
+ /**
+ * Input format that uses store files block location as input split locality.
+ */
+ private static class CompactionInputFormat extends TextInputFormat {
+ @Override
+ protected boolean isSplitable(JobContext context, Path file) {
+ return true;
+ }
+
+ /**
+ * Returns a split for each store files directory using the block location
+ * of each file as locality reference.
+ */
+ @Override
+ public List<InputSplit> getSplits(JobContext job) throws IOException {
+ List<InputSplit> splits = new ArrayList<>();
+ List<FileStatus> files = listStatus(job);
+
+ Text key = new Text();
+ for (FileStatus file: files) {
+ Path path = file.getPath();
+ FileSystem fs = path.getFileSystem(job.getConfiguration());
+ LineReader reader = new LineReader(fs.open(path));
+ long pos = 0;
+ int n;
+ try {
+ while ((n = reader.readLine(key)) > 0) {
+ String[] hosts = getStoreDirHosts(fs, path);
+ splits.add(new FileSplit(path, pos, n, hosts));
+ pos += n;
+ }
+ } finally {
+ reader.close();
+ }
+ }
+
+ return splits;
+ }
+
+ /**
+ * return the top hosts of the store files, used by the Split
+ */
+ private static String[] getStoreDirHosts(final FileSystem fs, final Path path)
+ throws IOException {
+ FileStatus[] files = FSUtils.listStatus(fs, path);
+ if (files == null) {
+ return new String[] {};
+ }
+
+ HDFSBlocksDistribution hdfsBlocksDistribution = new HDFSBlocksDistribution();
+ for (FileStatus hfileStatus: files) {
+ HDFSBlocksDistribution storeFileBlocksDistribution =
+ FSUtils.computeHDFSBlocksDistribution(fs, hfileStatus, 0, hfileStatus.getLen());
+ hdfsBlocksDistribution.add(storeFileBlocksDistribution);
+ }
+
+ List<String> hosts = hdfsBlocksDistribution.getTopHosts();
+ return hosts.toArray(new String[hosts.size()]);
+ }
+
+ /**
+ * Create the input file for the given directories to compact.
+ * The file is a text file with each line corresponding to a
+ * store files directory to compact.
+ */
+ public static void createInputFile(final FileSystem fs, final Path path,
+ final Set<Path> toCompactDirs) throws IOException {
+ // Extract the list of store dirs
+ List<Path> storeDirs = new LinkedList<>();
+ for (Path compactDir: toCompactDirs) {
+ if (isFamilyDir(fs, compactDir)) {
+ storeDirs.add(compactDir);
+ } else if (isRegionDir(fs, compactDir)) {
+ for (Path familyDir: FSUtils.getFamilyDirs(fs, compactDir)) {
+ storeDirs.add(familyDir);
+ }
+ } else if (isTableDir(fs, compactDir)) {
+ // Lookup regions
+ for (Path regionDir: FSUtils.getRegionDirs(fs, compactDir)) {
+ for (Path familyDir: FSUtils.getFamilyDirs(fs, regionDir)) {
+ storeDirs.add(familyDir);
+ }
+ }
+ } else {
+ throw new IOException(
+ "Specified path is not a table, region or family directory. path=" + compactDir);
+ }
+ }
+
+ // Write Input File
+ FSDataOutputStream stream = fs.create(path);
+ LOG.info("Create input file=" + path + " with " + storeDirs.size() + " dirs to compact.");
+ try {
+ final byte[] newLine = Bytes.toBytes("\n");
+ for (Path storeDir: storeDirs) {
+ stream.write(Bytes.toBytes(storeDir.toString()));
+ stream.write(newLine);
+ }
+ } finally {
+ stream.close();
+ }
+ }
+ }
+
+ /**
+ * Execute compaction, using a Map-Reduce job.
+ */
+ private int doMapReduce(final FileSystem fs, final Set<Path> toCompactDirs,
+ final boolean compactOnce, final boolean major) throws Exception {
+ Configuration conf = getConf();
+ conf.setBoolean(CONF_COMPACT_ONCE, compactOnce);
+ conf.setBoolean(CONF_COMPACT_MAJOR, major);
+
+ Job job = new Job(conf);
+ job.setJobName("CompactionTool");
+ job.setJarByClass(CompactionTool.class);
+ job.setMapperClass(CompactionMapper.class);
+ job.setInputFormatClass(CompactionInputFormat.class);
+ job.setOutputFormatClass(NullOutputFormat.class);
+ job.setMapSpeculativeExecution(false);
+ job.setNumReduceTasks(0);
+
+ // add dependencies (including HBase ones)
+ TableMapReduceUtil.addDependencyJars(job);
+
+ Path stagingDir = JobUtil.getStagingDir(conf);
+ try {
+ // Create input file with the store dirs
+ Path inputPath = new Path(stagingDir, "compact-"+ EnvironmentEdgeManager.currentTime());
+ CompactionInputFormat.createInputFile(fs, inputPath, toCompactDirs);
+ CompactionInputFormat.addInputPath(job, inputPath);
+
+ // Initialize credential for secure cluster
+ TableMapReduceUtil.initCredentials(job);
+
+ // Start the MR Job and wait
+ return job.waitForCompletion(true) ? 0 : 1;
+ } finally {
+ fs.delete(stagingDir, true);
+ }
+ }
+
+ /**
+ * Execute compaction, from this client, one path at the time.
+ */
+ private int doClient(final FileSystem fs, final Set<Path> toCompactDirs,
+ final boolean compactOnce, final boolean major) throws IOException {
+ CompactionWorker worker = new CompactionWorker(fs, getConf());
+ for (Path path: toCompactDirs) {
+ worker.compact(path, compactOnce, major);
+ }
+ return 0;
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+ Set<Path> toCompactDirs = new HashSet<>();
+ boolean compactOnce = false;
+ boolean major = false;
+ boolean mapred = false;
+
+ Configuration conf = getConf();
+ FileSystem fs = FileSystem.get(conf);
+
+ try {
+ for (int i = 0; i < args.length; ++i) {
+ String opt = args[i];
+ if (opt.equals("-compactOnce")) {
+ compactOnce = true;
+ } else if (opt.equals("-major")) {
+ major = true;
+ } else if (opt.equals("-mapred")) {
+ mapred = true;
+ } else if (!opt.startsWith("-")) {
+ Path path = new Path(opt);
+ FileStatus status = fs.getFileStatus(path);
+ if (!status.isDirectory()) {
+ printUsage("Specified path is not a directory. path=" + path);
+ return 1;
+ }
+ toCompactDirs.add(path);
+ } else {
+ printUsage();
+ }
+ }
+ } catch (Exception e) {
+ printUsage(e.getMessage());
+ return 1;
+ }
+
+ if (toCompactDirs.isEmpty()) {
+ printUsage("No directories to compact specified.");
+ return 1;
+ }
+
+ // Execute compaction!
+ if (mapred) {
+ return doMapReduce(fs, toCompactDirs, compactOnce, major);
+ } else {
+ return doClient(fs, toCompactDirs, compactOnce, major);
+ }
+ }
+
+ private void printUsage() {
+ printUsage(null);
+ }
+
+ private void printUsage(final String message) {
+ if (message != null && message.length() > 0) {
+ System.err.println(message);
+ }
+ System.err.println("Usage: java " + this.getClass().getName() + " \\");
+ System.err.println(" [-compactOnce] [-major] [-mapred] [-D<property=value>]* files...");
+ System.err.println();
+ System.err.println("Options:");
+ System.err.println(" mapred Use MapReduce to run compaction.");
+ System.err.println(" compactOnce Execute just one compaction step. (default: while needed)");
+ System.err.println(" major Trigger major compaction.");
+ System.err.println();
+ System.err.println("Note: -D properties will be applied to the conf used. ");
+ System.err.println("For example: ");
+ System.err.println(" To preserve input files, pass -D"+CONF_COMPLETE_COMPACTION+"=false");
+ System.err.println(" To stop delete of compacted file, pass -D"+CONF_DELETE_COMPACTED+"=false");
+ System.err.println(" To set tmp dir, pass -D"+CONF_TMP_DIR+"=ALTERNATE_DIR");
+ System.err.println();
+ System.err.println("Examples:");
+ System.err.println(" To compact the full 'TestTable' using MapReduce:");
+ System.err.println(" $ hbase " + this.getClass().getName() + " -mapred hdfs:///hbase/data/default/TestTable");
+ System.err.println();
+ System.err.println(" To compact column family 'x' of the table 'TestTable' region 'abc':");
+ System.err.println(" $ hbase " + this.getClass().getName() + " hdfs:///hbase/data/default/TestTable/abc/x");
+ }
+
+ public static void main(String[] args) throws Exception {
+ System.exit(ToolRunner.run(HBaseConfiguration.create(), new CompactionTool(), args));
+ }
+}
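
Similarly, a small sketch of invoking CompactionTool from code; the HDFS path mirrors the example
in printUsage() and, like the rest of this sketch, is a hypothetical placeholder.

    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.regionserver.CompactionTool;
    import org.apache.hadoop.util.ToolRunner;

    public class CompactionToolExample {
      public static void main(String[] args) throws Exception {
        // Major-compact all regions and families of 'TestTable' via a MapReduce job.
        int rc = ToolRunner.run(HBaseConfiguration.create(), new CompactionTool(),
            new String[] { "-mapred", "-major", "hdfs:///hbase/data/default/TestTable" });
        System.exit(rc);
      }
    }
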
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatImpl.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatImpl.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatImpl.java
new file mode 100644
index 0000000..403051f
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatImpl.java
@@ -0,0 +1,410 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HDFSBlocksDistribution;
+import org.apache.hadoop.hbase.HDFSBlocksDistribution.HostAndWeight;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.ClientSideRegionScanner;
+import org.apache.hadoop.hbase.client.IsolationLevel;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MapReduceProtos.TableSnapshotRegionSplit;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
+import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper;
+import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
+import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.io.Writable;
+
+import java.io.ByteArrayOutputStream;
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.UUID;
+
+/**
+ * Hadoop MR API-agnostic implementation for mapreduce over table snapshots.
+ */
+@InterfaceAudience.Private
+public class TableSnapshotInputFormatImpl {
+ // TODO: Snapshots files are owned in fs by the hbase user. There is no
+ // easy way to delegate access.
+
+ public static final Log LOG = LogFactory.getLog(TableSnapshotInputFormatImpl.class);
+
+ private static final String SNAPSHOT_NAME_KEY = "hbase.TableSnapshotInputFormat.snapshot.name";
+ // key for specifying the root dir of the restored snapshot
+ protected static final String RESTORE_DIR_KEY = "hbase.TableSnapshotInputFormat.restore.dir";
+
+ /** See {@link #getBestLocations(Configuration, HDFSBlocksDistribution)} */
+ private static final String LOCALITY_CUTOFF_MULTIPLIER =
+ "hbase.tablesnapshotinputformat.locality.cutoff.multiplier";
+ private static final float DEFAULT_LOCALITY_CUTOFF_MULTIPLIER = 0.8f;
+
+ /**
+ * Implementation class for InputSplit logic common between mapred and mapreduce.
+ */
+ public static class InputSplit implements Writable {
+
+ private TableDescriptor htd;
+ private HRegionInfo regionInfo;
+ private String[] locations;
+ private String scan;
+ private String restoreDir;
+
+ // constructor for mapreduce framework / Writable
+ public InputSplit() {}
+
+ public InputSplit(TableDescriptor htd, HRegionInfo regionInfo, List<String> locations,
+ Scan scan, Path restoreDir) {
+ this.htd = htd;
+ this.regionInfo = regionInfo;
+ if (locations == null || locations.isEmpty()) {
+ this.locations = new String[0];
+ } else {
+ this.locations = locations.toArray(new String[locations.size()]);
+ }
+ try {
+ this.scan = scan != null ? TableMapReduceUtil.convertScanToString(scan) : "";
+ } catch (IOException e) {
+ LOG.warn("Failed to convert Scan to String", e);
+ }
+
+ this.restoreDir = restoreDir.toString();
+ }
+
+ public TableDescriptor getHtd() {
+ return htd;
+ }
+
+ public String getScan() {
+ return scan;
+ }
+
+ public String getRestoreDir() {
+ return restoreDir;
+ }
+
+ public long getLength() {
+ //TODO: We can obtain the file sizes of the snapshot here.
+ return 0;
+ }
+
+ public String[] getLocations() {
+ return locations;
+ }
+
+ public TableDescriptor getTableDescriptor() {
+ return htd;
+ }
+
+ public HRegionInfo getRegionInfo() {
+ return regionInfo;
+ }
+
+ // TODO: We should have ProtobufSerialization in Hadoop, and directly use PB objects instead of
+ // doing this wrapping with Writables.
+ @Override
+ public void write(DataOutput out) throws IOException {
+ TableSnapshotRegionSplit.Builder builder = TableSnapshotRegionSplit.newBuilder()
+ .setTable(ProtobufUtil.toTableSchema(htd))
+ .setRegion(HRegionInfo.convert(regionInfo));
+
+ for (String location : locations) {
+ builder.addLocations(location);
+ }
+
+ TableSnapshotRegionSplit split = builder.build();
+
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ split.writeTo(baos);
+ baos.close();
+ byte[] buf = baos.toByteArray();
+ out.writeInt(buf.length);
+ out.write(buf);
+
+ Bytes.writeByteArray(out, Bytes.toBytes(scan));
+ Bytes.writeByteArray(out, Bytes.toBytes(restoreDir));
+
+ }
+
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ int len = in.readInt();
+ byte[] buf = new byte[len];
+ in.readFully(buf);
+ TableSnapshotRegionSplit split = TableSnapshotRegionSplit.PARSER.parseFrom(buf);
+ this.htd = ProtobufUtil.toTableDescriptor(split.getTable());
+ this.regionInfo = HRegionInfo.convert(split.getRegion());
+ List<String> locationsList = split.getLocationsList();
+ this.locations = locationsList.toArray(new String[locationsList.size()]);
+
+ this.scan = Bytes.toString(Bytes.readByteArray(in));
+ this.restoreDir = Bytes.toString(Bytes.readByteArray(in));
+ }
+ }
+
+ /**
+ * Implementation class for RecordReader logic common between mapred and mapreduce.
+ */
+ public static class RecordReader {
+ private InputSplit split;
+ private Scan scan;
+ private Result result = null;
+ private ImmutableBytesWritable row = null;
+ private ClientSideRegionScanner scanner;
+
+ public ClientSideRegionScanner getScanner() {
+ return scanner;
+ }
+
+ public void initialize(InputSplit split, Configuration conf) throws IOException {
+ this.scan = TableMapReduceUtil.convertStringToScan(split.getScan());
+ this.split = split;
+ TableDescriptor htd = split.htd;
+ HRegionInfo hri = this.split.getRegionInfo();
+ FileSystem fs = FSUtils.getCurrentFileSystem(conf);
+
+ // region is immutable, this should be fine,
+ // otherwise we have to set the thread read point
+ scan.setIsolationLevel(IsolationLevel.READ_UNCOMMITTED);
+ // disable caching of data blocks
+ scan.setCacheBlocks(false);
+
+ scanner =
+ new ClientSideRegionScanner(conf, fs, new Path(split.restoreDir), htd, hri, scan, null);
+ }
+
+ public boolean nextKeyValue() throws IOException {
+ result = scanner.next();
+ if (result == null) {
+ //we are done
+ return false;
+ }
+
+ if (this.row == null) {
+ this.row = new ImmutableBytesWritable();
+ }
+ this.row.set(result.getRow());
+ return true;
+ }
+
+ public ImmutableBytesWritable getCurrentKey() {
+ return row;
+ }
+
+ public Result getCurrentValue() {
+ return result;
+ }
+
+ public long getPos() {
+ return 0;
+ }
+
+ public float getProgress() {
+ return 0; // TODO: use total bytes to estimate
+ }
+
+ public void close() {
+ if (this.scanner != null) {
+ this.scanner.close();
+ }
+ }
+ }
+
+ public static List<InputSplit> getSplits(Configuration conf) throws IOException {
+ String snapshotName = getSnapshotName(conf);
+
+ Path rootDir = FSUtils.getRootDir(conf);
+ FileSystem fs = rootDir.getFileSystem(conf);
+
+ SnapshotManifest manifest = getSnapshotManifest(conf, snapshotName, rootDir, fs);
+
+ List<HRegionInfo> regionInfos = getRegionInfosFromManifest(manifest);
+
+ // TODO: mapred does not support scan as input API. Work around for now.
+ Scan scan = extractScanFromConf(conf);
+ // the temp dir where the snapshot is restored
+ Path restoreDir = new Path(conf.get(RESTORE_DIR_KEY));
+
+ return getSplits(scan, manifest, regionInfos, restoreDir, conf);
+ }
+
+ public static List<HRegionInfo> getRegionInfosFromManifest(SnapshotManifest manifest) {
+ List<SnapshotRegionManifest> regionManifests = manifest.getRegionManifests();
+ if (regionManifests == null) {
+ throw new IllegalArgumentException("Snapshot seems empty");
+ }
+
+ List<HRegionInfo> regionInfos = Lists.newArrayListWithCapacity(regionManifests.size());
+
+ for (SnapshotRegionManifest regionManifest : regionManifests) {
+ HRegionInfo hri = HRegionInfo.convert(regionManifest.getRegionInfo());
+ if (hri.isOffline() && (hri.isSplit() || hri.isSplitParent())) {
+ continue;
+ }
+ regionInfos.add(hri);
+ }
+ return regionInfos;
+ }
+
+ public static SnapshotManifest getSnapshotManifest(Configuration conf, String snapshotName,
+ Path rootDir, FileSystem fs) throws IOException {
+ Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir);
+ SnapshotDescription snapshotDesc = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
+ return SnapshotManifest.open(conf, fs, snapshotDir, snapshotDesc);
+ }
+
+ public static Scan extractScanFromConf(Configuration conf) throws IOException {
+ Scan scan = null;
+ if (conf.get(TableInputFormat.SCAN) != null) {
+ scan = TableMapReduceUtil.convertStringToScan(conf.get(TableInputFormat.SCAN));
+ } else if (conf.get(org.apache.hadoop.hbase.mapred.TableInputFormat.COLUMN_LIST) != null) {
+ String[] columns =
+ conf.get(org.apache.hadoop.hbase.mapred.TableInputFormat.COLUMN_LIST).split(" ");
+ scan = new Scan();
+ for (String col : columns) {
+ scan.addFamily(Bytes.toBytes(col));
+ }
+ } else {
+ throw new IllegalArgumentException("Unable to create scan");
+ }
+ return scan;
+ }
+
+ public static List<InputSplit> getSplits(Scan scan, SnapshotManifest manifest,
+ List<HRegionInfo> regionManifests, Path restoreDir, Configuration conf) throws IOException {
+ // load table descriptor
+ TableDescriptor htd = manifest.getTableDescriptor();
+
+ Path tableDir = FSUtils.getTableDir(restoreDir, htd.getTableName());
+
+ List<InputSplit> splits = new ArrayList<>();
+ for (HRegionInfo hri : regionManifests) {
+ // load region descriptor
+
+ if (CellUtil.overlappingKeys(scan.getStartRow(), scan.getStopRow(), hri.getStartKey(),
+ hri.getEndKey())) {
+ // compute HDFS locations from snapshot files (which will get the locations for
+ // referred hfiles)
+ List<String> hosts = getBestLocations(conf,
+ HRegion.computeHDFSBlocksDistribution(conf, htd, hri, tableDir));
+
+ int len = Math.min(3, hosts.size());
+ hosts = hosts.subList(0, len);
+ splits.add(new InputSplit(htd, hri, hosts, scan, restoreDir));
+ }
+ }
+
+ return splits;
+
+ }
+
+ /**
+ * Computes the locations to be passed from the InputSplit. MR/YARN schedulers do not take
+ * weights into account, so every location passed from the input split is treated as equal. We
+ * do not want to blindly pass all the locations, since we are creating one split per region, and
+ * the region's blocks are distributed throughout the cluster unless favored node assignment
+ * is used. In the expected stable case, only one location will hold most of the blocks locally;
+ * with favored node assignment, three nodes will hold highly local blocks. We therefore apply a
+ * simple heuristic and pass along every host whose block locality is at least 80%
+ * (hbase.tablesnapshotinputformat.locality.cutoff.multiplier) of that of the top
+ * host with the best locality.
+ */
+ public static List<String> getBestLocations(
+ Configuration conf, HDFSBlocksDistribution blockDistribution) {
+ List<String> locations = new ArrayList<>(3);
+
+ HostAndWeight[] hostAndWeights = blockDistribution.getTopHostsWithWeights();
+
+ if (hostAndWeights.length == 0) {
+ return locations;
+ }
+
+ HostAndWeight topHost = hostAndWeights[0];
+ locations.add(topHost.getHost());
+
+ // Heuristic: filter all hosts which have at least cutoffMultiplier % of block locality
+ double cutoffMultiplier
+ = conf.getFloat(LOCALITY_CUTOFF_MULTIPLIER, DEFAULT_LOCALITY_CUTOFF_MULTIPLIER);
+
+ double filterWeight = topHost.getWeight() * cutoffMultiplier;
+
+ for (int i = 1; i < hostAndWeights.length; i++) {
+ if (hostAndWeights[i].getWeight() >= filterWeight) {
+ locations.add(hostAndWeights[i].getHost());
+ } else {
+ break;
+ }
+ }
+
+ return locations;
+ }
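+
+ // Worked example with illustrative numbers (not taken from any real cluster): given top-host
+ // weights host1=100, host2=85, host3=60 and the default cutoff multiplier 0.8, the filter
+ // weight is 100 * 0.8 = 80, so host1 and host2 are returned while host3 (60 < 80) and
+ // everything after it is dropped.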
+
+ private static String getSnapshotName(Configuration conf) {
+ String snapshotName = conf.get(SNAPSHOT_NAME_KEY);
+ if (snapshotName == null) {
+ throw new IllegalArgumentException("Snapshot name must be provided");
+ }
+ return snapshotName;
+ }
+
+ /**
+ * Configures the job to use TableSnapshotInputFormat to read from a snapshot.
+ * @param conf the job configuration
+ * @param snapshotName the name of the snapshot to read from
+ * @param restoreDir a temporary directory to restore the snapshot into. Current user should
+ * have write permissions to this directory, and this should not be a subdirectory of rootdir.
+ * After the job is finished, restoreDir can be deleted.
+ * @throws IOException if an error occurs
+ */
+ public static void setInput(Configuration conf, String snapshotName, Path restoreDir)
+ throws IOException {
+ conf.set(SNAPSHOT_NAME_KEY, snapshotName);
+
+ Path rootDir = FSUtils.getRootDir(conf);
+ FileSystem fs = rootDir.getFileSystem(conf);
+
+ restoreDir = new Path(restoreDir, UUID.randomUUID().toString());
+
+ // TODO: restore from record readers to parallelize.
+ RestoreSnapshotHelper.copySnapshotForScanner(conf, fs, rootDir, restoreDir, snapshotName);
+
+ conf.set(RESTORE_DIR_KEY, restoreDir.toString());
+ }
+}
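
To make the flow above concrete, here is a minimal, hypothetical driver sketch (not part of this patch). It assumes a client configuration pointing at a live cluster, a snapshot named "demo_snapshot", and a scratch restore directory; the empty Scan is serialized into the configuration because getSplits() resolves it through extractScanFromConf().

import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormatImpl;

public class SnapshotSplitsSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // getSplits() needs a Scan in the configuration; serialize an empty one.
    conf.set(TableInputFormat.SCAN, TableMapReduceUtil.convertScanToString(new Scan()));
    // Snapshot name and restore directory are placeholders for illustration only.
    TableSnapshotInputFormatImpl.setInput(conf, "demo_snapshot", new Path("/tmp/restore-demo"));
    List<TableSnapshotInputFormatImpl.InputSplit> splits =
        TableSnapshotInputFormatImpl.getSplits(conf);
    System.out.println("Computed " + splits.size() + " splits");
  }
}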
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSplit.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSplit.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSplit.java
new file mode 100644
index 0000000..13c7c67
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSplit.java
@@ -0,0 +1,395 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableUtils;
+import org.apache.hadoop.mapreduce.InputSplit;
+
+/**
+ * A table split corresponds to a key range (low, high) and an optional scanner.
+ * All references to row below refer to the key of the row.
+ */
+@InterfaceAudience.Public
+public class TableSplit extends InputSplit
+implements Writable, Comparable<TableSplit> {
+ /** @deprecated The LOG variable will be made private in HBase 3.0. */
+ @Deprecated
+ public static final Log LOG = LogFactory.getLog(TableSplit.class);
+
+ // should be < 0 (@see #readFields(DataInput))
+ // version 1 supports Scan data member
+ enum Version {
+ UNVERSIONED(0),
+ // Initial number we put on TableSplit when we introduced versioning.
+ INITIAL(-1),
+ // Added an encoded region name field for easier identification of split -> region
+ WITH_ENCODED_REGION_NAME(-2);
+
+ final int code;
+ static final Version[] byCode;
+ static {
+ byCode = Version.values();
+ for (int i = 0; i < byCode.length; i++) {
+ if (byCode[i].code != -1 * i) {
+ throw new AssertionError("Values in this enum should be descending by one");
+ }
+ }
+ }
+
+ Version(int code) {
+ this.code = code;
+ }
+
+ boolean atLeast(Version other) {
+ return code <= other.code;
+ }
+
+ static Version fromCode(int code) {
+ return byCode[code * -1];
+ }
+ }
+
+ private static final Version VERSION = Version.WITH_ENCODED_REGION_NAME;
+ private TableName tableName;
+ private byte [] startRow;
+ private byte [] endRow;
+ private String regionLocation;
+ private String encodedRegionName = "";
+ private String scan = ""; // stores the serialized form of the Scan
+ private long length; // Contains estimation of region size in bytes
+
+ /** Default constructor. */
+ public TableSplit() {
+ this((TableName)null, null, HConstants.EMPTY_BYTE_ARRAY,
+ HConstants.EMPTY_BYTE_ARRAY, "");
+ }
+
+ /**
+ * Creates a new instance while assigning all variables.
+ * The length of the region is set to 0 and the encoded name of the region is left blank.
+ *
+ * @param tableName The name of the current table.
+ * @param scan The scan associated with this split.
+ * @param startRow The start row of the split.
+ * @param endRow The end row of the split.
+ * @param location The location of the region.
+ */
+ public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow,
+ final String location) {
+ this(tableName, scan, startRow, endRow, location, 0L);
+ }
+
+ /**
+ * Creates a new instance while assigning all variables.
+ * The encoded name of the region is left blank.
+ *
+ * @param tableName The name of the current table.
+ * @param scan The scan associated with this split.
+ * @param startRow The start row of the split.
+ * @param endRow The end row of the split.
+ * @param location The location of the region.
+ * @param length Size of the region in bytes.
+ */
+ public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow,
+ final String location, long length) {
+ this(tableName, scan, startRow, endRow, location, "", length);
+ }
+
+ /**
+ * Creates a new instance while assigning all variables.
+ *
+ * @param tableName The name of the current table.
+ * @param scan The scan associated with this split.
+ * @param startRow The start row of the split.
+ * @param endRow The end row of the split.
+ * @param location The location of the region.
+ * @param encodedRegionName The encoded name of the region.
+ * @param length Size of the region in bytes.
+ */
+ public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow,
+ final String location, final String encodedRegionName, long length) {
+ this.tableName = tableName;
+ try {
+ this.scan =
+ (null == scan) ? "" : TableMapReduceUtil.convertScanToString(scan);
+ } catch (IOException e) {
+ LOG.warn("Failed to convert Scan to String", e);
+ }
+ this.startRow = startRow;
+ this.endRow = endRow;
+ this.regionLocation = location;
+ this.encodedRegionName = encodedRegionName;
+ this.length = length;
+ }
+
+ /**
+ * Creates a new instance without a scanner.
+ * The length of the region is set to 0.
+ *
+ * @param tableName The name of the current table.
+ * @param startRow The start row of the split.
+ * @param endRow The end row of the split.
+ * @param location The location of the region.
+ */
+ public TableSplit(TableName tableName, byte[] startRow, byte[] endRow,
+ final String location) {
+ this(tableName, null, startRow, endRow, location);
+ }
+
+ /**
+ * Creates a new instance without a scanner.
+ *
+ * @param tableName The name of the current table.
+ * @param startRow The start row of the split.
+ * @param endRow The end row of the split.
+ * @param location The location of the region.
+ * @param length Size of region in bytes
+ */
+ public TableSplit(TableName tableName, byte[] startRow, byte[] endRow,
+ final String location, long length) {
+ this(tableName, null, startRow, endRow, location, length);
+ }
+
+ /**
+ * Returns a Scan object from the stored string representation.
+ *
+ * @return Returns a Scan object based on the stored scanner.
+ * @throws IOException
+ */
+ public Scan getScan() throws IOException {
+ return TableMapReduceUtil.convertStringToScan(this.scan);
+ }
+
+ /**
+ * Returns the table name converted to a byte array.
+ * @see #getTable()
+ * @return The table name.
+ */
+ public byte [] getTableName() {
+ return tableName.getName();
+ }
+
+ /**
+ * Returns the table name.
+ *
+ * @return The table name.
+ */
+ public TableName getTable() {
+ // Ideally this accessor would be called getTableName, but that name is already taken by the
+ // byte[] variant above, so this one is called getTable instead.
+ return tableName;
+ }
+
+ /**
+ * Returns the start row.
+ *
+ * @return The start row.
+ */
+ public byte [] getStartRow() {
+ return startRow;
+ }
+
+ /**
+ * Returns the end row.
+ *
+ * @return The end row.
+ */
+ public byte [] getEndRow() {
+ return endRow;
+ }
+
+ /**
+ * Returns the region location.
+ *
+ * @return The region's location.
+ */
+ public String getRegionLocation() {
+ return regionLocation;
+ }
+
+ /**
+ * Returns the region's location as an array.
+ *
+ * @return The array containing the region location.
+ * @see org.apache.hadoop.mapreduce.InputSplit#getLocations()
+ */
+ @Override
+ public String[] getLocations() {
+ return new String[] {regionLocation};
+ }
+
+ /**
+ * Returns the region's encoded name.
+ *
+ * @return The region's encoded name.
+ */
+ public String getEncodedRegionName() {
+ return encodedRegionName;
+ }
+
+ /**
+ * Returns the length of the split.
+ *
+ * @return The length of the split.
+ * @see org.apache.hadoop.mapreduce.InputSplit#getLength()
+ */
+ @Override
+ public long getLength() {
+ return length;
+ }
+
+ /**
+ * Reads the values of each field.
+ *
+ * @param in The input to read from.
+ * @throws IOException When reading the input fails.
+ */
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ Version version = Version.UNVERSIONED;
+ // TableSplit was not versioned in the beginning.
+ // In order to introduce it now, we make use of the fact
+ // that tableName was written with Bytes.writeByteArray,
+ // which encodes the array length as a vint which is >= 0.
+ // Hence if the vint is >= 0 we have an old version and the vint
+ // encodes the length of tableName.
+ // If < 0 we just read the version and the next vint is the length.
+ // @see Bytes#readByteArray(DataInput)
+ int len = WritableUtils.readVInt(in);
+ if (len < 0) {
+ // what we just read was the version
+ version = Version.fromCode(len);
+ len = WritableUtils.readVInt(in);
+ }
+ byte[] tableNameBytes = new byte[len];
+ in.readFully(tableNameBytes);
+ tableName = TableName.valueOf(tableNameBytes);
+ startRow = Bytes.readByteArray(in);
+ endRow = Bytes.readByteArray(in);
+ regionLocation = Bytes.toString(Bytes.readByteArray(in));
+ if (version.atLeast(Version.INITIAL)) {
+ scan = Bytes.toString(Bytes.readByteArray(in));
+ }
+ length = WritableUtils.readVLong(in);
+ if (version.atLeast(Version.WITH_ENCODED_REGION_NAME)) {
+ encodedRegionName = Bytes.toString(Bytes.readByteArray(in));
+ }
+ }
+
+ /**
+ * Writes the field values to the output.
+ *
+ * @param out The output to write to.
+ * @throws IOException When writing the values to the output fails.
+ */
+ @Override
+ public void write(DataOutput out) throws IOException {
+ WritableUtils.writeVInt(out, VERSION.code);
+ Bytes.writeByteArray(out, tableName.getName());
+ Bytes.writeByteArray(out, startRow);
+ Bytes.writeByteArray(out, endRow);
+ Bytes.writeByteArray(out, Bytes.toBytes(regionLocation));
+ Bytes.writeByteArray(out, Bytes.toBytes(scan));
+ WritableUtils.writeVLong(out, length);
+ Bytes.writeByteArray(out, Bytes.toBytes(encodedRegionName));
+ }
+
+ /**
+ * Returns the details about this instance as a string.
+ *
+ * @return The values of this instance as a string.
+ * @see java.lang.Object#toString()
+ */
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("HBase table split(");
+ sb.append("table name: ").append(tableName);
+ // null scan input is represented by ""
+ String printScan = "";
+ if (!scan.equals("")) {
+ try {
+ // get the real scan here in toString, not the Base64 string
+ printScan = TableMapReduceUtil.convertStringToScan(scan).toString();
+ }
+ catch (IOException e) {
+ printScan = "";
+ }
+ }
+ sb.append(", scan: ").append(printScan);
+ sb.append(", start row: ").append(Bytes.toStringBinary(startRow));
+ sb.append(", end row: ").append(Bytes.toStringBinary(endRow));
+ sb.append(", region location: ").append(regionLocation);
+ sb.append(", encoded region name: ").append(encodedRegionName);
+ sb.append(")");
+ return sb.toString();
+ }
+
+ /**
+ * Compares this split against the given one.
+ *
+ * @param split The split to compare to.
+ * @return The result of the comparison.
+ * @see java.lang.Comparable#compareTo(java.lang.Object)
+ */
+ @Override
+ public int compareTo(TableSplit split) {
+ // If the table names of the two splits are the same, compare start rows;
+ // otherwise compare the table names.
+ int tableNameComparison =
+ getTable().compareTo(split.getTable());
+ return tableNameComparison != 0 ? tableNameComparison : Bytes.compareTo(
+ getStartRow(), split.getStartRow());
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (o == null || !(o instanceof TableSplit)) {
+ return false;
+ }
+ return tableName.equals(((TableSplit)o).tableName) &&
+ Bytes.equals(startRow, ((TableSplit)o).startRow) &&
+ Bytes.equals(endRow, ((TableSplit)o).endRow) &&
+ regionLocation.equals(((TableSplit)o).regionLocation);
+ }
+
+ @Override
+ public int hashCode() {
+ int result = tableName != null ? tableName.hashCode() : 0;
+ result = 31 * result + (scan != null ? scan.hashCode() : 0);
+ result = 31 * result + (startRow != null ? Arrays.hashCode(startRow) : 0);
+ result = 31 * result + (endRow != null ? Arrays.hashCode(endRow) : 0);
+ result = 31 * result + (regionLocation != null ? regionLocation.hashCode() : 0);
+ result = 31 * result + (encodedRegionName != null ? encodedRegionName.hashCode() : 0);
+ return result;
+ }
+}
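
As a hedged illustration of the Writable round trip implemented by write()/readFields() above, the sketch below serializes a TableSplit to a byte array and reads it back; the table name, row range, location and length are made-up values.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.mapreduce.TableSplit;
import org.apache.hadoop.hbase.util.Bytes;

public class TableSplitRoundTripSketch {
  public static void main(String[] args) throws Exception {
    // All of these values are illustrative.
    TableSplit original = new TableSplit(TableName.valueOf("demo_table"),
        Bytes.toBytes("row-a"), Bytes.toBytes("row-z"), "rs1.example.com", 1024L);

    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    original.write(new DataOutputStream(bytes));

    TableSplit copy = new TableSplit();
    copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));

    // equals() compares table name, start/end row and region location.
    System.out.println("Round trip equal: " + original.equals(copy));
  }
}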
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TextSortReducer.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TextSortReducer.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TextSortReducer.java
new file mode 100644
index 0000000..30cd461
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TextSortReducer.java
@@ -0,0 +1,213 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+import java.util.TreeSet;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.ArrayBackedTag;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellComparator;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.KeyValueUtil;
+import org.apache.hadoop.hbase.Tag;
+import org.apache.hadoop.hbase.TagType;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.security.visibility.InvalidLabelException;
+import org.apache.hadoop.hbase.util.Base64;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Counter;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.util.StringUtils;
+
+/**
+ * Emits sorted KeyValues. Parses the passed text, creates KeyValues, and sorts them before emitting.
+ * @see HFileOutputFormat2
+ * @see KeyValueSortReducer
+ * @see PutSortReducer
+ */
+@InterfaceAudience.Public
+public class TextSortReducer extends
+ Reducer<ImmutableBytesWritable, Text, ImmutableBytesWritable, KeyValue> {
+
+ /** Timestamp for all inserted rows */
+ private long ts;
+
+ /** Column separator */
+ private String separator;
+
+ /** Should skip bad lines */
+ private boolean skipBadLines;
+
+ private Counter badLineCount;
+
+ private ImportTsv.TsvParser parser;
+
+ /** Cell visibility expr **/
+ private String cellVisibilityExpr;
+
+ /** Cell TTL */
+ private long ttl;
+
+ private CellCreator kvCreator;
+
+ public long getTs() {
+ return ts;
+ }
+
+ public boolean getSkipBadLines() {
+ return skipBadLines;
+ }
+
+ public Counter getBadLineCount() {
+ return badLineCount;
+ }
+
+ public void incrementBadLineCount(int count) {
+ this.badLineCount.increment(count);
+ }
+
+ /**
+ * Handles initializing this class with objects specific to it (i.e., the parser).
+ * Common initialization that might be leveraged by a subclass is done in
+ * <code>doSetup</code>. Hence a subclass may choose to override this method
+ * and call <code>doSetup</code> as well before handling its own custom params.
+ *
+ * @param context
+ */
+ @Override
+ protected void setup(Context context) {
+ Configuration conf = context.getConfiguration();
+ doSetup(context, conf);
+
+ parser = new ImportTsv.TsvParser(conf.get(ImportTsv.COLUMNS_CONF_KEY), separator);
+ if (parser.getRowKeyColumnIndex() == -1) {
+ throw new RuntimeException("No row key column specified");
+ }
+ this.kvCreator = new CellCreator(conf);
+ }
+
+ /**
+ * Handles common parameter initialization that a subclass might want to leverage.
+ * @param context
+ * @param conf
+ */
+ protected void doSetup(Context context, Configuration conf) {
+ // If a custom separator has been used,
+ // decode it back from Base64 encoding.
+ separator = conf.get(ImportTsv.SEPARATOR_CONF_KEY);
+ if (separator == null) {
+ separator = ImportTsv.DEFAULT_SEPARATOR;
+ } else {
+ separator = new String(Base64.decode(separator));
+ }
+
+ // Should never get 0 as we are setting this to a valid value in job configuration.
+ ts = conf.getLong(ImportTsv.TIMESTAMP_CONF_KEY, 0);
+
+ skipBadLines = context.getConfiguration().getBoolean(ImportTsv.SKIP_LINES_CONF_KEY, true);
+ badLineCount = context.getCounter("ImportTsv", "Bad Lines");
+ }
+
+ @Override
+ protected void reduce(
+ ImmutableBytesWritable rowKey,
+ java.lang.Iterable<Text> lines,
+ Reducer<ImmutableBytesWritable, Text,
+ ImmutableBytesWritable, KeyValue>.Context context)
+ throws java.io.IOException, InterruptedException
+ {
+ // although reduce() is called per-row, handle pathological case
+ long threshold = context.getConfiguration().getLong(
+ "reducer.row.threshold", 1L * (1<<30));
+ Iterator<Text> iter = lines.iterator();
+ while (iter.hasNext()) {
+ Set<KeyValue> kvs = new TreeSet<>(CellComparator.COMPARATOR);
+ long curSize = 0;
+ // stop at the end or the RAM threshold
+ while (iter.hasNext() && curSize < threshold) {
+ Text line = iter.next();
+ byte[] lineBytes = line.getBytes();
+ try {
+ ImportTsv.TsvParser.ParsedLine parsed = parser.parse(lineBytes, line.getLength());
+ // Retrieve timestamp if exists
+ ts = parsed.getTimestamp(ts);
+ cellVisibilityExpr = parsed.getCellVisibility();
+ ttl = parsed.getCellTTL();
+
+ // create tags for the parsed line
+ List<Tag> tags = new ArrayList<>();
+ if (cellVisibilityExpr != null) {
+ tags.addAll(kvCreator.getVisibilityExpressionResolver().createVisibilityExpTags(
+ cellVisibilityExpr));
+ }
+ // Add TTL directly to the KV so we can vary them when packing more than one KV
+ // into puts
+ if (ttl > 0) {
+ tags.add(new ArrayBackedTag(TagType.TTL_TAG_TYPE, Bytes.toBytes(ttl)));
+ }
+ for (int i = 0; i < parsed.getColumnCount(); i++) {
+ if (i == parser.getRowKeyColumnIndex() || i == parser.getTimestampKeyColumnIndex()
+ || i == parser.getAttributesKeyColumnIndex() || i == parser.getCellVisibilityColumnIndex()
+ || i == parser.getCellTTLColumnIndex()) {
+ continue;
+ }
+ // Creating the KV which needs to be directly written to HFiles. Using the Facade
+ // KVCreator for creation of kvs.
+ Cell cell = this.kvCreator.create(lineBytes, parsed.getRowKeyOffset(),
+ parsed.getRowKeyLength(), parser.getFamily(i), 0, parser.getFamily(i).length,
+ parser.getQualifier(i), 0, parser.getQualifier(i).length, ts, lineBytes,
+ parsed.getColumnOffset(i), parsed.getColumnLength(i), tags);
+ KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
+ kvs.add(kv);
+ curSize += kv.heapSize();
+ }
+ } catch (ImportTsv.TsvParser.BadTsvLineException | IllegalArgumentException
+ | InvalidLabelException badLine) {
+ if (skipBadLines) {
+ System.err.println("Bad line." + badLine.getMessage());
+ incrementBadLineCount(1);
+ continue;
+ }
+ throw new IOException(badLine);
+ }
+ }
+ context.setStatus("Read " + kvs.size() + " entries of " + kvs.getClass()
+ + "(" + StringUtils.humanReadableInt(curSize) + ")");
+ int index = 0;
+ for (KeyValue kv : kvs) {
+ context.write(rowKey, kv);
+ if (++index > 0 && index % 100 == 0)
+ context.setStatus("Wrote " + index + " key values.");
+ }
+
+ // if we have more entries to process
+ if (iter.hasNext()) {
+ // force flush because we cannot guarantee intra-row sorted order
+ context.write(null, null);
+ }
+ }
+ }
+}
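
The reducer above is normally wired together by ImportTsv when bulk output and the text mapper are used. The sketch below is an assumption-laden outline, not the tool's actual driver; the column spec and job name are illustrative, and the HFileOutputFormat2/table wiring that ImportTsv performs is omitted.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.ImportTsv;
import org.apache.hadoop.hbase.mapreduce.TextSortReducer;
import org.apache.hadoop.hbase.mapreduce.TsvImporterTextMapper;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class TextSortReducerWiringSketch {
  public static Job buildJob() throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // Illustrative column spec: first column is the row key, the rest map to family d.
    conf.set(ImportTsv.COLUMNS_CONF_KEY, "HBASE_ROW_KEY,d:c1,d:c2");

    Job job = Job.getInstance(conf, "text-sort-reducer-sketch");
    job.setJarByClass(TextSortReducer.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(TsvImporterTextMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setReducerClass(TextSortReducer.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    // Input paths and HFileOutputFormat2 configuration are intentionally left out.
    return job;
  }
}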
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TsvImporterMapper.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TsvImporterMapper.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TsvImporterMapper.java
new file mode 100644
index 0000000..3c507b3
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TsvImporterMapper.java
@@ -0,0 +1,232 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.ArrayBackedTag;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.Tag;
+import org.apache.hadoop.hbase.TagType;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser.BadTsvLineException;
+import org.apache.hadoop.hbase.security.visibility.CellVisibility;
+import org.apache.hadoop.hbase.security.visibility.InvalidLabelException;
+import org.apache.hadoop.hbase.util.Base64;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Counter;
+import org.apache.hadoop.mapreduce.Mapper;
+
+/**
+ * Converts each line of TSV text into a {@link Put} for import into an HBase table.
+ */
+@InterfaceAudience.Public
+public class TsvImporterMapper
+extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put>
+{
+
+ /** Timestamp for all inserted rows */
+ protected long ts;
+
+ /** Column separator */
+ private String separator;
+
+ /** Should skip bad lines */
+ private boolean skipBadLines;
+ /** Should skip empty columns*/
+ private boolean skipEmptyColumns;
+ private Counter badLineCount;
+ private boolean logBadLines;
+
+ protected ImportTsv.TsvParser parser;
+
+ protected Configuration conf;
+
+ protected String cellVisibilityExpr;
+
+ protected long ttl;
+
+ protected CellCreator kvCreator;
+
+ private String hfileOutPath;
+
+ /** List of cell tags */
+ private List<Tag> tags;
+
+ public long getTs() {
+ return ts;
+ }
+
+ public boolean getSkipBadLines() {
+ return skipBadLines;
+ }
+
+ public Counter getBadLineCount() {
+ return badLineCount;
+ }
+
+ public void incrementBadLineCount(int count) {
+ this.badLineCount.increment(count);
+ }
+
+ /**
+ * Handles initializing this class with objects specific to it (i.e., the parser).
+ * Common initialization that might be leveraged by a subclass is done in
+ * <code>doSetup</code>. Hence a subclass may choose to override this method
+ * and call <code>doSetup</code> as well before handling its own custom params.
+ *
+ * @param context
+ */
+ @Override
+ protected void setup(Context context) {
+ doSetup(context);
+
+ conf = context.getConfiguration();
+ parser = new ImportTsv.TsvParser(conf.get(ImportTsv.COLUMNS_CONF_KEY),
+ separator);
+ if (parser.getRowKeyColumnIndex() == -1) {
+ throw new RuntimeException("No row key column specified");
+ }
+ this.kvCreator = new CellCreator(conf);
+ tags = new ArrayList<>();
+ }
+
+ /**
+ * Handles common parameter initialization that a subclass might want to leverage.
+ * @param context
+ */
+ protected void doSetup(Context context) {
+ Configuration conf = context.getConfiguration();
+
+ // If a custom separator has been used,
+ // decode it back from Base64 encoding.
+ separator = conf.get(ImportTsv.SEPARATOR_CONF_KEY);
+ if (separator == null) {
+ separator = ImportTsv.DEFAULT_SEPARATOR;
+ } else {
+ separator = new String(Base64.decode(separator));
+ }
+ // Should never get 0 as we are setting this to a valid value in job
+ // configuration.
+ ts = conf.getLong(ImportTsv.TIMESTAMP_CONF_KEY, 0);
+
+ skipEmptyColumns = context.getConfiguration().getBoolean(
+ ImportTsv.SKIP_EMPTY_COLUMNS, false);
+ skipBadLines = context.getConfiguration().getBoolean(
+ ImportTsv.SKIP_LINES_CONF_KEY, true);
+ badLineCount = context.getCounter("ImportTsv", "Bad Lines");
+ logBadLines = context.getConfiguration().getBoolean(ImportTsv.LOG_BAD_LINES_CONF_KEY, false);
+ hfileOutPath = conf.get(ImportTsv.BULK_OUTPUT_CONF_KEY);
+ }
+
+ /**
+ * Convert a line of TSV text into an HBase table row.
+ */
+ @Override
+ public void map(LongWritable offset, Text value,
+ Context context)
+ throws IOException {
+ byte[] lineBytes = value.getBytes();
+
+ try {
+ ImportTsv.TsvParser.ParsedLine parsed = parser.parse(
+ lineBytes, value.getLength());
+ ImmutableBytesWritable rowKey =
+ new ImmutableBytesWritable(lineBytes,
+ parsed.getRowKeyOffset(),
+ parsed.getRowKeyLength());
+ // Retrieve timestamp if exists
+ ts = parsed.getTimestamp(ts);
+ cellVisibilityExpr = parsed.getCellVisibility();
+ ttl = parsed.getCellTTL();
+
+ // create tags for the parsed line
+ if (hfileOutPath != null) {
+ tags.clear();
+ if (cellVisibilityExpr != null) {
+ tags.addAll(kvCreator.getVisibilityExpressionResolver().createVisibilityExpTags(
+ cellVisibilityExpr));
+ }
+ // Add TTL directly to the KV so we can vary them when packing more than one KV
+ // into puts
+ if (ttl > 0) {
+ tags.add(new ArrayBackedTag(TagType.TTL_TAG_TYPE, Bytes.toBytes(ttl)));
+ }
+ }
+ Put put = new Put(rowKey.copyBytes());
+ for (int i = 0; i < parsed.getColumnCount(); i++) {
+ if (i == parser.getRowKeyColumnIndex() || i == parser.getTimestampKeyColumnIndex()
+ || i == parser.getAttributesKeyColumnIndex() || i == parser.getCellVisibilityColumnIndex()
+ || i == parser.getCellTTLColumnIndex() || (skipEmptyColumns
+ && parsed.getColumnLength(i) == 0)) {
+ continue;
+ }
+ populatePut(lineBytes, parsed, put, i);
+ }
+ context.write(rowKey, put);
+ } catch (ImportTsv.TsvParser.BadTsvLineException | IllegalArgumentException
+ | InvalidLabelException badLine) {
+ if (logBadLines) {
+ System.err.println(value);
+ }
+ System.err.println("Bad line at offset: " + offset.get() + ":\n" + badLine.getMessage());
+ if (skipBadLines) {
+ incrementBadLineCount(1);
+ return;
+ }
+ throw new IOException(badLine);
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ // Restore the interrupt status, as TsvImporterTextMapper does.
+ Thread.currentThread().interrupt();
+ }
+ }
+
+ protected void populatePut(byte[] lineBytes, ImportTsv.TsvParser.ParsedLine parsed, Put put,
+ int i) throws BadTsvLineException, IOException {
+ Cell cell = null;
+ if (hfileOutPath == null) {
+ cell = new KeyValue(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength(),
+ parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0,
+ parser.getQualifier(i).length, ts, KeyValue.Type.Put, lineBytes,
+ parsed.getColumnOffset(i), parsed.getColumnLength(i));
+ if (cellVisibilityExpr != null) {
+ // We won't be validating the expression here. The Visibility CP will do
+ // the validation
+ put.setCellVisibility(new CellVisibility(cellVisibilityExpr));
+ }
+ if (ttl > 0) {
+ put.setTTL(ttl);
+ }
+ } else {
+ // Creating the KV which needs to be directly written to HFiles. Using the Facade
+ // KVCreator for creation of kvs.
+ cell = this.kvCreator.create(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength(),
+ parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0,
+ parser.getQualifier(i).length, ts, lineBytes, parsed.getColumnOffset(i),
+ parsed.getColumnLength(i), tags);
+ }
+ put.add(cell);
+ }
+}
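
As the setup()/doSetup() Javadoc above suggests, subclasses can layer their own parameters on top of the common initialization. A minimal hypothetical subclass sketch follows; the configuration key and field are invented for illustration.

import org.apache.hadoop.hbase.mapreduce.TsvImporterMapper;

public class CustomTsvImporterMapper extends TsvImporterMapper {
  // Hypothetical extra knob read on top of the common parameters.
  private boolean dryRun;

  @Override
  protected void setup(Context context) {
    // Runs doSetup() and builds the TSV parser exactly as the parent does.
    super.setup(context);
    dryRun = context.getConfiguration().getBoolean("custom.tsv.dry.run", false);
  }
}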
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TsvImporterTextMapper.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TsvImporterTextMapper.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TsvImporterTextMapper.java
new file mode 100644
index 0000000..a3b095c
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TsvImporterTextMapper.java
@@ -0,0 +1,128 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Base64;
+import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Counter;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
+
+import java.io.IOException;
+
+/**
+ * Emits each line of TSV text keyed by its parsed row key, as intermediate map output.
+ */
+@InterfaceAudience.Public
+public class TsvImporterTextMapper
+extends Mapper<LongWritable, Text, ImmutableBytesWritable, Text>
+{
+
+ /** Column separator */
+ private String separator;
+
+ /** Should skip bad lines */
+ private boolean skipBadLines;
+ private Counter badLineCount;
+ private boolean logBadLines;
+
+ private ImportTsv.TsvParser parser;
+
+ public boolean getSkipBadLines() {
+ return skipBadLines;
+ }
+
+ public Counter getBadLineCount() {
+ return badLineCount;
+ }
+
+ public void incrementBadLineCount(int count) {
+ this.badLineCount.increment(count);
+ }
+
+ /**
+ * Handles initializing this class with objects specific to it (i.e., the parser).
+ * Common initialization that might be leveraged by a subclass is done in
+ * <code>doSetup</code>. Hence a subclass may choose to override this method
+ * and call <code>doSetup</code> as well before handling its own custom params.
+ *
+ * @param context
+ */
+ @Override
+ protected void setup(Context context) {
+ doSetup(context);
+
+ Configuration conf = context.getConfiguration();
+
+ parser = new ImportTsv.TsvParser(conf.get(ImportTsv.COLUMNS_CONF_KEY), separator);
+ if (parser.getRowKeyColumnIndex() == -1) {
+ throw new RuntimeException("No row key column specified");
+ }
+ }
+
+ /**
+ * Handles common parameter initialization that a subclass might want to leverage.
+ * @param context
+ */
+ protected void doSetup(Context context) {
+ Configuration conf = context.getConfiguration();
+
+ // If a custom separator has been used,
+ // decode it back from Base64 encoding.
+ separator = conf.get(ImportTsv.SEPARATOR_CONF_KEY);
+ if (separator == null) {
+ separator = ImportTsv.DEFAULT_SEPARATOR;
+ } else {
+ separator = new String(Base64.decode(separator));
+ }
+
+ skipBadLines = context.getConfiguration().getBoolean(ImportTsv.SKIP_LINES_CONF_KEY, true);
+ logBadLines = context.getConfiguration().getBoolean(ImportTsv.LOG_BAD_LINES_CONF_KEY, false);
+ badLineCount = context.getCounter("ImportTsv", "Bad Lines");
+ }
+
+ /**
+ * Convert a line of TSV text into an HBase table row.
+ */
+ @Override
+ public void map(LongWritable offset, Text value, Context context) throws IOException {
+ try {
+ Pair<Integer,Integer> rowKeyOffsets = parser.parseRowKey(value.getBytes(), value.getLength());
+ ImmutableBytesWritable rowKey = new ImmutableBytesWritable(
+ value.getBytes(), rowKeyOffsets.getFirst(), rowKeyOffsets.getSecond());
+ context.write(rowKey, value);
+ } catch (ImportTsv.TsvParser.BadTsvLineException|IllegalArgumentException badLine) {
+ if (logBadLines) {
+ System.err.println(value);
+ }
+ System.err.println("Bad line at offset: " + offset.get() + ":\n" + badLine.getMessage());
+ if (skipBadLines) {
+ incrementBadLineCount(1);
+ return;
+ }
+ throw new IOException(badLine);
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ Thread.currentThread().interrupt();
+ }
+ }
+}
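
This mapper is chosen instead of TsvImporterMapper through ImportTsv's importtsv.mapper.class option. A small hypothetical configuration sketch follows; the column spec is illustrative and the keys match the documented ImportTsv options.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.TsvImporterTextMapper;

public class SelectTextMapperSketch {
  public static Configuration configure() {
    Configuration conf = HBaseConfiguration.create();
    // Select the text mapper and declare the TSV column layout.
    conf.set("importtsv.mapper.class", TsvImporterTextMapper.class.getName());
    conf.set("importtsv.columns", "HBASE_ROW_KEY,d:c1");
    return conf;
  }
}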
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/VisibilityExpressionResolver.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/VisibilityExpressionResolver.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/VisibilityExpressionResolver.java
new file mode 100644
index 0000000..a83a88f
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/VisibilityExpressionResolver.java
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.hbase.Tag;
+
+/**
+ * Interface to convert visibility expressions into Tags for storing along with Cells in HFiles.
+ */
+@InterfaceAudience.Public
+public interface VisibilityExpressionResolver extends Configurable {
+
+ /**
+ * Gives the implementation a chance to initialize itself.
+ */
+ void init();
+
+ /**
+ * Convert visibility expression into tags to be serialized.
+ * @param visExpression the label expression
+ * @return The list of tags corresponding to the visibility expression. These tags will be stored
+ * along with the Cells.
+ */
+ List<Tag> createVisibilityExpTags(String visExpression) throws IOException;
+}
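
To make the contract concrete, here is a hypothetical no-op implementation sketch of the interface above; a real resolver would typically translate label names into tags rather than returning an empty list.

import java.io.IOException;
import java.util.Collections;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.mapreduce.VisibilityExpressionResolver;

public class NoOpVisibilityExpressionResolver implements VisibilityExpressionResolver {
  private Configuration conf;

  @Override
  public void setConf(Configuration conf) {
    this.conf = conf;
  }

  @Override
  public Configuration getConf() {
    return conf;
  }

  @Override
  public void init() {
    // Nothing to initialize in this sketch.
  }

  @Override
  public List<Tag> createVisibilityExpTags(String visExpression) throws IOException {
    // Ignore the expression entirely and attach no visibility tags.
    return Collections.emptyList();
  }
}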
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/WALInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/WALInputFormat.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/WALInputFormat.java
new file mode 100644
index 0000000..8b4e967
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/WALInputFormat.java
@@ -0,0 +1,344 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.EOFException;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
+import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
+import org.apache.hadoop.hbase.wal.WAL;
+import org.apache.hadoop.hbase.wal.WAL.Entry;
+import org.apache.hadoop.hbase.wal.WAL.Reader;
+import org.apache.hadoop.hbase.wal.WALKey;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.util.StringUtils;
+
+/**
+ * Simple {@link InputFormat} for {@link org.apache.hadoop.hbase.wal.WAL} files.
+ */
+@InterfaceAudience.Public
+public class WALInputFormat extends InputFormat<WALKey, WALEdit> {
+ private static final Log LOG = LogFactory.getLog(WALInputFormat.class);
+
+ public static final String START_TIME_KEY = "wal.start.time";
+ public static final String END_TIME_KEY = "wal.end.time";
+
+ /**
+ * {@link InputSplit} for {@link WAL} files. Each split represents
+ * exactly one log file.
+ */
+ static class WALSplit extends InputSplit implements Writable {
+ private String logFileName;
+ private long fileSize;
+ private long startTime;
+ private long endTime;
+
+ /** for serialization */
+ public WALSplit() {}
+
+ /**
+ * Represents a WALSplit, i.e. a single WAL file.
+ * Start and end time are managed by the split, so that WAL files can be
+ * filtered before WALEdits are passed to the mapper(s).
+ * @param logFileName
+ * @param fileSize
+ * @param startTime
+ * @param endTime
+ */
+ public WALSplit(String logFileName, long fileSize, long startTime, long endTime) {
+ this.logFileName = logFileName;
+ this.fileSize = fileSize;
+ this.startTime = startTime;
+ this.endTime = endTime;
+ }
+
+ @Override
+ public long getLength() throws IOException, InterruptedException {
+ return fileSize;
+ }
+
+ @Override
+ public String[] getLocations() throws IOException, InterruptedException {
+ // TODO: Find the data node with the most blocks for this WAL?
+ return new String[] {};
+ }
+
+ public String getLogFileName() {
+ return logFileName;
+ }
+
+ public long getStartTime() {
+ return startTime;
+ }
+
+ public long getEndTime() {
+ return endTime;
+ }
+
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ logFileName = in.readUTF();
+ fileSize = in.readLong();
+ startTime = in.readLong();
+ endTime = in.readLong();
+ }
+
+ @Override
+ public void write(DataOutput out) throws IOException {
+ out.writeUTF(logFileName);
+ out.writeLong(fileSize);
+ out.writeLong(startTime);
+ out.writeLong(endTime);
+ }
+
+ @Override
+ public String toString() {
+ return logFileName + " (" + startTime + ":" + endTime + ") length:" + fileSize;
+ }
+ }
+
+ /**
+ * {@link RecordReader} for a {@link WAL} file.
+ * Implementation shared with deprecated HLogInputFormat.
+ */
+ static abstract class WALRecordReader<K extends WALKey> extends RecordReader<K, WALEdit> {
+ private Reader reader = null;
+ // visible until we can remove the deprecated HLogInputFormat
+ Entry currentEntry = new Entry();
+ private long startTime;
+ private long endTime;
+ private Configuration conf;
+ private Path logFile;
+ private long currentPos;
+
+ @Override
+ public void initialize(InputSplit split, TaskAttemptContext context)
+ throws IOException, InterruptedException {
+ WALSplit hsplit = (WALSplit)split;
+ logFile = new Path(hsplit.getLogFileName());
+ conf = context.getConfiguration();
+ LOG.info("Opening reader for "+split);
+ openReader(logFile);
+ this.startTime = hsplit.getStartTime();
+ this.endTime = hsplit.getEndTime();
+ }
+
+ private void openReader(Path path) throws IOException {
+ closeReader();
+ reader = AbstractFSWALProvider.openReader(path, conf);
+ seek();
+ setCurrentPath(path);
+ }
+
+ private void setCurrentPath(Path path) {
+ this.logFile = path;
+ }
+
+ private void closeReader() throws IOException {
+ if (reader != null) {
+ reader.close();
+ reader = null;
+ }
+ }
+
+ private void seek() throws IOException {
+ if (currentPos != 0) {
+ reader.seek(currentPos);
+ }
+ }
+
+ @Override
+ public boolean nextKeyValue() throws IOException, InterruptedException {
+ if (reader == null) return false;
+ this.currentPos = reader.getPosition();
+ Entry temp;
+ long i = -1;
+ try {
+ do {
+ // skip older entries
+ try {
+ temp = reader.next(currentEntry);
+ i++;
+ } catch (EOFException x) {
+ LOG.warn("Corrupted entry detected. Ignoring the rest of the file."
+ + " (This is normal when a RegionServer crashed.)");
+ return false;
+ }
+ } while (temp != null && temp.getKey().getWriteTime() < startTime);
+
+ if (temp == null) {
+ if (i > 0) LOG.info("Skipped " + i + " entries.");
+ LOG.info("Reached end of file.");
+ return false;
+ } else if (i > 0) {
+ LOG.info("Skipped " + i + " entries, until ts: " + temp.getKey().getWriteTime() + ".");
+ }
+ boolean res = temp.getKey().getWriteTime() <= endTime;
+ if (!res) {
+ LOG.info("Reached ts: " + temp.getKey().getWriteTime()
+ + " ignoring the rest of the file.");
+ }
+ return res;
+ } catch (IOException e) {
+ Path archivedLog = AbstractFSWALProvider.getArchivedLogPath(logFile, conf);
+ if (logFile != archivedLog) {
+ openReader(archivedLog);
+ // Retry recursively against the archived log location.
+ return nextKeyValue();
+ } else {
+ throw e;
+ }
+ }
+ }
+
+ @Override
+ public WALEdit getCurrentValue() throws IOException, InterruptedException {
+ return currentEntry.getEdit();
+ }
+
+ @Override
+ public float getProgress() throws IOException, InterruptedException {
+ // N/A depends on total number of entries, which is unknown
+ return 0;
+ }
+
+ @Override
+ public void close() throws IOException {
+ LOG.info("Closing reader");
+ if (reader != null) this.reader.close();
+ }
+ }
+
+ /**
+ * Handler for the non-deprecated WALKey version. Fold into WALRecordReader once we no longer
+ * need to support HLogInputFormat.
+ */
+ static class WALKeyRecordReader extends WALRecordReader<WALKey> {
+ @Override
+ public WALKey getCurrentKey() throws IOException, InterruptedException {
+ return currentEntry.getKey();
+ }
+ }
+
+ @Override
+ public List<InputSplit> getSplits(JobContext context) throws IOException,
+ InterruptedException {
+ return getSplits(context, START_TIME_KEY, END_TIME_KEY);
+ }
+
+ /**
+ * Implementation shared with the deprecated HLogInputFormat.
+ */
+ List<InputSplit> getSplits(final JobContext context, final String startKey, final String endKey)
+ throws IOException, InterruptedException {
+ Configuration conf = context.getConfiguration();
+ boolean ignoreMissing = conf.getBoolean(WALPlayer.IGNORE_MISSING_FILES, false);
+ Path[] inputPaths = getInputPaths(conf);
+ long startTime = conf.getLong(startKey, Long.MIN_VALUE);
+ long endTime = conf.getLong(endKey, Long.MAX_VALUE);
+
+ List<FileStatus> allFiles = new ArrayList<FileStatus>();
+ for(Path inputPath: inputPaths){
+ FileSystem fs = inputPath.getFileSystem(conf);
+ try {
+ List<FileStatus> files = getFiles(fs, inputPath, startTime, endTime);
+ allFiles.addAll(files);
+ } catch (FileNotFoundException e) {
+ if (ignoreMissing) {
+ LOG.warn("File "+ inputPath +" is missing. Skipping it.");
+ continue;
+ }
+ throw e;
+ }
+ }
+ List<InputSplit> splits = new ArrayList<InputSplit>(allFiles.size());
+ for (FileStatus file : allFiles) {
+ splits.add(new WALSplit(file.getPath().toString(), file.getLen(), startTime, endTime));
+ }
+ return splits;
+ }
+
+ private Path[] getInputPaths(Configuration conf) {
+ String inpDirs = conf.get(FileInputFormat.INPUT_DIR);
+ return StringUtils.stringToPath(
+ inpDirs.split(conf.get(WALPlayer.INPUT_FILES_SEPARATOR_KEY, ",")));
+ }
+
+ private List<FileStatus> getFiles(FileSystem fs, Path dir, long startTime, long endTime)
+ throws IOException {
+ List<FileStatus> result = new ArrayList<>();
+ LOG.debug("Scanning " + dir.toString() + " for WAL files");
+
+ RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(dir);
+ if (!iter.hasNext()) return Collections.emptyList();
+ while (iter.hasNext()) {
+ LocatedFileStatus file = iter.next();
+ if (file.isDirectory()) {
+ // recurse into sub directories
+ result.addAll(getFiles(fs, file.getPath(), startTime, endTime));
+ } else {
+ String name = file.getPath().toString();
+ int idx = name.lastIndexOf('.');
+ if (idx > 0) {
+ try {
+ long fileStartTime = Long.parseLong(name.substring(idx+1));
+ if (fileStartTime <= endTime) {
+ LOG.info("Found: " + file);
+ result.add(file);
+ }
+ } catch (NumberFormatException x) {
+ idx = 0;
+ }
+ }
+ if (idx == 0) {
+ LOG.warn("File " + name + " does not appear to be an WAL file. Skipping...");
+ }
+ }
+ }
+ return result;
+ }
+
+ @Override
+ public RecordReader<WALKey, WALEdit> createRecordReader(InputSplit split,
+ TaskAttemptContext context) throws IOException, InterruptedException {
+ return new WALKeyRecordReader();
+ }
+}
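
A minimal, hypothetical driver sketch for the input format above; the WAL directory, job name and counting mapper are assumptions for illustration, and the start/end times bound which files and entries are read.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.WALInputFormat;
import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
import org.apache.hadoop.hbase.wal.WALKey;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;

public class WalEditCounterSketch {
  /** Counts WAL edits; purely illustrative. */
  public static class EditCounterMapper extends Mapper<WALKey, WALEdit, NullWritable, NullWritable> {
    @Override
    protected void map(WALKey key, WALEdit value, Context context) {
      context.getCounter("wal", "edits").increment(1);
    }
  }

  public static Job buildJob(long startMillis, long endMillis) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    conf.setLong(WALInputFormat.START_TIME_KEY, startMillis);
    conf.setLong(WALInputFormat.END_TIME_KEY, endMillis);

    Job job = Job.getInstance(conf, "wal-edit-counter-sketch");
    job.setJarByClass(WalEditCounterSketch.class);
    job.setInputFormatClass(WALInputFormat.class);
    // Placeholder path; point this at an archived-WAL directory in practice.
    FileInputFormat.addInputPath(job, new Path("/hbase/oldWALs"));
    job.setMapperClass(EditCounterMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    return job;
  }
}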
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/WALPlayer.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/WALPlayer.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/WALPlayer.java
new file mode 100644
index 0000000..b1e655c
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/WALPlayer.java
@@ -0,0 +1,384 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import java.io.IOException;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Map;
+import java.util.TreeMap;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.KeyValueUtil;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Mutation;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.regionserver.wal.WALCellCodec;
+import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.wal.WALKey;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * A tool to replay WAL files as an M/R job.
+ * The WAL can be replayed for a set of tables or all tables,
+ * and a time range can be provided (in milliseconds).
+ * The WAL is filtered to the passed set of tables and the output
+ * can optionally be mapped to another set of tables.
+ *
+ * WAL replay can also generate HFiles for later bulk importing;
+ * in that case the WAL is replayed for a single table only.
+ */
+@InterfaceAudience.Public
+public class WALPlayer extends Configured implements Tool {
+ private static final Log LOG = LogFactory.getLog(WALPlayer.class);
+ final static String NAME = "WALPlayer";
+ public final static String BULK_OUTPUT_CONF_KEY = "wal.bulk.output";
+ public final static String TABLES_KEY = "wal.input.tables";
+ public final static String TABLE_MAP_KEY = "wal.input.tablesmap";
+ public final static String INPUT_FILES_SEPARATOR_KEY = "wal.input.separator";
+ public final static String IGNORE_MISSING_FILES = "wal.input.ignore.missing.files";
+
+
+ // This relies on Hadoop Configuration to handle warning about deprecated configs and
+ // to set the correct non-deprecated configs when an old one shows up.
+ static {
+ Configuration.addDeprecation("hlog.bulk.output", BULK_OUTPUT_CONF_KEY);
+ Configuration.addDeprecation("hlog.input.tables", TABLES_KEY);
+ Configuration.addDeprecation("hlog.input.tablesmap", TABLE_MAP_KEY);
+ }
+
+ private final static String JOB_NAME_CONF_KEY = "mapreduce.job.name";
+
+ public WALPlayer(){
+ }
+
+ protected WALPlayer(final Configuration c) {
+ super(c);
+ }
+
+ /**
+ * A mapper that just writes out KeyValues.
+ * This one can be used together with {@link KeyValueSortReducer}
+ */
+ static class WALKeyValueMapper
+ extends Mapper<WALKey, WALEdit, ImmutableBytesWritable, KeyValue> {
+ private byte[] table;
+
+ @Override
+ public void map(WALKey key, WALEdit value,
+ Context context)
+ throws IOException {
+ try {
+ // skip all other tables
+ if (Bytes.equals(table, key.getTablename().getName())) {
+ for (Cell cell : value.getCells()) {
+ KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
+ if (WALEdit.isMetaEditFamily(kv)) {
+ continue;
+ }
+ context.write(new ImmutableBytesWritable(CellUtil.cloneRow(kv)), kv);
+ }
+ }
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ }
+ }
+
+ @Override
+ public void setup(Context context) throws IOException {
+ // only a single table is supported when HFiles are generated with HFileOutputFormat
+ String[] tables = context.getConfiguration().getStrings(TABLES_KEY);
+ if (tables == null || tables.length != 1) {
+ // this can only happen when WALKeyValueMapper is used directly by a class other than WALPlayer
+ throw new IOException("Exactly one table must be specified for bulk HFile case.");
+ }
+ table = Bytes.toBytes(tables[0]);
+
+ }
+
+ }
+
+ /**
+ * A mapper that writes out {@link Mutation} to be directly applied to
+ * a running HBase instance.
+ */
+ protected static class WALMapper
+ extends Mapper<WALKey, WALEdit, ImmutableBytesWritable, Mutation> {
+ private Map<TableName, TableName> tables = new TreeMap<>();
+
+ @Override
+ public void map(WALKey key, WALEdit value, Context context)
+ throws IOException {
+ try {
+ if (tables.isEmpty() || tables.containsKey(key.getTablename())) {
+ TableName targetTable = tables.isEmpty() ?
+ key.getTablename() :
+ tables.get(key.getTablename());
+ ImmutableBytesWritable tableOut = new ImmutableBytesWritable(targetTable.getName());
+ Put put = null;
+ Delete del = null;
+ Cell lastCell = null;
+ for (Cell cell : value.getCells()) {
+ // filtering WAL meta entries
+ if (WALEdit.isMetaEditFamily(cell)) {
+ continue;
+ }
+
+ // Allow a subclass to filter out this cell.
+ if (filter(context, cell)) {
+ // A WALEdit may contain multiple operations (HBASE-3584) and/or
+ // multiple rows (HBASE-5229).
+ // Aggregate as much as possible into a single Put/Delete
+ // operation before writing to the context.
+ if (lastCell == null || lastCell.getTypeByte() != cell.getTypeByte()
+ || !CellUtil.matchingRow(lastCell, cell)) {
+ // row or type changed, write out aggregate KVs.
+ if (put != null) {
+ context.write(tableOut, put);
+ }
+ if (del != null) {
+ context.write(tableOut, del);
+ }
+ if (CellUtil.isDelete(cell)) {
+ del = new Delete(CellUtil.cloneRow(cell));
+ } else {
+ put = new Put(CellUtil.cloneRow(cell));
+ }
+ }
+ if (CellUtil.isDelete(cell)) {
+ del.add(cell);
+ } else {
+ put.add(cell);
+ }
+ }
+ lastCell = cell;
+ }
+ // write residual KVs
+ if (put != null) {
+ context.write(tableOut, put);
+ }
+ if (del != null) {
+ context.write(tableOut, del);
+ }
+ }
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ }
+ }
+
+ protected boolean filter(Context context, final Cell cell) {
+ return true;
+ }
+
+ @Override
+ protected void
+ cleanup(Mapper<WALKey, WALEdit, ImmutableBytesWritable, Mutation>.Context context)
+ throws IOException, InterruptedException {
+ super.cleanup(context);
+ }
+
+ @Override
+ public void setup(Context context) throws IOException {
+ String[] tableMap = context.getConfiguration().getStrings(TABLE_MAP_KEY);
+ String[] tablesToUse = context.getConfiguration().getStrings(TABLES_KEY);
+ if (tableMap == null) {
+ tableMap = tablesToUse;
+ }
+ if (tablesToUse == null) {
+ // Then user wants all tables.
+ } else if (tablesToUse.length != tableMap.length) {
+ // this can only happen when WALMapper is used directly by a class other than WALPlayer
+ throw new IOException("Incorrect table mapping specified .");
+ }
+ int i = 0;
+ if (tablesToUse != null) {
+ for (String table : tablesToUse) {
+ tables.put(TableName.valueOf(table),
+ TableName.valueOf(tableMap[i++]));
+ }
+ }
+ }
+ }
+
+ void setupTime(Configuration conf, String option) throws IOException {
+ String val = conf.get(option);
+ if (null == val) {
+ return;
+ }
+ long ms;
+ try {
+ // first try to parse in user friendly form
+ ms = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SS").parse(val).getTime();
+ } catch (ParseException pe) {
+ try {
+ // then see if just a number of ms's was specified
+ ms = Long.parseLong(val);
+ } catch (NumberFormatException nfe) {
+ throw new IOException(option
+ + " must be specified either in the form 2001-02-20T16:35:06.99 "
+ + "or as number of milliseconds");
+ }
+ }
+ conf.setLong(option, ms);
+ }
+
+ /**
+ * Sets up the actual job.
+ *
+ * @param args The command line parameters.
+ * @return The newly created job.
+ * @throws IOException When setting up the job fails.
+ */
+ public Job createSubmittableJob(String[] args) throws IOException {
+ Configuration conf = getConf();
+ setupTime(conf, WALInputFormat.START_TIME_KEY);
+ setupTime(conf, WALInputFormat.END_TIME_KEY);
+ String inputDirs = args[0];
+ String[] tables = args[1].split(",");
+ String[] tableMap;
+ if (args.length > 2) {
+ tableMap = args[2].split(",");
+ if (tableMap.length != tables.length) {
+ throw new IOException("The same number of tables and mapping must be provided.");
+ }
+ } else {
+ // if no mapping is specified, map each table to itself
+ tableMap = tables;
+ }
+ conf.setStrings(TABLES_KEY, tables);
+ conf.setStrings(TABLE_MAP_KEY, tableMap);
+ conf.set(FileInputFormat.INPUT_DIR, inputDirs);
+ Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + System.currentTimeMillis()));
+ job.setJarByClass(WALPlayer.class);
+
+ job.setInputFormatClass(WALInputFormat.class);
+ job.setMapOutputKeyClass(ImmutableBytesWritable.class);
+
+ String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
+ if (hfileOutPath != null) {
+ LOG.debug("add incremental job :" + hfileOutPath + " from " + inputDirs);
+
+ // the bulk HFile case
+ if (tables.length != 1) {
+ throw new IOException("Exactly one table must be specified for the bulk export option");
+ }
+ TableName tableName = TableName.valueOf(tables[0]);
+ job.setMapperClass(WALKeyValueMapper.class);
+ job.setReducerClass(KeyValueSortReducer.class);
+ Path outputDir = new Path(hfileOutPath);
+ FileOutputFormat.setOutputPath(job, outputDir);
+ job.setMapOutputValueClass(KeyValue.class);
+ try (Connection conn = ConnectionFactory.createConnection(conf);
+ Table table = conn.getTable(tableName);
+ RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
+ HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(), regionLocator);
+ }
+ TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
+ org.apache.hadoop.hbase.shaded.com.google.common.base.Preconditions.class);
+ } else {
+ // output to live cluster
+ job.setMapperClass(WALMapper.class);
+ job.setOutputFormatClass(MultiTableOutputFormat.class);
+ TableMapReduceUtil.addDependencyJars(job);
+ TableMapReduceUtil.initCredentials(job);
+ // No reducers.
+ job.setNumReduceTasks(0);
+ }
+ String codecCls = WALCellCodec.getWALCellCodecClass(conf);
+ try {
+ TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), Class.forName(codecCls));
+ } catch (Exception e) {
+ throw new IOException("Cannot determine wal codec class " + codecCls, e);
+ }
+ return job;
+ }
+
+
+ /**
+ * Print usage
+ * @param errorMsg Error message. Can be null.
+ */
+ private void usage(final String errorMsg) {
+ if (errorMsg != null && errorMsg.length() > 0) {
+ System.err.println("ERROR: " + errorMsg);
+ }
+ System.err.println("Usage: " + NAME + " [options] <wal inputdir> <tables> [<tableMappings>]");
+ System.err.println("Read all WAL entries for <tables>.");
+ System.err.println("If no tables (\"\") are specific, all tables are imported.");
+ System.err.println("(Careful, even hbase:meta entries will be imported"+
+ " in that case.)");
+ System.err.println("Otherwise <tables> is a comma separated list of tables.\n");
+ System.err.println("The WAL entries can be mapped to new set of tables via <tableMapping>.");
+ System.err.println("<tableMapping> is a command separated list of targettables.");
+ System.err.println("If specified, each table in <tables> must have a mapping.\n");
+ System.err.println("By default " + NAME + " will load data directly into HBase.");
+ System.err.println("To generate HFiles for a bulk data load instead, pass the option:");
+ System.err.println(" -D" + BULK_OUTPUT_CONF_KEY + "=/path/for/output");
+ System.err.println(" (Only one table can be specified, and no mapping is allowed!)");
+ System.err.println("Other options: (specify time range to WAL edit to consider)");
+ System.err.println(" -D" + WALInputFormat.START_TIME_KEY + "=[date|ms]");
+ System.err.println(" -D" + WALInputFormat.END_TIME_KEY + "=[date|ms]");
+ System.err.println(" -D " + JOB_NAME_CONF_KEY
+ + "=jobName - use the specified mapreduce job name for the wal player");
+ System.err.println("For performance also consider the following options:\n"
+ + " -Dmapreduce.map.speculative=false\n"
+ + " -Dmapreduce.reduce.speculative=false");
+ }
+
+ /**
+ * Main entry point.
+ *
+ * @param args The command line parameters.
+ * @throws Exception When running the job fails.
+ */
+ public static void main(String[] args) throws Exception {
+ int ret = ToolRunner.run(new WALPlayer(HBaseConfiguration.create()), args);
+ System.exit(ret);
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+ if (args.length < 2) {
+ usage("Wrong number of arguments: " + args.length);
+ System.exit(-1);
+ }
+ Job job = createSubmittableJob(args);
+ return job.waitForCompletion(true) ? 0 : 1;
+ }
+}
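
The main() above simply hands off to ToolRunner, so the tool can also be driven programmatically. A minimal hedged sketch follows; it assumes the WALInputFormat time-range constants referenced in usage() are publicly accessible, and the WAL directory and table names are placeholders rather than values from this commit:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.WALInputFormat;
import org.apache.hadoop.hbase.mapreduce.WALPlayer;
import org.apache.hadoop.util.ToolRunner;

public class WALPlayerDriverSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // Optional time range; setupTime() accepts either a 2001-02-20T16:35:06.99 style
    // date or a plain number of milliseconds.
    conf.set(WALInputFormat.START_TIME_KEY, "2017-08-01T00:00:00.00");
    conf.set(WALInputFormat.END_TIME_KEY, "2017-08-02T00:00:00.00");
    // Optional: write HFiles for a later bulk load instead of live writes
    // (only one table may be given in that mode, as createSubmittableJob() enforces):
    // conf.set(WALPlayer.BULK_OUTPUT_CONF_KEY, "/tmp/walplayer-out");

    // Arguments follow usage(): <wal inputdir> <tables> [<tableMappings>].
    // "/hbase/oldWALs", "sourceTable" and "targetTable" are placeholders.
    int exitCode = ToolRunner.run(conf, new WALPlayer(),
        new String[] { "/hbase/oldWALs", "sourceTable", "targetTable" });
    System.exit(exitCode);
  }
}
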
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/package-info.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/package-info.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/package-info.java
new file mode 100644
index 0000000..b1f15ba
--- /dev/null
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/package-info.java
@@ -0,0 +1,26 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+Provides HBase <a href="http://wiki.apache.org/hadoop/HadoopMapReduce">MapReduce</a>
+Input/OutputFormats, a table indexing MapReduce job, and utility methods.
+
+<p>See <a href="http://hbase.apache.org/book.html#mapreduce">HBase and MapReduce</a>
+in the HBase Reference Guide for documentation on running MapReduce jobs over HBase.
+*/
+package org.apache.hadoop.hbase.mapreduce;
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java
deleted file mode 100644
index efcf91e..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java
+++ /dev/null
@@ -1,571 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.UUID;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.TableNotFoundException;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.io.hfile.CacheConfig;
-import org.apache.hadoop.hbase.io.hfile.HFile;
-import org.apache.hadoop.hbase.io.hfile.HFileScanner;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.Utils.OutputFileUtils.OutputFilesFilter;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-import org.junit.AfterClass;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.ExpectedException;
-
-@Category({VerySlowMapReduceTests.class, LargeTests.class})
-public class TestImportTsv implements Configurable {
-
- private static final Log LOG = LogFactory.getLog(TestImportTsv.class);
- protected static final String NAME = TestImportTsv.class.getSimpleName();
- protected static HBaseTestingUtility util = new HBaseTestingUtility();
-
- // Delete the tmp directory after running doMROnTableTest. Boolean. Default is true.
- protected static final String DELETE_AFTER_LOAD_CONF = NAME + ".deleteAfterLoad";
-
- /**
- * Force use of combiner in doMROnTableTest. Boolean. Default is true.
- */
- protected static final String FORCE_COMBINER_CONF = NAME + ".forceCombiner";
-
- private final String FAMILY = "FAM";
- private TableName tn;
- private Map<String, String> args;
-
- @Rule
- public ExpectedException exception = ExpectedException.none();
-
- public Configuration getConf() {
- return util.getConfiguration();
- }
-
- public void setConf(Configuration conf) {
- throw new IllegalArgumentException("setConf not supported");
- }
-
- @BeforeClass
- public static void provisionCluster() throws Exception {
- util.startMiniCluster();
- }
-
- @AfterClass
- public static void releaseCluster() throws Exception {
- util.shutdownMiniCluster();
- }
-
- @Before
- public void setup() throws Exception {
- tn = TableName.valueOf("test-" + UUID.randomUUID());
- args = new HashMap<>();
- // Prepare the arguments required for the test.
- args.put(ImportTsv.COLUMNS_CONF_KEY, "HBASE_ROW_KEY,FAM:A,FAM:B");
- args.put(ImportTsv.SEPARATOR_CONF_KEY, "\u001b");
- }
-
- @Test
- public void testMROnTable() throws Exception {
- util.createTable(tn, FAMILY);
- doMROnTableTest(null, 1);
- util.deleteTable(tn);
- }
-
- @Test
- public void testMROnTableWithTimestamp() throws Exception {
- util.createTable(tn, FAMILY);
- args.put(ImportTsv.COLUMNS_CONF_KEY, "HBASE_ROW_KEY,HBASE_TS_KEY,FAM:A,FAM:B");
- args.put(ImportTsv.SEPARATOR_CONF_KEY, ",");
- String data = "KEY,1234,VALUE1,VALUE2\n";
-
- doMROnTableTest(data, 1);
- util.deleteTable(tn);
- }
-
- @Test
- public void testMROnTableWithCustomMapper()
- throws Exception {
- util.createTable(tn, FAMILY);
- args.put(ImportTsv.MAPPER_CONF_KEY,
- "org.apache.hadoop.hbase.mapreduce.TsvImporterCustomTestMapper");
-
- doMROnTableTest(null, 3);
- util.deleteTable(tn);
- }
-
- @Test
- public void testBulkOutputWithoutAnExistingTable() throws Exception {
- // Prepare the arguments required for the test.
- Path hfiles = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");
- args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, hfiles.toString());
-
- doMROnTableTest(null, 3);
- util.deleteTable(tn);
- }
-
- @Test
- public void testBulkOutputWithAnExistingTable() throws Exception {
- util.createTable(tn, FAMILY);
-
- // Prepare the arguments required for the test.
- Path hfiles = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");
- args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, hfiles.toString());
-
- doMROnTableTest(null, 3);
- util.deleteTable(tn);
- }
-
- @Test
- public void testBulkOutputWithAnExistingTableNoStrictTrue() throws Exception {
- util.createTable(tn, FAMILY);
-
- // Prepare the arguments required for the test.
- Path hfiles = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");
- args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, hfiles.toString());
- args.put(ImportTsv.NO_STRICT_COL_FAMILY, "true");
- doMROnTableTest(null, 3);
- util.deleteTable(tn);
- }
-
- @Test
- public void testJobConfigurationsWithTsvImporterTextMapper() throws Exception {
- Path bulkOutputPath = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()),"hfiles");
- String INPUT_FILE = "InputFile1.csv";
- // Prepare the arguments required for the test.
- String[] args =
- new String[] {
- "-D" + ImportTsv.MAPPER_CONF_KEY
- + "=org.apache.hadoop.hbase.mapreduce.TsvImporterTextMapper",
- "-D" + ImportTsv.COLUMNS_CONF_KEY
- + "=HBASE_ROW_KEY,FAM:A,FAM:B",
- "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=,",
- "-D" + ImportTsv.BULK_OUTPUT_CONF_KEY + "=" + bulkOutputPath.toString(),
- tn.getNameAsString(),
- INPUT_FILE
- };
- assertEquals("running test job configuration failed.", 0, ToolRunner.run(
- new Configuration(util.getConfiguration()),
- new ImportTsv() {
- @Override
- public int run(String[] args) throws Exception {
- Job job = createSubmittableJob(getConf(), args);
- assertTrue(job.getMapperClass().equals(TsvImporterTextMapper.class));
- assertTrue(job.getReducerClass().equals(TextSortReducer.class));
- assertTrue(job.getMapOutputValueClass().equals(Text.class));
- return 0;
- }
- }, args));
- // Delete table created by createSubmittableJob.
- util.deleteTable(tn);
- }
-
- @Test
- public void testBulkOutputWithTsvImporterTextMapper() throws Exception {
- Path bulkOutputPath = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()),"hfiles");
- args.put(ImportTsv.MAPPER_CONF_KEY, "org.apache.hadoop.hbase.mapreduce.TsvImporterTextMapper");
- args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, bulkOutputPath.toString());
- String data = "KEY\u001bVALUE4\u001bVALUE8\n";
- doMROnTableTest(data, 4);
- util.deleteTable(tn);
- }
-
- @Test
- public void testWithoutAnExistingTableAndCreateTableSetToNo() throws Exception {
- String[] args = new String[] { tn.getNameAsString(), "/inputFile" };
-
- Configuration conf = new Configuration(util.getConfiguration());
- conf.set(ImportTsv.COLUMNS_CONF_KEY, "HBASE_ROW_KEY,FAM:A");
- conf.set(ImportTsv.BULK_OUTPUT_CONF_KEY, "/output");
- conf.set(ImportTsv.CREATE_TABLE_CONF_KEY, "no");
- exception.expect(TableNotFoundException.class);
- assertEquals("running test job configuration failed.", 0,
- ToolRunner.run(new Configuration(util.getConfiguration()), new ImportTsv() {
- @Override public int run(String[] args) throws Exception {
- createSubmittableJob(getConf(), args);
- return 0;
- }
- }, args));
- }
-
- @Test
- public void testMRWithoutAnExistingTable() throws Exception {
- String[] args =
- new String[] { tn.getNameAsString(), "/inputFile" };
-
- exception.expect(TableNotFoundException.class);
- assertEquals("running test job configuration failed.", 0, ToolRunner.run(
- new Configuration(util.getConfiguration()),
- new ImportTsv() {
- @Override
- public int run(String[] args) throws Exception {
- createSubmittableJob(getConf(), args);
- return 0;
- }
- }, args));
- }
-
- @Test
- public void testJobConfigurationsWithDryMode() throws Exception {
- Path bulkOutputPath = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()),"hfiles");
- String INPUT_FILE = "InputFile1.csv";
- // Prepare the arguments required for the test.
- String[] argsArray = new String[] {
- "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B",
- "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=,",
- "-D" + ImportTsv.BULK_OUTPUT_CONF_KEY + "=" + bulkOutputPath.toString(),
- "-D" + ImportTsv.DRY_RUN_CONF_KEY + "=true",
- tn.getNameAsString(),
- INPUT_FILE };
- assertEquals("running test job configuration failed.", 0, ToolRunner.run(
- new Configuration(util.getConfiguration()),
- new ImportTsv() {
- @Override
- public int run(String[] args) throws Exception {
- Job job = createSubmittableJob(getConf(), args);
- assertTrue(job.getOutputFormatClass().equals(NullOutputFormat.class));
- return 0;
- }
- }, argsArray));
- // Delete table created by createSubmittableJob.
- util.deleteTable(tn);
- }
-
- @Test
- public void testDryModeWithoutBulkOutputAndTableExists() throws Exception {
- util.createTable(tn, FAMILY);
- args.put(ImportTsv.DRY_RUN_CONF_KEY, "true");
- doMROnTableTest(null, 1);
- // Dry mode should not delete an existing table. If it's not present,
- // this will throw TableNotFoundException.
- util.deleteTable(tn);
- }
-
- /**
- * If table is not present in non-bulk mode, dry run should fail just like
- * normal mode.
- */
- @Test
- public void testDryModeWithoutBulkOutputAndTableDoesNotExists() throws Exception {
- args.put(ImportTsv.DRY_RUN_CONF_KEY, "true");
- exception.expect(TableNotFoundException.class);
- doMROnTableTest(null, 1);
- }
-
- @Test public void testDryModeWithBulkOutputAndTableExists() throws Exception {
- util.createTable(tn, FAMILY);
- // Prepare the arguments required for the test.
- Path hfiles = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");
- args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, hfiles.toString());
- args.put(ImportTsv.DRY_RUN_CONF_KEY, "true");
- doMROnTableTest(null, 1);
- // Dry mode should not delete an existing table. If it's not present,
- // this will throw TableNotFoundException.
- util.deleteTable(tn);
- }
-
- /**
- * If table is not present in bulk mode and create.table is not set to yes,
- * import should fail with TableNotFoundException.
- */
- @Test
- public void testDryModeWithBulkOutputAndTableDoesNotExistsCreateTableSetToNo() throws
- Exception {
- // Prepare the arguments required for the test.
- Path hfiles = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");
- args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, hfiles.toString());
- args.put(ImportTsv.DRY_RUN_CONF_KEY, "true");
- args.put(ImportTsv.CREATE_TABLE_CONF_KEY, "no");
- exception.expect(TableNotFoundException.class);
- doMROnTableTest(null, 1);
- }
-
- @Test
- public void testDryModeWithBulkModeAndTableDoesNotExistsCreateTableSetToYes() throws Exception {
- // Prepare the arguments required for the test.
- Path hfiles = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");
- args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, hfiles.toString());
- args.put(ImportTsv.DRY_RUN_CONF_KEY, "true");
- args.put(ImportTsv.CREATE_TABLE_CONF_KEY, "yes");
- doMROnTableTest(null, 1);
- // Verify temporary table was deleted.
- exception.expect(TableNotFoundException.class);
- util.deleteTable(tn);
- }
-
- /**
- * If there are invalid data rows as inputs, then only those rows should be ignored.
- */
- @Test
- public void testTsvImporterTextMapperWithInvalidData() throws Exception {
- Path bulkOutputPath = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");
- args.put(ImportTsv.MAPPER_CONF_KEY, "org.apache.hadoop.hbase.mapreduce.TsvImporterTextMapper");
- args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, bulkOutputPath.toString());
- args.put(ImportTsv.COLUMNS_CONF_KEY, "HBASE_ROW_KEY,HBASE_TS_KEY,FAM:A,FAM:B");
- args.put(ImportTsv.SEPARATOR_CONF_KEY, ",");
- // 3 Rows of data as input. 2 Rows are valid and 1 row is invalid as it doesn't have TS
- String data = "KEY,1234,VALUE1,VALUE2\nKEY\nKEY,1235,VALUE1,VALUE2\n";
- doMROnTableTest(util, tn, FAMILY, data, args, 1, 4);
- util.deleteTable(tn);
- }
-
- @Test
- public void testSkipEmptyColumns() throws Exception {
- Path bulkOutputPath = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");
- args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, bulkOutputPath.toString());
- args.put(ImportTsv.COLUMNS_CONF_KEY, "HBASE_ROW_KEY,HBASE_TS_KEY,FAM:A,FAM:B");
- args.put(ImportTsv.SEPARATOR_CONF_KEY, ",");
- args.put(ImportTsv.SKIP_EMPTY_COLUMNS, "true");
- // 2 Rows of data as input. Both rows are valid and only 3 columns are no-empty among 4
- String data = "KEY,1234,VALUE1,VALUE2\nKEY,1235,,VALUE2\n";
- doMROnTableTest(util, tn, FAMILY, data, args, 1, 3);
- util.deleteTable(tn);
- }
-
- private Tool doMROnTableTest(String data, int valueMultiplier) throws Exception {
- return doMROnTableTest(util, tn, FAMILY, data, args, valueMultiplier,-1);
- }
-
- protected static Tool doMROnTableTest(HBaseTestingUtility util, TableName table,
- String family, String data, Map<String, String> args) throws Exception {
- return doMROnTableTest(util, table, family, data, args, 1,-1);
- }
-
- /**
- * Run an ImportTsv job and perform basic validation on the results.
- * Returns the ImportTsv <code>Tool</code> instance so that other tests can
- * inspect it for further validation as necessary. This method is static to
- * insure non-reliance on instance's util/conf facilities.
- * @param args Any arguments to pass BEFORE inputFile path is appended.
- * @return The Tool instance used to run the test.
- */
- protected static Tool doMROnTableTest(HBaseTestingUtility util, TableName table,
- String family, String data, Map<String, String> args, int valueMultiplier,int expectedKVCount)
- throws Exception {
- Configuration conf = new Configuration(util.getConfiguration());
-
- // populate input file
- FileSystem fs = FileSystem.get(conf);
- Path inputPath = fs.makeQualified(
- new Path(util.getDataTestDirOnTestFS(table.getNameAsString()), "input.dat"));
- FSDataOutputStream op = fs.create(inputPath, true);
- if (data == null) {
- data = "KEY\u001bVALUE1\u001bVALUE2\n";
- }
- op.write(Bytes.toBytes(data));
- op.close();
- LOG.debug(String.format("Wrote test data to file: %s", inputPath));
-
- if (conf.getBoolean(FORCE_COMBINER_CONF, true)) {
- LOG.debug("Forcing combiner.");
- conf.setInt("mapreduce.map.combine.minspills", 1);
- }
-
- // Build args array.
- String[] argsArray = new String[args.size() + 2];
- Iterator it = args.entrySet().iterator();
- int i = 0;
- while (it.hasNext()) {
- Map.Entry pair = (Map.Entry) it.next();
- argsArray[i] = "-D" + pair.getKey() + "=" + pair.getValue();
- i++;
- }
- argsArray[i] = table.getNameAsString();
- argsArray[i + 1] = inputPath.toString();
-
- // run the import
- Tool tool = new ImportTsv();
- LOG.debug("Running ImportTsv with arguments: " + argsArray);
- assertEquals(0, ToolRunner.run(conf, tool, argsArray));
-
- // Perform basic validation. If the input args did not include
- // ImportTsv.BULK_OUTPUT_CONF_KEY then validate data in the table.
- // Otherwise, validate presence of hfiles.
- boolean isDryRun = args.containsKey(ImportTsv.DRY_RUN_CONF_KEY) &&
- "true".equalsIgnoreCase(args.get(ImportTsv.DRY_RUN_CONF_KEY));
- if (args.containsKey(ImportTsv.BULK_OUTPUT_CONF_KEY)) {
- if (isDryRun) {
- assertFalse(String.format("Dry run mode, %s should not have been created.",
- ImportTsv.BULK_OUTPUT_CONF_KEY),
- fs.exists(new Path(ImportTsv.BULK_OUTPUT_CONF_KEY)));
- } else {
- validateHFiles(fs, args.get(ImportTsv.BULK_OUTPUT_CONF_KEY), family,expectedKVCount);
- }
- } else {
- validateTable(conf, table, family, valueMultiplier, isDryRun);
- }
-
- if (conf.getBoolean(DELETE_AFTER_LOAD_CONF, true)) {
- LOG.debug("Deleting test subdirectory");
- util.cleanupDataTestDirOnTestFS(table.getNameAsString());
- }
- return tool;
- }
-
- /**
- * Confirm ImportTsv via data in online table.
- */
- private static void validateTable(Configuration conf, TableName tableName,
- String family, int valueMultiplier, boolean isDryRun) throws IOException {
-
- LOG.debug("Validating table.");
- Connection connection = ConnectionFactory.createConnection(conf);
- Table table = connection.getTable(tableName);
- boolean verified = false;
- long pause = conf.getLong("hbase.client.pause", 5 * 1000);
- int numRetries = conf.getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 5);
- for (int i = 0; i < numRetries; i++) {
- try {
- Scan scan = new Scan();
- // Scan entire family.
- scan.addFamily(Bytes.toBytes(family));
- ResultScanner resScanner = table.getScanner(scan);
- int numRows = 0;
- for (Result res : resScanner) {
- numRows++;
- assertEquals(2, res.size());
- List<Cell> kvs = res.listCells();
- assertTrue(CellUtil.matchingRow(kvs.get(0), Bytes.toBytes("KEY")));
- assertTrue(CellUtil.matchingRow(kvs.get(1), Bytes.toBytes("KEY")));
- assertTrue(CellUtil.matchingValue(kvs.get(0), Bytes.toBytes("VALUE" + valueMultiplier)));
- assertTrue(CellUtil.matchingValue(kvs.get(1), Bytes.toBytes("VALUE" + 2 * valueMultiplier)));
- // Only one result set is expected, so let it loop.
- }
- if (isDryRun) {
- assertEquals(0, numRows);
- } else {
- assertEquals(1, numRows);
- }
- verified = true;
- break;
- } catch (NullPointerException e) {
- // If here, a cell was empty. Presume its because updates came in
- // after the scanner had been opened. Wait a while and retry.
- }
- try {
- Thread.sleep(pause);
- } catch (InterruptedException e) {
- // continue
- }
- }
- table.close();
- connection.close();
- assertTrue(verified);
- }
-
- /**
- * Confirm ImportTsv via HFiles on fs.
- */
- private static void validateHFiles(FileSystem fs, String outputPath, String family,
- int expectedKVCount) throws IOException {
- // validate number and content of output columns
- LOG.debug("Validating HFiles.");
- Set<String> configFamilies = new HashSet<>();
- configFamilies.add(family);
- Set<String> foundFamilies = new HashSet<>();
- int actualKVCount = 0;
- for (FileStatus cfStatus : fs.listStatus(new Path(outputPath), new OutputFilesFilter())) {
- String[] elements = cfStatus.getPath().toString().split(Path.SEPARATOR);
- String cf = elements[elements.length - 1];
- foundFamilies.add(cf);
- assertTrue(
- String.format(
- "HFile output contains a column family (%s) not present in input families (%s)",
- cf, configFamilies),
- configFamilies.contains(cf));
- for (FileStatus hfile : fs.listStatus(cfStatus.getPath())) {
- assertTrue(
- String.format("HFile %s appears to contain no data.", hfile.getPath()),
- hfile.getLen() > 0);
- // count the number of KVs from all the hfiles
- if (expectedKVCount > -1) {
- actualKVCount += getKVCountFromHfile(fs, hfile.getPath());
- }
- }
- }
- assertTrue(String.format("HFile output does not contain the input family '%s'.", family),
- foundFamilies.contains(family));
- if (expectedKVCount > -1) {
- assertTrue(String.format(
- "KV count in ouput hfile=<%d> doesn't match with expected KV count=<%d>", actualKVCount,
- expectedKVCount), actualKVCount == expectedKVCount);
- }
- }
-
- /**
- * Method returns the total KVs in given hfile
- * @param fs File System
- * @param p HFile path
- * @return KV count in the given hfile
- * @throws IOException
- */
- private static int getKVCountFromHfile(FileSystem fs, Path p) throws IOException {
- Configuration conf = util.getConfiguration();
- HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf);
- reader.loadFileInfo();
- HFileScanner scanner = reader.getScanner(false, false);
- scanner.seekTo();
- int count = 0;
- do {
- count++;
- } while (scanner.next());
- reader.close();
- return count;
- }
-}
-
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsvParser.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsvParser.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsvParser.java
deleted file mode 100644
index 3c38102..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsvParser.java
+++ /dev/null
@@ -1,314 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import java.util.ArrayList;
-
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.testclassification.SmallTests;
-import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser;
-import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser.BadTsvLineException;
-import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser.ParsedLine;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.Pair;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.base.Joiner;
-import org.apache.hadoop.hbase.shaded.com.google.common.base.Splitter;
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.Iterables;
-
-/**
- * Tests for {@link TsvParser}.
- */
-@Category({MapReduceTests.class, SmallTests.class})
-public class TestImportTsvParser {
-
- private void assertBytesEquals(byte[] a, byte[] b) {
- assertEquals(Bytes.toStringBinary(a), Bytes.toStringBinary(b));
- }
-
- private void checkParsing(ParsedLine parsed, Iterable<String> expected) {
- ArrayList<String> parsedCols = new ArrayList<>();
- for (int i = 0; i < parsed.getColumnCount(); i++) {
- parsedCols.add(Bytes.toString(parsed.getLineBytes(), parsed.getColumnOffset(i),
- parsed.getColumnLength(i)));
- }
- if (!Iterables.elementsEqual(parsedCols, expected)) {
- fail("Expected: " + Joiner.on(",").join(expected) + "\n" + "Got:"
- + Joiner.on(",").join(parsedCols));
- }
- }
-
- @Test
- public void testTsvParserSpecParsing() {
- TsvParser parser;
-
- parser = new TsvParser("HBASE_ROW_KEY", "\t");
- assertNull(parser.getFamily(0));
- assertNull(parser.getQualifier(0));
- assertEquals(0, parser.getRowKeyColumnIndex());
- assertFalse(parser.hasTimestamp());
-
- parser = new TsvParser("HBASE_ROW_KEY,col1:scol1", "\t");
- assertNull(parser.getFamily(0));
- assertNull(parser.getQualifier(0));
- assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1));
- assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1));
- assertEquals(0, parser.getRowKeyColumnIndex());
- assertFalse(parser.hasTimestamp());
-
- parser = new TsvParser("HBASE_ROW_KEY,col1:scol1,col1:scol2", "\t");
- assertNull(parser.getFamily(0));
- assertNull(parser.getQualifier(0));
- assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1));
- assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1));
- assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(2));
- assertBytesEquals(Bytes.toBytes("scol2"), parser.getQualifier(2));
- assertEquals(0, parser.getRowKeyColumnIndex());
- assertFalse(parser.hasTimestamp());
-
- parser = new TsvParser("HBASE_ROW_KEY,col1:scol1,HBASE_TS_KEY,col1:scol2", "\t");
- assertNull(parser.getFamily(0));
- assertNull(parser.getQualifier(0));
- assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1));
- assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1));
- assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(3));
- assertBytesEquals(Bytes.toBytes("scol2"), parser.getQualifier(3));
- assertEquals(0, parser.getRowKeyColumnIndex());
- assertTrue(parser.hasTimestamp());
- assertEquals(2, parser.getTimestampKeyColumnIndex());
-
- parser = new TsvParser("HBASE_ROW_KEY,col1:scol1,HBASE_TS_KEY,col1:scol2,HBASE_ATTRIBUTES_KEY",
- "\t");
- assertNull(parser.getFamily(0));
- assertNull(parser.getQualifier(0));
- assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1));
- assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1));
- assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(3));
- assertBytesEquals(Bytes.toBytes("scol2"), parser.getQualifier(3));
- assertEquals(0, parser.getRowKeyColumnIndex());
- assertTrue(parser.hasTimestamp());
- assertEquals(2, parser.getTimestampKeyColumnIndex());
- assertEquals(4, parser.getAttributesKeyColumnIndex());
-
- parser = new TsvParser("HBASE_ATTRIBUTES_KEY,col1:scol1,HBASE_TS_KEY,col1:scol2,HBASE_ROW_KEY",
- "\t");
- assertNull(parser.getFamily(0));
- assertNull(parser.getQualifier(0));
- assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1));
- assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1));
- assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(3));
- assertBytesEquals(Bytes.toBytes("scol2"), parser.getQualifier(3));
- assertEquals(4, parser.getRowKeyColumnIndex());
- assertTrue(parser.hasTimestamp());
- assertEquals(2, parser.getTimestampKeyColumnIndex());
- assertEquals(0, parser.getAttributesKeyColumnIndex());
- }
-
- @Test
- public void testTsvParser() throws BadTsvLineException {
- TsvParser parser = new TsvParser("col_a,col_b:qual,HBASE_ROW_KEY,col_d", "\t");
- assertBytesEquals(Bytes.toBytes("col_a"), parser.getFamily(0));
- assertBytesEquals(HConstants.EMPTY_BYTE_ARRAY, parser.getQualifier(0));
- assertBytesEquals(Bytes.toBytes("col_b"), parser.getFamily(1));
- assertBytesEquals(Bytes.toBytes("qual"), parser.getQualifier(1));
- assertNull(parser.getFamily(2));
- assertNull(parser.getQualifier(2));
- assertEquals(2, parser.getRowKeyColumnIndex());
-
- assertEquals(TsvParser.DEFAULT_TIMESTAMP_COLUMN_INDEX, parser.getTimestampKeyColumnIndex());
-
- byte[] line = Bytes.toBytes("val_a\tval_b\tval_c\tval_d");
- ParsedLine parsed = parser.parse(line, line.length);
- checkParsing(parsed, Splitter.on("\t").split(Bytes.toString(line)));
- }
-
- @Test
- public void testTsvParserWithTimestamp() throws BadTsvLineException {
- TsvParser parser = new TsvParser("HBASE_ROW_KEY,HBASE_TS_KEY,col_a,", "\t");
- assertNull(parser.getFamily(0));
- assertNull(parser.getQualifier(0));
- assertNull(parser.getFamily(1));
- assertNull(parser.getQualifier(1));
- assertBytesEquals(Bytes.toBytes("col_a"), parser.getFamily(2));
- assertBytesEquals(HConstants.EMPTY_BYTE_ARRAY, parser.getQualifier(2));
- assertEquals(0, parser.getRowKeyColumnIndex());
- assertEquals(1, parser.getTimestampKeyColumnIndex());
-
- byte[] line = Bytes.toBytes("rowkey\t1234\tval_a");
- ParsedLine parsed = parser.parse(line, line.length);
- assertEquals(1234l, parsed.getTimestamp(-1));
- checkParsing(parsed, Splitter.on("\t").split(Bytes.toString(line)));
- }
-
- /**
- * Test cases that throw BadTsvLineException
- */
- @Test(expected = BadTsvLineException.class)
- public void testTsvParserBadTsvLineExcessiveColumns() throws BadTsvLineException {
- TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a", "\t");
- byte[] line = Bytes.toBytes("val_a\tval_b\tval_c");
- parser.parse(line, line.length);
- }
-
- @Test(expected = BadTsvLineException.class)
- public void testTsvParserBadTsvLineZeroColumn() throws BadTsvLineException {
- TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a", "\t");
- byte[] line = Bytes.toBytes("");
- parser.parse(line, line.length);
- }
-
- @Test(expected = BadTsvLineException.class)
- public void testTsvParserBadTsvLineOnlyKey() throws BadTsvLineException {
- TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a", "\t");
- byte[] line = Bytes.toBytes("key_only");
- parser.parse(line, line.length);
- }
-
- @Test(expected = BadTsvLineException.class)
- public void testTsvParserBadTsvLineNoRowKey() throws BadTsvLineException {
- TsvParser parser = new TsvParser("col_a,HBASE_ROW_KEY", "\t");
- byte[] line = Bytes.toBytes("only_cola_data_and_no_row_key");
- parser.parse(line, line.length);
- }
-
- @Test(expected = BadTsvLineException.class)
- public void testTsvParserInvalidTimestamp() throws BadTsvLineException {
- TsvParser parser = new TsvParser("HBASE_ROW_KEY,HBASE_TS_KEY,col_a,", "\t");
- assertEquals(1, parser.getTimestampKeyColumnIndex());
- byte[] line = Bytes.toBytes("rowkey\ttimestamp\tval_a");
- ParsedLine parsed = parser.parse(line, line.length);
- assertEquals(-1, parsed.getTimestamp(-1));
- checkParsing(parsed, Splitter.on("\t").split(Bytes.toString(line)));
- }
-
- @Test(expected = BadTsvLineException.class)
- public void testTsvParserNoTimestampValue() throws BadTsvLineException {
- TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a,HBASE_TS_KEY", "\t");
- assertEquals(2, parser.getTimestampKeyColumnIndex());
- byte[] line = Bytes.toBytes("rowkey\tval_a");
- parser.parse(line, line.length);
- }
-
- @Test
- public void testTsvParserParseRowKey() throws BadTsvLineException {
- TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a,HBASE_TS_KEY", "\t");
- assertEquals(0, parser.getRowKeyColumnIndex());
- byte[] line = Bytes.toBytes("rowkey\tval_a\t1234");
- Pair<Integer, Integer> rowKeyOffsets = parser.parseRowKey(line, line.length);
- assertEquals(0, rowKeyOffsets.getFirst().intValue());
- assertEquals(6, rowKeyOffsets.getSecond().intValue());
- try {
- line = Bytes.toBytes("\t\tval_a\t1234");
- parser.parseRowKey(line, line.length);
- fail("Should get BadTsvLineException on empty rowkey.");
- } catch (BadTsvLineException b) {
-
- }
- parser = new TsvParser("col_a,HBASE_ROW_KEY,HBASE_TS_KEY", "\t");
- assertEquals(1, parser.getRowKeyColumnIndex());
- line = Bytes.toBytes("val_a\trowkey\t1234");
- rowKeyOffsets = parser.parseRowKey(line, line.length);
- assertEquals(6, rowKeyOffsets.getFirst().intValue());
- assertEquals(6, rowKeyOffsets.getSecond().intValue());
- try {
- line = Bytes.toBytes("val_a");
- rowKeyOffsets = parser.parseRowKey(line, line.length);
- fail("Should get BadTsvLineException when number of columns less than rowkey position.");
- } catch (BadTsvLineException b) {
-
- }
- parser = new TsvParser("col_a,HBASE_TS_KEY,HBASE_ROW_KEY", "\t");
- assertEquals(2, parser.getRowKeyColumnIndex());
- line = Bytes.toBytes("val_a\t1234\trowkey");
- rowKeyOffsets = parser.parseRowKey(line, line.length);
- assertEquals(11, rowKeyOffsets.getFirst().intValue());
- assertEquals(6, rowKeyOffsets.getSecond().intValue());
- }
-
- @Test
- public void testTsvParseAttributesKey() throws BadTsvLineException {
- TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a,HBASE_TS_KEY,HBASE_ATTRIBUTES_KEY", "\t");
- assertEquals(0, parser.getRowKeyColumnIndex());
- byte[] line = Bytes.toBytes("rowkey\tval_a\t1234\tkey=>value");
- ParsedLine parse = parser.parse(line, line.length);
- assertEquals(18, parse.getAttributeKeyOffset());
- assertEquals(3, parser.getAttributesKeyColumnIndex());
- String attributes[] = parse.getIndividualAttributes();
- assertEquals(attributes[0], "key=>value");
- try {
- line = Bytes.toBytes("rowkey\tval_a\t1234");
- parser.parse(line, line.length);
- fail("Should get BadTsvLineException on empty rowkey.");
- } catch (BadTsvLineException b) {
-
- }
- parser = new TsvParser("HBASE_ATTRIBUTES_KEY,col_a,HBASE_ROW_KEY,HBASE_TS_KEY", "\t");
- assertEquals(2, parser.getRowKeyColumnIndex());
- line = Bytes.toBytes("key=>value\tval_a\trowkey\t1234");
- parse = parser.parse(line, line.length);
- assertEquals(0, parse.getAttributeKeyOffset());
- assertEquals(0, parser.getAttributesKeyColumnIndex());
- attributes = parse.getIndividualAttributes();
- assertEquals(attributes[0], "key=>value");
- try {
- line = Bytes.toBytes("val_a");
- ParsedLine parse2 = parser.parse(line, line.length);
- fail("Should get BadTsvLineException when number of columns less than rowkey position.");
- } catch (BadTsvLineException b) {
-
- }
- parser = new TsvParser("col_a,HBASE_ATTRIBUTES_KEY,HBASE_TS_KEY,HBASE_ROW_KEY", "\t");
- assertEquals(3, parser.getRowKeyColumnIndex());
- line = Bytes.toBytes("val_a\tkey0=>value0,key1=>value1,key2=>value2\t1234\trowkey");
- parse = parser.parse(line, line.length);
- assertEquals(1, parser.getAttributesKeyColumnIndex());
- assertEquals(6, parse.getAttributeKeyOffset());
- String[] attr = parse.getIndividualAttributes();
- int i = 0;
- for(String str : attr) {
- assertEquals(("key"+i+"=>"+"value"+i), str );
- i++;
- }
- }
-
- @Test
- public void testTsvParserWithCellVisibilityCol() throws BadTsvLineException {
- TsvParser parser = new TsvParser(
- "HBASE_ROW_KEY,col_a,HBASE_TS_KEY,HBASE_ATTRIBUTES_KEY,HBASE_CELL_VISIBILITY", "\t");
- assertEquals(0, parser.getRowKeyColumnIndex());
- assertEquals(4, parser.getCellVisibilityColumnIndex());
- byte[] line = Bytes.toBytes("rowkey\tval_a\t1234\tkey=>value\tPRIVATE&SECRET");
- ParsedLine parse = parser.parse(line, line.length);
- assertEquals(18, parse.getAttributeKeyOffset());
- assertEquals(3, parser.getAttributesKeyColumnIndex());
- String attributes[] = parse.getIndividualAttributes();
- assertEquals(attributes[0], "key=>value");
- assertEquals(29, parse.getCellVisibilityColumnOffset());
- }
-
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestJarFinder.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestJarFinder.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestJarFinder.java
deleted file mode 100644
index 8187b73..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestJarFinder.java
+++ /dev/null
@@ -1,132 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.testclassification.SmallTests;
-import org.junit.Assert;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.io.Writer;
-import java.text.MessageFormat;
-import java.util.Properties;
-import java.util.jar.JarInputStream;
-import java.util.jar.JarOutputStream;
-import java.util.jar.Manifest;
-
-/**
- * This file was forked from hadoop/common/branches/branch-2@1350012.
- */
-@Category(SmallTests.class)
-public class TestJarFinder {
-
- @Test
- public void testJar() throws Exception {
-
- //picking a class that is for sure in a JAR in the classpath
- String jar = JarFinder.getJar(LogFactory.class);
- Assert.assertTrue(new File(jar).exists());
- }
-
- private static void delete(File file) throws IOException {
- if (file.getAbsolutePath().length() < 5) {
- throw new IllegalArgumentException(
- MessageFormat.format("Path [{0}] is too short, not deleting",
- file.getAbsolutePath()));
- }
- if (file.exists()) {
- if (file.isDirectory()) {
- File[] children = file.listFiles();
- if (children != null) {
- for (File child : children) {
- delete(child);
- }
- }
- }
- if (!file.delete()) {
- throw new RuntimeException(
- MessageFormat.format("Could not delete path [{0}]",
- file.getAbsolutePath()));
- }
- }
- }
-
- @Test
- public void testExpandedClasspath() throws Exception {
- //picking a class that is for sure in a directory in the classpath
- //in this case the JAR is created on the fly
- String jar = JarFinder.getJar(TestJarFinder.class);
- Assert.assertTrue(new File(jar).exists());
- }
-
- @Test
- public void testExistingManifest() throws Exception {
- File dir = new File(System.getProperty("test.build.dir", "target/test-dir"),
- TestJarFinder.class.getName() + "-testExistingManifest");
- delete(dir);
- dir.mkdirs();
-
- File metaInfDir = new File(dir, "META-INF");
- metaInfDir.mkdirs();
- File manifestFile = new File(metaInfDir, "MANIFEST.MF");
- Manifest manifest = new Manifest();
- OutputStream os = new FileOutputStream(manifestFile);
- manifest.write(os);
- os.close();
-
- File propsFile = new File(dir, "props.properties");
- Writer writer = new FileWriter(propsFile);
- new Properties().store(writer, "");
- writer.close();
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- JarOutputStream zos = new JarOutputStream(baos);
- JarFinder.jarDir(dir, "", zos);
- JarInputStream jis =
- new JarInputStream(new ByteArrayInputStream(baos.toByteArray()));
- Assert.assertNotNull(jis.getManifest());
- jis.close();
- }
-
- @Test
- public void testNoManifest() throws Exception {
- File dir = new File(System.getProperty("test.build.dir", "target/test-dir"),
- TestJarFinder.class.getName() + "-testNoManifest");
- delete(dir);
- dir.mkdirs();
- File propsFile = new File(dir, "props.properties");
- Writer writer = new FileWriter(propsFile);
- new Properties().store(writer, "");
- writer.close();
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- JarOutputStream zos = new JarOutputStream(baos);
- JarFinder.jarDir(dir, "", zos);
- JarInputStream jis =
- new JarInputStream(new ByteArrayInputStream(baos.toByteArray()));
- Assert.assertNotNull(jis.getManifest());
- jis.close();
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFiles.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFiles.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFiles.java
index b6ad2c9..b5b7a0c 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFiles.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFiles.java
@@ -59,7 +59,6 @@ import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.HFileTestUtil;
import org.junit.AfterClass;
import org.junit.BeforeClass;
-import org.junit.Ignore;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFilesSplitRecovery.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFilesSplitRecovery.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFilesSplitRecovery.java
deleted file mode 100644
index 529a448..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFilesSplitRecovery.java
+++ /dev/null
@@ -1,669 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.Collection;
-import java.util.Deque;
-import java.util.List;
-import java.util.Map;
-import java.util.NavigableMap;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.atomic.AtomicInteger;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.HColumnDescriptor;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.HRegionLocation;
-import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.MetaTableAccessor;
-import org.apache.hadoop.hbase.ServerName;
-import org.apache.hadoop.hbase.TableExistsException;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Admin;
-import org.apache.hadoop.hbase.client.ClientServiceCallable;
-import org.apache.hadoop.hbase.client.ClusterConnection;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.RegionLocator;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
-import org.apache.hadoop.hbase.ipc.RpcControllerFactory;
-import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.BulkLoadHFileRequest;
-import org.apache.hadoop.hbase.regionserver.HRegionServer;
-import org.apache.hadoop.hbase.regionserver.TestHRegionServerBulkLoad;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.MapReduceTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.FSUtils;
-import org.apache.hadoop.hbase.util.Pair;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
-import org.mockito.Mockito;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.Multimap;
-
-import org.apache.hadoop.hbase.shaded.com.google.protobuf.RpcController;
-import org.apache.hadoop.hbase.shaded.com.google.protobuf.ServiceException;
-
-/**
- * Test cases for the atomic load error handling of the bulk load functionality.
- */
-@Category({MapReduceTests.class, LargeTests.class})
-public class TestLoadIncrementalHFilesSplitRecovery {
- private static final Log LOG = LogFactory.getLog(TestHRegionServerBulkLoad.class);
-
- static HBaseTestingUtility util;
- //used by secure subclass
- static boolean useSecure = false;
-
- final static int NUM_CFS = 10;
- final static byte[] QUAL = Bytes.toBytes("qual");
- final static int ROWCOUNT = 100;
-
- private final static byte[][] families = new byte[NUM_CFS][];
-
- @Rule
- public TestName name = new TestName();
-
- static {
- for (int i = 0; i < NUM_CFS; i++) {
- families[i] = Bytes.toBytes(family(i));
- }
- }
-
- static byte[] rowkey(int i) {
- return Bytes.toBytes(String.format("row_%08d", i));
- }
-
- static String family(int i) {
- return String.format("family_%04d", i);
- }
-
- static byte[] value(int i) {
- return Bytes.toBytes(String.format("%010d", i));
- }
-
- public static void buildHFiles(FileSystem fs, Path dir, int value)
- throws IOException {
- byte[] val = value(value);
- for (int i = 0; i < NUM_CFS; i++) {
- Path testIn = new Path(dir, family(i));
-
- TestHRegionServerBulkLoad.createHFile(fs, new Path(testIn, "hfile_" + i),
- Bytes.toBytes(family(i)), QUAL, val, ROWCOUNT);
- }
- }
-
- /**
- * Creates a table with given table name and specified number of column
- * families if the table does not already exist.
- */
- private void setupTable(final Connection connection, TableName table, int cfs)
- throws IOException {
- try {
- LOG.info("Creating table " + table);
- HTableDescriptor htd = new HTableDescriptor(table);
- for (int i = 0; i < cfs; i++) {
- htd.addFamily(new HColumnDescriptor(family(i)));
- }
- try (Admin admin = connection.getAdmin()) {
- admin.createTable(htd);
- }
- } catch (TableExistsException tee) {
- LOG.info("Table " + table + " already exists");
- }
- }
-
- /**
-  * Creates a table with the given table name, the specified number of column
-  * families, and the given split keys, if the table does not already exist.
-  * @param table the table to create
-  * @param cfs the number of column families
-  * @param SPLIT_KEYS the keys to pre-split the table with
-  */
- private void setupTableWithSplitkeys(TableName table, int cfs, byte[][] SPLIT_KEYS)
- throws IOException {
- try {
- LOG.info("Creating table " + table);
- HTableDescriptor htd = new HTableDescriptor(table);
- for (int i = 0; i < cfs; i++) {
- htd.addFamily(new HColumnDescriptor(family(i)));
- }
-
- util.createTable(htd, SPLIT_KEYS);
- } catch (TableExistsException tee) {
- LOG.info("Table " + table + " already exists");
- }
- }
-
- private Path buildBulkFiles(TableName table, int value) throws Exception {
- Path dir = util.getDataTestDirOnTestFS(table.getNameAsString());
- Path bulk1 = new Path(dir, table.getNameAsString() + value);
- FileSystem fs = util.getTestFileSystem();
- buildHFiles(fs, bulk1, value);
- return bulk1;
- }
-
- /**
- * Populate table with known values.
- */
- private void populateTable(final Connection connection, TableName table, int value)
- throws Exception {
- // create HFiles for different column families
- LoadIncrementalHFiles lih = new LoadIncrementalHFiles(util.getConfiguration());
- Path bulk1 = buildBulkFiles(table, value);
- try (Table t = connection.getTable(table);
- RegionLocator locator = connection.getRegionLocator(table);
- Admin admin = connection.getAdmin()) {
- lih.doBulkLoad(bulk1, admin, t, locator);
- }
- }
-
- /**
- * Split the known table in half. (this is hard coded for this test suite)
- */
- private void forceSplit(TableName table) {
- try {
-      // We would need to call the region server directly to make this synchronous, but that API isn't visible here.
- HRegionServer hrs = util.getRSForFirstRegionInTable(table);
-
- for (HRegionInfo hri :
- ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices())) {
- if (hri.getTable().equals(table)) {
- util.getAdmin().splitRegionAsync(hri.getRegionName(), rowkey(ROWCOUNT / 2));
- //ProtobufUtil.split(null, hrs.getRSRpcServices(), hri, rowkey(ROWCOUNT / 2));
- }
- }
-
- // verify that split completed.
- int regions;
- do {
- regions = 0;
- for (HRegionInfo hri :
- ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices())) {
- if (hri.getTable().equals(table)) {
- regions++;
- }
- }
- if (regions != 2) {
- LOG.info("Taking some time to complete split...");
- Thread.sleep(250);
- }
- } while (regions != 2);
- } catch (IOException e) {
- e.printStackTrace();
- } catch (InterruptedException e) {
- e.printStackTrace();
- }
- }
-
- @BeforeClass
- public static void setupCluster() throws Exception {
- util = new HBaseTestingUtility();
- util.getConfiguration().set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY, "");
- util.startMiniCluster(1);
- }
-
- @AfterClass
- public static void teardownCluster() throws Exception {
- util.shutdownMiniCluster();
- }
-
- /**
- * Checks that all columns have the expected value and that there is the
- * expected number of rows.
- * @throws IOException
- */
- void assertExpectedTable(TableName table, int count, int value) throws IOException {
- HTableDescriptor [] htds = util.getAdmin().listTables(table.getNameAsString());
- assertEquals(htds.length, 1);
- Table t = null;
- try {
- t = util.getConnection().getTable(table);
- Scan s = new Scan();
- ResultScanner sr = t.getScanner(s);
- int i = 0;
- for (Result r : sr) {
- i++;
- for (NavigableMap<byte[], byte[]> nm : r.getNoVersionMap().values()) {
- for (byte[] val : nm.values()) {
- assertTrue(Bytes.equals(val, value(value)));
- }
- }
- }
- assertEquals(count, i);
- } catch (IOException e) {
- fail("Failed due to exception");
- } finally {
- if (t != null) t.close();
- }
- }
-
- /**
-  * Test that an exception thrown from the RS side results in an
-  * exception on the LIHFile client.
- */
- @Test(expected=IOException.class, timeout=120000)
- public void testBulkLoadPhaseFailure() throws Exception {
- final TableName table = TableName.valueOf(name.getMethodName());
- final AtomicInteger attmptedCalls = new AtomicInteger();
- final AtomicInteger failedCalls = new AtomicInteger();
- util.getConfiguration().setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 2);
- try (Connection connection = ConnectionFactory.createConnection(util
- .getConfiguration())) {
- setupTable(connection, table, 10);
- LoadIncrementalHFiles lih = new LoadIncrementalHFiles(
- util.getConfiguration()) {
- @Override
- protected List<LoadQueueItem> tryAtomicRegionLoad(
- ClientServiceCallable<byte[]> serviceCallable, TableName tableName, final byte[] first,
- Collection<LoadQueueItem> lqis) throws IOException {
- int i = attmptedCalls.incrementAndGet();
- if (i == 1) {
- Connection errConn;
- try {
- errConn = getMockedConnection(util.getConfiguration());
- serviceCallable = this.buildClientServiceCallable(errConn, table, first, lqis, true);
- } catch (Exception e) {
- LOG.fatal("mocking cruft, should never happen", e);
- throw new RuntimeException("mocking cruft, should never happen");
- }
- failedCalls.incrementAndGet();
- return super.tryAtomicRegionLoad(serviceCallable, tableName, first, lqis);
- }
-
- return super.tryAtomicRegionLoad(serviceCallable, tableName, first, lqis);
- }
- };
- try {
- // create HFiles for different column families
- Path dir = buildBulkFiles(table, 1);
- try (Table t = connection.getTable(table);
- RegionLocator locator = connection.getRegionLocator(table);
- Admin admin = connection.getAdmin()) {
- lih.doBulkLoad(dir, admin, t, locator);
- }
- } finally {
- util.getConfiguration().setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER,
- HConstants.DEFAULT_HBASE_CLIENT_RETRIES_NUMBER);
- }
- fail("doBulkLoad should have thrown an exception");
- }
- }
-
- /**
-  * Test that an exception thrown from the RS side results in the
-  * expected number of retries, set by {@link HConstants#HBASE_CLIENT_RETRIES_NUMBER},
-  * when {@link LoadIncrementalHFiles#RETRY_ON_IO_EXCEPTION} is set.
- */
- @Test
- public void testRetryOnIOException() throws Exception {
- final TableName table = TableName.valueOf(name.getMethodName());
- final AtomicInteger calls = new AtomicInteger(1);
- final Connection conn = ConnectionFactory.createConnection(util
- .getConfiguration());
- util.getConfiguration().setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 2);
- util.getConfiguration().setBoolean(
- LoadIncrementalHFiles.RETRY_ON_IO_EXCEPTION, true);
- final LoadIncrementalHFiles lih = new LoadIncrementalHFiles(
- util.getConfiguration()) {
- @Override
- protected List<LoadQueueItem> tryAtomicRegionLoad(
- ClientServiceCallable<byte[]> serverCallable, TableName tableName,
- final byte[] first, Collection<LoadQueueItem> lqis)
- throws IOException {
- if (calls.getAndIncrement() < util.getConfiguration().getInt(
- HConstants.HBASE_CLIENT_RETRIES_NUMBER,
- HConstants.DEFAULT_HBASE_CLIENT_RETRIES_NUMBER) - 1) {
- ClientServiceCallable<byte[]> newServerCallable = new ClientServiceCallable<byte[]>(
- conn, tableName, first, new RpcControllerFactory(
- util.getConfiguration()).newController(), HConstants.PRIORITY_UNSET) {
- @Override
- public byte[] rpcCall() throws Exception {
- throw new IOException("Error calling something on RegionServer");
- }
- };
- return super.tryAtomicRegionLoad(newServerCallable, tableName, first, lqis);
- } else {
- return super.tryAtomicRegionLoad(serverCallable, tableName, first, lqis);
- }
- }
- };
- setupTable(conn, table, 10);
- Path dir = buildBulkFiles(table, 1);
- lih.doBulkLoad(dir, conn.getAdmin(), conn.getTable(table),
- conn.getRegionLocator(table));
- util.getConfiguration().setBoolean(
- LoadIncrementalHFiles.RETRY_ON_IO_EXCEPTION, false);
-
- }
-
- @SuppressWarnings("deprecation")
- private ClusterConnection getMockedConnection(final Configuration conf)
- throws IOException, org.apache.hadoop.hbase.shaded.com.google.protobuf.ServiceException {
- ClusterConnection c = Mockito.mock(ClusterConnection.class);
- Mockito.when(c.getConfiguration()).thenReturn(conf);
- Mockito.doNothing().when(c).close();
- // Make it so we return a particular location when asked.
- final HRegionLocation loc = new HRegionLocation(HRegionInfo.FIRST_META_REGIONINFO,
- ServerName.valueOf("example.org", 1234, 0));
- Mockito.when(c.getRegionLocation((TableName) Mockito.any(),
- (byte[]) Mockito.any(), Mockito.anyBoolean())).
- thenReturn(loc);
- Mockito.when(c.locateRegion((TableName) Mockito.any(), (byte[]) Mockito.any())).
- thenReturn(loc);
- ClientProtos.ClientService.BlockingInterface hri =
- Mockito.mock(ClientProtos.ClientService.BlockingInterface.class);
- Mockito.when(hri.bulkLoadHFile((RpcController)Mockito.any(), (BulkLoadHFileRequest)Mockito.any())).
- thenThrow(new ServiceException(new IOException("injecting bulk load error")));
- Mockito.when(c.getClient(Mockito.any(ServerName.class))).
- thenReturn(hri);
- return c;
- }
-
- /**
- * This test exercises the path where there is a split after initial
- * validation but before the atomic bulk load call. We cannot use presplitting
- * to test this path, so we actually inject a split just before the atomic
- * region load.
- */
- @Test (timeout=120000)
- public void testSplitWhileBulkLoadPhase() throws Exception {
- final TableName table = TableName.valueOf(name.getMethodName());
- try (Connection connection = ConnectionFactory.createConnection(util.getConfiguration())) {
- setupTable(connection, table, 10);
- populateTable(connection, table,1);
- assertExpectedTable(table, ROWCOUNT, 1);
-
-      // Now let's cause trouble. This will occur after checks and cause bulk
-      // files to fail when we attempt to atomically import them. This is recoverable.
- final AtomicInteger attemptedCalls = new AtomicInteger();
- LoadIncrementalHFiles lih2 = new LoadIncrementalHFiles(util.getConfiguration()) {
- @Override
- protected void bulkLoadPhase(final Table htable, final Connection conn,
- ExecutorService pool, Deque<LoadQueueItem> queue,
- final Multimap<ByteBuffer, LoadQueueItem> regionGroups, boolean copyFile,
- Map<LoadQueueItem, ByteBuffer> item2RegionMap)
- throws IOException {
- int i = attemptedCalls.incrementAndGet();
- if (i == 1) {
- // On first attempt force a split.
- forceSplit(table);
- }
- super.bulkLoadPhase(htable, conn, pool, queue, regionGroups, copyFile, item2RegionMap);
- }
- };
-
- // create HFiles for different column families
- try (Table t = connection.getTable(table);
- RegionLocator locator = connection.getRegionLocator(table);
- Admin admin = connection.getAdmin()) {
- Path bulk = buildBulkFiles(table, 2);
- lih2.doBulkLoad(bulk, admin, t, locator);
- }
-
- // check that data was loaded
-      // The three expected attempts are: 1) failure because the region needs to split,
-      // 2) load of the split top, 3) load of the split bottom.
- assertEquals(attemptedCalls.get(), 3);
- assertExpectedTable(table, ROWCOUNT, 2);
- }
- }
-
- /**
- * This test splits a table and attempts to bulk load. The bulk import files
- * should be split before atomically importing.
- */
- @Test (timeout=120000)
- public void testGroupOrSplitPresplit() throws Exception {
- final TableName table = TableName.valueOf(name.getMethodName());
- try (Connection connection = ConnectionFactory.createConnection(util.getConfiguration())) {
- setupTable(connection, table, 10);
- populateTable(connection, table, 1);
- assertExpectedTable(connection, table, ROWCOUNT, 1);
- forceSplit(table);
-
- final AtomicInteger countedLqis= new AtomicInteger();
- LoadIncrementalHFiles lih = new LoadIncrementalHFiles(
- util.getConfiguration()) {
- @Override
- protected Pair<List<LoadQueueItem>, String> groupOrSplit(
- Multimap<ByteBuffer, LoadQueueItem> regionGroups,
- final LoadQueueItem item, final Table htable,
- final Pair<byte[][], byte[][]> startEndKeys) throws IOException {
- Pair<List<LoadQueueItem>, String> lqis = super.groupOrSplit(regionGroups, item, htable,
- startEndKeys);
- if (lqis != null && lqis.getFirst() != null) {
- countedLqis.addAndGet(lqis.getFirst().size());
- }
- return lqis;
- }
- };
-
- // create HFiles for different column families
- Path bulk = buildBulkFiles(table, 2);
- try (Table t = connection.getTable(table);
- RegionLocator locator = connection.getRegionLocator(table);
- Admin admin = connection.getAdmin()) {
- lih.doBulkLoad(bulk, admin, t, locator);
- }
- assertExpectedTable(connection, table, ROWCOUNT, 2);
- assertEquals(20, countedLqis.get());
- }
- }
-
- /**
- * This test creates a table with many small regions. The bulk load files
-  * must be split multiple times before all of them can be loaded successfully.
- */
- @Test (timeout=120000)
- public void testSplitTmpFileCleanUp() throws Exception {
- final TableName table = TableName.valueOf(name.getMethodName());
- byte[][] SPLIT_KEYS = new byte[][] { Bytes.toBytes("row_00000010"),
- Bytes.toBytes("row_00000020"), Bytes.toBytes("row_00000030"),
- Bytes.toBytes("row_00000040"), Bytes.toBytes("row_00000050")};
- try (Connection connection = ConnectionFactory.createConnection(util.getConfiguration())) {
- setupTableWithSplitkeys(table, 10, SPLIT_KEYS);
-
- LoadIncrementalHFiles lih = new LoadIncrementalHFiles(util.getConfiguration());
-
- // create HFiles
- Path bulk = buildBulkFiles(table, 2);
- try (Table t = connection.getTable(table);
- RegionLocator locator = connection.getRegionLocator(table);
- Admin admin = connection.getAdmin()) {
- lih.doBulkLoad(bulk, admin, t, locator);
- }
- // family path
- Path tmpPath = new Path(bulk, family(0));
- // TMP_DIR under family path
- tmpPath = new Path(tmpPath, LoadIncrementalHFiles.TMP_DIR);
- FileSystem fs = bulk.getFileSystem(util.getConfiguration());
-      // HFiles have been split, so the TMP_DIR exists
- assertTrue(fs.exists(tmpPath));
- // TMP_DIR should have been cleaned-up
- assertNull(LoadIncrementalHFiles.TMP_DIR + " should be empty.",
- FSUtils.listStatus(fs, tmpPath));
- assertExpectedTable(connection, table, ROWCOUNT, 2);
- }
- }
-
- /**
-  * This simulates a remote exception which should cause LIHF to exit with an
- * exception.
- */
- @Test(expected = IOException.class, timeout=120000)
- public void testGroupOrSplitFailure() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName());
- try (Connection connection = ConnectionFactory.createConnection(util.getConfiguration())) {
- setupTable(connection, tableName, 10);
-
- LoadIncrementalHFiles lih = new LoadIncrementalHFiles(
- util.getConfiguration()) {
- int i = 0;
-
- @Override
- protected Pair<List<LoadQueueItem>, String> groupOrSplit(
- Multimap<ByteBuffer, LoadQueueItem> regionGroups,
- final LoadQueueItem item, final Table table,
- final Pair<byte[][], byte[][]> startEndKeys) throws IOException {
- i++;
-
- if (i == 5) {
- throw new IOException("failure");
- }
- return super.groupOrSplit(regionGroups, item, table, startEndKeys);
- }
- };
-
- // create HFiles for different column families
- Path dir = buildBulkFiles(tableName,1);
- try (Table t = connection.getTable(tableName);
- RegionLocator locator = connection.getRegionLocator(tableName);
- Admin admin = connection.getAdmin()) {
- lih.doBulkLoad(dir, admin, t, locator);
- }
- }
-
- fail("doBulkLoad should have thrown an exception");
- }
-
- @Test (timeout=120000)
- public void testGroupOrSplitWhenRegionHoleExistsInMeta() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName());
- byte[][] SPLIT_KEYS = new byte[][] { Bytes.toBytes("row_00000100") };
-    // Share the connection. We were failing to find the table with our new reverse scan because it
-    // looks for the first region, not any region -- that is how it works now. The test below removes
-    // the first region; the old behavior relied on the Connection cache holding the first region.
- Connection connection = ConnectionFactory.createConnection(util.getConfiguration());
- Table table = connection.getTable(tableName);
-
- setupTableWithSplitkeys(tableName, 10, SPLIT_KEYS);
- Path dir = buildBulkFiles(tableName, 2);
-
- final AtomicInteger countedLqis = new AtomicInteger();
- LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration()) {
-
- @Override
- protected Pair<List<LoadQueueItem>, String> groupOrSplit(
- Multimap<ByteBuffer, LoadQueueItem> regionGroups,
- final LoadQueueItem item, final Table htable,
- final Pair<byte[][], byte[][]> startEndKeys) throws IOException {
- Pair<List<LoadQueueItem>, String> lqis = super.groupOrSplit(regionGroups, item, htable,
- startEndKeys);
- if (lqis != null && lqis.getFirst() != null) {
- countedLqis.addAndGet(lqis.getFirst().size());
- }
- return lqis;
- }
- };
-
- // do bulkload when there is no region hole in hbase:meta.
- try (Table t = connection.getTable(tableName);
- RegionLocator locator = connection.getRegionLocator(tableName);
- Admin admin = connection.getAdmin()) {
- loader.doBulkLoad(dir, admin, t, locator);
- } catch (Exception e) {
-      LOG.error("exception=", e);
- }
- // check if all the data are loaded into the table.
- this.assertExpectedTable(tableName, ROWCOUNT, 2);
-
- dir = buildBulkFiles(tableName, 3);
-
- // Mess it up by leaving a hole in the hbase:meta
- List<HRegionInfo> regionInfos = MetaTableAccessor.getTableRegions(connection, tableName);
- for (HRegionInfo regionInfo : regionInfos) {
- if (Bytes.equals(regionInfo.getStartKey(), HConstants.EMPTY_BYTE_ARRAY)) {
- MetaTableAccessor.deleteRegion(connection, regionInfo);
- break;
- }
- }
-
- try (Table t = connection.getTable(tableName);
- RegionLocator locator = connection.getRegionLocator(tableName);
- Admin admin = connection.getAdmin()) {
- loader.doBulkLoad(dir, admin, t, locator);
- } catch (Exception e) {
- LOG.error("exception=", e);
- assertTrue("IOException expected", e instanceof IOException);
- }
-
- table.close();
-
- // Make sure at least the one region that still exists can be found.
- regionInfos = MetaTableAccessor.getTableRegions(connection, tableName);
- assertTrue(regionInfos.size() >= 1);
-
- this.assertExpectedTable(connection, tableName, ROWCOUNT, 2);
- connection.close();
- }
-
- /**
- * Checks that all columns have the expected value and that there is the
- * expected number of rows.
- * @throws IOException
- */
- void assertExpectedTable(final Connection connection, TableName table, int count, int value)
- throws IOException {
- HTableDescriptor [] htds = util.getAdmin().listTables(table.getNameAsString());
- assertEquals(htds.length, 1);
- Table t = null;
- try {
- t = connection.getTable(table);
- Scan s = new Scan();
- ResultScanner sr = t.getScanner(s);
- int i = 0;
- for (Result r : sr) {
- i++;
- for (NavigableMap<byte[], byte[]> nm : r.getNoVersionMap().values()) {
- for (byte[] val : nm.values()) {
- assertTrue(Bytes.equals(val, value(value)));
- }
- }
- }
- assertEquals(count, i);
- } catch (IOException e) {
- fail("Failed due to exception");
- } finally {
- if (t != null) t.close();
- }
- }
-}
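
All of the failure-injecting subclasses in the test above wrap the same baseline client call. A minimal sketch of that call, using the doBulkLoad(Path, Admin, Table, RegionLocator) overload the tests invoke; the table name and HFile directory are illustrative, not taken from the patch.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;   // same package as the test above

public class BulkLoadSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    TableName tableName = TableName.valueOf("exampleTable");    // illustrative name
    Path hfileDir = new Path("/tmp/bulk/exampleTable");         // illustrative; layout is <dir>/<family>/<hfile>
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
    try (Connection connection = ConnectionFactory.createConnection(conf);
         Table table = connection.getTable(tableName);
         RegionLocator locator = connection.getRegionLocator(tableName);
         Admin admin = connection.getAdmin()) {
      // Groups the HFiles by region (splitting any that straddle a region boundary),
      // then loads each group atomically; mid-load splits are retried, as the tests above verify.
      loader.doBulkLoad(hfileDir, admin, table, locator);
    }
  }
}
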
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableInputFormat.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableInputFormat.java
deleted file mode 100644
index 0c5207b..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableInputFormat.java
+++ /dev/null
@@ -1,49 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
-import org.apache.hadoop.mapreduce.Job;
-import org.junit.BeforeClass;
-import org.junit.experimental.categories.Category;
-
-import java.io.IOException;
-import java.util.List;
-
-/**
- * Tests various scan start and stop row scenarios. This is set in a scan and
- * tested in a MapReduce job to see if that is handed over and done properly
- * too.
- */
-@Category({VerySlowMapReduceTests.class, LargeTests.class})
-public class TestMultiTableInputFormat extends MultiTableInputFormatTestBase {
-
- @BeforeClass
- public static void setupLogging() {
- TEST_UTIL.enableDebug(MultiTableInputFormat.class);
- }
-
- @Override
- protected void initJob(List<Scan> scans, Job job) throws IOException {
- TableMapReduceUtil.initTableMapperJob(scans, ScanMapper.class,
- ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
- }
-}
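
The initJob override above hands a list of Scans to TableMapReduceUtil.initTableMapperJob; each Scan is expected to identify its source table through the Scan.SCAN_ATTRIBUTES_TABLE_NAME attribute (the same attribute the snapshot variant further down reads back). A short sketch of building such a list outside the test harness; it is a fragment (imports and the enclosing method omitted), the table names are hypothetical, and ScanMapper refers to the mapper defined in MultiTableInputFormatTestBase.

// Sketch: one Scan per table, each tagged with its table name, fed to a single MR job.
List<Scan> scans = new ArrayList<>();
for (String tableName : new String[] { "tableA", "tableB" }) {   // hypothetical tables
  Scan scan = new Scan();
  scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(tableName));
  scans.add(scan);
}
Job job = Job.getInstance(HBaseConfiguration.create(), "multi-table-scan");
TableMapReduceUtil.initTableMapperJob(scans, ScanMapper.class,
    ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
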
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableSnapshotInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableSnapshotInputFormat.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableSnapshotInputFormat.java
deleted file mode 100644
index 32f511b..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableSnapshotInputFormat.java
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.base.Function;
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.ImmutableList;
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.Multimaps;
-import edu.umd.cs.findbugs.annotations.Nullable;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.FSUtils;
-import org.apache.hadoop.mapreduce.Job;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.experimental.categories.Category;
-
-import java.io.IOException;
-import java.util.Collection;
-import java.util.List;
-import java.util.Map;
-
-@Category({ VerySlowMapReduceTests.class, LargeTests.class })
-public class TestMultiTableSnapshotInputFormat extends MultiTableInputFormatTestBase {
-
- protected Path restoreDir;
-
- @BeforeClass
- public static void setUpSnapshots() throws Exception {
-
- TEST_UTIL.enableDebug(MultiTableSnapshotInputFormat.class);
- TEST_UTIL.enableDebug(MultiTableSnapshotInputFormatImpl.class);
-
- // take a snapshot of every table we have.
- for (String tableName : TABLES) {
- SnapshotTestingUtils
- .createSnapshotAndValidate(TEST_UTIL.getAdmin(), TableName.valueOf(tableName),
- ImmutableList.of(MultiTableInputFormatTestBase.INPUT_FAMILY), null,
- snapshotNameForTable(tableName), FSUtils.getRootDir(TEST_UTIL.getConfiguration()),
- TEST_UTIL.getTestFileSystem(), true);
- }
- }
-
- @Before
- public void setUp() throws Exception {
- this.restoreDir = TEST_UTIL.getRandomDir();
- }
-
- @Override
- protected void initJob(List<Scan> scans, Job job) throws IOException {
- TableMapReduceUtil
- .initMultiTableSnapshotMapperJob(getSnapshotScanMapping(scans), ScanMapper.class,
- ImmutableBytesWritable.class, ImmutableBytesWritable.class, job, true, restoreDir);
- }
-
- protected Map<String, Collection<Scan>> getSnapshotScanMapping(final List<Scan> scans) {
- return Multimaps.index(scans, new Function<Scan, String>() {
- @Nullable
- @Override
- public String apply(Scan input) {
- return snapshotNameForTable(
- Bytes.toStringBinary(input.getAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME)));
- }
- }).asMap();
- }
-
- public static String snapshotNameForTable(String tableName) {
- return tableName + "_snapshot";
- }
-
-}
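
getSnapshotScanMapping above groups the scans with Guava's Multimaps.index and an anonymous Function; with java.util.stream the same grouping can be written more compactly. A sketch only (fragment, imports omitted), assuming the same snapshotNameForTable helper and that every Scan carries the table-name attribute:

// Stream-based equivalent of the Multimaps.index call above (sketch).
Map<String, List<Scan>> grouped = scans.stream()
    .collect(Collectors.groupingBy(scan -> snapshotNameForTable(
        Bytes.toStringBinary(scan.getAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME)))));

The Guava form in the patch exposes a Map<String, Collection<Scan>> via asMap(), so the two differ only in the declared value type.
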
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableSnapshotInputFormatImpl.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableSnapshotInputFormatImpl.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableSnapshotInputFormatImpl.java
deleted file mode 100644
index 1c33848..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultiTableSnapshotInputFormatImpl.java
+++ /dev/null
@@ -1,186 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.mapreduce;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.ImmutableList;
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.ImmutableMap;
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.Maps;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.testclassification.SmallTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.FSUtils;
-import org.junit.Before;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.mockito.Mockito;
-
-import java.io.IOException;
-import java.util.Collection;
-import java.util.List;
-import java.util.Map;
-import java.util.Objects;
-
-import static org.junit.Assert.assertEquals;
-import static org.mockito.Matchers.any;
-import static org.mockito.Matchers.eq;
-import static org.mockito.Mockito.doNothing;
-import static org.mockito.Mockito.verify;
-
-@Category({ SmallTests.class })
-public class TestMultiTableSnapshotInputFormatImpl {
-
- private MultiTableSnapshotInputFormatImpl subject;
- private Map<String, Collection<Scan>> snapshotScans;
- private Path restoreDir;
- private Configuration conf;
- private Path rootDir;
-
- @Before
- public void setUp() throws Exception {
- this.subject = Mockito.spy(new MultiTableSnapshotInputFormatImpl());
-
- // mock out restoreSnapshot
-    // TODO: this is kind of meh; it'd be much nicer to just inject the RestoreSnapshotHelper
-    // dependency into the input format. However, we need a new RestoreSnapshotHelper per
-    // snapshot in the current design, and it *also* feels weird to introduce a
-    // RestoreSnapshotHelperFactory and inject that, which would probably be the more "pure"
-    // way of doing things. This is the lesser of two evils, perhaps?
- doNothing().when(this.subject).
- restoreSnapshot(any(Configuration.class), any(String.class), any(Path.class),
- any(Path.class), any(FileSystem.class));
-
- this.conf = new Configuration();
- this.rootDir = new Path("file:///test-root-dir");
- FSUtils.setRootDir(conf, rootDir);
- this.snapshotScans = ImmutableMap.<String, Collection<Scan>>of("snapshot1",
- ImmutableList.of(new Scan(Bytes.toBytes("1"), Bytes.toBytes("2"))), "snapshot2",
- ImmutableList.of(new Scan(Bytes.toBytes("3"), Bytes.toBytes("4")),
- new Scan(Bytes.toBytes("5"), Bytes.toBytes("6"))));
-
- this.restoreDir = new Path(FSUtils.getRootDir(conf), "restore-dir");
-
- }
-
- public void callSetInput() throws IOException {
- subject.setInput(this.conf, snapshotScans, restoreDir);
- }
-
- public Map<String, Collection<ScanWithEquals>> toScanWithEquals(
- Map<String, Collection<Scan>> snapshotScans) throws IOException {
- Map<String, Collection<ScanWithEquals>> rtn = Maps.newHashMap();
-
- for (Map.Entry<String, Collection<Scan>> entry : snapshotScans.entrySet()) {
- List<ScanWithEquals> scans = Lists.newArrayList();
-
- for (Scan scan : entry.getValue()) {
- scans.add(new ScanWithEquals(scan));
- }
- rtn.put(entry.getKey(), scans);
- }
-
- return rtn;
- }
-
- public static class ScanWithEquals {
-
- private final String startRow;
- private final String stopRow;
-
- /**
- * Creates a new instance of this class while copying all values.
- *
- * @param scan The scan instance to copy from.
- * @throws java.io.IOException When copying the values fails.
- */
- public ScanWithEquals(Scan scan) throws IOException {
- this.startRow = Bytes.toStringBinary(scan.getStartRow());
- this.stopRow = Bytes.toStringBinary(scan.getStopRow());
- }
-
- @Override
- public boolean equals(Object obj) {
- if (!(obj instanceof ScanWithEquals)) {
- return false;
- }
- ScanWithEquals otherScan = (ScanWithEquals) obj;
- return Objects.equals(this.startRow, otherScan.startRow) && Objects
- .equals(this.stopRow, otherScan.stopRow);
- }
-
- @Override
- public String toString() {
- return org.apache.hadoop.hbase.shaded.com.google.common.base.MoreObjects.
- toStringHelper(this).add("startRow", startRow)
- .add("stopRow", stopRow).toString();
- }
- }
-
- @Test
- public void testSetInputSetsSnapshotToScans() throws Exception {
-
- callSetInput();
-
- Map<String, Collection<Scan>> actual = subject.getSnapshotsToScans(conf);
-
- // convert to scans we can use .equals on
- Map<String, Collection<ScanWithEquals>> actualWithEquals = toScanWithEquals(actual);
- Map<String, Collection<ScanWithEquals>> expectedWithEquals = toScanWithEquals(snapshotScans);
-
- assertEquals(expectedWithEquals, actualWithEquals);
- }
-
- @Test
- public void testSetInputPushesRestoreDirectories() throws Exception {
- callSetInput();
-
- Map<String, Path> restoreDirs = subject.getSnapshotDirs(conf);
-
- assertEquals(this.snapshotScans.keySet(), restoreDirs.keySet());
- }
-
- @Test
- public void testSetInputCreatesRestoreDirectoriesUnderRootRestoreDir() throws Exception {
- callSetInput();
-
- Map<String, Path> restoreDirs = subject.getSnapshotDirs(conf);
-
- for (Path snapshotDir : restoreDirs.values()) {
- assertEquals("Expected " + snapshotDir + " to be a child of " + restoreDir, restoreDir,
- snapshotDir.getParent());
- }
- }
-
- @Test
- public void testSetInputRestoresSnapshots() throws Exception {
- callSetInput();
-
- Map<String, Path> snapshotDirs = subject.getSnapshotDirs(conf);
-
- for (Map.Entry<String, Path> entry : snapshotDirs.entrySet()) {
- verify(this.subject).restoreSnapshot(eq(this.conf), eq(entry.getKey()), eq(this.rootDir),
- eq(entry.getValue()), any(FileSystem.class));
- }
- }
-}
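
A small note on the ScanWithEquals helper above: it overrides equals() but not hashCode(). That is sufficient for the collection-equality assertions in this test, but a matching hashCode() would be needed if instances were ever used as keys in hash-based collections. A one-line sketch over the same startRow/stopRow fields:

@Override
public int hashCode() {
  // Kept consistent with equals(), which compares only startRow and stopRow.
  return java.util.Objects.hash(startRow, stopRow);
}
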
[27/41] hbase git commit: HBASE-18640 Move mapreduce out of hbase-server into separate module. Posted by ap...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat2.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat2.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat2.java
new file mode 100644
index 0000000..c6a8761
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat2.java
@@ -0,0 +1,1496 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNotSame;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.IOException;
+import java.lang.reflect.Field;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Random;
+import java.util.Set;
+import java.util.concurrent.Callable;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.hbase.ArrayBackedTag;
+import org.apache.hadoop.hbase.CategoryBasedTimeout;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.CompatibilitySingletonFactory;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HDFSBlocksDistribution;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.HadoopShims;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.PerformanceEvaluation;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.Tag;
+import org.apache.hadoop.hbase.TagType;
+import org.apache.hadoop.hbase.TagUtil;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.io.compress.Compression;
+import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
+import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
+import org.apache.hadoop.hbase.io.hfile.CacheConfig;
+import org.apache.hadoop.hbase.io.hfile.HFile;
+import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
+import org.apache.hadoop.hbase.io.hfile.HFileScanner;
+import org.apache.hadoop.hbase.regionserver.BloomType;
+import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.regionserver.Store;
+import org.apache.hadoop.hbase.regionserver.StoreFile;
+import org.apache.hadoop.hbase.regionserver.TestHRegionFileSystem;
+import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.util.ReflectionUtils;
+import org.apache.hadoop.hbase.util.Writables;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
+import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy;
+import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockStoragePolicySuite;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.junit.Ignore;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestRule;
+import org.mockito.Mockito;
+
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+/**
+ * Simple test for {@link HFileOutputFormat2}.
+ * Sets up and runs a mapreduce job that writes hfile output.
+ * Creates a few inner classes to implement splits and an inputformat that
+ * emits keys and values like those of {@link PerformanceEvaluation}.
+ */
+@Category({VerySlowMapReduceTests.class, LargeTests.class})
+public class TestHFileOutputFormat2 {
+ @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
+ withTimeout(this.getClass()).withLookingForStuckThread(true).build();
+ private final static int ROWSPERSPLIT = 1024;
+
+ public static final byte[] FAMILY_NAME = TestHRegionFileSystem.FAMILY_NAME;
+ private static final byte[][] FAMILIES = {
+ Bytes.add(FAMILY_NAME, Bytes.toBytes("-A")), Bytes.add(FAMILY_NAME, Bytes.toBytes("-B"))};
+ private static final TableName[] TABLE_NAMES = Stream.of("TestTable", "TestTable2",
+ "TestTable3").map(TableName::valueOf).toArray(TableName[]::new);
+
+ private HBaseTestingUtility util = new HBaseTestingUtility();
+
+ private static final Log LOG = LogFactory.getLog(TestHFileOutputFormat2.class);
+
+ /**
+ * Simple mapper that makes KeyValue output.
+ */
+ static class RandomKVGeneratingMapper
+ extends Mapper<NullWritable, NullWritable,
+ ImmutableBytesWritable, Cell> {
+
+ private int keyLength;
+ private static final int KEYLEN_DEFAULT=10;
+ private static final String KEYLEN_CONF="randomkv.key.length";
+
+ private int valLength;
+ private static final int VALLEN_DEFAULT=10;
+ private static final String VALLEN_CONF="randomkv.val.length";
+ private static final byte [] QUALIFIER = Bytes.toBytes("data");
+ private boolean multiTableMapper = false;
+ private TableName[] tables = null;
+
+
+ @Override
+ protected void setup(Context context) throws IOException,
+ InterruptedException {
+ super.setup(context);
+
+ Configuration conf = context.getConfiguration();
+ keyLength = conf.getInt(KEYLEN_CONF, KEYLEN_DEFAULT);
+ valLength = conf.getInt(VALLEN_CONF, VALLEN_DEFAULT);
+ multiTableMapper = conf.getBoolean(HFileOutputFormat2.MULTI_TABLE_HFILEOUTPUTFORMAT_CONF_KEY,
+ false);
+ if (multiTableMapper) {
+ tables = TABLE_NAMES;
+ } else {
+ tables = new TableName[]{TABLE_NAMES[0]};
+ }
+ }
+
+ @Override
+ protected void map(
+ NullWritable n1, NullWritable n2,
+ Mapper<NullWritable, NullWritable,
+ ImmutableBytesWritable,Cell>.Context context)
+ throws java.io.IOException ,InterruptedException
+ {
+
+ byte keyBytes[] = new byte[keyLength];
+ byte valBytes[] = new byte[valLength];
+
+ int taskId = context.getTaskAttemptID().getTaskID().getId();
+      assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";
+ Random random = new Random();
+ byte[] key;
+ for (int j = 0; j < tables.length; ++j) {
+ for (int i = 0; i < ROWSPERSPLIT; i++) {
+ random.nextBytes(keyBytes);
+ // Ensure that unique tasks generate unique keys
+ keyBytes[keyLength - 1] = (byte) (taskId & 0xFF);
+ random.nextBytes(valBytes);
+ key = keyBytes;
+ if (multiTableMapper) {
+ key = MultiTableHFileOutputFormat.createCompositeKey(tables[j].getName(), keyBytes);
+ }
+
+ for (byte[] family : TestHFileOutputFormat2.FAMILIES) {
+ Cell kv = new KeyValue(keyBytes, family, QUALIFIER, valBytes);
+ context.write(new ImmutableBytesWritable(key), kv);
+ }
+ }
+ }
+ }
+ }
+
+ /**
+ * Simple mapper that makes Put output.
+ */
+ static class RandomPutGeneratingMapper
+ extends Mapper<NullWritable, NullWritable,
+ ImmutableBytesWritable, Put> {
+
+ private int keyLength;
+ private static final int KEYLEN_DEFAULT = 10;
+ private static final String KEYLEN_CONF = "randomkv.key.length";
+
+ private int valLength;
+ private static final int VALLEN_DEFAULT = 10;
+ private static final String VALLEN_CONF = "randomkv.val.length";
+ private static final byte[] QUALIFIER = Bytes.toBytes("data");
+ private boolean multiTableMapper = false;
+ private TableName[] tables = null;
+
+ @Override
+ protected void setup(Context context) throws IOException,
+ InterruptedException {
+ super.setup(context);
+
+ Configuration conf = context.getConfiguration();
+ keyLength = conf.getInt(KEYLEN_CONF, KEYLEN_DEFAULT);
+ valLength = conf.getInt(VALLEN_CONF, VALLEN_DEFAULT);
+ multiTableMapper = conf.getBoolean(HFileOutputFormat2.MULTI_TABLE_HFILEOUTPUTFORMAT_CONF_KEY,
+ false);
+ if (multiTableMapper) {
+ tables = TABLE_NAMES;
+ } else {
+ tables = new TableName[]{TABLE_NAMES[0]};
+ }
+ }
+
+ @Override
+ protected void map(
+ NullWritable n1, NullWritable n2,
+ Mapper<NullWritable, NullWritable,
+ ImmutableBytesWritable, Put>.Context context)
+ throws java.io.IOException, InterruptedException {
+
+ byte keyBytes[] = new byte[keyLength];
+ byte valBytes[] = new byte[valLength];
+
+ int taskId = context.getTaskAttemptID().getTaskID().getId();
+      assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";
+
+ Random random = new Random();
+ byte[] key;
+ for (int j = 0; j < tables.length; ++j) {
+ for (int i = 0; i < ROWSPERSPLIT; i++) {
+ random.nextBytes(keyBytes);
+ // Ensure that unique tasks generate unique keys
+ keyBytes[keyLength - 1] = (byte) (taskId & 0xFF);
+ random.nextBytes(valBytes);
+ key = keyBytes;
+ if (multiTableMapper) {
+ key = MultiTableHFileOutputFormat.createCompositeKey(tables[j].getName(), keyBytes);
+ }
+
+ for (byte[] family : TestHFileOutputFormat2.FAMILIES) {
+ Put p = new Put(keyBytes);
+ p.addColumn(family, QUALIFIER, valBytes);
+ // set TTL to very low so that the scan does not return any value
+          p.setTTL(1L);
+ context.write(new ImmutableBytesWritable(key), p);
+ }
+ }
+ }
+ }
+ }
+
+ private void setupRandomGeneratorMapper(Job job, boolean putSortReducer) {
+ if (putSortReducer) {
+ job.setInputFormatClass(NMapInputFormat.class);
+ job.setMapperClass(RandomPutGeneratingMapper.class);
+ job.setMapOutputKeyClass(ImmutableBytesWritable.class);
+ job.setMapOutputValueClass(Put.class);
+ } else {
+ job.setInputFormatClass(NMapInputFormat.class);
+ job.setMapperClass(RandomKVGeneratingMapper.class);
+ job.setMapOutputKeyClass(ImmutableBytesWritable.class);
+ job.setMapOutputValueClass(KeyValue.class);
+ }
+ }
+
+ /**
+ * Test that {@link HFileOutputFormat2} RecordWriter amends timestamps if
+ * passed a keyvalue whose timestamp is {@link HConstants#LATEST_TIMESTAMP}.
+ * @see <a href="https://issues.apache.org/jira/browse/HBASE-2615">HBASE-2615</a>
+ */
+ @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
+ public void test_LATEST_TIMESTAMP_isReplaced()
+ throws Exception {
+ Configuration conf = new Configuration(this.util.getConfiguration());
+ RecordWriter<ImmutableBytesWritable, Cell> writer = null;
+ TaskAttemptContext context = null;
+ Path dir =
+ util.getDataTestDir("test_LATEST_TIMESTAMP_isReplaced");
+ try {
+ Job job = new Job(conf);
+ FileOutputFormat.setOutputPath(job, dir);
+ context = createTestTaskAttemptContext(job);
+ HFileOutputFormat2 hof = new HFileOutputFormat2();
+ writer = hof.getRecordWriter(context);
+ final byte [] b = Bytes.toBytes("b");
+
+      // Test 1. Pass a KV that has a ts of LATEST_TIMESTAMP. It should be changed by the
+      // call to write. Check that everything in the kv is the same except the ts.
+ KeyValue kv = new KeyValue(b, b, b);
+ KeyValue original = kv.clone();
+ writer.write(new ImmutableBytesWritable(), kv);
+ assertFalse(original.equals(kv));
+ assertTrue(Bytes.equals(CellUtil.cloneRow(original), CellUtil.cloneRow(kv)));
+ assertTrue(Bytes.equals(CellUtil.cloneFamily(original), CellUtil.cloneFamily(kv)));
+ assertTrue(Bytes.equals(CellUtil.cloneQualifier(original), CellUtil.cloneQualifier(kv)));
+ assertNotSame(original.getTimestamp(), kv.getTimestamp());
+ assertNotSame(HConstants.LATEST_TIMESTAMP, kv.getTimestamp());
+
+      // Test 2. Now test passing a kv that has an explicit ts. It should not be
+      // changed by the call to write.
+ kv = new KeyValue(b, b, b, kv.getTimestamp() - 1, b);
+ original = kv.clone();
+ writer.write(new ImmutableBytesWritable(), kv);
+ assertTrue(original.equals(kv));
+ } finally {
+ if (writer != null && context != null) writer.close(context);
+ dir.getFileSystem(conf).delete(dir, true);
+ }
+ }
+
+ private TaskAttemptContext createTestTaskAttemptContext(final Job job)
+ throws Exception {
+ HadoopShims hadoop = CompatibilitySingletonFactory.getInstance(HadoopShims.class);
+ TaskAttemptContext context = hadoop.createTestTaskAttemptContext(
+ job, "attempt_201402131733_0001_m_000000_0");
+ return context;
+ }
+
+ /*
+ * Test that {@link HFileOutputFormat2} creates an HFile with TIMERANGE
+ * metadata used by time-restricted scans.
+ */
+ @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
+ public void test_TIMERANGE() throws Exception {
+ Configuration conf = new Configuration(this.util.getConfiguration());
+ RecordWriter<ImmutableBytesWritable, Cell> writer = null;
+ TaskAttemptContext context = null;
+ Path dir =
+ util.getDataTestDir("test_TIMERANGE_present");
+ LOG.info("Timerange dir writing to dir: "+ dir);
+ try {
+ // build a record writer using HFileOutputFormat2
+ Job job = new Job(conf);
+ FileOutputFormat.setOutputPath(job, dir);
+ context = createTestTaskAttemptContext(job);
+ HFileOutputFormat2 hof = new HFileOutputFormat2();
+ writer = hof.getRecordWriter(context);
+
+      // Pass two key values with explicit timestamps
+ final byte [] b = Bytes.toBytes("b");
+
+ // value 1 with timestamp 2000
+ KeyValue kv = new KeyValue(b, b, b, 2000, b);
+ KeyValue original = kv.clone();
+ writer.write(new ImmutableBytesWritable(), kv);
+ assertEquals(original,kv);
+
+ // value 2 with timestamp 1000
+ kv = new KeyValue(b, b, b, 1000, b);
+ original = kv.clone();
+ writer.write(new ImmutableBytesWritable(), kv);
+ assertEquals(original, kv);
+
+ // verify that the file has the proper FileInfo.
+ writer.close(context);
+
+ // the generated file lives 1 directory down from the attempt directory
+ // and is the only file, e.g.
+ // _attempt__0000_r_000000_0/b/1979617994050536795
+ FileSystem fs = FileSystem.get(conf);
+ Path attemptDirectory = hof.getDefaultWorkFile(context, "").getParent();
+ FileStatus[] sub1 = fs.listStatus(attemptDirectory);
+ FileStatus[] file = fs.listStatus(sub1[0].getPath());
+
+ // open as HFile Reader and pull out TIMERANGE FileInfo.
+ HFile.Reader rd =
+ HFile.createReader(fs, file[0].getPath(), new CacheConfig(conf), true, conf);
+ Map<byte[],byte[]> finfo = rd.loadFileInfo();
+ byte[] range = finfo.get("TIMERANGE".getBytes("UTF-8"));
+ assertNotNull(range);
+
+ // unmarshall and check values.
+ TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
+ Writables.copyWritable(range, timeRangeTracker);
+ LOG.info(timeRangeTracker.getMin() +
+ "...." + timeRangeTracker.getMax());
+ assertEquals(1000, timeRangeTracker.getMin());
+ assertEquals(2000, timeRangeTracker.getMax());
+ rd.close();
+ } finally {
+ if (writer != null && context != null) writer.close(context);
+ dir.getFileSystem(conf).delete(dir, true);
+ }
+ }
+
+ /**
+ * Run small MR job.
+ */
+ @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
+ public void testWritingPEData() throws Exception {
+ Configuration conf = util.getConfiguration();
+ Path testDir = util.getDataTestDirOnTestFS("testWritingPEData");
+ FileSystem fs = testDir.getFileSystem(conf);
+
+ // Set down this value or we OOME in eclipse.
+ conf.setInt("mapreduce.task.io.sort.mb", 20);
+ // Write a few files.
+ conf.setLong(HConstants.HREGION_MAX_FILESIZE, 64 * 1024);
+
+ Job job = new Job(conf, "testWritingPEData");
+ setupRandomGeneratorMapper(job, false);
+    // This partitioner doesn't work well for number keys, but we use it anyway
+    // just to demonstrate how to configure it.
+ byte[] startKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
+ byte[] endKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
+
+ Arrays.fill(startKey, (byte)0);
+ Arrays.fill(endKey, (byte)0xff);
+
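+ // SimpleTotalOrderPartitioner splits the [startKey, endKey] range into evenly sized key ranges, one per reducer.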
+ job.setPartitionerClass(SimpleTotalOrderPartitioner.class);
+ // Set start and end rows for partitioner.
+ SimpleTotalOrderPartitioner.setStartKey(job.getConfiguration(), startKey);
+ SimpleTotalOrderPartitioner.setEndKey(job.getConfiguration(), endKey);
+ job.setReducerClass(KeyValueSortReducer.class);
+ job.setOutputFormatClass(HFileOutputFormat2.class);
+ job.setNumReduceTasks(4);
+ job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
+ MutationSerialization.class.getName(), ResultSerialization.class.getName(),
+ KeyValueSerialization.class.getName());
+
+ FileOutputFormat.setOutputPath(job, testDir);
+ assertTrue(job.waitForCompletion(false));
+ FileStatus [] files = fs.listStatus(testDir);
+ assertTrue(files.length > 0);
+ }
+
+ /**
+ * Test that {@link HFileOutputFormat2} RecordWriter writes tags such as TTL into
+ * the HFile.
+ */
+ @Test
+ public void test_WritingTagData()
+ throws Exception {
+ Configuration conf = new Configuration(this.util.getConfiguration());
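+ // Cell tags are only persisted for HFile format v3 and later, so force the minimum tag-capable format version.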
+ final String HFILE_FORMAT_VERSION_CONF_KEY = "hfile.format.version";
+ conf.setInt(HFILE_FORMAT_VERSION_CONF_KEY, HFile.MIN_FORMAT_VERSION_WITH_TAGS);
+ RecordWriter<ImmutableBytesWritable, Cell> writer = null;
+ TaskAttemptContext context = null;
+ Path dir =
+ util.getDataTestDir("WritingTagData");
+ try {
+ conf.set(HFileOutputFormat2.OUTPUT_TABLE_NAME_CONF_KEY, TABLE_NAMES[0].getNameAsString());
+ // turn locality off to eliminate getRegionLocation fail-and-retry time when writing kvs
+ conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false);
+ Job job = new Job(conf);
+ FileOutputFormat.setOutputPath(job, dir);
+ context = createTestTaskAttemptContext(job);
+ HFileOutputFormat2 hof = new HFileOutputFormat2();
+ writer = hof.getRecordWriter(context);
+ final byte [] b = Bytes.toBytes("b");
+
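+ // Attach a TTL tag to the cell; the checks below verify that the tag survives the round trip into the HFile.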
+ List<Tag> tags = new ArrayList<>();
+ tags.add(new ArrayBackedTag(TagType.TTL_TAG_TYPE, Bytes.toBytes(978670)));
+ KeyValue kv = new KeyValue(b, b, b, HConstants.LATEST_TIMESTAMP, b, tags);
+ writer.write(new ImmutableBytesWritable(), kv);
+ writer.close(context);
+ writer = null;
+ FileSystem fs = dir.getFileSystem(conf);
+ RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(dir, true);
+ while(iterator.hasNext()) {
+ LocatedFileStatus keyFileStatus = iterator.next();
+ HFile.Reader reader =
+ HFile.createReader(fs, keyFileStatus.getPath(), new CacheConfig(conf), true, conf);
+ HFileScanner scanner = reader.getScanner(false, false, false);
+ scanner.seekTo();
+ Cell cell = scanner.getCell();
+ List<Tag> tagsFromCell = TagUtil.asList(cell.getTagsArray(), cell.getTagsOffset(),
+ cell.getTagsLength());
+ assertTrue(tagsFromCell.size() > 0);
+ for (Tag tag : tagsFromCell) {
+ assertTrue(tag.getType() == TagType.TTL_TAG_TYPE);
+ }
+ }
+ } finally {
+ if (writer != null && context != null) writer.close(context);
+ dir.getFileSystem(conf).delete(dir, true);
+ }
+ }
+
+ @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
+ public void testJobConfiguration() throws Exception {
+ Configuration conf = new Configuration(this.util.getConfiguration());
+ conf.set(HConstants.TEMPORARY_FS_DIRECTORY_KEY, util.getDataTestDir("testJobConfiguration")
+ .toString());
+ Job job = new Job(conf);
+ job.setWorkingDirectory(util.getDataTestDir("testJobConfiguration"));
+ Table table = Mockito.mock(Table.class);
+ RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
+ setupMockStartKeys(regionLocator);
+ setupMockTableName(regionLocator);
+ HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
+ assertEquals(4, job.getNumReduceTasks());
+ }
+
+ private byte [][] generateRandomStartKeys(int numKeys) {
+ Random random = new Random();
+ byte[][] ret = new byte[numKeys][];
+ // first region start key is always empty
+ ret[0] = HConstants.EMPTY_BYTE_ARRAY;
+ for (int i = 1; i < numKeys; i++) {
+ ret[i] =
+ PerformanceEvaluation.generateData(random, PerformanceEvaluation.DEFAULT_VALUE_LENGTH);
+ }
+ return ret;
+ }
+
+ private byte[][] generateRandomSplitKeys(int numKeys) {
+ Random random = new Random();
+ byte[][] ret = new byte[numKeys][];
+ for (int i = 0; i < numKeys; i++) {
+ ret[i] =
+ PerformanceEvaluation.generateData(random, PerformanceEvaluation.DEFAULT_VALUE_LENGTH);
+ }
+ return ret;
+ }
+
+ @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
+ public void testMRIncrementalLoad() throws Exception {
+ LOG.info("\nStarting test testMRIncrementalLoad\n");
+ doIncrementalLoadTest(false, false, false, "testMRIncrementalLoad");
+ }
+
+ @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
+ public void testMRIncrementalLoadWithSplit() throws Exception {
+ LOG.info("\nStarting test testMRIncrementalLoadWithSplit\n");
+ doIncrementalLoadTest(true, false, false, "testMRIncrementalLoadWithSplit");
+ }
+
+ /**
+ * Test for HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY = true.
+ * This test can only check the correctness of the original logic when LOCALITY_SENSITIVE_CONF_KEY
+ * is set to true. Because MiniHBaseCluster always runs with a single hostname (and different ports),
+ * it is not possible to check region locality by comparing region locations with DataNode hostnames.
+ * Once MiniHBaseCluster supports an explicit hostnames parameter (just as MiniDFSCluster does),
+ * region locality features can be tested more easily.
+ */
+ @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
+ public void testMRIncrementalLoadWithLocality() throws Exception {
+ LOG.info("\nStarting test testMRIncrementalLoadWithLocality\n");
+ doIncrementalLoadTest(false, true, false, "testMRIncrementalLoadWithLocality1");
+ doIncrementalLoadTest(true, true, false, "testMRIncrementalLoadWithLocality2");
+ }
+
+ //@Ignore("Wahtevs")
+ @Test
+ public void testMRIncrementalLoadWithPutSortReducer() throws Exception {
+ LOG.info("\nStarting test testMRIncrementalLoadWithPutSortReducer\n");
+ doIncrementalLoadTest(false, false, true, "testMRIncrementalLoadWithPutSortReducer");
+ }
+
+ private void doIncrementalLoadTest(boolean shouldChangeRegions, boolean shouldKeepLocality,
+ boolean putSortReducer, String tableStr) throws Exception {
+ doIncrementalLoadTest(shouldChangeRegions, shouldKeepLocality, putSortReducer,
+ Arrays.asList(tableStr));
+ }
+
+ @Test
+ public void testMultiMRIncrementalLoadWithPutSortReducer() throws Exception {
+ LOG.info("\nStarting test testMultiMRIncrementalLoadWithPutSortReducer\n");
+ doIncrementalLoadTest(false, false, true,
+ Arrays.stream(TABLE_NAMES).map(TableName::getNameAsString).collect(Collectors.toList()));
+ }
+
+ private void doIncrementalLoadTest(boolean shouldChangeRegions, boolean shouldKeepLocality,
+ boolean putSortReducer, List<String> tableStr) throws Exception {
+ util = new HBaseTestingUtility();
+ Configuration conf = util.getConfiguration();
+ conf.setBoolean(MultiTableHFileOutputFormat.LOCALITY_SENSITIVE_CONF_KEY, shouldKeepLocality);
+ int hostCount = 1;
+ int regionNum = 5;
+ if (shouldKeepLocality) {
+ // Raise the host count above the HDFS replica count once MiniHBaseCluster supports
+ // an explicit hostnames parameter, just as MiniDFSCluster does.
+ hostCount = 3;
+ regionNum = 20;
+ }
+
+ String[] hostnames = new String[hostCount];
+ for (int i = 0; i < hostCount; ++i) {
+ hostnames[i] = "datanode_" + i;
+ }
+ util.startMiniCluster(1, hostCount, hostnames);
+
+ Map<String, Table> allTables = new HashMap<>(tableStr.size());
+ List<HFileOutputFormat2.TableInfo> tableInfo = new ArrayList<>(tableStr.size());
+ boolean writeMultipleTables = tableStr.size() > 1;
+ for (String tableStrSingle : tableStr) {
+ byte[][] splitKeys = generateRandomSplitKeys(regionNum - 1);
+ TableName tableName = TableName.valueOf(tableStrSingle);
+ Table table = util.createTable(tableName, FAMILIES, splitKeys);
+
+ RegionLocator r = util.getConnection().getRegionLocator(tableName);
+ assertEquals("Should start with empty table", 0, util.countRows(table));
+ int numRegions = r.getStartKeys().length;
+ assertEquals("Should make " + regionNum + " regions", numRegions, regionNum);
+
+ allTables.put(tableStrSingle, table);
+ tableInfo.add(new HFileOutputFormat2.TableInfo(table.getTableDescriptor(), r));
+ }
+ Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
+ // Generate the bulk load files
+ runIncrementalPELoad(conf, tableInfo, testDir, putSortReducer);
+
+ for (Table tableSingle : allTables.values()) {
+ // This doesn't write into the table, just makes files
+ assertEquals("HFOF should not touch actual table", 0, util.countRows(tableSingle));
+ }
+ int numTableDirs = 0;
+ for (FileStatus tf : testDir.getFileSystem(conf).listStatus(testDir)) {
+ Path tablePath = testDir;
+
+ if (writeMultipleTables) {
+ if (allTables.containsKey(tf.getPath().getName())) {
+ ++numTableDirs;
+ tablePath = tf.getPath();
+ }
+ else {
+ continue;
+ }
+ }
+
+ // Make sure that a directory was created for every CF
+ int dir = 0;
+ for (FileStatus f : tablePath.getFileSystem(conf).listStatus(tablePath)) {
+ for (byte[] family : FAMILIES) {
+ if (Bytes.toString(family).equals(f.getPath().getName())) {
+ ++dir;
+ }
+ }
+ }
+ assertEquals("Column family not found in FS.", FAMILIES.length, dir);
+ }
+ if (writeMultipleTables) {
+ assertEquals("Dir for all input tables not created", numTableDirs, allTables.size());
+ }
+
+ Admin admin = util.getConnection().getAdmin();
+ try {
+ // handle the split case
+ if (shouldChangeRegions) {
+ // Choose a semi-random table if multiple tables are available
+ Table chosenTable = allTables.values().iterator().next();
+ LOG.info("Changing regions in table " + chosenTable.getName().getNameAsString());
+ admin.disableTable(chosenTable.getName());
+ util.waitUntilNoRegionsInTransition();
+
+ util.deleteTable(chosenTable.getName());
+ byte[][] newSplitKeys = generateRandomSplitKeys(14);
+ Table table = util.createTable(chosenTable.getName(), FAMILIES, newSplitKeys);
+
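+ // 14 split keys yield 15 regions; wait until they are all assigned and the recreated table is available.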
+ while (util.getConnection().getRegionLocator(chosenTable.getName())
+ .getAllRegionLocations().size() != 15 ||
+ !admin.isTableAvailable(table.getName())) {
+ Thread.sleep(200);
+ LOG.info("Waiting for new region assignment to happen");
+ }
+ }
+
+ // Perform the actual load
+ for (HFileOutputFormat2.TableInfo singleTableInfo : tableInfo) {
+ Path tableDir = testDir;
+ String tableNameStr = singleTableInfo.getHTableDescriptor().getNameAsString();
+ LOG.info("Running LoadIncrementalHFiles on table" + tableNameStr);
+ if (writeMultipleTables) {
+ tableDir = new Path(testDir, tableNameStr);
+ }
+ Table currentTable = allTables.get(tableNameStr);
+ TableName currentTableName = currentTable.getName();
+ new LoadIncrementalHFiles(conf).doBulkLoad(tableDir, admin, currentTable, singleTableInfo
+ .getRegionLocator());
+
+ // Ensure data shows up
+ int expectedRows = 0;
+ if (putSortReducer) {
+ // no rows should be extracted
+ assertEquals("LoadIncrementalHFiles should put expected data in table", expectedRows,
+ util.countRows(currentTable));
+ } else {
+ expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
+ assertEquals("LoadIncrementalHFiles should put expected data in table", expectedRows,
+ util.countRows(currentTable));
+ Scan scan = new Scan();
+ ResultScanner results = currentTable.getScanner(scan);
+ for (Result res : results) {
+ assertEquals(FAMILIES.length, res.rawCells().length);
+ Cell first = res.rawCells()[0];
+ for (Cell kv : res.rawCells()) {
+ assertTrue(CellUtil.matchingRow(first, kv));
+ assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv)));
+ }
+ }
+ results.close();
+ }
+ String tableDigestBefore = util.checksumRows(currentTable);
+ // Check region locality
+ HDFSBlocksDistribution hbd = new HDFSBlocksDistribution();
+ for (HRegion region : util.getHBaseCluster().getRegions(currentTableName)) {
+ hbd.add(region.getHDFSBlocksDistribution());
+ }
+ for (String hostname : hostnames) {
+ float locality = hbd.getBlockLocalityIndex(hostname);
+ LOG.info("locality of [" + hostname + "]: " + locality);
+ assertEquals(100, (int) (locality * 100));
+ }
+
+ // Cause regions to reopen
+ admin.disableTable(currentTableName);
+ while (!admin.isTableDisabled(currentTableName)) {
+ Thread.sleep(200);
+ LOG.info("Waiting for table to disable");
+ }
+ admin.enableTable(currentTableName);
+ util.waitTableAvailable(currentTableName);
+ assertEquals("Data should remain after reopening of regions",
+ tableDigestBefore, util.checksumRows(currentTable));
+ }
+ } finally {
+ for (HFileOutputFormat2.TableInfo tableInfoSingle : tableInfo) {
+ tableInfoSingle.getRegionLocator().close();
+ }
+ for (Entry<String, Table> singleTable : allTables.entrySet() ) {
+ singleTable.getValue().close();
+ util.deleteTable(singleTable.getValue().getName());
+ }
+ testDir.getFileSystem(conf).delete(testDir, true);
+ util.shutdownMiniCluster();
+ }
+ }
+
+ private void runIncrementalPELoad(Configuration conf, List<HFileOutputFormat2.TableInfo> tableInfo, Path outDir,
+ boolean putSortReducer) throws IOException,
+ InterruptedException, ClassNotFoundException {
+ Job job = new Job(conf, "testLocalMRIncrementalLoad");
+ job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));
+ job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
+ MutationSerialization.class.getName(), ResultSerialization.class.getName(),
+ KeyValueSerialization.class.getName());
+ setupRandomGeneratorMapper(job, putSortReducer);
+ if (tableInfo.size() > 1) {
+ MultiTableHFileOutputFormat.configureIncrementalLoad(job, tableInfo);
+ int sum = 0;
+ for (HFileOutputFormat2.TableInfo tableInfoSingle : tableInfo) {
+ sum += tableInfoSingle.getRegionLocator().getAllRegionLocations().size();
+ }
+ assertEquals(sum, job.getNumReduceTasks());
+ }
+ else {
+ RegionLocator regionLocator = tableInfo.get(0).getRegionLocator();
+ HFileOutputFormat2.configureIncrementalLoad(job, tableInfo.get(0).getHTableDescriptor(),
+ regionLocator);
+ assertEquals(regionLocator.getAllRegionLocations().size(), job.getNumReduceTasks());
+ }
+
+ FileOutputFormat.setOutputPath(job, outDir);
+
+ assertFalse(util.getTestFileSystem().exists(outDir));
+
+ assertTrue(job.waitForCompletion(true));
+ }
+
+ /**
+ * Test for {@link HFileOutputFormat2#configureCompression(Configuration, HTableDescriptor)} and
+ * {@link HFileOutputFormat2#createFamilyCompressionMap(Configuration)}.
+ * Tests that the compression map is correctly serialized into
+ * and deserialized from configuration
+ *
+ * @throws IOException
+ */
+ @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
+ public void testSerializeDeserializeFamilyCompressionMap() throws IOException {
+ for (int numCfs = 0; numCfs <= 3; numCfs++) {
+ Configuration conf = new Configuration(this.util.getConfiguration());
+ Map<String, Compression.Algorithm> familyToCompression =
+ getMockColumnFamiliesForCompression(numCfs);
+ Table table = Mockito.mock(Table.class);
+ setupMockColumnFamiliesForCompression(table, familyToCompression);
+ conf.set(HFileOutputFormat2.COMPRESSION_FAMILIES_CONF_KEY,
+ HFileOutputFormat2.serializeColumnFamilyAttribute
+ (HFileOutputFormat2.compressionDetails,
+ Arrays.asList(table.getTableDescriptor())));
+
+ // read back family specific compression setting from the configuration
+ Map<byte[], Algorithm> retrievedFamilyToCompressionMap = HFileOutputFormat2
+ .createFamilyCompressionMap(conf);
+
+ // test that we have a value for all column families that matches with the
+ // used mock values
+ for (Entry<String, Algorithm> entry : familyToCompression.entrySet()) {
+ assertEquals("Compression configuration incorrect for column family:"
+ + entry.getKey(), entry.getValue(),
+ retrievedFamilyToCompressionMap.get(entry.getKey().getBytes("UTF-8")));
+ }
+ }
+ }
+
+ private void setupMockColumnFamiliesForCompression(Table table,
+ Map<String, Compression.Algorithm> familyToCompression) throws IOException {
+ HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAMES[0]);
+ for (Entry<String, Compression.Algorithm> entry : familyToCompression.entrySet()) {
+ mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
+ .setMaxVersions(1)
+ .setCompressionType(entry.getValue())
+ .setBlockCacheEnabled(false)
+ .setTimeToLive(0));
+ }
+ Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
+ }
+
+ /**
+ * @return a map from column family names to compression algorithms for
+ * testing column family compression. Column family names have special characters
+ */
+ private Map<String, Compression.Algorithm>
+ getMockColumnFamiliesForCompression (int numCfs) {
+ Map<String, Compression.Algorithm> familyToCompression = new HashMap<>();
+ // use column family names having special characters
+ if (numCfs-- > 0) {
+ familyToCompression.put("Family1!@#!@#&", Compression.Algorithm.LZO);
+ }
+ if (numCfs-- > 0) {
+ familyToCompression.put("Family2=asdads&!AASD", Compression.Algorithm.SNAPPY);
+ }
+ if (numCfs-- > 0) {
+ familyToCompression.put("Family2=asdads&!AASD", Compression.Algorithm.GZ);
+ }
+ if (numCfs-- > 0) {
+ familyToCompression.put("Family3", Compression.Algorithm.NONE);
+ }
+ return familyToCompression;
+ }
+
+
+ /**
+ * Test for {@link HFileOutputFormat2#configureBloomType(HTableDescriptor, Configuration)} and
+ * {@link HFileOutputFormat2#createFamilyBloomTypeMap(Configuration)}.
+ * Tests that the bloom type map is correctly serialized into
+ * and deserialized from configuration
+ *
+ * @throws IOException
+ */
+ @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
+ public void testSerializeDeserializeFamilyBloomTypeMap() throws IOException {
+ for (int numCfs = 0; numCfs <= 2; numCfs++) {
+ Configuration conf = new Configuration(this.util.getConfiguration());
+ Map<String, BloomType> familyToBloomType =
+ getMockColumnFamiliesForBloomType(numCfs);
+ Table table = Mockito.mock(Table.class);
+ setupMockColumnFamiliesForBloomType(table,
+ familyToBloomType);
+ conf.set(HFileOutputFormat2.BLOOM_TYPE_FAMILIES_CONF_KEY,
+ HFileOutputFormat2.serializeColumnFamilyAttribute(HFileOutputFormat2.bloomTypeDetails,
+ Arrays.asList(table.getTableDescriptor())));
+
+ // read back family specific data block encoding settings from the
+ // configuration
+ Map<byte[], BloomType> retrievedFamilyToBloomTypeMap =
+ HFileOutputFormat2
+ .createFamilyBloomTypeMap(conf);
+
+ // test that we have a value for all column families that matches with the
+ // used mock values
+ for (Entry<String, BloomType> entry : familyToBloomType.entrySet()) {
+ assertEquals("BloomType configuration incorrect for column family:"
+ + entry.getKey(), entry.getValue(),
+ retrievedFamilyToBloomTypeMap.get(entry.getKey().getBytes("UTF-8")));
+ }
+ }
+ }
+
+ private void setupMockColumnFamiliesForBloomType(Table table,
+ Map<String, BloomType> familyToBloomType) throws IOException {
+ HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAMES[0]);
+ for (Entry<String, BloomType> entry : familyToBloomType.entrySet()) {
+ mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
+ .setMaxVersions(1)
+ .setBloomFilterType(entry.getValue())
+ .setBlockCacheEnabled(false)
+ .setTimeToLive(0));
+ }
+ Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
+ }
+
+ /**
+ * @return a map from column family names to bloom filter types for
+ * testing column family bloom filter settings. Column family names have special characters
+ */
+ private Map<String, BloomType>
+ getMockColumnFamiliesForBloomType (int numCfs) {
+ Map<String, BloomType> familyToBloomType = new HashMap<>();
+ // use column family names having special characters
+ if (numCfs-- > 0) {
+ familyToBloomType.put("Family1!@#!@#&", BloomType.ROW);
+ }
+ if (numCfs-- > 0) {
+ familyToBloomType.put("Family2=asdads&!AASD",
+ BloomType.ROWCOL);
+ }
+ if (numCfs-- > 0) {
+ familyToBloomType.put("Family3", BloomType.NONE);
+ }
+ return familyToBloomType;
+ }
+
+ /**
+ * Test for {@link HFileOutputFormat2#configureBlockSize(HTableDescriptor, Configuration)} and
+ * {@link HFileOutputFormat2#createFamilyBlockSizeMap(Configuration)}.
+ * Tests that the block size map is correctly serialized into
+ * and deserialized from configuration
+ *
+ * @throws IOException
+ */
+ @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
+ public void testSerializeDeserializeFamilyBlockSizeMap() throws IOException {
+ for (int numCfs = 0; numCfs <= 3; numCfs++) {
+ Configuration conf = new Configuration(this.util.getConfiguration());
+ Map<String, Integer> familyToBlockSize =
+ getMockColumnFamiliesForBlockSize(numCfs);
+ Table table = Mockito.mock(Table.class);
+ setupMockColumnFamiliesForBlockSize(table,
+ familyToBlockSize);
+ conf.set(HFileOutputFormat2.BLOCK_SIZE_FAMILIES_CONF_KEY,
+ HFileOutputFormat2.serializeColumnFamilyAttribute
+ (HFileOutputFormat2.blockSizeDetails, Arrays.asList(table
+ .getTableDescriptor())));
+
+ // read back family specific data block encoding settings from the
+ // configuration
+ Map<byte[], Integer> retrievedFamilyToBlockSizeMap =
+ HFileOutputFormat2
+ .createFamilyBlockSizeMap(conf);
+
+ // test that we have a value for all column families that matches with the
+ // used mock values
+ for (Entry<String, Integer> entry : familyToBlockSize.entrySet()) {
+ assertEquals("BlockSize configuration incorrect for column family:"
+ + entry.getKey(), entry.getValue(),
+ retrievedFamilyToBlockSizeMap.get(entry.getKey().getBytes("UTF-8")));
+ }
+ }
+ }
+
+ private void setupMockColumnFamiliesForBlockSize(Table table,
+ Map<String, Integer> familyToBlockSize) throws IOException {
+ HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAMES[0]);
+ for (Entry<String, Integer> entry : familyToBlockSize.entrySet()) {
+ mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
+ .setMaxVersions(1)
+ .setBlocksize(entry.getValue())
+ .setBlockCacheEnabled(false)
+ .setTimeToLive(0));
+ }
+ Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
+ }
+
+ /**
+ * @return a map from column family names to block sizes for
+ * testing column family block size settings. Column family names have special characters
+ */
+ private Map<String, Integer>
+ getMockColumnFamiliesForBlockSize (int numCfs) {
+ Map<String, Integer> familyToBlockSize = new HashMap<>();
+ // use column family names having special characters
+ if (numCfs-- > 0) {
+ familyToBlockSize.put("Family1!@#!@#&", 1234);
+ }
+ if (numCfs-- > 0) {
+ familyToBlockSize.put("Family2=asdads&!AASD",
+ Integer.MAX_VALUE);
+ }
+ if (numCfs-- > 0) {
+ familyToBlockSize.put("Family2=asdads&!AASD",
+ Integer.MAX_VALUE);
+ }
+ if (numCfs-- > 0) {
+ familyToBlockSize.put("Family3", 0);
+ }
+ return familyToBlockSize;
+ }
+
+ /**
+ * Test for {@link HFileOutputFormat2#configureDataBlockEncoding(HTableDescriptor, Configuration)}
+ * and {@link HFileOutputFormat2#createFamilyDataBlockEncodingMap(Configuration)}.
+ * Tests that the data block encoding map is correctly serialized into
+ * and deserialized from configuration
+ *
+ * @throws IOException
+ */
+ @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
+ public void testSerializeDeserializeFamilyDataBlockEncodingMap() throws IOException {
+ for (int numCfs = 0; numCfs <= 3; numCfs++) {
+ Configuration conf = new Configuration(this.util.getConfiguration());
+ Map<String, DataBlockEncoding> familyToDataBlockEncoding =
+ getMockColumnFamiliesForDataBlockEncoding(numCfs);
+ Table table = Mockito.mock(Table.class);
+ setupMockColumnFamiliesForDataBlockEncoding(table,
+ familyToDataBlockEncoding);
+ HTableDescriptor tableDescriptor = table.getTableDescriptor();
+ conf.set(HFileOutputFormat2.DATABLOCK_ENCODING_FAMILIES_CONF_KEY,
+ HFileOutputFormat2.serializeColumnFamilyAttribute
+ (HFileOutputFormat2.dataBlockEncodingDetails, Arrays
+ .asList(tableDescriptor)));
+
+ // read back family specific data block encoding settings from the
+ // configuration
+ Map<byte[], DataBlockEncoding> retrievedFamilyToDataBlockEncodingMap =
+ HFileOutputFormat2
+ .createFamilyDataBlockEncodingMap(conf);
+
+ // test that we have a value for all column families that matches with the
+ // used mock values
+ for (Entry<String, DataBlockEncoding> entry : familyToDataBlockEncoding.entrySet()) {
+ assertEquals("DataBlockEncoding configuration incorrect for column family:"
+ + entry.getKey(), entry.getValue(),
+ retrievedFamilyToDataBlockEncodingMap.get(entry.getKey().getBytes("UTF-8")));
+ }
+ }
+ }
+
+ private void setupMockColumnFamiliesForDataBlockEncoding(Table table,
+ Map<String, DataBlockEncoding> familyToDataBlockEncoding) throws IOException {
+ HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAMES[0]);
+ for (Entry<String, DataBlockEncoding> entry : familyToDataBlockEncoding.entrySet()) {
+ mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
+ .setMaxVersions(1)
+ .setDataBlockEncoding(entry.getValue())
+ .setBlockCacheEnabled(false)
+ .setTimeToLive(0));
+ }
+ Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
+ }
+
+ /**
+ * @return a map from column family names to data block encodings for
+ * testing column family data block encoding settings. Column family names have special characters
+ */
+ private Map<String, DataBlockEncoding>
+ getMockColumnFamiliesForDataBlockEncoding (int numCfs) {
+ Map<String, DataBlockEncoding> familyToDataBlockEncoding = new HashMap<>();
+ // use column family names having special characters
+ if (numCfs-- > 0) {
+ familyToDataBlockEncoding.put("Family1!@#!@#&", DataBlockEncoding.DIFF);
+ }
+ if (numCfs-- > 0) {
+ familyToDataBlockEncoding.put("Family2=asdads&!AASD",
+ DataBlockEncoding.FAST_DIFF);
+ }
+ if (numCfs-- > 0) {
+ familyToDataBlockEncoding.put("Family2=asdads&!AASD",
+ DataBlockEncoding.PREFIX);
+ }
+ if (numCfs-- > 0) {
+ familyToDataBlockEncoding.put("Family3", DataBlockEncoding.NONE);
+ }
+ return familyToDataBlockEncoding;
+ }
+
+ private void setupMockStartKeys(RegionLocator table) throws IOException {
+ byte[][] mockKeys = new byte[][] {
+ HConstants.EMPTY_BYTE_ARRAY,
+ Bytes.toBytes("aaa"),
+ Bytes.toBytes("ggg"),
+ Bytes.toBytes("zzz")
+ };
+ Mockito.doReturn(mockKeys).when(table).getStartKeys();
+ }
+
+ private void setupMockTableName(RegionLocator table) throws IOException {
+ TableName mockTableName = TableName.valueOf("mock_table");
+ Mockito.doReturn(mockTableName).when(table).getName();
+ }
+
+ /**
+ * Test that {@link HFileOutputFormat2} RecordWriter uses compression and
+ * bloom filter settings from the column family descriptor
+ */
+ @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
+ public void testColumnFamilySettings() throws Exception {
+ Configuration conf = new Configuration(this.util.getConfiguration());
+ RecordWriter<ImmutableBytesWritable, Cell> writer = null;
+ TaskAttemptContext context = null;
+ Path dir = util.getDataTestDir("testColumnFamilySettings");
+
+ // Setup table descriptor
+ Table table = Mockito.mock(Table.class);
+ RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
+ HTableDescriptor htd = new HTableDescriptor(TABLE_NAMES[0]);
+ Mockito.doReturn(htd).when(table).getTableDescriptor();
+ for (HColumnDescriptor hcd: HBaseTestingUtility.generateColumnDescriptors()) {
+ htd.addFamily(hcd);
+ }
+
+ // set up the table to return some mock keys
+ setupMockStartKeys(regionLocator);
+
+ try {
+ // partial map red setup to get an operational writer for testing
+ // We turn off the sequence file compression, because DefaultCodec
+ // pollutes the GZip codec pool with an incompatible compressor.
+ conf.set("io.seqfile.compression.type", "NONE");
+ conf.set("hbase.fs.tmp.dir", dir.toString());
+ // turn locality off to eliminate getRegionLocation fail-and-retry time when writing kvs
+ conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false);
+
+ Job job = new Job(conf, "testLocalMRIncrementalLoad");
+ job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilySettings"));
+ setupRandomGeneratorMapper(job, false);
+ HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
+ FileOutputFormat.setOutputPath(job, dir);
+ context = createTestTaskAttemptContext(job);
+ HFileOutputFormat2 hof = new HFileOutputFormat2();
+ writer = hof.getRecordWriter(context);
+
+ // write out random rows
+ writeRandomKeyValues(writer, context, htd.getFamiliesKeys(), ROWSPERSPLIT);
+ writer.close(context);
+
+ // Make sure that a directory was created for every CF
+ FileSystem fs = dir.getFileSystem(conf);
+
+ // commit so that the filesystem has one directory per column family
+ hof.getOutputCommitter(context).commitTask(context);
+ hof.getOutputCommitter(context).commitJob(context);
+ FileStatus[] families = FSUtils.listStatus(fs, dir, new FSUtils.FamilyDirFilter(fs));
+ assertEquals(htd.getFamilies().size(), families.length);
+ for (FileStatus f : families) {
+ String familyStr = f.getPath().getName();
+ HColumnDescriptor hcd = htd.getFamily(Bytes.toBytes(familyStr));
+ // verify that the compression on this file matches the configured
+ // compression
+ Path dataFilePath = fs.listStatus(f.getPath())[0].getPath();
+ Reader reader = HFile.createReader(fs, dataFilePath, new CacheConfig(conf), true, conf);
+ Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
+
+ byte[] bloomFilter = fileInfo.get(StoreFile.BLOOM_FILTER_TYPE_KEY);
+ if (bloomFilter == null) bloomFilter = Bytes.toBytes("NONE");
+ assertEquals("Incorrect bloom filter used for column family " + familyStr +
+ "(reader: " + reader + ")",
+ hcd.getBloomFilterType(), BloomType.valueOf(Bytes.toString(bloomFilter)));
+ assertEquals("Incorrect compression used for column family " + familyStr +
+ "(reader: " + reader + ")", hcd.getCompressionType(), reader.getFileContext().getCompression());
+ }
+ } finally {
+ dir.getFileSystem(conf).delete(dir, true);
+ }
+ }
+
+ /**
+ * Write random values to the writer assuming a table created using
+ * {@link #FAMILIES} as column family descriptors
+ */
+ private void writeRandomKeyValues(RecordWriter<ImmutableBytesWritable, Cell> writer,
+ TaskAttemptContext context, Set<byte[]> families, int numRows)
+ throws IOException, InterruptedException {
+ byte keyBytes[] = new byte[Bytes.SIZEOF_INT];
+ int valLength = 10;
+ byte valBytes[] = new byte[valLength];
+
+ int taskId = context.getTaskAttemptID().getTaskID().getId();
+ assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";
+ final byte [] qualifier = Bytes.toBytes("data");
+ Random random = new Random();
+ for (int i = 0; i < numRows; i++) {
+
+ Bytes.putInt(keyBytes, 0, i);
+ random.nextBytes(valBytes);
+ ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);
+
+ for (byte[] family : families) {
+ Cell kv = new KeyValue(keyBytes, family, qualifier, valBytes);
+ writer.write(key, kv);
+ }
+ }
+ }
+
+ /**
+ * This test covers the scenario from HBASE-6901: all files are bulk loaded
+ * and excluded from minor compaction. Without the fix for HBASE-6901,
+ * an ArrayIndexOutOfBoundsException would be thrown.
+ */
+ @Ignore ("Flakey: See HBASE-9051") @Test
+ public void testExcludeAllFromMinorCompaction() throws Exception {
+ Configuration conf = util.getConfiguration();
+ conf.setInt("hbase.hstore.compaction.min", 2);
+ generateRandomStartKeys(5);
+
+ util.startMiniCluster();
+ try (Connection conn = ConnectionFactory.createConnection();
+ Admin admin = conn.getAdmin();
+ Table table = util.createTable(TABLE_NAMES[0], FAMILIES);
+ RegionLocator locator = conn.getRegionLocator(TABLE_NAMES[0])) {
+ final FileSystem fs = util.getDFSCluster().getFileSystem();
+ assertEquals("Should start with empty table", 0, util.countRows(table));
+
+ // deep inspection: get the StoreFile dir
+ final Path storePath = new Path(
+ FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAMES[0]),
+ new Path(admin.getTableRegions(TABLE_NAMES[0]).get(0).getEncodedName(),
+ Bytes.toString(FAMILIES[0])));
+ assertEquals(0, fs.listStatus(storePath).length);
+
+ // Generate two bulk load files
+ conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
+ true);
+
+ for (int i = 0; i < 2; i++) {
+ Path testDir = util.getDataTestDirOnTestFS("testExcludeAllFromMinorCompaction_" + i);
+ runIncrementalPELoad(conf, Arrays.asList(new HFileOutputFormat2.TableInfo(table
+ .getTableDescriptor(), conn.getRegionLocator(TABLE_NAMES[0]))), testDir, false);
+ // Perform the actual load
+ new LoadIncrementalHFiles(conf).doBulkLoad(testDir, admin, table, locator);
+ }
+
+ // Ensure data shows up
+ int expectedRows = 2 * NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
+ assertEquals("LoadIncrementalHFiles should put expected data in table",
+ expectedRows, util.countRows(table));
+
+ // should have a second StoreFile now
+ assertEquals(2, fs.listStatus(storePath).length);
+
+ // minor compactions shouldn't get rid of the file
+ admin.compact(TABLE_NAMES[0]);
+ try {
+ quickPoll(new Callable<Boolean>() {
+ @Override
+ public Boolean call() throws Exception {
+ List<HRegion> regions = util.getMiniHBaseCluster().getRegions(TABLE_NAMES[0]);
+ for (HRegion region : regions) {
+ for (Store store : region.getStores()) {
+ store.closeAndArchiveCompactedFiles();
+ }
+ }
+ return fs.listStatus(storePath).length == 1;
+ }
+ }, 5000);
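+ // Reaching this line means the minor compaction collapsed the store files down to one,
+ // which should not happen; the expected path is that quickPoll() times out and fail()
+ // throws the AssertionError caught below.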
+ throw new IOException("SF# = " + fs.listStatus(storePath).length);
+ } catch (AssertionError ae) {
+ // this is expected behavior
+ }
+
+ // a major compaction should work though
+ admin.majorCompact(TABLE_NAMES[0]);
+ quickPoll(new Callable<Boolean>() {
+ @Override
+ public Boolean call() throws Exception {
+ List<HRegion> regions = util.getMiniHBaseCluster().getRegions(TABLE_NAMES[0]);
+ for (HRegion region : regions) {
+ for (Store store : region.getStores()) {
+ store.closeAndArchiveCompactedFiles();
+ }
+ }
+ return fs.listStatus(storePath).length == 1;
+ }
+ }, 5000);
+
+ } finally {
+ util.shutdownMiniCluster();
+ }
+ }
+
+ @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
+ public void testExcludeMinorCompaction() throws Exception {
+ Configuration conf = util.getConfiguration();
+ conf.setInt("hbase.hstore.compaction.min", 2);
+ generateRandomStartKeys(5);
+
+ util.startMiniCluster();
+ try (Connection conn = ConnectionFactory.createConnection(conf);
+ Admin admin = conn.getAdmin()){
+ Path testDir = util.getDataTestDirOnTestFS("testExcludeMinorCompaction");
+ final FileSystem fs = util.getDFSCluster().getFileSystem();
+ Table table = util.createTable(TABLE_NAMES[0], FAMILIES);
+ assertEquals("Should start with empty table", 0, util.countRows(table));
+
+ // deep inspection: get the StoreFile dir
+ final Path storePath = new Path(
+ FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAMES[0]),
+ new Path(admin.getTableRegions(TABLE_NAMES[0]).get(0).getEncodedName(),
+ Bytes.toString(FAMILIES[0])));
+ assertEquals(0, fs.listStatus(storePath).length);
+
+ // put some data in it and flush to create a storefile
+ Put p = new Put(Bytes.toBytes("test"));
+ p.addColumn(FAMILIES[0], Bytes.toBytes("1"), Bytes.toBytes("1"));
+ table.put(p);
+ admin.flush(TABLE_NAMES[0]);
+ assertEquals(1, util.countRows(table));
+ quickPoll(new Callable<Boolean>() {
+ @Override
+ public Boolean call() throws Exception {
+ return fs.listStatus(storePath).length == 1;
+ }
+ }, 5000);
+
+ // Generate a bulk load file with more rows
+ conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
+ true);
+
+ RegionLocator regionLocator = conn.getRegionLocator(TABLE_NAMES[0]);
+ runIncrementalPELoad(conf, Arrays.asList(new HFileOutputFormat2.TableInfo(table
+ .getTableDescriptor(), regionLocator)), testDir, false);
+
+ // Perform the actual load
+ new LoadIncrementalHFiles(conf).doBulkLoad(testDir, admin, table, regionLocator);
+
+ // Ensure data shows up
+ int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
+ assertEquals("LoadIncrementalHFiles should put expected data in table",
+ expectedRows + 1, util.countRows(table));
+
+ // should have a second StoreFile now
+ assertEquals(2, fs.listStatus(storePath).length);
+
+ // minor compactions shouldn't get rid of the file
+ admin.compact(TABLE_NAMES[0]);
+ try {
+ quickPoll(new Callable<Boolean>() {
+ @Override
+ public Boolean call() throws Exception {
+ return fs.listStatus(storePath).length == 1;
+ }
+ }, 5000);
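+ // As above, the expected path is that quickPoll() times out and throws the AssertionError caught below.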
+ throw new IOException("SF# = " + fs.listStatus(storePath).length);
+ } catch (AssertionError ae) {
+ // this is expected behavior
+ }
+
+ // a major compaction should work though
+ admin.majorCompact(TABLE_NAMES[0]);
+ quickPoll(new Callable<Boolean>() {
+ @Override
+ public Boolean call() throws Exception {
+ return fs.listStatus(storePath).length == 1;
+ }
+ }, 5000);
+
+ } finally {
+ util.shutdownMiniCluster();
+ }
+ }
+
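+ /**
+ * Polls the callable every 10 ms until it returns true or waitMs elapses; fails the test on timeout.
+ */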
+ private void quickPoll(Callable<Boolean> c, int waitMs) throws Exception {
+ int sleepMs = 10;
+ int retries = (int) Math.ceil(((double) waitMs) / sleepMs);
+ while (retries-- > 0) {
+ if (c.call().booleanValue()) {
+ return;
+ }
+ Thread.sleep(sleepMs);
+ }
+ fail();
+ }
+
+ public static void main(String args[]) throws Exception {
+ new TestHFileOutputFormat2().manualTest(args);
+ }
+
+ public void manualTest(String args[]) throws Exception {
+ Configuration conf = HBaseConfiguration.create();
+ util = new HBaseTestingUtility(conf);
+ if ("newtable".equals(args[0])) {
+ TableName tname = TableName.valueOf(args[1]);
+ byte[][] splitKeys = generateRandomSplitKeys(4);
+ Table table = util.createTable(tname, FAMILIES, splitKeys);
+ } else if ("incremental".equals(args[0])) {
+ TableName tname = TableName.valueOf(args[1]);
+ try(Connection c = ConnectionFactory.createConnection(conf);
+ Admin admin = c.getAdmin();
+ RegionLocator regionLocator = c.getRegionLocator(tname)) {
+ Path outDir = new Path("incremental-out");
+ runIncrementalPELoad(conf, Arrays.asList(new HFileOutputFormat2.TableInfo(admin
+ .getTableDescriptor(tname), regionLocator)), outDir, false);
+ }
+ } else {
+ throw new RuntimeException(
+ "usage: TestHFileOutputFormat2 newtable | incremental");
+ }
+ }
+
+ @Test
+ public void testBlockStoragePolicy() throws Exception {
+ util = new HBaseTestingUtility();
+ Configuration conf = util.getConfiguration();
+ conf.set(HFileOutputFormat2.STORAGE_POLICY_PROPERTY, "ALL_SSD");
+
+ conf.set(HFileOutputFormat2.STORAGE_POLICY_PROPERTY_CF_PREFIX +
+ Bytes.toString(HFileOutputFormat2.combineTableNameSuffix(
+ TABLE_NAMES[0].getName(), FAMILIES[0])), "ONE_SSD");
+ Path cf1Dir = new Path(util.getDataTestDir(), Bytes.toString(FAMILIES[0]));
+ Path cf2Dir = new Path(util.getDataTestDir(), Bytes.toString(FAMILIES[1]));
+ util.startMiniDFSCluster(3);
+ FileSystem fs = util.getDFSCluster().getFileSystem();
+ try {
+ fs.mkdirs(cf1Dir);
+ fs.mkdirs(cf2Dir);
+
+ // the original block storage policy would be HOT
+ String spA = getStoragePolicyName(fs, cf1Dir);
+ String spB = getStoragePolicyName(fs, cf2Dir);
+ LOG.debug("Storage policy of cf 0: [" + spA + "].");
+ LOG.debug("Storage policy of cf 1: [" + spB + "].");
+ assertEquals("HOT", spA);
+ assertEquals("HOT", spB);
+
+ // alter table cf schema to change storage policies
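+ // FAMILIES[0] has a family-specific ONE_SSD override; FAMILIES[1] falls back to the table-wide ALL_SSD default.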
+ HFileOutputFormat2.configureStoragePolicy(conf, fs,
+ HFileOutputFormat2.combineTableNameSuffix(TABLE_NAMES[0].getName(), FAMILIES[0]), cf1Dir);
+ HFileOutputFormat2.configureStoragePolicy(conf, fs,
+ HFileOutputFormat2.combineTableNameSuffix(TABLE_NAMES[0].getName(), FAMILIES[1]), cf2Dir);
+ spA = getStoragePolicyName(fs, cf1Dir);
+ spB = getStoragePolicyName(fs, cf2Dir);
+ LOG.debug("Storage policy of cf 0: [" + spA + "].");
+ LOG.debug("Storage policy of cf 1: [" + spB + "].");
+ assertNotNull(spA);
+ assertEquals("ONE_SSD", spA);
+ assertNotNull(spB);
+ assertEquals("ALL_SSD", spB);
+ } finally {
+ fs.delete(cf1Dir, true);
+ fs.delete(cf2Dir, true);
+ util.shutdownMiniDFSCluster();
+ }
+ }
+
+ private String getStoragePolicyName(FileSystem fs, Path path) {
+ try {
+ Object blockStoragePolicySpi = ReflectionUtils.invokeMethod(fs, "getStoragePolicy", path);
+ return (String) ReflectionUtils.invokeMethod(blockStoragePolicySpi, "getName");
+ } catch (Exception e) {
+ // This may fail on older HDFS versions; fall back to the old way.
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Failed to get policy directly", e);
+ }
+ String policy = getStoragePolicyNameForOldHDFSVersion(fs, path);
+ return policy == null ? "HOT" : policy; // HOT by default
+ }
+ }
+
+ private String getStoragePolicyNameForOldHDFSVersion(FileSystem fs, Path path) {
+ try {
+ if (fs instanceof DistributedFileSystem) {
+ DistributedFileSystem dfs = (DistributedFileSystem) fs;
+ HdfsFileStatus status = dfs.getClient().getFileInfo(path.toUri().getPath());
+ if (null != status) {
+ byte storagePolicyId = status.getStoragePolicy();
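+ // ID_UNSPECIFIED is looked up reflectively, presumably so this code works across HDFS
+ // versions that may or may not expose the constant.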
+ Field idUnspecified = BlockStoragePolicySuite.class.getField("ID_UNSPECIFIED");
+ if (storagePolicyId != idUnspecified.getByte(BlockStoragePolicySuite.class)) {
+ BlockStoragePolicy[] policies = dfs.getStoragePolicies();
+ for (BlockStoragePolicy policy : policies) {
+ if (policy.getId() == storagePolicyId) {
+ return policy.getName();
+ }
+ }
+ }
+ }
+ }
+ } catch (Throwable e) {
+ LOG.warn("failed to get block storage policy of [" + path + "]", e);
+ }
+
+ return null;
+ }
+}
+
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHRegionPartitioner.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHRegionPartitioner.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHRegionPartitioner.java
new file mode 100644
index 0000000..c0debb4
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHRegionPartitioner.java
@@ -0,0 +1,71 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
+ * agreements. See the NOTICE file distributed with this work for additional information regarding
+ * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with the License. You may
+ * obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mapreduce;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.testclassification.MapReduceTests;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+
+import static org.junit.Assert.assertEquals;
+
+@Category({MapReduceTests.class, MediumTests.class})
+public class TestHRegionPartitioner {
+ private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
+
+ @Rule
+ public TestName name = new TestName();
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ UTIL.startMiniCluster();
+ }
+
+ @AfterClass
+ public static void afterClass() throws Exception {
+ UTIL.shutdownMiniCluster();
+ }
+
+ /**
+ * Test HRegionPartitioner
+ */
+ @Test (timeout=300000)
+ public void testHRegionPartitioner() throws Exception {
+
+ byte[][] families = { Bytes.toBytes("familyA"), Bytes.toBytes("familyB") };
+
+ UTIL.createTable(TableName.valueOf(name.getMethodName()), families, 1,
+ Bytes.toBytes("aa"), Bytes.toBytes("cc"), 3);
+
+ HRegionPartitioner<Long, Long> partitioner = new HRegionPartitioner<>();
+ Configuration configuration = UTIL.getConfiguration();
+ configuration.set(TableOutputFormat.OUTPUT_TABLE, name.getMethodName());
+ partitioner.setConf(configuration);
+ ImmutableBytesWritable writable = new ImmutableBytesWritable(Bytes.toBytes("bb"));
+
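+ // The table is split at "aa" and "cc" into three regions, so row "bb" lives in the middle one:
+ // with three reducers it maps to partition 1, and with a single reducer everything maps to partition 0.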
+ assertEquals(1, partitioner.getPartition(writable, 10L, 3));
+ assertEquals(0, partitioner.getPartition(writable, 10L, 1));
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHashTable.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHashTable.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHashTable.java
new file mode 100644
index 0000000..87e7852
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHashTable.java
@@ -0,0 +1,194 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.MapFile;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.ImmutableMap;
+import org.apache.hadoop.hbase.shaded.com.google.common.collect.Maps;
+import org.junit.rules.TestName;
+
+/**
+ * Basic test for the HashTable M/R tool
+ */
+@Category(LargeTests.class)
+public class TestHashTable {
+
+ private static final Log LOG = LogFactory.getLog(TestHashTable.class);
+
+ private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+
+ @Rule
+ public TestName name = new TestName();
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ TEST_UTIL.startMiniCluster(3);
+ }
+
+ @AfterClass
+ public static void afterClass() throws Exception {
+ TEST_UTIL.shutdownMiniCluster();
+ }
+
+ @Test
+ public void testHashTable() throws Exception {
+ final TableName tableName = TableName.valueOf(name.getMethodName());
+ final byte[] family = Bytes.toBytes("family");
+ final byte[] column1 = Bytes.toBytes("c1");
+ final byte[] column2 = Bytes.toBytes("c2");
+ final byte[] column3 = Bytes.toBytes("c3");
+
+ int numRows = 100;
+ int numRegions = 10;
+ int numHashFiles = 3;
+
+ byte[][] splitRows = new byte[numRegions-1][];
+ for (int i = 1; i < numRegions; i++) {
+ splitRows[i-1] = Bytes.toBytes(numRows * i / numRegions);
+ }
+
+ long timestamp = 1430764183454L;
+ // put rows into the first table
+ Table t1 = TEST_UTIL.createTable(tableName, family, splitRows);
+ for (int i = 0; i < numRows; i++) {
+ Put p = new Put(Bytes.toBytes(i), timestamp);
+ p.addColumn(family, column1, column1);
+ p.addColumn(family, column2, column2);
+ p.addColumn(family, column3, column3);
+ t1.put(p);
+ }
+ t1.close();
+
+ HashTable hashTable = new HashTable(TEST_UTIL.getConfiguration());
+
+ Path testDir = TEST_UTIL.getDataTestDirOnTestFS(tableName.getNameAsString());
+
+ long batchSize = 300;
+ int code = hashTable.run(new String[] {
+ "--batchsize=" + batchSize,
+ "--numhashfiles=" + numHashFiles,
+ "--scanbatch=2",
+ tableName.getNameAsString(),
+ testDir.toString()});
+ assertEquals("test job failed", 0, code);
+
+ FileSystem fs = TEST_UTIL.getTestFileSystem();
+
+ HashTable.TableHash tableHash = HashTable.TableHash.read(fs.getConf(), testDir);
+ assertEquals(tableName.getNameAsString(), tableHash.tableName);
+ assertEquals(batchSize, tableHash.batchSize);
+ assertEquals(numHashFiles, tableHash.numHashFiles);
+ assertEquals(numHashFiles - 1, tableHash.partitions.size());
+ for (ImmutableBytesWritable bytes : tableHash.partitions) {
+ LOG.debug("partition: " + Bytes.toInt(bytes.get()));
+ }
+
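+ // Expected hash for each batch start key; these values are stable because the row data
+ // and the write timestamp are fixed above.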
+ ImmutableMap<Integer, ImmutableBytesWritable> expectedHashes
+ = ImmutableMap.<Integer, ImmutableBytesWritable>builder()
+ .put(-1, new ImmutableBytesWritable(Bytes.fromHex("714cb10a9e3b5569852980edd8c6ca2f")))
+ .put(5, new ImmutableBytesWritable(Bytes.fromHex("28d961d9252ce8f8d44a07b38d3e1d96")))
+ .put(10, new ImmutableBytesWritable(Bytes.fromHex("f6bbc4a224d8fd929b783a92599eaffa")))
+ .put(15, new ImmutableBytesWritable(Bytes.fromHex("522deb5d97f73a414ecc11457be46881")))
+ .put(20, new ImmutableBytesWritable(Bytes.fromHex("b026f2611aaa46f7110116d807545352")))
+ .put(25, new ImmutableBytesWritable(Bytes.fromHex("39ffc1a3094aa12a2e90ffd9cef2ce93")))
+ .put(30, new ImmutableBytesWritable(Bytes.fromHex("f6b4d75727ce9a30ac29e4f08f601666")))
+ .put(35, new ImmutableBytesWritable(Bytes.fromHex("422e2d2f1eb79a8f02171a705a42c090")))
+ .put(40, new ImmutableBytesWritable(Bytes.fromHex("559ad61c900fffefea0a15abf8a97bc3")))
+ .put(45, new ImmutableBytesWritable(Bytes.fromHex("23019084513eca41cee436b2a29611cb")))
+ .put(50, new ImmutableBytesWritable(Bytes.fromHex("b40467d222ddb4949b142fe145ee9edc")))
+ .put(55, new ImmutableBytesWritable(Bytes.fromHex("372bf89fcd8ca4b7ab3c1add9d07f7e4")))
+ .put(60, new ImmutableBytesWritable(Bytes.fromHex("69ae0585e6255de27dce974e332b8f8b")))
+ .put(65, new ImmutableBytesWritable(Bytes.fromHex("8029610044297aad0abdbecd485d8e59")))
+ .put(70, new ImmutableBytesWritable(Bytes.fromHex("de5f784f7f78987b6e57ecfd81c8646f")))
+ .put(75, new ImmutableBytesWritable(Bytes.fromHex("1cd757cc4e1715c8c3b1c24447a1ec56")))
+ .put(80, new ImmutableBytesWritable(Bytes.fromHex("f9a53aacfeb6142b08066615e7038095")))
+ .put(85, new ImmutableBytesWritable(Bytes.fromHex("89b872b7e639df32d3276b33928c0c91")))
+ .put(90, new ImmutableBytesWritable(Bytes.fromHex("45eeac0646d46a474ea0484175faed38")))
+ .put(95, new ImmutableBytesWritable(Bytes.fromHex("f57c447e32a08f4bf1abb2892839ac56")))
+ .build();
+
+ Map<Integer, ImmutableBytesWritable> actualHashes = new HashMap<>();
+ Path dataDir = new Path(testDir, HashTable.HASH_DATA_DIR);
+ for (int i = 0; i < numHashFiles; i++) {
+ Path hashPath = new Path(dataDir, HashTable.TableHash.getDataFileName(i));
+
+ MapFile.Reader reader = new MapFile.Reader(hashPath, fs.getConf());
+ ImmutableBytesWritable key = new ImmutableBytesWritable();
+ ImmutableBytesWritable hash = new ImmutableBytesWritable();
+ while(reader.next(key, hash)) {
+ String keyString = Bytes.toHex(key.get(), key.getOffset(), key.getLength());
+ LOG.debug("Key: " + (keyString.isEmpty() ? "-1" : Integer.parseInt(keyString, 16))
+ + " Hash: " + Bytes.toHex(hash.get(), hash.getOffset(), hash.getLength()));
+
+ int intKey = -1;
+ if (key.getLength() > 0) {
+ intKey = Bytes.toInt(key.get(), key.getOffset(), key.getLength());
+ }
+ if (actualHashes.containsKey(intKey)) {
+ Assert.fail("duplicate key in data files: " + intKey);
+ }
+ actualHashes.put(intKey, new ImmutableBytesWritable(hash.copyBytes()));
+ }
+ reader.close();
+ }
+
+ FileStatus[] files = fs.listStatus(testDir);
+ for (FileStatus file : files) {
+ LOG.debug("Output file: " + file.getPath());
+ }
+
+ files = fs.listStatus(dataDir);
+ for (FileStatus file : files) {
+ LOG.debug("Data file: " + file.getPath());
+ }
+
+ if (!expectedHashes.equals(actualHashes)) {
+ LOG.error("Diff: " + Maps.difference(expectedHashes, actualHashes));
+ }
+ Assert.assertEquals(expectedHashes, actualHashes);
+
+ TEST_UTIL.deleteTable(tableName);
+ TEST_UTIL.cleanupDataTestDirOnTestFS();
+ }
+
+
+}