You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by wc...@apache.org on 2020/06/23 15:23:03 UTC
[hbase] branch master updated: HBASE-21773 - Addendum - Bring back "public static Job createSubmitta… (#1953)
This is an automated email from the ASF dual-hosted git repository.
wchevreuil pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/master by this push:
new b556343 HBASE-21773 - Addendum - Bring back "public static Job createSubmitta… (#1953)
b556343 is described below
commit b5563432922268c7a16deacbb51bfba89c0a2aba
Author: Wellington Ramos Chevreuil <wc...@apache.org>
AuthorDate: Tue Jun 23 15:13:53 2020 +0100
HBASE-21773 - Addendum - Bring back "public static Job createSubmitta… (#1953)
Signed-off-by: Nick Dimiduk <nd...@apache.org>
Signed-off-by: Viraj Jasani <vj...@apache.org>
(cherry picked from commit edf75f2535b6f6853a997525efe3f6f4ef4984b1)
---
.../apache/hadoop/hbase/mapreduce/RowCounter.java | 97 ++++++++
.../hadoop/hbase/mapreduce/TestRowCounter.java | 272 +++++++++++++++++++--
2 files changed, 343 insertions(+), 26 deletions(-)
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java
index 0c87751..9c3ab48 100644
--- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java
@@ -137,6 +137,103 @@ public class RowCounter extends AbstractHBaseTool {
return job;
}
+  /**
+   * Sets up the actual job.
+   *
+   * <p>{@code args[0]} is the table name. Every following argument is either one of the
+   * {@code --range=}, {@code --starttime=}, {@code --endtime=} or {@code --expected-count=}
+   * switches, or a column given as {@code family} or {@code family:qualifier}. The value of
+   * {@code --expected-count=} is stored into the passed configuration.
+   *
+   * @param conf The current configuration.
+   * @param args The command line parameters.
+   * @return The newly created job, or {@code null} when the row range parameter is rejected
+   *   or the end time is lower than the start time.
+   * @throws IOException When setting up the job fails.
+   * @deprecated as of release 2.3.0. Will be removed on 4.0.0. Please use main method instead.
+   */
+  @Deprecated
+  public static Job createSubmittableJob(Configuration conf, String[] args)
+    throws IOException {
+    String tableName = args[0];
+    List<MultiRowRangeFilter.RowRange> rowRangeList = null;
+    long startTime = 0;
+    long endTime = 0;
+
+    // Column arguments are collected space separated and split again after parsing.
+    StringBuilder sb = new StringBuilder();
+
+    final String rangeSwitch = "--range=";
+    final String startTimeArgKey = "--starttime=";
+    final String endTimeArgKey = "--endtime=";
+    final String expectedCountArg = "--expected-count=";
+
+    // First argument is table name, starting from second
+    for (int i = 1; i < args.length; i++) {
+      if (args[i].startsWith(rangeSwitch)) {
+        try {
+          // Strip the switch prefix from the argument being processed. The offset has to be
+          // derived from args[i] itself (not args[1]), otherwise a --range switch that is not
+          // the second argument is cut at the wrong position.
+          rowRangeList = parseRowRangeParameter(args[i].substring(rangeSwitch.length()));
+        } catch (IllegalArgumentException e) {
+          // Rejected range definition: report failure to the caller through a null job.
+          return null;
+        }
+        continue;
+      }
+      if (args[i].startsWith(startTimeArgKey)) {
+        startTime = Long.parseLong(args[i].substring(startTimeArgKey.length()));
+        continue;
+      }
+      if (args[i].startsWith(endTimeArgKey)) {
+        endTime = Long.parseLong(args[i].substring(endTimeArgKey.length()));
+        continue;
+      }
+      if (args[i].startsWith(expectedCountArg)) {
+        conf.setLong(EXPECTED_COUNT_KEY,
+          Long.parseLong(args[i].substring(expectedCountArg.length())));
+        continue;
+      }
+      // if no switch, assume column names
+      sb.append(args[i]);
+      sb.append(" ");
+    }
+    // Note: an unset --endtime (0) combined with a positive --starttime is rejected here too.
+    if (endTime < startTime) {
+      printUsage("--endtime=" + endTime + " needs to be greater than --starttime=" + startTime);
+      return null;
+    }
+
+    Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
+    job.setJarByClass(RowCounter.class);
+    Scan scan = new Scan();
+    scan.setCacheBlocks(false);
+    setScanFilter(scan, rowRangeList);
+    if (sb.length() > 0) {
+      for (String columnName : sb.toString().trim().split(" ")) {
+        // Split on the first ':' only, so the qualifier may itself contain colons.
+        String family = StringUtils.substringBefore(columnName, ":");
+        String qualifier = StringUtils.substringAfter(columnName, ":");
+
+        if (StringUtils.isBlank(qualifier)) {
+          scan.addFamily(Bytes.toBytes(family));
+        } else {
+          scan.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier));
+        }
+      }
+    }
+    // An end time of 0 means "not given": scan up to the latest timestamp.
+    scan.setTimeRange(startTime, endTime == 0 ? HConstants.LATEST_TIMESTAMP : endTime);
+    job.setOutputFormatClass(NullOutputFormat.class);
+    TableMapReduceUtil.initTableMapperJob(tableName, scan,
+      RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);
+    // Map-only job: the row count is taken from the mapper counters.
+    job.setNumReduceTasks(0);
+    return job;
+  }
+
+  /**
+   * Prints the given error message to stderr, followed by the usage line and the
+   * performance related options.
+   * Note that we don't document --expected-count, because it's intended for test.
+   *
+   * @param errorMessage the problem to report; printed after an "ERROR: " prefix.
+   */
+  private static void printUsage(String errorMessage) {
+    final String usageLine = "Usage: hbase rowcounter [options] <tablename> "
+      + "[--starttime=<start> --endtime=<end>] "
+      + "[--range=[startKey],[endKey][;[startKey],[endKey]...]] [<column1> <column2>...]";
+    final String performanceHints = "For performance consider the following options:\n"
+      + "-Dhbase.client.scanner.caching=100\n"
+      + "-Dmapreduce.map.speculative=false";
+
+    System.err.println("ERROR: " + errorMessage);
+    System.err.println(usageLine);
+    System.err.println(performanceHints);
+  }
+
private static List<MultiRowRangeFilter.RowRange> parseRowRangeParameter(String arg) {
final List<String> rangesSplit = Splitter.on(";").splitToList(arg);
final List<MultiRowRangeFilter.RowRange> rangeList = new ArrayList<>();
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java
index 12d121c..add1b58 100644
--- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java
@@ -36,6 +36,8 @@ import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.MapReduceTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.LauncherSecurityManager;
+import org.apache.hadoop.mapreduce.Counter;
+import org.apache.hadoop.mapreduce.Job;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.ClassRule;
@@ -52,7 +54,7 @@ public class TestRowCounter {
@ClassRule
public static final HBaseClassTestRule CLASS_RULE =
- HBaseClassTestRule.forClass(TestRowCounter.class);
+ HBaseClassTestRule.forClass(TestRowCounter.class);
private static final Logger LOG = LoggerFactory.getLogger(TestRowCounter.class);
private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
@@ -92,7 +94,7 @@ public class TestRowCounter {
@Test
public void testRowCounterNoColumn() throws Exception {
String[] args = new String[] {
- TABLE_NAME
+ TABLE_NAME
};
runRowCount(args, 10);
}
@@ -106,7 +108,7 @@ public class TestRowCounter {
@Test
public void testRowCounterExclusiveColumn() throws Exception {
String[] args = new String[] {
- TABLE_NAME, COL_FAM + ":" + COL1
+ TABLE_NAME, COL_FAM + ":" + COL1
};
runRowCount(args, 8);
}
@@ -120,7 +122,7 @@ public class TestRowCounter {
@Test
public void testRowCounterColumnWithColonInQualifier() throws Exception {
String[] args = new String[] {
- TABLE_NAME, COL_FAM + ":" + COMPOSITE_COLUMN
+ TABLE_NAME, COL_FAM + ":" + COMPOSITE_COLUMN
};
runRowCount(args, 8);
}
@@ -134,7 +136,7 @@ public class TestRowCounter {
@Test
public void testRowCounterHiddenColumn() throws Exception {
String[] args = new String[] {
- TABLE_NAME, COL_FAM + ":" + COL2
+ TABLE_NAME, COL_FAM + ":" + COL2
};
runRowCount(args, 10);
}
@@ -149,7 +151,7 @@ public class TestRowCounter {
@Test
public void testRowCounterColumnAndRowRange() throws Exception {
String[] args = new String[] {
- TABLE_NAME, "--range=\\x00rov,\\x00rox", COL_FAM + ":" + COL1
+ TABLE_NAME, "--range=\\x00rov,\\x00rox", COL_FAM + ":" + COL1
};
runRowCount(args, 8);
}
@@ -161,7 +163,7 @@ public class TestRowCounter {
@Test
public void testRowCounterRowSingleRange() throws Exception {
String[] args = new String[] {
- TABLE_NAME, "--range=\\x00row1,\\x00row3"
+ TABLE_NAME, "--range=\\x00row1,\\x00row3"
};
runRowCount(args, 2);
}
@@ -197,7 +199,7 @@ public class TestRowCounter {
@Test
public void testRowCounterRowMultiRange() throws Exception {
String[] args = new String[] {
- TABLE_NAME, "--range=\\x00row1,\\x00row3;\\x00row5,\\x00row8"
+ TABLE_NAME, "--range=\\x00row1,\\x00row3;\\x00row5,\\x00row8"
};
runRowCount(args, 5);
}
@@ -210,7 +212,7 @@ public class TestRowCounter {
@Test
public void testRowCounterRowMultiEmptyRange() throws Exception {
String[] args = new String[] {
- TABLE_NAME, "--range=\\x00row1,\\x00row3;;"
+ TABLE_NAME, "--range=\\x00row1,\\x00row3;;"
};
runRowCount(args, 2);
}
@@ -260,30 +262,30 @@ public class TestRowCounter {
table.close();
String[] args = new String[] {
- TABLE_NAME_TS_RANGE, COL_FAM + ":" + COL1,
- "--starttime=" + 0,
- "--endtime=" + ts
+ TABLE_NAME_TS_RANGE, COL_FAM + ":" + COL1,
+ "--starttime=" + 0,
+ "--endtime=" + ts
};
runRowCount(args, 1);
args = new String[] {
- TABLE_NAME_TS_RANGE, COL_FAM + ":" + COL1,
- "--starttime=" + 0,
- "--endtime=" + (ts - 10)
+ TABLE_NAME_TS_RANGE, COL_FAM + ":" + COL1,
+ "--starttime=" + 0,
+ "--endtime=" + (ts - 10)
};
runRowCount(args, 1);
args = new String[] {
- TABLE_NAME_TS_RANGE, COL_FAM + ":" + COL1,
- "--starttime=" + ts,
- "--endtime=" + (ts + 1000)
+ TABLE_NAME_TS_RANGE, COL_FAM + ":" + COL1,
+ "--starttime=" + ts,
+ "--endtime=" + (ts + 1000)
};
runRowCount(args, 2);
args = new String[] {
- TABLE_NAME_TS_RANGE, COL_FAM + ":" + COL1,
- "--starttime=" + (ts - 30 * 1000),
- "--endtime=" + (ts + 30 * 1000),
+ TABLE_NAME_TS_RANGE, COL_FAM + ":" + COL1,
+ "--starttime=" + (ts - 30 * 1000),
+ "--endtime=" + (ts + 30 * 1000),
};
runRowCount(args, 3);
}
@@ -308,6 +310,224 @@ public class TestRowCounter {
}
/**
+ * Run the RowCounter map reduce job and verify the row count.
+ *
+ * @param args the command line arguments to be used for rowcounter job.
+ * @param expectedCount the expected row count (result of map reduce job).
+ * @throws Exception in case of any unexpected error.
+ */
+  private void runCreateSubmittableJobWithArgs(String[] args, int expectedCount) throws Exception {
+    final Job rowCountJob = RowCounter.createSubmittableJob(TEST_UTIL.getConfiguration(), args);
+
+    // Run the job synchronously and log how long it took.
+    final long startedAt = System.currentTimeMillis();
+    rowCountJob.waitForCompletion(true);
+    final long elapsedMs = System.currentTimeMillis() - startedAt;
+    LOG.debug("row count duration (ms): " + elapsedMs);
+
+    // The job must have succeeded and its ROWS counter must match the expectation.
+    assertTrue(rowCountJob.isSuccessful());
+    final Counter rowsCounter =
+      rowCountJob.getCounters().findCounter(RowCounter.RowCounterMapper.Counters.ROWS);
+    assertEquals(expectedCount, rowsCounter.getValue());
+  }
+
+  /**
+   * Test a case when only the table name is passed, without any column or option.
+   *
+   * @throws Exception in case of any unexpected error.
+   */
+  @Test
+  public void testCreateSubmittableJobWithArgsNoColumn() throws Exception {
+    final String[] jobArgs = { TABLE_NAME };
+    runCreateSubmittableJobWithArgs(jobArgs, 10);
+  }
+
+  /**
+   * Test a case when the column passed on the command line is present
+   * in only some of the rows.
+   *
+   * @throws Exception in case of any unexpected error.
+   */
+  @Test
+  public void testCreateSubmittableJobWithArgsExclusiveColumn() throws Exception {
+    final String column = COL_FAM + ":" + COL1;
+    final String[] jobArgs = { TABLE_NAME, column };
+    runCreateSubmittableJobWithArgs(jobArgs, 8);
+  }
+
+  /**
+   * Test a case when the qualifier of the column passed on the command line
+   * itself contains colons.
+   *
+   * @throws Exception in case of any unexpected error.
+   */
+  @Test
+  public void testCreateSubmittableJobWithArgsColumnWithColonInQualifier() throws Exception {
+    final String column = COL_FAM + ":" + COMPOSITE_COLUMN;
+    final String[] jobArgs = { TABLE_NAME, column };
+    runCreateSubmittableJobWithArgs(jobArgs, 8);
+  }
+
+  /**
+   * Test a case when the column passed on the command line is not part of
+   * the first KV of a row.
+   *
+   * @throws Exception in case of any unexpected error.
+   */
+  @Test
+  public void testCreateSubmittableJobWithArgsHiddenColumn() throws Exception {
+    final String column = COL_FAM + ":" + COL2;
+    final String[] jobArgs = { TABLE_NAME, column };
+    runCreateSubmittableJobWithArgs(jobArgs, 10);
+  }
+
+
+  /**
+   * Test a case when a column that is present in only some of the rows is
+   * combined with a row range filter.
+   *
+   * @throws Exception in case of any unexpected error.
+   */
+  @Test
+  public void testCreateSubmittableJobWithArgsColumnAndRowRange() throws Exception {
+    final String rangeOption = "--range=\\x00rov,\\x00rox";
+    final String column = COL_FAM + ":" + COL1;
+    final String[] jobArgs = { TABLE_NAME, rangeOption, column };
+    runCreateSubmittableJobWithArgs(jobArgs, 8);
+  }
+
+  /**
+   * Test a case when exactly one range with both start and end key is given.
+   *
+   * @throws Exception in case of any unexpected error.
+   */
+  @Test
+  public void testCreateSubmittableJobWithArgsRowSingleRange() throws Exception {
+    final String rangeOption = "--range=\\x00row1,\\x00row3";
+    final String[] jobArgs = { TABLE_NAME, rangeOption };
+    runCreateSubmittableJobWithArgs(jobArgs, 2);
+  }
+
+  /**
+   * Test a case when exactly one range is given and it only has an end key.
+   *
+   * @throws Exception in case of any unexpected error.
+   */
+  @Test
+  public void testCreateSubmittableJobWithArgsRowSingleRangeUpperBound() throws Exception {
+    final String rangeOption = "--range=,\\x00row3";
+    final String[] jobArgs = { TABLE_NAME, rangeOption };
+    runCreateSubmittableJobWithArgs(jobArgs, 3);
+  }
+
+  /**
+   * Test a case when two ranges are given and the first one only has an end key.
+   *
+   * @throws Exception in case of any unexpected error.
+   */
+  @Test
+  public void testCreateSubmittableJobWithArgsRowMultiRangeUpperBound() throws Exception {
+    final String rangeOption = "--range=,\\x00row3;\\x00row5,\\x00row7";
+    final String[] jobArgs = { TABLE_NAME, rangeOption };
+    runCreateSubmittableJobWithArgs(jobArgs, 5);
+  }
+
+  /**
+   * Test a case when several ranges, each with start and end key, are given.
+   *
+   * @throws Exception in case of any unexpected error.
+   */
+  @Test
+  public void testCreateSubmittableJobWithArgsRowMultiRange() throws Exception {
+    final String rangeOption = "--range=\\x00row1,\\x00row3;\\x00row5,\\x00row8";
+    final String[] jobArgs = { TABLE_NAME, rangeOption };
+    runCreateSubmittableJobWithArgs(jobArgs, 5);
+  }
+
+  /**
+   * Test a case when several ranges are given, of which only the first one
+   * is filled and the two following ones are left empty.
+   *
+   * @throws Exception in case of any unexpected error.
+   */
+  @Test
+  public void testCreateSubmittableJobWithArgsRowMultiEmptyRange() throws Exception {
+    final String rangeOption = "--range=\\x00row1,\\x00row3;;";
+    final String[] jobArgs = { TABLE_NAME, rangeOption };
+    runCreateSubmittableJobWithArgs(jobArgs, 2);
+  }
+
+  /**
+   * Test a case when a single range is counted on a dedicated table that is
+   * populated through {@code writeRows(table, 10000, 0)}.
+   *
+   * @throws Exception in case of any unexpected error.
+   */
+  @Test
+  public void testCreateSubmittableJobWithArgs10kRowRange() throws Exception {
+    final String bigTableName = TABLE_NAME + "CreateSubmittableJobWithArgs10kRowRange";
+    final TableName bigTable = TableName.valueOf(bigTableName);
+
+    // The table is only needed for loading the rows; close it before running the job.
+    try (Table table = TEST_UTIL.createTable(bigTable, Bytes.toBytes(COL_FAM))) {
+      writeRows(table, 10000, 0);
+    }
+
+    final String[] jobArgs = { bigTableName, "--range=\\x00row9872,\\x00row9875" };
+    runCreateSubmittableJobWithArgs(jobArgs, 3);
+  }
+
+  /**
+   * Test a case when the timerange is specified with --starttime and --endtime options.
+   *
+   * <p>Three rows are written to a dedicated table: the first with one timestamp, the
+   * other two sharing a later timestamp. The job is then run with four different time
+   * windows and the counted rows are verified for each of them.
+   *
+   * @throws Exception in case of any unexpected error.
+   */
+  @Test
+  public void testCreateSubmittableJobWithArgsTimeRange() throws Exception {
+    final byte[] family = Bytes.toBytes(COL_FAM);
+    final byte[] col1 = Bytes.toBytes(COL1);
+    Put put1 = new Put(Bytes.toBytes("row_timerange_" + 1));
+    Put put2 = new Put(Bytes.toBytes("row_timerange_" + 2));
+    Put put3 = new Put(Bytes.toBytes("row_timerange_" + 3));
+
+    // After loading, holds the timestamp shared by the second and third row.
+    long ts;
+
+    String tableName = TABLE_NAME_TS_RANGE + "CreateSubmittableJobWithArgs";
+    // Create a table dedicated to this test. try-with-resources makes sure it is closed
+    // even when one of the puts (or the sleep) throws.
+    try (Table table =
+      TEST_UTIL.createTable(TableName.valueOf(tableName), Bytes.toBytes(COL_FAM))) {
+      ts = System.currentTimeMillis();
+      put1.addColumn(family, col1, ts, Bytes.toBytes("val1"));
+      table.put(put1);
+      // Keep the timestamp of the second batch apart from the first one.
+      Thread.sleep(100);
+
+      ts = System.currentTimeMillis();
+      put2.addColumn(family, col1, ts, Bytes.toBytes("val2"));
+      put3.addColumn(family, col1, ts, Bytes.toBytes("val3"));
+      table.put(put2);
+      table.put(put3);
+    }
+
+    // Window ending at the second timestamp: one row expected.
+    String[] args = new String[] {
+      tableName, COL_FAM + ":" + COL1,
+      "--starttime=" + 0,
+      "--endtime=" + ts
+    };
+    runCreateSubmittableJobWithArgs(args, 1);
+
+    // Window ending shortly before the second timestamp: still one row expected.
+    args = new String[] {
+      tableName, COL_FAM + ":" + COL1,
+      "--starttime=" + 0,
+      "--endtime=" + (ts - 10)
+    };
+    runCreateSubmittableJobWithArgs(args, 1);
+
+    // Window starting at the second timestamp: the two later rows expected.
+    args = new String[] {
+      tableName, COL_FAM + ":" + COL1,
+      "--starttime=" + ts,
+      "--endtime=" + (ts + 1000)
+    };
+    runCreateSubmittableJobWithArgs(args, 2);
+
+    // Window of 30 seconds around the second timestamp: all three rows expected.
+    args = new String[] {
+      tableName, COL_FAM + ":" + COL1,
+      "--starttime=" + (ts - 30 * 1000),
+      "--endtime=" + (ts + 30 * 1000),
+    };
+    runCreateSubmittableJobWithArgs(args, 3);
+  }
+
+ /**
* Writes TOTAL_ROWS number of distinct rows in to the table. Few rows have
* two columns, Few have one.
*
@@ -405,17 +625,17 @@ public class TestRowCounter {
private void assertUsageContent(String usage) {
assertTrue(usage.contains("usage: hbase rowcounter "
- + "<tablename> [options] [<column1> <column2>...]"));
+ + "<tablename> [options] [<column1> <column2>...]"));
assertTrue(usage.contains("Options:\n"));
assertTrue(usage.contains("--starttime=<arg> "
- + "starting time filter to start counting rows from.\n"));
+ + "starting time filter to start counting rows from.\n"));
assertTrue(usage.contains("--endtime=<arg> "
- + "end time filter limit, to only count rows up to this timestamp.\n"));
+ + "end time filter limit, to only count rows up to this timestamp.\n"));
assertTrue(usage.contains("--range=<arg> "
- + "[startKey],[endKey][;[startKey],[endKey]...]]\n"));
+ + "[startKey],[endKey][;[startKey],[endKey]...]]\n"));
assertTrue(usage.contains("--expectedCount=<arg> expected number of rows to be count.\n"));
assertTrue(usage.contains("For performance, "
- + "consider the following configuration properties:\n"));
+ + "consider the following configuration properties:\n"));
assertTrue(usage.contains("-Dhbase.client.scanner.caching=100\n"));
assertTrue(usage.contains("-Dmapreduce.map.speculative=false\n"));
}