You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ps...@apache.org on 2018/04/12 08:01:03 UTC
hbase git commit: HBASE-20376 RowCounter and CellCounter documentations are incorrect
Repository: hbase
Updated Branches:
refs/heads/master 5a69465ea -> c4ebf666b
HBASE-20376 RowCounter and CellCounter documentations are incorrect
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/c4ebf666
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/c4ebf666
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/c4ebf666
Branch: refs/heads/master
Commit: c4ebf666b78f92a6d02652eece8dd95360bd0482
Parents: 5a69465
Author: Peter Somogyi <ps...@apache.org>
Authored: Tue Apr 10 15:16:03 2018 +0200
Committer: Peter Somogyi <ps...@apache.org>
Committed: Thu Apr 12 10:00:38 2018 +0200
----------------------------------------------------------------------
bin/hbase | 6 +++
.../hadoop/hbase/mapreduce/CellCounter.java | 47 +++++++++++---------
.../hadoop/hbase/mapreduce/RowCounter.java | 6 +--
.../hadoop/hbase/mapreduce/TestRowCounter.java | 22 +++++----
src/main/asciidoc/_chapters/ops_mgt.adoc | 31 ++++++++-----
5 files changed, 64 insertions(+), 48 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hbase/blob/c4ebf666/bin/hbase
----------------------------------------------------------------------
diff --git a/bin/hbase b/bin/hbase
index 8e37f5f..f1e2306 100755
--- a/bin/hbase
+++ b/bin/hbase
@@ -106,6 +106,8 @@ if [ $# = 0 ]; then
echo " backup Backup tables for recovery"
echo " restore Restore tables from existing backup image"
echo " regionsplitter Run RegionSplitter tool"
+ echo " rowcounter Run RowCounter tool"
+ echo " cellcounter Run CellCounter tool"
echo " CLASSNAME Run the class named CLASSNAME"
exit 1
fi
@@ -465,6 +467,10 @@ elif [ "$COMMAND" = "version" ] ; then
CLASS='org.apache.hadoop.hbase.util.VersionInfo'
elif [ "$COMMAND" = "regionsplitter" ] ; then
CLASS='org.apache.hadoop.hbase.util.RegionSplitter'
+elif [ "$COMMAND" = "rowcounter" ] ; then
+ CLASS='org.apache.hadoop.hbase.mapreduce.RowCounter'
+elif [ "$COMMAND" = "cellcounter" ] ; then
+ CLASS='org.apache.hadoop.hbase.mapreduce.CellCounter'
else
CLASS=$COMMAND
fi
http://git-wip-us.apache.org/repos/asf/hbase/blob/c4ebf666/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCounter.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCounter.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCounter.java
index aa79aac..ff0f01c 100644
--- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCounter.java
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCounter.java
@@ -292,33 +292,38 @@ public class CellCounter extends Configured implements Tool {
@Override
public int run(String[] args) throws Exception {
if (args.length < 2) {
- System.err.println("ERROR: Wrong number of parameters: " + args.length);
- System.err.println("Usage: CellCounter ");
- System.err.println(" <tablename> <outputDir> <reportSeparator> [^[regex pattern] or " +
- "[Prefix] for row filter]] --starttime=[starttime] --endtime=[endtime]");
- System.err.println(" Note: -D properties will be applied to the conf used. ");
- System.err.println(" Additionally, all of the SCAN properties from TableInputFormat");
- System.err.println(" can be specified to get fine grained control on what is counted..");
- System.err.println(" -D " + TableInputFormat.SCAN_ROW_START + "=<rowkey>");
- System.err.println(" -D " + TableInputFormat.SCAN_ROW_STOP + "=<rowkey>");
- System.err.println(" -D " + TableInputFormat.SCAN_COLUMNS + "=\"<col1> <col2>...\"");
- System.err.println(" -D " + TableInputFormat.SCAN_COLUMN_FAMILY + "=<family1>,<family2>, ...");
- System.err.println(" -D " + TableInputFormat.SCAN_TIMESTAMP + "=<timestamp>");
- System.err.println(" -D " + TableInputFormat.SCAN_TIMERANGE_START + "=<timestamp>");
- System.err.println(" -D " + TableInputFormat.SCAN_TIMERANGE_END + "=<timestamp>");
- System.err.println(" -D " + TableInputFormat.SCAN_MAXVERSIONS + "=<count>");
- System.err.println(" -D " + TableInputFormat.SCAN_CACHEDROWS + "=<count>");
- System.err.println(" -D " + TableInputFormat.SCAN_BATCHSIZE + "=<count>");
- System.err.println(" <reportSeparator> parameter can be used to override the default report separator " +
- "string : used to separate the rowId/column family name and qualifier name.");
- System.err.println(" [^[regex pattern] or [Prefix] parameter can be used to limit the cell counter count " +
- "operation to a limited subset of rows from the table based on regex or prefix pattern.");
+ printUsage(args.length);
return -1;
}
Job job = createSubmittableJob(getConf(), args);
return (job.waitForCompletion(true) ? 0 : 1);
}
+ private void printUsage(int parameterCount) {
+ System.err.println("ERROR: Wrong number of parameters: " + parameterCount);
+ System.err.println("Usage: hbase cellcounter <tablename> <outputDir> [reportSeparator] "
+ + "[^[regex pattern] or [Prefix]] [--starttime=<starttime> --endtime=<endtime>]");
+ System.err.println(" Note: -D properties will be applied to the conf used.");
+ System.err.println(" Additionally, all of the SCAN properties from TableInputFormat can be "
+ + "specified to get fine grained control on what is counted.");
+ System.err.println(" -D" + TableInputFormat.SCAN_ROW_START + "=<rowkey>");
+ System.err.println(" -D" + TableInputFormat.SCAN_ROW_STOP + "=<rowkey>");
+ System.err.println(" -D" + TableInputFormat.SCAN_COLUMNS + "=\"<col1> <col2>...\"");
+ System.err.println(" -D" + TableInputFormat.SCAN_COLUMN_FAMILY
+ + "=<family1>,<family2>, ...");
+ System.err.println(" -D" + TableInputFormat.SCAN_TIMESTAMP + "=<timestamp>");
+ System.err.println(" -D" + TableInputFormat.SCAN_TIMERANGE_START + "=<timestamp>");
+ System.err.println(" -D" + TableInputFormat.SCAN_TIMERANGE_END + "=<timestamp>");
+ System.err.println(" -D" + TableInputFormat.SCAN_MAXVERSIONS + "=<count>");
+ System.err.println(" -D" + TableInputFormat.SCAN_CACHEDROWS + "=<count>");
+ System.err.println(" -D" + TableInputFormat.SCAN_BATCHSIZE + "=<count>");
+ System.err.println(" <reportSeparator> parameter can be used to override the default report "
+ + "separator string : used to separate the rowId/column family name and qualifier name.");
+ System.err.println(" [^[regex pattern] or [Prefix] parameter can be used to limit the cell "
+ + "counter count operation to a limited subset of rows from the table based on regex or "
+ + "prefix pattern.");
+ }
+
/**
* Main entry point.
* @param args The command line parameters.
http://git-wip-us.apache.org/repos/asf/hbase/blob/c4ebf666/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java
index 9c7b489..7fa5dec 100644
--- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java
@@ -221,9 +221,9 @@ public class RowCounter extends Configured implements Tool {
* Note that we don't document --expected-count, because it's intended for test.
*/
private static void printUsage() {
- System.err.println("Usage: RowCounter [options] <tablename> " +
- "[--starttime=[start] --endtime=[end] " +
- "[--range=[startKey],[endKey][;[startKey],[endKey]...]] [<column1> <column2>...]");
+ System.err.println("Usage: hbase rowcounter [options] <tablename> "
+ + "[--starttime=<start> --endtime=<end>] "
+ + "[--range=[startKey],[endKey][;[startKey],[endKey]...]] [<column1> <column2>...]");
System.err.println("For performance consider the following options:\n"
+ "-Dhbase.client.scanner.caching=100\n"
+ "-Dmapreduce.map.speculative=false");
http://git-wip-us.apache.org/repos/asf/hbase/blob/c4ebf666/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java
----------------------------------------------------------------------
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java
index b07de7f..18c1874 100644
--- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java
@@ -363,13 +363,7 @@ public class TestRowCounter {
} catch (SecurityException e) {
assertEquals(-1, newSecurityManager.getExitCode());
assertTrue(data.toString().contains("Wrong number of parameters:"));
- assertTrue(data.toString().contains(
- "Usage: RowCounter [options] <tablename> " +
- "[--starttime=[start] --endtime=[end] " +
- "[--range=[startKey],[endKey][;[startKey],[endKey]...]] " +
- "[<column1> <column2>...]"));
- assertTrue(data.toString().contains("-Dhbase.client.scanner.caching=100"));
- assertTrue(data.toString().contains("-Dmapreduce.map.speculative=false"));
+ assertUsageContent(data.toString());
}
data.reset();
try {
@@ -383,18 +377,22 @@ public class TestRowCounter {
assertTrue(data.toString().contains(
"Please specify range in such format as \"--range=a,b\" or, with only one boundary," +
" \"--range=,b\" or \"--range=a,\""));
- assertTrue(data.toString().contains(
- "Usage: RowCounter [options] <tablename> " +
- "[--starttime=[start] --endtime=[end] " +
- "[--range=[startKey],[endKey][;[startKey],[endKey]...]] " +
- "[<column1> <column2>...]"));
+ assertUsageContent(data.toString());
}
} finally {
System.setErr(oldPrintStream);
System.setSecurityManager(SECURITY_MANAGER);
}
+ }
+ private void assertUsageContent(String usage) {
+ assertTrue(usage.contains("Usage: hbase rowcounter [options] <tablename> "
+ + "[--starttime=<start> --endtime=<end>] "
+ + "[--range=[startKey],[endKey][;[startKey],[endKey]...]] [<column1> <column2>...]"));
+ assertTrue(usage.contains("For performance consider the following options:"));
+ assertTrue(usage.contains("-Dhbase.client.scanner.caching=100"));
+ assertTrue(usage.contains("-Dmapreduce.map.speculative=false"));
}
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/c4ebf666/src/main/asciidoc/_chapters/ops_mgt.adoc
----------------------------------------------------------------------
diff --git a/src/main/asciidoc/_chapters/ops_mgt.adoc b/src/main/asciidoc/_chapters/ops_mgt.adoc
index 82badb4..38a7dff 100644
--- a/src/main/asciidoc/_chapters/ops_mgt.adoc
+++ b/src/main/asciidoc/_chapters/ops_mgt.adoc
@@ -68,8 +68,12 @@ Some commands take arguments. Pass no args or -h for usage.
pe Run PerformanceEvaluation
ltt Run LoadTestTool
canary Run the Canary tool
- regionsplitter Run the RegionSplitter tool
version Print the version
+ backup Backup tables for recovery
+ restore Restore tables from existing backup image
+ regionsplitter Run RegionSplitter tool
+ rowcounter Run RowCounter tool
+ cellcounter Run CellCounter tool
CLASSNAME Run the class named CLASSNAME
----
@@ -744,24 +748,28 @@ For performance also consider the following options:
----
[[rowcounter]]
-=== RowCounter and CellCounter
+=== RowCounter
-link:https://hbase.apache.org/apidocs/org/apache/hadoop/hbase/mapreduce/RowCounter.html[RowCounter] is a mapreduce job to count all the rows of a table.
+link:https://hbase.apache.org/apidocs/org/apache/hadoop/hbase/mapreduce/RowCounter.html[RowCounter] is a mapreduce job to count all the rows of a table.
This is a good utility to use as a sanity check to ensure that HBase can read all the blocks of a table if there are any concerns of metadata inconsistency.
-It will run the mapreduce all in a single process but it will run faster if you have a MapReduce cluster in place for it to exploit. It is also possible to limit
-the time range of data to be scanned by using the `--starttime=[starttime]` and `--endtime=[endtime]` flags.
+It will run the mapreduce all in a single process but it will run faster if you have a MapReduce cluster in place for it to exploit.
+It is possible to limit the time range of data to be scanned by using the `--starttime=[starttime]` and `--endtime=[endtime]` flags.
+The scanned data can be limited based on keys using the `--range=[startKey],[endKey][;[startKey],[endKey]...]` option.
----
-$ bin/hbase org.apache.hadoop.hbase.mapreduce.RowCounter <tablename> [<column1> <column2>...]
+$ bin/hbase rowcounter [options] <tablename> [--starttime=<start> --endtime=<end>] [--range=[startKey],[endKey][;[startKey],[endKey]...]] [<column1> <column2>...]
----
RowCounter only counts one version per cell.
-Note: caching for the input Scan is configured via `hbase.client.scanner.caching` in the job configuration.
+For performance consider to use `-Dhbase.client.scanner.caching=100` and `-Dmapreduce.map.speculative=false` options.
+
+[[cellcounter]]
+=== CellCounter
HBase ships another diagnostic mapreduce job called link:https://hbase.apache.org/apidocs/org/apache/hadoop/hbase/mapreduce/CellCounter.html[CellCounter].
Like RowCounter, it gathers more fine-grained statistics about your table.
-The statistics gathered by RowCounter are more fine-grained and include:
+The statistics gathered by CellCounter are more fine-grained and include:
* Total number of rows in the table.
* Total number of CFs across all rows.
@@ -772,12 +780,12 @@ The statistics gathered by RowCounter are more fine-grained and include:
The program allows you to limit the scope of the run.
Provide a row regex or prefix to limit the rows to analyze.
-Specify a time range to scan the table by using the `--starttime=[starttime]` and `--endtime=[endtime]` flags.
+Specify a time range to scan the table by using the `--starttime=<starttime>` and `--endtime=<endtime>` flags.
Use `hbase.mapreduce.scan.column.family` to specify scanning a single column family.
----
-$ bin/hbase org.apache.hadoop.hbase.mapreduce.CellCounter <tablename> <outputDir> [regex or prefix]
+$ bin/hbase cellcounter <tablename> <outputDir> [reportSeparator] [regex or prefix] [--starttime=<starttime> --endtime=<endtime>]
----
Note: just like RowCounter, caching for the input Scan is configured via `hbase.client.scanner.caching` in the job configuration.
@@ -785,8 +793,7 @@ Note: just like RowCounter, caching for the input Scan is configured via `hbase.
=== mlockall
It is possible to optionally pin your servers in physical memory making them less likely to be swapped out in oversubscribed environments by having the servers call link:http://linux.die.net/man/2/mlockall[mlockall] on startup.
-See link:https://issues.apache.org/jira/browse/HBASE-4391[HBASE-4391 Add ability to
- start RS as root and call mlockall] for how to build the optional library and have it run on startup.
+See link:https://issues.apache.org/jira/browse/HBASE-4391[HBASE-4391 Add ability to start RS as root and call mlockall] for how to build the optional library and have it run on startup.
[[compaction.tool]]
=== Offline Compaction Tool