You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2020/10/09 16:57:28 UTC

[hbase] branch branch-2.2 updated: HBASE-25109 Add MR Counters to WALPlayer; currently hard to tell if it is doing anything (#2468)

This is an automated email from the ASF dual-hosted git repository.

stack pushed a commit to branch branch-2.2
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2.2 by this push:
     new b4dd754  HBASE-25109 Add MR Counters to WALPlayer; currently hard to tell if it is doing anything (#2468)
b4dd754 is described below

commit b4dd75429ebebfd67a895fd75de4d6b6349f5cc2
Author: Michael Stack <sa...@users.noreply.github.com>
AuthorDate: Wed Sep 30 09:34:15 2020 -0700

    HBASE-25109 Add MR Counters to WALPlayer; currently hard to tell if it is doing anything (#2468)
    
    Add MR counters so the operator can see whether a WALPlayer run actually
    did anything. Fix a bug in argument checking (it enforced two args though
    the usage describes allowing one arg only). Clean up the usage output. In
    particular, add a note on the WAL file separator: HBase by default uses
    ',' in its WAL file names, which could befuddle an operator trying
    to do a simple import.
    
    Signed-off-by: Huaxiang Sun <hu...@apache.com>
---
 .../org/apache/hadoop/hbase/mapreduce/Driver.java  |  3 +-
 .../hadoop/hbase/mapreduce/WALInputFormat.java     |  5 +-
 .../apache/hadoop/hbase/mapreduce/WALPlayer.java   | 72 ++++++++++++++--------
 .../hbase/mapreduce/TestCellBasedWALPlayer2.java   |  4 +-
 .../hadoop/hbase/mapreduce/TestWALPlayer.java      |  4 +-
 src/main/asciidoc/_chapters/ops_mgt.adoc           | 44 +++++++------
 6 files changed, 77 insertions(+), 55 deletions(-)

diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/Driver.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/Driver.java
index d52a310..ed31c84 100644
--- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/Driver.java
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/Driver.java
@@ -1,4 +1,4 @@
-/**
+/*
  *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
@@ -34,6 +34,7 @@ import org.apache.yetus.audience.InterfaceStability;
 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
 @InterfaceStability.Stable
 public class Driver {
+  private Driver() {}
 
   public static void main(String[] args) throws Throwable {
     ProgramDriver pgd = new ProgramDriver();
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/WALInputFormat.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/WALInputFormat.java
index 1815412..7c4be83 100644
--- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/WALInputFormat.java
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/WALInputFormat.java
@@ -154,14 +154,13 @@ public class WALInputFormat extends InputFormat<WALKey, WALEdit> {
       WALSplit hsplit = (WALSplit)split;
       logFile = new Path(hsplit.getLogFileName());
       conf = context.getConfiguration();
-      LOG.info("Opening reader for "+split);
+      LOG.info("Opening {} for {}", logFile, split);
       openReader(logFile);
       this.startTime = hsplit.getStartTime();
       this.endTime = hsplit.getEndTime();
     }
 
-    private void openReader(Path path) throws IOException
-    {
+    private void openReader(Path path) throws IOException {
       closeReader();
       reader = AbstractFSWALProvider.openReader(path, conf);
       seek();
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/WALPlayer.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/WALPlayer.java
index 58e8448..a5cc4c4 100644
--- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/WALPlayer.java
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/WALPlayer.java
@@ -55,6 +55,8 @@ import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+
+
 /**
  * A tool to replay WAL files as a M/R job.
  * The WAL can be replayed for a set of tables or all tables,
@@ -179,6 +181,20 @@ public class WALPlayer extends Configured implements Tool {
   }
 
   /**
+   * Enum for map metrics.  Keep it out here rather than inside in the Map
+   * inner-class so we can find associated properties.
+   */
+  protected static enum Counter {
+    /** Number of aggregated writes */
+    PUTS,
+    /** Number of aggregated deletes */
+    DELETES,
+    CELLS_READ,
+    CELLS_WRITTEN,
+    WALEDITS
+  }
+
+  /**
    * A mapper that writes out {@link Mutation} to be directly applied to
    * a running HBase instance.
    */
@@ -189,6 +205,7 @@ public class WALPlayer extends Configured implements Tool {
     @Override
     public void map(WALKey key, WALEdit value, Context context)
     throws IOException {
+      context.getCounter(Counter.WALEDITS).increment(1);
       try {
         if (tables.isEmpty() || tables.containsKey(key.getTableName())) {
           TableName targetTable = tables.isEmpty() ?
@@ -199,6 +216,7 @@ public class WALPlayer extends Configured implements Tool {
           Delete del = null;
           Cell lastCell = null;
           for (Cell cell : value.getCells()) {
+            context.getCounter(Counter.CELLS_READ).increment(1);
             // Filtering WAL meta marker entries.
             if (WALEdit.isMetaEditFamily(cell)) {
               continue;
@@ -214,9 +232,11 @@ public class WALPlayer extends Configured implements Tool {
                 // row or type changed, write out aggregate KVs.
                 if (put != null) {
                   context.write(tableOut, put);
+                  context.getCounter(Counter.PUTS).increment(1);
                 }
                 if (del != null) {
                   context.write(tableOut, del);
+                  context.getCounter(Counter.DELETES).increment(1);
                 }
                 if (CellUtil.isDelete(cell)) {
                   del = new Delete(CellUtil.cloneRow(cell));
@@ -229,14 +249,17 @@ public class WALPlayer extends Configured implements Tool {
               } else {
                 put.add(cell);
               }
+              context.getCounter(Counter.CELLS_WRITTEN).increment(1);
             }
             lastCell = cell;
           }
           // write residual KVs
           if (put != null) {
             context.write(tableOut, put);
+            context.getCounter(Counter.PUTS).increment(1);
           }
           if (del != null) {
+            context.getCounter(Counter.DELETES).increment(1);
             context.write(tableOut, del);
           }
         }
@@ -313,7 +336,7 @@ public class WALPlayer extends Configured implements Tool {
     setupTime(conf, WALInputFormat.START_TIME_KEY);
     setupTime(conf, WALInputFormat.END_TIME_KEY);
     String inputDirs = args[0];
-    String[] tables = args[1].split(",");
+    String[] tables = args.length == 1? new String [] {}: args[1].split(",");
     String[] tableMap;
     if (args.length > 2) {
       tableMap = args[2].split(",");
@@ -321,13 +344,14 @@ public class WALPlayer extends Configured implements Tool {
         throw new IOException("The same number of tables and mapping must be provided.");
       }
     } else {
-      // if not mapping is specified map each table to itself
+      // if no mapping is specified, map each table to itself
       tableMap = tables;
     }
     conf.setStrings(TABLES_KEY, tables);
     conf.setStrings(TABLE_MAP_KEY, tableMap);
     conf.set(FileInputFormat.INPUT_DIR, inputDirs);
-    Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + System.currentTimeMillis()));
+    Job job =
+        Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + System.currentTimeMillis()));
     job.setJarByClass(WALPlayer.class);
 
     job.setInputFormatClass(WALInputFormat.class);
@@ -381,27 +405,27 @@ public class WALPlayer extends Configured implements Tool {
     if (errorMsg != null && errorMsg.length() > 0) {
       System.err.println("ERROR: " + errorMsg);
     }
-    System.err.println("Usage: " + NAME + " [options] <wal inputdir> <tables> [<tableMappings>]");
-    System.err.println("Replay all WAL files into HBase.");
-    System.err.println("<tables> is a comma separated list of tables.");
-    System.err.println("If no tables (\"\") are specified, all tables are imported.");
-    System.err.println("(Be careful, hbase:meta entries will be imported in this case.)\n");
-    System.err.println("WAL entries can be mapped to new set of tables via <tableMappings>.");
-    System.err.println("<tableMappings> is a comma separated list of target tables.");
-    System.err.println("If specified, each table in <tables> must have a mapping.\n");
-    System.err.println("By default " + NAME + " will load data directly into HBase.");
-    System.err.println("To generate HFiles for a bulk data load instead, pass the following option:");
-    System.err.println("  -D" + BULK_OUTPUT_CONF_KEY + "=/path/for/output");
-    System.err.println("  (Only one table can be specified, and no mapping is allowed!)");
-    System.err.println("Time range options:");
-    System.err.println("  -D" + WALInputFormat.START_TIME_KEY + "=[date|ms]");
-    System.err.println("  -D" + WALInputFormat.END_TIME_KEY + "=[date|ms]");
-    System.err.println("  (The start and the end date of timerange. The dates can be expressed");
-    System.err.println("  in milliseconds since epoch or in yyyy-MM-dd'T'HH:mm:ss.SS format.");
-    System.err.println("  E.g. 1234567890120 or 2009-02-13T23:32:30.12)");
+    System.err.println("Usage: " + NAME + " [options] <WAL inputdir> [<tables> <tableMappings>]");
+    System.err.println(" <WAL inputdir>   directory of WALs to replay.");
+    System.err.println(" <tables>         comma separated list of tables. If no tables specified,");
+    System.err.println("                  all are imported (even hbase:meta if present).");
+    System.err.println(" <tableMappings>  WAL entries can be mapped to a new set of tables by passing");
+    System.err.println("                  <tableMappings>, a comma separated list of target tables.");
+    System.err.println("                  If specified, each table in <tables> must have a mapping.");
+    System.err.println("To generate HFiles to bulk load instead of loading HBase directly, pass:");
+    System.err.println(" -D" + BULK_OUTPUT_CONF_KEY + "=/path/for/output");
+    System.err.println(" Only one table can be specified, and no mapping allowed!");
+    System.err.println("To specify a time range, pass:");
+    System.err.println(" -D" + WALInputFormat.START_TIME_KEY + "=[date|ms]");
+    System.err.println(" -D" + WALInputFormat.END_TIME_KEY + "=[date|ms]");
+    System.err.println(" The start and the end date of timerange. The dates can be expressed");
+    System.err.println(" in milliseconds since epoch or in yyyy-MM-dd'T'HH:mm:ss.SS format.");
+    System.err.println(" E.g. 1234567890120 or 2009-02-13T23:32:30.12");
     System.err.println("Other options:");
-    System.err.println("  -D" + JOB_NAME_CONF_KEY + "=jobName");
-    System.err.println("  Use the specified mapreduce job name for the wal player");
+    System.err.println(" -D" + JOB_NAME_CONF_KEY + "=jobName");
+    System.err.println(" Use the specified mapreduce job name for the wal player");
+    System.err.println(" -Dwal.input.separator=' '");
+    System.err.println(" Change WAL filename separator (WAL dir names use default ','.)");
     System.err.println("For performance also consider the following options:\n"
         + "  -Dmapreduce.map.speculative=false\n"
         + "  -Dmapreduce.reduce.speculative=false");
@@ -420,7 +444,7 @@ public class WALPlayer extends Configured implements Tool {
 
   @Override
   public int run(String[] args) throws Exception {
-    if (args.length < 2) {
+    if (args.length < 1) {
       usage("Wrong number of arguments: " + args.length);
       System.exit(-1);
     }
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCellBasedWALPlayer2.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCellBasedWALPlayer2.java
index 9a63ae9..b3e74ee 100644
--- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCellBasedWALPlayer2.java
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCellBasedWALPlayer2.java
@@ -223,8 +223,8 @@ public class TestCellBasedWALPlayer2 {
       } catch (SecurityException e) {
         assertEquals(-1, newSecurityManager.getExitCode());
         assertTrue(data.toString().contains("ERROR: Wrong number of arguments:"));
-        assertTrue(data.toString().contains("Usage: WALPlayer [options] <wal inputdir>" +
-            " <tables> [<tableMappings>]"));
+        assertTrue(data.toString().contains("Usage: WALPlayer [options] <WAL inputdir>" +
+            " [<tables> <tableMappings>]"));
         assertTrue(data.toString().contains("-Dwal.bulk.output=/path/for/output"));
       }
 
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestWALPlayer.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestWALPlayer.java
index 7ddcb44..6439a14 100644
--- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestWALPlayer.java
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestWALPlayer.java
@@ -223,8 +223,8 @@ public class TestWALPlayer {
       } catch (SecurityException e) {
         assertEquals(-1, newSecurityManager.getExitCode());
         assertTrue(data.toString().contains("ERROR: Wrong number of arguments:"));
-        assertTrue(data.toString().contains("Usage: WALPlayer [options] <wal inputdir>" +
-            " <tables> [<tableMappings>]"));
+        assertTrue(data.toString().contains("Usage: WALPlayer [options] <WAL inputdir>" +
+            " [<tables> <tableMappings>]"));
         assertTrue(data.toString().contains("-Dwal.bulk.output=/path/for/output"));
       }
 
diff --git a/src/main/asciidoc/_chapters/ops_mgt.adoc b/src/main/asciidoc/_chapters/ops_mgt.adoc
index 0152303..8e437c7 100644
--- a/src/main/asciidoc/_chapters/ops_mgt.adoc
+++ b/src/main/asciidoc/_chapters/ops_mgt.adoc
@@ -862,7 +862,7 @@ WALPlayer can also generate HFiles for later bulk importing, in that case only a
 Invoke via:
 
 ----
-$ bin/hbase org.apache.hadoop.hbase.mapreduce.WALPlayer [options] <wal inputdir> <tables> [<tableMappings>]>
+$ bin/hbase org.apache.hadoop.hbase.mapreduce.WALPlayer [options] <WAL inputdir> [<tables> <tableMappings>]>
 ----
 
 For example:
@@ -880,29 +880,27 @@ To NOT run WALPlayer as a mapreduce job on your cluster, force it to run all in
 Running `WALPlayer` with no arguments prints brief usage information:
 
 ----
-Usage: WALPlayer [options] <wal inputdir> <tables> [<tableMappings>]
-Replay all WAL files into HBase.
-<tables> is a comma separated list of tables.
-If no tables ("") are specified, all tables are imported.
-(Be careful, hbase:meta entries will be imported in this case.)
-
-WAL entries can be mapped to new set of tables via <tableMappings>.
-<tableMappings> is a comma separated list of target tables.
-If specified, each table in <tables> must have a mapping.
-
-By default WALPlayer will load data directly into HBase.
-To generate HFiles for a bulk data load instead, pass the following option:
-  -Dwal.bulk.output=/path/for/output
-  (Only one table can be specified, and no mapping is allowed!)
-Time range options:
-  -Dwal.start.time=[date|ms]
-  -Dwal.end.time=[date|ms]
-  (The start and the end date of timerange. The dates can be expressed
-  in milliseconds since epoch or in yyyy-MM-dd'T'HH:mm:ss.SS format.
-  E.g. 1234567890120 or 2009-02-13T23:32:30.12)
+Usage: WALPlayer [options] <WAL inputdir> [<tables> <tableMappings>]
+ <WAL inputdir>   directory of WALs to replay.
+ <tables>         comma separated list of tables. If no tables specified,
+                  all are imported (even hbase:meta if present).
+ <tableMappings>  WAL entries can be mapped to a new set of tables by passing
+                  <tableMappings>, a comma separated list of target tables.
+                  If specified, each table in <tables> must have a mapping.
+To generate HFiles to bulk load instead of loading HBase directly, pass:
+ -Dwal.bulk.output=/path/for/output
+ Only one table can be specified, and no mapping allowed!
+To specify a time range, pass:
+ -Dwal.start.time=[date|ms]
+ -Dwal.end.time=[date|ms]
+ The start and the end date of timerange. The dates can be expressed
+ in milliseconds since epoch or in yyyy-MM-dd'T'HH:mm:ss.SS format.
+ E.g. 1234567890120 or 2009-02-13T23:32:30.12
 Other options:
-  -Dmapreduce.job.name=jobName
-  Use the specified mapreduce job name for the wal player
+ -Dmapreduce.job.name=jobName
+ Use the specified mapreduce job name for the wal player
+ -Dwal.input.separator=' '
+ Change WAL filename separator (WAL dir names use default ','.)
 For performance also consider the following options:
   -Dmapreduce.map.speculative=false
   -Dmapreduce.reduce.speculative=false