You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ns...@apache.org on 2011/10/11 19:43:34 UTC

svn commit: r1181947 - in /hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/mapreduce: RowCounter.java TableInputFormatBase.java

Author: nspiegelberg
Date: Tue Oct 11 17:43:34 2011
New Revision: 1181947

URL: http://svn.apache.org/viewvc?rev=1181947&view=rev
Log:
Improve RowCounter to count rows in a specific key range.

Summary:
Currently RowCounter in the MR package is a very simple map-only job, and this
change lets the user specify a key range and count the number of rows in that
range. It is done by passing "--range=foo,bar" as an argument to the program
loader, which counts the rows in ["foo", "bar") that have any column.

Test Plan:
This change is actually a modification used to test diff D295523. First I
loaded rows using the recovery utility with a specified key range (a feature
added in D295523), had this counter count the same range specified in recovery,
and checked that the total number of rows equals the number of rows in the
range. Then I tried setting rowcounter.start.key to the end key specified in
the previous recovery, and checked that there is indeed no row in the range.

Reviewed By: kannan
Reviewers: aaiyer, kannan
CC: madhuvaidya, itapai, kannan
Differential Revision: 296340
Task ID: 659765

Modified:
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java

Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java?rev=1181947&r1=1181946&r2=1181947&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java Tue Oct 11 17:43:34 2011
@@ -85,21 +85,42 @@ public class RowCounter {
   public static Job createSubmittableJob(Configuration conf, String[] args)
   throws IOException {
     String tableName = args[0];
-    Job job = new Job(conf, NAME + "_" + tableName);
-    job.setJarByClass(RowCounter.class);
-    // Columns are space delimited
+    String startKey = null;
+    String endKey = null;
     StringBuilder sb = new StringBuilder();
-    final int columnoffset = 1;
-    for (int i = columnoffset; i < args.length; i++) {
-      if (i > columnoffset) {
+
+    // First argument is table name, starting from second
+    for (int i = 1; i < args.length; i++) {
+      final String rangeSwitch = "--range=";
+      if (args[i].startsWith(rangeSwitch)) {
+        String[] startEnd = args[i].substring(rangeSwitch.length()).split(",", 2);
+        if (startEnd.length != 2 || startEnd[1].contains(",")) {
+          printUsage("Please specify range in such format as \"--range=a,b\" " +
+              "or, with only one boundary, \"--range=,b\" or \"--range=a,\"");
+          return null;
+        }
+        startKey = startEnd[0];
+        endKey = startEnd[1];
+      }
+      else {
+        // if no switch, assume column names
+        sb.append(args[i]);
         sb.append(" ");
       }
-      sb.append(args[i]);
     }
+
+    Job job = new Job(conf, NAME + "_" + tableName);
+    job.setJarByClass(RowCounter.class);
     Scan scan = new Scan();
+    if (startKey != null && !startKey.equals("")) {
+      scan.setStartRow(Bytes.toBytes(startKey));
+    }
+    if (endKey != null && !endKey.equals("")) {
+      scan.setStopRow(Bytes.toBytes(endKey));
+    }
     scan.setFilter(new FirstKeyOnlyFilter());
     if (sb.length() > 0) {
-      for (String columnName :sb.toString().split(" ")) {
+      for (String columnName : sb.toString().trim().split(" ")) {
         String [] fields = columnName.split(":");
         if(fields.length == 1) {
           scan.addFamily(Bytes.toBytes(fields[0]));
@@ -108,7 +129,6 @@ public class RowCounter {
         }
       }
     }
-    // Second argument is the table name.
     job.setOutputFormatClass(NullOutputFormat.class);
     TableMapReduceUtil.initTableMapperJob(tableName, scan,
       RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);
@@ -116,6 +136,22 @@ public class RowCounter {
     return job;
   }
 
+  /*
+   * @param errorMessage Can attach a message when error occurs.
+   */
+  private static void printUsage(String errorMessage) {
+    System.err.println("ERROR: " + errorMessage);
+    printUsage();
+  }
+
+  /*
+   * Prints usage without error message
+   */
+  private static void printUsage() {
+    System.err.println("Usage: RowCounter <tablename> " +
+        "[--range=[startKey],[endKey]] [<column1> <column2>...]");
+  }
+
   /**
    * Main entry point.
    *
@@ -126,11 +162,13 @@ public class RowCounter {
     Configuration conf = HBaseConfiguration.create();
     String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
     if (otherArgs.length < 1) {
-      System.err.println("ERROR: Wrong number of parameters: " + args.length);
-      System.err.println("Usage: RowCounter <tablename> [<column1> <column2>...]");
+      printUsage("Wrong number of parameters: " + args.length);
       System.exit(-1);
     }
     Job job = createSubmittableJob(conf, otherArgs);
+    if (job == null) {
+      System.exit(-1);
+    }
     System.exit(job.waitForCompletion(true) ? 0 : 1);
   }
 }

Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java?rev=1181947&r1=1181946&r2=1181947&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java Tue Oct 11 17:43:34 2011
@@ -187,7 +187,6 @@ extends InputFormat<ImmutableBytesWritab
         new ArrayList<InputSplit>(numRegions * numMappersPerRegion);
     byte[] startRow = scan.getStartRow();
     byte[] stopRow = scan.getStopRow();
-    int numSplits = 0;
     for (int i = 0; i < numRegions * numMappersPerRegion; i++) {
       if (!includeRegionInSplit(keys.getFirst()[i / numMappersPerRegion],
           keys.getSecond()[i / numMappersPerRegion])) {
@@ -211,7 +210,7 @@ extends InputFormat<ImmutableBytesWritab
             splitStart, splitStop, regionLocation);
         splits.add(split);
         if (LOG.isDebugEnabled())
-          LOG.debug("getSplits: split -> " + (numSplits++) + " -> " + split);
+          LOG.debug("getSplits: split -> " + i + " -> " + split);
       }
     }
     return splits;