Posted to commits@hbase.apache.org by ns...@apache.org on 2011/10/11 04:07:27 UTC

svn commit: r1181424 - /hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java

Author: nspiegelberg
Date: Tue Oct 11 02:07:27 2011
New Revision: 1181424

URL: http://svn.apache.org/viewvc?rev=1181424&view=rev
Log:
hbase export: turn caching off, allow CF to be specified, etc.

Summary:
A few small enhancements to HBase's native MapReduce-based export:
(i) Turn block caching off during the scan (see the sketch below).
(ii) Allow a specific column family to be specified for export.
(iii) Document the use of -D parameters, for example, to set the compression type.
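
A minimal sketch of the resulting scan setup, assuming the 0.89-era client
API; the property lookup mirrors the diff below:

    Scan s = new Scan();
    // (i) Disable block caching: a full-table export would otherwise churn
    // the region servers' block cache. (This is distinct from scanner
    // caching, Scan.setCaching, which controls rows fetched per RPC.)
    s.setCacheBlocks(false);
    // (ii) Restrict the scan to a single family when the property is set.
    String family = conf.get(TableInputFormat.SCAN_COLUMN_FAMILY);
    if (family != null) {
      s.addFamily(Bytes.toBytes(family));
    }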

Test Plan:
Pushed to a cluster. Created a table with two column families and exported only
one of them. Also tried with compression turned on.

Export:

% bin/hadoop jar /tmp/hbase-0.21.0-SNAPSHOT.jar export -D
hbase.mapreduce.scan.column.family=actions2 -D mapred.output.compress=true -D
mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec -D
mapred.output.compression.type=BLOCK test /tmp/export_test10
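
The -D flags above are consumed before the positional arguments, following
the usual Hadoop GenericOptionsParser pattern (assumed here; the argument
parsing is not part of this diff):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.util.GenericOptionsParser;

    Configuration conf = new HBaseConfiguration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // -D key=value pairs (e.g. hbase.mapreduce.scan.column.family) land in
    // conf; otherArgs holds <tablename> <outputdir> [<versions> ...].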

Import:

% bin/hadoop jar /tmp/hbase-0.21.0-SNAPSHOT.jar import testExport
/tmp/export_test10
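
To spot-check what was exported, the SequenceFile output can be read back
directly. A hedged sketch, assuming the SequenceFile<ImmutableBytesWritable,
Result> layout implied by the Exporter mapper and SequenceFileOutputFormat
in the diff (the part-file name is illustrative):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hbase.client.Result;
    import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
    import org.apache.hadoop.hbase.util.Bytes;
    import org.apache.hadoop.io.SequenceFile;

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path part = new Path("/tmp/export_test10/part-m-00000"); // illustrative
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, part, conf);
    ImmutableBytesWritable key = new ImmutableBytesWritable();
    Result value = new Result();
    try {
      while (reader.next(key, value)) {
        // Each record is a row key and the exported Result for that row.
        System.out.println(Bytes.toStringBinary(key.get()));
      }
    } finally {
      reader.close();
    }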

DiffCamp Revision: 175335
Reviewed By: jgray
CC: jgray, hbase@lists
Revert Plan:
OK

Modified:
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java

Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java?rev=1181424&r1=1181423&r2=1181424&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java Tue Oct 11 02:07:27 2011
@@ -27,6 +27,7 @@ import org.apache.hadoop.hbase.HBaseConf
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
 import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
@@ -83,12 +84,19 @@ public class Export {
     job.setJarByClass(Exporter.class);
     // TODO: Allow passing filter and subset of rows/columns.
     Scan s = new Scan();
+
     // Optional arguments.
     int versions = args.length > 2? Integer.parseInt(args[2]): 1;
     s.setMaxVersions(versions);
     long startTime = args.length > 3? Long.parseLong(args[3]): 0L;
     long endTime = args.length > 4? Long.parseLong(args[4]): Long.MAX_VALUE;
     s.setTimeRange(startTime, endTime);
+    s.setCacheBlocks(false);
+
+    if (conf.get(TableInputFormat.SCAN_COLUMN_FAMILY) != null) {
+      s.addFamily(Bytes.toBytes(conf.get(TableInputFormat.SCAN_COLUMN_FAMILY)));
+    }
+
     Log.info("verisons=" + versions + ", starttime=" + startTime +
       ", endtime=" + endTime);
     TableMapReduceUtil.initTableMapperJob(tableName, s, Exporter.class, null,
@@ -109,8 +117,16 @@ public class Export {
     if (errorMsg != null && errorMsg.length() > 0) {
       System.err.println("ERROR: " + errorMsg);
     }
-    System.err.println("Usage: Export <tablename> <outputdir> [<versions> " +
-      "[<starttime> [<endtime>]]]");
+    System.err.println("Usage: Export [-D <property=value>]* <tablename> <outputdir> [<versions> " +
+      "[<starttime> [<endtime>]]]\n");
+    System.err.println("  Note: -D properties will be applied to the conf used. ");
+    System.err.println("  For example: ");
+    System.err.println("   -D mapred.output.compress=true");
+    System.err.println("   -D mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec");
+    System.err.println("   -D mapred.output.compression.type=BLOCK");
+    System.err.println("  Additionally, the following SCAN properties can be specified");
+    System.err.println("  to control/limit what is exported:");
+    System.err.println("   -D " + TableInputFormat.SCAN_COLUMN_FAMILY + "=<familyName>");
   }
 
   /**