You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ns...@apache.org on 2011/10/11 04:07:27 UTC
svn commit: r1181424 -
/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java
Author: nspiegelberg
Date: Tue Oct 11 02:07:27 2011
New Revision: 1181424
URL: http://svn.apache.org/viewvc?rev=1181424&view=rev
Log:
hbase export: turn caching off, allow CF to be specified, etc.
Summary:
A few small enhancements to HBase's native MR based export:
(i) Turn block caching off during scan.
(ii) Allow a specific column family to be selected for export.
(iii) Document use of -D params, for example, to set compression type.
Test Plan:
Pushed to cluster. Created a table with two column families and exported only
one of them. Also tried with compression turned on.
Export:
% bin/hadoop jar /tmp/hbase-0.21.0-SNAPSHOT.jar export -D
hbase.mapreduce.scan.column.family=actions2 -D mapred.output.compress=true -D
mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec -D
mapred.output.compression.type=BLOCK test /tmp/export_test10
Import:
% bin/hadoop jar /tmp/hbase-0.21.0-SNAPSHOT.jar import testExport
/tmp/export_test10
DiffCamp Revision: 175335
Reviewed By: jgray
CC: jgray, hbase@lists
Revert Plan:
OK
Modified:
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java?rev=1181424&r1=1181423&r2=1181424&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java Tue Oct 11 02:07:27 2011
@@ -27,6 +27,7 @@ import org.apache.hadoop.hbase.HBaseConf
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
@@ -83,12 +84,19 @@ public class Export {
job.setJarByClass(Exporter.class);
// TODO: Allow passing filter and subset of rows/columns.
Scan s = new Scan();
+
// Optional arguments.
int versions = args.length > 2? Integer.parseInt(args[2]): 1;
s.setMaxVersions(versions);
long startTime = args.length > 3? Long.parseLong(args[3]): 0L;
long endTime = args.length > 4? Long.parseLong(args[4]): Long.MAX_VALUE;
s.setTimeRange(startTime, endTime);
+ s.setCacheBlocks(false);
+
+ if (conf.get(TableInputFormat.SCAN_COLUMN_FAMILY) != null) {
+ s.addFamily(Bytes.toBytes(conf.get(TableInputFormat.SCAN_COLUMN_FAMILY)));
+ }
+
Log.info("verisons=" + versions + ", starttime=" + startTime +
", endtime=" + endTime);
TableMapReduceUtil.initTableMapperJob(tableName, s, Exporter.class, null,
@@ -109,8 +117,16 @@ public class Export {
if (errorMsg != null && errorMsg.length() > 0) {
System.err.println("ERROR: " + errorMsg);
}
- System.err.println("Usage: Export <tablename> <outputdir> [<versions> " +
- "[<starttime> [<endtime>]]]");
+ System.err.println("Usage: Export [-D <property=value>]* <tablename> <outputdir> [<versions> " +
+ "[<starttime> [<endtime>]]]\n");
+ System.err.println(" Note: -D properties will be applied to the conf used. ");
+ System.err.println(" For example: ");
+ System.err.println(" -D mapred.output.compress=true");
+ System.err.println(" -D mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec");
+ System.err.println(" -D mapred.output.compression.type=BLOCK");
+ System.err.println(" Additionally, the following SCAN properties can be specified");
+ System.err.println(" to control/limit what is exported..");
+ System.err.println(" -D " + TableInputFormat.SCAN_COLUMN_FAMILY + "=<familyName>");
}
/**