You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@hbase.apache.org by Alex Newman <po...@gmail.com> on 2008/07/03 05:19:38 UTC
SampleUploader for the trunk hbase
/// This is very close to the example already in the javadoc, using
(Bytes, BatchUpdate) instead of (Text, MapWritable), and I find
it to be the easiest way to get people started/motivated with HBase.
package org.apache.hadoop.hbase.mapred;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class SampleUploader extends MapReduceBase
implements Mapper<LongWritable, Text, ImmutableBytesWritable, BatchUpdate>, Tool
{
private static final String NAME = "SampleUploader";
private Configuration conf;
public JobConf createSubmittableJob(String[] args) {
JobConf c = new JobConf(getConf(), SampleUploader.class);
c.setJobName(NAME);
c.setInputPath(new Path(args[0]));
c.setMapperClass(this.getClass());
c.setMapOutputKeyClass(ImmutableBytesWritable.class);
c.setMapOutputValueClass(BatchUpdate.class);
c.setReducerClass(TableUploader.class);
TableReduce.initJob(args[1], TableUploader.class, c);
return c;
}
public void map(LongWritable k, Text v,
OutputCollector<ImmutableBytesWritable, BatchUpdate> output, Reporter r)
throws IOException {
// Lines are space-delimited; first item is row, next the columnname and
// then the third the cell value.
String tmp = v.toString();
if (tmp.length() == 0) {
return;
}
String [] splits = v.toString().split(" ");
String row = splits[0];
BatchUpdate mw = new BatchUpdate(row);
mw.put( "count:", Bytes.toBytes(splits[1]));
r.setStatus("Map emitting " + row + " for record " + k.toString());
output.collect(new ImmutableBytesWritable(row.getBytes()), mw);
}
public static class TableUploader extends TableReduce<ImmutableBytesWritable,
BatchUpdate> {
@Override
public void reduce(ImmutableBytesWritable key, Iterator<BatchUpdate> values,
OutputCollector<ImmutableBytesWritable, BatchUpdate> output,
@SuppressWarnings("unused") Reporter reporter)
throws IOException {
while(values.hasNext()) {
output.collect(key, values.next());
}
}
}
static int printUsage() {
System.out.println(NAME + " <input> <table_name>");
return -1;
}
public int run(@SuppressWarnings("unused") String[] args) throws Exception {
// Make sure there are exactly 2 parameters left.
if (args.length != 2) {
System.out.println("ERROR: Wrong number of parameters: " +
args.length + " instead of 2.");
return printUsage();
}
JobClient.runJob(createSubmittableJob(args));
return 0;
}
System.out.println("ERROR: Wrong number of parameters: " +
args.length + " instead of 2.");
return printUsage();
}
JobClient.runJob(createSubmittableJob(args));
return 0;
}
public Configuration getConf() {
return this.conf;
}
public void setConf(final Configuration c) {
this.conf = c;
}
public static void main(String[] args) throws Exception {
int errCode = ToolRunner.run(new Configuration(), new SampleUploader(),
args);
System.exit(errCode);
}
}
Re: SampleUploader for the trunk hbase
Posted by stack <st...@duboce.net>.
We'd welcome the contrib. If sufficiently 'generic', we should just
check it in under our examples directory.
St.Ack
Dan Zinngrabe wrote:
> It would just need a little cleanup and it would be ready to go. We made it
> generic enough that it should work for just about anyone, with a few (minor)
> caveats. We used it to transition a fairly flat, large mysql table into
> HBase as well as for production backups and restore.
>
> On Mon, Jul 7, 2008 at 9:43 PM, stack <st...@duboce.net> wrote:
>
>
>> Sounds good. Would it make sense your posting your code or is it too
>> particular to your setup?
>> St.Ack
>>
>>
>> Dan Zinngrabe wrote:
>>
>>
>>> We have something very similar, but a little more flexible in use in
>>> production. Along with it is a simple exporter that outputs hbase data in
>>> the same format.
>>> It's not quite HBASE-50 <
>>>
>>> https://issues.apache.org/jira/browse/HBASE-50?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12580996#action_12580996
>>> but it does work pretty well so far for backing up even large tables.
>>>
>>> On Thu, Jul 3, 2008 at 10:48 AM, stack <st...@duboce.net> wrote:
>>>
>>>
>>>
>>>
>>>> Thanks Alex. Looks great. You want me to add it to the wiki? (Or you
>>>> could do it yourself).
>>>>
>>>> Yours does something slightly different it seems; you hardcode the column
>>>> name and do a count of splits[1]. You might add to the class comment a
>>>> description of what your MR job does.
>>>>
>>>> St.Ack
>>>>
>>>>
>>>>
>>>> Alex Newman wrote:
>>>>
>>>>
>>>>
>>>>
>>>>> /// This is very close to the example in the javadoc
>>>>> already(Bytes,BatchUpdate) instead of (text/mapwritable), and i find
>>>>> it to be the easiest way to get people started/motivated with HBase.
>>>>>
>>>>>
>>>>> package org.apache.hadoop.hbase.mapred;
>>>>>
>>>>> import org.apache.hadoop.hbase.util.Bytes;
>>>>>
>>>>> import java.io.IOException;
>>>>> import java.util.Iterator;
>>>>>
>>>>> import org.apache.hadoop.conf.Configuration;
>>>>> import org.apache.hadoop.fs.Path;
>>>>> import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
>>>>> import org.apache.hadoop.hbase.io.BatchUpdate;
>>>>>
>>>>> import org.apache.hadoop.io.LongWritable;
>>>>> import org.apache.hadoop.io.MapWritable;
>>>>> import org.apache.hadoop.io.Text;
>>>>> import org.apache.hadoop.mapred.JobClient;
>>>>> import org.apache.hadoop.mapred.JobConf;
>>>>> import org.apache.hadoop.mapred.MapReduceBase;
>>>>> import org.apache.hadoop.mapred.Mapper;
>>>>> import org.apache.hadoop.mapred.OutputCollector;
>>>>> import org.apache.hadoop.mapred.Reporter;
>>>>> import org.apache.hadoop.util.Tool;
>>>>> import org.apache.hadoop.util.ToolRunner;
>>>>> public class SampleUploader extends MapReduceBase
>>>>> implements Mapper<LongWritable, Text, ImmutableBytesWritable,
>>>>> BatchUpdate>, Tool
>>>>> {
>>>>> private static final String NAME = "SampleUploader";
>>>>> private Configuration conf;
>>>>>
>>>>> public JobConf createSubmittableJob(String[] args) {
>>>>> JobConf c = new JobConf(getConf(), SampleUploader.class);
>>>>> c.setJobName(NAME);
>>>>> c.setInputPath(new Path(args[0]));
>>>>> c.setMapperClass(this.getClass());
>>>>> c.setMapOutputKeyClass(ImmutableBytesWritable.class);
>>>>> c.setMapOutputValueClass(BatchUpdate.class);
>>>>> c.setReducerClass(TableUploader.class);
>>>>> TableReduce.initJob(args[1], TableUploader.class, c);
>>>>> return c;
>>>>> }
>>>>>
>>>>> public void map(LongWritable k, Text v,
>>>>> OutputCollector<ImmutableBytesWritable, BatchUpdate> output, Reporter
>>>>> r)
>>>>> throws IOException {
>>>>> // Lines are space-delimited; first item is row, next the columnname
>>>>> and
>>>>> // then the third the cell value.
>>>>> String tmp = v.toString();
>>>>> if (tmp.length() == 0) {
>>>>> return;
>>>>> }
>>>>> String [] splits = v.toString().split(" ");
>>>>> String row = splits[0];
>>>>> BatchUpdate mw = new BatchUpdate(row);
>>>>>
>>>>> mw.put( "count:", Bytes.toBytes(splits[1]));
>>>>> r.setStatus("Map emitting " + row + " for record " + k.toString());
>>>>> output.collect(new ImmutableBytesWritable(row.getBytes()), mw);
>>>>> }
>>>>>
>>>>> public static class TableUploader extends
>>>>> TableReduce<ImmutableBytesWritable,
>>>>> BatchUpdate> {
>>>>>
>>>>>
>>>>> @Override
>>>>> public void reduce(ImmutableBytesWritable key, Iterator<BatchUpdate>
>>>>> values,
>>>>> OutputCollector<ImmutableBytesWritable, BatchUpdate> output,
>>>>> @SuppressWarnings("unused") Reporter reporter)
>>>>> throws IOException {
>>>>>
>>>>> while(values.hasNext()) {
>>>>> output.collect(key, values.next());
>>>>> }
>>>>> }
>>>>> }
>>>>>
>>>>>
>>>>> static int printUsage() {
>>>>> System.out.println(NAME + " <input> <table_name>");
>>>>> return -1;
>>>>> }
>>>>>
>>>>> public int run(@SuppressWarnings("unused") String[] args) throws
>>>>> Exception {
>>>>> // Make sure there are exactly 2 parameters left.
>>>>> if (args.length != 2) {
>>>>> System.out.println("ERROR: Wrong number of parameters: " +
>>>>> args.length + " instead of 2.");
>>>>> return printUsage();
>>>>> }
>>>>> JobClient.runJob(createSubmittableJob(args));
>>>>> return 0;
>>>>> }
>>>>> System.out.println("ERROR: Wrong number of parameters: " +
>>>>> args.length + " instead of 2.");
>>>>> return printUsage();
>>>>> }
>>>>> JobClient.runJob(createSubmittableJob(args));
>>>>> return 0;
>>>>> }
>>>>>
>>>>> public Configuration getConf() {
>>>>> return this.conf;
>>>>> }
>>>>>
>>>>> public void setConf(final Configuration c) {
>>>>> this.conf = c;
>>>>> }
>>>>>
>>>>> public static void main(String[] args) throws Exception {
>>>>> int errCode = ToolRunner.run(new Configuration(), new
>>>>> SampleUploader(),
>>>>> args);
>>>>> System.exit(errCode);
>>>>> }
>>>>> }
>>>>>
>>>>>
>>>>>
>>>>>
>>>>>
>>>>
>>>
>>>
>>>
>>
>
>
>
Re: SampleUploader for the trunk hbase
Posted by Dan Zinngrabe <da...@mahalo.com>.
It would just need a little cleanup and it would be ready to go. We made it
generic enough that it should work for just about anyone, with a few (minor)
caveats. We used it to transition a fairly flat, large mysql table into
HBase as well as for production backups and restore.
On Mon, Jul 7, 2008 at 9:43 PM, stack <st...@duboce.net> wrote:
> Sounds good. Would it make sense your posting your code or is it too
> particular to your setup?
> St.Ack
>
>
> Dan Zinngrabe wrote:
>
>> We have something very similar, but a little more flexible in use in
>> production. Along with it is a simple exporter that outputs hbase data in
>> the same format.
>> It's not quite HBASE-50 <
>>
>> https://issues.apache.org/jira/browse/HBASE-50?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12580996#action_12580996
>> but it does work pretty well so far for backing up even large tables.
>>
>> On Thu, Jul 3, 2008 at 10:48 AM, stack <st...@duboce.net> wrote:
>>
>>
>>
>>> Thanks Alex. Looks great. You want me to add it to the wiki? (Or you
>>> could do it yourself).
>>>
>>> Yours does something slightly different it seems; you hardcode the column
>>> name and do a count of splits[1]. You might add to the class comment a
>>> description of what your MR job does.
>>>
>>> St.Ack
>>>
>>>
>>>
>>> Alex Newman wrote:
>>>
>>>
>>>
>>>> /// This is very close to the example in the javadoc
>>>> already(Bytes,BatchUpdate) instead of (text/mapwritable), and i find
>>>> it to be the easiest way to get people started/motivated with HBase.
>>>>
>>>>
>>>> package org.apache.hadoop.hbase.mapred;
>>>>
>>>> import org.apache.hadoop.hbase.util.Bytes;
>>>>
>>>> import java.io.IOException;
>>>> import java.util.Iterator;
>>>>
>>>> import org.apache.hadoop.conf.Configuration;
>>>> import org.apache.hadoop.fs.Path;
>>>> import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
>>>> import org.apache.hadoop.hbase.io.BatchUpdate;
>>>>
>>>> import org.apache.hadoop.io.LongWritable;
>>>> import org.apache.hadoop.io.MapWritable;
>>>> import org.apache.hadoop.io.Text;
>>>> import org.apache.hadoop.mapred.JobClient;
>>>> import org.apache.hadoop.mapred.JobConf;
>>>> import org.apache.hadoop.mapred.MapReduceBase;
>>>> import org.apache.hadoop.mapred.Mapper;
>>>> import org.apache.hadoop.mapred.OutputCollector;
>>>> import org.apache.hadoop.mapred.Reporter;
>>>> import org.apache.hadoop.util.Tool;
>>>> import org.apache.hadoop.util.ToolRunner;
>>>> public class SampleUploader extends MapReduceBase
>>>> implements Mapper<LongWritable, Text, ImmutableBytesWritable,
>>>> BatchUpdate>, Tool
>>>> {
>>>> private static final String NAME = "SampleUploader";
>>>> private Configuration conf;
>>>>
>>>> public JobConf createSubmittableJob(String[] args) {
>>>> JobConf c = new JobConf(getConf(), SampleUploader.class);
>>>> c.setJobName(NAME);
>>>> c.setInputPath(new Path(args[0]));
>>>> c.setMapperClass(this.getClass());
>>>> c.setMapOutputKeyClass(ImmutableBytesWritable.class);
>>>> c.setMapOutputValueClass(BatchUpdate.class);
>>>> c.setReducerClass(TableUploader.class);
>>>> TableReduce.initJob(args[1], TableUploader.class, c);
>>>> return c;
>>>> }
>>>>
>>>> public void map(LongWritable k, Text v,
>>>> OutputCollector<ImmutableBytesWritable, BatchUpdate> output, Reporter
>>>> r)
>>>> throws IOException {
>>>> // Lines are space-delimited; first item is row, next the columnname
>>>> and
>>>> // then the third the cell value.
>>>> String tmp = v.toString();
>>>> if (tmp.length() == 0) {
>>>> return;
>>>> }
>>>> String [] splits = v.toString().split(" ");
>>>> String row = splits[0];
>>>> BatchUpdate mw = new BatchUpdate(row);
>>>>
>>>> mw.put( "count:", Bytes.toBytes(splits[1]));
>>>> r.setStatus("Map emitting " + row + " for record " + k.toString());
>>>> output.collect(new ImmutableBytesWritable(row.getBytes()), mw);
>>>> }
>>>>
>>>> public static class TableUploader extends
>>>> TableReduce<ImmutableBytesWritable,
>>>> BatchUpdate> {
>>>>
>>>>
>>>> @Override
>>>> public void reduce(ImmutableBytesWritable key, Iterator<BatchUpdate>
>>>> values,
>>>> OutputCollector<ImmutableBytesWritable, BatchUpdate> output,
>>>> @SuppressWarnings("unused") Reporter reporter)
>>>> throws IOException {
>>>>
>>>> while(values.hasNext()) {
>>>> output.collect(key, values.next());
>>>> }
>>>> }
>>>> }
>>>>
>>>>
>>>> static int printUsage() {
>>>> System.out.println(NAME + " <input> <table_name>");
>>>> return -1;
>>>> }
>>>>
>>>> public int run(@SuppressWarnings("unused") String[] args) throws
>>>> Exception {
>>>> // Make sure there are exactly 2 parameters left.
>>>> if (args.length != 2) {
>>>> System.out.println("ERROR: Wrong number of parameters: " +
>>>> args.length + " instead of 2.");
>>>> return printUsage();
>>>> }
>>>> JobClient.runJob(createSubmittableJob(args));
>>>> return 0;
>>>> }
>>>> System.out.println("ERROR: Wrong number of parameters: " +
>>>> args.length + " instead of 2.");
>>>> return printUsage();
>>>> }
>>>> JobClient.runJob(createSubmittableJob(args));
>>>> return 0;
>>>> }
>>>>
>>>> public Configuration getConf() {
>>>> return this.conf;
>>>> }
>>>>
>>>> public void setConf(final Configuration c) {
>>>> this.conf = c;
>>>> }
>>>>
>>>> public static void main(String[] args) throws Exception {
>>>> int errCode = ToolRunner.run(new Configuration(), new
>>>> SampleUploader(),
>>>> args);
>>>> System.exit(errCode);
>>>> }
>>>> }
>>>>
>>>>
>>>>
>>>>
>>>
>>>
>>
>>
>>
>>
>
>
--
Dan Zinngrabe
Alchemist -- Mahalo.com
http://www.mahalo.com/member/quellish
dan@mahalo.com
Re: SampleUploader for the trunk hbase
Posted by stack <st...@duboce.net>.
Sounds good. Would it make sense your posting your code or is it too
particular to your setup?
St.Ack
Dan Zinngrabe wrote:
> We have something very similar, but a little more flexible in use in
> production. Along with it is a simple exporter that outputs hbase data in
> the same format.
> It's not quite HBASE-50 <
> https://issues.apache.org/jira/browse/HBASE-50?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12580996#action_12580996
>
> but it does work pretty well so far for backing up even large tables.
>
> On Thu, Jul 3, 2008 at 10:48 AM, stack <st...@duboce.net> wrote:
>
>
>> Thanks Alex. Looks great. You want me to add it to the wiki? (Or you
>> could do it yourself).
>>
>> Yours does something slightly different it seems; you hardcode the column
>> name and do a count of splits[1]. You might add to the class comment a
>> description of what your MR job does.
>>
>> St.Ack
>>
>>
>>
>> Alex Newman wrote:
>>
>>
>>> /// This is very close to the example in the javadoc
>>> already(Bytes,BatchUpdate) instead of (text/mapwritable), and i find
>>> it to be the easiest way to get people started/motivated with HBase.
>>>
>>>
>>> package org.apache.hadoop.hbase.mapred;
>>>
>>> import org.apache.hadoop.hbase.util.Bytes;
>>>
>>> import java.io.IOException;
>>> import java.util.Iterator;
>>>
>>> import org.apache.hadoop.conf.Configuration;
>>> import org.apache.hadoop.fs.Path;
>>> import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
>>> import org.apache.hadoop.hbase.io.BatchUpdate;
>>>
>>> import org.apache.hadoop.io.LongWritable;
>>> import org.apache.hadoop.io.MapWritable;
>>> import org.apache.hadoop.io.Text;
>>> import org.apache.hadoop.mapred.JobClient;
>>> import org.apache.hadoop.mapred.JobConf;
>>> import org.apache.hadoop.mapred.MapReduceBase;
>>> import org.apache.hadoop.mapred.Mapper;
>>> import org.apache.hadoop.mapred.OutputCollector;
>>> import org.apache.hadoop.mapred.Reporter;
>>> import org.apache.hadoop.util.Tool;
>>> import org.apache.hadoop.util.ToolRunner;
>>> public class SampleUploader extends MapReduceBase
>>> implements Mapper<LongWritable, Text, ImmutableBytesWritable,
>>> BatchUpdate>, Tool
>>> {
>>> private static final String NAME = "SampleUploader";
>>> private Configuration conf;
>>>
>>> public JobConf createSubmittableJob(String[] args) {
>>> JobConf c = new JobConf(getConf(), SampleUploader.class);
>>> c.setJobName(NAME);
>>> c.setInputPath(new Path(args[0]));
>>> c.setMapperClass(this.getClass());
>>> c.setMapOutputKeyClass(ImmutableBytesWritable.class);
>>> c.setMapOutputValueClass(BatchUpdate.class);
>>> c.setReducerClass(TableUploader.class);
>>> TableReduce.initJob(args[1], TableUploader.class, c);
>>> return c;
>>> }
>>>
>>> public void map(LongWritable k, Text v,
>>> OutputCollector<ImmutableBytesWritable, BatchUpdate> output, Reporter
>>> r)
>>> throws IOException {
>>> // Lines are space-delimited; first item is row, next the columnname
>>> and
>>> // then the third the cell value.
>>> String tmp = v.toString();
>>> if (tmp.length() == 0) {
>>> return;
>>> }
>>> String [] splits = v.toString().split(" ");
>>> String row = splits[0];
>>> BatchUpdate mw = new BatchUpdate(row);
>>>
>>> mw.put( "count:", Bytes.toBytes(splits[1]));
>>> r.setStatus("Map emitting " + row + " for record " + k.toString());
>>> output.collect(new ImmutableBytesWritable(row.getBytes()), mw);
>>> }
>>>
>>> public static class TableUploader extends
>>> TableReduce<ImmutableBytesWritable,
>>> BatchUpdate> {
>>>
>>>
>>> @Override
>>> public void reduce(ImmutableBytesWritable key, Iterator<BatchUpdate>
>>> values,
>>> OutputCollector<ImmutableBytesWritable, BatchUpdate> output,
>>> @SuppressWarnings("unused") Reporter reporter)
>>> throws IOException {
>>>
>>> while(values.hasNext()) {
>>> output.collect(key, values.next());
>>> }
>>> }
>>> }
>>>
>>>
>>> static int printUsage() {
>>> System.out.println(NAME + " <input> <table_name>");
>>> return -1;
>>> }
>>>
>>> public int run(@SuppressWarnings("unused") String[] args) throws
>>> Exception {
>>> // Make sure there are exactly 2 parameters left.
>>> if (args.length != 2) {
>>> System.out.println("ERROR: Wrong number of parameters: " +
>>> args.length + " instead of 2.");
>>> return printUsage();
>>> }
>>> JobClient.runJob(createSubmittableJob(args));
>>> return 0;
>>> }
>>> System.out.println("ERROR: Wrong number of parameters: " +
>>> args.length + " instead of 2.");
>>> return printUsage();
>>> }
>>> JobClient.runJob(createSubmittableJob(args));
>>> return 0;
>>> }
>>>
>>> public Configuration getConf() {
>>> return this.conf;
>>> }
>>>
>>> public void setConf(final Configuration c) {
>>> this.conf = c;
>>> }
>>>
>>> public static void main(String[] args) throws Exception {
>>> int errCode = ToolRunner.run(new Configuration(), new SampleUploader(),
>>> args);
>>> System.exit(errCode);
>>> }
>>> }
>>>
>>>
>>>
>>
>
>
>
Re: SampleUploader for the trunk hbase
Posted by Dan Zinngrabe <da...@mahalo.com>.
We have something very similar, but a little more flexible in use in
production. Along with it is a simple exporter that outputs hbase data in
the same format.
It's not quite HBASE-50 <
https://issues.apache.org/jira/browse/HBASE-50?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12580996#action_12580996
>
but it does work pretty well so far for backing up even large tables.
On Thu, Jul 3, 2008 at 10:48 AM, stack <st...@duboce.net> wrote:
> Thanks Alex. Looks great. You want me to add it to the wiki? (Or you
> could do it yourself).
>
> Yours does something slightly different it seems; you hardcode the column
> name and do a count of splits[1]. You might add to the class comment a
> description of what your MR job does.
>
> St.Ack
>
>
>
> Alex Newman wrote:
>
>> /// This is very close to the example in the javadoc
>> already(Bytes,BatchUpdate) instead of (text/mapwritable), and i find
>> it to be the easiest way to get people started/motivated with HBase.
>>
>>
>> package org.apache.hadoop.hbase.mapred;
>>
>> import org.apache.hadoop.hbase.util.Bytes;
>>
>> import java.io.IOException;
>> import java.util.Iterator;
>>
>> import org.apache.hadoop.conf.Configuration;
>> import org.apache.hadoop.fs.Path;
>> import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
>> import org.apache.hadoop.hbase.io.BatchUpdate;
>>
>> import org.apache.hadoop.io.LongWritable;
>> import org.apache.hadoop.io.MapWritable;
>> import org.apache.hadoop.io.Text;
>> import org.apache.hadoop.mapred.JobClient;
>> import org.apache.hadoop.mapred.JobConf;
>> import org.apache.hadoop.mapred.MapReduceBase;
>> import org.apache.hadoop.mapred.Mapper;
>> import org.apache.hadoop.mapred.OutputCollector;
>> import org.apache.hadoop.mapred.Reporter;
>> import org.apache.hadoop.util.Tool;
>> import org.apache.hadoop.util.ToolRunner;
>> public class SampleUploader extends MapReduceBase
>> implements Mapper<LongWritable, Text, ImmutableBytesWritable,
>> BatchUpdate>, Tool
>> {
>> private static final String NAME = "SampleUploader";
>> private Configuration conf;
>>
>> public JobConf createSubmittableJob(String[] args) {
>> JobConf c = new JobConf(getConf(), SampleUploader.class);
>> c.setJobName(NAME);
>> c.setInputPath(new Path(args[0]));
>> c.setMapperClass(this.getClass());
>> c.setMapOutputKeyClass(ImmutableBytesWritable.class);
>> c.setMapOutputValueClass(BatchUpdate.class);
>> c.setReducerClass(TableUploader.class);
>> TableReduce.initJob(args[1], TableUploader.class, c);
>> return c;
>> }
>>
>> public void map(LongWritable k, Text v,
>> OutputCollector<ImmutableBytesWritable, BatchUpdate> output, Reporter
>> r)
>> throws IOException {
>> // Lines are space-delimited; first item is row, next the columnname
>> and
>> // then the third the cell value.
>> String tmp = v.toString();
>> if (tmp.length() == 0) {
>> return;
>> }
>> String [] splits = v.toString().split(" ");
>> String row = splits[0];
>> BatchUpdate mw = new BatchUpdate(row);
>>
>> mw.put( "count:", Bytes.toBytes(splits[1]));
>> r.setStatus("Map emitting " + row + " for record " + k.toString());
>> output.collect(new ImmutableBytesWritable(row.getBytes()), mw);
>> }
>>
>> public static class TableUploader extends
>> TableReduce<ImmutableBytesWritable,
>> BatchUpdate> {
>>
>>
>> @Override
>> public void reduce(ImmutableBytesWritable key, Iterator<BatchUpdate>
>> values,
>> OutputCollector<ImmutableBytesWritable, BatchUpdate> output,
>> @SuppressWarnings("unused") Reporter reporter)
>> throws IOException {
>>
>> while(values.hasNext()) {
>> output.collect(key, values.next());
>> }
>> }
>> }
>>
>>
>> static int printUsage() {
>> System.out.println(NAME + " <input> <table_name>");
>> return -1;
>> }
>>
>> public int run(@SuppressWarnings("unused") String[] args) throws
>> Exception {
>> // Make sure there are exactly 2 parameters left.
>> if (args.length != 2) {
>> System.out.println("ERROR: Wrong number of parameters: " +
>> args.length + " instead of 2.");
>> return printUsage();
>> }
>> JobClient.runJob(createSubmittableJob(args));
>> return 0;
>> }
>> System.out.println("ERROR: Wrong number of parameters: " +
>> args.length + " instead of 2.");
>> return printUsage();
>> }
>> JobClient.runJob(createSubmittableJob(args));
>> return 0;
>> }
>>
>> public Configuration getConf() {
>> return this.conf;
>> }
>>
>> public void setConf(final Configuration c) {
>> this.conf = c;
>> }
>>
>> public static void main(String[] args) throws Exception {
>> int errCode = ToolRunner.run(new Configuration(), new SampleUploader(),
>> args);
>> System.exit(errCode);
>> }
>> }
>>
>>
>
>
--
Dan Zinngrabe
Alchemist -- Mahalo.com
http://www.mahalo.com/member/quellish
dan@mahalo.com
Re: SampleUploader for the trunk hbase
Posted by stack <st...@duboce.net>.
Thanks Alex. Looks great. You want me to add it to the wiki? (Or you
could do it yourself).
Yours does something slightly different it seems; you hardcode the
column name and do a count of splits[1]. You might add to the class
comment a description of what your MR job does.
St.Ack
Alex Newman wrote:
> /// This is very close to the example in the javadoc
> already(Bytes,BatchUpdate) instead of (text/mapwritable), and i find
> it to be the easiest way to get people started/motivated with HBase.
>
>
> package org.apache.hadoop.hbase.mapred;
>
> import org.apache.hadoop.hbase.util.Bytes;
>
> import java.io.IOException;
> import java.util.Iterator;
>
> import org.apache.hadoop.conf.Configuration;
> import org.apache.hadoop.fs.Path;
> import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
> import org.apache.hadoop.hbase.io.BatchUpdate;
>
> import org.apache.hadoop.io.LongWritable;
> import org.apache.hadoop.io.MapWritable;
> import org.apache.hadoop.io.Text;
> import org.apache.hadoop.mapred.JobClient;
> import org.apache.hadoop.mapred.JobConf;
> import org.apache.hadoop.mapred.MapReduceBase;
> import org.apache.hadoop.mapred.Mapper;
> import org.apache.hadoop.mapred.OutputCollector;
> import org.apache.hadoop.mapred.Reporter;
> import org.apache.hadoop.util.Tool;
> import org.apache.hadoop.util.ToolRunner;
> public class SampleUploader extends MapReduceBase
> implements Mapper<LongWritable, Text, ImmutableBytesWritable, BatchUpdate>, Tool
> {
> private static final String NAME = "SampleUploader";
> private Configuration conf;
>
> public JobConf createSubmittableJob(String[] args) {
> JobConf c = new JobConf(getConf(), SampleUploader.class);
> c.setJobName(NAME);
> c.setInputPath(new Path(args[0]));
> c.setMapperClass(this.getClass());
> c.setMapOutputKeyClass(ImmutableBytesWritable.class);
> c.setMapOutputValueClass(BatchUpdate.class);
> c.setReducerClass(TableUploader.class);
> TableReduce.initJob(args[1], TableUploader.class, c);
> return c;
> }
>
> public void map(LongWritable k, Text v,
> OutputCollector<ImmutableBytesWritable, BatchUpdate> output, Reporter r)
> throws IOException {
> // Lines are space-delimited; first item is row, next the columnname and
> // then the third the cell value.
> String tmp = v.toString();
> if (tmp.length() == 0) {
> return;
> }
> String [] splits = v.toString().split(" ");
> String row = splits[0];
> BatchUpdate mw = new BatchUpdate(row);
>
> mw.put( "count:", Bytes.toBytes(splits[1]));
> r.setStatus("Map emitting " + row + " for record " + k.toString());
> output.collect(new ImmutableBytesWritable(row.getBytes()), mw);
> }
>
> public static class TableUploader extends TableReduce<ImmutableBytesWritable,
> BatchUpdate> {
>
>
> @Override
> public void reduce(ImmutableBytesWritable key, Iterator<BatchUpdate> values,
> OutputCollector<ImmutableBytesWritable, BatchUpdate> output,
> @SuppressWarnings("unused") Reporter reporter)
> throws IOException {
>
> while(values.hasNext()) {
> output.collect(key, values.next());
> }
> }
> }
>
>
> static int printUsage() {
> System.out.println(NAME + " <input> <table_name>");
> return -1;
> }
>
> public int run(@SuppressWarnings("unused") String[] args) throws Exception {
> // Make sure there are exactly 2 parameters left.
> if (args.length != 2) {
> System.out.println("ERROR: Wrong number of parameters: " +
> args.length + " instead of 2.");
> return printUsage();
> }
> JobClient.runJob(createSubmittableJob(args));
> return 0;
> }
> System.out.println("ERROR: Wrong number of parameters: " +
> args.length + " instead of 2.");
> return printUsage();
> }
> JobClient.runJob(createSubmittableJob(args));
> return 0;
> }
>
> public Configuration getConf() {
> return this.conf;
> }
>
> public void setConf(final Configuration c) {
> this.conf = c;
> }
>
> public static void main(String[] args) throws Exception {
> int errCode = ToolRunner.run(new Configuration(), new SampleUploader(),
> args);
> System.exit(errCode);
> }
> }
>