You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@hbase.apache.org by Mark Kerzner <ma...@gmail.com> on 2011/02/02 03:36:15 UTC
Upgrading from HBase 0.20 to 0.89 code question
Hi,
below is a textbook example of using HBase from a MapReduce job. I am trying
to rewrite it in the 0.89 API, and I have not succeeded yet. Can
anyone please give me some pointers?
Thank you very much. Sincerely,
Mark
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.RowResult;
import org.apache.hadoop.hbase.mapred.TableMap;
import org.apache.hadoop.hbase.mapred.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapred.TableReduce;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * MapReduce job (HBase 0.20 / Hadoop {@code mapred} API) that scans the given
 * columns of a source table, counts how many times each distinct non-empty
 * cell value occurs, and writes one row per distinct value into the
 * {@code mailbox-status} table under the {@code stats:} family.
 *
 * <p>Usage: {@code mailboxid-counter <outputdir> <tablename> <column1> [<column2>...]}
 */
public class MailboxIdCount extends Configured implements Tool {

    /** Name of this 'program'; used as the job name and in the usage message. */
    static final String NAME = "mailboxid-counter";

    /**
     * Charset used whenever a String is turned into bytes for HBase. Hadoop's
     * {@code Text} is UTF-8, so using the platform default charset here could
     * produce different bytes on different machines.
     */
    private static final String UTF8 = "UTF-8";

    /** Constant count of one, emitted by the mapper for every non-empty cell. */
    private static final IntWritable ONE = new IntWritable(1);

    /**
     * Mapper: for every non-empty cell of a scanned row, emits
     * {@code (cell value as Text, 1)}.
     */
    static class RowCounterMapper
            implements TableMap<Text, IntWritable> {

        /**
         * Emits one {@code (value, 1)} pair per non-null, non-empty cell of the row.
         *
         * @param row      key of the row being mapped (not used)
         * @param value    the row's cells, keyed by column name
         * @param output   collector receiving the {@code (cell value, 1)} pairs
         * @param reporter progress reporter (not used)
         * @throws IOException if the collector fails
         */
        @Override
        public void map(ImmutableBytesWritable row, RowResult value,
                OutputCollector<Text, IntWritable> output,
                Reporter reporter)
                throws IOException {
            for (Map.Entry<byte[], Cell> e : value.entrySet()) {
                Cell cell = e.getValue();
                if (cell == null) {
                    continue;
                }
                // Fetch the value once instead of once per use.
                byte[] cellValue = cell.getValue();
                if (cellValue.length > 0) {
                    output.collect(new Text(cellValue), ONE);
                }
            }
        }

        @Override
        public void configure(JobConf jc) {
            // Nothing to do.
        }

        @Override
        public void close() throws IOException {
            // Nothing to do.
        }
    }

    /**
     * Reducer: sums the counts for one distinct value and emits a
     * {@link BatchUpdate} whose row is that value and whose single column
     * {@code stats:<value>} holds the decimal count.
     */
    public static class RowCounterReducer
            implements TableReduce<Text, IntWritable> {

        @Override
        public void configure(JobConf jc) {
            // do nothing
        }

        @Override
        public void close() throws IOException {
            // do nothing
        }

        /**
         * Sums all counts for {@code k2} and emits the resulting table update.
         *
         * @param k2    the distinct cell value being counted
         * @param itrtr the partial counts for {@code k2}
         * @param oc    collector receiving the table update
         * @param rprtr progress reporter (not used)
         * @throws IOException if the collector fails or UTF-8 is unavailable
         */
        @Override
        public void reduce(Text k2, Iterator<IntWritable> itrtr,
                OutputCollector<ImmutableBytesWritable, BatchUpdate> oc, Reporter rprtr)
                throws IOException {
            int sum = 0;
            while (itrtr.hasNext()) {
                sum += itrtr.next().get();
            }

            String rowKey = k2.toString();
            String familyCell = "stats:" + rowKey;

            BatchUpdate update = new BatchUpdate(rowKey);
            // Explicit charset: the no-arg getBytes() uses the platform default.
            update.put(familyCell, String.valueOf(sum).getBytes(UTF8));

            // NOTE(review): the emitted key is the column name, not the row key.
            // Presumably the table output format only looks at the BatchUpdate's
            // own row, so this is harmless - confirm against TableOutputFormat.
            oc.collect(new ImmutableBytesWritable(familyCell.getBytes(UTF8)),
                    update);
        }
    }

    /**
     * Builds the job configuration from the command-line arguments.
     *
     * @param args {@code args[0]} is the output directory, {@code args[1]} the
     *             source table name, {@code args[2..]} the columns to scan;
     *             callers must supply at least three elements (see {@link #run})
     * @return the configured, ready-to-submit job
     * @throws IOException if the table map/reduce setup fails
     */
    public JobConf createSubmittableJob(String[] args) throws IOException {
        JobConf c = new JobConf(getConf(), getClass());
        c.setJobName(NAME);

        // Columns are space delimited
        StringBuilder sb = new StringBuilder();
        final int columnoffset = 2;
        for (int i = columnoffset; i < args.length; i++) {
            if (i > columnoffset) {
                sb.append(" ");
            }
            sb.append(args[i]);
        }

        // Second argument is the table name.
        TableMapReduceUtil.initTableMapJob(args[1], sb.toString(),
                RowCounterMapper.class, Text.class, IntWritable.class, c);
        TableMapReduceUtil.initTableReduceJob("mailbox-status",
                RowCounterReducer.class, c);

        // First arg is the output directory.
        FileOutputFormat.setOutputPath(c, new Path(args[0]));
        return c;
    }

    /**
     * Prints the command-line usage.
     *
     * @return {@code -1}, so callers can return it directly as an error code
     */
    static int printUsage() {
        System.out.println(NAME
                + " <outputdir> <tablename> <column1> [<column2>...]");
        return -1;
    }

    /**
     * Validates the arguments, then runs the job.
     *
     * @param args command-line arguments, see {@link #createSubmittableJob}
     * @return {@code 0} once the job has run, or {@code -1} if fewer than three
     *         arguments were given
     * @throws Exception if building or running the job fails
     */
    @Override
    public int run(final String[] args) throws Exception {
        // Make sure there are at least 3 parameters
        if (args.length < 3) {
            System.err.println("ERROR: Wrong number of parameters: " +
                    args.length);
            return printUsage();
        }
        JobClient.runJob(createSubmittableJob(args));
        return 0;
    }

    /**
     * Entry point: runs the tool through {@link ToolRunner} with an HBase
     * configuration and exits with the tool's return code.
     */
    public static void main(String[] args) throws Exception {
        HBaseConfiguration c = new HBaseConfiguration();
        int errCode = ToolRunner.run(c, new MailboxIdCount(), args);
        System.exit(errCode);
    }
}
Re: Upgrading from HBase 0.20 to 0.89 code question
Posted by praba karan <pr...@gmail.com>.
Hey Ryan,
I just uploaded a small sample data set into HBase 0.89. I will post
the MapReduce code after completing the test. First I need to get rid of the
exception I am facing now: when I run the MapReduce program on my
machine, I get the following error.
................
................
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:317)
at org.apache.hadoop.mapred.Child$4.run(Child.java:217)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:396)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1063)
at org.apache.hadoop.mapred.Child.main(Child.java:211)
*attempt_201102010535_0004_m_000000_0:
org.apache.hadoop.hbase.client.NoServerForRegionException: Timed out trying
to locate root region*
attempt_201102010535_0004_m_000000_0: at
org.apache.hadoop.hbase.client.HConnectionManager$TableServers.locateRootRegion(HConnectionManager.java:1089)
attempt_201102010535_0004_m_000000_0: at
org.apache.hadoop.hbase.client.HConnectionManager$TableServers.locateRegion(HConnectionManager.java:668)
.........
....
I guess I am missing some configuration in Hadoop. Please help me resolve
this one.
Regards
Prabakaran
On Wed, Feb 2, 2011 at 11:05 PM, Jean-Daniel Cryans <jd...@apache.org> wrote:
> Basically, BatchUpdate becomes Put and RowResult becomes Result.
>
> J-D
>
> On Tue, Feb 1, 2011 at 6:36 PM, Mark Kerzner <ma...@gmail.com>
> wrote:
> > Hi,
> >
> > below is a textbook example of using HBase from a MapReduce job. I am
> trying
> > to rewrite it in the 0.89 API, and I have not succeeded yet. Can
> > anyone please give me some pointers?
> >
> > Thank you very much. Sincerely,
> > Mark
> >
> > import java.io.IOException;
> > import java.util.Iterator;
> > import java.util.Map;
> > import org.apache.hadoop.conf.Configured;
> > import org.apache.hadoop.fs.Path;
> > import org.apache.hadoop.hbase.HBaseConfiguration;
> > import org.apache.hadoop.hbase.io.BatchUpdate;
> > import org.apache.hadoop.hbase.io.Cell;
> > import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
> > import org.apache.hadoop.hbase.io.RowResult;
> > import org.apache.hadoop.hbase.mapred.TableMap;
> > import org.apache.hadoop.hbase.mapred.TableMapReduceUtil;
> > import org.apache.hadoop.hbase.mapred.TableReduce;
> > import org.apache.hadoop.io.IntWritable;
> > import org.apache.hadoop.io.Text;
> > import org.apache.hadoop.mapred.FileOutputFormat;
> > import org.apache.hadoop.mapred.JobClient;
> > import org.apache.hadoop.mapred.JobConf;
> > import org.apache.hadoop.mapred.OutputCollector;
> > import org.apache.hadoop.mapred.Reporter;
> > import org.apache.hadoop.util.Tool;
> > import org.apache.hadoop.util.ToolRunner;
> >
> > public class MailboxIdCount extends Configured implements Tool {
> > // Name of this 'program'
> >
> > static final String NAME = "mailboxid-counter";
> > private static IntWritable ONE = new IntWritable(1);
> >
> > static class RowCounterMapper
> > implements TableMap<Text, IntWritable> {
> >
> > private static enum Counters {
> >
> > ROWS
> > }
> >
> > @Override
> > public void map(ImmutableBytesWritable row, RowResult value,
> > OutputCollector<Text, IntWritable> output,
> > Reporter reporter)
> > throws IOException {
> > for (Map.Entry<byte[], Cell> e : value.entrySet()) {
> > Cell cell = e.getValue();
> > if (cell != null && cell.getValue().length > 0) {
> > Text text = new Text(cell.getValue());
> > output.collect(text, ONE);
> > }
> > }
> >
> > }
> >
> > @Override
> > public void configure(JobConf jc) {
> > // Nothing to do.
> > }
> >
> > @Override
> > public void close() throws IOException {
> > // Nothing to do.
> > }
> > }
> >
> > public static class RowCounterReducer
> > implements TableReduce<Text, IntWritable> {
> >
> > @Override
> > public void configure(JobConf jc) {
> > // do nothing
> > }
> >
> > @Override
> > public void close() throws IOException {
> > // do nothing
> > }
> >
> > @Override
> > public void reduce(Text k2, Iterator<IntWritable> itrtr,
> > OutputCollector<ImmutableBytesWritable, BatchUpdate> oc, Reporter rprtr)
> > throws IOException {
> > int sum = 0;
> > while (itrtr.hasNext()) {
> > IntWritable val = itrtr.next();
> > sum += val.get();
> > }
> > String family = "stats:";
> > String familyCell = family + k2.toString();
> > BatchUpdate update = new BatchUpdate(k2.toString());
> > update.put(familyCell, String.valueOf(sum).getBytes());
> > oc.collect(new ImmutableBytesWritable(familyCell.getBytes()),
> > update);
> > }
> > }
> >
> > public JobConf createSubmittableJob(String[] args) throws IOException
> {
> > JobConf c = new JobConf(getConf(), getClass());
> > c.setJobName(NAME);
> > // Columns are space delimited
> > StringBuilder sb = new StringBuilder();
> > final int columnoffset = 2;
> > for (int i = columnoffset; i < args.length; i++) {
> > if (i > columnoffset) {
> > sb.append(" ");
> > }
> > sb.append(args[i]);
> > }
> > // Second argument is the table name.
> > TableMapReduceUtil.initTableMapJob(args[1], sb.toString(),
> > RowCounterMapper.class, Text.class, IntWritable.class, c);
> > TableMapReduceUtil.initTableReduceJob("mailbox-status",
> > RowCounterReducer.class, c);
> > // First arg is the output directory.
> > FileOutputFormat.setOutputPath(c, new Path(args[0]));
> > return c;
> > }
> >
> > static int printUsage() {
> > System.out.println(NAME
> > + " <outputdir> <tablename> <column1> [<column2>...]");
> > return -1;
> > }
> >
> > @Override
> > public int run(final String[] args) throws Exception {
> > // Make sure there are at least 3 parameters
> > if (args.length < 3) {
> > System.err.println("ERROR: Wrong number of parameters: " +
> > args.length);
> > return printUsage();
> > }
> > JobClient.runJob(createSubmittableJob(args));
> > return 0;
> > }
> >
> > public static void main(String[] args) throws Exception {
> > HBaseConfiguration c = new HBaseConfiguration();
> > int errCode = ToolRunner.run(c, new MailboxIdCount(), args);
> > System.exit(errCode);
> > }
> > }
> >
>
Re: Upgrading from HBase 0.20 to 0.89 code question
Posted by Jean-Daniel Cryans <jd...@apache.org>.
Basically, BatchUpdate becomes Put and RowResult becomes Result.
J-D
On Tue, Feb 1, 2011 at 6:36 PM, Mark Kerzner <ma...@gmail.com> wrote:
> Hi,
>
> below is a textbook example of using HBase from a MapReduce job. I am trying
> to rewrite it in the 0.89 API, and I have not succeeded yet. Can
> anyone please give me some pointers?
>
> Thank you very much. Sincerely,
> Mark
>
> import java.io.IOException;
> import java.util.Iterator;
> import java.util.Map;
> import org.apache.hadoop.conf.Configured;
> import org.apache.hadoop.fs.Path;
> import org.apache.hadoop.hbase.HBaseConfiguration;
> import org.apache.hadoop.hbase.io.BatchUpdate;
> import org.apache.hadoop.hbase.io.Cell;
> import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
> import org.apache.hadoop.hbase.io.RowResult;
> import org.apache.hadoop.hbase.mapred.TableMap;
> import org.apache.hadoop.hbase.mapred.TableMapReduceUtil;
> import org.apache.hadoop.hbase.mapred.TableReduce;
> import org.apache.hadoop.io.IntWritable;
> import org.apache.hadoop.io.Text;
> import org.apache.hadoop.mapred.FileOutputFormat;
> import org.apache.hadoop.mapred.JobClient;
> import org.apache.hadoop.mapred.JobConf;
> import org.apache.hadoop.mapred.OutputCollector;
> import org.apache.hadoop.mapred.Reporter;
> import org.apache.hadoop.util.Tool;
> import org.apache.hadoop.util.ToolRunner;
>
> public class MailboxIdCount extends Configured implements Tool {
> // Name of this 'program'
>
> static final String NAME = "mailboxid-counter";
> private static IntWritable ONE = new IntWritable(1);
>
> static class RowCounterMapper
> implements TableMap<Text, IntWritable> {
>
> private static enum Counters {
>
> ROWS
> }
>
> @Override
> public void map(ImmutableBytesWritable row, RowResult value,
> OutputCollector<Text, IntWritable> output,
> Reporter reporter)
> throws IOException {
> for (Map.Entry<byte[], Cell> e : value.entrySet()) {
> Cell cell = e.getValue();
> if (cell != null && cell.getValue().length > 0) {
> Text text = new Text(cell.getValue());
> output.collect(text, ONE);
> }
> }
>
> }
>
> @Override
> public void configure(JobConf jc) {
> // Nothing to do.
> }
>
> @Override
> public void close() throws IOException {
> // Nothing to do.
> }
> }
>
> public static class RowCounterReducer
> implements TableReduce<Text, IntWritable> {
>
> @Override
> public void configure(JobConf jc) {
> // do nothing
> }
>
> @Override
> public void close() throws IOException {
> // do nothing
> }
>
> @Override
> public void reduce(Text k2, Iterator<IntWritable> itrtr,
> OutputCollector<ImmutableBytesWritable, BatchUpdate> oc, Reporter rprtr)
> throws IOException {
> int sum = 0;
> while (itrtr.hasNext()) {
> IntWritable val = itrtr.next();
> sum += val.get();
> }
> String family = "stats:";
> String familyCell = family + k2.toString();
> BatchUpdate update = new BatchUpdate(k2.toString());
> update.put(familyCell, String.valueOf(sum).getBytes());
> oc.collect(new ImmutableBytesWritable(familyCell.getBytes()),
> update);
> }
> }
>
> public JobConf createSubmittableJob(String[] args) throws IOException {
> JobConf c = new JobConf(getConf(), getClass());
> c.setJobName(NAME);
> // Columns are space delimited
> StringBuilder sb = new StringBuilder();
> final int columnoffset = 2;
> for (int i = columnoffset; i < args.length; i++) {
> if (i > columnoffset) {
> sb.append(" ");
> }
> sb.append(args[i]);
> }
> // Second argument is the table name.
> TableMapReduceUtil.initTableMapJob(args[1], sb.toString(),
> RowCounterMapper.class, Text.class, IntWritable.class, c);
> TableMapReduceUtil.initTableReduceJob("mailbox-status",
> RowCounterReducer.class, c);
> // First arg is the output directory.
> FileOutputFormat.setOutputPath(c, new Path(args[0]));
> return c;
> }
>
> static int printUsage() {
> System.out.println(NAME
> + " <outputdir> <tablename> <column1> [<column2>...]");
> return -1;
> }
>
> @Override
> public int run(final String[] args) throws Exception {
> // Make sure there are at least 3 parameters
> if (args.length < 3) {
> System.err.println("ERROR: Wrong number of parameters: " +
> args.length);
> return printUsage();
> }
> JobClient.runJob(createSubmittableJob(args));
> return 0;
> }
>
> public static void main(String[] args) throws Exception {
> HBaseConfiguration c = new HBaseConfiguration();
> int errCode = ToolRunner.run(c, new MailboxIdCount(), args);
> System.exit(errCode);
> }
> }
>