You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@hbase.apache.org by Mark Kerzner <ma...@gmail.com> on 2011/02/02 03:36:15 UTC

Upgrading from HBase 0.20 to 0.89 code question

Hi,

below is a textbook example of using HBase from a MapReduce job. I am trying
to rewrite it in the 0.89 API, and I have not succeeded yet. Can
anyone please give me some pointers?

Thank you very much. Sincerely,
Mark

import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.RowResult;
import org.apache.hadoop.hbase.mapred.TableMap;
import org.apache.hadoop.hbase.mapred.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapred.TableReduce;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * MapReduce job (HBase 0.20 / {@code org.apache.hadoop.mapred} API) that counts
 * how many times each distinct non-empty cell value occurs in the selected
 * columns of a source table, and writes one row per distinct value into the
 * {@code mailbox-status} table under the {@code stats:} family.
 *
 * <p>Usage: {@code mailboxid-counter <outputdir> <tablename> <column1> [<column2>...]}
 */
public class MailboxIdCount extends Configured implements Tool {

    /** Name of this 'program'; used as the job name and in the usage message. */
    static final String NAME = "mailboxid-counter";

    /** The constant count emitted by the mapper for every non-empty cell value. */
    private static final IntWritable ONE = new IntWritable(1);

    /**
     * Charset used whenever a String is turned into bytes. Text.toString()
     * decodes UTF-8, so encoding with the platform default (as a bare
     * getBytes() does) could produce different bytes on differently
     * configured JVMs.
     */
    private static final String CHARSET = "UTF-8";

    /**
     * Emits {@code (cell value, 1)} for every non-empty cell of every row
     * handed to it.
     */
    static class RowCounterMapper
            implements TableMap<Text, IntWritable> {

        /** Job counters maintained by this mapper. */
        private static enum Counters {

            /** Number of rows passed to {@link #map}. */
            ROWS
        }

        /**
         * @param row      key of the current row (not used for the output)
         * @param value    cells of the current row for the requested columns
         * @param output   receives one {@code (cell value, ONE)} pair per non-empty cell
         * @param reporter used to bump the {@code ROWS} counter
         * @throws IOException if the collector fails
         */
        @Override
        public void map(ImmutableBytesWritable row, RowResult value,
                OutputCollector<Text, IntWritable> output,
                Reporter reporter)
                throws IOException {
            // The counter was declared but never incremented; count each row seen.
            reporter.incrCounter(Counters.ROWS, 1);
            for (Map.Entry<byte[], Cell> e : value.entrySet()) {
                Cell cell = e.getValue();
                if (cell == null) {
                    continue;
                }
                // Read the value once and guard against a missing payload.
                byte[] bytes = cell.getValue();
                if (bytes != null && bytes.length > 0) {
                    output.collect(new Text(bytes), ONE);
                }
            }
        }

        @Override
        public void configure(JobConf jc) {
            // Nothing to do.
        }

        @Override
        public void close() throws IOException {
            // Nothing to do.
        }
    }

    /**
     * Sums the counts for one distinct value and emits a {@link BatchUpdate}
     * that stores the total in column {@code stats:<value>} of row {@code <value>}.
     */
    public static class RowCounterReducer
            implements TableReduce<Text, IntWritable> {

        @Override
        public void configure(JobConf jc) {
            // do nothing
        }

        @Override
        public void close() throws IOException {
            // do nothing
        }

        /**
         * @param k2    the distinct cell value being counted; also used as the output row
         * @param itrtr the partial counts emitted for {@code k2}
         * @param oc    receives the update for the output table
         * @param rprtr unused
         * @throws IOException if the collector fails or UTF-8 is unavailable
         */
        @Override
        public void reduce(Text k2, Iterator<IntWritable> itrtr,
                OutputCollector<ImmutableBytesWritable, BatchUpdate> oc, Reporter rprtr)
                throws IOException {
            // long rather than int so very frequent values cannot overflow silently.
            long sum = 0;
            while (itrtr.hasNext()) {
                sum += itrtr.next().get();
            }
            String rowKey = k2.toString();
            String familyCell = "stats:" + rowKey;
            BatchUpdate update = new BatchUpdate(rowKey);
            update.put(familyCell, String.valueOf(sum).getBytes(CHARSET));
            // Key the output by the row being updated, matching the row carried
            // inside the BatchUpdate (previously the column name was used here).
            oc.collect(new ImmutableBytesWritable(rowKey.getBytes(CHARSET)), update);
        }
    }

    /**
     * Builds the job configuration from the command-line arguments.
     *
     * @param args {@code args[0]} output directory, {@code args[1]} source table,
     *             {@code args[2..]} columns to scan; the caller must supply at
     *             least three elements
     * @return the configured job, ready to be submitted
     * @throws IOException if job setup fails
     */
    public JobConf createSubmittableJob(String[] args) throws IOException {
        JobConf c = new JobConf(getConf(), getClass());
        c.setJobName(NAME);
        // Columns are space delimited
        final int columnoffset = 2;
        StringBuilder sb = new StringBuilder();
        for (int i = columnoffset; i < args.length; i++) {
            if (i > columnoffset) {
                sb.append(" ");
            }
            sb.append(args[i]);
        }
        // Second argument is the table name.
        TableMapReduceUtil.initTableMapJob(args[1], sb.toString(),
                RowCounterMapper.class, Text.class, IntWritable.class, c);
        TableMapReduceUtil.initTableReduceJob("mailbox-status",
                RowCounterReducer.class, c);
        // First arg is the output directory.
        FileOutputFormat.setOutputPath(c, new Path(args[0]));
        return c;
    }

    /**
     * Prints the command-line synopsis.
     *
     * @return {@code -1}, so callers can {@code return printUsage();}
     */
    static int printUsage() {
        System.out.println(NAME
                + " <outputdir> <tablename> <column1> [<column2>...]");
        return -1;
    }

    /**
     * Validates the arguments, then submits the job and waits for it to finish.
     *
     * @param args see {@link #createSubmittableJob(String[])}
     * @return {@code 0} when the job was run, {@code -1} on a usage error
     * @throws Exception if job setup or execution fails
     */
    @Override
    public int run(final String[] args) throws Exception {
        // Make sure there are at least 3 parameters
        if (args.length < 3) {
            System.err.println("ERROR: Wrong number of parameters: "
                    + args.length);
            return printUsage();
        }
        JobClient.runJob(createSubmittableJob(args));
        return 0;
    }

    /**
     * Command-line entry point; exits the JVM with the code returned by
     * {@link #run(String[])}.
     */
    public static void main(String[] args) throws Exception {
        HBaseConfiguration c = new HBaseConfiguration();
        int errCode = ToolRunner.run(c, new MailboxIdCount(), args);
        System.exit(errCode);
    }
}

Re: Upgrading from HBase 0.20 to 0.89 code question

Posted by praba karan <pr...@gmail.com>.
Hey Ryan,

I have just uploaded a small sample data set into HBase 0.89. I will post
the MapReduce code after completing the test. First I need to get rid of the
exception I am facing now: when I run the MapReduce program on my
machine, I get the following error.

................
................
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:317)
        at org.apache.hadoop.mapred.Child$4.run(Child.java:217)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:396)
        at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1063)
        at org.apache.hadoop.mapred.Child.main(Child.java:211)

*attempt_201102010535_0004_m_000000_0:
org.apache.hadoop.hbase.client.NoServerForRegionException: Timed out trying
to locate root region*
attempt_201102010535_0004_m_000000_0:   at
org.apache.hadoop.hbase.client.HConnectionManager$TableServers.locateRootRegion(HConnectionManager.java:1089)
attempt_201102010535_0004_m_000000_0:   at
org.apache.hadoop.hbase.client.HConnectionManager$TableServers.locateRegion(HConnectionManager.java:668)
.........
....

I guess I am missing some configuration in Hadoop. Please help me resolve
this.


Regards
Prabakaran







On Wed, Feb 2, 2011 at 11:05 PM, Jean-Daniel Cryans <jd...@apache.org> wrote:

> Basically, BatchUpdate becomes Put and RowResult becomes Result.
>
> J-D
>
> On Tue, Feb 1, 2011 at 6:36 PM, Mark Kerzner <ma...@gmail.com>
> wrote:
> > Hi,
> >
> > below is a textbook example of using HBase from a MapReduce job. I am
> trying
> > to rewrite it in the 0.89 API, and I have not succeeded yet. Can
> > anyone please give me some pointers?
> >
> > Thank you very much. Sincerely,
> > Mark
> >
> > import java.io.IOException;
> > import java.util.Iterator;
> > import java.util.Map;
> > import org.apache.hadoop.conf.Configured;
> > import org.apache.hadoop.fs.Path;
> > import org.apache.hadoop.hbase.HBaseConfiguration;
> > import org.apache.hadoop.hbase.io.BatchUpdate;
> > import org.apache.hadoop.hbase.io.Cell;
> > import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
> > import org.apache.hadoop.hbase.io.RowResult;
> > import org.apache.hadoop.hbase.mapred.TableMap;
> > import org.apache.hadoop.hbase.mapred.TableMapReduceUtil;
> > import org.apache.hadoop.hbase.mapred.TableReduce;
> > import org.apache.hadoop.io.IntWritable;
> > import org.apache.hadoop.io.Text;
> > import org.apache.hadoop.mapred.FileOutputFormat;
> > import org.apache.hadoop.mapred.JobClient;
> > import org.apache.hadoop.mapred.JobConf;
> > import org.apache.hadoop.mapred.OutputCollector;
> > import org.apache.hadoop.mapred.Reporter;
> > import org.apache.hadoop.util.Tool;
> > import org.apache.hadoop.util.ToolRunner;
> >
> > public class MailboxIdCount extends Configured implements Tool {
> >    // Name of this 'program'
> >
> >    static final String NAME = "mailboxid-counter";
> >    private static IntWritable ONE = new IntWritable(1);
> >
> >    static class RowCounterMapper
> >            implements TableMap<Text, IntWritable> {
> >
> >        private static enum Counters {
> >
> >            ROWS
> >        }
> >
> >        @Override
> >        public void map(ImmutableBytesWritable row, RowResult value,
> >                OutputCollector<Text, IntWritable> output,
> >                Reporter reporter)
> >                throws IOException {
> >            for (Map.Entry<byte[], Cell> e : value.entrySet()) {
> >                Cell cell = e.getValue();
> >                if (cell != null && cell.getValue().length > 0) {
> >                    Text text = new Text(cell.getValue());
> >                    output.collect(text, ONE);
> >                }
> >            }
> >
> >        }
> >
> >        @Override
> >        public void configure(JobConf jc) {
> >            // Nothing to do.
> >        }
> >
> >        @Override
> >        public void close() throws IOException {
> >            // Nothing to do.
> >        }
> >    }
> >
> >    public static class RowCounterReducer
> >            implements TableReduce<Text, IntWritable> {
> >
> >        @Override
> >        public void configure(JobConf jc) {
> >            // do nothing
> >        }
> >
> >        @Override
> >        public void close() throws IOException {
> >            // do nothing
> >        }
> >
> >        @Override
> >        public void reduce(Text k2, Iterator<IntWritable> itrtr,
> > OutputCollector<ImmutableBytesWritable, BatchUpdate> oc, Reporter rprtr)
> > throws IOException {
> >            int sum = 0;
> >            while (itrtr.hasNext()) {
> >                IntWritable val = itrtr.next();
> >                sum += val.get();
> >            }
> >            String family = "stats:";
> >            String familyCell = family + k2.toString();
> >            BatchUpdate update = new BatchUpdate(k2.toString());
> >            update.put(familyCell, String.valueOf(sum).getBytes());
> >            oc.collect(new ImmutableBytesWritable(familyCell.getBytes()),
> > update);
> >        }
> >    }
> >
> >    public JobConf createSubmittableJob(String[] args) throws IOException
> {
> >        JobConf c = new JobConf(getConf(), getClass());
> >        c.setJobName(NAME);
> >        // Columns are space delimited
> >        StringBuilder sb = new StringBuilder();
> >        final int columnoffset = 2;
> >        for (int i = columnoffset; i < args.length; i++) {
> >            if (i > columnoffset) {
> >                sb.append(" ");
> >            }
> >            sb.append(args[i]);
> >        }
> >        // Second argument is the table name.
> >        TableMapReduceUtil.initTableMapJob(args[1], sb.toString(),
> >                RowCounterMapper.class, Text.class, IntWritable.class, c);
> >        TableMapReduceUtil.initTableReduceJob("mailbox-status",
> > RowCounterReducer.class, c);
> >        // First arg is the output directory.
> >        FileOutputFormat.setOutputPath(c, new Path(args[0]));
> >        return c;
> >    }
> >
> >    static int printUsage() {
> >        System.out.println(NAME
> >                + " <outputdir> <tablename> <column1> [<column2>...]");
> >        return -1;
> >    }
> >
> >    @Override
> >    public int run(final String[] args) throws Exception {
> >        // Make sure there are at least 3 parameters
> >        if (args.length < 3) {
> >            System.err.println("ERROR: Wrong number of parameters: " +
> > args.length);
> >            return printUsage();
> >        }
> >        JobClient.runJob(createSubmittableJob(args));
> >        return 0;
> >    }
> >
> >    public static void main(String[] args) throws Exception {
> >        HBaseConfiguration c = new HBaseConfiguration();
> >        int errCode = ToolRunner.run(c, new MailboxIdCount(), args);
> >        System.exit(errCode);
> >    }
> > }
> >
>

Re: Upgrading from HBase 0.20 to 0.89 code question

Posted by Jean-Daniel Cryans <jd...@apache.org>.
Basically, BatchUpdate becomes Put and RowResult becomes Result.

J-D

On Tue, Feb 1, 2011 at 6:36 PM, Mark Kerzner <ma...@gmail.com> wrote:
> Hi,
>
> below is a textbook example of using HBase from a MapReduce job. I am trying
> to rewrite it in the 0.89 API, and I have not succeeded yet. Can
> anyone please give me some pointers?
>
> Thank you very much. Sincerely,
> Mark
>
> import java.io.IOException;
> import java.util.Iterator;
> import java.util.Map;
> import org.apache.hadoop.conf.Configured;
> import org.apache.hadoop.fs.Path;
> import org.apache.hadoop.hbase.HBaseConfiguration;
> import org.apache.hadoop.hbase.io.BatchUpdate;
> import org.apache.hadoop.hbase.io.Cell;
> import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
> import org.apache.hadoop.hbase.io.RowResult;
> import org.apache.hadoop.hbase.mapred.TableMap;
> import org.apache.hadoop.hbase.mapred.TableMapReduceUtil;
> import org.apache.hadoop.hbase.mapred.TableReduce;
> import org.apache.hadoop.io.IntWritable;
> import org.apache.hadoop.io.Text;
> import org.apache.hadoop.mapred.FileOutputFormat;
> import org.apache.hadoop.mapred.JobClient;
> import org.apache.hadoop.mapred.JobConf;
> import org.apache.hadoop.mapred.OutputCollector;
> import org.apache.hadoop.mapred.Reporter;
> import org.apache.hadoop.util.Tool;
> import org.apache.hadoop.util.ToolRunner;
>
> public class MailboxIdCount extends Configured implements Tool {
>    // Name of this 'program'
>
>    static final String NAME = "mailboxid-counter";
>    private static IntWritable ONE = new IntWritable(1);
>
>    static class RowCounterMapper
>            implements TableMap<Text, IntWritable> {
>
>        private static enum Counters {
>
>            ROWS
>        }
>
>        @Override
>        public void map(ImmutableBytesWritable row, RowResult value,
>                OutputCollector<Text, IntWritable> output,
>                Reporter reporter)
>                throws IOException {
>            for (Map.Entry<byte[], Cell> e : value.entrySet()) {
>                Cell cell = e.getValue();
>                if (cell != null && cell.getValue().length > 0) {
>                    Text text = new Text(cell.getValue());
>                    output.collect(text, ONE);
>                }
>            }
>
>        }
>
>        @Override
>        public void configure(JobConf jc) {
>            // Nothing to do.
>        }
>
>        @Override
>        public void close() throws IOException {
>            // Nothing to do.
>        }
>    }
>
>    public static class RowCounterReducer
>            implements TableReduce<Text, IntWritable> {
>
>        @Override
>        public void configure(JobConf jc) {
>            // do nothing
>        }
>
>        @Override
>        public void close() throws IOException {
>            // do nothing
>        }
>
>        @Override
>        public void reduce(Text k2, Iterator<IntWritable> itrtr,
> OutputCollector<ImmutableBytesWritable, BatchUpdate> oc, Reporter rprtr)
> throws IOException {
>            int sum = 0;
>            while (itrtr.hasNext()) {
>                IntWritable val = itrtr.next();
>                sum += val.get();
>            }
>            String family = "stats:";
>            String familyCell = family + k2.toString();
>            BatchUpdate update = new BatchUpdate(k2.toString());
>            update.put(familyCell, String.valueOf(sum).getBytes());
>            oc.collect(new ImmutableBytesWritable(familyCell.getBytes()),
> update);
>        }
>    }
>
>    public JobConf createSubmittableJob(String[] args) throws IOException {
>        JobConf c = new JobConf(getConf(), getClass());
>        c.setJobName(NAME);
>        // Columns are space delimited
>        StringBuilder sb = new StringBuilder();
>        final int columnoffset = 2;
>        for (int i = columnoffset; i < args.length; i++) {
>            if (i > columnoffset) {
>                sb.append(" ");
>            }
>            sb.append(args[i]);
>        }
>        // Second argument is the table name.
>        TableMapReduceUtil.initTableMapJob(args[1], sb.toString(),
>                RowCounterMapper.class, Text.class, IntWritable.class, c);
>        TableMapReduceUtil.initTableReduceJob("mailbox-status",
> RowCounterReducer.class, c);
>        // First arg is the output directory.
>        FileOutputFormat.setOutputPath(c, new Path(args[0]));
>        return c;
>    }
>
>    static int printUsage() {
>        System.out.println(NAME
>                + " <outputdir> <tablename> <column1> [<column2>...]");
>        return -1;
>    }
>
>    @Override
>    public int run(final String[] args) throws Exception {
>        // Make sure there are at least 3 parameters
>        if (args.length < 3) {
>            System.err.println("ERROR: Wrong number of parameters: " +
> args.length);
>            return printUsage();
>        }
>        JobClient.runJob(createSubmittableJob(args));
>        return 0;
>    }
>
>    public static void main(String[] args) throws Exception {
>        HBaseConfiguration c = new HBaseConfiguration();
>        int errCode = ToolRunner.run(c, new MailboxIdCount(), args);
>        System.exit(errCode);
>    }
> }
>