You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mapreduce-user@hadoop.apache.org by Nishant Sonar <ni...@synechron.com> on 2010/05/19 09:36:59 UTC
Need Working example for DBOutputFormat
Hello,
Does anybody have a working example of DBOutputFormat that connects to
the DB server (MySQL) and then writes a record to the table?
I tried by following the instruction on
"http://www.cloudera.com/blog/2009/03/database-access-with-hadoop/" as
below but was getting an IOException.
It would be great if anyone could send me an example for Hadoop 0.20.2. The
one below is for an earlier version.
<!-- Runner Class -->
public class EmployeeDBRunner {
public static void main(String[] args) {
Configuration configuration = new Configuration();
JobConf jobConf = new JobConf(configuration,
EmployeeDBRunner.class);
DBConfiguration.configureDB(jobConf, "com.mysql.jdbc.Driver",
"jdbc:mysql://localhost/mydatabase","myuser", "mypass");
String[] fields = { "employee_id", "name" };
DBOutputFormat.setOutput(jobConf, "employees", fields);
JobConf conf = new JobConf(EmployeeDBRunner.class);
conf.setJobName("Employee");
FileInputFormat.addInputPath(conf, new Path(args[0])); //set
input as file
conf.setMapperClass(TokenMapper.class);
conf.setReducerClass(DBReducer.class);
conf.setOutputFormat(DBOutputFormat.class); //set output as
DBOF to output data to a table.
// <Text, IntWritable>
conf.setMapOutputKeyClass(Text.class);
conf.setMapOutputValueClass(IntWritable.class);
// <MyRecord,NullWritable>
conf.setOutputKeyClass(MyRecord.class);
conf.setOutputValueClass(NullWritable.class);
try {
JobClient.runJob(conf);
} catch (IOException e) {
e.printStackTrace();
}
}
}
<!-- Mapper -->
public class TokenMapper extends MapReduceBase implements
Mapper<Object, Text, Text, IntWritable> {
IntWritable single = new IntWritable(1);
public void map(Object arg0, Text line,
OutputCollector<Text, IntWritable> collector, Reporter arg3)
throws IOException {
StringTokenizer stk = new StringTokenizer(line.toString());
while (stk.hasMoreTokens()) {
Text token = new Text(stk.nextToken());
collector.collect(token, single);
}
}
}
<!-- Reducer class-->
public class DBReducer extends MapReduceBase implements
org.apache.hadoop.mapred.Reducer<Text, IntWritable,
MyRecord,NullWritable> {
NullWritable n = NullWritable.get();
public void reduce(Text key, Iterator<IntWritable> values,
OutputCollector<MyRecord,NullWritable> output, Reporter
reporter)
throws IOException {
long sum = 0;
for (; values.hasNext();) {
values.next();
sum++;
}
MyRecord mRecord = new MyRecord(sum, key.toString());
System.out.println(mRecord.getName());
output.collect(mRecord,n);
}
}
Re: Need Working example for DBOutputFormat
Posted by Aaron Kimball <aa...@cloudera.com>.
Nishant,
What exact IOException are you getting?
I think the issue is that you're creating two different JobConf objects
there. They should be merged into one. Just use "jobConf" all the way
through; don't create that second "conf" object.
Good luck,
- Aaron
PS - I'd be remiss if I didn't urge you to check out Sqoop (
http://github.com/cloudera/sqoop) for database imports/exports too :)
On Wed, May 19, 2010 at 1:37 AM, Sonal Goyal <so...@gmail.com> wrote:
> Hi Nishant,
>
> If MySQL is your target database, you can check open source
> http://code.google.com/p/hiho/ which uses load data infile for a faster
> upload to the db.
>
> Let me know if you need any help.
>
> Thanks and Regards,
> Sonal
> www.meghsoft.com
>
>
>
> On Wed, May 19, 2010 at 1:06 PM, Nishant Sonar <ni...@synechron.com>wrote:
>
>> Hello,
>>
>> Does anybody have a working example of DBOutputFormat that connects to
>> the DB server (MySQL) and then writes a record to the table?
>>
>> I tried by following the instruction on "
>> http://www.cloudera.com/blog/2009/03/database-access-with-hadoop/" as
>> below but was getting an IOException.
>>
>> It would be great if anyone could send me an example for Hadoop 0.20.2. The one
>> below is for an earlier version.
>>
>> <!-- Runner Class -->
>>
>> public class EmployeeDBRunner {
>> public static void main(String[] args) {
>> Configuration configuration = new Configuration();
>> JobConf jobConf = new JobConf(configuration,
>> EmployeeDBRunner.class);
>> DBConfiguration.configureDB(jobConf, "com.mysql.jdbc.Driver",
>> "jdbc:mysql://localhost/mydatabase","myuser", "mypass");
>> String[] fields = { "employee_id", "name" };
>> DBOutputFormat.setOutput(jobConf, "employees", fields);
>>
>> JobConf conf = new JobConf(EmployeeDBRunner.class);
>> conf.setJobName("Employee");
>> FileInputFormat.addInputPath(conf, new Path(args[0])); //set input
>> as file
>> conf.setMapperClass(TokenMapper.class);
>> conf.setReducerClass(DBReducer.class);
>> conf.setOutputFormat(DBOutputFormat.class); //set output as DBOF
>> to output data to a table.
>>
>> // <Text, IntWritable>
>> conf.setMapOutputKeyClass(Text.class);
>> conf.setMapOutputValueClass(IntWritable.class);
>>
>> // <MyRecord,NullWritable>
>> conf.setOutputKeyClass(MyRecord.class);
>> conf.setOutputValueClass(NullWritable.class);
>> try {
>> JobClient.runJob(conf);
>> } catch (IOException e) {
>> e.printStackTrace();
>> }
>>
>> }
>> }
>>
>> <!-- Mapper -->
>> public class TokenMapper extends MapReduceBase implements
>> Mapper<Object, Text, Text, IntWritable> {
>> IntWritable single = new IntWritable(1);
>>
>> public void map(Object arg0, Text line,
>> OutputCollector<Text, IntWritable> collector, Reporter arg3)
>> throws IOException {
>> StringTokenizer stk = new StringTokenizer(line.toString());
>> while (stk.hasMoreTokens()) {
>> Text token = new Text(stk.nextToken());
>> collector.collect(token, single);
>> }
>>
>> }
>> }
>>
>> <!-- Reducer class-->
>> public class DBReducer extends MapReduceBase implements
>> org.apache.hadoop.mapred.Reducer<Text, IntWritable,
>> MyRecord,NullWritable> {
>> NullWritable n = NullWritable.get();
>> public void reduce(Text key, Iterator<IntWritable> values,
>> OutputCollector<MyRecord,NullWritable> output, Reporter
>> reporter)
>> throws IOException {
>> long sum = 0;
>> for (; values.hasNext();) {
>> values.next();
>> sum++;
>> }
>> MyRecord mRecord = new MyRecord(sum, key.toString());
>> System.out.println(mRecord.getName());
>> output.collect(mRecord,n);
>> }
>> }
>>
>>
>>
>
Re: Need Working example for DBOutputFormat
Posted by Sonal Goyal <so...@gmail.com>.
Hi Nishant,
If MySQL is your target database, you can check open source
http://code.google.com/p/hiho/ which uses load data infile for a faster
upload to the db.
Let me know if you need any help.
Thanks and Regards,
Sonal
www.meghsoft.com
On Wed, May 19, 2010 at 1:06 PM, Nishant Sonar <ni...@synechron.com>wrote:
> Hello,
>
> Does anybody have a working example of DBOutputFormat that connects to the
> DB server (MySQL) and then writes a record to the table?
>
> I tried by following the instruction on "
> http://www.cloudera.com/blog/2009/03/database-access-with-hadoop/" as
> below but was getting an IOException.
>
> It would be great if anyone could send me an example for Hadoop 0.20.2. The one
> below is for an earlier version.
>
> <!-- Runner Class -->
>
> public class EmployeeDBRunner {
> public static void main(String[] args) {
> Configuration configuration = new Configuration();
> JobConf jobConf = new JobConf(configuration,
> EmployeeDBRunner.class);
> DBConfiguration.configureDB(jobConf, "com.mysql.jdbc.Driver",
> "jdbc:mysql://localhost/mydatabase","myuser", "mypass");
> String[] fields = { "employee_id", "name" };
> DBOutputFormat.setOutput(jobConf, "employees", fields);
>
> JobConf conf = new JobConf(EmployeeDBRunner.class);
> conf.setJobName("Employee");
> FileInputFormat.addInputPath(conf, new Path(args[0])); //set input
> as file
> conf.setMapperClass(TokenMapper.class);
> conf.setReducerClass(DBReducer.class);
> conf.setOutputFormat(DBOutputFormat.class); //set output as DBOF to
> output data to a table.
>
> // <Text, IntWritable>
> conf.setMapOutputKeyClass(Text.class);
> conf.setMapOutputValueClass(IntWritable.class);
>
> // <MyRecord,NullWritable>
> conf.setOutputKeyClass(MyRecord.class);
> conf.setOutputValueClass(NullWritable.class);
> try {
> JobClient.runJob(conf);
> } catch (IOException e) {
> e.printStackTrace();
> }
>
> }
> }
>
> <!-- Mapper -->
> public class TokenMapper extends MapReduceBase implements
> Mapper<Object, Text, Text, IntWritable> {
> IntWritable single = new IntWritable(1);
>
> public void map(Object arg0, Text line,
> OutputCollector<Text, IntWritable> collector, Reporter arg3)
> throws IOException {
> StringTokenizer stk = new StringTokenizer(line.toString());
> while (stk.hasMoreTokens()) {
> Text token = new Text(stk.nextToken());
> collector.collect(token, single);
> }
>
> }
> }
>
> <!-- Reducer class-->
> public class DBReducer extends MapReduceBase implements
> org.apache.hadoop.mapred.Reducer<Text, IntWritable,
> MyRecord,NullWritable> {
> NullWritable n = NullWritable.get();
> public void reduce(Text key, Iterator<IntWritable> values,
> OutputCollector<MyRecord,NullWritable> output, Reporter
> reporter)
> throws IOException {
> long sum = 0;
> for (; values.hasNext();) {
> values.next();
> sum++;
> }
> MyRecord mRecord = new MyRecord(sum, key.toString());
> System.out.println(mRecord.getName());
> output.collect(mRecord,n);
> }
> }
>
>
>