You are viewing a plain text version of this content. The canonical link for it is here.

Posted to common-user@hadoop.apache.org by Chris Williams <ch...@gmail.com> on 2010/03/28 21:52:24 UTC

Error converting WordCount to v0.20.x

I am working through the WordCount example to get rid of all the deprecation
warnings.  While running it, my reduce function isn't being called.  Any
ideas?  The code below can also be found here: http://gist.github.com/346975

Thanks!
Chris

package hadoop.examples;

import java.io.IOException;
import java.util.*;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;

/**
 * Word-count MapReduce job written against the {@code org.apache.hadoop.mapreduce}
 * (0.20.x "new") API.
 *
 * <p>The job reads text input, emits {@code (token, 1)} for every
 * whitespace-delimited token, and sums the counts per token in the reducer.
 * It is launched through {@link ToolRunner} so that generic Hadoop options are
 * parsed before {@link #run(String[])} sees the remaining arguments.
 */
public class WordCount extends Configured implements Tool {

	/**
	 * Splits each input line into tokens and emits {@code (token, 1)} for each.
	 */
	public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
		private static final IntWritable ONE = new IntWritable(1);
		// Reused for every record to avoid allocating a new Text per token.
		private final Text word = new Text();

		/**
		 * Tokenizes one line of input and writes a count of one per token.
		 *
		 * @param key     input key supplied by the input format (not used here)
		 * @param value   the line of text to tokenize
		 * @param context sink for the emitted {@code (token, 1)} pairs
		 * @throws IOException          if writing to the context fails
		 * @throws InterruptedException if the task is interrupted while writing
		 */
		@Override
		public void map(LongWritable key, Text value, Context context)
				throws IOException, InterruptedException {
			StringTokenizer tokenizer = new StringTokenizer(value.toString());
			while (tokenizer.hasMoreTokens()) {
				word.set(tokenizer.nextToken());
				context.write(word, ONE);
			}
		}
	}

	/**
	 * Sums all counts received for a token and emits {@code (token, total)}.
	 */
	public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {

		/**
		 * Adds up the values for one key and writes the total.
		 *
		 * <p>The second parameter must be an {@link Iterable}: the new-API
		 * {@code Reducer.reduce} is declared with {@code Iterable<VALUEIN>}.
		 * Declaring it with {@code Iterator} (as in the old {@code mapred} API)
		 * creates an overload instead of an override, so the framework would
		 * call the inherited default reduce and never this method. The
		 * {@code @Override} annotation makes the compiler reject that mistake.
		 *
		 * @param key     the token being counted
		 * @param values  the partial counts emitted for {@code key}
		 * @param context sink for the emitted {@code (token, total)} pair
		 * @throws IOException          if writing to the context fails
		 * @throws InterruptedException if the task is interrupted while writing
		 */
		@Override
		public void reduce(Text key, Iterable<IntWritable> values, Context context)
				throws IOException, InterruptedException {
			int sum = 0;
			for (IntWritable value : values) {
				sum += value.get();
			}
			context.write(key, new IntWritable(sum));
		}
	}

	/**
	 * Command-line entry point; delegates to {@link ToolRunner} and exits with
	 * the status returned by {@link #run(String[])}.
	 *
	 * @param args generic Hadoop options followed by the input and output paths
	 * @throws Exception if the job cannot be configured or submitted
	 */
	public static void main(String[] args) throws Exception {
		int res = ToolRunner.run(new Configuration(), new WordCount(), args);
		System.exit(res);
	}

	/**
	 * Configures and runs the word-count job.
	 *
	 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
	 * @return 0 if the job succeeded, 1 if it failed, 2 if the arguments are missing
	 * @throws Exception if the job cannot be configured or submitted
	 */
	@Override
	public int run(String[] args) throws Exception {
		if (args.length < 2) {
			// Fail with a usage message instead of an ArrayIndexOutOfBoundsException.
			System.err.println("Usage: WordCount <input path> <output path>");
			ToolRunner.printGenericCommandUsage(System.err);
			return 2;
		}

		Configuration conf = getConf();
		Job job = new Job(conf, "wordcount");

		job.setJarByClass(WordCount.class);
		job.setMapperClass(Map.class);
		job.setReducerClass(Reduce.class);
		// No combiner is configured; Reduce only sums its inputs, so it could
		// also be registered via job.setCombinerClass(Reduce.class) if desired.

		job.setInputFormatClass(TextInputFormat.class);
		job.setOutputFormatClass(TextOutputFormat.class);

		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(IntWritable.class);

		FileInputFormat.addInputPath(job, new Path(args[0]));
		FileOutputFormat.setOutputPath(job, new Path(args[1]));

		return job.waitForCompletion(true) ? 0 : 1;
	}
}

-- 
View this message in context: http://n3.nabble.com/Error-converting-WordCount-to-v0-20-x-tp682061p682061.html
Sent from the Users mailing list archive at Nabble.com.

Re: Error converting WordCount to v0.20.x

Posted by slim tebourbi <sl...@gmail.com>.

I tried the same thing and noticed that even the map function was not
executed!

Here are the logs:

$ hadoop jar wordcount.jar org.stebourbi.hadoop.training.WordCount input
output

10/04/01 23:39:53 INFO security.Groups: Group mapping
impl=org.apache.hadoop.security.ShellBasedUnixGroupsMapping;
cacheTimeout=300000
10/04/01 23:39:53 WARN conf.Configuration: mapred.task.id is deprecated.
Instead, use mapreduce.task.attempt.id

10/04/01 23:39:53 DEBUG mapreduce.JobSubmitter: Configuring job
job_201004012334_0007 with
hdfs://localhost:9000/tmp/hadoop-tebourbi/mapred/staging/tebourbi/.staging/job_201004012334_0007
as the submit dir
10/04/01 23:39:53 WARN mapreduce.JobSubmitter: Use GenericOptionsParser for
parsing the arguments. Applications should implement Tool for the same.
10/04/01 23:39:53 DEBUG mapreduce.JobSubmitter: default FileSystem:
hdfs://localhost:9000
10/04/01 23:39:54 DEBUG mapreduce.JobSubmitter: Creating splits at
hdfs://localhost:9000/tmp/hadoop-tebourbi/mapred/staging/tebourbi/.staging/job_201004012334_0007
10/04/01 23:39:54 INFO input.FileInputFormat: Total input paths to process :
3
10/04/01 23:39:54 DEBUG input.FileInputFormat: Total # of splits: 3
10/04/01 23:39:54 WARN conf.Configuration: mapred.map.tasks is deprecated.
Instead, use mapreduce.job.maps
10/04/01 23:39:54 INFO mapreduce.JobSubmitter: number of splits:3
10/04/01 23:39:54 INFO mapreduce.JobSubmitter: adding the following
namenodes' delegation tokens:null
10/04/01 23:39:54 INFO mapreduce.Job: Running job: job_201004012334_0007
10/04/01 23:39:55 INFO mapreduce.Job:  map 0% reduce 0%
10/04/01 23:39:55 INFO mapreduce.Job: Job complete: job_201004012334_0007
10/04/01 23:39:55 INFO mapreduce.Job: Counters: 4
    Job Counters
        Total time spent by all maps waiting after reserving slots (ms)=0
        Total time spent by all reduces waiting after reserving slots (ms)=0
        SLOTS_MILLIS_MAPS=0
        SLOTS_MILLIS_REDUCES=0


However, the same code works well in Eclipse when run as a plain Java program!

Slim.

2010/3/28 Chris Williams <ch...@gmail.com>

>
> I am working through the WordCount example to get rid of all the
> deprecation
> warnings.  While running it, my reduce function isn't being called.  Any
> ideas?  The code below can also be found here:
> http://gist.github.com/346975
>
> Thanks!
> Chris
>
> package hadoop.examples;
>
> import java.io.IOException;
> import java.util.*;
>
> import org.apache.hadoop.fs.Path;
> import org.apache.hadoop.io.*;
> import org.apache.hadoop.mapreduce.Job;
> import org.apache.hadoop.mapreduce.Mapper;
> import org.apache.hadoop.mapreduce.Reducer;
> import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
> import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
> import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
> import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
> import org.apache.hadoop.util.Tool;
> import org.apache.hadoop.util.ToolRunner;
> import org.apache.hadoop.conf.Configuration;
> import org.apache.hadoop.conf.Configured;
>
> public class WordCount extends Configured implements Tool {
>
>        public static class Map extends Mapper<LongWritable, Text, Text,
> IntWritable> {
>                private final static IntWritable one = new IntWritable(1);
>                private Text word = new Text();
>
>                public void map(LongWritable key, Text value, Context
> context)
>                                throws IOException, InterruptedException {
>                        String line = value.toString();
>                        StringTokenizer tokenizer = new
> StringTokenizer(line);
>                        while (tokenizer.hasMoreTokens()) {
>                                word.set(tokenizer.nextToken());
>                                context.write(word, one);
>                        }
>                }
>        }
>
>        public static class Reduce extends Reducer<Text, IntWritable, Text,
> IntWritable> {
>                public void reduce(Text key, Iterator<IntWritable> values,
> Context
> context)
>                                throws IOException, InterruptedException {
>                        int sum = 0;
>                        while (values.hasNext()) {
>                                sum += values.next().get();
>                        }
>                        context.write(key, new IntWritable(sum));
>                }
>        }
>
>        public static void main(String[] args) throws Exception {
>                int res = ToolRunner.run(new Configuration(), new
> WordCount(), args);
>                System.exit(res);
>        }
>
>        @Override
>        public int run(String[] args) throws Exception {
>                Configuration conf = getConf();
>                Job job = new Job(conf, "wordcount");
>
>                job.setJarByClass(WordCount.class);
>                job.setMapperClass(Map.class);
>                job.setReducerClass(Reduce.class);
>                //job.setCombinerClass(Reduce.class);
>
>                job.setInputFormatClass(TextInputFormat.class);
>                job.setOutputFormatClass(TextOutputFormat.class);
>
>                job.setOutputKeyClass(Text.class);
>                job.setOutputValueClass(IntWritable.class);
>
>                FileInputFormat.addInputPath(job, new Path(args[0]));
>                FileOutputFormat.setOutputPath(job, new Path(args[1]));
>
>                return job.waitForCompletion(true) ? 0 : 1;
>        }
> }
>
> --
> View this message in context:
> http://n3.nabble.com/Error-converting-WordCount-to-v0-20-x-tp682061p682061.html
> Sent from the Users mailing list archive at Nabble.com.
>