You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@hadoop.apache.org by xeonmailinglist <xe...@gmail.com> on 2015/09/02 17:27:59 UTC
Interface expected in the map definition?
I am setting up my wordcount example, which is very similar to the
WordCount example that can be found on the Internet.
1.
The MyMap class extends and implements the same classes as the ones
defined in the original wordcount example, but in my case I get the
compile error “Interface expected here”. I really don’t understand why
I get this error. See my example below [1]. Can anyone help?
2.
Is it possible to access the JobConf variable inside the map or
reduce methods?
[1] My Wordcount example
|package org.apache.hadoop.mapred.examples; import
org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable; import
org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.*; import
org.apache.hadoop.mapreduce.Job; import
org.apache.hadoop.mapreduce.Mapper; import
org.apache.hadoop.mapreduce.ReduceContext; import
org.apache.hadoop.mapreduce.Reducer; import
org.apache.hadoop.util.GenericOptionsParser; import java.io.IOException;
import java.util.ArrayList; import java.util.Iterator; import
java.util.List; import java.util.StringTokenizer; /** * My example of a
common wordcount. Compare with the official WordCount.class to
understand the differences between both classes. */ public class
MyWordCount { public static class MyMap extends MapReduceBase implements
Mapper<LongWritable, Text, Text, IntWritable> { <<<< Interface expected
here!!! private final static IntWritable one = new IntWritable(1);
private Text word = new Text(); public void map(LongWritable key, Text
value, OutputCollector<Text, IntWritable> output, Reporter reporter)
throws IOException { String line = value.toString(); StringTokenizer
tokenizer = new StringTokenizer(line); while (tokenizer.hasMoreTokens())
{ word.set(tokenizer.nextToken()); output.collect(word, one); } } }
public static class MyReducer extends
Reducer<Text,IntWritable,Text,IntWritable> { private IntWritable result
= new IntWritable(); MedusaDigests parser = new MedusaDigests(); public
void reduce(Text key, Iterable<IntWritable> values, Context context )
throws IOException, InterruptedException { int sum = 0; for (IntWritable
val : values) { System.out.println(" - key ( " +
key.getClass().toString() + "): " + key.toString() + " value ( " +
val.getClass().toString() + " ): " + val.toString()); sum += val.get();
} result.set(sum); context.write(key, result); } public void run(Context
context) throws IOException, InterruptedException { setup(context); try
{ while (context.nextKey()) { System.out.println("Key: " +
context.getCurrentKey()); reduce(context.getCurrentKey(),
context.getValues(), context); // If a back up store is used, reset it
Iterator<IntWritable> iter = context.getValues().iterator(); if(iter
instanceof ReduceContext.ValueIterator) {
((ReduceContext.ValueIterator<IntWritable>)iter).resetBackupStore(); } }
} finally { cleanup(context); } } protected void cleanup(Context
context) throws IOException, InterruptedException {
parser.cleanup(context); } } /** Identity mapper set by the user. */
public static class MyFullyIndentityMapper extends Mapper<Object, Text,
Text, IntWritable>{ private Text word = new Text(); private IntWritable
val = new IntWritable(); public void map(Object key, Text value, Context
context ) throws IOException, InterruptedException { StringTokenizer itr
= new StringTokenizer(value.toString()); word.set(itr.nextToken());
val.set(Integer.valueOf(itr.nextToken())); context.write(word, val); }
public void run(Context context) throws IOException,
InterruptedException { setup(context); try { while
(context.nextKeyValue()) { System.out.println("Key ( " +
context.getCurrentKey().getClass().getName() + " ): " +
context.getCurrentKey() + " Value (" +
context.getCurrentValue().getClass().getName() + "): " +
context.getCurrentValue()); map(context.getCurrentKey(),
context.getCurrentValue(), context); } } finally { cleanup(context); } }
} public static void main(String[] args) throws Exception {
GenericOptionsParser parser = new GenericOptionsParser(new
Configuration(), args); String[] otherArgs = parser.getRemainingArgs();
if (otherArgs.length < 2) { System.err.println("Usage: wordcount
[<in>...] <out>"); System.exit(2); } // path that contains the file with
all attributes necessary to the execution of the job Medusa execution =
new Medusa(args); // first map tasks JobConf conf = new
JobConf(MyWordCount.class); conf.setJobName("wordcount");
conf.setClass("mapreduce.job.map.identity.class",
MyFullyIndentityMapper.class, Mapper.class);
System.out.println(conf.toString());
conf.setJarByClass(MyWordCount.class); conf.setMapperClass(MyMap.class);
conf.setPartitionerClass(MyHashPartitioner.class);
conf.setReducerClass(MyReducer.class);
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(IntWritable.class); conf.setNumReduceTasks(1);
List<Path> inputPaths = new ArrayList<Path>(); for (int i = 0; i <
otherArgs.length - 1; ++i) { inputPaths.add(new Path(otherArgs[i])); }
Path outputPath = new Path(otherArgs[otherArgs.length - 1]);
execution.setInputPath(inputPaths); execution.setOutputPath(outputPath);
// launch the job directly execution.submit(new Job(conf)); } } |
Re: Interface expected in the map definition?
Posted by "sreebalineni ." <sr...@gmail.com>.
That must be caused by jar files missing from the classpath. Check this link:
http://stackoverflow.com/questions/15127082/running-map-reduce-job-on-cdh4-example
On Wed, Sep 2, 2015 at 8:57 PM, xeonmailinglist <xe...@gmail.com>
wrote:
> I am setting my wordcount example, which is very similar to the Wordcount
> example that we find in the Internet.
>
> 1.
>
> The MyMap class extends and implements the same classes as the ones
> defined in the original wordcount example, but in my case I get the error
> of “Interface expected here”. I really don’t understand why I get this
> error. See my example below [1]. Any help here?
> 2.
>
> Is it possible to access the JobConf variable inside the map or reduce
> methods?
>
>
> [1] My Wordcount example
>
>
> package org.apache.hadoop.mapred.examples;
>
> import org.apache.hadoop.conf.Configuration;
> import org.apache.hadoop.fs.Path;
> import org.apache.hadoop.io.IntWritable;
> import org.apache.hadoop.io.LongWritable;
> import org.apache.hadoop.io.Text;
> import org.apache.hadoop.mapred.*;
> import org.apache.hadoop.mapreduce.Job;
> import org.apache.hadoop.mapreduce.Mapper;
> import org.apache.hadoop.mapreduce.ReduceContext;
> import org.apache.hadoop.mapreduce.Reducer;
> import org.apache.hadoop.util.GenericOptionsParser;
>
> import java.io.IOException;
> import java.util.ArrayList;
> import java.util.Iterator;
> import java.util.List;
> import java.util.StringTokenizer;
>
> /**
> * My example of a common wordcount. Compare with the official WordCount.class to understand the differences between both classes.
> */
> public class MyWordCount {
>
> public static class MyMap extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> { <<<< Interface expected here!!!
> private final static IntWritable one = new IntWritable(1);
> private Text word = new Text();
>
> public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
> String line = value.toString();
> StringTokenizer tokenizer = new StringTokenizer(line);
> while (tokenizer.hasMoreTokens()) {
> word.set(tokenizer.nextToken());
> output.collect(word, one);
> }
> }
> }
>
> public static class MyReducer
> extends Reducer<Text,IntWritable,Text,IntWritable> {
> private IntWritable result = new IntWritable();
> MedusaDigests parser = new MedusaDigests();
>
> public void reduce(Text key, Iterable<IntWritable> values,
> Context context
> ) throws IOException, InterruptedException {
> int sum = 0;
> for (IntWritable val : values) {
> System.out.println(" - key ( " + key.getClass().toString() + "): " + key.toString()
> + " value ( " + val.getClass().toString() + " ): " + val.toString());
> sum += val.get();
> }
> result.set(sum);
> context.write(key, result);
> }
>
> public void run(Context context) throws IOException, InterruptedException {
> setup(context);
> try {
> while (context.nextKey()) {
> System.out.println("Key: " + context.getCurrentKey());
> reduce(context.getCurrentKey(), context.getValues(), context);
> // If a back up store is used, reset it
> Iterator<IntWritable> iter = context.getValues().iterator();
> if(iter instanceof ReduceContext.ValueIterator) {
> ((ReduceContext.ValueIterator<IntWritable>)iter).resetBackupStore();
> }
> }
> } finally {
> cleanup(context);
> }
> }
>
> protected void cleanup(Context context)
> throws IOException, InterruptedException {
> parser.cleanup(context);
> }
> }
>
> /** Identity mapper set by the user. */
> public static class MyFullyIndentityMapper
> extends Mapper<Object, Text, Text, IntWritable>{
>
> private Text word = new Text();
> private IntWritable val = new IntWritable();
>
> public void map(Object key, Text value, Context context
> ) throws IOException, InterruptedException {
>
> StringTokenizer itr = new StringTokenizer(value.toString());
> word.set(itr.nextToken());
> val.set(Integer.valueOf(itr.nextToken()));
> context.write(word, val);
> }
>
> public void run(Context context) throws IOException, InterruptedException {
> setup(context);
> try {
> while (context.nextKeyValue()) {
> System.out.println("Key ( " + context.getCurrentKey().getClass().getName() + " ): " + context.getCurrentKey()
> + " Value (" + context.getCurrentValue().getClass().getName() + "): " + context.getCurrentValue());
> map(context.getCurrentKey(), context.getCurrentValue(), context);
> }
> } finally {
> cleanup(context);
> }
> }
> }
>
> public static void main(String[] args) throws Exception {
> GenericOptionsParser parser = new GenericOptionsParser(new Configuration(), args);
>
> String[] otherArgs = parser.getRemainingArgs();
> if (otherArgs.length < 2) {
> System.err.println("Usage: wordcount [<in>...] <out>");
> System.exit(2);
> }
>
> // path that contains the file with all attributes necessary to the execution of the job
> Medusa execution = new Medusa(args);
>
> // first map tasks
> JobConf conf = new JobConf(MyWordCount.class);
> conf.setJobName("wordcount");
> conf.setClass("mapreduce.job.map.identity.class", MyFullyIndentityMapper.class, Mapper.class);
> System.out.println(conf.toString());
>
> conf.setJarByClass(MyWordCount.class);
> conf.setMapperClass(MyMap.class);
> conf.setPartitionerClass(MyHashPartitioner.class);
> conf.setReducerClass(MyReducer.class);
> conf.setOutputKeyClass(Text.class);
> conf.setOutputValueClass(IntWritable.class);
> conf.setNumReduceTasks(1);
>
> List<Path> inputPaths = new ArrayList<Path>();
> for (int i = 0; i < otherArgs.length - 1; ++i) {
> inputPaths.add(new Path(otherArgs[i]));
> }
> Path outputPath = new Path(otherArgs[otherArgs.length - 1]);
> execution.setInputPath(inputPaths);
> execution.setOutputPath(outputPath);
>
> // launch the job directly
> execution.submit(new Job(conf));
> }
> }
>
>
>
Re: Interface expected in the map definition?
Posted by "sreebalineni ." <sr...@gmail.com>.
That must be caused by jar files missing from the classpath. Check this link:
http://stackoverflow.com/questions/15127082/running-map-reduce-job-on-cdh4-example
On Wed, Sep 2, 2015 at 8:57 PM, xeonmailinglist <xe...@gmail.com>
wrote:
> I am setting my wordcount example, which is very similar to the Wordcount
> example that we find in the Internet.
>
> 1.
>
> The MyMap class extends and implements the same classes as the ones
> defined in the original wordcount example, but in my case I get the error
> of “Interface expected here”. I really don’t understand why I get this
> error. See my example below [1]. Any help here?
> 2.
>
> Is it possible to access the JobConf variable inside the map or reduce
> methods?
>
>
> [1] My Wordcount example
>
>
> package org.apache.hadoop.mapred.examples;
>
> import org.apache.hadoop.conf.Configuration;
> import org.apache.hadoop.fs.Path;
> import org.apache.hadoop.io.IntWritable;
> import org.apache.hadoop.io.LongWritable;
> import org.apache.hadoop.io.Text;
> import org.apache.hadoop.mapred.*;
> import org.apache.hadoop.mapreduce.Job;
> import org.apache.hadoop.mapreduce.Mapper;
> import org.apache.hadoop.mapreduce.ReduceContext;
> import org.apache.hadoop.mapreduce.Reducer;
> import org.apache.hadoop.util.GenericOptionsParser;
>
> import java.io.IOException;
> import java.util.ArrayList;
> import java.util.Iterator;
> import java.util.List;
> import java.util.StringTokenizer;
>
> /**
> * My example of a common wordcount. Compare with the official WordCount.class to understand the differences between both classes.
> */
> public class MyWordCount {
>
> public static class MyMap extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> { <<<< Interface expected here!!!
> private final static IntWritable one = new IntWritable(1);
> private Text word = new Text();
>
> public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
> String line = value.toString();
> StringTokenizer tokenizer = new StringTokenizer(line);
> while (tokenizer.hasMoreTokens()) {
> word.set(tokenizer.nextToken());
> output.collect(word, one);
> }
> }
> }
>
> public static class MyReducer
> extends Reducer<Text,IntWritable,Text,IntWritable> {
> private IntWritable result = new IntWritable();
> MedusaDigests parser = new MedusaDigests();
>
> public void reduce(Text key, Iterable<IntWritable> values,
> Context context
> ) throws IOException, InterruptedException {
> int sum = 0;
> for (IntWritable val : values) {
> System.out.println(" - key ( " + key.getClass().toString() + "): " + key.toString()
> + " value ( " + val.getClass().toString() + " ): " + val.toString());
> sum += val.get();
> }
> result.set(sum);
> context.write(key, result);
> }
>
> public void run(Context context) throws IOException, InterruptedException {
> setup(context);
> try {
> while (context.nextKey()) {
> System.out.println("Key: " + context.getCurrentKey());
> reduce(context.getCurrentKey(), context.getValues(), context);
> // If a back up store is used, reset it
> Iterator<IntWritable> iter = context.getValues().iterator();
> if(iter instanceof ReduceContext.ValueIterator) {
> ((ReduceContext.ValueIterator<IntWritable>)iter).resetBackupStore();
> }
> }
> } finally {
> cleanup(context);
> }
> }
>
> protected void cleanup(Context context)
> throws IOException, InterruptedException {
> parser.cleanup(context);
> }
> }
>
> /** Identity mapper set by the user. */
> public static class MyFullyIndentityMapper
> extends Mapper<Object, Text, Text, IntWritable>{
>
> private Text word = new Text();
> private IntWritable val = new IntWritable();
>
> public void map(Object key, Text value, Context context
> ) throws IOException, InterruptedException {
>
> StringTokenizer itr = new StringTokenizer(value.toString());
> word.set(itr.nextToken());
> val.set(Integer.valueOf(itr.nextToken()));
> context.write(word, val);
> }
>
> public void run(Context context) throws IOException, InterruptedException {
> setup(context);
> try {
> while (context.nextKeyValue()) {
> System.out.println("Key ( " + context.getCurrentKey().getClass().getName() + " ): " + context.getCurrentKey()
> + " Value (" + context.getCurrentValue().getClass().getName() + "): " + context.getCurrentValue());
> map(context.getCurrentKey(), context.getCurrentValue(), context);
> }
> } finally {
> cleanup(context);
> }
> }
> }
>
> public static void main(String[] args) throws Exception {
> GenericOptionsParser parser = new GenericOptionsParser(new Configuration(), args);
>
> String[] otherArgs = parser.getRemainingArgs();
> if (otherArgs.length < 2) {
> System.err.println("Usage: wordcount [<in>...] <out>");
> System.exit(2);
> }
>
> // path that contains the file with all attributes necessary to the execution of the job
> Medusa execution = new Medusa(args);
>
> // first map tasks
> JobConf conf = new JobConf(MyWordCount.class);
> conf.setJobName("wordcount");
> conf.setClass("mapreduce.job.map.identity.class", MyFullyIndentityMapper.class, Mapper.class);
> System.out.println(conf.toString());
>
> conf.setJarByClass(MyWordCount.class);
> conf.setMapperClass(MyMap.class);
> conf.setPartitionerClass(MyHashPartitioner.class);
> conf.setReducerClass(MyReducer.class);
> conf.setOutputKeyClass(Text.class);
> conf.setOutputValueClass(IntWritable.class);
> conf.setNumReduceTasks(1);
>
> List<Path> inputPaths = new ArrayList<Path>();
> for (int i = 0; i < otherArgs.length - 1; ++i) {
> inputPaths.add(new Path(otherArgs[i]));
> }
> Path outputPath = new Path(otherArgs[otherArgs.length - 1]);
> execution.setInputPath(inputPaths);
> execution.setOutputPath(outputPath);
>
> // launch the job directly
> execution.submit(new Job(conf));
> }
> }
>
>
>
Re: Interface expected in the map definition?
Posted by "sreebalineni ." <sr...@gmail.com>.
That must be caused by jar files missing from the classpath. Check this link:
http://stackoverflow.com/questions/15127082/running-map-reduce-job-on-cdh4-example
On Wed, Sep 2, 2015 at 8:57 PM, xeonmailinglist <xe...@gmail.com>
wrote:
> I am setting my wordcount example, which is very similar to the Wordcount
> example that we find in the Internet.
>
> 1.
>
> The MyMap class extends and implements the same classes as the ones
> defined in the original wordcount example, but in my case I get the error
> of “Interface expected here”. I really don’t understand why I get this
> error. See my example below [1]. Any help here?
> 2.
>
> Is it possible to access the JobConf variable inside the map or reduce
> methods?
>
>
> [1] My Wordcount example
>
>
> package org.apache.hadoop.mapred.examples;
>
> import org.apache.hadoop.conf.Configuration;
> import org.apache.hadoop.fs.Path;
> import org.apache.hadoop.io.IntWritable;
> import org.apache.hadoop.io.LongWritable;
> import org.apache.hadoop.io.Text;
> import org.apache.hadoop.mapred.*;
> import org.apache.hadoop.mapreduce.Job;
> import org.apache.hadoop.mapreduce.Mapper;
> import org.apache.hadoop.mapreduce.ReduceContext;
> import org.apache.hadoop.mapreduce.Reducer;
> import org.apache.hadoop.util.GenericOptionsParser;
>
> import java.io.IOException;
> import java.util.ArrayList;
> import java.util.Iterator;
> import java.util.List;
> import java.util.StringTokenizer;
>
> /**
> * My example of a common wordcount. Compare with the official WordCount.class to understand the differences between both classes.
> */
> public class MyWordCount {
>
> public static class MyMap extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> { <<<< Interface expected here!!!
> private final static IntWritable one = new IntWritable(1);
> private Text word = new Text();
>
> public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
> String line = value.toString();
> StringTokenizer tokenizer = new StringTokenizer(line);
> while (tokenizer.hasMoreTokens()) {
> word.set(tokenizer.nextToken());
> output.collect(word, one);
> }
> }
> }
>
> public static class MyReducer
> extends Reducer<Text,IntWritable,Text,IntWritable> {
> private IntWritable result = new IntWritable();
> MedusaDigests parser = new MedusaDigests();
>
> public void reduce(Text key, Iterable<IntWritable> values,
> Context context
> ) throws IOException, InterruptedException {
> int sum = 0;
> for (IntWritable val : values) {
> System.out.println(" - key ( " + key.getClass().toString() + "): " + key.toString()
> + " value ( " + val.getClass().toString() + " ): " + val.toString());
> sum += val.get();
> }
> result.set(sum);
> context.write(key, result);
> }
>
> public void run(Context context) throws IOException, InterruptedException {
> setup(context);
> try {
> while (context.nextKey()) {
> System.out.println("Key: " + context.getCurrentKey());
> reduce(context.getCurrentKey(), context.getValues(), context);
> // If a back up store is used, reset it
> Iterator<IntWritable> iter = context.getValues().iterator();
> if(iter instanceof ReduceContext.ValueIterator) {
> ((ReduceContext.ValueIterator<IntWritable>)iter).resetBackupStore();
> }
> }
> } finally {
> cleanup(context);
> }
> }
>
> protected void cleanup(Context context)
> throws IOException, InterruptedException {
> parser.cleanup(context);
> }
> }
>
> /** Identity mapper set by the user. */
> public static class MyFullyIndentityMapper
> extends Mapper<Object, Text, Text, IntWritable>{
>
> private Text word = new Text();
> private IntWritable val = new IntWritable();
>
> public void map(Object key, Text value, Context context
> ) throws IOException, InterruptedException {
>
> StringTokenizer itr = new StringTokenizer(value.toString());
> word.set(itr.nextToken());
> val.set(Integer.valueOf(itr.nextToken()));
> context.write(word, val);
> }
>
> public void run(Context context) throws IOException, InterruptedException {
> setup(context);
> try {
> while (context.nextKeyValue()) {
> System.out.println("Key ( " + context.getCurrentKey().getClass().getName() + " ): " + context.getCurrentKey()
> + " Value (" + context.getCurrentValue().getClass().getName() + "): " + context.getCurrentValue());
> map(context.getCurrentKey(), context.getCurrentValue(), context);
> }
> } finally {
> cleanup(context);
> }
> }
> }
>
> public static void main(String[] args) throws Exception {
> GenericOptionsParser parser = new GenericOptionsParser(new Configuration(), args);
>
> String[] otherArgs = parser.getRemainingArgs();
> if (otherArgs.length < 2) {
> System.err.println("Usage: wordcount [<in>...] <out>");
> System.exit(2);
> }
>
> // path that contains the file with all attributes necessary to the execution of the job
> Medusa execution = new Medusa(args);
>
> // first map tasks
> JobConf conf = new JobConf(MyWordCount.class);
> conf.setJobName("wordcount");
> conf.setClass("mapreduce.job.map.identity.class", MyFullyIndentityMapper.class, Mapper.class);
> System.out.println(conf.toString());
>
> conf.setJarByClass(MyWordCount.class);
> conf.setMapperClass(MyMap.class);
> conf.setPartitionerClass(MyHashPartitioner.class);
> conf.setReducerClass(MyReducer.class);
> conf.setOutputKeyClass(Text.class);
> conf.setOutputValueClass(IntWritable.class);
> conf.setNumReduceTasks(1);
>
> List<Path> inputPaths = new ArrayList<Path>();
> for (int i = 0; i < otherArgs.length - 1; ++i) {
> inputPaths.add(new Path(otherArgs[i]));
> }
> Path outputPath = new Path(otherArgs[otherArgs.length - 1]);
> execution.setInputPath(inputPaths);
> execution.setOutputPath(outputPath);
>
> // launch the job directly
> execution.submit(new Job(conf));
> }
> }
>
>
>
Re: Interface expected in the map definition?
Posted by "sreebalineni ." <sr...@gmail.com>.
That must be caused by jar files missing from the classpath. Check this link:
http://stackoverflow.com/questions/15127082/running-map-reduce-job-on-cdh4-example
On Wed, Sep 2, 2015 at 8:57 PM, xeonmailinglist <xe...@gmail.com>
wrote:
> I am setting my wordcount example, which is very similar to the Wordcount
> example that we find in the Internet.
>
> 1.
>
> The MyMap class extends and implements the same classes as the ones
> defined in the original wordcount example, but in my case I get the error
> of “Interface expected here”. I really don’t understand why I get this
> error. See my example below [1]. Any help here?
> 2.
>
> Is it possible to access the JobConf variable inside the map or reduce
> methods?
>
>
> [1] My Wordcount example
>
>
> package org.apache.hadoop.mapred.examples;
>
> import org.apache.hadoop.conf.Configuration;
> import org.apache.hadoop.fs.Path;
> import org.apache.hadoop.io.IntWritable;
> import org.apache.hadoop.io.LongWritable;
> import org.apache.hadoop.io.Text;
> import org.apache.hadoop.mapred.*;
> import org.apache.hadoop.mapreduce.Job;
> import org.apache.hadoop.mapreduce.Mapper;
> import org.apache.hadoop.mapreduce.ReduceContext;
> import org.apache.hadoop.mapreduce.Reducer;
> import org.apache.hadoop.util.GenericOptionsParser;
>
> import java.io.IOException;
> import java.util.ArrayList;
> import java.util.Iterator;
> import java.util.List;
> import java.util.StringTokenizer;
>
> /**
> * My example of a common wordcount. Compare with the official WordCount.class to understand the differences between both classes.
> */
> public class MyWordCount {
>
> public static class MyMap extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> { <<<< Interface expected here!!!
> private final static IntWritable one = new IntWritable(1);
> private Text word = new Text();
>
> public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
> String line = value.toString();
> StringTokenizer tokenizer = new StringTokenizer(line);
> while (tokenizer.hasMoreTokens()) {
> word.set(tokenizer.nextToken());
> output.collect(word, one);
> }
> }
> }
>
> public static class MyReducer
> extends Reducer<Text,IntWritable,Text,IntWritable> {
> private IntWritable result = new IntWritable();
> MedusaDigests parser = new MedusaDigests();
>
> public void reduce(Text key, Iterable<IntWritable> values,
> Context context
> ) throws IOException, InterruptedException {
> int sum = 0;
> for (IntWritable val : values) {
> System.out.println(" - key ( " + key.getClass().toString() + "): " + key.toString()
> + " value ( " + val.getClass().toString() + " ): " + val.toString());
> sum += val.get();
> }
> result.set(sum);
> context.write(key, result);
> }
>
> public void run(Context context) throws IOException, InterruptedException {
> setup(context);
> try {
> while (context.nextKey()) {
> System.out.println("Key: " + context.getCurrentKey());
> reduce(context.getCurrentKey(), context.getValues(), context);
> // If a back up store is used, reset it
> Iterator<IntWritable> iter = context.getValues().iterator();
> if(iter instanceof ReduceContext.ValueIterator) {
> ((ReduceContext.ValueIterator<IntWritable>)iter).resetBackupStore();
> }
> }
> } finally {
> cleanup(context);
> }
> }
>
> protected void cleanup(Context context)
> throws IOException, InterruptedException {
> parser.cleanup(context);
> }
> }
>
> /** Identity mapper set by the user. */
> public static class MyFullyIndentityMapper
> extends Mapper<Object, Text, Text, IntWritable>{
>
> private Text word = new Text();
> private IntWritable val = new IntWritable();
>
> public void map(Object key, Text value, Context context
> ) throws IOException, InterruptedException {
>
> StringTokenizer itr = new StringTokenizer(value.toString());
> word.set(itr.nextToken());
> val.set(Integer.valueOf(itr.nextToken()));
> context.write(word, val);
> }
>
> public void run(Context context) throws IOException, InterruptedException {
> setup(context);
> try {
> while (context.nextKeyValue()) {
> System.out.println("Key ( " + context.getCurrentKey().getClass().getName() + " ): " + context.getCurrentKey()
> + " Value (" + context.getCurrentValue().getClass().getName() + "): " + context.getCurrentValue());
> map(context.getCurrentKey(), context.getCurrentValue(), context);
> }
> } finally {
> cleanup(context);
> }
> }
> }
>
> public static void main(String[] args) throws Exception {
> GenericOptionsParser parser = new GenericOptionsParser(new Configuration(), args);
>
> String[] otherArgs = parser.getRemainingArgs();
> if (otherArgs.length < 2) {
> System.err.println("Usage: wordcount [<in>...] <out>");
> System.exit(2);
> }
>
> // path that contains the file with all attributes necessary to the execution of the job
> Medusa execution = new Medusa(args);
>
> // first map tasks
> JobConf conf = new JobConf(MyWordCount.class);
> conf.setJobName("wordcount");
> conf.setClass("mapreduce.job.map.identity.class", MyFullyIndentityMapper.class, Mapper.class);
> System.out.println(conf.toString());
>
> conf.setJarByClass(MyWordCount.class);
> conf.setMapperClass(MyMap.class);
> conf.setPartitionerClass(MyHashPartitioner.class);
> conf.setReducerClass(MyReducer.class);
> conf.setOutputKeyClass(Text.class);
> conf.setOutputValueClass(IntWritable.class);
> conf.setNumReduceTasks(1);
>
> List<Path> inputPaths = new ArrayList<Path>();
> for (int i = 0; i < otherArgs.length - 1; ++i) {
> inputPaths.add(new Path(otherArgs[i]));
> }
> Path outputPath = new Path(otherArgs[otherArgs.length - 1]);
> execution.setInputPath(inputPaths);
> execution.setOutputPath(outputPath);
>
> // launch the job directly
> execution.submit(new Job(conf));
> }
> }
>
>
>