Posted to general@hadoop.apache.org by Harshit Kumar <hk...@gmail.com> on 2009/09/03 00:04:15 UTC

Hadoop executing a custom WRITABLE type

Hi

I am using a custom Writable type called Duo. It is a class that
implements the Writable interface.

The input is a text file in which each record consists of 3 words, for example:
hello come here
Where are you
How are you

In the driver class, the Mapper takes the first word of each record as the
key and creates an object of type Duo from the remaining 2 words.
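
For a record like "hello come here", the key would therefore be "hello" and
the value Duo("come", "here"). A minimal sketch of that intended map logic
would look something like this (illustration only; the full code I am
actually running follows below, and its map method currently emits a fixed
Duo value for every token):

    // Sketch only: first token becomes the key, the next two become the Duo fields.
    public void map(LongWritable key, Text value,
            OutputCollector<Text, Duo> output, Reporter reporter) throws IOException {
        String[] words = value.toString().split("\\s+");
        if (words.length >= 3) {
            // e.g. "hello come here" -> key "hello", value Duo("come", "here")
            output.collect(new Text(words[0]), new Duo(words[1], words[2]));
        }
    }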

Here is the full driver code:


import java.io.IOException;
import java.util.*;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.mapred.lib.NullOutputFormat;


public class WordCount {

    public static class Map extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, Duo> {

        private Text word = new Text();

        public void map(LongWritable key, Text value,
                OutputCollector<Text, Duo> output, Reporter reporter)
                throws IOException {
            String line = value.toString();
            StringTokenizer tokenizer = new StringTokenizer(line);
            while (tokenizer.hasMoreTokens()) {
                word.set(tokenizer.nextToken());
                output.collect(word, new Duo("kumar", "kumar"));
            }
        }
    }

    public static class Reduce extends MapReduceBase
            implements Reducer<Text, Duo, Text, IntWritable> {

        public void reduce(Text key, Iterator<Duo> values,
                OutputCollector<Text, IntWritable> output, Reporter reporter)
                throws IOException {
            // write each key's values to an HDFS file named after the key
            Configuration conf = new Configuration();
            FileSystem fs = FileSystem.get(conf);
            Path subjectFile = new Path(key.toString());
            FSDataOutputStream out = fs.create(subjectFile);
            while (values.hasNext()) {
                out.writeChars(key.toString());
                Duo d = values.next();
                out.writeChars(d.getProperty());
                out.writeChars(d.getObject());
            }
            out.close();
        }
    }

    public static void main(String[] args) throws Exception {
        System.out.println("1");
        JobConf conf = new JobConf(WordCount.class);
        conf.setJobName("wordcount");

        // with no explicit map output classes set, these also define the map output types
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Duo.class);

        conf.setMapperClass(Map.class);
        // no combiner: Reduce writes its output straight to HDFS instead of
        // the collector, so it is not suitable as a combiner
        conf.setReducerClass(Reduce.class);

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(NullOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));
        System.out.println("2");
        JobClient.runJob(conf);
    }

}


-----------------------------------
The code for the Duo class is as follows:

import org.apache.hadoop.io.*;
import java.io.*;

//public class Duo implements WritableComparable<Duo> {
public class Duo implements Writable {

    public String property;
    public String object;

    public Duo() {
        set(new String(), new String());
    }

    public Duo(String p, String o) {
        set(p, o);
    }

    public void set(String p, String o) {
        property = p;
        object = o;
    }

    public String getProperty() {
        return property;
    }

    public String getObject() {
        return object;
    }

    public void write(DataOutput out) throws IOException {
        // write and readFields must mirror each other;
        // writeUTF/readUTF keep both fields length-prefixed and symmetric
        out.writeUTF(property);
        out.writeUTF(object);
    }

    public void readFields(DataInput in) throws IOException {
        property = in.readUTF();
        object = in.readUTF();
    }

    public int hashCode() {
        return property.hashCode() * 163 + object.hashCode();
    }

    public boolean equals(Object other) {
        if (other instanceof Duo) {
            Duo od = (Duo) other;
            return property.equals(od.property) && object.equals(od.object);
        }
        return false;
    }

    public String toString() {
        return property + "\t" + object;
    }

    public int compareTo(Duo other) {
        if (property.compareTo(other.property) == 0)
            return object.compareTo(other.object);
        else
            return property.compareTo(other.property);
    }
}
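
As I understand it, write() and readFields() must mirror each other exactly,
because Hadoop creates the value with the no-argument constructor and then
calls readFields() on it. A small standalone round-trip check along these
lines (the class name DuoRoundTrip is just for illustration) should print
true if the serialization is consistent:

import java.io.*;

public class DuoRoundTrip {
    public static void main(String[] args) throws IOException {
        // serialize a Duo to a byte array, much as Hadoop does during the shuffle
        Duo original = new Duo("come", "here");
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        original.write(new DataOutputStream(bytes));

        // deserialize into a fresh instance created with the no-argument constructor
        Duo copy = new Duo();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));

        // prints true if write() and readFields() mirror each other
        System.out.println(original.equals(copy));
    }
}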

When I execute the driver program, the following error is displayed. Please
help me understand this problem and, if possible, suggest a solution.

1
2
09/09/03 06:40:41 WARN mapred.JobClient: Use GenericOptionsParser for
parsing the arguments. Applications should implement Tool for the same.
09/09/03 06:40:42 INFO mapred.FileInputFormat: Total input paths to process
: 1
09/09/03 06:40:42 INFO mapred.FileInputFormat: Total input paths to process
: 1
09/09/03 06:40:43 INFO mapred.JobClient: Running job: job_200909021822_0046
09/09/03 06:40:44 INFO mapred.JobClient:  map 0% reduce 0%
09/09/03 06:40:50 INFO mapred.JobClient: Task Id :
attempt_200909021822_0046_m_000000_0, Status : FAILED
java.lang.RuntimeException: java.lang.RuntimeException:
java.lang.ClassNotFoundException: Duo
at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:680)
at org.apache.hadoop.mapred.JobConf.getOutputValueClass(JobConf.java:681)
at org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:567)
at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.<init>(MapTask.java:383)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:193)
at org.apache.hadoop.mapred.TaskTracker$Child.main(TaskTracker.java:2198)
Caused by: java.lang.RuntimeException: java.lang.ClassNotFoundException: Duo
at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:648)
at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:672)
... 5 more
Caused by: java.lang.ClassNotFoundException: Duo
at java.net.URLClassLoader$1.run(URLClassLoader.java:200)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:188)
at java.lang.ClassLoader.loadClass(ClassLoader.java:307)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:301)
at java.lang.ClassLoader.loadClass(ClassLoader.java:252)
at java.lang.ClassLoader.loadClassInternal(ClassLoader.java:320)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:247)
at
org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:628)
at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:646)
... 6 more


I appreciate you taking the time to read this message.

Thanks
H. Kumar