Posted to mapreduce-user@hadoop.apache.org by Vikas Jadhav <vi...@gmail.com> on 2013/01/29 13:00:01 UTC

Issue with Reduce Side join using datajoin package

I am using Hadoop 1.0.3.

I am getting the following error:


13/01/29 06:55:19 INFO mapred.JobClient: Task Id : attempt_201301290120_0006_r_000000_0, Status : FAILED
java.lang.NullPointerException
        at MyJoin$TaggedWritable.readFields(MyJoin.java:101)
        at org.apache.hadoop.io.serializer.WritableSerialization$WritableDeserializer.deserialize(WritableSerialization.java:67)
        at org.apache.hadoop.io.serializer.WritableSerialization$WritableDeserializer.deserialize(WritableSerialization.java:40)
        at org.apache.hadoop.mapred.Task$ValuesIterator.readNextValue(Task.java:1271)
        at org.apache.hadoop.mapred.Task$ValuesIterator.next(Task.java:1211)
        at org.apache.hadoop.mapred.ReduceTask$ReduceValuesIterator.moveToNext(ReduceTask.java:249)
        at org.apache.hadoop.mapred.ReduceTask$ReduceValuesIterator.next(ReduceTask.java:245)
        at org.apache.hadoop.contrib.utils.join.DataJoinReducerBase.regroup(DataJoinReducerBase.java:106)
        at org.apache.hadoop.contrib.utils.join.DataJoinReducerBase.reduce(DataJoinReducerBase.java:129)
        at org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:519)
        at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:420)
        at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:396)
        at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1121)
        at org.apache.hadoop.mapred.Child.main(Child.java:249)




It is pointing to this line in readFields: String dataClz = in.readUTF();


public void readFields(DataInput in) throws IOException
{
    this.tag.readFields(in);

    //String dataClz = in.readUTF();
    String dataClz = in.readUTF();   // ----> the error log shows this line is the culprit

    try
    {
        // try-catch is needed because the "error: unreported exception
        // ClassNotFoundException; must be caught or declared to be thrown"
        // is "raised" by the compiler
        if (this.data == null || !this.data.getClass().getName().equals(dataClz))
        {
            // this line of code "raises" the compile error mentioned above
            this.data = (Writable) ReflectionUtils.newInstance(Class.forName(dataClz), null);
        }
        this.data.readFields(in);
    }
    catch (ClassNotFoundException cnfe)
    {
        System.out.println("Problem in TaggedWritable class, method readFields.");
    }
} // end readFields
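
Note that in.readUTF() can only hand back a class name here if the matching write() method wrote one with out.writeUTF(): a Writable must read back exactly the field sequence it wrote. Purely as a minimal sketch (hypothetical, not code taken from this thread), a write() kept symmetric with the readFields() above would look like:

// Hypothetical sketch only: a write() kept symmetric with the readFields() above.
public void write(DataOutput out) throws IOException {
    this.tag.write(out);                           // mirrored by this.tag.readFields(in)
    out.writeUTF(this.data.getClass().getName());  // mirrored by String dataClz = in.readUTF()
    this.data.write(out);                          // mirrored by this.data.readFields(in)
}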


--
Thanx and Regards
Vikas Jadhav

Fwd: Issue with Reduce Side join using datajoin package

Posted by Vikas Jadhav <vi...@gmail.com>.
---------- Forwarded message ----------
From: Vikas Jadhav <vi...@gmail.com>
Date: Thu, Jan 31, 2013 at 11:14 PM
Subject: Re: Issue with Reduce Side join using datajoin package
To: user@hadoop.apache.org


***************source ****************



public class MyJoin extends Configured implements Tool {



    public static class MapClass extends DataJoinMapperBase {

        protected Text generateInputTag(String inputFile) {
            System.out.println("Starting generateInputTag() : "+inputFile);
            String datasource = inputFile.split("-")[0];
            return new Text(datasource);
        }

        protected Text generateGroupKey(TaggedMapOutput aRecord) {
            System.out.println(" Statring generateGroupKey() : "+aRecord);
            String line = ((Text) aRecord.getData()).toString();
            String[] tokens = line.split(",");
            String groupKey = tokens[0];
            return new Text(groupKey);
        }

        protected TaggedMapOutput generateTaggedMapOutput(Object value) {
            System.out.println("starting  generateTaggedMapOutput() value
: "+value);
            TaggedWritable retv = new TaggedWritable((Text) value);
            retv.setTag(this.inputTag);
            return retv;
        }
    }

    public static class Reduce extends DataJoinReducerBase {

        protected TaggedMapOutput combine(Object[] tags, Object[] values) {
             System.out.println("combine :");
            if (tags.length < 2) return null;
            String joinedStr = "";
            for (int i=0; i<values.length; i++) {
                if (i > 0) joinedStr += ",";
                TaggedWritable tw = (TaggedWritable) values[i];
                String line = ((Text) tw.getData()).toString();
                String[] tokens = line.split(",", 2);
                joinedStr += tokens[1];
            }
            TaggedWritable retv = new TaggedWritable(new Text(joinedStr));
            retv.setTag((Text) tags[0]);
            return retv;
        }
    }

    public static class TaggedWritable extends TaggedMapOutput {

        private Writable data;


        public TaggedWritable()
        {
            this.tag = new Text();

        }//end empty( taking no parameters) constructor TaggedWritable

        public TaggedWritable(Writable data) {

            this.tag = new Text("");
            this.data = data;
        }

        public Writable getData() {
            return data;
        }

        public void write(DataOutput out) throws IOException {
            //System.out.println(");
            this.tag.write(out);
            this.data.write(out);
            System.out.println("Tag  :"+tag+" Data  :"+ data);
        }

        /*
        public void readFields(DataInput in) throws IOException {
            System.out.println(" Starting short readFields(): "+ in);
            this.tag.readFields(in);
            this.data.readFields(in);
        } */


        public void readFields(DataInput in) throws IOException {
            System.out.println(" Starting short readFields(): "+ in);
            this.tag.readFields(in);
            String w = in.toString();
            if(this.data == null)
                try {
                    this.data = (Writable) ReflectionUtils.newInstance(Class.forName(w), null);
                } catch (ClassNotFoundException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                }
            this.data.readFields(in);
        }


    }
    public int run(String[] args) throws Exception {
        System.out.println("Starting run() Method:");
        Configuration conf = getConf();
        conf.addResource(new Path("/home/vikas/project/hadoop-1.0.3/conf/core-site.xml"));
        conf.addResource(new Path("/home/vikas/project/hadoop-1.0.3/conf/mapred-site.xml"));
        conf.addResource(new Path("/home/vikas/project/hadoop-1.0.3/conf/hdfs-site.xml"));
        JobConf job = new JobConf(conf, MyJoin.class);
        Path in = new Path(args[0]);
        Path out = new Path(args[1]);

        FileInputFormat.setInputPaths(job, in);

        FileOutputFormat.setOutputPath(job, out);

        job.setJobName("DataJoin_cust X order");

        job.setMapperClass(MapClass.class);
        job.setReducerClass(Reduce.class);

        job.setInputFormat(TextInputFormat.class);
        job.setOutputFormat(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(TaggedWritable.class);
        job.set("mapred.textoutputformat.separator", ",");
        JobClient.runJob(job);
        return 0;
    }

    public static void main(String[] args) throws Exception {
        System.out.println("Starting main() function:");
        int res = ToolRunner.run(new Configuration(),
                                 new MyJoin(),
                                 args);

        System.exit(res);
    }
}



************************* and error *********************************************


13/01/31 23:04:26 INFO util.NativeCodeLoader: Loaded the native-hadoop library
13/01/31 23:04:26 WARN snappy.LoadSnappy: Snappy native library not loaded
13/01/31 23:04:26 INFO mapred.FileInputFormat: Total input paths to process : 2
13/01/31 23:04:26 INFO mapred.JobClient: Running job: job_201301312254_0004
13/01/31 23:04:27 INFO mapred.JobClient:  map 0% reduce 0%
13/01/31 23:04:41 INFO mapred.JobClient:  map 66% reduce 0%
13/01/31 23:04:47 INFO mapred.JobClient:  map 100% reduce 0%
13/01/31 23:04:50 INFO mapred.JobClient:  map 100% reduce 22%
13/01/31 23:04:58 INFO mapred.JobClient: Task Id : attempt_201301312254_0004_r_000000_0, Status : FAILED
java.lang.NullPointerException
    at MyJoin$TaggedWritable.readFields(MyJoin.java:125)
    at org.apache.hadoop.io.serializer.WritableSerialization$WritableDeserializer.deserialize(WritableSerialization.java:67)
    at org.apache.hadoop.io.serializer.WritableSerialization$WritableDeserializer.deserialize(WritableSerialization.java:40)
    at org.apache.hadoop.mapred.Task$ValuesIterator.readNextValue(Task.java:1271)
    at org.apache.hadoop.mapred.Task$ValuesIterator.next(Task.java:1211)
    at org.apache.hadoop.mapred.ReduceTask$ReduceValuesIterator.moveToNext(ReduceTask.java:249)
    at org.apache.hadoop.mapred.ReduceTask$ReduceValuesIterator.next(ReduceTask.java:245)
    at com.sas.join.DataJoinReducerBase.regroup(DataJoinReducerBase.java:107)
    at com.sas.join.DataJoinReducerBase.reduce(DataJoinReducerBase.java:132)
    at org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:519)
    at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:420)
    at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:416)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1121)
    at org.apache.hadoop.mapred.Child.main(Child.java:249)





--
Thanx and Regards
Vikas Jadhav


Re: Issue with Reduce Side join using datajoin package

Posted by Vikas Jadhav <vi...@gmail.com>.
***************source ****************



public class MyJoin extends Configured implements Tool {



    public static class MapClass extends DataJoinMapperBase {

        protected Text generateInputTag(String inputFile) {
            System.out.println("Starting generateInputTag() : "+inputFile);
            String datasource = inputFile.split("-")[0];
            return new Text(datasource);
        }

        protected Text generateGroupKey(TaggedMapOutput aRecord) {
            System.out.println(" Statring generateGroupKey() : "+aRecord);
            String line = ((Text) aRecord.getData()).toString();
            String[] tokens = line.split(",");
            String groupKey = tokens[0];
            return new Text(groupKey);
        }

        protected TaggedMapOutput generateTaggedMapOutput(Object value) {
            System.out.println("starting  generateTaggedMapOutput() value
: "+value);
            TaggedWritable retv = new TaggedWritable((Text) value);
            retv.setTag(this.inputTag);
            return retv;
        }
    }

    public static class Reduce extends DataJoinReducerBase {

        protected TaggedMapOutput combine(Object[] tags, Object[] values) {
             System.out.println("combine :");
            if (tags.length < 2) return null;
            String joinedStr = "";
            for (int i=0; i<values.length; i++) {
                if (i > 0) joinedStr += ",";
                TaggedWritable tw = (TaggedWritable) values[i];
                String line = ((Text) tw.getData()).toString();
                String[] tokens = line.split(",", 2);
                joinedStr += tokens[1];
            }
            TaggedWritable retv = new TaggedWritable(new Text(joinedStr));
            retv.setTag((Text) tags[0]);
            return retv;
        }
    }

    public static class TaggedWritable extends TaggedMapOutput {

        private Writable data;


        public TaggedWritable()
        {
            this.tag = new Text();

        }//end empty( taking no parameters) constructor TaggedWritable

        public TaggedWritable(Writable data) {

            this.tag = new Text("");
            this.data = data;
        }

        public Writable getData() {
            return data;
        }

        public void write(DataOutput out) throws IOException {
            //System.out.println(");
            this.tag.write(out);
            this.data.write(out);
            System.out.println("Tag  :"+tag+" Data  :"+ data);
        }

        /*
        public void readFields(DataInput in) throws IOException {
            System.out.println(" Starting short readFields(): "+ in);
            this.tag.readFields(in);
            this.data.readFields(in);
        } */


        public void readFields(DataInput in) throws IOException {
            System.out.println(" Starting short readFields(): "+ in);
            this.tag.readFields(in);
            String w = in.toString();
            if(this.data == null)
                try {
                    this.data =(Writable)
ReflectionUtils.newInstance(Class.forName(w), null);
                } catch (ClassNotFoundException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                }
            this.data.readFields(in);
        }


    }
    public int run(String[] args) throws Exception {
        System.out.println("Starting run() Method:");
        Configuration conf = getConf();
        conf.addResource(new
Path("/home/vikas/project/hadoop-1.0.3/conf/core-site.xml"));
        conf.addResource(new
Path("/home/vikas/project/hadoop-1.0.3/conf/mapred-site.xml"));
        conf.addResource(new
Path("/home/vikas/project/hadoop-1.0.3/conf/hdfs-site.xml"));
        JobConf job = new JobConf(conf, MyJoin.class);
        Path in = new Path(args[0]);
        Path out = new Path(args[1]);

        FileInputFormat.setInputPaths(job, in);

        FileOutputFormat.setOutputPath(job, out);

        job.setJobName("DataJoin_cust X order");

        job.setMapperClass(MapClass.class);
        job.setReducerClass(Reduce.class);

        job.setInputFormat(TextInputFormat.class);
        job.setOutputFormat(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(TaggedWritable.class);
        job.set("mapred.textoutputformat.separator", ",");
        JobClient.runJob(job);
        return 0;
    }

    public static void main(String[] args) throws Exception {
        System.out.println("Starting main() function:");
        int res = ToolRunner.run(new Configuration(),
                                 new MyJoin(),
                                 args);

        System.exit(res);
    }
}



*************************and
error*********************************************


13/01/31 23:04:26 INFO util.NativeCodeLoader: Loaded the native-hadoop
library
13/01/31 23:04:26 WARN snappy.LoadSnappy: Snappy native library not loaded
13/01/31 23:04:26 INFO mapred.FileInputFormat: Total input paths to process
: 2
13/01/31 23:04:26 INFO mapred.JobClient: Running job: job_201301312254_0004
13/01/31 23:04:27 INFO mapred.JobClient:  map 0% reduce 0%
13/01/31 23:04:41 INFO mapred.JobClient:  map 66% reduce 0%
13/01/31 23:04:47 INFO mapred.JobClient:  map 100% reduce 0%
13/01/31 23:04:50 INFO mapred.JobClient:  map 100% reduce 22%
13/01/31 23:04:58 INFO mapred.JobClient: Task Id :
attempt_201301312254_0004_r_000000_0, Status : FAILED
java.lang.NullPointerException
    at MyJoin$TaggedWritable.readFields(MyJoin.java:125)
    at
org.apache.hadoop.io.serializer.WritableSerialization$WritableDeserializer.deserialize(WritableSerialization.java:67)
    at
org.apache.hadoop.io.serializer.WritableSerialization$WritableDeserializer.deserialize(WritableSerialization.java:40)
    at
org.apache.hadoop.mapred.Task$ValuesIterator.readNextValue(Task.java:1271)
    at org.apache.hadoop.mapred.Task$ValuesIterator.next(Task.java:1211)
    at
org.apache.hadoop.mapred.ReduceTask$ReduceValuesIterator.moveToNext(ReduceTask.java:249)
    at
org.apache.hadoop.mapred.ReduceTask$ReduceValuesIterator.next(ReduceTask.java:245)
    at
com.sas.join.DataJoinReducerBase.regroup(DataJoinReducerBase.java:107)
    at com.sas.join.DataJoinReducerBase.reduce(DataJoinReducerBase.java:132)
    at
org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:519)
    at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:420)
    at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:416)
    at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1121)
    at org.apache.hadoop.mapred.Child.main(Child.java:249)

Re: Issue with Reduce Side join using datajoin package

Posted by Vikas Jadhav <vi...@gmail.com>.
***************source ****************



public class MyJoin extends Configured implements Tool {



    public static class MapClass extends DataJoinMapperBase {

        protected Text generateInputTag(String inputFile) {
            System.out.println("Starting generateInputTag() : "+inputFile);
            String datasource = inputFile.split("-")[0];
            return new Text(datasource);
        }

        protected Text generateGroupKey(TaggedMapOutput aRecord) {
            System.out.println(" Statring generateGroupKey() : "+aRecord);
            String line = ((Text) aRecord.getData()).toString();
            String[] tokens = line.split(",");
            String groupKey = tokens[0];
            return new Text(groupKey);
        }

        protected TaggedMapOutput generateTaggedMapOutput(Object value) {
            System.out.println("starting  generateTaggedMapOutput() value
: "+value);
            TaggedWritable retv = new TaggedWritable((Text) value);
            retv.setTag(this.inputTag);
            return retv;
        }
    }

    public static class Reduce extends DataJoinReducerBase {

        protected TaggedMapOutput combine(Object[] tags, Object[] values) {
             System.out.println("combine :");
            if (tags.length < 2) return null;
            String joinedStr = "";
            for (int i=0; i<values.length; i++) {
                if (i > 0) joinedStr += ",";
                TaggedWritable tw = (TaggedWritable) values[i];
                String line = ((Text) tw.getData()).toString();
                String[] tokens = line.split(",", 2);
                joinedStr += tokens[1];
            }
            TaggedWritable retv = new TaggedWritable(new Text(joinedStr));
            retv.setTag((Text) tags[0]);
            return retv;
        }
    }

    public static class TaggedWritable extends TaggedMapOutput {

        private Writable data;


        public TaggedWritable()
        {
            this.tag = new Text();

        }//end empty( taking no parameters) constructor TaggedWritable

        public TaggedWritable(Writable data) {

            this.tag = new Text("");
            this.data = data;
        }

        public Writable getData() {
            return data;
        }

        public void write(DataOutput out) throws IOException {
            //System.out.println(");
            this.tag.write(out);
            this.data.write(out);
            System.out.println("Tag  :"+tag+" Data  :"+ data);
        }

        /*
        public void readFields(DataInput in) throws IOException {
            System.out.println(" Starting short readFields(): "+ in);
            this.tag.readFields(in);
            this.data.readFields(in);
        } */


        public void readFields(DataInput in) throws IOException {
            System.out.println(" Starting short readFields(): "+ in);
            this.tag.readFields(in);
            String w = in.toString();
            if(this.data == null)
                try {
                    this.data =(Writable)
ReflectionUtils.newInstance(Class.forName(w), null);
                } catch (ClassNotFoundException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                }
            this.data.readFields(in);
        }


    }
    public int run(String[] args) throws Exception {
        System.out.println("Starting run() Method:");
        Configuration conf = getConf();
        conf.addResource(new
Path("/home/vikas/project/hadoop-1.0.3/conf/core-site.xml"));
        conf.addResource(new
Path("/home/vikas/project/hadoop-1.0.3/conf/mapred-site.xml"));
        conf.addResource(new
Path("/home/vikas/project/hadoop-1.0.3/conf/hdfs-site.xml"));
        JobConf job = new JobConf(conf, MyJoin.class);
        Path in = new Path(args[0]);
        Path out = new Path(args[1]);

        FileInputFormat.setInputPaths(job, in);

        FileOutputFormat.setOutputPath(job, out);

        job.setJobName("DataJoin_cust X order");

        job.setMapperClass(MapClass.class);
        job.setReducerClass(Reduce.class);

        job.setInputFormat(TextInputFormat.class);
        job.setOutputFormat(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(TaggedWritable.class);
        job.set("mapred.textoutputformat.separator", ",");
        JobClient.runJob(job);
        return 0;
    }

    public static void main(String[] args) throws Exception {
        System.out.println("Starting main() function:");
        int res = ToolRunner.run(new Configuration(),
                                 new MyJoin(),
                                 args);

        System.exit(res);
    }
}



*************************and
error*********************************************


13/01/31 23:04:26 INFO util.NativeCodeLoader: Loaded the native-hadoop
library
13/01/31 23:04:26 WARN snappy.LoadSnappy: Snappy native library not loaded
13/01/31 23:04:26 INFO mapred.FileInputFormat: Total input paths to process
: 2
13/01/31 23:04:26 INFO mapred.JobClient: Running job: job_201301312254_0004
13/01/31 23:04:27 INFO mapred.JobClient:  map 0% reduce 0%
13/01/31 23:04:41 INFO mapred.JobClient:  map 66% reduce 0%
13/01/31 23:04:47 INFO mapred.JobClient:  map 100% reduce 0%
13/01/31 23:04:50 INFO mapred.JobClient:  map 100% reduce 22%
13/01/31 23:04:58 INFO mapred.JobClient: Task Id :
attempt_201301312254_0004_r_000000_0, Status : FAILED
java.lang.NullPointerException
    at MyJoin$TaggedWritable.readFields(MyJoin.java:125)
    at
org.apache.hadoop.io.serializer.WritableSerialization$WritableDeserializer.deserialize(WritableSerialization.java:67)
    at
org.apache.hadoop.io.serializer.WritableSerialization$WritableDeserializer.deserialize(WritableSerialization.java:40)
    at
org.apache.hadoop.mapred.Task$ValuesIterator.readNextValue(Task.java:1271)
    at org.apache.hadoop.mapred.Task$ValuesIterator.next(Task.java:1211)
    at
org.apache.hadoop.mapred.ReduceTask$ReduceValuesIterator.moveToNext(ReduceTask.java:249)
    at
org.apache.hadoop.mapred.ReduceTask$ReduceValuesIterator.next(ReduceTask.java:245)
    at
com.sas.join.DataJoinReducerBase.regroup(DataJoinReducerBase.java:107)
    at com.sas.join.DataJoinReducerBase.reduce(DataJoinReducerBase.java:132)
    at
org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:519)
    at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:420)
    at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:416)
    at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1121)
    at org.apache.hadoop.mapred.Child.main(Child.java:249)

Re: Issue with Reduce Side join using datajoin package

Posted by Vikas Jadhav <vi...@gmail.com>.
***************source ****************



public class MyJoin extends Configured implements Tool {



    public static class MapClass extends DataJoinMapperBase {

        protected Text generateInputTag(String inputFile) {
            System.out.println("Starting generateInputTag() : "+inputFile);
            String datasource = inputFile.split("-")[0];
            return new Text(datasource);
        }

        protected Text generateGroupKey(TaggedMapOutput aRecord) {
            System.out.println(" Statring generateGroupKey() : "+aRecord);
            String line = ((Text) aRecord.getData()).toString();
            String[] tokens = line.split(",");
            String groupKey = tokens[0];
            return new Text(groupKey);
        }

        protected TaggedMapOutput generateTaggedMapOutput(Object value) {
            System.out.println("starting  generateTaggedMapOutput() value
: "+value);
            TaggedWritable retv = new TaggedWritable((Text) value);
            retv.setTag(this.inputTag);
            return retv;
        }
    }

    public static class Reduce extends DataJoinReducerBase {

        protected TaggedMapOutput combine(Object[] tags, Object[] values) {
             System.out.println("combine :");
            if (tags.length < 2) return null;
            String joinedStr = "";
            for (int i=0; i<values.length; i++) {
                if (i > 0) joinedStr += ",";
                TaggedWritable tw = (TaggedWritable) values[i];
                String line = ((Text) tw.getData()).toString();
                String[] tokens = line.split(",", 2);
                joinedStr += tokens[1];
            }
            TaggedWritable retv = new TaggedWritable(new Text(joinedStr));
            retv.setTag((Text) tags[0]);
            return retv;
        }
    }

    public static class TaggedWritable extends TaggedMapOutput {

        private Writable data;


        public TaggedWritable()
        {
            this.tag = new Text();

        }//end empty( taking no parameters) constructor TaggedWritable

        public TaggedWritable(Writable data) {

            this.tag = new Text("");
            this.data = data;
        }

        public Writable getData() {
            return data;
        }

        public void write(DataOutput out) throws IOException {
            //System.out.println(");
            this.tag.write(out);
            this.data.write(out);
            System.out.println("Tag  :"+tag+" Data  :"+ data);
        }

        /*
        public void readFields(DataInput in) throws IOException {
            System.out.println(" Starting short readFields(): "+ in);
            this.tag.readFields(in);
            this.data.readFields(in);
        } */


        public void readFields(DataInput in) throws IOException {
            System.out.println(" Starting short readFields(): "+ in);
            this.tag.readFields(in);
            String w = in.toString();
            if(this.data == null)
                try {
                    this.data =(Writable)
ReflectionUtils.newInstance(Class.forName(w), null);
                } catch (ClassNotFoundException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                }
            this.data.readFields(in);
        }


    }
    public int run(String[] args) throws Exception {
        System.out.println("Starting run() Method:");
        Configuration conf = getConf();
        conf.addResource(new
Path("/home/vikas/project/hadoop-1.0.3/conf/core-site.xml"));
        conf.addResource(new
Path("/home/vikas/project/hadoop-1.0.3/conf/mapred-site.xml"));
        conf.addResource(new
Path("/home/vikas/project/hadoop-1.0.3/conf/hdfs-site.xml"));
        JobConf job = new JobConf(conf, MyJoin.class);
        Path in = new Path(args[0]);
        Path out = new Path(args[1]);

        FileInputFormat.setInputPaths(job, in);

        FileOutputFormat.setOutputPath(job, out);

        job.setJobName("DataJoin_cust X order");

        job.setMapperClass(MapClass.class);
        job.setReducerClass(Reduce.class);

        job.setInputFormat(TextInputFormat.class);
        job.setOutputFormat(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(TaggedWritable.class);
        job.set("mapred.textoutputformat.separator", ",");
        JobClient.runJob(job);
        return 0;
    }

    public static void main(String[] args) throws Exception {
        System.out.println("Starting main() function:");
        int res = ToolRunner.run(new Configuration(),
                                 new MyJoin(),
                                 args);

        System.exit(res);
    }
}



*************************and
error*********************************************


13/01/31 23:04:26 INFO util.NativeCodeLoader: Loaded the native-hadoop
library
13/01/31 23:04:26 WARN snappy.LoadSnappy: Snappy native library not loaded
13/01/31 23:04:26 INFO mapred.FileInputFormat: Total input paths to process
: 2
13/01/31 23:04:26 INFO mapred.JobClient: Running job: job_201301312254_0004
13/01/31 23:04:27 INFO mapred.JobClient:  map 0% reduce 0%
13/01/31 23:04:41 INFO mapred.JobClient:  map 66% reduce 0%
13/01/31 23:04:47 INFO mapred.JobClient:  map 100% reduce 0%
13/01/31 23:04:50 INFO mapred.JobClient:  map 100% reduce 22%
13/01/31 23:04:58 INFO mapred.JobClient: Task Id :
attempt_201301312254_0004_r_000000_0, Status : FAILED
java.lang.NullPointerException
    at MyJoin$TaggedWritable.readFields(MyJoin.java:125)
    at
org.apache.hadoop.io.serializer.WritableSerialization$WritableDeserializer.deserialize(WritableSerialization.java:67)
    at
org.apache.hadoop.io.serializer.WritableSerialization$WritableDeserializer.deserialize(WritableSerialization.java:40)
    at
org.apache.hadoop.mapred.Task$ValuesIterator.readNextValue(Task.java:1271)
    at org.apache.hadoop.mapred.Task$ValuesIterator.next(Task.java:1211)
    at
org.apache.hadoop.mapred.ReduceTask$ReduceValuesIterator.moveToNext(ReduceTask.java:249)
    at
org.apache.hadoop.mapred.ReduceTask$ReduceValuesIterator.next(ReduceTask.java:245)
    at
com.sas.join.DataJoinReducerBase.regroup(DataJoinReducerBase.java:107)
    at com.sas.join.DataJoinReducerBase.reduce(DataJoinReducerBase.java:132)
    at
org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:519)
    at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:420)
    at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:416)
    at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1121)
    at org.apache.hadoop.mapred.Child.main(Child.java:249)

Re: Issue with Reduce Side join using datajoin package

Posted by Vikas Jadhav <vi...@gmail.com>.
***************source ****************



public class MyJoin extends Configured implements Tool {



    public static class MapClass extends DataJoinMapperBase {

        protected Text generateInputTag(String inputFile) {
            System.out.println("Starting generateInputTag() : "+inputFile);
            String datasource = inputFile.split("-")[0];
            return new Text(datasource);
        }

        protected Text generateGroupKey(TaggedMapOutput aRecord) {
            System.out.println(" Statring generateGroupKey() : "+aRecord);
            String line = ((Text) aRecord.getData()).toString();
            String[] tokens = line.split(",");
            String groupKey = tokens[0];
            return new Text(groupKey);
        }

        protected TaggedMapOutput generateTaggedMapOutput(Object value) {
            System.out.println("starting  generateTaggedMapOutput() value
: "+value);
            TaggedWritable retv = new TaggedWritable((Text) value);
            retv.setTag(this.inputTag);
            return retv;
        }
    }

    public static class Reduce extends DataJoinReducerBase {

        protected TaggedMapOutput combine(Object[] tags, Object[] values) {
             System.out.println("combine :");
            if (tags.length < 2) return null;
            String joinedStr = "";
            for (int i=0; i<values.length; i++) {
                if (i > 0) joinedStr += ",";
                TaggedWritable tw = (TaggedWritable) values[i];
                String line = ((Text) tw.getData()).toString();
                String[] tokens = line.split(",", 2);
                joinedStr += tokens[1];
            }
            TaggedWritable retv = new TaggedWritable(new Text(joinedStr));
            retv.setTag((Text) tags[0]);
            return retv;
        }
    }

    public static class TaggedWritable extends TaggedMapOutput {

        private Writable data;


        public TaggedWritable()
        {
            this.tag = new Text();

        }//end empty( taking no parameters) constructor TaggedWritable

        public TaggedWritable(Writable data) {

            this.tag = new Text("");
            this.data = data;
        }

        public Writable getData() {
            return data;
        }

        public void write(DataOutput out) throws IOException {
            //System.out.println(");
            this.tag.write(out);
            this.data.write(out);
            System.out.println("Tag  :"+tag+" Data  :"+ data);
        }

        /*
        public void readFields(DataInput in) throws IOException {
            System.out.println(" Starting short readFields(): "+ in);
            this.tag.readFields(in);
            this.data.readFields(in);
        } */


        public void readFields(DataInput in) throws IOException {
            System.out.println(" Starting short readFields(): "+ in);
            this.tag.readFields(in);
            String w = in.toString();
            if(this.data == null)
                try {
                    this.data =(Writable)
ReflectionUtils.newInstance(Class.forName(w), null);
                } catch (ClassNotFoundException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                }
            this.data.readFields(in);
        }


    }
    public int run(String[] args) throws Exception {
        System.out.println("Starting run() Method:");
        Configuration conf = getConf();
        conf.addResource(new
Path("/home/vikas/project/hadoop-1.0.3/conf/core-site.xml"));
        conf.addResource(new
Path("/home/vikas/project/hadoop-1.0.3/conf/mapred-site.xml"));
        conf.addResource(new
Path("/home/vikas/project/hadoop-1.0.3/conf/hdfs-site.xml"));
        JobConf job = new JobConf(conf, MyJoin.class);
        Path in = new Path(args[0]);
        Path out = new Path(args[1]);

        FileInputFormat.setInputPaths(job, in);

        FileOutputFormat.setOutputPath(job, out);

        job.setJobName("DataJoin_cust X order");

        job.setMapperClass(MapClass.class);
        job.setReducerClass(Reduce.class);

        job.setInputFormat(TextInputFormat.class);
        job.setOutputFormat(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(TaggedWritable.class);
        job.set("mapred.textoutputformat.separator", ",");
        JobClient.runJob(job);
        return 0;
    }

    public static void main(String[] args) throws Exception {
        System.out.println("Starting main() function:");
        int res = ToolRunner.run(new Configuration(),
                                 new MyJoin(),
                                 args);

        System.exit(res);
    }
}



*************************and error*********************************************


13/01/31 23:04:26 INFO util.NativeCodeLoader: Loaded the native-hadoop
library
13/01/31 23:04:26 WARN snappy.LoadSnappy: Snappy native library not loaded
13/01/31 23:04:26 INFO mapred.FileInputFormat: Total input paths to process
: 2
13/01/31 23:04:26 INFO mapred.JobClient: Running job: job_201301312254_0004
13/01/31 23:04:27 INFO mapred.JobClient:  map 0% reduce 0%
13/01/31 23:04:41 INFO mapred.JobClient:  map 66% reduce 0%
13/01/31 23:04:47 INFO mapred.JobClient:  map 100% reduce 0%
13/01/31 23:04:50 INFO mapred.JobClient:  map 100% reduce 22%
13/01/31 23:04:58 INFO mapred.JobClient: Task Id :
attempt_201301312254_0004_r_000000_0, Status : FAILED
java.lang.NullPointerException
    at MyJoin$TaggedWritable.readFields(MyJoin.java:125)
    at
org.apache.hadoop.io.serializer.WritableSerialization$WritableDeserializer.deserialize(WritableSerialization.java:67)
    at
org.apache.hadoop.io.serializer.WritableSerialization$WritableDeserializer.deserialize(WritableSerialization.java:40)
    at
org.apache.hadoop.mapred.Task$ValuesIterator.readNextValue(Task.java:1271)
    at org.apache.hadoop.mapred.Task$ValuesIterator.next(Task.java:1211)
    at
org.apache.hadoop.mapred.ReduceTask$ReduceValuesIterator.moveToNext(ReduceTask.java:249)
    at
org.apache.hadoop.mapred.ReduceTask$ReduceValuesIterator.next(ReduceTask.java:245)
    at
com.sas.join.DataJoinReducerBase.regroup(DataJoinReducerBase.java:107)
    at com.sas.join.DataJoinReducerBase.reduce(DataJoinReducerBase.java:132)
    at
org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:519)
    at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:420)
    at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:416)
    at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1121)
    at org.apache.hadoop.mapred.Child.main(Child.java:249)
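
A quick way to check a Writable like this outside of MapReduce is to round-trip it through Hadoop's in-memory buffers; if readFields() cannot rebuild what write() produced, the failure reproduces here without running a job. The class name TaggedWritableRoundTrip and the sample record are illustrative only, and the snippet assumes a TaggedWritable such as the sketch above.

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Text;

public class TaggedWritableRoundTrip {
    public static void main(String[] args) throws Exception {
        TaggedWritable original = new TaggedWritable(new Text("1,Vikas,Pune"));
        original.setTag(new Text("cust"));

        // Serialize the record the same way the map output path would.
        DataOutputBuffer out = new DataOutputBuffer();
        original.write(out);

        // Deserialize into a fresh instance, as the reducer's value iterator does.
        DataInputBuffer in = new DataInputBuffer();
        in.reset(out.getData(), out.getLength());
        TaggedWritable copy = new TaggedWritable();
        copy.readFields(in);

        System.out.println("tag  = " + copy.getTag());
        System.out.println("data = " + copy.getData());
    }
}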

Re: Issue with Reduce Side join using datajoin package

Posted by Vinod Kumar Vavilapalli <vi...@hortonworks.com>.
Seems like a bug in your code, can you share the source here?

+Vinod


On Tue, Jan 29, 2013 at 4:00 AM, Vikas Jadhav <vi...@gmail.com>wrote:

> I am using Hadoop 1.0.3
>
> I am getting following Error
>
>
> 13/01/29 06:55:19 INFO mapred.JobClient: Task Id :
> attempt_201301290120_0006_r_000000_0, Status : FAILED
> java.lang.NullPointerException
>         at MyJoin$TaggedWritable.readFields(MyJoin.java:101)
>         at
> org.apache.hadoop.io.serializer.WritableSerialization$WritableDeserializer.deserialize(WritableSerialization.java:67)
>         at
> org.apache.hadoop.io.serializer.WritableSerialization$WritableDeserializer.deserialize(WritableSerialization.java:40)
>         at
> org.apache.hadoop.mapred.Task$ValuesIterator.readNextValue(Task.java:1271)
>         at
> org.apache.hadoop.mapred.Task$ValuesIterator.next(Task.java:1211)
>         at
> org.apache.hadoop.mapred.ReduceTask$ReduceValuesIterator.moveToNext(ReduceTask.java:249)
>         at
> org.apache.hadoop.mapred.ReduceTask$ReduceValuesIterator.next(ReduceTask.java:245)
>         at
> org.apache.hadoop.contrib.utils.join.DataJoinReducerBase.regroup(DataJoinReducerBase.java:106)
>         at
> org.apache.hadoop.contrib.utils.join.DataJoinReducerBase.reduce(DataJoinReducerBase.java:129)
>         at
> org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:519)
>         at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:420)
>         at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
>         at java.security.AccessController.doPrivileged(Native Method)
>         at javax.security.auth.Subject.doAs(Subject.java:396)
>         at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1121)
>         at org.apache.hadoop.mapred.Child.main(Child.java:249)
>
>
>
>
> It is poiting to  String dataClz = in.readUTF(); this line in readFields
>
>
> *
>
> public
> *
>
> *void* readFields( DataInput in) *throws* IOException
>
> {
>
> *this*.tag.readFields( in);
>
> //String dataClz = in.readUTF();
>
> String dataClz = in.readUTF(); ----> error log show this line is culprit
>
> *try*
> * *
>
> {
>
> //try - catch is needed because the " error: unreported exception
>
> //ClassNotFoundException; must be caught or declared to be thrown"
>
> //is "raised" from compiler
>
> *if*( *this*.data == *null* || !*this*.data.getClass().getName().equals(
> dataClz))
>
> {
>
> //this line of code "raises" the compile error mentioned above
>
> *this*.data = (Writable) ReflectionUtils.*newInstance*( Class.*forName*(
> dataClz), *null*);
>
> }
>
> *this*.data.readFields( in);
>
> }
>
> *catch*( ClassNotFoundException cnfe)
>
> {
>
> System.*out*.println( "Problem in TaggedWritable class, method
> readFields.");
>
> }
>
> }//end readFields
>
>
> --
> *
> *
> *
>
> Thanx and Regards*
> * Vikas Jadhav*
>



-- 
+Vinod
Hortonworks Inc.
http://hortonworks.com/
