You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hdfs-user@hadoop.apache.org by Paul van Hoven <pa...@googlemail.com> on 2012/12/18 17:18:58 UTC
Job does not finish due to java.lang.ClassCastException: class java.util.Date
Hi,
I wrote a small java program for processing some log files (line after
line separated by newline). I'm using EMR on Amazon to perform my job.
I wrote the following code:
public static class Map extends Mapper<LongWritable, Text, Date, Object> {
private LineParser lineParser = new LineParser();
public void map( LongWritable key, Text value, Context context )
throws IOException, InterruptedException {
Click click = lineParser.parseClick( value.toString() );
if( click != null ) {
context.write( click.timestamp, click );
return;
}
Conversion conversion = lineParser.parseConversion( value.toString() );
if( conversion != null ) {
context.write( conversion.timestamp, conversion );
return;
}
}
}
public static class Reduce extends Reducer<Date, Object, Text, Text> {
public void reduce( Date key, Iterable<Object> values, Context
context) throws IOException, InterruptedException {
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss Z");
for( Object obj : values ) {
if( obj.getClass().getName().equals("myHadoopProject.Click") )
context.write( new Text( sdf.format( key ) ), new Text( ((Click)
obj).toHadoopString() ) );
else
context.write( new Text( sdf.format( key ) ), new Text( ((Click)
obj).toHadoopString() ) );
}
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = new Job(conf, "MyHadoopJob");
job.setJarByClass(MyHadoopClass.class);
job.setOutputKeyClass(Date.class);
job.setOutputValueClass(Object.class);
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath( job, new Path( args[0] ) );
FileOutputFormat.setOutputPath( job, new Path( args[1] ) );
job.waitForCompletion(true);
}
Unfortunately I get the following Exception and the job fails:
java.lang.ClassCastException: class java.util.Date
at java.lang.Class.asSubclass(Class.java:3018)
at org.apache.hadoop.mapred.JobConf.getOutputKeyComparator(JobConf.java:786)
at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.<init>(MapTask.java:975)
at org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:681)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:763)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:375)
at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:396)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1132)
at org.apache.hadoop.mapred.Child.main(Child.java:249)
What is wrong with my code?
Re: Job does not finish due to java.lang.ClassCastException: class java.util.Date
Posted by Harsh J <ha...@cloudera.com>.
Hadoop, out of the box, does not serialize/understand java native
types (such as String, Date, etc.).
You will need to use Writable types [1], or use Avro serialization if
you want some work done for free already [2].
[1] - Hadoop: The Definitive Guide, chapter 2 and 4 probably. Also see
http://developer.yahoo.com/hadoop/tutorial/module4.html. Notably also
http://hadoop.apache.org/common/docs/current/api/org/apache/hadoop/io/Writable.html
[2] - Apache Avro described at http://avro.apache.org and MR explained
at http://avro.apache.org/docs/1.7.3/api/java/org/apache/avro/mapred/package-summary.html#package_description
On Tue, Dec 18, 2012 at 9:48 PM, Paul van Hoven
<pa...@googlemail.com> wrote:
> Hi,
>
> I wrote a small java program for processing some log files (line after
> line separated by newline). I'm using ecm on amazon to perform my job.
> I wrote the following code:
>
> public static class Map extends Mapper<LongWritable, Text, Date, Object> {
>
> private LineParser lineParser = new LineParser();
>
> public void map( LongWritable key, Text value, Context context )
> throws IOException, InterruptedException {
>
> Click click = lineParser.parseClick( value.toString() );
> if( click != null ) {
> context.write( click.timestamp, click );
> return;
> }
>
> Conversion conversion = lineParser.parseConversion( value.toString() );
> if( conversion != null ) {
> context.write( conversion.timestamp, conversion );
> return;
> }
>
> }
> }
>
>
>
> public static class Reduce extends Reducer<Date, Object, Text, Text> {
>
> public void reduce( Date key, Iterable<Object> values, Context
> context) throws IOException, InterruptedException {
>
> SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss Z");
> for( Object obj : values ) {
> if( obj.getClass().getName().equals("myHadoopProject.Click") )
> context.write( new Text( sdf.format( key ) ), new Text( ((Click)
> obj).toHadoopString() ) );
> else
> context.write( new Text( sdf.format( key ) ), new Text( ((Click)
> obj).toHadoopString() ) );
> }
>
> }
>
> }
>
>
>
> public static void main(String[] args) throws Exception {
> Configuration conf = new Configuration();
>
> Job job = new Job(conf, "MyHadoopJob");
> job.setJarByClass(MyHadoopClass.class);
>
> job.setOutputKeyClass(Date.class);
> job.setOutputValueClass(Object.class);
>
> job.setMapperClass(Map.class);
> job.setReducerClass(Reduce.class);
>
> job.setInputFormatClass(TextInputFormat.class);
> job.setOutputFormatClass(TextOutputFormat.class);
>
> FileInputFormat.addInputPath( job, new Path( args[0] ) );
> FileOutputFormat.setOutputPath( job, new Path( args[1] ) );
>
> job.waitForCompletion(true);
> }
>
>
> Unfortunately I get the following Exception and the job fails:
>
>
> java.lang.ClassCastException: class java.util.Date
> at java.lang.Class.asSubclass(Class.java:3018)
> at org.apache.hadoop.mapred.JobConf.getOutputKeyComparator(JobConf.java:786)
> at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.<init>(MapTask.java:975)
> at org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:681)
> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:763)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:375)
> at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:396)
> at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1132)
> at org.apache.hadoop.mapred.Child.main(Child.java:249)
>
>
> What is wrong with my code?
--
Harsh J
Re: Job does not finish due to java.lang.ClassCastException: class java.util.Date
Posted by Harsh J <ha...@cloudera.com>.
Hadoop, out of the box, does not serialize/understand java native
types (such as String, Date, etc.).
You will need to use Writable types [1], or use Avro serialization if
you want some work done for free already [2].
[1] - Hadoop: The Definitive Guide, chapter 2 and 4 probably. Also see
http://developer.yahoo.com/hadoop/tutorial/module4.html. Notably also
http://hadoop.apache.org/common/docs/current/api/org/apache/hadoop/io/Writable.html
[2] - Apache Avro described at http://avro.apache.org and MR explained
at http://avro.apache.org/docs/1.7.3/api/java/org/apache/avro/mapred/package-summary.html#package_description
On Tue, Dec 18, 2012 at 9:48 PM, Paul van Hoven
<pa...@googlemail.com> wrote:
> Hi,
>
> I wrote a small java program for processing some log files (line after
> line separated by newline). I'm using ecm on amazon to perform my job.
> I wrote the following code:
>
> public static class Map extends Mapper<LongWritable, Text, Date, Object> {
>
> private LineParser lineParser = new LineParser();
>
> public void map( LongWritable key, Text value, Context context )
> throws IOException, InterruptedException {
>
> Click click = lineParser.parseClick( value.toString() );
> if( click != null ) {
> context.write( click.timestamp, click );
> return;
> }
>
> Conversion conversion = lineParser.parseConversion( value.toString() );
> if( conversion != null ) {
> context.write( conversion.timestamp, conversion );
> return;
> }
>
> }
> }
>
>
>
> public static class Reduce extends Reducer<Date, Object, Text, Text> {
>
> public void reduce( Date key, Iterable<Object> values, Context
> context) throws IOException, InterruptedException {
>
> SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss Z");
> for( Object obj : values ) {
> if( obj.getClass().getName().equals("myHadoopProject.Click") )
> context.write( new Text( sdf.format( key ) ), new Text( ((Click)
> obj).toHadoopString() ) );
> else
> context.write( new Text( sdf.format( key ) ), new Text( ((Click)
> obj).toHadoopString() ) );
> }
>
> }
>
> }
>
>
>
> public static void main(String[] args) throws Exception {
> Configuration conf = new Configuration();
>
> Job job = new Job(conf, "MyHadoopJob");
> job.setJarByClass(MyHadoopClass.class);
>
> job.setOutputKeyClass(Date.class);
> job.setOutputValueClass(Object.class);
>
> job.setMapperClass(Map.class);
> job.setReducerClass(Reduce.class);
>
> job.setInputFormatClass(TextInputFormat.class);
> job.setOutputFormatClass(TextOutputFormat.class);
>
> FileInputFormat.addInputPath( job, new Path( args[0] ) );
> FileOutputFormat.setOutputPath( job, new Path( args[1] ) );
>
> job.waitForCompletion(true);
> }
>
>
> Unfortunately I get the following Exception and the job fails:
>
>
> java.lang.ClassCastException: class java.util.Date
> at java.lang.Class.asSubclass(Class.java:3018)
> at org.apache.hadoop.mapred.JobConf.getOutputKeyComparator(JobConf.java:786)
> at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.<init>(MapTask.java:975)
> at org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:681)
> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:763)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:375)
> at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:396)
> at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1132)
> at org.apache.hadoop.mapred.Child.main(Child.java:249)
>
>
> What is wrong with my code?
--
Harsh J
Re: Job does not finish due to java.lang.ClassCastException: class java.util.Date
Posted by Harsh J <ha...@cloudera.com>.
Hadoop, out of the box, does not serialize/understand java native
types (such as String, Date, etc.).
You will need to use Writable types [1], or use Avro serialization if
you want some work done for free already [2].
[1] - Hadoop: The Definitive Guide, chapter 2 and 4 probably. Also see
http://developer.yahoo.com/hadoop/tutorial/module4.html. Notably also
http://hadoop.apache.org/common/docs/current/api/org/apache/hadoop/io/Writable.html
[2] - Apache Avro described at http://avro.apache.org and MR explained
at http://avro.apache.org/docs/1.7.3/api/java/org/apache/avro/mapred/package-summary.html#package_description
On Tue, Dec 18, 2012 at 9:48 PM, Paul van Hoven
<pa...@googlemail.com> wrote:
> Hi,
>
> I wrote a small java program for processing some log files (line after
> line separated by newline). I'm using ecm on amazon to perform my job.
> I wrote the following code:
>
> public static class Map extends Mapper<LongWritable, Text, Date, Object> {
>
> private LineParser lineParser = new LineParser();
>
> public void map( LongWritable key, Text value, Context context )
> throws IOException, InterruptedException {
>
> Click click = lineParser.parseClick( value.toString() );
> if( click != null ) {
> context.write( click.timestamp, click );
> return;
> }
>
> Conversion conversion = lineParser.parseConversion( value.toString() );
> if( conversion != null ) {
> context.write( conversion.timestamp, conversion );
> return;
> }
>
> }
> }
>
>
>
> public static class Reduce extends Reducer<Date, Object, Text, Text> {
>
> public void reduce( Date key, Iterable<Object> values, Context
> context) throws IOException, InterruptedException {
>
> SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss Z");
> for( Object obj : values ) {
> if( obj.getClass().getName().equals("myHadoopProject.Click") )
> context.write( new Text( sdf.format( key ) ), new Text( ((Click)
> obj).toHadoopString() ) );
> else
> context.write( new Text( sdf.format( key ) ), new Text( ((Click)
> obj).toHadoopString() ) );
> }
>
> }
>
> }
>
>
>
> public static void main(String[] args) throws Exception {
> Configuration conf = new Configuration();
>
> Job job = new Job(conf, "MyHadoopJob");
> job.setJarByClass(MyHadoopClass.class);
>
> job.setOutputKeyClass(Date.class);
> job.setOutputValueClass(Object.class);
>
> job.setMapperClass(Map.class);
> job.setReducerClass(Reduce.class);
>
> job.setInputFormatClass(TextInputFormat.class);
> job.setOutputFormatClass(TextOutputFormat.class);
>
> FileInputFormat.addInputPath( job, new Path( args[0] ) );
> FileOutputFormat.setOutputPath( job, new Path( args[1] ) );
>
> job.waitForCompletion(true);
> }
>
>
> Unfortunately I get the following Exception and the job fails:
>
>
> java.lang.ClassCastException: class java.util.Date
> at java.lang.Class.asSubclass(Class.java:3018)
> at org.apache.hadoop.mapred.JobConf.getOutputKeyComparator(JobConf.java:786)
> at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.<init>(MapTask.java:975)
> at org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:681)
> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:763)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:375)
> at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:396)
> at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1132)
> at org.apache.hadoop.mapred.Child.main(Child.java:249)
>
>
> What is wrong with my code?
--
Harsh J
Re: Job does not finish due to java.lang.ClassCastException: class java.util.Date
Posted by Harsh J <ha...@cloudera.com>.
Hadoop, out of the box, does not serialize/understand java native
types (such as String, Date, etc.).
You will need to use Writable types [1], or use Avro serialization if
you want some work done for free already [2].
[1] - Hadoop: The Definitive Guide, chapter 2 and 4 probably. Also see
http://developer.yahoo.com/hadoop/tutorial/module4.html. Notably also
http://hadoop.apache.org/common/docs/current/api/org/apache/hadoop/io/Writable.html
[2] - Apache Avro described at http://avro.apache.org and MR explained
at http://avro.apache.org/docs/1.7.3/api/java/org/apache/avro/mapred/package-summary.html#package_description
On Tue, Dec 18, 2012 at 9:48 PM, Paul van Hoven
<pa...@googlemail.com> wrote:
> Hi,
>
> I wrote a small java program for processing some log files (line after
> line separated by newline). I'm using ecm on amazon to perform my job.
> I wrote the following code:
>
> public static class Map extends Mapper<LongWritable, Text, Date, Object> {
>
> private LineParser lineParser = new LineParser();
>
> public void map( LongWritable key, Text value, Context context )
> throws IOException, InterruptedException {
>
> Click click = lineParser.parseClick( value.toString() );
> if( click != null ) {
> context.write( click.timestamp, click );
> return;
> }
>
> Conversion conversion = lineParser.parseConversion( value.toString() );
> if( conversion != null ) {
> context.write( conversion.timestamp, conversion );
> return;
> }
>
> }
> }
>
>
>
> public static class Reduce extends Reducer<Date, Object, Text, Text> {
>
> public void reduce( Date key, Iterable<Object> values, Context
> context) throws IOException, InterruptedException {
>
> SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss Z");
> for( Object obj : values ) {
> if( obj.getClass().getName().equals("myHadoopProject.Click") )
> context.write( new Text( sdf.format( key ) ), new Text( ((Click)
> obj).toHadoopString() ) );
> else
> context.write( new Text( sdf.format( key ) ), new Text( ((Click)
> obj).toHadoopString() ) );
> }
>
> }
>
> }
>
>
>
> public static void main(String[] args) throws Exception {
> Configuration conf = new Configuration();
>
> Job job = new Job(conf, "MyHadoopJob");
> job.setJarByClass(MyHadoopClass.class);
>
> job.setOutputKeyClass(Date.class);
> job.setOutputValueClass(Object.class);
>
> job.setMapperClass(Map.class);
> job.setReducerClass(Reduce.class);
>
> job.setInputFormatClass(TextInputFormat.class);
> job.setOutputFormatClass(TextOutputFormat.class);
>
> FileInputFormat.addInputPath( job, new Path( args[0] ) );
> FileOutputFormat.setOutputPath( job, new Path( args[1] ) );
>
> job.waitForCompletion(true);
> }
>
>
> Unfortunately I get the following Exception and the job fails:
>
>
> java.lang.ClassCastException: class java.util.Date
> at java.lang.Class.asSubclass(Class.java:3018)
> at org.apache.hadoop.mapred.JobConf.getOutputKeyComparator(JobConf.java:786)
> at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.<init>(MapTask.java:975)
> at org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:681)
> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:763)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:375)
> at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:396)
> at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1132)
> at org.apache.hadoop.mapred.Child.main(Child.java:249)
>
>
> What is wrong with my code?
--
Harsh J