You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hdfs-user@hadoop.apache.org by Paul van Hoven <pa...@googlemail.com> on 2012/12/18 17:18:58 UTC

Job does not finish due to java.lang.ClassCastException: class java.util.Date

Hi,

I wrote a small java program for processing some log files (line after
line, separated by newlines). I'm using EMR (Elastic MapReduce) on Amazon to perform my job.
I wrote the following code:

// Mapper: parses each input line into either a Click or a Conversion record
// and emits it keyed by the record's timestamp.
//
// NOTE(review): the output key type java.util.Date is not a Writable and has
// no registered Hadoop serializer — this is what triggers the
// ClassCastException in JobConf.getOutputKeyComparator shown below. The key
// type should be a WritableComparable (e.g. LongWritable holding epoch
// millis); likewise Object as the value type needs a Writable wrapper.
public static class Map extends Mapper<LongWritable, Text, Date, Object> {

		private LineParser lineParser = new LineParser();

		public void map( LongWritable key, Text value, Context context )
throws IOException, InterruptedException {

			// Try to interpret the line as a Click first; emit and stop on success.
			Click click = lineParser.parseClick( value.toString() );
			if( click != null ) {
				context.write( click.timestamp, click );
				return;
			}
			
			// Otherwise try Conversion; lines matching neither are silently dropped.
			Conversion conversion = lineParser.parseConversion( value.toString() );
			if( conversion != null ) {
				context.write( conversion.timestamp, conversion );
				return;
			}

		}
	}



	/**
	 * Reducer: for every value grouped under a timestamp key, emits a
	 * (formatted date, record text) pair.
	 *
	 * NOTE(review): java.util.Date is not a Writable, so this job fails in the
	 * map phase before reduce ever runs (see the ClassCastException from
	 * JobConf.getOutputKeyComparator below); the key type needs to be a
	 * WritableComparable such as LongWritable.
	 */
	public static class Reduce extends Reducer<Date, Object, Text, Text> {

		public void reduce( Date key, Iterable<Object> values, Context context )
				throws IOException, InterruptedException {

			// SimpleDateFormat is not thread-safe; a per-call instance is the
			// safe choice here — do not hoist it to a static field.
			SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss Z");
			// The key is constant for the whole call, so format it once.
			Text formattedKey = new Text( sdf.format( key ) );
			for( Object obj : values ) {
				// BUG FIX: the original cast BOTH branches to Click, so any
				// Conversion value raised a ClassCastException. Use instanceof
				// instead of the fragile class-name string comparison.
				// (Assumes Conversion has a toHadoopString() parallel to
				// Click's — confirm against the Conversion class.)
				if( obj instanceof Click )
					context.write( formattedKey, new Text( ((Click) obj).toHadoopString() ) );
				else
					context.write( formattedKey, new Text( ((Conversion) obj).toHadoopString() ) );
			}
		}
	}



	// Job driver: configures and submits the MapReduce job.
	// args[0] = input path, args[1] = output path (output dir must not exist).
	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();

		Job job = new Job(conf, "MyHadoopJob");
		job.setJarByClass(MyHadoopClass.class);

		// NOTE(review): Date and Object are plain Java types, not Writables.
		// Hadoop's MapOutputBuffer calls Class.asSubclass(WritableComparable)
		// on the map output key class, which is exactly the
		// "ClassCastException: class java.util.Date" in the stack trace below.
		// These must be Writable types (e.g. LongWritable / a custom Writable),
		// together with matching Mapper/Reducer generic parameters.
		job.setOutputKeyClass(Date.class);
		job.setOutputValueClass(Object.class);

		job.setMapperClass(Map.class);
		job.setReducerClass(Reduce.class);

		job.setInputFormatClass(TextInputFormat.class);
		job.setOutputFormatClass(TextOutputFormat.class);

		FileInputFormat.addInputPath( job, new Path( args[0] ) );
		FileOutputFormat.setOutputPath( job, new Path( args[1] ) );

		job.waitForCompletion(true);
	}


Unfortunately I get the following Exception and the job fails:


java.lang.ClassCastException: class java.util.Date
	at java.lang.Class.asSubclass(Class.java:3018)
	at org.apache.hadoop.mapred.JobConf.getOutputKeyComparator(JobConf.java:786)
	at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.<init>(MapTask.java:975)
	at org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:681)
	at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:763)
	at org.apache.hadoop.mapred.MapTask.run(MapTask.java:375)
	at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
	at java.security.AccessController.doPrivileged(Native Method)
	at javax.security.auth.Subject.doAs(Subject.java:396)
	at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1132)
	at org.apache.hadoop.mapred.Child.main(Child.java:249)


What is wrong with my code?

Re: Job does not finish due to java.lang.ClassCastException: class java.util.Date

Posted by Harsh J <ha...@cloudera.com>.
Hadoop, out of the box, does not serialize/understand java native
types (such as String, Date, etc.).

You will need to use Writable types [1], or use Avro serialization if
you want some of the serialization work done for you already [2].

[1] - Hadoop: The Definitive Guide, chapter 2 and 4 probably. Also see
http://developer.yahoo.com/hadoop/tutorial/module4.html. Notably also
http://hadoop.apache.org/common/docs/current/api/org/apache/hadoop/io/Writable.html
[2] - Apache Avro described at http://avro.apache.org and MR explained
at http://avro.apache.org/docs/1.7.3/api/java/org/apache/avro/mapred/package-summary.html#package_description

On Tue, Dec 18, 2012 at 9:48 PM, Paul van Hoven
<pa...@googlemail.com> wrote:
> Hi,
>
> I wrote a small java program for processing some log files (line after
> line separated by newline). I'm using ecm on amazon to perform my job.
> I wrote the following code:
>
> public static class Map extends Mapper<LongWritable, Text, Date, Object> {
>
>                 private LineParser lineParser = new LineParser();
>
>                 public void map( LongWritable key, Text value, Context context )
> throws IOException, InterruptedException {
>
>                         Click click = lineParser.parseClick( value.toString() );
>                         if( click != null ) {
>                                 context.write( click.timestamp, click );
>                                 return;
>                         }
>
>                         Conversion conversion = lineParser.parseConversion( value.toString() );
>                         if( conversion != null ) {
>                                 context.write( conversion.timestamp, conversion );
>                                 return;
>                         }
>
>                 }
>         }
>
>
>
>         public static class Reduce extends Reducer<Date, Object, Text, Text> {
>
>                 public void reduce( Date key, Iterable<Object> values, Context
> context) throws IOException, InterruptedException {
>
>                         SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss Z");
>                         for( Object obj : values ) {
>                                 if( obj.getClass().getName().equals("myHadoopProject.Click") )
>                                         context.write( new Text( sdf.format( key ) ), new Text( ((Click)
> obj).toHadoopString() ) );
>                                 else
>                                         context.write( new Text( sdf.format( key ) ), new Text( ((Click)
> obj).toHadoopString() ) );
>                         }
>
>                 }
>
>         }
>
>
>
>         public static void main(String[] args) throws Exception {
>                 Configuration conf = new Configuration();
>
>                 Job job = new Job(conf, "MyHadoopJob");
>                 job.setJarByClass(MyHadoopClass.class);
>
>                 job.setOutputKeyClass(Date.class);
>                 job.setOutputValueClass(Object.class);
>
>                 job.setMapperClass(Map.class);
>                 job.setReducerClass(Reduce.class);
>
>                 job.setInputFormatClass(TextInputFormat.class);
>                 job.setOutputFormatClass(TextOutputFormat.class);
>
>                 FileInputFormat.addInputPath( job, new Path( args[0] ) );
>                 FileOutputFormat.setOutputPath( job, new Path( args[1] ) );
>
>                 job.waitForCompletion(true);
>         }
>
>
> Unfortunately I get the following Exception and the job fails:
>
>
> java.lang.ClassCastException: class java.util.Date
>         at java.lang.Class.asSubclass(Class.java:3018)
>         at org.apache.hadoop.mapred.JobConf.getOutputKeyComparator(JobConf.java:786)
>         at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.<init>(MapTask.java:975)
>         at org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:681)
>         at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:763)
>         at org.apache.hadoop.mapred.MapTask.run(MapTask.java:375)
>         at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
>         at java.security.AccessController.doPrivileged(Native Method)
>         at javax.security.auth.Subject.doAs(Subject.java:396)
>         at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1132)
>         at org.apache.hadoop.mapred.Child.main(Child.java:249)
>
>
> What is wrong with my code?



-- 
Harsh J

Re: Job does not finish due to java.lang.ClassCastException: class java.util.Date

Posted by Harsh J <ha...@cloudera.com>.
Hadoop, out of the box, does not serialize/understand java native
types (such as String, Date, etc.).

You will need to use Writable types [1], or use Avro serialization if
want some work done for free already [2].

[1] - Hadoop: The Definitive Guide, chapter 2 and 4 probably. Also see
http://developer.yahoo.com/hadoop/tutorial/module4.html. Notably also
http://hadoop.apache.org/common/docs/current/api/org/apache/hadoop/io/Writable.html
[2] - Apache Avro described at http://avro.apache.org and MR explained
at http://avro.apache.org/docs/1.7.3/api/java/org/apache/avro/mapred/package-summary.html#package_description

On Tue, Dec 18, 2012 at 9:48 PM, Paul van Hoven
<pa...@googlemail.com> wrote:
> Hi,
>
> I wrote a small java program for processing some log files (line after
> line separated by newline). I'm using ecm on amazon to perform my job.
> I wrote the following code:
>
> public static class Map extends Mapper<LongWritable, Text, Date, Object> {
>
>                 private LineParser lineParser = new LineParser();
>
>                 public void map( LongWritable key, Text value, Context context )
> throws IOException, InterruptedException {
>
>                         Click click = lineParser.parseClick( value.toString() );
>                         if( click != null ) {
>                                 context.write( click.timestamp, click );
>                                 return;
>                         }
>
>                         Conversion conversion = lineParser.parseConversion( value.toString() );
>                         if( conversion != null ) {
>                                 context.write( conversion.timestamp, conversion );
>                                 return;
>                         }
>
>                 }
>         }
>
>
>
>         public static class Reduce extends Reducer<Date, Object, Text, Text> {
>
>                 public void reduce( Date key, Iterable<Object> values, Context
> context) throws IOException, InterruptedException {
>
>                         SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss Z");
>                         for( Object obj : values ) {
>                                 if( obj.getClass().getName().equals("myHadoopProject.Click") )
>                                         context.write( new Text( sdf.format( key ) ), new Text( ((Click)
> obj).toHadoopString() ) );
>                                 else
>                                         context.write( new Text( sdf.format( key ) ), new Text( ((Click)
> obj).toHadoopString() ) );
>                         }
>
>                 }
>
>         }
>
>
>
>         public static void main(String[] args) throws Exception {
>                 Configuration conf = new Configuration();
>
>                 Job job = new Job(conf, "MyHadoopJob");
>                 job.setJarByClass(MyHadoopClass.class);
>
>                 job.setOutputKeyClass(Date.class);
>                 job.setOutputValueClass(Object.class);
>
>                 job.setMapperClass(Map.class);
>                 job.setReducerClass(Reduce.class);
>
>                 job.setInputFormatClass(TextInputFormat.class);
>                 job.setOutputFormatClass(TextOutputFormat.class);
>
>                 FileInputFormat.addInputPath( job, new Path( args[0] ) );
>                 FileOutputFormat.setOutputPath( job, new Path( args[1] ) );
>
>                 job.waitForCompletion(true);
>         }
>
>
> Unfortunately I get the following Exception and the job fails:
>
>
> java.lang.ClassCastException: class java.util.Date
>         at java.lang.Class.asSubclass(Class.java:3018)
>         at org.apache.hadoop.mapred.JobConf.getOutputKeyComparator(JobConf.java:786)
>         at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.<init>(MapTask.java:975)
>         at org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:681)
>         at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:763)
>         at org.apache.hadoop.mapred.MapTask.run(MapTask.java:375)
>         at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
>         at java.security.AccessController.doPrivileged(Native Method)
>         at javax.security.auth.Subject.doAs(Subject.java:396)
>         at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1132)
>         at org.apache.hadoop.mapred.Child.main(Child.java:249)
>
>
> What is wrong with my code?



-- 
Harsh J

Re: Job does not finish due to java.lang.ClassCastException: class java.util.Date

Posted by Harsh J <ha...@cloudera.com>.
Hadoop, out of the box, does not serialize/understand java native
types (such as String, Date, etc.).

You will need to use Writable types [1], or use Avro serialization if
want some work done for free already [2].

[1] - Hadoop: The Definitive Guide, chapter 2 and 4 probably. Also see
http://developer.yahoo.com/hadoop/tutorial/module4.html. Notably also
http://hadoop.apache.org/common/docs/current/api/org/apache/hadoop/io/Writable.html
[2] - Apache Avro described at http://avro.apache.org and MR explained
at http://avro.apache.org/docs/1.7.3/api/java/org/apache/avro/mapred/package-summary.html#package_description

On Tue, Dec 18, 2012 at 9:48 PM, Paul van Hoven
<pa...@googlemail.com> wrote:
> Hi,
>
> I wrote a small java program for processing some log files (line after
> line separated by newline). I'm using ecm on amazon to perform my job.
> I wrote the following code:
>
> public static class Map extends Mapper<LongWritable, Text, Date, Object> {
>
>                 private LineParser lineParser = new LineParser();
>
>                 public void map( LongWritable key, Text value, Context context )
> throws IOException, InterruptedException {
>
>                         Click click = lineParser.parseClick( value.toString() );
>                         if( click != null ) {
>                                 context.write( click.timestamp, click );
>                                 return;
>                         }
>
>                         Conversion conversion = lineParser.parseConversion( value.toString() );
>                         if( conversion != null ) {
>                                 context.write( conversion.timestamp, conversion );
>                                 return;
>                         }
>
>                 }
>         }
>
>
>
>         public static class Reduce extends Reducer<Date, Object, Text, Text> {
>
>                 public void reduce( Date key, Iterable<Object> values, Context
> context) throws IOException, InterruptedException {
>
>                         SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss Z");
>                         for( Object obj : values ) {
>                                 if( obj.getClass().getName().equals("myHadoopProject.Click") )
>                                         context.write( new Text( sdf.format( key ) ), new Text( ((Click)
> obj).toHadoopString() ) );
>                                 else
>                                         context.write( new Text( sdf.format( key ) ), new Text( ((Click)
> obj).toHadoopString() ) );
>                         }
>
>                 }
>
>         }
>
>
>
>         public static void main(String[] args) throws Exception {
>                 Configuration conf = new Configuration();
>
>                 Job job = new Job(conf, "MyHadoopJob");
>                 job.setJarByClass(MyHadoopClass.class);
>
>                 job.setOutputKeyClass(Date.class);
>                 job.setOutputValueClass(Object.class);
>
>                 job.setMapperClass(Map.class);
>                 job.setReducerClass(Reduce.class);
>
>                 job.setInputFormatClass(TextInputFormat.class);
>                 job.setOutputFormatClass(TextOutputFormat.class);
>
>                 FileInputFormat.addInputPath( job, new Path( args[0] ) );
>                 FileOutputFormat.setOutputPath( job, new Path( args[1] ) );
>
>                 job.waitForCompletion(true);
>         }
>
>
> Unfortunately I get the following Exception and the job fails:
>
>
> java.lang.ClassCastException: class java.util.Date
>         at java.lang.Class.asSubclass(Class.java:3018)
>         at org.apache.hadoop.mapred.JobConf.getOutputKeyComparator(JobConf.java:786)
>         at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.<init>(MapTask.java:975)
>         at org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:681)
>         at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:763)
>         at org.apache.hadoop.mapred.MapTask.run(MapTask.java:375)
>         at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
>         at java.security.AccessController.doPrivileged(Native Method)
>         at javax.security.auth.Subject.doAs(Subject.java:396)
>         at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1132)
>         at org.apache.hadoop.mapred.Child.main(Child.java:249)
>
>
> What is wrong with my code?



-- 
Harsh J

Re: Job does not finish due to java.lang.ClassCastException: class java.util.Date

Posted by Harsh J <ha...@cloudera.com>.
Hadoop, out of the box, does not serialize/understand java native
types (such as String, Date, etc.).

You will need to use Writable types [1], or use Avro serialization if
want some work done for free already [2].

[1] - Hadoop: The Definitive Guide, chapter 2 and 4 probably. Also see
http://developer.yahoo.com/hadoop/tutorial/module4.html. Notably also
http://hadoop.apache.org/common/docs/current/api/org/apache/hadoop/io/Writable.html
[2] - Apache Avro described at http://avro.apache.org and MR explained
at http://avro.apache.org/docs/1.7.3/api/java/org/apache/avro/mapred/package-summary.html#package_description

On Tue, Dec 18, 2012 at 9:48 PM, Paul van Hoven
<pa...@googlemail.com> wrote:
> Hi,
>
> I wrote a small java program for processing some log files (line after
> line separated by newline). I'm using ecm on amazon to perform my job.
> I wrote the following code:
>
> public static class Map extends Mapper<LongWritable, Text, Date, Object> {
>
>                 private LineParser lineParser = new LineParser();
>
>                 public void map( LongWritable key, Text value, Context context )
> throws IOException, InterruptedException {
>
>                         Click click = lineParser.parseClick( value.toString() );
>                         if( click != null ) {
>                                 context.write( click.timestamp, click );
>                                 return;
>                         }
>
>                         Conversion conversion = lineParser.parseConversion( value.toString() );
>                         if( conversion != null ) {
>                                 context.write( conversion.timestamp, conversion );
>                                 return;
>                         }
>
>                 }
>         }
>
>
>
>         public static class Reduce extends Reducer<Date, Object, Text, Text> {
>
>                 public void reduce( Date key, Iterable<Object> values, Context
> context) throws IOException, InterruptedException {
>
>                         SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss Z");
>                         for( Object obj : values ) {
>                                 if( obj.getClass().getName().equals("myHadoopProject.Click") )
>                                         context.write( new Text( sdf.format( key ) ), new Text( ((Click)
> obj).toHadoopString() ) );
>                                 else
>                                         context.write( new Text( sdf.format( key ) ), new Text( ((Click)
> obj).toHadoopString() ) );
>                         }
>
>                 }
>
>         }
>
>
>
>         public static void main(String[] args) throws Exception {
>                 Configuration conf = new Configuration();
>
>                 Job job = new Job(conf, "MyHadoopJob");
>                 job.setJarByClass(MyHadoopClass.class);
>
>                 job.setOutputKeyClass(Date.class);
>                 job.setOutputValueClass(Object.class);
>
>                 job.setMapperClass(Map.class);
>                 job.setReducerClass(Reduce.class);
>
>                 job.setInputFormatClass(TextInputFormat.class);
>                 job.setOutputFormatClass(TextOutputFormat.class);
>
>                 FileInputFormat.addInputPath( job, new Path( args[0] ) );
>                 FileOutputFormat.setOutputPath( job, new Path( args[1] ) );
>
>                 job.waitForCompletion(true);
>         }
>
>
> Unfortunately I get the following Exception and the job fails:
>
>
> java.lang.ClassCastException: class java.util.Date
>         at java.lang.Class.asSubclass(Class.java:3018)
>         at org.apache.hadoop.mapred.JobConf.getOutputKeyComparator(JobConf.java:786)
>         at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.<init>(MapTask.java:975)
>         at org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:681)
>         at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:763)
>         at org.apache.hadoop.mapred.MapTask.run(MapTask.java:375)
>         at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
>         at java.security.AccessController.doPrivileged(Native Method)
>         at javax.security.auth.Subject.doAs(Subject.java:396)
>         at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1132)
>         at org.apache.hadoop.mapred.Child.main(Child.java:249)
>
>
> What is wrong with my code?



-- 
Harsh J