You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "dongxu (JIRA)" <ji...@apache.org> on 2014/11/03 08:48:33 UTC

[jira] [Updated] (SPARK-4201) Can't use concat() on partition column in where condition (Hive compatibility problem)

     [ https://issues.apache.org/jira/browse/SPARK-4201?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

dongxu updated SPARK-4201:
--------------------------
    Description: 
Our team has been using Hive for queries, and we are trying to move to Spark SQL.
When I run a statement like the following:
select count(1) from  gulfstream_day_driver_base_2 where  concat(year,month,day) = '20140929';
it does not work in Spark SQL, although it works well in Hive.
I have to rewrite the SQL as "select count(1) from  gulfstream_day_driver_base_2 where  year = 2014 and  month = 09 and day = 29".
Here is the error log:
14/11/03 15:05:03 ERROR SparkSQLDriver: Failed in [select count(1) from  gulfstream_day_driver_base_2 where  concat(year,month,day) = '20140929']
org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
Aggregate false, [], [SUM(PartialCount#1390L) AS c_0#1337L]
 Exchange SinglePartition
  Aggregate true, [], [COUNT(1) AS PartialCount#1390L]
   HiveTableScan [], (MetastoreRelation default, gulfstream_day_driver_base_2, None), Some((HiveGenericUdf#org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcat(year#1339,month#1340,day#1341) = 20140929))

	at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:47)
	at org.apache.spark.sql.execution.Aggregate.execute(Aggregate.scala:126)
	at org.apache.spark.sql.hive.HiveContext$QueryExecution.toRdd$lzycompute(HiveContext.scala:360)
	at org.apache.spark.sql.hive.HiveContext$QueryExecution.toRdd(HiveContext.scala:360)
	at org.apache.spark.sql.hive.HiveContext$QueryExecution.stringResult(HiveContext.scala:415)
	at org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.run(SparkSQLDriver.scala:59)
	at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processCmd(SparkSQLCLIDriver.scala:291)
	at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:413)
	at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.main(SparkSQLCLIDriver.scala:226)
	at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.main(SparkSQLCLIDriver.scala)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
	at java.lang.reflect.Method.invoke(Method.java:597)
	at org.apache.spark.deploy.SparkSubmit$.launch(SparkSubmit.scala:328)
	at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:75)
	at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
Exchange SinglePartition
 Aggregate true, [], [COUNT(1) AS PartialCount#1390L]
  HiveTableScan [], (MetastoreRelation default, gulfstream_day_driver_base_2, None), Some((HiveGenericUdf#org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcat(year#1339,month#1340,day#1341) = 20140929))

	at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:47)
	at org.apache.spark.sql.execution.Exchange.execute(Exchange.scala:44)
	at org.apache.spark.sql.execution.Aggregate$$anonfun$execute$1.apply(Aggregate.scala:128)
	at org.apache.spark.sql.execution.Aggregate$$anonfun$execute$1.apply(Aggregate.scala:127)
	at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:46)
	... 16 more
Caused by: org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
Aggregate true, [], [COUNT(1) AS PartialCount#1390L]
 HiveTableScan [], (MetastoreRelation default, gulfstream_day_driver_base_2, None), Some((HiveGenericUdf#org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcat(year#1339,month#1340,day#1341) = 20140929))

	at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:47)
	at org.apache.spark.sql.execution.Aggregate.execute(Aggregate.scala:126)
	at org.apache.spark.sql.execution.Exchange$$anonfun$execute$1.apply(Exchange.scala:86)
	at org.apache.spark.sql.execution.Exchange$$anonfun$execute$1.apply(Exchange.scala:45)
	at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:46)
	... 20 more
Caused by: org.apache.spark.SparkException: Task not serializable
	at org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:166)
	at org.apache.spark.util.ClosureCleaner$.clean(ClosureCleaner.scala:158)
	at org.apache.spark.SparkContext.clean(SparkContext.scala:1242)
	at org.apache.spark.rdd.RDD.mapPartitions(RDD.scala:597)
	at org.apache.spark.sql.execution.Aggregate$$anonfun$execute$1.apply(Aggregate.scala:128)
	at org.apache.spark.sql.execution.Aggregate$$anonfun$execute$1.apply(Aggregate.scala:127)
	at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:46)
	... 24 more
Caused by: java.io.NotSerializableException: org.apache.spark.sql.hive.HiveGenericUdf$DeferredObjectAdapter
	at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1164)
	at java.io.ObjectOutputStream.writeArray(ObjectOutputStream.java:1346)
	at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1154)
	at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1518)
	at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1483)
	at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1400)
	at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1158)
	at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1518)
	at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1483)
	at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1400)
	at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1158)
	at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1518)
	at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1483)
	at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1400)
	at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1158)
	at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1518)
	at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1483)
	at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1400)
	at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1158)
	at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1518)
	at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1483)
	at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1400)
	at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1158)
	at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1518)
	at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1483)
	at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1400)
	at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1158)
	at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1518)
	at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1483)
	at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1400)
	at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1158)
	at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1518)
	at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1483)
	at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1400)
	at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1158)
	at java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:330)
	at org.apache.spark.serializer.JavaSerializationStream.writeObject(JavaSerializer.scala:42)
	at org.apache.spark.serializer.JavaSerializerInstance.serialize(JavaSerializer.scala:73)
	at org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:164)
	... 30 more


  was:
The team used hive to query,we try to  move it to spark-sql.
when I search sentences like that. 
select count(1) from  gulfstream_day_driver_base_2 where  concat(year,month,day) = '20140929';
It can't work ,but it work well in hive.
I have to rewrite the sql to  "select count(1) from  gulfstream_day_driver_base_2 where  year = 2014 and  month = 09 day= 29.
There are some error logs.
14/11/03 15:05:03 ERROR SparkSQLDriver: Failed in [select count(1) from  gulfstream_day_driver_base_2 where  concat(year,month,day) = '20140929']
org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
Aggregate false, [], [SUM(PartialCount#1390L) AS c_0#1337L]
 Exchange SinglePartition
  Aggregate true, [], [COUNT(1) AS PartialCount#1390L]
   HiveTableScan [], (MetastoreRelation default, gulfstream_day_driver_base_2, None), Some((HiveGenericUdf#org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcat(year#1339,month#1340,day#1341) = 20140929))

	at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:47)
	at org.apache.spark.sql.execution.Aggregate.execute(Aggregate.scala:126)
	at org.apache.spark.sql.hive.HiveContext$QueryExecution.toRdd$lzycompute(HiveContext.scala:360)
	at org.apache.spark.sql.hive.HiveContext$QueryExecution.toRdd(HiveContext.scala:360)
	at org.apache.spark.sql.hive.HiveContext$QueryExecution.stringResult(HiveContext.scala:415)
	at org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.run(SparkSQLDriver.scala:59)
	at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processCmd(SparkSQLCLIDriver.scala:291)
	at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:413)
	at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.main(SparkSQLCLIDriver.scala:226)
	at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.main(SparkSQLCLIDriver.scala)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
	at java.lang.reflect.Method.invoke(Method.java:597)
	at org.apache.spark.deploy.SparkSubmit$.launch(SparkSubmit.scala:328)
	at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:75)
	at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
Exchange SinglePartition
 Aggregate true, [], [COUNT(1) AS PartialCount#1390L]
  HiveTableScan [], (MetastoreRelation default, gulfstream_day_driver_base_2, None), Some((HiveGenericUdf#org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcat(year#1339,month#1340,day#1341) = 20140929))

	at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:47)
	at org.apache.spark.sql.execution.Exchange.execute(Exchange.scala:44)
	at org.apache.spark.sql.execution.Aggregate$$anonfun$execute$1.apply(Aggregate.scala:128)
	at org.apache.spark.sql.execution.Aggregate$$anonfun$execute$1.apply(Aggregate.scala:127)
	at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:46)
	... 16 more
Caused by: org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
Aggregate true, [], [COUNT(1) AS PartialCount#1390L]
 HiveTableScan [], (MetastoreRelation default, gulfstream_day_driver_base_2, None), Some((HiveGenericUdf#org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcat(year#1339,month#1340,day#1341) = 20140929))

	at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:47)
	at org.apache.spark.sql.execution.Aggregate.execute(Aggregate.scala:126)
	at org.apache.spark.sql.execution.Exchange$$anonfun$execute$1.apply(Exchange.scala:86)
	at org.apache.spark.sql.execution.Exchange$$anonfun$execute$1.apply(Exchange.scala:45)
	at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:46)
	... 20 more
Caused by: org.apache.spark.SparkException: Task not serializable
	at org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:166)
	at org.apache.spark.util.ClosureCleaner$.clean(ClosureCleaner.scala:158)
	at org.apache.spark.SparkContext.clean(SparkContext.scala:1242)
	at org.apache.spark.rdd.RDD.mapPartitions(RDD.scala:597)
	at org.apache.spark.sql.execution.Aggregate$$anonfun$execute$1.apply(Aggregate.scala:128)
	at org.apache.spark.sql.execution.Aggregate$$anonfun$execute$1.apply(Aggregate.scala:127)
	at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:46)
	... 24 more
Caused by: java.io.NotSerializableException: org.apache.spark.sql.hive.HiveGenericUdf$DeferredObjectAdapter
	at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1164)
	at java.io.ObjectOutputStream.writeArray(ObjectOutputStream.java:1346)
	at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1154)
	at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1518)
	at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1483)
	at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1400)
	at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1158)
	at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1518)
	at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1483)
	at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1400)
	at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1158)
	at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1518)
	at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1483)
	at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1400)
	at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1158)
	at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1518)
	at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1483)
	at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1400)
	at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1158)
	at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1518)
	at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1483)
	at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1400)
	at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1158)
	at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1518)
	at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1483)
	at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1400)
	at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1158)
	at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1518)
	at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1483)
	at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1400)
	at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1158)
	at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1518)
	at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1483)
	at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1400)
	at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1158)
	at java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:330)
	at org.apache.spark.serializer.JavaSerializationStream.writeObject(JavaSerializer.scala:42)
	at org.apache.spark.serializer.JavaSerializerInstance.serialize(JavaSerializer.scala:73)
	at org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:164)
	... 30 more



> Can't use concat() on partition column in where condition (Hive compatibility problem)
> --------------------------------------------------------------------------------------
>
>                 Key: SPARK-4201
>                 URL: https://issues.apache.org/jira/browse/SPARK-4201
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 1.0.0, 1.1.0
>         Environment: Hive 0.12+hadoop 2.4/hadoop 2.2 +spark 1.1
>            Reporter: dongxu
>            Priority: Minor
>              Labels: com
>
> Our team has been using Hive for queries, and we are trying to move to Spark SQL.
> When I run a statement like the following:
> select count(1) from  gulfstream_day_driver_base_2 where  concat(year,month,day) = '20140929';
> it does not work in Spark SQL, although it works well in Hive.
> I have to rewrite the SQL as "select count(1) from  gulfstream_day_driver_base_2 where  year = 2014 and  month = 09 and day = 29".
> Here is the error log:
> 14/11/03 15:05:03 ERROR SparkSQLDriver: Failed in [select count(1) from  gulfstream_day_driver_base_2 where  concat(year,month,day) = '20140929']
> org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
> Aggregate false, [], [SUM(PartialCount#1390L) AS c_0#1337L]
>  Exchange SinglePartition
>   Aggregate true, [], [COUNT(1) AS PartialCount#1390L]
>    HiveTableScan [], (MetastoreRelation default, gulfstream_day_driver_base_2, None), Some((HiveGenericUdf#org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcat(year#1339,month#1340,day#1341) = 20140929))
> 	at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:47)
> 	at org.apache.spark.sql.execution.Aggregate.execute(Aggregate.scala:126)
> 	at org.apache.spark.sql.hive.HiveContext$QueryExecution.toRdd$lzycompute(HiveContext.scala:360)
> 	at org.apache.spark.sql.hive.HiveContext$QueryExecution.toRdd(HiveContext.scala:360)
> 	at org.apache.spark.sql.hive.HiveContext$QueryExecution.stringResult(HiveContext.scala:415)
> 	at org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.run(SparkSQLDriver.scala:59)
> 	at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processCmd(SparkSQLCLIDriver.scala:291)
> 	at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:413)
> 	at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.main(SparkSQLCLIDriver.scala:226)
> 	at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.main(SparkSQLCLIDriver.scala)
> 	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> 	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
> 	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
> 	at java.lang.reflect.Method.invoke(Method.java:597)
> 	at org.apache.spark.deploy.SparkSubmit$.launch(SparkSubmit.scala:328)
> 	at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:75)
> 	at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
> Caused by: org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
> Exchange SinglePartition
>  Aggregate true, [], [COUNT(1) AS PartialCount#1390L]
>   HiveTableScan [], (MetastoreRelation default, gulfstream_day_driver_base_2, None), Some((HiveGenericUdf#org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcat(year#1339,month#1340,day#1341) = 20140929))
> 	at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:47)
> 	at org.apache.spark.sql.execution.Exchange.execute(Exchange.scala:44)
> 	at org.apache.spark.sql.execution.Aggregate$$anonfun$execute$1.apply(Aggregate.scala:128)
> 	at org.apache.spark.sql.execution.Aggregate$$anonfun$execute$1.apply(Aggregate.scala:127)
> 	at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:46)
> 	... 16 more
> Caused by: org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
> Aggregate true, [], [COUNT(1) AS PartialCount#1390L]
>  HiveTableScan [], (MetastoreRelation default, gulfstream_day_driver_base_2, None), Some((HiveGenericUdf#org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcat(year#1339,month#1340,day#1341) = 20140929))
> 	at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:47)
> 	at org.apache.spark.sql.execution.Aggregate.execute(Aggregate.scala:126)
> 	at org.apache.spark.sql.execution.Exchange$$anonfun$execute$1.apply(Exchange.scala:86)
> 	at org.apache.spark.sql.execution.Exchange$$anonfun$execute$1.apply(Exchange.scala:45)
> 	at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:46)
> 	... 20 more
> Caused by: org.apache.spark.SparkException: Task not serializable
> 	at org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:166)
> 	at org.apache.spark.util.ClosureCleaner$.clean(ClosureCleaner.scala:158)
> 	at org.apache.spark.SparkContext.clean(SparkContext.scala:1242)
> 	at org.apache.spark.rdd.RDD.mapPartitions(RDD.scala:597)
> 	at org.apache.spark.sql.execution.Aggregate$$anonfun$execute$1.apply(Aggregate.scala:128)
> 	at org.apache.spark.sql.execution.Aggregate$$anonfun$execute$1.apply(Aggregate.scala:127)
> 	at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:46)
> 	... 24 more
> Caused by: java.io.NotSerializableException: org.apache.spark.sql.hive.HiveGenericUdf$DeferredObjectAdapter
> 	at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1164)
> 	at java.io.ObjectOutputStream.writeArray(ObjectOutputStream.java:1346)
> 	at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1154)
> 	at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1518)
> 	at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1483)
> 	at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1400)
> 	at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1158)
> 	at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1518)
> 	at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1483)
> 	at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1400)
> 	at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1158)
> 	at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1518)
> 	at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1483)
> 	at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1400)
> 	at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1158)
> 	at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1518)
> 	at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1483)
> 	at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1400)
> 	at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1158)
> 	at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1518)
> 	at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1483)
> 	at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1400)
> 	at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1158)
> 	at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1518)
> 	at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1483)
> 	at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1400)
> 	at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1158)
> 	at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1518)
> 	at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1483)
> 	at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1400)
> 	at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1158)
> 	at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1518)
> 	at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1483)
> 	at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1400)
> 	at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1158)
> 	at java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:330)
> 	at org.apache.spark.serializer.JavaSerializationStream.writeObject(JavaSerializer.scala:42)
> 	at org.apache.spark.serializer.JavaSerializerInstance.serialize(JavaSerializer.scala:73)
> 	at org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:164)
> 	... 30 more



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org