You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@spark.apache.org by StanZhai <ma...@zhaishidan.cn> on 2015/06/23 10:42:15 UTC
[SparkSQL 1.4]Could not use concat with UDF in where clause
Hi all,
After upgraded the cluster from spark 1.3.1 to 1.4.0(rc4), I encountered the
following exception when use concat with UDF in where clause:
===================Exception====================
org.apache.spark.sql.catalyst.analysis.UnresolvedException: Invalid call to
dataType on unresolved object, tree:
'concat(HiveSimpleUdf#org.apache.hadoop.hive.ql.udf.UDFYear(date#1776),年)
at
org.apache.spark.sql.catalyst.analysis.UnresolvedFunction.dataType(unresolved.scala:82)
at
org.apache.spark.sql.catalyst.analysis.HiveTypeCoercion$InConversion$$anonfun$apply$5$$anonfun$applyOrElse$15.apply(HiveTypeCoercion.scala:299)
at
org.apache.spark.sql.catalyst.analysis.HiveTypeCoercion$InConversion$$anonfun$apply$5$$anonfun$applyOrElse$15.apply(HiveTypeCoercion.scala:299)
at
scala.collection.LinearSeqOptimized$class.exists(LinearSeqOptimized.scala:80)
at scala.collection.immutable.List.exists(List.scala:84)
at
org.apache.spark.sql.catalyst.analysis.HiveTypeCoercion$InConversion$$anonfun$apply$5.applyOrElse(HiveTypeCoercion.scala:299)
at
org.apache.spark.sql.catalyst.analysis.HiveTypeCoercion$InConversion$$anonfun$apply$5.applyOrElse(HiveTypeCoercion.scala:298)
at
org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:222)
at
org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:222)
at
org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:51)
at
org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:221)
at
org.apache.spark.sql.catalyst.plans.QueryPlan.org$apache$spark$sql$catalyst$plans$QueryPlan$$transformExpressionDown$1(QueryPlan.scala:75)
at
org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$1.apply(QueryPlan.scala:85)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
at scala.collection.Iterator$class.foreach(Iterator.scala:727)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:48)
at
scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:103)
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:47)
at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:273)
at scala.collection.AbstractIterator.to(Iterator.scala:1157)
at
scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:265)
at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1157)
at
scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252)
at scala.collection.AbstractIterator.toArray(Iterator.scala:1157)
at
org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpressionsDown(QueryPlan.scala:94)
at
org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpressions(QueryPlan.scala:64)
at
org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$transformAllExpressions$1.applyOrElse(QueryPlan.scala:136)
at
org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$transformAllExpressions$1.applyOrElse(QueryPlan.scala:135)
at
org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:222)
at
org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:222)
at
org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:51)
at
org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:221)
at
org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:242)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
at scala.collection.Iterator$class.foreach(Iterator.scala:727)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:48)
at
scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:103)
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:47)
at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:273)
at scala.collection.AbstractIterator.to(Iterator.scala:1157)
at
scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:265)
at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1157)
at
scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252)
at scala.collection.AbstractIterator.toArray(Iterator.scala:1157)
at
org.apache.spark.sql.catalyst.trees.TreeNode.transformChildrenDown(TreeNode.scala:272)
at
org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:227)
at
org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:242)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
at scala.collection.Iterator$class.foreach(Iterator.scala:727)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:48)
at
scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:103)
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:47)
at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:273)
at scala.collection.AbstractIterator.to(Iterator.scala:1157)
at
scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:265)
at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1157)
at
scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252)
at scala.collection.AbstractIterator.toArray(Iterator.scala:1157)
at
org.apache.spark.sql.catalyst.trees.TreeNode.transformChildrenDown(TreeNode.scala:272)
at
org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:227)
at
org.apache.spark.sql.catalyst.trees.TreeNode.transform(TreeNode.scala:212)
at
org.apache.spark.sql.catalyst.plans.QueryPlan.transformAllExpressions(QueryPlan.scala:135)
at
org.apache.spark.sql.catalyst.analysis.HiveTypeCoercion$InConversion$.apply(HiveTypeCoercion.scala:298)
at
org.apache.spark.sql.catalyst.analysis.HiveTypeCoercion$InConversion$.apply(HiveTypeCoercion.scala:297)
at
org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:61)
at
org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:59)
at
scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:111)
at scala.collection.immutable.List.foldLeft(List.scala:84)
at
org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:59)
at
org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:51)
at scala.collection.immutable.List.foreach(List.scala:318)
at
org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:51)
at
org.apache.spark.sql.SQLContext$QueryExecution.analyzed$lzycompute(SQLContext.scala:922)
at
org.apache.spark.sql.SQLContext$QueryExecution.analyzed(SQLContext.scala:922)
at
org.apache.spark.sql.SQLContext$QueryExecution.assertAnalyzed(SQLContext.scala:920)
at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:131)
at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:51)
at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:744)
at test.service.SparkHiveService.query(SparkHiveService.scala:79)
...
at java.lang.Thread.run(Thread.java:745)
=============The SQL is: ===================
select * from test where concat(year(date), '年') in ( '2015年', '2014年' )
limit 10
This SQL can be run in spark 1.3.1 but error in spark 1.4. I've tried run
some similar sql in spark 1.4.0, found the following sql could be run
correctly:
select * from test where concat(year(date), '年') = '2015年' limit 10
select * from test where concat(sex, 'T') in ( 'MT' ) limit 10
In short, when I use 'concat', UDF and 'in' together in sql, I will get the
exception: Invalid call to dataType on unresolved object.
Is catalyst changed from 1.3 to 1.4? Any suggestion?
Best, Stan
--
View this message in context: http://apache-spark-developers-list.1001551.n3.nabble.com/SparkSQL-1-4-Could-not-use-concat-with-UDF-in-where-clause-tp12832.html
Sent from the Apache Spark Developers List mailing list archive at Nabble.com.
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@spark.apache.org
For additional commands, e-mail: dev-help@spark.apache.org
Re: [SparkSQL 1.4]Could not use concat with UDF in where clause
Posted by StanZhai <ma...@zhaishidan.cn>.
Hi Michael Armbrust,
I have filed an issue on JIRA for this,
https://issues.apache.org/jira/browse/SPARK-8588
<https://issues.apache.org/jira/browse/SPARK-8588>
--
View this message in context: http://apache-spark-developers-list.1001551.n3.nabble.com/SparkSQL-1-4-Could-not-use-concat-with-UDF-in-where-clause-tp12832p12848.html
Sent from the Apache Spark Developers List mailing list archive at Nabble.com.
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@spark.apache.org
For additional commands, e-mail: dev-help@spark.apache.org
Re: [SparkSQL 1.4]Could not use concat with UDF in where clause
Posted by Michael Armbrust <mi...@databricks.com>.
Can you file a JIRA please?
On Tue, Jun 23, 2015 at 1:42 AM, StanZhai <ma...@zhaishidan.cn> wrote:
> Hi all,
>
> After upgraded the cluster from spark 1.3.1 to 1.4.0(rc4), I encountered
> the
> following exception when use concat with UDF in where clause:
>
> ===================Exception====================
> org.apache.spark.sql.catalyst.analysis.UnresolvedException: Invalid call to
> dataType on unresolved object, tree:
> 'concat(HiveSimpleUdf#org.apache.hadoop.hive.ql.udf.UDFYear(date#1776),年)
> at
>
> org.apache.spark.sql.catalyst.analysis.UnresolvedFunction.dataType(unresolved.scala:82)
> at
>
> org.apache.spark.sql.catalyst.analysis.HiveTypeCoercion$InConversion$$anonfun$apply$5$$anonfun$applyOrElse$15.apply(HiveTypeCoercion.scala:299)
> at
>
> org.apache.spark.sql.catalyst.analysis.HiveTypeCoercion$InConversion$$anonfun$apply$5$$anonfun$applyOrElse$15.apply(HiveTypeCoercion.scala:299)
> at
>
> scala.collection.LinearSeqOptimized$class.exists(LinearSeqOptimized.scala:80)
> at scala.collection.immutable.List.exists(List.scala:84)
> at
>
> org.apache.spark.sql.catalyst.analysis.HiveTypeCoercion$InConversion$$anonfun$apply$5.applyOrElse(HiveTypeCoercion.scala:299)
> at
>
> org.apache.spark.sql.catalyst.analysis.HiveTypeCoercion$InConversion$$anonfun$apply$5.applyOrElse(HiveTypeCoercion.scala:298)
> at
>
> org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:222)
> at
>
> org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:222)
> at
>
> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:51)
> at
>
> org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:221)
> at
> org.apache.spark.sql.catalyst.plans.QueryPlan.org
> $apache$spark$sql$catalyst$plans$QueryPlan$$transformExpressionDown$1(QueryPlan.scala:75)
> at
>
> org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$1.apply(QueryPlan.scala:85)
> at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
> at scala.collection.Iterator$class.foreach(Iterator.scala:727)
> at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
> at
> scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:48)
> at
> scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:103)
> at
> scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:47)
> at scala.collection.TraversableOnce$class.to
> (TraversableOnce.scala:273)
> at scala.collection.AbstractIterator.to(Iterator.scala:1157)
> at
> scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:265)
> at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1157)
> at
> scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252)
> at scala.collection.AbstractIterator.toArray(Iterator.scala:1157)
> at
>
> org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpressionsDown(QueryPlan.scala:94)
> at
>
> org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpressions(QueryPlan.scala:64)
> at
>
> org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$transformAllExpressions$1.applyOrElse(QueryPlan.scala:136)
> at
>
> org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$transformAllExpressions$1.applyOrElse(QueryPlan.scala:135)
> at
>
> org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:222)
> at
>
> org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:222)
> at
>
> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:51)
> at
>
> org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:221)
> at
>
> org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:242)
> at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
> at scala.collection.Iterator$class.foreach(Iterator.scala:727)
> at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
> at
> scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:48)
> at
> scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:103)
> at
> scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:47)
> at scala.collection.TraversableOnce$class.to
> (TraversableOnce.scala:273)
> at scala.collection.AbstractIterator.to(Iterator.scala:1157)
> at
> scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:265)
> at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1157)
> at
> scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252)
> at scala.collection.AbstractIterator.toArray(Iterator.scala:1157)
> at
>
> org.apache.spark.sql.catalyst.trees.TreeNode.transformChildrenDown(TreeNode.scala:272)
> at
>
> org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:227)
> at
>
> org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:242)
> at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
> at scala.collection.Iterator$class.foreach(Iterator.scala:727)
> at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
> at
> scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:48)
> at
> scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:103)
> at
> scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:47)
> at scala.collection.TraversableOnce$class.to
> (TraversableOnce.scala:273)
> at scala.collection.AbstractIterator.to(Iterator.scala:1157)
> at
> scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:265)
> at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1157)
> at
> scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252)
> at scala.collection.AbstractIterator.toArray(Iterator.scala:1157)
> at
>
> org.apache.spark.sql.catalyst.trees.TreeNode.transformChildrenDown(TreeNode.scala:272)
> at
>
> org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:227)
> at
> org.apache.spark.sql.catalyst.trees.TreeNode.transform(TreeNode.scala:212)
> at
>
> org.apache.spark.sql.catalyst.plans.QueryPlan.transformAllExpressions(QueryPlan.scala:135)
> at
>
> org.apache.spark.sql.catalyst.analysis.HiveTypeCoercion$InConversion$.apply(HiveTypeCoercion.scala:298)
> at
>
> org.apache.spark.sql.catalyst.analysis.HiveTypeCoercion$InConversion$.apply(HiveTypeCoercion.scala:297)
> at
>
> org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:61)
> at
>
> org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:59)
> at
>
> scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:111)
> at scala.collection.immutable.List.foldLeft(List.scala:84)
> at
>
> org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:59)
> at
>
> org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:51)
> at scala.collection.immutable.List.foreach(List.scala:318)
> at
>
> org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:51)
> at
>
> org.apache.spark.sql.SQLContext$QueryExecution.analyzed$lzycompute(SQLContext.scala:922)
> at
>
> org.apache.spark.sql.SQLContext$QueryExecution.analyzed(SQLContext.scala:922)
> at
>
> org.apache.spark.sql.SQLContext$QueryExecution.assertAnalyzed(SQLContext.scala:920)
> at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:131)
> at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:51)
> at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:744)
> at test.service.SparkHiveService.query(SparkHiveService.scala:79)
> ...
> at java.lang.Thread.run(Thread.java:745)
>
> =============The SQL is: ===================
> select * from test where concat(year(date), '年') in ( '2015年', '2014年' )
> limit 10
>
> This SQL can be run in spark 1.3.1 but error in spark 1.4. I've tried run
> some similar sql in spark 1.4.0, found the following sql could be run
> correctly:
>
> select * from test where concat(year(date), '年') = '2015年' limit 10
> select * from test where concat(sex, 'T') in ( 'MT' ) limit 10
>
> In short, when I use 'concat', UDF and 'in' together in sql, I will get the
> exception: Invalid call to dataType on unresolved object.
>
> Is catalyst changed from 1.3 to 1.4? Any suggestion?
>
> Best, Stan
>
>
>
> --
> View this message in context:
> http://apache-spark-developers-list.1001551.n3.nabble.com/SparkSQL-1-4-Could-not-use-concat-with-UDF-in-where-clause-tp12832.html
> Sent from the Apache Spark Developers List mailing list archive at
> Nabble.com.
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: dev-unsubscribe@spark.apache.org
> For additional commands, e-mail: dev-help@spark.apache.org
>
>