Posted to issues@spark.apache.org by "Yin Huai (JIRA)" <ji...@apache.org> on 2016/07/27 05:22:20 UTC

[jira] [Commented] (SPARK-16748) Errors thrown by UDFs cause TreeNodeException when the query has an ORDER BY clause

    [ https://issues.apache.org/jira/browse/SPARK-16748?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15395063#comment-15395063 ] 

Yin Huai commented on SPARK-16748:
----------------------------------

Seems we should not wrap the NPE in a TreeNodeException; the user should see the original error thrown by the UDF.
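
The rewrapping happens in catalyst's {{attachTree}} helper, which catches exceptions raised while executing a plan fragment and rethrows them as a {{TreeNodeException}}. A self-contained sketch of the pattern and one possible direction for a fix (the names and the exact catch clauses are illustrative, not the actual Spark internals):

{code}
// Illustrative stand-in for the catalyst exception type:
class TreeNodeException(msg: String, cause: Throwable)
  extends Exception(msg, cause)

// Current behavior: every failure inside the block is rewrapped,
// so a user-facing NPE from a UDF surfaces as a TreeNodeException.
def attachTree[A](msg: String)(f: => A): A =
  try f catch {
    case e: Exception => throw new TreeNodeException(msg, e)
  }

// One possible fix: let runtime failures from user code propagate
// unchanged and only wrap errors from plan handling itself.
def attachTreeUnwrapped[A](msg: String)(f: => A): A =
  try f catch {
    case e: RuntimeException => throw e // e.g. the NPE from the UDF
    case e: Exception        => throw new TreeNodeException(msg, e)
  }
{code}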

> Errors thrown by UDFs cause TreeNodeException when the query has an ORDER BY clause
> -----------------------------------------------------------------------------------
>
>                 Key: SPARK-16748
>                 URL: https://issues.apache.org/jira/browse/SPARK-16748
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>            Reporter: Yin Huai
>
> {code}
> import org.apache.spark.sql.functions._
> val myUDF = udf((c: String) => s"""${c.take(5)}""")
> spark.sql("SELECT cast(null as string) as a").select(myUDF($"a").as("b")).orderBy($"b").collect
> {code}
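> The NPE itself comes from {{c.take(5)}} executing against a null input. Without the ORDER BY the error surfaces directly; with it, the range-partitioning exchange runs the child plan under {{attachTree}}, so the failure arrives rewrapped as shown in the stack trace below. A null-safe variant of the UDF avoids the crash (a workaround sketch, not a fix for the wrapping):
> {code}
> import org.apache.spark.sql.functions._
> // Hypothetical null-safe UDF: returns null instead of throwing on null input.
> val myUDF = udf((c: String) => Option(c).map(_.take(5)).orNull)
> {code}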
> {code}
> org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
> Exchange rangepartitioning(b#345 ASC, 200)
> +- *Project [UDF(null) AS b#345]
>    +- Scan OneRowRelation[]
> 	at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:50)
> 	at org.apache.spark.sql.execution.exchange.ShuffleExchange.doExecute(ShuffleExchange.scala:113)
> 	at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:115)
> 	at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:115)
> 	at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:136)
> 	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
> 	at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:133)
> 	at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:114)
> 	at org.apache.spark.sql.execution.InputAdapter.inputRDDs(WholeStageCodegenExec.scala:233)
> 	at org.apache.spark.sql.execution.SortExec.inputRDDs(SortExec.scala:113)
> 	at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:361)
> 	at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:115)
> 	at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:115)
> 	at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:136)
> 	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
> 	at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:133)
> 	at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:114)
> 	at org.apache.spark.sql.execution.SparkPlan.getByteArrayRdd(SparkPlan.scala:225)
> 	at org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:272)
> 	at org.apache.spark.sql.Dataset$$anonfun$org$apache$spark$sql$Dataset$$execute$1$1.apply(Dataset.scala:2183)
> 	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:57)
> 	at org.apache.spark.sql.Dataset.withNewExecutionId(Dataset.scala:2532)
> 	at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$execute$1(Dataset.scala:2182)
> 	at org.apache.spark.sql.Dataset$$anonfun$org$apache$spark$sql$Dataset$$collect$1.apply(Dataset.scala:2187)
> 	at org.apache.spark.sql.Dataset$$anonfun$org$apache$spark$sql$Dataset$$collect$1.apply(Dataset.scala:2187)
> 	at org.apache.spark.sql.Dataset.withCallback(Dataset.scala:2545)
> 	at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collect(Dataset.scala:2187)
> 	at org.apache.spark.sql.Dataset.collect(Dataset.scala:2163)
> {code}


