You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "Josh Rosen (JIRA)" <ji...@apache.org> on 2016/08/17 00:11:22 UTC

[jira] [Commented] (SPARK-17098) "SELECT COUNT(NULL) OVER ()" throws UnsupportedOperationException during analysis

    [ https://issues.apache.org/jira/browse/SPARK-17098?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15423634#comment-15423634 ] 

Josh Rosen commented on SPARK-17098:
------------------------------------

Actually, given the error here I think that the problem could be that sometimes {{WindowExpression.foldable == true}} even though {{WindowExpression}} is {{Unevaluable}}:

{code}
case class WindowExpression(
    windowFunction: Expression,
    windowSpec: WindowSpecDefinition) extends Expression with Unevaluable {

  override def children: Seq[Expression] = windowFunction :: windowSpec :: Nil

  override def dataType: DataType = windowFunction.dataType
  override def foldable: Boolean = windowFunction.foldable
  override def nullable: Boolean = windowFunction.nullable

  override def toString: String = s"$windowFunction $windowSpec"
  override def sql: String = windowFunction.sql + " OVER " + windowSpec.sql
}
{code}

/cc [~hvanhovell], FYI. 

> "SELECT COUNT(NULL) OVER ()" throws UnsupportedOperationException during analysis
> ---------------------------------------------------------------------------------
>
>                 Key: SPARK-17098
>                 URL: https://issues.apache.org/jira/browse/SPARK-17098
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 2.0.0
>            Reporter: Josh Rosen
>
> Running
> {code}
> SELECT COUNT(NULL) OVER ()
> {code}
> throws an UnsupportedOperationException during analysis:
> {code}
> java.lang.UnsupportedOperationException: Cannot evaluate expression: cast(0 as bigint) windowspecdefinition(ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
> 	at org.apache.spark.sql.catalyst.expressions.Unevaluable$class.eval(Expression.scala:221)
> 	at org.apache.spark.sql.catalyst.expressions.WindowExpression.eval(windowExpressions.scala:288)
> 	at org.apache.spark.sql.catalyst.optimizer.ConstantFolding$$anonfun$apply$18$$anonfun$applyOrElse$3.applyOrElse(Optimizer.scala:759)
> 	at org.apache.spark.sql.catalyst.optimizer.ConstantFolding$$anonfun$apply$18$$anonfun$applyOrElse$3.applyOrElse(Optimizer.scala:752)
> 	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:279)
> 	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:279)
> 	at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:69)
> 	at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:278)
> 	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:284)
> 	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:284)
> 	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:321)
> 	at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:179)
> 	at org.apache.spark.sql.catalyst.trees.TreeNode.transformChildren(TreeNode.scala:319)
> 	at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:284)
> 	at org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpressionDown$1(QueryPlan.scala:156)
> 	at org.apache.spark.sql.catalyst.plans.QueryPlan.org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$1(QueryPlan.scala:166)
> 	at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$1$1.apply(QueryPlan.scala:170)
> 	at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
> 	at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
> 	at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
> 	at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
> 	at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
> 	at scala.collection.AbstractTraversable.map(Traversable.scala:104)
> 	at org.apache.spark.sql.catalyst.plans.QueryPlan.org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$1(QueryPlan.scala:170)
> 	at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$4.apply(QueryPlan.scala:175)
> 	at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:179)
> 	at org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpressionsDown(QueryPlan.scala:175)
> 	at org.apache.spark.sql.catalyst.optimizer.ConstantFolding$$anonfun$apply$18.applyOrElse(Optimizer.scala:752)
> 	at org.apache.spark.sql.catalyst.optimizer.ConstantFolding$$anonfun$apply$18.applyOrElse(Optimizer.scala:751)
> 	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:279)
> 	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:279)
> 	at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:69)
> 	at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:278)
> 	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:284)
> 	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:284)
> 	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:321)
> 	at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:179)
> 	at org.apache.spark.sql.catalyst.trees.TreeNode.transformChildren(TreeNode.scala:319)
> 	at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:284)
> 	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:284)
> 	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:284)
> 	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:321)
> 	at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:179)
> 	at org.apache.spark.sql.catalyst.trees.TreeNode.transformChildren(TreeNode.scala:319)
> 	at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:284)
> 	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:284)
> 	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:284)
> 	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:321)
> 	at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:179)
> 	at org.apache.spark.sql.catalyst.trees.TreeNode.transformChildren(TreeNode.scala:319)
> 	at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:284)
> 	at org.apache.spark.sql.catalyst.trees.TreeNode.transform(TreeNode.scala:268)
> 	at org.apache.spark.sql.catalyst.optimizer.ConstantFolding$.apply(Optimizer.scala:751)
> 	at org.apache.spark.sql.catalyst.optimizer.ConstantFolding$.apply(Optimizer.scala:750)
> 	at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:85)
> 	at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:82)
> 	at scala.collection.IndexedSeqOptimized$class.foldl(IndexedSeqOptimized.scala:57)
> 	at scala.collection.IndexedSeqOptimized$class.foldLeft(IndexedSeqOptimized.scala:66)
> 	at scala.collection.mutable.WrappedArray.foldLeft(WrappedArray.scala:35)
> 	at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:82)
> 	at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:74)
> 	at scala.collection.immutable.List.foreach(List.scala:381)
> 	at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:74)
> 	at org.apache.spark.sql.execution.QueryExecution.optimizedPlan$lzycompute(QueryExecution.scala:74)
> 	at org.apache.spark.sql.execution.QueryExecution.optimizedPlan(QueryExecution.scala:74)
> 	at org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:78)
> 	at org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:76)
> 	at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:83)
> 	at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:83)
> 	at org.apache.spark.sql.Dataset.withTypedCallback(Dataset.scala:2558)
> 	at org.apache.spark.sql.Dataset.head(Dataset.scala:1924)
> 	at org.apache.spark.sql.Dataset.take(Dataset.scala:2139)
> {code}
> Given that 
> {code}
> SELECT COUNT(0) OVER ()
> {code}
> works fine my hunch is that this is uncovering a bug in the ordering of our optimizer rules or a bug in the constant-folding rule itself. This particular example is probably unimportant by itself but may be an indicator of other problems.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org