Posted to issues@spark.apache.org by "Takeshi Yamamuro (JIRA)" <ji...@apache.org> on 2017/05/20 15:01:04 UTC

[jira] [Closed] (SPARK-20804) Join with null safe equality fails with AnalysisException

     [ https://issues.apache.org/jira/browse/SPARK-20804?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Takeshi Yamamuro closed SPARK-20804.
------------------------------------
    Resolution: Duplicate

> Join with null safe equality fails with AnalysisException
> ---------------------------------------------------------
>
>                 Key: SPARK-20804
>                 URL: https://issues.apache.org/jira/browse/SPARK-20804
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 2.2.0
>         Environment: org.apache.spark#spark-sql_2.11;2.3.0-SNAPSHOT from asf snapshots, Mon May 15 08:09:18 EDT 2017
>            Reporter: koert kuipers
>            Priority: Minor
>
> {noformat}
> val x = Seq(("a", 1), ("a", 2), (null, 1)).toDF("k", "v")
> val sums = x.groupBy($"k").agg(sum($"v") as "sum")
> x
>   .join(sums, x("k") <=> sums("k"))
>   .drop(sums("k"))
>   .show
> {noformat}
> gives:
> {noformat}
>   org.apache.spark.sql.AnalysisException: Detected cartesian product for INNER join between logical plans
> Project [_2#54 AS v#57]
> +- LocalRelation [_1#53, _2#54]
> and
> Aggregate [k#69], [k#69, sum(cast(v#70 as bigint)) AS sum#65L]
> +- Project [_1#53 AS k#69, _2#54 AS v#70]
>    +- LocalRelation [_1#53, _2#54]
> Join condition is missing or trivial.
> Use the CROSS JOIN syntax to allow cartesian products between these relations.;
>   at org.apache.spark.sql.catalyst.optimizer.CheckCartesianProducts$$anonfun$apply$20.applyOrElse(Optimizer.scala:1081)
>   at org.apache.spark.sql.catalyst.optimizer.CheckCartesianProducts$$anonfun$apply$20.applyOrElse(Optimizer.scala:1078)
>   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$2.apply(TreeNode.scala:267)
>   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$2.apply(TreeNode.scala:267)
>   at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:266)
>   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:272)
>   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:272)
>   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:306)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:304)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:272)
>   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:272)
>   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:272)
>   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:306)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:304)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:272)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.transform(TreeNode.scala:256)
>   at org.apache.spark.sql.catalyst.optimizer.CheckCartesianProducts.apply(Optimizer.scala:1078)
>   at org.apache.spark.sql.catalyst.optimizer.CheckCartesianProducts.apply(Optimizer.scala:1063)
>   at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:85)
>   at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:82)
>   at scala.collection.IndexedSeqOptimized$class.foldl(IndexedSeqOptimized.scala:57)
>   at scala.collection.IndexedSeqOptimized$class.foldLeft(IndexedSeqOptimized.scala:66)
>   at scala.collection.mutable.WrappedArray.foldLeft(WrappedArray.scala:35)
>   at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:82)
>   at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:74)
>   at scala.collection.immutable.List.foreach(List.scala:381)
>   at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:74)
>   at org.apache.spark.sql.execution.QueryExecution.optimizedPlan$lzycompute(QueryExecution.scala:79)
>   at org.apache.spark.sql.execution.QueryExecution.optimizedPlan(QueryExecution.scala:79)
>   at org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:85)
>   at org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:81)
>   at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:90)
>   at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:90)
>   at org.apache.spark.sql.Dataset.withAction(Dataset.scala:2901)
>   at org.apache.spark.sql.Dataset.head(Dataset.scala:2238)
>   at org.apache.spark.sql.Dataset.take(Dataset.scala:2451)
>   at org.apache.spark.sql.Dataset.showString(Dataset.scala:248)
>   at org.apache.spark.sql.Dataset.show(Dataset.scala:680)
>   at org.apache.spark.sql.Dataset.show(Dataset.scala:639)
>   at org.apache.spark.sql.Dataset.show(Dataset.scala:648)
> {noformat}
> but renaming the grouping column before the aggregation works fine (a variant of the same idea is sketched after the output below):
> {noformat}
> val x = Seq(("a", 1), ("a", 2), (null, 1)).toDF("k", "v")
> val sums = x.select($"k" as "k1", $"v").groupBy($"k1").agg(sum($"v") as "sum")
> x
>   .join(sums, x("k") <=> sums("k1"))
>   .drop(sums("k1"))
>   .show
> +----+---+---+                                                                  
> |   k|  v|sum|
> +----+---+---+
> |   a|  1|  3|
> |   a|  2|  3|
> |null|  1|  1|
> +----+---+---+
> {noformat}
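> Presumably the failure stems from how column references are deduplicated in a self-join: sums is derived from x, so x("k") and sums("k") resolve to the same underlying attribute, the null-safe condition collapses to a trivial predicate, and the CheckCartesianProducts rule rejects the plan as an implicit cartesian product ("Join condition is missing or trivial"). As a minimal sketch of the same rename idea (not verified in this report; it assumes withColumnRenamed introduces a fresh attribute just like the select/alias above), the rename can also be applied after the aggregation:
> {noformat}
> // Hedged sketch: rename the grouping column after the aggregation instead of before.
> // withColumnRenamed aliases the column under a new name, so x("k") and sums("k1")
> // no longer resolve to the same attribute and the join condition stays non-trivial.
> val x = Seq(("a", 1), ("a", 2), (null, 1)).toDF("k", "v")
> val sums = x.groupBy($"k").agg(sum($"v") as "sum").withColumnRenamed("k", "k1")
> x
>   .join(sums, x("k") <=> sums("k1"))
>   .drop(sums("k1"))
>   .show
> {noformat}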


