Posted to issues@spark.apache.org by "Pablo Langa Blanco (Jira)" <ji...@apache.org> on 2022/07/10 23:18:00 UTC

[jira] [Commented] (SPARK-39426) Subquery star select creates broken plan in case of self join

    [ https://issues.apache.org/jira/browse/SPARK-39426?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17564735#comment-17564735 ] 

Pablo Langa Blanco commented on SPARK-39426:
--------------------------------------------

I tested it on master and on 3.3.0, and it appears to be fixed in both.
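
For reference, the minimal check I mean is the sketch below (it assumes a spark-shell style session where spark is in scope). It runs cleanly on master and 3.3.0, while 3.2.1 fails with the AnalysisException quoted in the description:

{code:java}
import java.nio.file.Files
import spark.implicits._

// checkpoint() needs a checkpoint directory to be set first
spark.sparkContext.setCheckpointDir(Files.createTempDirectory("spark-39426").toFile.toString)

val frame = Seq(1).toDF("id").checkpoint()
val joined = frame.join(frame, Seq("id"), "left").select("id")

// On a fixed build this resolves and returns the single expected row (id = 1)
joined.join(joined, Seq("id"), "left").as("a").select("a.*").show()
{code}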

> Subquery star select creates broken plan in case of self join
> -------------------------------------------------------------
>
>                 Key: SPARK-39426
>                 URL: https://issues.apache.org/jira/browse/SPARK-39426
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 3.2.1
>            Reporter: Denis
>            Priority: Major
>
> A subquery star select creates a broken plan in the case of a self join.
> How to reproduce:
> {code:java}
> import java.nio.file.Files
> import spark.implicits._
>
> // checkpoint() requires a checkpoint directory
> spark.sparkContext.setCheckpointDir(Files.createTempDirectory("some-prefix").toFile.toString)
>
> val frame = Seq(1).toDF("id").checkpoint()
> val joined = frame
>   .join(frame, Seq("id"), "left")
>   .select("id")
>
> joined
>   .join(joined, Seq("id"), "left")
>   .as("a")
>   .select("a.*")
> {code}
> This query throws the following exception:
> {code:java}
> Exception in thread "main" org.apache.spark.sql.AnalysisException: Resolved attribute(s) id#7 missing from id#10,id#11 in operator !Project [id#7, id#10]. Attribute(s) with the same name appear in the operation: id. Please check if the right attribute(s) are used.;
> Project [id#10, id#4]
> +- SubqueryAlias a
>    +- Project [id#10, id#4]
>       +- Join LeftOuter, (id#4 = id#10)
>          :- Project [id#4]
>          :  +- Project [id#7, id#4]
>          :     +- Join LeftOuter, (id#4 = id#7)
>          :        :- LogicalRDD [id#4], false
>          :        +- LogicalRDD [id#7], false
>          +- Project [id#10]
>             +- !Project [id#7, id#10]
>                +- Join LeftOuter, (id#10 = id#11)
>                   :- LogicalRDD [id#10], false
>                   +- LogicalRDD [id#11], false
>     at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.failAnalysis(CheckAnalysis.scala:51)
>     at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.failAnalysis$(CheckAnalysis.scala:50)
>     at org.apache.spark.sql.catalyst.analysis.Analyzer.failAnalysis(Analyzer.scala:182)
>     at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis$1(CheckAnalysis.scala:471)
>     at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis$1$adapted(CheckAnalysis.scala:94)
>     at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:263)
>     at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1(TreeNode.scala:262)
>     at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1$adapted(TreeNode.scala:262)
>     at scala.collection.Iterator.foreach(Iterator.scala:943)
>     at scala.collection.Iterator.foreach$(Iterator.scala:943)
>     at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
>     at scala.collection.IterableLike.foreach(IterableLike.scala:74)
>     at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
>     at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
>     at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:262)
>     at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1(TreeNode.scala:262)
>     at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1$adapted(TreeNode.scala:262)
>     at scala.collection.Iterator.foreach(Iterator.scala:943)
>     at scala.collection.Iterator.foreach$(Iterator.scala:943)
>     at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
>     at scala.collection.IterableLike.foreach(IterableLike.scala:74)
>     at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
>     at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
>     at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:262)
>     at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1(TreeNode.scala:262)
>     at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1$adapted(TreeNode.scala:262)
>     at scala.collection.Iterator.foreach(Iterator.scala:943)
>     at scala.collection.Iterator.foreach$(Iterator.scala:943)
>     at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
>     at scala.collection.IterableLike.foreach(IterableLike.scala:74)
>     at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
>     at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
>     at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:262)
>     at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1(TreeNode.scala:262)
>     at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1$adapted(TreeNode.scala:262)
>     at scala.collection.Iterator.foreach(Iterator.scala:943)
>     at scala.collection.Iterator.foreach$(Iterator.scala:943)
>     at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
>     at scala.collection.IterableLike.foreach(IterableLike.scala:74)
>     at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
>     at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
>     at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:262)
>     at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1(TreeNode.scala:262)
>     at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1$adapted(TreeNode.scala:262)
>     at scala.collection.Iterator.foreach(Iterator.scala:943)
>     at scala.collection.Iterator.foreach$(Iterator.scala:943)
>     at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
>     at scala.collection.IterableLike.foreach(IterableLike.scala:74)
>     at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
>     at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
>     at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:262)
>     at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.checkAnalysis(CheckAnalysis.scala:94)
>     at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.checkAnalysis$(CheckAnalysis.scala:91)
>     at org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:182)
>     at org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$executeAndCheck$1(Analyzer.scala:205)
>     at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:330)
>     at org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:202)
>     at org.apache.spark.sql.execution.QueryExecution.$anonfun$analyzed$1(QueryExecution.scala:88)
>     at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111)
>     at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:196)
>     at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
>     at org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:196)
>     at org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:88)
>     at org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:86)
>     at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:78)
>     at org.apache.spark.sql.Dataset$.$anonfun$ofRows$1(Dataset.scala:90)
>     at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
>     at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:88)
>     at org.apache.spark.sql.Dataset.withPlan(Dataset.scala:3734)
>     at org.apache.spark.sql.Dataset.select(Dataset.scala:1454)
>     at org.apache.spark.sql.Dataset.select(Dataset.scala:1471)
>     at com.joom.gburg_local.GburgTestLocal$.main(GburgTestLocal.scala:61)
>     at com.joom.gburg_local.GburgTestLocal.main(GburgTestLocal.scala)
>  {code}
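>
> A possible workaround on affected versions (a sketch only, not verified as part of this report; it assumes the failure comes from the two sides of the self join sharing attribute ids) is to break the attribute lineage of one side by rebuilding it from its RDD, so the analyzer assigns fresh ids:
> {code:java}
> // Hypothetical workaround: recreate one side so it gets fresh attribute ids
> val rightSide = spark.createDataFrame(joined.rdd, joined.schema)
> joined
>   .join(rightSide, Seq("id"), "left")
>   .as("a")
>   .select("a.*")
> {code}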


