You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "Pablo Langa Blanco (Jira)" <ji...@apache.org> on 2022/07/10 23:18:00 UTC
[jira] [Commented] (SPARK-39426) Subquery star select creates broken plan in case of self join
[ https://issues.apache.org/jira/browse/SPARK-39426?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17564735#comment-17564735 ]
Pablo Langa Blanco commented on SPARK-39426:
--------------------------------------------
I tested this on master and on 3.3.0, and it appears to be fixed.
> Subquery star select creates broken plan in case of self join
> -------------------------------------------------------------
>
> Key: SPARK-39426
> URL: https://issues.apache.org/jira/browse/SPARK-39426
> Project: Spark
> Issue Type: Bug
> Components: SQL
> Affects Versions: 3.2.1
> Reporter: Denis
> Priority: Major
>
> Subquery star select creates broken plan in case of self join
> How to reproduce:
> {code:java}
> import spark.implicits._
> spark.sparkContext.setCheckpointDir(Files.createTempDirectory("some-prefix").toFile.toString)
> val frame = Seq(1).toDF("id").checkpoint()
> val joined = frame
> .join(frame, Seq("id"), "left")
> .select("id")
> joined
> .join(joined, Seq("id"), "left")
> .as("a")
> .select("a.*"){code}
> This query throws the following exception:
> {code:java}
> Exception in thread "main" org.apache.spark.sql.AnalysisException: Resolved attribute(s) id#7 missing from id#10,id#11 in operator !Project [id#7, id#10]. Attribute(s) with the same name appear in the operation: id. Please check if the right attribute(s) are used.;
> Project [id#10, id#4]
> +- SubqueryAlias a
> +- Project [id#10, id#4]
> +- Join LeftOuter, (id#4 = id#10)
> :- Project [id#4]
> : +- Project [id#7, id#4]
> : +- Join LeftOuter, (id#4 = id#7)
> : :- LogicalRDD [id#4], false
> : +- LogicalRDD [id#7], false
> +- Project [id#10]
> +- !Project [id#7, id#10]
> +- Join LeftOuter, (id#10 = id#11)
> :- LogicalRDD [id#10], false
> +- LogicalRDD [id#11], false at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.failAnalysis(CheckAnalysis.scala:51)
> at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.failAnalysis$(CheckAnalysis.scala:50)
> at org.apache.spark.sql.catalyst.analysis.Analyzer.failAnalysis(Analyzer.scala:182)
> at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis$1(CheckAnalysis.scala:471)
> at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis$1$adapted(CheckAnalysis.scala:94)
> at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:263)
> at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1(TreeNode.scala:262)
> at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1$adapted(TreeNode.scala:262)
> at scala.collection.Iterator.foreach(Iterator.scala:943)
> at scala.collection.Iterator.foreach$(Iterator.scala:943)
> at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
> at scala.collection.IterableLike.foreach(IterableLike.scala:74)
> at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
> at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
> at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:262)
> at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1(TreeNode.scala:262)
> at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1$adapted(TreeNode.scala:262)
> at scala.collection.Iterator.foreach(Iterator.scala:943)
> at scala.collection.Iterator.foreach$(Iterator.scala:943)
> at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
> at scala.collection.IterableLike.foreach(IterableLike.scala:74)
> at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
> at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
> at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:262)
> at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1(TreeNode.scala:262)
> at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1$adapted(TreeNode.scala:262)
> at scala.collection.Iterator.foreach(Iterator.scala:943)
> at scala.collection.Iterator.foreach$(Iterator.scala:943)
> at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
> at scala.collection.IterableLike.foreach(IterableLike.scala:74)
> at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
> at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
> at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:262)
> at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1(TreeNode.scala:262)
> at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1$adapted(TreeNode.scala:262)
> at scala.collection.Iterator.foreach(Iterator.scala:943)
> at scala.collection.Iterator.foreach$(Iterator.scala:943)
> at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
> at scala.collection.IterableLike.foreach(IterableLike.scala:74)
> at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
> at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
> at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:262)
> at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1(TreeNode.scala:262)
> at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1$adapted(TreeNode.scala:262)
> at scala.collection.Iterator.foreach(Iterator.scala:943)
> at scala.collection.Iterator.foreach$(Iterator.scala:943)
> at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
> at scala.collection.IterableLike.foreach(IterableLike.scala:74)
> at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
> at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
> at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:262)
> at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.checkAnalysis(CheckAnalysis.scala:94)
> at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.checkAnalysis$(CheckAnalysis.scala:91)
> at org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:182)
> at org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$executeAndCheck$1(Analyzer.scala:205)
> at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:330)
> at org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:202)
> at org.apache.spark.sql.execution.QueryExecution.$anonfun$analyzed$1(QueryExecution.scala:88)
> at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111)
> at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:196)
> at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
> at org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:196)
> at org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:88)
> at org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:86)
> at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:78)
> at org.apache.spark.sql.Dataset$.$anonfun$ofRows$1(Dataset.scala:90)
> at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
> at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:88)
> at org.apache.spark.sql.Dataset.withPlan(Dataset.scala:3734)
> at org.apache.spark.sql.Dataset.select(Dataset.scala:1454)
> at org.apache.spark.sql.Dataset.select(Dataset.scala:1471)
> at com.joom.gburg_local.GburgTestLocal$.main(GburgTestLocal.scala:61)
> at com.joom.gburg_local.GburgTestLocal.main(GburgTestLocal.scala)
> {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org