You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2020/07/24 03:50:22 UTC
[spark] branch branch-3.0 updated: [SPARK-32237][SQL][3.0] Resolve hint in CTE
This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new be1b282 [SPARK-32237][SQL][3.0] Resolve hint in CTE
be1b282 is described below
commit be1b28209d8b6ac1f46e8899b85f7cb691a97051
Author: LantaoJin <ji...@gmail.com>
AuthorDate: Fri Jul 24 03:48:16 2020 +0000
[SPARK-32237][SQL][3.0] Resolve hint in CTE
### What changes were proposed in this pull request?
This is a backport of #29062 to branch-3.0.
This PR moves the `Substitution` batch ahead of the `Hints` batch in `Analyzer`, so that hints written inside a CTE are resolved instead of causing analysis to fail.
### Why are the changes needed?
The SQL below throws an `AnalysisException` in Spark 3.0, but it works in Spark 2.x:
```sql
WITH cte AS (SELECT /*+ REPARTITION(3) */ T.id, T.data FROM $t1 T)
SELECT cte.id, cte.data FROM cte
```
```
Failed to analyze query: org.apache.spark.sql.AnalysisException: cannot resolve '`cte.id`' given input columns: [cte.data, cte.id]; line 3 pos 7;
'Project ['cte.id, 'cte.data]
+- SubqueryAlias cte
+- Project [id#21L, data#22]
+- SubqueryAlias T
+- SubqueryAlias testcat.ns1.ns2.tbl
+- RelationV2[id#21L, data#22] testcat.ns1.ns2.tbl
'Project ['cte.id, 'cte.data]
+- SubqueryAlias cte
+- Project [id#21L, data#22]
+- SubqueryAlias T
+- SubqueryAlias testcat.ns1.ns2.tbl
+- RelationV2[id#21L, data#22] testcat.ns1.ns2.tbl
```
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Added unit tests to `AnalysisSuite` and `SQLQuerySuite`.
Closes #29201 from LantaoJin/SPARK-32237_branch-3.0.
Lead-authored-by: LantaoJin <ji...@gmail.com>
Co-authored-by: Alan Jin <ji...@gmail.com>
Signed-off-by: Wenchen Fan <we...@databricks.com>
---
.../spark/sql/catalyst/analysis/Analyzer.scala | 10 ++++-----
.../sql/catalyst/analysis/AnalysisSuite.scala | 25 +++++++++++++++++++++-
.../scala/org/apache/spark/sql/SQLQuerySuite.scala | 12 +++++++++++
3 files changed, 41 insertions(+), 6 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index bd5a797..89454c2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -200,16 +200,16 @@ class Analyzer(
val postHocResolutionRules: Seq[Rule[LogicalPlan]] = Nil
lazy val batches: Seq[Batch] = Seq(
- Batch("Hints", fixedPoint,
- new ResolveHints.ResolveJoinStrategyHints(conf),
- new ResolveHints.ResolveCoalesceHints(conf)),
- Batch("Simple Sanity Check", Once,
- LookupFunctions),
Batch("Substitution", fixedPoint,
CTESubstitution,
WindowsSubstitution,
EliminateUnions,
new SubstituteUnresolvedOrdinals(conf)),
+ Batch("Hints", fixedPoint,
+ new ResolveHints.ResolveJoinStrategyHints(conf),
+ new ResolveHints.ResolveCoalesceHints(conf)),
+ Batch("Simple Sanity Check", Once,
+ LookupFunctions),
Batch("Resolution", fixedPoint,
ResolveTableValuedFunctions ::
ResolveNamespace(catalogManager) ::
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
index 8db2036..453a4e6 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
@@ -25,7 +25,7 @@ import org.apache.log4j.Level
import org.scalatest.Matchers
import org.apache.spark.api.python.PythonEvalType
-import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.catalyst.{AliasIdentifier, TableIdentifier}
import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType, InMemoryCatalog, SessionCatalog}
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
@@ -879,4 +879,27 @@ class AnalysisSuite extends AnalysisTest with Matchers {
Seq("Intersect can only be performed on tables with the compatible column types. " +
"timestamp <> double at the second column of the second table"))
}
+
+ test("SPARK-32237: Hint in CTE") {
+ val plan = With(
+ Project(
+ Seq(UnresolvedAttribute("cte.a")),
+ UnresolvedRelation(TableIdentifier("cte"))
+ ),
+ Seq(
+ (
+ "cte",
+ SubqueryAlias(
+ AliasIdentifier("cte"),
+ UnresolvedHint(
+ "REPARTITION",
+ Seq(Literal(3)),
+ Project(testRelation.output, testRelation)
+ )
+ )
+ )
+ )
+ )
+ assertAnalysisSuccess(plan)
+ }
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 6fab47d..f5dba4c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -3468,6 +3468,18 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
}
}
+ test("SPARK-32237: Hint in CTE") {
+ withTable("t") {
+ sql("CREATE TABLE t USING PARQUET AS SELECT 1 AS id")
+ checkAnswer(
+ sql("""
+ |WITH cte AS (SELECT /*+ REPARTITION(3) */ * FROM t)
+ |SELECT * FROM cte
+ """.stripMargin),
+ Row(1) :: Nil)
+ }
+ }
+
test("SPARK-32372: ResolveReferences.dedupRight should only rewrite attributes for ancestor " +
"plans of the conflict plan") {
sql("SELECT name, avg(age) as avg_age FROM person GROUP BY name")
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org