You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2020/07/24 03:50:22 UTC
[spark] branch branch-3.0 updated: [SPARK-32237][SQL][3.0] Resolve hint in CTE

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new be1b282  [SPARK-32237][SQL][3.0] Resolve hint in CTE
be1b282 is described below

commit be1b28209d8b6ac1f46e8899b85f7cb691a97051
Author: LantaoJin <ji...@gmail.com>
AuthorDate: Fri Jul 24 03:48:16 2020 +0000

    [SPARK-32237][SQL][3.0] Resolve hint in CTE
    
    ### What changes were proposed in this pull request?
    The backport of #29062
    
    This PR moves the `Substitution` batch ahead of the `Hints` batch in `Analyzer`, so that a hint inside a CTE works.
    
    ### Why are the changes needed?
    The SQL below throws an `AnalysisException` in Spark 3.0, although it works in Spark 2.x:
    ```sql
    WITH cte AS (SELECT /*+ REPARTITION(3) */ T.id, T.data FROM $t1 T)
    SELECT cte.id, cte.data FROM cte
    ```
    ```
    Failed to analyze query: org.apache.spark.sql.AnalysisException: cannot resolve '`cte.id`' given input columns: [cte.data, cte.id]; line 3 pos 7;
    'Project ['cte.id, 'cte.data]
    +- SubqueryAlias cte
       +- Project [id#21L, data#22]
          +- SubqueryAlias T
             +- SubqueryAlias testcat.ns1.ns2.tbl
                +- RelationV2[id#21L, data#22] testcat.ns1.ns2.tbl
    
    'Project ['cte.id, 'cte.data]
    +- SubqueryAlias cte
       +- Project [id#21L, data#22]
          +- SubqueryAlias T
             +- SubqueryAlias testcat.ns1.ns2.tbl
                +- RelationV2[id#21L, data#22] testcat.ns1.ns2.tbl
    ```
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    Added unit tests to `AnalysisSuite` and `SQLQuerySuite`.
    
    Closes #29201 from LantaoJin/SPARK-32237_branch-3.0.
    
    Lead-authored-by: LantaoJin <ji...@gmail.com>
    Co-authored-by: Alan Jin <ji...@gmail.com>
    Signed-off-by: Wenchen Fan <we...@databricks.com>
---
 .../spark/sql/catalyst/analysis/Analyzer.scala     | 10 ++++-----
 .../sql/catalyst/analysis/AnalysisSuite.scala      | 25 +++++++++++++++++++++-
 .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 12 +++++++++++
 3 files changed, 41 insertions(+), 6 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index bd5a797..89454c2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -200,16 +200,16 @@ class Analyzer(
   val postHocResolutionRules: Seq[Rule[LogicalPlan]] = Nil
 
   lazy val batches: Seq[Batch] = Seq(
-    Batch("Hints", fixedPoint,
-      new ResolveHints.ResolveJoinStrategyHints(conf),
-      new ResolveHints.ResolveCoalesceHints(conf)),
-    Batch("Simple Sanity Check", Once,
-      LookupFunctions),
     Batch("Substitution", fixedPoint,
       CTESubstitution,
       WindowsSubstitution,
       EliminateUnions,
       new SubstituteUnresolvedOrdinals(conf)),
+    Batch("Hints", fixedPoint,
+      new ResolveHints.ResolveJoinStrategyHints(conf),
+      new ResolveHints.ResolveCoalesceHints(conf)),
+    Batch("Simple Sanity Check", Once,
+      LookupFunctions),
     Batch("Resolution", fixedPoint,
       ResolveTableValuedFunctions ::
       ResolveNamespace(catalogManager) ::
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
index 8db2036..453a4e6 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
@@ -25,7 +25,7 @@ import org.apache.log4j.Level
 import org.scalatest.Matchers
 
 import org.apache.spark.api.python.PythonEvalType
-import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.catalyst.{AliasIdentifier, TableIdentifier}
 import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType, InMemoryCatalog, SessionCatalog}
 import org.apache.spark.sql.catalyst.dsl.expressions._
 import org.apache.spark.sql.catalyst.dsl.plans._
@@ -879,4 +879,27 @@ class AnalysisSuite extends AnalysisTest with Matchers {
       Seq("Intersect can only be performed on tables with the compatible column types. " +
         "timestamp <> double at the second column of the second table"))
   }
+
+  test("SPARK-32237: Hint in CTE") {
+    val plan = With(
+      Project(
+        Seq(UnresolvedAttribute("cte.a")),
+        UnresolvedRelation(TableIdentifier("cte"))
+      ),
+      Seq(
+        (
+          "cte",
+          SubqueryAlias(
+            AliasIdentifier("cte"),
+            UnresolvedHint(
+              "REPARTITION",
+              Seq(Literal(3)),
+              Project(testRelation.output, testRelation)
+            )
+          )
+        )
+      )
+    )
+    assertAnalysisSuccess(plan)
+  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 6fab47d..f5dba4c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -3468,6 +3468,18 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
     }
   }
 
+  test("SPARK-32237: Hint in CTE") {
+    withTable("t") {
+      sql("CREATE TABLE t USING PARQUET AS SELECT 1 AS id")
+      checkAnswer(
+        sql("""
+              |WITH cte AS (SELECT /*+ REPARTITION(3) */ * FROM t)
+              |SELECT * FROM cte
+            """.stripMargin),
+        Row(1) :: Nil)
+    }
+  }
+
   test("SPARK-32372: ResolveReferences.dedupRight should only rewrite attributes for ancestor " +
     "plans of the conflict plan") {
     sql("SELECT name, avg(age) as avg_age FROM person GROUP BY name")


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org