You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by hv...@apache.org on 2016/09/21 13:53:48 UTC

spark git commit: [SPARK-17590][SQL] Analyze CTE definitions at once and allow CTE subquery to define CTE

Repository: spark
Updated Branches:
  refs/heads/master dd7561d33 -> 248922fd4


[SPARK-17590][SQL] Analyze CTE definitions at once and allow CTE subquery to define CTE

## What changes were proposed in this pull request?

We substitute logical plan with CTE definitions in the analyzer rule CTESubstitution. A CTE definition can be used in the logical plan for multiple times, and its analyzed logical plan should be the same. We should not analyze CTE definitions multiple times when they are reused in the query.

By analyzing CTE definitions before substitution, we can support defining CTE in subquery.

## How was this patch tested?

Jenkins tests.

Author: Liang-Chi Hsieh <si...@tw.ibm.com>

Closes #15146 from viirya/cte-analysis-once.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/248922fd
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/248922fd
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/248922fd

Branch: refs/heads/master
Commit: 248922fd4fb7c11a40304431e8cc667a8911a906
Parents: dd7561d
Author: Liang-Chi Hsieh <si...@tw.ibm.com>
Authored: Wed Sep 21 06:53:42 2016 -0700
Committer: Herman van Hovell <hv...@databricks.com>
Committed: Wed Sep 21 06:53:42 2016 -0700

----------------------------------------------------------------------
 .../apache/spark/sql/catalyst/parser/SqlBase.g4 |  2 +-
 .../spark/sql/catalyst/analysis/Analyzer.scala  |  5 ++--
 .../spark/sql/catalyst/parser/AstBuilder.scala  |  2 +-
 .../org/apache/spark/sql/SubquerySuite.scala    | 25 ++++++++++++++++++++
 4 files changed, 29 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/248922fd/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index 7023c0c..de2f9ee 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -262,7 +262,7 @@ ctes
     ;
 
 namedQuery
-    : name=identifier AS? '(' queryNoWith ')'
+    : name=identifier AS? '(' query ')'
     ;
 
 tableProvider

http://git-wip-us.apache.org/repos/asf/spark/blob/248922fd/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index cc62d5e..ae8869f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -116,15 +116,14 @@ class Analyzer(
   )
 
   /**
-   * Substitute child plan with cte definitions
+   * Analyze cte definitions and substitute child plan with analyzed cte definitions.
    */
   object CTESubstitution extends Rule[LogicalPlan] {
-    // TODO allow subquery to define CTE
     def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators  {
       case With(child, relations) =>
         substituteCTE(child, relations.foldLeft(Seq.empty[(String, LogicalPlan)]) {
           case (resolved, (name, relation)) =>
-            resolved :+ name -> ResolveRelations(substituteCTE(relation, resolved))
+            resolved :+ name -> execute(substituteCTE(relation, resolved))
         })
       case other => other
     }

http://git-wip-us.apache.org/repos/asf/spark/blob/248922fd/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 69d68fa..12a70b7 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -108,7 +108,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
    * This is only used for Common Table Expressions.
    */
   override def visitNamedQuery(ctx: NamedQueryContext): SubqueryAlias = withOrigin(ctx) {
-    SubqueryAlias(ctx.name.getText, plan(ctx.queryNoWith), None)
+    SubqueryAlias(ctx.name.getText, plan(ctx.query), None)
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/spark/blob/248922fd/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
index 52387b4..eab4505 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
@@ -76,6 +76,31 @@ class SubquerySuite extends QueryTest with SharedSQLContext {
     )
   }
 
+  test("define CTE in CTE subquery") {
+    checkAnswer(
+      sql(
+        """
+          | with t2 as (with t1 as (select 1 as b, 2 as c) select b, c from t1)
+          | select a from (select 1 as a union all select 2 as a) t
+          | where a = (select max(b) from t2)
+        """.stripMargin),
+      Array(Row(1))
+    )
+    checkAnswer(
+      sql(
+        """
+          | with t2 as (with t1 as (select 1 as b, 2 as c) select b, c from t1),
+          | t3 as (
+          |   with t4 as (select 1 as d, 3 as e)
+          |   select * from t4 cross join t2 where t2.b = t4.d
+          | )
+          | select a from (select 1 as a union all select 2 as a)
+          | where a = (select max(d) from t3)
+        """.stripMargin),
+      Array(Row(1))
+    )
+  }
+
   test("uncorrelated scalar subquery in CTE") {
     checkAnswer(
       sql("with t2 as (select 1 as b, 2 as c) " +


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org