You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by li...@apache.org on 2017/08/06 03:35:57 UTC

spark git commit: [SPARK-20963][SQL] Support column aliases for join relations in FROM clause

Repository: spark
Updated Branches:
  refs/heads/master 41568e9a0 -> 990efad1c


[SPARK-20963][SQL] Support column aliases for join relations in FROM clause

## What changes were proposed in this pull request?
This pr added parsing rules to support column aliases for join relations in FROM clause.
This pr is a sub-task of #18079.

## How was this patch tested?
Added tests in `AnalysisSuite`, `PlanParserSuite,` and `SQLQueryTestSuite`.

Author: Takeshi Yamamuro <ya...@apache.org>

Closes #18772 from maropu/SPARK-20963-2.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/990efad1
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/990efad1
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/990efad1

Branch: refs/heads/master
Commit: 990efad1c62dec8f80debb6a1b11bdd030142768
Parents: 41568e9
Author: Takeshi Yamamuro <ya...@apache.org>
Authored: Sat Aug 5 20:35:54 2017 -0700
Committer: gatorsmile <ga...@gmail.com>
Committed: Sat Aug 5 20:35:54 2017 -0700

----------------------------------------------------------------------
 .../apache/spark/sql/catalyst/parser/SqlBase.g4 | 10 ++---
 .../spark/sql/catalyst/parser/AstBuilder.scala  | 44 +++++++++++++-------
 .../sql/catalyst/analysis/AnalysisSuite.scala   | 24 ++++++++++-
 .../sql/catalyst/parser/PlanParserSuite.scala   | 13 ++++++
 .../sql-tests/inputs/table-aliases.sql          |  7 ++++
 .../sql-tests/results/table-aliases.sql.out     | 28 ++++++++++++-
 6 files changed, 104 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/990efad1/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index 4534b7d..954955b 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -473,11 +473,11 @@ identifierComment
     ;
 
 relationPrimary
-    : tableIdentifier sample? tableAlias                   #tableName
-    | '(' queryNoWith ')' sample? tableAlias               #aliasedQuery
-    | '(' relation ')' sample? (AS? strictIdentifier)?     #aliasedRelation
-    | inlineTable                                          #inlineTableDefault2
-    | functionTable                                        #tableValuedFunction
+    : tableIdentifier sample? tableAlias      #tableName
+    | '(' queryNoWith ')' sample? tableAlias  #aliasedQuery
+    | '(' relation ')' sample? tableAlias     #aliasedRelation
+    | inlineTable                             #inlineTableDefault2
+    | functionTable                           #tableValuedFunction
     ;
 
 inlineTable

http://git-wip-us.apache.org/repos/asf/spark/blob/990efad1/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 5935017..532d6ee 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -739,12 +739,14 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
   /**
    * Create an alias (SubqueryAlias) for a join relation. This is practically the same as
    * visitAliasedQuery and visitNamedExpression, ANTLR4 however requires us to use 3 different
-   * hooks.
+   * hooks. We could add alias names for output columns, for example:
+   * {{{
+   *   SELECT a, b, c, d FROM (src1 s1 INNER JOIN src2 s2 ON s1.id = s2.id) dst(a, b, c, d)
+   * }}}
    */
   override def visitAliasedRelation(ctx: AliasedRelationContext): LogicalPlan = withOrigin(ctx) {
-    plan(ctx.relation)
-      .optionalMap(ctx.sample)(withSample)
-      .optionalMap(ctx.strictIdentifier)(aliasPlan)
+    val relation = plan(ctx.relation).optionalMap(ctx.sample)(withSample)
+    mayApplyAliasPlan(ctx.tableAlias, relation)
   }
 
   /**
@@ -756,32 +758,44 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
    * }}}
    */
   override def visitAliasedQuery(ctx: AliasedQueryContext): LogicalPlan = withOrigin(ctx) {
-    val alias = if (ctx.tableAlias.strictIdentifier == null) {
+    val relation = plan(ctx.queryNoWith).optionalMap(ctx.sample)(withSample)
+    if (ctx.tableAlias.strictIdentifier == null) {
       // For un-aliased subqueries, use a default alias name that is not likely to conflict with
       // normal subquery names, so that parent operators can only access the columns in subquery by
       // unqualified names. Users can still use this special qualifier to access columns if they
       // know it, but that's not recommended.
-      "__auto_generated_subquery_name"
+      SubqueryAlias("__auto_generated_subquery_name", relation)
     } else {
-      ctx.tableAlias.strictIdentifier.getText
-    }
-    val subquery = SubqueryAlias(alias, plan(ctx.queryNoWith).optionalMap(ctx.sample)(withSample))
-    if (ctx.tableAlias.identifierList != null) {
-      val columnAliases = visitIdentifierList(ctx.tableAlias.identifierList)
-      UnresolvedSubqueryColumnAliases(columnAliases, subquery)
-    } else {
-      subquery
+      mayApplyAliasPlan(ctx.tableAlias, relation)
     }
   }
 
   /**
-   * Create an alias (SubqueryAlias) for a LogicalPlan.
+   * Create an alias ([[SubqueryAlias]]) for a [[LogicalPlan]].
    */
   private def aliasPlan(alias: ParserRuleContext, plan: LogicalPlan): LogicalPlan = {
     SubqueryAlias(alias.getText, plan)
   }
 
   /**
+   * If aliases specified in a FROM clause, create a subquery alias ([[SubqueryAlias]]) and
+   * column aliases for a [[LogicalPlan]].
+   */
+  private def mayApplyAliasPlan(tableAlias: TableAliasContext, plan: LogicalPlan): LogicalPlan = {
+    if (tableAlias.strictIdentifier != null) {
+      val subquery = SubqueryAlias(tableAlias.strictIdentifier.getText, plan)
+      if (tableAlias.identifierList != null) {
+        val columnNames = visitIdentifierList(tableAlias.identifierList)
+        UnresolvedSubqueryColumnAliases(columnNames, subquery)
+      } else {
+        subquery
+      }
+    } else {
+      plan
+    }
+  }
+
+  /**
    * Create a Sequence of Strings for a parenthesis enclosed alias list.
    */
   override def visitIdentifierList(ctx: IdentifierListContext): Seq[String] = withOrigin(ctx) {

http://git-wip-us.apache.org/repos/asf/spark/blob/990efad1/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
index 847713a..4195e95 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
@@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.dsl.expressions._
 import org.apache.spark.sql.catalyst.dsl.plans._
 import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.catalyst.plans.Cross
+import org.apache.spark.sql.catalyst.plans.{Cross, Inner}
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.types._
 
@@ -490,4 +490,26 @@ class AnalysisSuite extends AnalysisTest with ShouldMatchers {
       Seq("Number of column aliases does not match number of columns. " +
         "Number of column aliases: 5; number of columns: 4."))
   }
+
+  test("SPARK-20963 Support aliases for join relations in FROM clause") {
+    def joinRelationWithAliases(outputNames: Seq[String]): LogicalPlan = {
+      val src1 = LocalRelation('id.int, 'v1.string).as("s1")
+      val src2 = LocalRelation('id.int, 'v2.string).as("s2")
+      UnresolvedSubqueryColumnAliases(
+        outputNames,
+        SubqueryAlias(
+          "dst",
+          src1.join(src2, Inner, Option(Symbol("s1.id") === Symbol("s2.id"))))
+      ).select(star())
+    }
+    assertAnalysisSuccess(joinRelationWithAliases("col1" :: "col2" :: "col3" :: "col4" :: Nil))
+    assertAnalysisError(
+      joinRelationWithAliases("col1" :: Nil),
+      Seq("Number of column aliases does not match number of columns. " +
+        "Number of column aliases: 1; number of columns: 4."))
+    assertAnalysisError(
+      joinRelationWithAliases("col1" :: "col2" :: "col3" :: "col4" :: "col5" :: Nil),
+      Seq("Number of column aliases does not match number of columns. " +
+        "Number of column aliases: 5; number of columns: 4."))
+  }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/990efad1/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
index 5fa72e1..157d11d 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
@@ -506,6 +506,19 @@ class PlanParserSuite extends AnalysisTest {
       ).select(star()))
   }
 
+  test("SPARK-20963 Support aliases for join relations in FROM clause") {
+    val src1 = UnresolvedRelation(TableIdentifier("src1")).as("s1")
+    val src2 = UnresolvedRelation(TableIdentifier("src2")).as("s2")
+    assertEqual(
+      "SELECT * FROM (src1 s1 INNER JOIN src2 s2 ON s1.id = s2.id) dst(a, b, c, d)",
+      UnresolvedSubqueryColumnAliases(
+        Seq("a", "b", "c", "d"),
+        SubqueryAlias(
+          "dst",
+          src1.join(src2, Inner, Option(Symbol("s1.id") === Symbol("s2.id"))))
+      ).select(star()))
+  }
+
   test("inline table") {
     assertEqual("values 1, 2, 3, 4",
       UnresolvedInlineTable(Seq("col1"), Seq(1, 2, 3, 4).map(x => Seq(Literal(x)))))

http://git-wip-us.apache.org/repos/asf/spark/blob/990efad1/sql/core/src/test/resources/sql-tests/inputs/table-aliases.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/inputs/table-aliases.sql b/sql/core/src/test/resources/sql-tests/inputs/table-aliases.sql
index 85481cb..4cfd5f2 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/table-aliases.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/table-aliases.sql
@@ -18,3 +18,10 @@ SELECT a AS col1, b AS col2 FROM testData AS t(c, d);
 
 -- Subquery aliases in FROM clause
 SELECT * FROM (SELECT 1 AS a, 1 AS b) t(col1, col2);
+
+-- Aliases for join relations in FROM clause
+CREATE OR REPLACE TEMPORARY VIEW src1 AS SELECT * FROM VALUES (1, "a"), (2, "b"), (3, "c") AS src1(id, v1);
+
+CREATE OR REPLACE TEMPORARY VIEW src2 AS SELECT * FROM VALUES (2, 1.0), (3, 3.2), (1, 8.5) AS src2(id, v2);
+
+SELECT * FROM (src1 s1 INNER JOIN src2 s2 ON s1.id = s2.id) dst(a, b, c, d);

http://git-wip-us.apache.org/repos/asf/spark/blob/990efad1/sql/core/src/test/resources/sql-tests/results/table-aliases.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/table-aliases.sql.out b/sql/core/src/test/resources/sql-tests/results/table-aliases.sql.out
index 4459f31..53b069c 100644
--- a/sql/core/src/test/resources/sql-tests/results/table-aliases.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/table-aliases.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 8
+-- Number of queries: 11
 
 
 -- !query 0
@@ -69,3 +69,29 @@ SELECT * FROM (SELECT 1 AS a, 1 AS b) t(col1, col2)
 struct<col1:int,col2:int>
 -- !query 7 output
 1	1
+
+
+-- !query 8
+CREATE OR REPLACE TEMPORARY VIEW src1 AS SELECT * FROM VALUES (1, "a"), (2, "b"), (3, "c") AS src1(id, v1)
+-- !query 8 schema
+struct<>
+-- !query 8 output
+
+
+
+-- !query 9
+CREATE OR REPLACE TEMPORARY VIEW src2 AS SELECT * FROM VALUES (2, 1.0), (3, 3.2), (1, 8.5) AS src2(id, v2)
+-- !query 9 schema
+struct<>
+-- !query 9 output
+
+
+
+-- !query 10
+SELECT * FROM (src1 s1 INNER JOIN src2 s2 ON s1.id = s2.id) dst(a, b, c, d)
+-- !query 10 schema
+struct<a:int,b:string,c:int,d:decimal(2,1)>
+-- !query 10 output
+1	a	1	8.5
+2	b	2	1
+3	c	3	3.2


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org