You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by li...@apache.org on 2017/07/29 17:14:53 UTC
spark git commit: [SPARK-20962][SQL] Support subquery column aliases
in FROM clause
Repository: spark
Updated Branches:
refs/heads/master 92d85637e -> 6550086bb
[SPARK-20962][SQL] Support subquery column aliases in FROM clause
## What changes were proposed in this pull request?
This PR adds parsing rules to support subquery column aliases in the FROM clause.
This PR is a sub-task of #18079.
## How was this patch tested?
Added tests in `PlanParserSuite` and `SQLQueryTestSuite`.
Author: Takeshi Yamamuro <ya...@apache.org>
Closes #18185 from maropu/SPARK-20962.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6550086b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6550086b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6550086b
Branch: refs/heads/master
Commit: 6550086bbdf4c1581cbfa90550c5a388e531a736
Parents: 92d8563
Author: Takeshi Yamamuro <ya...@apache.org>
Authored: Sat Jul 29 10:14:47 2017 -0700
Committer: gatorsmile <ga...@gmail.com>
Committed: Sat Jul 29 10:14:47 2017 -0700
----------------------------------------------------------------------
.../apache/spark/sql/catalyst/parser/SqlBase.g4 | 2 +-
.../spark/sql/catalyst/analysis/Analyzer.scala | 25 ++++++++++++++++++++
.../sql/catalyst/analysis/unresolved.scala | 23 +++++++++++++++++-
.../spark/sql/catalyst/parser/AstBuilder.scala | 18 ++++++++++----
.../sql/catalyst/analysis/AnalysisSuite.scala | 20 ++++++++++++++++
.../sql/catalyst/parser/PlanParserSuite.scala | 13 +++++++++-
.../sql-tests/inputs/table-aliases.sql | 3 +++
.../sql-tests/results/table-aliases.sql.out | 10 +++++++-
8 files changed, 105 insertions(+), 9 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/6550086b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index ef9f88a..4534b7d 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -474,7 +474,7 @@ identifierComment
relationPrimary
: tableIdentifier sample? tableAlias #tableName
- | '(' queryNoWith ')' sample? (AS? strictIdentifier)? #aliasedQuery
+ | '(' queryNoWith ')' sample? tableAlias #aliasedQuery
| '(' relation ')' sample? (AS? strictIdentifier)? #aliasedRelation
| inlineTable #inlineTableDefault2
| functionTable #tableValuedFunction
http://git-wip-us.apache.org/repos/asf/spark/blob/6550086b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index f987ed8..a6d297c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -141,6 +141,7 @@ class Analyzer(
ResolveFunctions ::
ResolveAliases ::
ResolveSubquery ::
+ ResolveSubqueryColumnAliases ::
ResolveWindowOrder ::
ResolveWindowFrame ::
ResolveNaturalAndUsingJoin ::
@@ -1324,6 +1325,30 @@ class Analyzer(
}
/**
+ * Replaces unresolved column aliases for a subquery with projections.
+ */
+ object ResolveSubqueryColumnAliases extends Rule[LogicalPlan] {
+
+ def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperators {
+ case u @ UnresolvedSubqueryColumnAliases(columnNames, child) if child.resolved =>
+ // Resolves output attributes if a query has alias names in its subquery:
+ // e.g., SELECT * FROM (SELECT 1 AS a, 1 AS b) t(col1, col2)
+ val outputAttrs = child.output
+ // Checks that the number of aliases is equal to the number of output columns
+ // in the subquery.
+ if (columnNames.size != outputAttrs.size) {
+ u.failAnalysis("Number of column aliases does not match number of columns. " +
+ s"Number of column aliases: ${columnNames.size}; " +
+ s"number of columns: ${outputAttrs.size}.")
+ }
+ val aliases = outputAttrs.zip(columnNames).map { case (attr, aliasName) =>
+ Alias(attr, aliasName)()
+ }
+ Project(aliases, child)
+ }
+ }
+
+ /**
* Turns projections that contain aggregate expressions into aggregations.
*/
object GlobalAggregates extends Rule[LogicalPlan] {
http://git-wip-us.apache.org/repos/asf/spark/blob/6550086b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
index fb32269..b7a704d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
@@ -23,7 +23,7 @@ import org.apache.spark.sql.catalyst.errors.TreeNodeException
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodegenFallback, ExprCode}
import org.apache.spark.sql.catalyst.parser.ParserUtils
-import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan}
+import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, UnaryNode}
import org.apache.spark.sql.catalyst.trees.TreeNode
import org.apache.spark.sql.catalyst.util.quoteIdentifier
import org.apache.spark.sql.types.{DataType, Metadata, StructType}
@@ -423,6 +423,27 @@ case class UnresolvedAlias(
}
/**
+ * Aliased column names resolved by positions for subquery. We could add alias names for output
+ * columns in the subquery:
+ * {{{
+ * // Assign alias names for output columns
+ * SELECT col1, col2 FROM (SELECT 1 AS a, 1 AS b) t(col1, col2);
+ * }}}
+ *
+ * @param outputColumnNames the alias names assigned by position to the subquery's output columns.
+ * @param child the logical plan of this subquery.
+ */
+case class UnresolvedSubqueryColumnAliases(
+ outputColumnNames: Seq[String],
+ child: LogicalPlan)
+ extends UnaryNode {
+
+ override def output: Seq[Attribute] = Nil
+
+ override lazy val resolved = false
+}
+
+/**
* Holds the deserializer expression and the attributes that are available during the resolution
* for it. Deserializer expression is a special kind of expression that is not always resolved by
* children output, but by given attributes, e.g. the `keyDeserializer` in `MapGroups` should be
http://git-wip-us.apache.org/repos/asf/spark/blob/6550086b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 6795be7..0757826 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -750,20 +750,28 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
/**
* Create an alias (SubqueryAlias) for a sub-query. This is practically the same as
* visitAliasedRelation and visitNamedExpression, ANTLR4 however requires us to use 3 different
- * hooks.
+ * hooks. We could add alias names for output columns, for example:
+ * {{{
+ * SELECT col1, col2 FROM (SELECT 1 AS a, 1 AS b) t(col1, col2)
+ * }}}
*/
override def visitAliasedQuery(ctx: AliasedQueryContext): LogicalPlan = withOrigin(ctx) {
- val alias = if (ctx.strictIdentifier == null) {
+ val alias = if (ctx.tableAlias.strictIdentifier == null) {
// For un-aliased subqueries, use a default alias name that is not likely to conflict with
// normal subquery names, so that parent operators can only access the columns in subquery by
// unqualified names. Users can still use this special qualifier to access columns if they
// know it, but that's not recommended.
"__auto_generated_subquery_name"
} else {
- ctx.strictIdentifier.getText
+ ctx.tableAlias.strictIdentifier.getText
+ }
+ val subquery = SubqueryAlias(alias, plan(ctx.queryNoWith).optionalMap(ctx.sample)(withSample))
+ if (ctx.tableAlias.identifierList != null) {
+ val columnAliases = visitIdentifierList(ctx.tableAlias.identifierList)
+ UnresolvedSubqueryColumnAliases(columnAliases, subquery)
+ } else {
+ subquery
}
-
- SubqueryAlias(alias, plan(ctx.queryNoWith).optionalMap(ctx.sample)(withSample))
}
/**
http://git-wip-us.apache.org/repos/asf/spark/blob/6550086b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
index be26b1b..9bcf477 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
@@ -470,4 +470,24 @@ class AnalysisSuite extends AnalysisTest with ShouldMatchers {
Seq("Number of column aliases does not match number of columns. Table name: TaBlE3; " +
"number of column aliases: 5; number of columns: 4."))
}
+
+ test("SPARK-20962 Support subquery column aliases in FROM clause") {
+ def tableColumnsWithAliases(outputNames: Seq[String]): LogicalPlan = {
+ UnresolvedSubqueryColumnAliases(
+ outputNames,
+ SubqueryAlias(
+ "t",
+ UnresolvedRelation(TableIdentifier("TaBlE3")))
+ ).select(star())
+ }
+ assertAnalysisSuccess(tableColumnsWithAliases("col1" :: "col2" :: "col3" :: "col4" :: Nil))
+ assertAnalysisError(
+ tableColumnsWithAliases("col1" :: Nil),
+ Seq("Number of column aliases does not match number of columns. " +
+ "Number of column aliases: 1; number of columns: 4."))
+ assertAnalysisError(
+ tableColumnsWithAliases("col1" :: "col2" :: "col3" :: "col4" :: "col5" :: Nil),
+ Seq("Number of column aliases does not match number of columns. " +
+ "Number of column aliases: 5; number of columns: 4."))
+ }
}
http://git-wip-us.apache.org/repos/asf/spark/blob/6550086b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
index 725bcb8..c7f39ae 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
@@ -18,7 +18,7 @@
package org.apache.spark.sql.catalyst.parser
import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
-import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedAttribute, UnresolvedFunction, UnresolvedGenerator, UnresolvedInlineTable, UnresolvedRelation, UnresolvedTableValuedFunction}
+import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedAttribute, UnresolvedFunction, UnresolvedGenerator, UnresolvedInlineTable, UnresolvedRelation, UnresolvedSubqueryColumnAliases, UnresolvedTableValuedFunction}
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans._
import org.apache.spark.sql.catalyst.plans.logical._
@@ -495,6 +495,17 @@ class PlanParserSuite extends AnalysisTest {
.select(star()))
}
+ test("SPARK-20962 Support subquery column aliases in FROM clause") {
+ assertEqual(
+ "SELECT * FROM (SELECT a AS x, b AS y FROM t) t(col1, col2)",
+ UnresolvedSubqueryColumnAliases(
+ Seq("col1", "col2"),
+ SubqueryAlias(
+ "t",
+ UnresolvedRelation(TableIdentifier("t")).select('a.as("x"), 'b.as("y")))
+ ).select(star()))
+ }
+
test("inline table") {
assertEqual("values 1, 2, 3, 4",
UnresolvedInlineTable(Seq("col1"), Seq(1, 2, 3, 4).map(x => Seq(Literal(x)))))
http://git-wip-us.apache.org/repos/asf/spark/blob/6550086b/sql/core/src/test/resources/sql-tests/inputs/table-aliases.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/inputs/table-aliases.sql b/sql/core/src/test/resources/sql-tests/inputs/table-aliases.sql
index c90a9c7..85481cb 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/table-aliases.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/table-aliases.sql
@@ -15,3 +15,6 @@ SELECT * FROM testData AS t(col1);
-- Check alias duplication
SELECT a AS col1, b AS col2 FROM testData AS t(c, d);
+
+-- Subquery aliases in FROM clause
+SELECT * FROM (SELECT 1 AS a, 1 AS b) t(col1, col2);
http://git-wip-us.apache.org/repos/asf/spark/blob/6550086b/sql/core/src/test/resources/sql-tests/results/table-aliases.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/table-aliases.sql.out b/sql/core/src/test/resources/sql-tests/results/table-aliases.sql.out
index 7abbcd8..4459f31 100644
--- a/sql/core/src/test/resources/sql-tests/results/table-aliases.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/table-aliases.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 7
+-- Number of queries: 8
-- !query 0
@@ -61,3 +61,11 @@ struct<>
-- !query 6 output
org.apache.spark.sql.AnalysisException
cannot resolve '`a`' given input columns: [t.c, t.d]; line 1 pos 7
+
+
+-- !query 7
+SELECT * FROM (SELECT 1 AS a, 1 AS b) t(col1, col2)
+-- !query 7 schema
+struct<col1:int,col2:int>
+-- !query 7 output
+1 1
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org