You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by hv...@apache.org on 2017/03/14 11:49:35 UTC
spark git commit: [SPARK-19850][SQL] Allow the use of aliases in SQL function calls

Repository: spark
Updated Branches:
  refs/heads/master 0ee38a39e -> a0b92f73f


[SPARK-19850][SQL] Allow the use of aliases in SQL function calls

## What changes were proposed in this pull request?
We currently cannot use aliases in SQL function calls. This is inconvenient when you try to create a struct. For example, the SQL query `select struct(1, 2) st` will create a struct with column names `col1` and `col2`. This is even more problematic when we want to append a field to an existing struct. For example, if we want to add a field to struct `st` we would issue the following SQL query: `select struct(st.*, 1) as st from src`. The result will be struct `st` with a column with a non-descriptive name `col3` (if `st` itself has 2 fields).

This PR proposes to change this by allowing the use of aliased expressions in function parameters. For example, `select struct(1 as a, 2 as b) st` will create a struct with columns `a` & `b`.

## How was this patch tested?
Added a test to `ExpressionParserSuite` and added a test file for `SQLQueryTestSuite`.

Author: Herman van Hovell <hv...@databricks.com>

Closes #17245 from hvanhovell/SPARK-19850.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a0b92f73
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a0b92f73
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a0b92f73

Branch: refs/heads/master
Commit: a0b92f73fed9b91883f08cced1c09724e09e1883
Parents: 0ee38a3
Author: Herman van Hovell <hv...@databricks.com>
Authored: Tue Mar 14 12:49:30 2017 +0100
Committer: Herman van Hovell <hv...@databricks.com>
Committed: Tue Mar 14 12:49:30 2017 +0100

----------------------------------------------------------------------
 .../apache/spark/sql/catalyst/parser/SqlBase.g4 |  7 ++-
 .../spark/sql/catalyst/parser/AstBuilder.scala  |  4 +-
 .../catalyst/parser/ExpressionParserSuite.scala |  2 +
 .../test/resources/sql-tests/inputs/struct.sql  | 20 +++++++
 .../resources/sql-tests/results/struct.sql.out  | 60 ++++++++++++++++++++
 5 files changed, 88 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/a0b92f73/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index 59f93b3..cc3b8fd 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -506,10 +506,10 @@ expression
 
 booleanExpression
     : NOT booleanExpression                                        #logicalNot
+    | EXISTS '(' query ')'                                         #exists
     | predicated                                                   #booleanDefault
     | left=booleanExpression operator=AND right=booleanExpression  #logicalBinary
     | left=booleanExpression operator=OR right=booleanExpression   #logicalBinary
-    | EXISTS '(' query ')'                                         #exists
     ;
 
 // workaround for:
@@ -546,9 +546,10 @@ primaryExpression
     | constant                                                                                 #constantDefault
     | ASTERISK                                                                                 #star
     | qualifiedName '.' ASTERISK                                                               #star
-    | '(' expression (',' expression)+ ')'                                                     #rowConstructor
+    | '(' namedExpression (',' namedExpression)+ ')'                                           #rowConstructor
     | '(' query ')'                                                                            #subqueryExpression
-    | qualifiedName '(' (setQuantifier? expression (',' expression)*)? ')' (OVER windowSpec)?  #functionCall
+    | qualifiedName '(' (setQuantifier? namedExpression (',' namedExpression)*)? ')'
+       (OVER windowSpec)?                                                                      #functionCall
     | value=primaryExpression '[' index=valueExpression ']'                                    #subscript
     | identifier                                                                               #columnReference
     | base=primaryExpression '.' fieldName=identifier                                          #dereference

http://git-wip-us.apache.org/repos/asf/spark/blob/a0b92f73/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 3cf11ad..4c9fb2e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -1016,7 +1016,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
     // Create the function call.
     val name = ctx.qualifiedName.getText
     val isDistinct = Option(ctx.setQuantifier()).exists(_.DISTINCT != null)
-    val arguments = ctx.expression().asScala.map(expression) match {
+    val arguments = ctx.namedExpression().asScala.map(expression) match {
       case Seq(UnresolvedStar(None)) if name.toLowerCase == "count" && !isDistinct =>
         // Transform COUNT(*) into COUNT(1).
         Seq(Literal(1))
@@ -1127,7 +1127,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
    * Create a [[CreateStruct]] expression.
    */
   override def visitRowConstructor(ctx: RowConstructorContext): Expression = withOrigin(ctx) {
-    CreateStruct(ctx.expression.asScala.map(expression))
+    CreateStruct(ctx.namedExpression().asScala.map(expression))
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/spark/blob/a0b92f73/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
index 2fecb8d..c2e62e7 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
@@ -209,6 +209,7 @@ class ExpressionParserSuite extends PlanTest {
     assertEqual("foo(distinct a, b)", 'foo.distinctFunction('a, 'b))
     assertEqual("grouping(distinct a, b)", 'grouping.distinctFunction('a, 'b))
     assertEqual("`select`(all a, b)", 'select.function('a, 'b))
+    assertEqual("foo(a as x, b as e)", 'foo.function('a as 'x, 'b as 'e))
   }
 
   test("window function expressions") {
@@ -278,6 +279,7 @@ class ExpressionParserSuite extends PlanTest {
     // Note that '(a)' will be interpreted as a nested expression.
     assertEqual("(a, b)", CreateStruct(Seq('a, 'b)))
     assertEqual("(a, b, c)", CreateStruct(Seq('a, 'b, 'c)))
+    assertEqual("(a as b, b as c)", CreateStruct(Seq('a as 'b, 'b as 'c)))
   }
 
   test("scalar sub-query") {

http://git-wip-us.apache.org/repos/asf/spark/blob/a0b92f73/sql/core/src/test/resources/sql-tests/inputs/struct.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/inputs/struct.sql b/sql/core/src/test/resources/sql-tests/inputs/struct.sql
new file mode 100644
index 0000000..e56344d
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/struct.sql
@@ -0,0 +1,20 @@
+CREATE TEMPORARY VIEW tbl_x AS VALUES
+  (1, NAMED_STRUCT('C', 'gamma', 'D', 'delta')),
+  (2, NAMED_STRUCT('C', 'epsilon', 'D', 'eta')),
+  (3, NAMED_STRUCT('C', 'theta', 'D', 'iota'))
+  AS T(ID, ST);
+
+-- Create a struct
+SELECT STRUCT('alpha', 'beta') ST;
+
+-- Create a struct with aliases
+SELECT STRUCT('alpha' AS A, 'beta' AS B) ST;
+
+-- Star expansion in a struct.
+SELECT ID, STRUCT(ST.*) NST FROM tbl_x;
+
+-- Append a column to a struct
+SELECT ID, STRUCT(ST.*,CAST(ID AS STRING) AS E) NST FROM tbl_x;
+
+-- Prepend a column to a struct
+SELECT ID, STRUCT(CAST(ID AS STRING) AS AA, ST.*) NST FROM tbl_x;

http://git-wip-us.apache.org/repos/asf/spark/blob/a0b92f73/sql/core/src/test/resources/sql-tests/results/struct.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/struct.sql.out b/sql/core/src/test/resources/sql-tests/results/struct.sql.out
new file mode 100644
index 0000000..3e32f46
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/struct.sql.out
@@ -0,0 +1,60 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 6
+
+
+-- !query 0
+CREATE TEMPORARY VIEW tbl_x AS VALUES
+  (1, NAMED_STRUCT('C', 'gamma', 'D', 'delta')),
+  (2, NAMED_STRUCT('C', 'epsilon', 'D', 'eta')),
+  (3, NAMED_STRUCT('C', 'theta', 'D', 'iota'))
+  AS T(ID, ST)
+-- !query 0 schema
+struct<>
+-- !query 0 output
+
+
+
+-- !query 1
+SELECT STRUCT('alpha', 'beta') ST
+-- !query 1 schema
+struct<ST:struct<col1:string,col2:string>>
+-- !query 1 output
+{"col1":"alpha","col2":"beta"}
+
+
+-- !query 2
+SELECT STRUCT('alpha' AS A, 'beta' AS B) ST
+-- !query 2 schema
+struct<ST:struct<A:string,B:string>>
+-- !query 2 output
+{"A":"alpha","B":"beta"}
+
+
+-- !query 3
+SELECT ID, STRUCT(ST.*) NST FROM tbl_x
+-- !query 3 schema
+struct<ID:int,NST:struct<C:string,D:string>>
+-- !query 3 output
+1	{"C":"gamma","D":"delta"}
+2	{"C":"epsilon","D":"eta"}
+3	{"C":"theta","D":"iota"}
+
+
+-- !query 4
+SELECT ID, STRUCT(ST.*,CAST(ID AS STRING) AS E) NST FROM tbl_x
+-- !query 4 schema
+struct<ID:int,NST:struct<C:string,D:string,E:string>>
+-- !query 4 output
+1	{"C":"gamma","D":"delta","E":"1"}
+2	{"C":"epsilon","D":"eta","E":"2"}
+3	{"C":"theta","D":"iota","E":"3"}
+
+
+-- !query 5
+SELECT ID, STRUCT(CAST(ID AS STRING) AS AA, ST.*) NST FROM tbl_x
+-- !query 5 schema
+struct<ID:int,NST:struct<AA:string,C:string,D:string>>
+-- !query 5 output
+1	{"AA":"1","C":"gamma","D":"delta"}
+2	{"AA":"2","C":"epsilon","D":"eta"}
+3	{"AA":"3","C":"theta","D":"iota"}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org