You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2019/07/05 18:42:25 UTC

[spark] branch master updated: [SPARK-28002][SQL][FOLLOWUP] Fix duplicate CTE error message and add more test cases

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 1272df2  [SPARK-28002][SQL][FOLLOWUP] Fix duplicate CTE error message and add more test cases
1272df2 is described below

commit 1272df29fe483cecdc3f592d0a7a84b60cb3c558
Author: Peter Toth <pe...@gmail.com>
AuthorDate: Fri Jul 5 11:42:01 2019 -0700

    [SPARK-28002][SQL][FOLLOWUP] Fix duplicate CTE error message and add more test cases
    
    ## What changes were proposed in this pull request?
    
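    This PR replaces the generic "Found duplicate keys" parse error raised for duplicate CTE names with a CTE-specific message ("CTE definition can't have duplicate names: ..."), and adds some more WITH test cases, as a follow-up to https://github.com/apache/spark/pull/24842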
    
    ## How was this patch tested?
    
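    Added new unit test queries to `cte.sql` (with regenerated `cte.sql.out`) and updated the expected error message in `PlanParserSuite`.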
    
    Closes #24949 from peter-toth/SPARK-28002-follow-up.
    
    Authored-by: Peter Toth <pe...@gmail.com>
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
 .../spark/sql/catalyst/parser/AstBuilder.scala     |   7 +-
 .../sql/catalyst/parser/PlanParserSuite.scala      |   2 +-
 .../src/test/resources/sql-tests/inputs/cte.sql    |  18 +++
 .../test/resources/sql-tests/results/cte.sql.out   | 138 ++++++++++++++-------
 4 files changed, 121 insertions(+), 44 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 5eef8db..6c5ad55 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -129,7 +129,12 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
       (namedQuery.alias, namedQuery)
     }
     // Check for duplicate names.
-    checkDuplicateKeys(ctes, ctx)
+    val duplicates = ctes.groupBy(_._1).filter(_._2.size > 1).keys
+    if (duplicates.nonEmpty) {
+      throw new ParseException(
+        s"CTE definition can't have duplicate names: ${duplicates.mkString("'", "', '", "'")}.",
+        ctx)
+    }
     With(plan, ctes)
   }
 
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
index d48da4a..fb245ee 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
@@ -100,7 +100,7 @@ class PlanParserSuite extends AnalysisTest {
         "cte2" -> ((table("cte1").select(star()), Seq.empty))))
     intercept(
       "with cte1 (select 1), cte1 as (select 1 from cte1) select * from cte1",
-      "Found duplicate keys 'cte1'")
+      "CTE definition can't have duplicate names: 'cte1'.")
   }
 
   test("simple select query") {
diff --git a/sql/core/src/test/resources/sql-tests/inputs/cte.sql b/sql/core/src/test/resources/sql-tests/inputs/cte.sql
index ac448eb..d0e145c 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/cte.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/cte.sql
@@ -31,6 +31,24 @@ FROM   CTE1 t1
 WITH t(x) AS (SELECT 1)
 SELECT * FROM t WHERE x = 1;
 
+-- CTE with multiple column aliases
+WITH t(x, y) AS (SELECT 1, 2)
+SELECT * FROM t WHERE x = 1 AND y = 2;
+
+-- CTE with duplicate column aliases
+WITH t(x, x) AS (SELECT 1, 2)
+SELECT * FROM t;
+
+-- CTE with empty column alias list is not allowed
+WITH t() AS (SELECT 1)
+SELECT * FROM t;
+
+-- CTEs with duplicate names are not allowed
+WITH
+  t(x) AS (SELECT 1),
+  t(x) AS (SELECT 2)
+SELECT * FROM t;
+
 -- CTE in CTE definition
 WITH t as (
   WITH t2 AS (SELECT 1)
diff --git a/sql/core/src/test/resources/sql-tests/results/cte.sql.out b/sql/core/src/test/resources/sql-tests/results/cte.sql.out
index b89e29f..9e90908 100644
--- a/sql/core/src/test/resources/sql-tests/results/cte.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/cte.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 23
+-- Number of queries: 27
 
 
 -- !query 0
@@ -108,40 +108,94 @@ struct<x:int>
 
 
 -- !query 9
+WITH t(x, y) AS (SELECT 1, 2)
+SELECT * FROM t WHERE x = 1 AND y = 2
+-- !query 9 schema
+struct<x:int,y:int>
+-- !query 9 output
+1	2
+
+
+-- !query 10
+WITH t(x, x) AS (SELECT 1, 2)
+SELECT * FROM t
+-- !query 10 schema
+struct<x:int,x:int>
+-- !query 10 output
+1	2
+
+
+-- !query 11
+WITH t() AS (SELECT 1)
+SELECT * FROM t
+-- !query 11 schema
+struct<>
+-- !query 11 output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+no viable alternative at input 'WITH t()'(line 1, pos 7)
+
+== SQL ==
+WITH t() AS (SELECT 1)
+-------^^^
+SELECT * FROM t
+
+
+-- !query 12
+WITH
+  t(x) AS (SELECT 1),
+  t(x) AS (SELECT 2)
+SELECT * FROM t
+-- !query 12 schema
+struct<>
+-- !query 12 output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+CTE definition can't have duplicate names: 't'.(line 1, pos 0)
+
+== SQL ==
+WITH
+^^^
+  t(x) AS (SELECT 1),
+  t(x) AS (SELECT 2)
+SELECT * FROM t
+
+
+-- !query 13
 WITH t as (
   WITH t2 AS (SELECT 1)
   SELECT * FROM t2
 )
 SELECT * FROM t
--- !query 9 schema
+-- !query 13 schema
 struct<1:int>
--- !query 9 output
+-- !query 13 output
 1
 
 
--- !query 10
+-- !query 14
 SELECT max(c) FROM (
   WITH t(c) AS (SELECT 1)
   SELECT * FROM t
 )
--- !query 10 schema
+-- !query 14 schema
 struct<max(c):int>
--- !query 10 output
+-- !query 14 output
 1
 
 
--- !query 11
+-- !query 15
 SELECT (
   WITH t AS (SELECT 1)
   SELECT * FROM t
 )
--- !query 11 schema
+-- !query 15 schema
 struct<scalarsubquery():int>
--- !query 11 output
+-- !query 15 output
 1
 
 
--- !query 12
+-- !query 16
 WITH
   t AS (SELECT 1),
   t2 AS (
@@ -149,13 +203,13 @@ WITH
     SELECT * FROM t
   )
 SELECT * FROM t2
--- !query 12 schema
+-- !query 16 schema
 struct<1:int>
--- !query 12 output
+-- !query 16 output
 1
 
 
--- !query 13
+-- !query 17
 WITH
   t(c) AS (SELECT 1),
   t2 AS (
@@ -167,13 +221,13 @@ WITH
     )
   )
 SELECT * FROM t2
--- !query 13 schema
+-- !query 17 schema
 struct<scalarsubquery():int>
--- !query 13 output
+-- !query 17 output
 1
 
 
--- !query 14
+-- !query 18
 WITH
   t AS (SELECT 1),
   t2 AS (
@@ -185,25 +239,25 @@ WITH
     SELECT * FROM t2
   )
 SELECT * FROM t2
--- !query 14 schema
+-- !query 18 schema
 struct<2:int>
--- !query 14 output
+-- !query 18 output
 2
 
 
--- !query 15
+-- !query 19
 WITH t(c) AS (SELECT 1)
 SELECT max(c) FROM (
   WITH t(c) AS (SELECT 2)
   SELECT * FROM t
 )
--- !query 15 schema
+-- !query 19 schema
 struct<max(c):int>
--- !query 15 output
+-- !query 19 output
 2
 
 
--- !query 16
+-- !query 20
 WITH t(c) AS (SELECT 1)
 SELECT sum(c) FROM (
   SELECT max(c) AS c FROM (
@@ -211,13 +265,13 @@ SELECT sum(c) FROM (
     SELECT * FROM t
   )
 )
--- !query 16 schema
+-- !query 20 schema
 struct<sum(c):bigint>
--- !query 16 output
+-- !query 20 output
 2
 
 
--- !query 17
+-- !query 21
 WITH t(c) AS (SELECT 1)
 SELECT sum(c) FROM (
   WITH t(c) AS (SELECT 2)
@@ -226,25 +280,25 @@ SELECT sum(c) FROM (
     SELECT * FROM t
   )
 )
--- !query 17 schema
+-- !query 21 schema
 struct<sum(c):bigint>
--- !query 17 output
+-- !query 21 output
 3
 
 
--- !query 18
+-- !query 22
 WITH t AS (SELECT 1)
 SELECT (
   WITH t AS (SELECT 2)
   SELECT * FROM t
 )
--- !query 18 schema
+-- !query 22 schema
 struct<scalarsubquery():int>
--- !query 18 output
+-- !query 22 output
 1
 
 
--- !query 19
+-- !query 23
 WITH t AS (SELECT 1)
 SELECT (
   SELECT (
@@ -252,13 +306,13 @@ SELECT (
     SELECT * FROM t
   )
 )
--- !query 19 schema
+-- !query 23 schema
 struct<scalarsubquery():int>
--- !query 19 output
+-- !query 23 output
 1
 
 
--- !query 20
+-- !query 24
 WITH t AS (SELECT 1)
 SELECT (
   WITH t AS (SELECT 2)
@@ -267,23 +321,23 @@ SELECT (
     SELECT * FROM t
   )
 )
--- !query 20 schema
+-- !query 24 schema
 struct<scalarsubquery():int>
--- !query 20 output
+-- !query 24 output
 1
 
 
--- !query 21
+-- !query 25
 DROP VIEW IF EXISTS t
--- !query 21 schema
+-- !query 25 schema
 struct<>
--- !query 21 output
+-- !query 25 output
 
 
 
--- !query 22
+-- !query 26
 DROP VIEW IF EXISTS t2
--- !query 22 schema
+-- !query 26 schema
 struct<>
--- !query 22 output
+-- !query 26 output
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org