You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2019/07/05 18:42:25 UTC
[spark] branch master updated: [SPARK-28002][SQL][FOLLOWUP] Fix duplicate CTE error message and add more test cases
This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 1272df2 [SPARK-28002][SQL][FOLLOWUP] Fix duplicate CTE error message and add more test cases
1272df2 is described below
commit 1272df29fe483cecdc3f592d0a7a84b60cb3c558
Author: Peter Toth <pe...@gmail.com>
AuthorDate: Fri Jul 5 11:42:01 2019 -0700
[SPARK-28002][SQL][FOLLOWUP] Fix duplicate CTE error message and add more test cases
## What changes were proposed in this pull request?
This PR fixes the error message reported when a WITH clause defines duplicate CTE names and adds some more WITH test cases, as a follow-up to https://github.com/apache/spark/pull/24842
## How was this patch tested?
Added new unit tests (UTs) to cte.sql and updated the expected error message in PlanParserSuite.
Closes #24949 from peter-toth/SPARK-28002-follow-up.
Authored-by: Peter Toth <pe...@gmail.com>
Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
.../spark/sql/catalyst/parser/AstBuilder.scala | 7 +-
.../sql/catalyst/parser/PlanParserSuite.scala | 2 +-
.../src/test/resources/sql-tests/inputs/cte.sql | 18 +++
.../test/resources/sql-tests/results/cte.sql.out | 138 ++++++++++++++-------
4 files changed, 121 insertions(+), 44 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 5eef8db..6c5ad55 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -129,7 +129,12 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
(namedQuery.alias, namedQuery)
}
// Check for duplicate names.
- checkDuplicateKeys(ctes, ctx)
+ val duplicates = ctes.groupBy(_._1).filter(_._2.size > 1).keys
+ if (duplicates.nonEmpty) {
+ throw new ParseException(
+ s"CTE definition can't have duplicate names: ${duplicates.mkString("'", "', '", "'")}.",
+ ctx)
+ }
With(plan, ctes)
}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
index d48da4a..fb245ee 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
@@ -100,7 +100,7 @@ class PlanParserSuite extends AnalysisTest {
"cte2" -> ((table("cte1").select(star()), Seq.empty))))
intercept(
"with cte1 (select 1), cte1 as (select 1 from cte1) select * from cte1",
- "Found duplicate keys 'cte1'")
+ "CTE definition can't have duplicate names: 'cte1'.")
}
test("simple select query") {
diff --git a/sql/core/src/test/resources/sql-tests/inputs/cte.sql b/sql/core/src/test/resources/sql-tests/inputs/cte.sql
index ac448eb..d0e145c 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/cte.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/cte.sql
@@ -31,6 +31,24 @@ FROM CTE1 t1
WITH t(x) AS (SELECT 1)
SELECT * FROM t WHERE x = 1;
+-- CTE with multiple column aliases
+WITH t(x, y) AS (SELECT 1, 2)
+SELECT * FROM t WHERE x = 1 AND y = 2;
+
+-- CTE with duplicate column aliases
+WITH t(x, x) AS (SELECT 1, 2)
+SELECT * FROM t;
+
+-- CTE with empty column alias list is not allowed
+WITH t() AS (SELECT 1)
+SELECT * FROM t;
+
+-- CTEs with duplicate names are not allowed
+WITH
+ t(x) AS (SELECT 1),
+ t(x) AS (SELECT 2)
+SELECT * FROM t;
+
-- CTE in CTE definition
WITH t as (
WITH t2 AS (SELECT 1)
diff --git a/sql/core/src/test/resources/sql-tests/results/cte.sql.out b/sql/core/src/test/resources/sql-tests/results/cte.sql.out
index b89e29f..9e90908 100644
--- a/sql/core/src/test/resources/sql-tests/results/cte.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/cte.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 23
+-- Number of queries: 27
-- !query 0
@@ -108,40 +108,94 @@ struct<x:int>
-- !query 9
+WITH t(x, y) AS (SELECT 1, 2)
+SELECT * FROM t WHERE x = 1 AND y = 2
+-- !query 9 schema
+struct<x:int,y:int>
+-- !query 9 output
+1 2
+
+
+-- !query 10
+WITH t(x, x) AS (SELECT 1, 2)
+SELECT * FROM t
+-- !query 10 schema
+struct<x:int,x:int>
+-- !query 10 output
+1 2
+
+
+-- !query 11
+WITH t() AS (SELECT 1)
+SELECT * FROM t
+-- !query 11 schema
+struct<>
+-- !query 11 output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+no viable alternative at input 'WITH t()'(line 1, pos 7)
+
+== SQL ==
+WITH t() AS (SELECT 1)
+-------^^^
+SELECT * FROM t
+
+
+-- !query 12
+WITH
+ t(x) AS (SELECT 1),
+ t(x) AS (SELECT 2)
+SELECT * FROM t
+-- !query 12 schema
+struct<>
+-- !query 12 output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+CTE definition can't have duplicate names: 't'.(line 1, pos 0)
+
+== SQL ==
+WITH
+^^^
+ t(x) AS (SELECT 1),
+ t(x) AS (SELECT 2)
+SELECT * FROM t
+
+
+-- !query 13
WITH t as (
WITH t2 AS (SELECT 1)
SELECT * FROM t2
)
SELECT * FROM t
--- !query 9 schema
+-- !query 13 schema
struct<1:int>
--- !query 9 output
+-- !query 13 output
1
--- !query 10
+-- !query 14
SELECT max(c) FROM (
WITH t(c) AS (SELECT 1)
SELECT * FROM t
)
--- !query 10 schema
+-- !query 14 schema
struct<max(c):int>
--- !query 10 output
+-- !query 14 output
1
--- !query 11
+-- !query 15
SELECT (
WITH t AS (SELECT 1)
SELECT * FROM t
)
--- !query 11 schema
+-- !query 15 schema
struct<scalarsubquery():int>
--- !query 11 output
+-- !query 15 output
1
--- !query 12
+-- !query 16
WITH
t AS (SELECT 1),
t2 AS (
@@ -149,13 +203,13 @@ WITH
SELECT * FROM t
)
SELECT * FROM t2
--- !query 12 schema
+-- !query 16 schema
struct<1:int>
--- !query 12 output
+-- !query 16 output
1
--- !query 13
+-- !query 17
WITH
t(c) AS (SELECT 1),
t2 AS (
@@ -167,13 +221,13 @@ WITH
)
)
SELECT * FROM t2
--- !query 13 schema
+-- !query 17 schema
struct<scalarsubquery():int>
--- !query 13 output
+-- !query 17 output
1
--- !query 14
+-- !query 18
WITH
t AS (SELECT 1),
t2 AS (
@@ -185,25 +239,25 @@ WITH
SELECT * FROM t2
)
SELECT * FROM t2
--- !query 14 schema
+-- !query 18 schema
struct<2:int>
--- !query 14 output
+-- !query 18 output
2
--- !query 15
+-- !query 19
WITH t(c) AS (SELECT 1)
SELECT max(c) FROM (
WITH t(c) AS (SELECT 2)
SELECT * FROM t
)
--- !query 15 schema
+-- !query 19 schema
struct<max(c):int>
--- !query 15 output
+-- !query 19 output
2
--- !query 16
+-- !query 20
WITH t(c) AS (SELECT 1)
SELECT sum(c) FROM (
SELECT max(c) AS c FROM (
@@ -211,13 +265,13 @@ SELECT sum(c) FROM (
SELECT * FROM t
)
)
--- !query 16 schema
+-- !query 20 schema
struct<sum(c):bigint>
--- !query 16 output
+-- !query 20 output
2
--- !query 17
+-- !query 21
WITH t(c) AS (SELECT 1)
SELECT sum(c) FROM (
WITH t(c) AS (SELECT 2)
@@ -226,25 +280,25 @@ SELECT sum(c) FROM (
SELECT * FROM t
)
)
--- !query 17 schema
+-- !query 21 schema
struct<sum(c):bigint>
--- !query 17 output
+-- !query 21 output
3
--- !query 18
+-- !query 22
WITH t AS (SELECT 1)
SELECT (
WITH t AS (SELECT 2)
SELECT * FROM t
)
--- !query 18 schema
+-- !query 22 schema
struct<scalarsubquery():int>
--- !query 18 output
+-- !query 22 output
1
--- !query 19
+-- !query 23
WITH t AS (SELECT 1)
SELECT (
SELECT (
@@ -252,13 +306,13 @@ SELECT (
SELECT * FROM t
)
)
--- !query 19 schema
+-- !query 23 schema
struct<scalarsubquery():int>
--- !query 19 output
+-- !query 23 output
1
--- !query 20
+-- !query 24
WITH t AS (SELECT 1)
SELECT (
WITH t AS (SELECT 2)
@@ -267,23 +321,23 @@ SELECT (
SELECT * FROM t
)
)
--- !query 20 schema
+-- !query 24 schema
struct<scalarsubquery():int>
--- !query 20 output
+-- !query 24 output
1
--- !query 21
+-- !query 25
DROP VIEW IF EXISTS t
--- !query 21 schema
+-- !query 25 schema
struct<>
--- !query 21 output
+-- !query 25 output
--- !query 22
+-- !query 26
DROP VIEW IF EXISTS t2
--- !query 22 schema
+-- !query 26 schema
struct<>
--- !query 22 output
+-- !query 26 output
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org