You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2017/07/07 12:04:38 UTC
spark git commit: [SPARK-21335][SQL] support un-aliased subquery
Repository: spark
Updated Branches:
refs/heads/master 56536e999 -> fef081309
[SPARK-21335][SQL] support un-aliased subquery
## What changes were proposed in this pull request?
un-aliased subquery has been supported by Spark SQL for a long time. Its semantics were not well defined and it had confusing behaviors, and it's not a standard SQL syntax, so we disallowed it in https://issues.apache.org/jira/browse/SPARK-20690 .
However, this is a breaking change, and we do have existing queries using un-aliased subquery. We should add the support back and fix its semantic.
This PR fixes the un-aliased subquery by assigning a default alias name.
After this PR, there is no syntax change from branch 2.2 to master, but we invalidate a weird use case:
`SELECT v.i from (SELECT i FROM v)`. Now this query will throw analysis exception because users should not be able to use the qualifier inside a subquery.
## How was this patch tested?
new regression test
Author: Wenchen Fan <we...@databricks.com>
Closes #18559 from cloud-fan/sub-query.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fef08130
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fef08130
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fef08130
Branch: refs/heads/master
Commit: fef081309fc28efe8e136f363d85d7ccd9466e61
Parents: 56536e9
Author: Wenchen Fan <we...@databricks.com>
Authored: Fri Jul 7 20:04:30 2017 +0800
Committer: Wenchen Fan <we...@databricks.com>
Committed: Fri Jul 7 20:04:30 2017 +0800
----------------------------------------------------------------------
.../spark/sql/catalyst/parser/AstBuilder.scala | 16 ++--
.../catalyst/plans/logical/LogicalPlan.scala | 2 +-
.../sql/catalyst/parser/PlanParserSuite.scala | 13 ----
.../resources/sql-tests/inputs/group-by.sql | 2 +-
.../test/resources/sql-tests/inputs/limit.sql | 2 +-
.../sql-tests/inputs/string-functions.sql | 2 +-
.../subquery/in-subquery/in-set-operations.sql | 2 +-
.../negative-cases/invalid-correlation.sql | 2 +-
.../scalar-subquery-predicate.sql | 2 +-
.../test/resources/sql-tests/inputs/union.sql | 4 +-
.../results/columnresolution-negative.sql.out | 16 ++--
.../sql-tests/results/group-by.sql.out | 2 +-
.../resources/sql-tests/results/limit.sql.out | 2 +-
.../sql-tests/results/string-functions.sql.out | 6 +-
.../in-subquery/in-set-operations.sql.out | 2 +-
.../negative-cases/invalid-correlation.sql.out | 2 +-
.../scalar-subquery-predicate.sql.out | 2 +-
.../results/subquery/subquery-in-from.sql.out | 20 +----
.../resources/sql-tests/results/union.sql.out | 4 +-
.../org/apache/spark/sql/CachedTableSuite.scala | 82 +++++++-------------
.../org/apache/spark/sql/SQLQuerySuite.scala | 13 ++++
.../org/apache/spark/sql/SubquerySuite.scala | 8 +-
22 files changed, 83 insertions(+), 123 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/fef08130/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index b6a4686..4d72590 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -751,15 +751,17 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
* hooks.
*/
override def visitAliasedQuery(ctx: AliasedQueryContext): LogicalPlan = withOrigin(ctx) {
- // The unaliased subqueries in the FROM clause are disallowed. Instead of rejecting it in
- // parser rules, we handle it here in order to provide better error message.
- if (ctx.strictIdentifier == null) {
- throw new ParseException("The unaliased subqueries in the FROM clause are not supported.",
- ctx)
+ val alias = if (ctx.strictIdentifier == null) {
+ // For un-aliased subqueries, use a default alias name that is not likely to conflict with
+ // normal subquery names, so that parent operators can only access the columns in subquery by
+ // unqualified names. Users can still use this special qualifier to access columns if they
+ // know it, but that's not recommended.
+ "__auto_generated_subquery_name"
+ } else {
+ ctx.strictIdentifier.getText
}
- aliasPlan(ctx.strictIdentifier,
- plan(ctx.queryNoWith).optionalMap(ctx.sample)(withSample))
+ SubqueryAlias(alias, plan(ctx.queryNoWith).optionalMap(ctx.sample)(withSample))
}
/**
http://git-wip-us.apache.org/repos/asf/spark/blob/fef08130/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
index 8649603..9b440cd 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
@@ -253,7 +253,7 @@ abstract class LogicalPlan
// More than one match.
case ambiguousReferences =>
- val referenceNames = ambiguousReferences.map(_._1).mkString(", ")
+ val referenceNames = ambiguousReferences.map(_._1.qualifiedName).mkString(", ")
throw new AnalysisException(
s"Reference '$name' is ambiguous, could be: $referenceNames.")
}
http://git-wip-us.apache.org/repos/asf/spark/blob/fef08130/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
index 5b2573f..6dad097 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
@@ -450,19 +450,6 @@ class PlanParserSuite extends AnalysisTest {
| (select id from t0)) as u_1
""".stripMargin,
plan.union(plan).union(plan).as("u_1").select('id))
-
- }
-
- test("aliased subquery") {
- val errMsg = "The unaliased subqueries in the FROM clause are not supported"
-
- assertEqual("select a from (select id as a from t0) tt",
- table("t0").select('id.as("a")).as("tt").select('a))
- intercept("select a from (select id as a from t0)", errMsg)
-
- assertEqual("from (select id as a from t0) tt select a",
- table("t0").select('id.as("a")).as("tt").select('a))
- intercept("from (select id as a from t0) select a", errMsg)
}
test("scalar sub-query") {
http://git-wip-us.apache.org/repos/asf/spark/blob/fef08130/sql/core/src/test/resources/sql-tests/inputs/group-by.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql
index bc21207..1e13845 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql
@@ -34,7 +34,7 @@ SELECT SKEWNESS(a), KURTOSIS(a), MIN(a), MAX(a), AVG(a), VARIANCE(a), STDDEV(a),
FROM testData;
-- Aggregate with foldable input and multiple distinct groups.
-SELECT COUNT(DISTINCT b), COUNT(DISTINCT b, c) FROM (SELECT 1 AS a, 2 AS b, 3 AS c) t GROUP BY a;
+SELECT COUNT(DISTINCT b), COUNT(DISTINCT b, c) FROM (SELECT 1 AS a, 2 AS b, 3 AS c) GROUP BY a;
-- Aliases in SELECT could be used in GROUP BY
SELECT a AS k, COUNT(b) FROM testData GROUP BY k;
http://git-wip-us.apache.org/repos/asf/spark/blob/fef08130/sql/core/src/test/resources/sql-tests/inputs/limit.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/inputs/limit.sql b/sql/core/src/test/resources/sql-tests/inputs/limit.sql
index df555bd..f21912a 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/limit.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/limit.sql
@@ -21,7 +21,7 @@ SELECT * FROM testdata LIMIT true;
SELECT * FROM testdata LIMIT 'a';
-- limit within a subquery
-SELECT * FROM (SELECT * FROM range(10) LIMIT 5) t WHERE id > 3;
+SELECT * FROM (SELECT * FROM range(10) LIMIT 5) WHERE id > 3;
-- limit ALL
SELECT * FROM testdata WHERE key < 3 LIMIT ALL;
http://git-wip-us.apache.org/repos/asf/spark/blob/fef08130/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql
index 20c0390..c95f481 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql
@@ -7,7 +7,7 @@ select 'a' || 'b' || 'c';
-- Check if catalyst combine nested `Concat`s
EXPLAIN EXTENDED SELECT (col1 || col2 || col3 || col4) col
-FROM (SELECT id col1, id col2, id col3, id col4 FROM range(10)) t;
+FROM (SELECT id col1, id col2, id col3, id col4 FROM range(10));
-- replace function
select replace('abc', 'b', '123');
http://git-wip-us.apache.org/repos/asf/spark/blob/fef08130/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-set-operations.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-set-operations.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-set-operations.sql
index 42f84e9..5c371d2 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-set-operations.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-set-operations.sql
@@ -394,7 +394,7 @@ FROM (SELECT *
FROM t1)) t4
WHERE t4.t2b IN (SELECT Min(t3b)
FROM t3
- WHERE t4.t2a = t3a)) T;
+ WHERE t4.t2a = t3a));
-- UNION, UNION ALL, UNION DISTINCT, INTERSECT and EXCEPT for NOT IN
-- TC 01.12
http://git-wip-us.apache.org/repos/asf/spark/blob/fef08130/sql/core/src/test/resources/sql-tests/inputs/subquery/negative-cases/invalid-correlation.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/negative-cases/invalid-correlation.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/negative-cases/invalid-correlation.sql
index f3f0c76..e22cade 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/subquery/negative-cases/invalid-correlation.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/negative-cases/invalid-correlation.sql
@@ -23,7 +23,7 @@ AND t2b = (SELECT max(avg)
FROM (SELECT t2b, avg(t2b) avg
FROM t2
WHERE t2a = t1.t1b
- ) T
+ )
)
;
http://git-wip-us.apache.org/repos/asf/spark/blob/fef08130/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-predicate.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-predicate.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-predicate.sql
index dbe8d76..fb0d07f 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-predicate.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-predicate.sql
@@ -19,7 +19,7 @@ AND c.cv = (SELECT max(avg)
FROM (SELECT c1.cv, avg(c1.cv) avg
FROM c c1
WHERE c1.ck = p.pk
- GROUP BY c1.cv) T);
+ GROUP BY c1.cv));
create temporary view t1 as select * from values
('val1a', 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 00:00:00.000', date '2014-04-04'),
http://git-wip-us.apache.org/repos/asf/spark/blob/fef08130/sql/core/src/test/resources/sql-tests/inputs/union.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/inputs/union.sql b/sql/core/src/test/resources/sql-tests/inputs/union.sql
index 63bc044..e57d69e 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/union.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/union.sql
@@ -5,7 +5,7 @@ CREATE OR REPLACE TEMPORARY VIEW t2 AS VALUES (1.0, 1), (2.0, 4) tbl(c1, c2);
SELECT *
FROM (SELECT * FROM t1
UNION ALL
- SELECT * FROM t1) T;
+ SELECT * FROM t1);
-- Type Coerced Union
SELECT *
@@ -13,7 +13,7 @@ FROM (SELECT * FROM t1
UNION ALL
SELECT * FROM t2
UNION ALL
- SELECT * FROM t2) T;
+ SELECT * FROM t2);
-- Regression test for SPARK-18622
SELECT a
http://git-wip-us.apache.org/repos/asf/spark/blob/fef08130/sql/core/src/test/resources/sql-tests/results/columnresolution-negative.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/columnresolution-negative.sql.out b/sql/core/src/test/resources/sql-tests/results/columnresolution-negative.sql.out
index 9e60e59..b5a4f5c 100644
--- a/sql/core/src/test/resources/sql-tests/results/columnresolution-negative.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/columnresolution-negative.sql.out
@@ -72,7 +72,7 @@ SELECT i1 FROM t1, mydb1.t1
struct<>
-- !query 8 output
org.apache.spark.sql.AnalysisException
-Reference 'i1' is ambiguous, could be: i1#x, i1#x.; line 1 pos 7
+Reference 'i1' is ambiguous, could be: t1.i1, t1.i1.; line 1 pos 7
-- !query 9
@@ -81,7 +81,7 @@ SELECT t1.i1 FROM t1, mydb1.t1
struct<>
-- !query 9 output
org.apache.spark.sql.AnalysisException
-Reference 't1.i1' is ambiguous, could be: i1#x, i1#x.; line 1 pos 7
+Reference 't1.i1' is ambiguous, could be: t1.i1, t1.i1.; line 1 pos 7
-- !query 10
@@ -99,7 +99,7 @@ SELECT i1 FROM t1, mydb2.t1
struct<>
-- !query 11 output
org.apache.spark.sql.AnalysisException
-Reference 'i1' is ambiguous, could be: i1#x, i1#x.; line 1 pos 7
+Reference 'i1' is ambiguous, could be: t1.i1, t1.i1.; line 1 pos 7
-- !query 12
@@ -108,7 +108,7 @@ SELECT t1.i1 FROM t1, mydb2.t1
struct<>
-- !query 12 output
org.apache.spark.sql.AnalysisException
-Reference 't1.i1' is ambiguous, could be: i1#x, i1#x.; line 1 pos 7
+Reference 't1.i1' is ambiguous, could be: t1.i1, t1.i1.; line 1 pos 7
-- !query 13
@@ -125,7 +125,7 @@ SELECT i1 FROM t1, mydb1.t1
struct<>
-- !query 14 output
org.apache.spark.sql.AnalysisException
-Reference 'i1' is ambiguous, could be: i1#x, i1#x.; line 1 pos 7
+Reference 'i1' is ambiguous, could be: t1.i1, t1.i1.; line 1 pos 7
-- !query 15
@@ -134,7 +134,7 @@ SELECT t1.i1 FROM t1, mydb1.t1
struct<>
-- !query 15 output
org.apache.spark.sql.AnalysisException
-Reference 't1.i1' is ambiguous, could be: i1#x, i1#x.; line 1 pos 7
+Reference 't1.i1' is ambiguous, could be: t1.i1, t1.i1.; line 1 pos 7
-- !query 16
@@ -143,7 +143,7 @@ SELECT i1 FROM t1, mydb2.t1
struct<>
-- !query 16 output
org.apache.spark.sql.AnalysisException
-Reference 'i1' is ambiguous, could be: i1#x, i1#x.; line 1 pos 7
+Reference 'i1' is ambiguous, could be: t1.i1, t1.i1.; line 1 pos 7
-- !query 17
@@ -152,7 +152,7 @@ SELECT t1.i1 FROM t1, mydb2.t1
struct<>
-- !query 17 output
org.apache.spark.sql.AnalysisException
-Reference 't1.i1' is ambiguous, could be: i1#x, i1#x.; line 1 pos 7
+Reference 't1.i1' is ambiguous, could be: t1.i1, t1.i1.; line 1 pos 7
-- !query 18
http://git-wip-us.apache.org/repos/asf/spark/blob/fef08130/sql/core/src/test/resources/sql-tests/results/group-by.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out
index e23ebd4..986bb01 100644
--- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out
@@ -134,7 +134,7 @@ struct<skewness(CAST(a AS DOUBLE)):double,kurtosis(CAST(a AS DOUBLE)):double,min
-- !query 14
-SELECT COUNT(DISTINCT b), COUNT(DISTINCT b, c) FROM (SELECT 1 AS a, 2 AS b, 3 AS c) t GROUP BY a
+SELECT COUNT(DISTINCT b), COUNT(DISTINCT b, c) FROM (SELECT 1 AS a, 2 AS b, 3 AS c) GROUP BY a
-- !query 14 schema
struct<count(DISTINCT b):bigint,count(DISTINCT b, c):bigint>
-- !query 14 output
http://git-wip-us.apache.org/repos/asf/spark/blob/fef08130/sql/core/src/test/resources/sql-tests/results/limit.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/limit.sql.out b/sql/core/src/test/resources/sql-tests/results/limit.sql.out
index afdd6df..146abe6 100644
--- a/sql/core/src/test/resources/sql-tests/results/limit.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/limit.sql.out
@@ -93,7 +93,7 @@ The limit expression must be integer type, but got string;
-- !query 10
-SELECT * FROM (SELECT * FROM range(10) LIMIT 5) t WHERE id > 3
+SELECT * FROM (SELECT * FROM range(10) LIMIT 5) WHERE id > 3
-- !query 10 schema
struct<id:bigint>
-- !query 10 output
http://git-wip-us.apache.org/repos/asf/spark/blob/fef08130/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out
index 52eb554..b0ae9d7 100644
--- a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out
@@ -30,20 +30,20 @@ abc
-- !query 3
EXPLAIN EXTENDED SELECT (col1 || col2 || col3 || col4) col
-FROM (SELECT id col1, id col2, id col3, id col4 FROM range(10)) t
+FROM (SELECT id col1, id col2, id col3, id col4 FROM range(10))
-- !query 3 schema
struct<plan:string>
-- !query 3 output
== Parsed Logical Plan ==
'Project [concat(concat(concat('col1, 'col2), 'col3), 'col4) AS col#x]
-+- 'SubqueryAlias t
++- 'SubqueryAlias __auto_generated_subquery_name
+- 'Project ['id AS col1#x, 'id AS col2#x, 'id AS col3#x, 'id AS col4#x]
+- 'UnresolvedTableValuedFunction range, [10]
== Analyzed Logical Plan ==
col: string
Project [concat(concat(concat(cast(col1#xL as string), cast(col2#xL as string)), cast(col3#xL as string)), cast(col4#xL as string)) AS col#x]
-+- SubqueryAlias t
++- SubqueryAlias __auto_generated_subquery_name
+- Project [id#xL AS col1#xL, id#xL AS col2#xL, id#xL AS col3#xL, id#xL AS col4#xL]
+- Range (0, 10, step=1, splits=None)
http://git-wip-us.apache.org/repos/asf/spark/blob/fef08130/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-set-operations.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-set-operations.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-set-operations.sql.out
index 5780f49..e06f920 100644
--- a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-set-operations.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-set-operations.sql.out
@@ -496,7 +496,7 @@ FROM (SELECT *
FROM t1)) t4
WHERE t4.t2b IN (SELECT Min(t3b)
FROM t3
- WHERE t4.t2a = t3a)) T
+ WHERE t4.t2a = t3a))
-- !query 13 schema
struct<t2a:string,t2b:smallint,t2c:int,t2d:bigint,t2e:float,t2f:double,t2g:decimal(2,-2),t2h:timestamp,t2i:date>
-- !query 13 output
http://git-wip-us.apache.org/repos/asf/spark/blob/fef08130/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out
index ca3930b..e4b1a2d 100644
--- a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out
@@ -40,7 +40,7 @@ AND t2b = (SELECT max(avg)
FROM (SELECT t2b, avg(t2b) avg
FROM t2
WHERE t2a = t1.t1b
- ) T
+ )
)
-- !query 3 schema
struct<>
http://git-wip-us.apache.org/repos/asf/spark/blob/fef08130/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-predicate.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-predicate.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-predicate.sql.out
index 1d5dddc..8b29300 100644
--- a/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-predicate.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-predicate.sql.out
@@ -39,7 +39,7 @@ AND c.cv = (SELECT max(avg)
FROM (SELECT c1.cv, avg(c1.cv) avg
FROM c c1
WHERE c1.ck = p.pk
- GROUP BY c1.cv) T)
+ GROUP BY c1.cv))
-- !query 3 schema
struct<pk:int,cv:int>
-- !query 3 output
http://git-wip-us.apache.org/repos/asf/spark/blob/fef08130/sql/core/src/test/resources/sql-tests/results/subquery/subquery-in-from.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/subquery-in-from.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/subquery-in-from.sql.out
index 1455355..50370df 100644
--- a/sql/core/src/test/resources/sql-tests/results/subquery/subquery-in-from.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/subquery/subquery-in-from.sql.out
@@ -37,26 +37,14 @@ struct<key:int,value:string>
-- !query 4
SELECT * FROM (SELECT * FROM testData) WHERE key = 1
-- !query 4 schema
-struct<>
+struct<key:int,value:string>
-- !query 4 output
-org.apache.spark.sql.catalyst.parser.ParseException
-
-The unaliased subqueries in the FROM clause are not supported.(line 1, pos 14)
-
-== SQL ==
-SELECT * FROM (SELECT * FROM testData) WHERE key = 1
---------------^^^
+1 1
-- !query 5
FROM (SELECT * FROM testData WHERE key = 1) SELECT *
-- !query 5 schema
-struct<>
+struct<key:int,value:string>
-- !query 5 output
-org.apache.spark.sql.catalyst.parser.ParseException
-
-The unaliased subqueries in the FROM clause are not supported.(line 1, pos 5)
-
-== SQL ==
-FROM (SELECT * FROM testData WHERE key = 1) SELECT *
------^^^
+1 1
http://git-wip-us.apache.org/repos/asf/spark/blob/fef08130/sql/core/src/test/resources/sql-tests/results/union.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/union.sql.out b/sql/core/src/test/resources/sql-tests/results/union.sql.out
index 865b3ae..d123b7f 100644
--- a/sql/core/src/test/resources/sql-tests/results/union.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/union.sql.out
@@ -22,7 +22,7 @@ struct<>
SELECT *
FROM (SELECT * FROM t1
UNION ALL
- SELECT * FROM t1) T
+ SELECT * FROM t1)
-- !query 2 schema
struct<c1:int,c2:string>
-- !query 2 output
@@ -38,7 +38,7 @@ FROM (SELECT * FROM t1
UNION ALL
SELECT * FROM t2
UNION ALL
- SELECT * FROM t2) T
+ SELECT * FROM t2)
-- !query 3 schema
struct<c1:decimal(11,1),c2:string>
-- !query 3 output
http://git-wip-us.apache.org/repos/asf/spark/blob/fef08130/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala
index 506cc25..3e4f619 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala
@@ -631,13 +631,13 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSQLContext
val ds2 =
sql(
"""
- |SELECT * FROM (SELECT max(c1) as c1 FROM t1 GROUP BY c1) tt
+ |SELECT * FROM (SELECT c1, max(c1) FROM t1 GROUP BY c1)
|WHERE
- |tt.c1 = (SELECT max(c1) FROM t2 GROUP BY c1)
+ |c1 = (SELECT max(c1) FROM t2 GROUP BY c1)
|OR
|EXISTS (SELECT c1 FROM t3)
|OR
- |tt.c1 IN (SELECT c1 FROM t4)
+ |c1 IN (SELECT c1 FROM t4)
""".stripMargin)
assert(getNumInMemoryRelations(ds2) == 4)
}
@@ -683,20 +683,15 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSQLContext
Seq(1).toDF("c1").createOrReplaceTempView("t1")
Seq(2).toDF("c1").createOrReplaceTempView("t2")
- sql(
+ val sql1 =
"""
|SELECT * FROM t1
|WHERE
|NOT EXISTS (SELECT * FROM t2)
- """.stripMargin).cache()
+ """.stripMargin
+ sql(sql1).cache()
- val cachedDs =
- sql(
- """
- |SELECT * FROM t1
- |WHERE
- |NOT EXISTS (SELECT * FROM t2)
- """.stripMargin)
+ val cachedDs = sql(sql1)
assert(getNumInMemoryRelations(cachedDs) == 1)
// Additional predicate in the subquery plan should cause a cache miss
@@ -717,20 +712,15 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSQLContext
Seq(1).toDF("c1").createOrReplaceTempView("t2")
// Simple correlated predicate in subquery
- sql(
+ val sqlText =
"""
|SELECT * FROM t1
|WHERE
|t1.c1 in (SELECT t2.c1 FROM t2 where t1.c1 = t2.c1)
- """.stripMargin).cache()
+ """.stripMargin
+ sql(sqlText).cache()
- val cachedDs =
- sql(
- """
- |SELECT * FROM t1
- |WHERE
- |t1.c1 in (SELECT t2.c1 FROM t2 where t1.c1 = t2.c1)
- """.stripMargin)
+ val cachedDs = sql(sqlText)
assert(getNumInMemoryRelations(cachedDs) == 1)
}
}
@@ -741,22 +731,16 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSQLContext
spark.catalog.cacheTable("t1")
// underlying table t1 is cached as well as the query that refers to it.
- val ds =
- sql(
+ val sqlText =
"""
|SELECT * FROM t1
|WHERE
|NOT EXISTS (SELECT * FROM t1)
- """.stripMargin)
+ """.stripMargin
+ val ds = sql(sqlText)
assert(getNumInMemoryRelations(ds) == 2)
- val cachedDs =
- sql(
- """
- |SELECT * FROM t1
- |WHERE
- |NOT EXISTS (SELECT * FROM t1)
- """.stripMargin).cache()
+ val cachedDs = sql(sqlText).cache()
assert(getNumInMemoryTablesRecursively(cachedDs.queryExecution.sparkPlan) == 3)
}
}
@@ -769,45 +753,31 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSQLContext
Seq(1).toDF("c1").createOrReplaceTempView("t4")
// Nested predicate subquery
- sql(
+ val sql1 =
"""
|SELECT * FROM t1
|WHERE
|c1 IN (SELECT c1 FROM t2 WHERE c1 IN (SELECT c1 FROM t3 WHERE c1 = 1))
- """.stripMargin).cache()
+ """.stripMargin
+ sql(sql1).cache()
- val cachedDs =
- sql(
- """
- |SELECT * FROM t1
- |WHERE
- |c1 IN (SELECT c1 FROM t2 WHERE c1 IN (SELECT c1 FROM t3 WHERE c1 = 1))
- """.stripMargin)
+ val cachedDs = sql(sql1)
assert(getNumInMemoryRelations(cachedDs) == 1)
// Scalar subquery and predicate subquery
- sql(
+ val sql2 =
"""
- |SELECT * FROM (SELECT max(c1) as c1 FROM t1 GROUP BY c1) tt
+ |SELECT * FROM (SELECT c1, max(c1) FROM t1 GROUP BY c1)
|WHERE
- |tt.c1 = (SELECT max(c1) FROM t2 GROUP BY c1)
+ |c1 = (SELECT max(c1) FROM t2 GROUP BY c1)
|OR
|EXISTS (SELECT c1 FROM t3)
|OR
- |tt.c1 IN (SELECT c1 FROM t4)
- """.stripMargin).cache()
+ |c1 IN (SELECT c1 FROM t4)
+ """.stripMargin
+ sql(sql2).cache()
- val cachedDs2 =
- sql(
- """
- |SELECT * FROM (SELECT max(c1) as c1 FROM t1 GROUP BY c1) tt
- |WHERE
- |tt.c1 = (SELECT max(c1) FROM t2 GROUP BY c1)
- |OR
- |EXISTS (SELECT c1 FROM t3)
- |OR
- |tt.c1 IN (SELECT c1 FROM t4)
- """.stripMargin)
+ val cachedDs2 = sql(sql2)
assert(getNumInMemoryRelations(cachedDs2) == 1)
}
}
http://git-wip-us.apache.org/repos/asf/spark/blob/fef08130/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 5171aae..472ff73 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -2638,4 +2638,17 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
}
}
}
+
+ test("SPARK-21335: support un-aliased subquery") {
+ withTempView("v") {
+ Seq(1 -> "a").toDF("i", "j").createOrReplaceTempView("v")
+ checkAnswer(sql("SELECT i from (SELECT i FROM v)"), Row(1))
+
+ val e = intercept[AnalysisException](sql("SELECT v.i from (SELECT i FROM v)"))
+ assert(e.message ==
+ "cannot resolve '`v.i`' given input columns: [__auto_generated_subquery_name.i]")
+
+ checkAnswer(sql("SELECT __auto_generated_subquery_name.i from (SELECT i FROM v)"), Row(1))
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/spark/blob/fef08130/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
index c0a3b5a..7bcb419 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
@@ -112,7 +112,7 @@ class SubquerySuite extends QueryTest with SharedSQLContext {
| with t4 as (select 1 as d, 3 as e)
| select * from t4 cross join t2 where t2.b = t4.d
| )
- | select a from (select 1 as a union all select 2 as a) t
+ | select a from (select 1 as a union all select 2 as a)
| where a = (select max(d) from t3)
""".stripMargin),
Array(Row(1))
@@ -606,8 +606,8 @@ class SubquerySuite extends QueryTest with SharedSQLContext {
| select cntPlusOne + 1 as cntPlusTwo from (
| select cnt + 1 as cntPlusOne from (
| select sum(r.c) s, count(*) cnt from r where l.a = r.c having cnt = 0
- | ) t1
- | ) t2
+ | )
+ | )
|) = 2""".stripMargin),
Row(1) :: Row(1) :: Row(null) :: Row(null) :: Nil)
}
@@ -655,7 +655,7 @@ class SubquerySuite extends QueryTest with SharedSQLContext {
"""
| select c1 from onerow t1
| where exists (select 1
- | from (select 1 as c1 from onerow t2 LIMIT 1) t2
+ | from (select c1 from onerow t2 LIMIT 1) t2
| where t1.c1=t2.c1)""".stripMargin),
Row(1) :: Nil)
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org