You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ya...@apache.org on 2021/01/05 23:49:30 UTC
[spark] branch branch-3.1 updated: [SPARK-34012][SQL] Keep behavior
consistent when conf `spark.sql.legacy.parser.havingWithoutGroupByAsWhere`
is true with migration guide
This is an automated email from the ASF dual-hosted git repository.
yamamuro pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new d729158 [SPARK-34012][SQL] Keep behavior consistent when conf `spark.sql.legacy.parser.havingWithoutGroupByAsWhere` is true with migration guide
d729158 is described below
commit d7291582ebaf815f89474c76d8a35b49172b1ecf
Author: angerszhu <an...@gmail.com>
AuthorDate: Wed Jan 6 08:48:24 2021 +0900
[SPARK-34012][SQL] Keep behavior consistent when conf `spark.sql.legacy.parser.havingWithoutGroupByAsWhere` is true with migration guide
### What changes were proposed in this pull request?
In https://github.com/apache/spark/pull/22696 we support HAVING without GROUP BY means global aggregate
But since we treat having as Filter before, in this way will cause a lot of analyze error, after https://github.com/apache/spark/pull/28294 we use `UnresolvedHaving` to instead `Filter` to solve such problem, but break origin logical about treat `SELECT 1 FROM range(10) HAVING true` as `SELECT 1 FROM range(10) WHERE true` .
This PR fix this issue and add UT.
### Why are the changes needed?
Keep consistent behavior of migration guide.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
added UT
Closes #31039 from AngersZhuuuu/SPARK-25780-Follow-up.
Authored-by: angerszhu <an...@gmail.com>
Signed-off-by: Takeshi Yamamuro <ya...@apache.org>
(cherry picked from commit e279ed304475a6d5a9fbf739fe9ed32ef58171cb)
Signed-off-by: Takeshi Yamamuro <ya...@apache.org>
---
.../spark/sql/catalyst/parser/AstBuilder.scala | 6 ++-
.../test/resources/sql-tests/inputs/group-by.sql | 10 ++++
.../resources/sql-tests/results/group-by.sql.out | 63 +++++++++++++++++++++-
3 files changed, 77 insertions(+), 2 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index a22383c..9d74ac9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -714,7 +714,11 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg
val withProject = if (aggregationClause == null && havingClause != null) {
if (conf.getConf(SQLConf.LEGACY_HAVING_WITHOUT_GROUP_BY_AS_WHERE)) {
// If the legacy conf is set, treat HAVING without GROUP BY as WHERE.
- withHavingClause(havingClause, createProject())
+ val predicate = expression(havingClause.booleanExpression) match {
+ case p: Predicate => p
+ case e => Cast(e, BooleanType)
+ }
+ Filter(predicate, createProject())
} else {
// According to SQL standard, HAVING without GROUP BY means global aggregate.
withHavingClause(havingClause, Aggregate(Nil, namedExpressions, withFilter))
diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql
index 81e2204..6ee1014 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql
@@ -86,6 +86,16 @@ SELECT 1 FROM range(10) HAVING MAX(id) > 0;
SELECT id FROM range(10) HAVING id > 0;
+SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=true;
+
+SELECT 1 FROM range(10) HAVING true;
+
+SELECT 1 FROM range(10) HAVING MAX(id) > 0;
+
+SELECT id FROM range(10) HAVING id > 0;
+
+SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=false;
+
-- Test data
CREATE OR REPLACE TEMPORARY VIEW test_agg AS SELECT * FROM VALUES
(1, true), (1, false),
diff --git a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out
index 75bda87..cc07cd6 100644
--- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 57
+-- Number of queries: 62
-- !query
@@ -278,6 +278,67 @@ grouping expressions sequence is empty, and '`id`' is not an aggregate function.
-- !query
+SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=true
+-- !query schema
+struct<key:string,value:string>
+-- !query output
+spark.sql.legacy.parser.havingWithoutGroupByAsWhere true
+
+
+-- !query
+SELECT 1 FROM range(10) HAVING true
+-- !query schema
+struct<1:int>
+-- !query output
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+
+
+-- !query
+SELECT 1 FROM range(10) HAVING MAX(id) > 0
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+
+Aggregate/Window/Generate expressions are not valid in where clause of the query.
+Expression in where clause: [(max(`id`) > CAST(0 AS BIGINT))]
+Invalid expressions: [max(`id`)]
+
+
+-- !query
+SELECT id FROM range(10) HAVING id > 0
+-- !query schema
+struct<id:bigint>
+-- !query output
+1
+2
+3
+4
+5
+6
+7
+8
+9
+
+
+-- !query
+SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=false
+-- !query schema
+struct<key:string,value:string>
+-- !query output
+spark.sql.legacy.parser.havingWithoutGroupByAsWhere false
+
+
+-- !query
CREATE OR REPLACE TEMPORARY VIEW test_agg AS SELECT * FROM VALUES
(1, true), (1, false),
(2, true),
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org