Posted to commits@spark.apache.org by ya...@apache.org on 2021/01/05 23:49:30 UTC

[spark] branch branch-3.1 updated: [SPARK-34012][SQL] Keep behavior consistent when conf `spark.sql.legacy.parser.havingWithoutGroupByAsWhere` is true with migration guide

This is an automated email from the ASF dual-hosted git repository.

yamamuro pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new d729158  [SPARK-34012][SQL] Keep behavior consistent when conf `spark.sql.legacy.parser.havingWithoutGroupByAsWhere` is true with migration guide
d729158 is described below

commit d7291582ebaf815f89474c76d8a35b49172b1ecf
Author: angerszhu <an...@gmail.com>
AuthorDate: Wed Jan 6 08:48:24 2021 +0900

    [SPARK-34012][SQL] Keep behavior consistent when conf `spark.sql.legacy.parser.havingWithoutGroupByAsWhere` is true with migration guide
    
    ### What changes were proposed in this pull request?
    In https://github.com/apache/spark/pull/22696 we added support for treating HAVING without GROUP BY as a global aggregate.
    But since HAVING was previously treated as a `Filter`, that caused a lot of analysis errors, so https://github.com/apache/spark/pull/28294 replaced `Filter` with `UnresolvedHaving` to solve the problem. However, this broke the original legacy behavior of treating `SELECT 1 FROM range(10) HAVING true` as `SELECT 1 FROM range(10) WHERE true`.
    This PR fixes the issue and adds a UT.
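    
    For illustration, a minimal spark-shell sketch (assuming a `SparkSession` bound to `spark`; not part of this patch) of the two behaviors the conf selects between:
    
    ```scala
    // Legacy behavior: HAVING without GROUP BY is parsed as WHERE, so the
    // predicate filters individual rows and the query returns ten rows.
    spark.conf.set("spark.sql.legacy.parser.havingWithoutGroupByAsWhere", true)
    spark.sql("SELECT 1 FROM range(10) HAVING true").show()
    
    // Default behavior (SQL standard): HAVING without GROUP BY means a global
    // aggregate, so the same query returns a single row.
    spark.conf.set("spark.sql.legacy.parser.havingWithoutGroupByAsWhere", false)
    spark.sql("SELECT 1 FROM range(10) HAVING true").show()
    ```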
    
    ### Why are the changes needed?
    Keep the behavior consistent with the migration guide.
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    Added a UT.
    
    Closes #31039 from AngersZhuuuu/SPARK-25780-Follow-up.
    
    Authored-by: angerszhu <an...@gmail.com>
    Signed-off-by: Takeshi Yamamuro <ya...@apache.org>
    (cherry picked from commit e279ed304475a6d5a9fbf739fe9ed32ef58171cb)
    Signed-off-by: Takeshi Yamamuro <ya...@apache.org>
---
 .../spark/sql/catalyst/parser/AstBuilder.scala     |  6 ++-
 .../test/resources/sql-tests/inputs/group-by.sql   | 10 ++++
 .../resources/sql-tests/results/group-by.sql.out   | 63 +++++++++++++++++++++-
 3 files changed, 77 insertions(+), 2 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index a22383c..9d74ac9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -714,7 +714,11 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg
     val withProject = if (aggregationClause == null && havingClause != null) {
       if (conf.getConf(SQLConf.LEGACY_HAVING_WITHOUT_GROUP_BY_AS_WHERE)) {
         // If the legacy conf is set, treat HAVING without GROUP BY as WHERE.
-        withHavingClause(havingClause, createProject())
+        val predicate = expression(havingClause.booleanExpression) match {
+          case p: Predicate => p
+          case e => Cast(e, BooleanType)
+        }
+        Filter(predicate, createProject())
       } else {
         // According to SQL standard, HAVING without GROUP BY means global aggregate.
         withHavingClause(havingClause, Aggregate(Nil, namedExpressions, withFilter))
diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql
index 81e2204..6ee1014 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql
@@ -86,6 +86,16 @@ SELECT 1 FROM range(10) HAVING MAX(id) > 0;
 
 SELECT id FROM range(10) HAVING id > 0;
 
+SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=true;
+
+SELECT 1 FROM range(10) HAVING true;
+
+SELECT 1 FROM range(10) HAVING MAX(id) > 0;
+
+SELECT id FROM range(10) HAVING id > 0;
+
+SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=false;
+
 -- Test data
 CREATE OR REPLACE TEMPORARY VIEW test_agg AS SELECT * FROM VALUES
   (1, true), (1, false),
diff --git a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out
index 75bda87..cc07cd6 100644
--- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 57
+-- Number of queries: 62
 
 
 -- !query
@@ -278,6 +278,67 @@ grouping expressions sequence is empty, and '`id`' is not an aggregate function.
 
 
 -- !query
+SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=true
+-- !query schema
+struct<key:string,value:string>
+-- !query output
+spark.sql.legacy.parser.havingWithoutGroupByAsWhere	true
+
+
+-- !query
+SELECT 1 FROM range(10) HAVING true
+-- !query schema
+struct<1:int>
+-- !query output
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+
+
+-- !query
+SELECT 1 FROM range(10) HAVING MAX(id) > 0
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+
+Aggregate/Window/Generate expressions are not valid in where clause of the query.
+Expression in where clause: [(max(`id`) > CAST(0 AS BIGINT))]
+Invalid expressions: [max(`id`)]
+
+
+-- !query
+SELECT id FROM range(10) HAVING id > 0
+-- !query schema
+struct<id:bigint>
+-- !query output
+1
+2
+3
+4
+5
+6
+7
+8
+9
+
+
+-- !query
+SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=false
+-- !query schema
+struct<key:string,value:string>
+-- !query output
+spark.sql.legacy.parser.havingWithoutGroupByAsWhere	false
+
+
+-- !query
 CREATE OR REPLACE TEMPORARY VIEW test_agg AS SELECT * FROM VALUES
   (1, true), (1, false),
   (2, true),
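
The AstBuilder hunk above coerces the HAVING expression to a boolean predicate before wrapping it in a Filter. As a standalone sketch of that coercion pattern against the Catalyst expression API (illustrative only, not the actual parser code path; toPredicate is a hypothetical helper name):

    import org.apache.spark.sql.catalyst.expressions._
    import org.apache.spark.sql.types.BooleanType

    // Expressions that are already predicates pass through unchanged; anything
    // else (e.g. HAVING 1) is cast to BOOLEAN, matching the patched branch.
    def toPredicate(e: Expression): Expression = e match {
      case p: Predicate => p
      case other        => Cast(other, BooleanType)
    }

    toPredicate(GreaterThan(Literal(1), Literal(0)))  // already a Predicate: returned as-is
    toPredicate(Literal(1))                           // wrapped: CAST(1 AS BOOLEAN)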

