You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ya...@apache.org on 2021/01/06 11:55:39 UTC
[spark] branch branch-2.4 updated: [SPARK-34012][SQL][2.4] Keep behavior consistent when conf `spark.sqllegacy.parser.havingWithoutGroupByAsWhere` is true with migration guide

This is an automated email from the ASF dual-hosted git repository.

yamamuro pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-2.4 by this push:
     new d442146  [SPARK-34012][SQL][2.4] Keep behavior consistent when conf `spark.sqllegacy.parser.havingWithoutGroupByAsWhere` is true with migration guide
d442146 is described below

commit d442146964a981dd7f074c4954f7fed2752124e8
Author: angerszhu <an...@gmail.com>
AuthorDate: Wed Jan 6 20:54:47 2021 +0900

    [SPARK-34012][SQL][2.4] Keep behavior consistent when conf `spark.sqllegacy.parser.havingWithoutGroupByAsWhere` is true with migration guide
    
    ### What changes were proposed in this pull request?
    In https://github.com/apache/spark/pull/22696 we support HAVING without GROUP BY means global aggregate
    But since we treat having as Filter before, in this way will cause a lot of analyze error, after https://github.com/apache/spark/pull/28294 we use `UnresolvedHaving` to instead `Filter` to solve such problem, but break origin logical about treat `SELECT 1 FROM range(10) HAVING true` as `SELECT 1 FROM range(10) WHERE true`   .
    This PR fix this issue and add UT.
    
    NOTE: This backport comes from #31039
    
    ### Why are the changes needed?
    Keep consistent behavior of migration guide.
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    added UT
    
    Closes #31050 from AngersZhuuuu/SPARK-34012-2.4.
    
    Authored-by: angerszhu <an...@gmail.com>
    Signed-off-by: Takeshi Yamamuro <ya...@apache.org>
---
 .../spark/sql/catalyst/parser/AstBuilder.scala     |  6 ++-
 .../test/resources/sql-tests/inputs/group-by.sql   | 10 ++++
 .../resources/sql-tests/results/group-by.sql.out   | 60 +++++++++++++++++++++-
 3 files changed, 74 insertions(+), 2 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 90e7d1c..4c4e4f1 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -467,7 +467,11 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
         val withProject = if (aggregation == null && having != null) {
           if (conf.getConf(SQLConf.LEGACY_HAVING_WITHOUT_GROUP_BY_AS_WHERE)) {
             // If the legacy conf is set, treat HAVING without GROUP BY as WHERE.
-            withHaving(having, createProject())
+            val predicate = expression(having) match {
+              case p: Predicate => p
+              case e => Cast(e, BooleanType)
+            }
+            Filter(predicate, createProject())
           } else {
             // According to SQL standard, HAVING without GROUP BY means global aggregate.
             withHaving(having, Aggregate(Nil, namedExpressions, withFilter))
diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql
index 433db71..0c40a8c 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql
@@ -80,3 +80,13 @@ SELECT 1 FROM range(10) HAVING true;
 SELECT 1 FROM range(10) HAVING MAX(id) > 0;
 
 SELECT id FROM range(10) HAVING id > 0;
+
+SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=true;
+
+SELECT 1 FROM range(10) HAVING true;
+
+SELECT 1 FROM range(10) HAVING MAX(id) > 0;
+
+SELECT id FROM range(10) HAVING id > 0;
+
+SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=false;
diff --git a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out
index f9d1ee8..d23a58a 100644
--- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 30
+-- Number of queries: 35
 
 
 -- !query 0
@@ -275,3 +275,61 @@ struct<>
 -- !query 29 output
 org.apache.spark.sql.AnalysisException
 grouping expressions sequence is empty, and '`id`' is not an aggregate function. Wrap '()' in windowing function(s) or wrap '`id`' in first() (or first_value) if you don't care which value you get.;
+
+
+-- !query 30
+SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=true
+-- !query 30 schema
+struct<key:string,value:string>
+-- !query 30 output
+spark.sql.legacy.parser.havingWithoutGroupByAsWhere	true
+
+
+-- !query 31
+SELECT 1 FROM range(10) HAVING true
+-- !query 31 schema
+struct<1:int>
+-- !query 31 output
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+
+
+-- !query 32
+SELECT 1 FROM range(10) HAVING MAX(id) > 0
+-- !query 32 schema
+struct<>
+-- !query 32 output
+java.lang.UnsupportedOperationException
+Cannot evaluate expression: max(input[0, bigint, false])
+
+
+-- !query 33
+SELECT id FROM range(10) HAVING id > 0
+-- !query 33 schema
+struct<id:bigint>
+-- !query 33 output
+1
+2
+3
+4
+5
+6
+7
+8
+9
+
+
+-- !query 34
+SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=false
+-- !query 34 schema
+struct<key:string,value:string>
+-- !query 34 output
+spark.sql.legacy.parser.havingWithoutGroupByAsWhere	false


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org