You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2020/10/18 07:26:13 UTC
[spark] branch branch-2.4 updated: [SPARK-33131][SQL][2.4] Fix grouping sets with having clause can not resolve qualified col name

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-2.4 by this push:
     new 1d9ffad  [SPARK-33131][SQL][2.4] Fix grouping sets with having clause can not resolve qualified col name
1d9ffad is described below

commit 1d9ffadb87912ed7530ea50cf0fd1d3977b68080
Author: ulysses <yo...@weidian.com>
AuthorDate: Sun Oct 18 00:23:26 2020 -0700

    [SPARK-33131][SQL][2.4] Fix grouping sets with having clause can not resolve qualified col name
    
    This is [#30029](https://github.com/apache/spark/pull/30029) backport for branch-2.4.
    
    ### What changes were proposed in this pull request?
    
    Correct the resolution of having clause.
    
    ### Why are the changes needed?
    
    Grouping sets construct new aggregate lost the qualified name of grouping expression. Here is a example:
    ```
    -- Works resolved by `ResolveReferences`
    select c1 from values (1) as t1(c1) group by grouping sets(t1.c1) having c1 = 1
    
    -- Works because of the extra expression c1
    select c1 as c2 from values (1) as t1(c1) group by grouping sets(t1.c1) having t1.c1 = 1
    
    -- Failed
    select c1 from values (1) as t1(c1) group by grouping sets(t1.c1) having t1.c1 = 1
    ```
    
    It wroks with `Aggregate` without grouping sets through `ResolveReferences`, but Grouping sets not works since the exprId has been changed.
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes, bug fix.
    
    ### How was this patch tested?
    
    add test.
    
    Closes #30075 from ulysses-you/SPARK-33131-branch-2.4.
    
    Authored-by: ulysses <yo...@weidian.com>
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
 .../spark/sql/catalyst/analysis/Analyzer.scala     |  2 +-
 .../src/test/resources/sql-tests/inputs/having.sql |  6 ++++
 .../resources/sql-tests/results/having.sql.out     | 32 ++++++++++++++++++++++
 3 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index aaaf707..dc40a6a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -358,7 +358,7 @@ class Analyzer(
      */
     private def constructGroupByAlias(groupByExprs: Seq[Expression]): Seq[Alias] = {
       groupByExprs.map {
-        case e: NamedExpression => Alias(e, e.name)()
+        case e: NamedExpression => Alias(e, e.name)(qualifier = e.qualifier)
         case other => Alias(other, other.toString)()
       }
     }
diff --git a/sql/core/src/test/resources/sql-tests/inputs/having.sql b/sql/core/src/test/resources/sql-tests/inputs/having.sql
index ccc633c..5ae187a 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/having.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/having.sql
@@ -24,3 +24,9 @@ SELECT SUM(a) AS b, hour('2020-01-01 12:12:12') AS fake FROM VALUES (1, 10), (2,
 SELECT SUM(a) AS b FROM VALUES (1, 10), (2, 20) AS T(a, b) GROUP BY GROUPING SETS ((b), (a, b)) HAVING b > 10;
 SELECT SUM(a) AS b FROM VALUES (1, 10), (2, 20) AS T(a, b) GROUP BY CUBE(a, b) HAVING b > 10;
 SELECT SUM(a) AS b FROM VALUES (1, 10), (2, 20) AS T(a, b) GROUP BY ROLLUP(a, b) HAVING b > 10;
+
+-- SPARK-33131: Grouping sets with having clause can not resolve qualified col name.
+SELECT c1 FROM VALUES (1, 2) as t(c1, c2) GROUP BY GROUPING SETS(t.c1) HAVING t.c1 = 1;
+SELECT c1 FROM VALUES (1, 2) as t(c1, c2) GROUP BY CUBE(t.c1) HAVING t.c1 = 1;
+SELECT c1 FROM VALUES (1, 2) as t(c1, c2) GROUP BY ROLLUP(t.c1) HAVING t.c1 = 1;
+SELECT c1 FROM VALUES (1, 2) as t(c1, c2) GROUP BY t.c1 HAVING t.c1 = 1;
diff --git a/sql/core/src/test/resources/sql-tests/results/having.sql.out b/sql/core/src/test/resources/sql-tests/results/having.sql.out
index 79d6416..ee1956ee 100644
--- a/sql/core/src/test/resources/sql-tests/results/having.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/having.sql.out
@@ -81,3 +81,35 @@ SELECT SUM(a) AS b FROM VALUES (1, 10), (2, 20) AS T(a, b) GROUP BY ROLLUP(a, b)
 struct<b:bigint>
 -- !query 8 output
 2
+
+
+-- !query 9
+SELECT c1 FROM VALUES (1, 2) as t(c1, c2) GROUP BY GROUPING SETS(t.c1) HAVING t.c1 = 1
+-- !query schema
+struct<c1:int>
+-- !query output
+1
+
+
+-- !query 10
+SELECT c1 FROM VALUES (1, 2) as t(c1, c2) GROUP BY CUBE(t.c1) HAVING t.c1 = 1
+-- !query schema
+struct<c1:int>
+-- !query output
+1
+
+
+-- !query 11
+SELECT c1 FROM VALUES (1, 2) as t(c1, c2) GROUP BY ROLLUP(t.c1) HAVING t.c1 = 1
+-- !query schema
+struct<c1:int>
+-- !query output
+1
+
+
+-- !query 12
+SELECT c1 FROM VALUES (1, 2) as t(c1, c2) GROUP BY t.c1 HAVING t.c1 = 1
+-- !query schema
+struct<c1:int>
+-- !query output
+1


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org