You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2020/10/17 05:17:08 UTC
[spark] branch branch-3.0 updated: [SPARK-33131][SQL][3.0] Fix grouping sets with having clause can not resolve qualified col name

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new fab10f0  [SPARK-33131][SQL][3.0] Fix grouping sets with having clause can not resolve qualified col name
fab10f0 is described below

commit fab10f0dd37ecc8f77d5d8115118ec0a74b59da0
Author: ulysses <yo...@weidian.com>
AuthorDate: Fri Oct 16 22:11:13 2020 -0700

    [SPARK-33131][SQL][3.0] Fix grouping sets with having clause can not resolve qualified col name
    
    This is [#30029](https://github.com/apache/spark/pull/30029) backport for branch-3.0.
    
    ### What changes were proposed in this pull request?
    
    Correct the resolution of having clause.
    
    ### Why are the changes needed?
    
    Grouping sets construct new aggregate lost the qualified name of grouping expression. Here is a example:
    ```
    -- Works resolved by `ResolveReferences`
    select c1 from values (1) as t1(c1) group by grouping sets(t1.c1) having c1 = 1
    
    -- Works because of the extra expression c1
    select c1 as c2 from values (1) as t1(c1) group by grouping sets(t1.c1) having t1.c1 = 1
    
    -- Failed
    select c1 from values (1) as t1(c1) group by grouping sets(t1.c1) having t1.c1 = 1
    ```
    
    It wroks with `Aggregate` without grouping sets through `ResolveReferences`, but Grouping sets not works since the exprId has been changed.
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes, bug fix.
    
    ### How was this patch tested?
    
    add test.
    
    Closes #30077 from ulysses-you/SPARK-33131-branch-3.0.
    
    Authored-by: ulysses <yo...@weidian.com>
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
 .../spark/sql/catalyst/analysis/Analyzer.scala     |  2 +-
 .../src/test/resources/sql-tests/inputs/having.sql |  6 ++++
 .../resources/sql-tests/results/having.sql.out     | 32 ++++++++++++++++++++++
 3 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 729d316..bf3d4f0 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -457,7 +457,7 @@ class Analyzer(
      */
     private def constructGroupByAlias(groupByExprs: Seq[Expression]): Seq[Alias] = {
       groupByExprs.map {
-        case e: NamedExpression => Alias(e, e.name)()
+        case e: NamedExpression => Alias(e, e.name)(qualifier = e.qualifier)
         case other => Alias(other, other.toString)()
       }
     }
diff --git a/sql/core/src/test/resources/sql-tests/inputs/having.sql b/sql/core/src/test/resources/sql-tests/inputs/having.sql
index 3b75be1..2799b1a 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/having.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/having.sql
@@ -24,3 +24,9 @@ SELECT SUM(a) AS b, CAST('2020-01-01' AS DATE) AS fake FROM VALUES (1, 10), (2,
 SELECT SUM(a) AS b FROM VALUES (1, 10), (2, 20) AS T(a, b) GROUP BY GROUPING SETS ((b), (a, b)) HAVING b > 10;
 SELECT SUM(a) AS b FROM VALUES (1, 10), (2, 20) AS T(a, b) GROUP BY CUBE(a, b) HAVING b > 10;
 SELECT SUM(a) AS b FROM VALUES (1, 10), (2, 20) AS T(a, b) GROUP BY ROLLUP(a, b) HAVING b > 10;
+
+-- SPARK-33131: Grouping sets with having clause can not resolve qualified col name.
+SELECT c1 FROM VALUES (1, 2) as t(c1, c2) GROUP BY GROUPING SETS(t.c1) HAVING t.c1 = 1;
+SELECT c1 FROM VALUES (1, 2) as t(c1, c2) GROUP BY CUBE(t.c1) HAVING t.c1 = 1;
+SELECT c1 FROM VALUES (1, 2) as t(c1, c2) GROUP BY ROLLUP(t.c1) HAVING t.c1 = 1;
+SELECT c1 FROM VALUES (1, 2) as t(c1, c2) GROUP BY t.c1 HAVING t.c1 = 1;
diff --git a/sql/core/src/test/resources/sql-tests/results/having.sql.out b/sql/core/src/test/resources/sql-tests/results/having.sql.out
index 1b3ac78..6508143 100644
--- a/sql/core/src/test/resources/sql-tests/results/having.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/having.sql.out
@@ -81,3 +81,35 @@ SELECT SUM(a) AS b FROM VALUES (1, 10), (2, 20) AS T(a, b) GROUP BY ROLLUP(a, b)
 struct<b:bigint>
 -- !query output
 2
+
+
+-- !query
+SELECT c1 FROM VALUES (1, 2) as t(c1, c2) GROUP BY GROUPING SETS(t.c1) HAVING t.c1 = 1
+-- !query schema
+struct<c1:int>
+-- !query output
+1
+
+
+-- !query
+SELECT c1 FROM VALUES (1, 2) as t(c1, c2) GROUP BY CUBE(t.c1) HAVING t.c1 = 1
+-- !query schema
+struct<c1:int>
+-- !query output
+1
+
+
+-- !query
+SELECT c1 FROM VALUES (1, 2) as t(c1, c2) GROUP BY ROLLUP(t.c1) HAVING t.c1 = 1
+-- !query schema
+struct<c1:int>
+-- !query output
+1
+
+
+-- !query
+SELECT c1 FROM VALUES (1, 2) as t(c1, c2) GROUP BY t.c1 HAVING t.c1 = 1
+-- !query schema
+struct<c1:int>
+-- !query output
+1


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org