You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2021/07/28 05:36:34 UTC
[spark] branch branch-3.2 updated: [SPARK-36275][SQL] ResolveAggregateFunctions should works with nested fields

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.2 by this push:
     new 993ffaf  [SPARK-36275][SQL] ResolveAggregateFunctions should works with nested fields
993ffaf is described below

commit 993ffafc3e2e7f5b2ce7057a9fec9b061153c462
Author: allisonwang-db <al...@databricks.com>
AuthorDate: Wed Jul 28 13:35:17 2021 +0800

    [SPARK-36275][SQL] ResolveAggregateFunctions should works with nested fields
    
    ### What changes were proposed in this pull request?
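    This PR fixes an issue in `ResolveAggregateFunctions` where non-aggregated nested fields in ORDER BY and HAVING are not resolved correctly. This is because a nested field is resolved to an `Alias` wrapping the field-extraction expression, and that alias is never semantically equal to any grouping or aggregate expression. The fix strips the `Alias` and wraps its child in `TempResolvedColumn` instead.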
    
    ### Why are the changes needed?
    To fix an analyzer issue.
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    Unit tests.
    
    Closes #33498 from allisonwang-db/spark-36275-resolve-agg-func.
    
    Authored-by: allisonwang-db <al...@databricks.com>
    Signed-off-by: Wenchen Fan <we...@databricks.com>
    (cherry picked from commit 23a6ffa5dc6d2330ea1c3e2b0890328e7d2d0f5d)
    Signed-off-by: Wenchen Fan <we...@databricks.com>
---
 .../spark/sql/catalyst/analysis/Analyzer.scala     |  6 +++--
 .../sql/catalyst/analysis/AnalysisSuite.scala      | 26 ++++++++++++++++++++++
 2 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index ee7b342..6e571e3 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -2553,8 +2553,10 @@ class Analyzer(override val catalogManager: CatalogManager)
               // a table `t` has two columns `c1` and `c2`, for query `SELECT ... FROM t
               // GROUP BY c1 HAVING c2 = 0`, even though we can resolve column `c2` here, we
               // should undo it later and fail with "Column c2 not found".
-              agg.child.resolve(u.nameParts, resolver).map(TempResolvedColumn(_, u.nameParts))
-                .getOrElse(u)
+              agg.child.resolve(u.nameParts, resolver).map({
+                case a: Alias => TempResolvedColumn(a.child, u.nameParts)
+                case o => TempResolvedColumn(o, u.nameParts)
+              }).getOrElse(u)
             } catch {
               case _: AnalysisException => u
             }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
index 5cef243..6ddc6b7 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
@@ -1115,4 +1115,30 @@ class AnalysisSuite extends AnalysisTest with Matchers {
       Seq("grouping_id() can only be used with GroupingSets/Cube/Rollup"),
       false)
   }
+
+  test("SPARK-36275: Resolve aggregate functions should work with nested fields") {
+    assertAnalysisSuccess(parsePlan(
+      """
+        |SELECT c.x, SUM(c.y)
+        |FROM VALUES NAMED_STRUCT('x', 'A', 'y', 1), NAMED_STRUCT('x', 'A', 'y', 2) AS t(c)
+        |GROUP BY c.x
+        |HAVING c.x > 1
+        |""".stripMargin))
+
+    assertAnalysisSuccess(parsePlan(
+      """
+        |SELECT c.x, SUM(c.y)
+        |FROM VALUES NAMED_STRUCT('x', 'A', 'y', 1), NAMED_STRUCT('x', 'A', 'y', 2) AS t(c)
+        |GROUP BY c.x
+        |ORDER BY c.x
+        |""".stripMargin))
+
+    assertAnalysisError(parsePlan(
+     """
+        |SELECT c.x
+        |FROM VALUES NAMED_STRUCT('x', 'A', 'y', 1), NAMED_STRUCT('x', 'A', 'y', 2) AS t(c)
+        |GROUP BY c.x
+        |ORDER BY c.x + c.y
+        |""".stripMargin), "cannot resolve 'c.y' given input columns: [x]" :: Nil)
+  }
 }

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org