You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2021/07/28 05:36:34 UTC
[spark] branch branch-3.2 updated: [SPARK-36275][SQL]
ResolveAggregateFunctions should work with nested fields
This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.2 by this push:
new 993ffaf [SPARK-36275][SQL] ResolveAggregateFunctions should work with nested fields
993ffaf is described below
commit 993ffafc3e2e7f5b2ce7057a9fec9b061153c462
Author: allisonwang-db <al...@databricks.com>
AuthorDate: Wed Jul 28 13:35:17 2021 +0800
[SPARK-36275][SQL] ResolveAggregateFunctions should work with nested fields
### What changes were proposed in this pull request?
This PR fixes an issue in `ResolveAggregateFunctions` where non-aggregated nested fields in ORDER BY and HAVING are not resolved correctly. This is because nested fields are resolved as aliases that fail to be semantically equal to any grouping/aggregate expressions.
### Why are the changes needed?
To fix an analyzer issue.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Unit tests.
Closes #33498 from allisonwang-db/spark-36275-resolve-agg-func.
Authored-by: allisonwang-db <al...@databricks.com>
Signed-off-by: Wenchen Fan <we...@databricks.com>
(cherry picked from commit 23a6ffa5dc6d2330ea1c3e2b0890328e7d2d0f5d)
Signed-off-by: Wenchen Fan <we...@databricks.com>
---
.../spark/sql/catalyst/analysis/Analyzer.scala | 6 +++--
.../sql/catalyst/analysis/AnalysisSuite.scala | 26 ++++++++++++++++++++++
2 files changed, 30 insertions(+), 2 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index ee7b342..6e571e3 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -2553,8 +2553,10 @@ class Analyzer(override val catalogManager: CatalogManager)
// a table `t` has two columns `c1` and `c2`, for query `SELECT ... FROM t
// GROUP BY c1 HAVING c2 = 0`, even though we can resolve column `c2` here, we
// should undo it later and fail with "Column c2 not found".
- agg.child.resolve(u.nameParts, resolver).map(TempResolvedColumn(_, u.nameParts))
- .getOrElse(u)
+ agg.child.resolve(u.nameParts, resolver).map({
+ case a: Alias => TempResolvedColumn(a.child, u.nameParts)
+ case o => TempResolvedColumn(o, u.nameParts)
+ }).getOrElse(u)
} catch {
case _: AnalysisException => u
}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
index 5cef243..6ddc6b7 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
@@ -1115,4 +1115,30 @@ class AnalysisSuite extends AnalysisTest with Matchers {
Seq("grouping_id() can only be used with GroupingSets/Cube/Rollup"),
false)
}
+
+ test("SPARK-36275: Resolve aggregate functions should work with nested fields") {
+ assertAnalysisSuccess(parsePlan(
+ """
+ |SELECT c.x, SUM(c.y)
+ |FROM VALUES NAMED_STRUCT('x', 'A', 'y', 1), NAMED_STRUCT('x', 'A', 'y', 2) AS t(c)
+ |GROUP BY c.x
+ |HAVING c.x > 1
+ |""".stripMargin))
+
+ assertAnalysisSuccess(parsePlan(
+ """
+ |SELECT c.x, SUM(c.y)
+ |FROM VALUES NAMED_STRUCT('x', 'A', 'y', 1), NAMED_STRUCT('x', 'A', 'y', 2) AS t(c)
+ |GROUP BY c.x
+ |ORDER BY c.x
+ |""".stripMargin))
+
+ assertAnalysisError(parsePlan(
+ """
+ |SELECT c.x
+ |FROM VALUES NAMED_STRUCT('x', 'A', 'y', 1), NAMED_STRUCT('x', 'A', 'y', 2) AS t(c)
+ |GROUP BY c.x
+ |ORDER BY c.x + c.y
+ |""".stripMargin), "cannot resolve 'c.y' given input columns: [x]" :: Nil)
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org