You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2023/06/03 19:15:51 UTC
[spark] branch branch-3.4 updated: [SPARK-43956][SQL][3.4] Fix the bug doesn't display column's sql for Percentile[Cont|Disc]
This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.4 by this push:
new e140bf719e3 [SPARK-43956][SQL][3.4] Fix the bug doesn't display column's sql for Percentile[Cont|Disc]
e140bf719e3 is described below
commit e140bf719e3e8d7347f5d00b2ebaf77d6a5b2210
Author: Jiaan Geng <be...@163.com>
AuthorDate: Sat Jun 3 22:15:15 2023 +0300
[SPARK-43956][SQL][3.4] Fix the bug doesn't display column's sql for Percentile[Cont|Disc]
### What changes were proposed in this pull request?
This PR backports https://github.com/apache/spark/pull/41436 to branch-3.4.
### Why are the changes needed?
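Fix the bug where the SQL generated for Percentile[Cont|Disc] shows a hard-coded `v` in the `WITHIN GROUP (ORDER BY ...)` clause instead of the SQL of the actual ordering column.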
### Does this PR introduce _any_ user-facing change?
Yes.
Users now see the actual ordering column (for example, `ORDER BY salary` instead of `ORDER BY v`) in the SQL displayed for Percentile[Cont|Disc].
### How was this patch tested?
Test cases updated.
Closes #41445 from beliefer/SPARK-43956_followup.
Authored-by: Jiaan Geng <be...@163.com>
Signed-off-by: Max Gekk <ma...@gmail.com>
---
.../expressions/aggregate/percentiles.scala | 4 ++--
.../sql-tests/results/percentiles.sql.out | 24 +++++++++++-----------
.../results/postgreSQL/aggregates_part4.sql.out | 8 ++++----
.../udf/postgreSQL/udf-aggregates_part4.sql.out | 8 ++++----
4 files changed, 22 insertions(+), 22 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala
index 81bc7e51499..8447a5f9b51 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala
@@ -368,7 +368,7 @@ case class PercentileCont(left: Expression, right: Expression, reverse: Boolean
override def sql(isDistinct: Boolean): String = {
val distinct = if (isDistinct) "DISTINCT " else ""
val direction = if (reverse) " DESC" else ""
- s"$prettyName($distinct${right.sql}) WITHIN GROUP (ORDER BY v$direction)"
+ s"$prettyName($distinct${right.sql}) WITHIN GROUP (ORDER BY ${left.sql}$direction)"
}
override protected def withNewChildrenInternal(
newLeft: Expression, newRight: Expression): PercentileCont =
@@ -408,7 +408,7 @@ case class PercentileDisc(
override def sql(isDistinct: Boolean): String = {
val distinct = if (isDistinct) "DISTINCT " else ""
val direction = if (reverse) " DESC" else ""
- s"$prettyName($distinct${right.sql}) WITHIN GROUP (ORDER BY v$direction)"
+ s"$prettyName($distinct${right.sql}) WITHIN GROUP (ORDER BY ${left.sql}$direction)"
}
override protected def withNewChildrenInternal(
diff --git a/sql/core/src/test/resources/sql-tests/results/percentiles.sql.out b/sql/core/src/test/resources/sql-tests/results/percentiles.sql.out
index 38319875c71..cd99ded56bf 100644
--- a/sql/core/src/test/resources/sql-tests/results/percentiles.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/percentiles.sql.out
@@ -144,7 +144,7 @@ SELECT
FROM basic_pays
ORDER BY salary
-- !query schema
-struct<employee_name:string,department:string,salary:int,percentile_cont(0.25) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_disc(0.25) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,p [...]
+struct<employee_name:string,department:string,salary:int,percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLL [...]
-- !query output
Leslie Thompson IT 5186 5917.75 5186.0 7381.25 8113.0
Anthony Bow Accounting 6627 8543.75 8435.0 9746.5 9998.0
@@ -344,7 +344,7 @@ FROM basic_pays
WINDOW w AS (PARTITION BY department)
ORDER BY salary
-- !query schema
-struct<employee_name:string,department:string,salary:int,percentile_cont(0.25) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_disc(0.25) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,p [...]
+struct<employee_name:string,department:string,salary:int,percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLL [...]
-- !query output
Leslie Thompson IT 5186 5917.75 5186.0 7381.25 8113.0
Anthony Bow Accounting 6627 8543.75 8435.0 9746.5 9998.0
@@ -380,7 +380,7 @@ WHERE salary > 8900
WINDOW w AS (PARTITION BY department)
ORDER BY salary
-- !query schema
-struct<employee_name:string,department:string,salary:int,median(salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_cont(0.5) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_disc(0.5) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_cont(0.5) WITHIN GROUP (ORDER [...]
+struct<employee_name:string,department:string,salary:int,median(salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_cont(0.5) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_disc(0.5) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_cont(0.5) WITHIN GRO [...]
-- !query output
Jeff Firrelli Accounting 8992 9998.0 9998.0 9998.0 9998.0 9998.0
Julie Firrelli Sales 9181 9441.0 9441.0 9441.0 9441.0 9441.0
@@ -594,7 +594,7 @@ SELECT
percentile_cont(0.25) WITHIN GROUP (ORDER BY dt DESC)
FROM intervals
-- !query schema
-struct<percentile_cont(0.25) WITHIN GROUP (ORDER BY v):interval year to month,percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC):interval year to month>
+struct<percentile_cont(0.25) WITHIN GROUP (ORDER BY dt):interval year to month,percentile_cont(0.25) WITHIN GROUP (ORDER BY dt DESC):interval year to month>
-- !query output
0-10 2-6
@@ -608,7 +608,7 @@ FROM intervals
GROUP BY k
ORDER BY k
-- !query schema
-struct<k:int,percentile_cont(0.25) WITHIN GROUP (ORDER BY v):interval day to second,percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC):interval day to second>
+struct<k:int,percentile_cont(0.25) WITHIN GROUP (ORDER BY ym):interval day to second,percentile_cont(0.25) WITHIN GROUP (ORDER BY ym DESC):interval day to second>
-- !query output
0 0 00:00:10.000000000 0 00:00:30.000000000
1 0 00:00:12.500000000 0 00:00:17.500000000
@@ -626,7 +626,7 @@ FROM intervals
GROUP BY k
ORDER BY k
-- !query schema
-struct<k:int,percentile_cont(0.25) WITHIN GROUP (ORDER BY v):interval day to second,percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC):interval day to second>
+struct<k:int,percentile_cont(0.25) WITHIN GROUP (ORDER BY dt2):interval day to second,percentile_cont(0.25) WITHIN GROUP (ORDER BY dt2 DESC):interval day to second>
-- !query output
0 0 00:10:00.000000000 0 00:30:00.000000000
1 0 00:12:30.000000000 0 00:17:30.000000000
@@ -641,7 +641,7 @@ SELECT
percentile_disc(0.25) WITHIN GROUP (ORDER BY dt DESC)
FROM intervals
-- !query schema
-struct<percentile_disc(0.25) WITHIN GROUP (ORDER BY v):interval year to month,percentile_disc(0.25) WITHIN GROUP (ORDER BY v DESC):interval year to month>
+struct<percentile_disc(0.25) WITHIN GROUP (ORDER BY dt):interval year to month,percentile_disc(0.25) WITHIN GROUP (ORDER BY dt DESC):interval year to month>
-- !query output
0-10 2-6
@@ -655,7 +655,7 @@ FROM intervals
GROUP BY k
ORDER BY k
-- !query schema
-struct<k:int,percentile_disc(0.25) WITHIN GROUP (ORDER BY v):interval day to second,percentile_disc(0.25) WITHIN GROUP (ORDER BY v DESC):interval day to second>
+struct<k:int,percentile_disc(0.25) WITHIN GROUP (ORDER BY ym):interval day to second,percentile_disc(0.25) WITHIN GROUP (ORDER BY ym DESC):interval day to second>
-- !query output
0 0 00:00:10.000000000 0 00:00:30.000000000
1 0 00:00:10.000000000 0 00:00:20.000000000
@@ -673,7 +673,7 @@ FROM intervals
GROUP BY k
ORDER BY k
-- !query schema
-struct<k:int,percentile_disc(0.25) WITHIN GROUP (ORDER BY v):interval day to second,percentile_disc(0.25) WITHIN GROUP (ORDER BY v DESC):interval day to second>
+struct<k:int,percentile_disc(0.25) WITHIN GROUP (ORDER BY dt2):interval day to second,percentile_disc(0.25) WITHIN GROUP (ORDER BY dt2 DESC):interval day to second>
-- !query output
0 0 00:10:00.000000000 0 00:30:00.000000000
1 0 00:10:00.000000000 0 00:20:00.000000000
@@ -689,7 +689,7 @@ SELECT
percentile_cont(0.5) WITHIN GROUP (ORDER BY dt)
FROM intervals
-- !query schema
-struct<median(dt):interval year to month,percentile(dt, 0.5, 1):interval year to month,percentile_cont(0.5) WITHIN GROUP (ORDER BY v):interval year to month>
+struct<median(dt):interval year to month,percentile(dt, 0.5, 1):interval year to month,percentile_cont(0.5) WITHIN GROUP (ORDER BY dt):interval year to month>
-- !query output
1-8 1-8 1-8
@@ -704,7 +704,7 @@ FROM intervals
GROUP BY k
ORDER BY k
-- !query schema
-struct<k:int,median(ym):interval day to second,percentile(ym, 0.5, 1):interval day to second,percentile_cont(0.5) WITHIN GROUP (ORDER BY v):interval day to second>
+struct<k:int,median(ym):interval day to second,percentile(ym, 0.5, 1):interval day to second,percentile_cont(0.5) WITHIN GROUP (ORDER BY ym):interval day to second>
-- !query output
0 0 00:00:20.000000000 0 00:00:20.000000000 0 00:00:20.000000000
1 0 00:00:15.000000000 0 00:00:15.000000000 0 00:00:15.000000000
@@ -723,7 +723,7 @@ FROM intervals
GROUP BY k
ORDER BY k
-- !query schema
-struct<k:int,median(dt2):interval day to second,percentile(dt2, 0.5, 1):interval day to second,percentile_cont(0.5) WITHIN GROUP (ORDER BY v):interval day to second>
+struct<k:int,median(dt2):interval day to second,percentile(dt2, 0.5, 1):interval day to second,percentile_cont(0.5) WITHIN GROUP (ORDER BY dt2):interval day to second>
-- !query output
0 0 00:20:00.000000000 0 00:20:00.000000000 0 00:20:00.000000000
1 0 00:15:00.000000000 0 00:15:00.000000000 0 00:15:00.000000000
diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part4.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part4.sql.out
index 45f00b36f16..1aaa514eb13 100644
--- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part4.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part4.sql.out
@@ -2,7 +2,7 @@
-- !query
select percentile_cont(0.5) within group (order by b) from aggtest
-- !query schema
-struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY v):double>
+struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY b):double>
-- !query output
53.44850015640259
@@ -10,7 +10,7 @@ struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY v):double>
-- !query
select percentile_cont(0.5) within group (order by b), sum(b) from aggtest
-- !query schema
-struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY v):double,sum(b):double>
+struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY b):double,sum(b):double>
-- !query output
53.44850015640259 431.77260909229517
@@ -18,7 +18,7 @@ struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY v):double,sum(b):double>
-- !query
select percentile_cont(0.5) within group (order by thousand) from tenk1
-- !query schema
-struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY v):double>
+struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY thousand):double>
-- !query output
499.5
@@ -26,6 +26,6 @@ struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY v):double>
-- !query
select percentile_disc(0.5) within group (order by thousand) from tenk1
-- !query schema
-struct<percentile_disc(0.5) WITHIN GROUP (ORDER BY v):double>
+struct<percentile_disc(0.5) WITHIN GROUP (ORDER BY thousand):double>
-- !query output
499.0
diff --git a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part4.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part4.sql.out
index 45f00b36f16..1aaa514eb13 100644
--- a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part4.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part4.sql.out
@@ -2,7 +2,7 @@
-- !query
select percentile_cont(0.5) within group (order by b) from aggtest
-- !query schema
-struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY v):double>
+struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY b):double>
-- !query output
53.44850015640259
@@ -10,7 +10,7 @@ struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY v):double>
-- !query
select percentile_cont(0.5) within group (order by b), sum(b) from aggtest
-- !query schema
-struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY v):double,sum(b):double>
+struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY b):double,sum(b):double>
-- !query output
53.44850015640259 431.77260909229517
@@ -18,7 +18,7 @@ struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY v):double,sum(b):double>
-- !query
select percentile_cont(0.5) within group (order by thousand) from tenk1
-- !query schema
-struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY v):double>
+struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY thousand):double>
-- !query output
499.5
@@ -26,6 +26,6 @@ struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY v):double>
-- !query
select percentile_disc(0.5) within group (order by thousand) from tenk1
-- !query schema
-struct<percentile_disc(0.5) WITHIN GROUP (ORDER BY v):double>
+struct<percentile_disc(0.5) WITHIN GROUP (ORDER BY thousand):double>
-- !query output
499.0
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org