You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2023/06/03 19:15:51 UTC

[spark] branch branch-3.4 updated: [SPARK-43956][SQL][3.4] Fix the bug doesn't display column's sql for Percentile[Cont|Disc]

This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.4 by this push:
     new e140bf719e3 [SPARK-43956][SQL][3.4] Fix the bug doesn't display column's sql for Percentile[Cont|Disc]
e140bf719e3 is described below

commit e140bf719e3e8d7347f5d00b2ebaf77d6a5b2210
Author: Jiaan Geng <be...@163.com>
AuthorDate: Sat Jun 3 22:15:15 2023 +0300

    [SPARK-43956][SQL][3.4] Fix the bug doesn't display column's sql for Percentile[Cont|Disc]
    
    ### What changes were proposed in this pull request?
    This PR backports https://github.com/apache/spark/pull/41436 to branch-3.4.
    
    ### Why are the changes needed?
    Fix the bug where the SQL string generated for Percentile[Cont|Disc] shows a hard-coded `v` in the `ORDER BY` clause instead of the actual column's SQL.
    
    ### Does this PR introduce _any_ user-facing change?
    Yes.
    Users can now see the correct SQL information.
    
    ### How was this patch tested?
    Test cases updated.
    
    Closes #41445 from beliefer/SPARK-43956_followup.
    
    Authored-by: Jiaan Geng <be...@163.com>
    Signed-off-by: Max Gekk <ma...@gmail.com>
---
 .../expressions/aggregate/percentiles.scala        |  4 ++--
 .../sql-tests/results/percentiles.sql.out          | 24 +++++++++++-----------
 .../results/postgreSQL/aggregates_part4.sql.out    |  8 ++++----
 .../udf/postgreSQL/udf-aggregates_part4.sql.out    |  8 ++++----
 4 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala
index 81bc7e51499..8447a5f9b51 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala
@@ -368,7 +368,7 @@ case class PercentileCont(left: Expression, right: Expression, reverse: Boolean
   override def sql(isDistinct: Boolean): String = {
     val distinct = if (isDistinct) "DISTINCT " else ""
     val direction = if (reverse) " DESC" else ""
-    s"$prettyName($distinct${right.sql}) WITHIN GROUP (ORDER BY v$direction)"
+    s"$prettyName($distinct${right.sql}) WITHIN GROUP (ORDER BY ${left.sql}$direction)"
   }
   override protected def withNewChildrenInternal(
       newLeft: Expression, newRight: Expression): PercentileCont =
@@ -408,7 +408,7 @@ case class PercentileDisc(
   override def sql(isDistinct: Boolean): String = {
     val distinct = if (isDistinct) "DISTINCT " else ""
     val direction = if (reverse) " DESC" else ""
-    s"$prettyName($distinct${right.sql}) WITHIN GROUP (ORDER BY v$direction)"
+    s"$prettyName($distinct${right.sql}) WITHIN GROUP (ORDER BY ${left.sql}$direction)"
   }
 
   override protected def withNewChildrenInternal(
diff --git a/sql/core/src/test/resources/sql-tests/results/percentiles.sql.out b/sql/core/src/test/resources/sql-tests/results/percentiles.sql.out
index 38319875c71..cd99ded56bf 100644
--- a/sql/core/src/test/resources/sql-tests/results/percentiles.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/percentiles.sql.out
@@ -144,7 +144,7 @@ SELECT
 FROM basic_pays
 ORDER BY salary
 -- !query schema
-struct<employee_name:string,department:string,salary:int,percentile_cont(0.25) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_disc(0.25) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,p [...]
+struct<employee_name:string,department:string,salary:int,percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLL [...]
 -- !query output
 Leslie Thompson	IT	5186	5917.75	5186.0	7381.25	8113.0
 Anthony Bow	Accounting	6627	8543.75	8435.0	9746.5	9998.0
@@ -344,7 +344,7 @@ FROM basic_pays
 WINDOW w AS (PARTITION BY department)
 ORDER BY salary
 -- !query schema
-struct<employee_name:string,department:string,salary:int,percentile_cont(0.25) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_disc(0.25) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,p [...]
+struct<employee_name:string,department:string,salary:int,percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLL [...]
 -- !query output
 Leslie Thompson	IT	5186	5917.75	5186.0	7381.25	8113.0
 Anthony Bow	Accounting	6627	8543.75	8435.0	9746.5	9998.0
@@ -380,7 +380,7 @@ WHERE salary > 8900
 WINDOW w AS (PARTITION BY department)
 ORDER BY salary
 -- !query schema
-struct<employee_name:string,department:string,salary:int,median(salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_cont(0.5) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_disc(0.5) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_cont(0.5) WITHIN GROUP (ORDER  [...]
+struct<employee_name:string,department:string,salary:int,median(salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_cont(0.5) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_disc(0.5) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_cont(0.5) WITHIN GRO [...]
 -- !query output
 Jeff Firrelli	Accounting	8992	9998.0	9998.0	9998.0	9998.0	9998.0
 Julie Firrelli	Sales	9181	9441.0	9441.0	9441.0	9441.0	9441.0
@@ -594,7 +594,7 @@ SELECT
   percentile_cont(0.25) WITHIN GROUP (ORDER BY dt DESC)
 FROM intervals
 -- !query schema
-struct<percentile_cont(0.25) WITHIN GROUP (ORDER BY v):interval year to month,percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC):interval year to month>
+struct<percentile_cont(0.25) WITHIN GROUP (ORDER BY dt):interval year to month,percentile_cont(0.25) WITHIN GROUP (ORDER BY dt DESC):interval year to month>
 -- !query output
 0-10	2-6
 
@@ -608,7 +608,7 @@ FROM intervals
 GROUP BY k
 ORDER BY k
 -- !query schema
-struct<k:int,percentile_cont(0.25) WITHIN GROUP (ORDER BY v):interval day to second,percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC):interval day to second>
+struct<k:int,percentile_cont(0.25) WITHIN GROUP (ORDER BY ym):interval day to second,percentile_cont(0.25) WITHIN GROUP (ORDER BY ym DESC):interval day to second>
 -- !query output
 0	0 00:00:10.000000000	0 00:00:30.000000000
 1	0 00:00:12.500000000	0 00:00:17.500000000
@@ -626,7 +626,7 @@ FROM intervals
 GROUP BY k
 ORDER BY k
 -- !query schema
-struct<k:int,percentile_cont(0.25) WITHIN GROUP (ORDER BY v):interval day to second,percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC):interval day to second>
+struct<k:int,percentile_cont(0.25) WITHIN GROUP (ORDER BY dt2):interval day to second,percentile_cont(0.25) WITHIN GROUP (ORDER BY dt2 DESC):interval day to second>
 -- !query output
 0	0 00:10:00.000000000	0 00:30:00.000000000
 1	0 00:12:30.000000000	0 00:17:30.000000000
@@ -641,7 +641,7 @@ SELECT
   percentile_disc(0.25) WITHIN GROUP (ORDER BY dt DESC)
 FROM intervals
 -- !query schema
-struct<percentile_disc(0.25) WITHIN GROUP (ORDER BY v):interval year to month,percentile_disc(0.25) WITHIN GROUP (ORDER BY v DESC):interval year to month>
+struct<percentile_disc(0.25) WITHIN GROUP (ORDER BY dt):interval year to month,percentile_disc(0.25) WITHIN GROUP (ORDER BY dt DESC):interval year to month>
 -- !query output
 0-10	2-6
 
@@ -655,7 +655,7 @@ FROM intervals
 GROUP BY k
 ORDER BY k
 -- !query schema
-struct<k:int,percentile_disc(0.25) WITHIN GROUP (ORDER BY v):interval day to second,percentile_disc(0.25) WITHIN GROUP (ORDER BY v DESC):interval day to second>
+struct<k:int,percentile_disc(0.25) WITHIN GROUP (ORDER BY ym):interval day to second,percentile_disc(0.25) WITHIN GROUP (ORDER BY ym DESC):interval day to second>
 -- !query output
 0	0 00:00:10.000000000	0 00:00:30.000000000
 1	0 00:00:10.000000000	0 00:00:20.000000000
@@ -673,7 +673,7 @@ FROM intervals
 GROUP BY k
 ORDER BY k
 -- !query schema
-struct<k:int,percentile_disc(0.25) WITHIN GROUP (ORDER BY v):interval day to second,percentile_disc(0.25) WITHIN GROUP (ORDER BY v DESC):interval day to second>
+struct<k:int,percentile_disc(0.25) WITHIN GROUP (ORDER BY dt2):interval day to second,percentile_disc(0.25) WITHIN GROUP (ORDER BY dt2 DESC):interval day to second>
 -- !query output
 0	0 00:10:00.000000000	0 00:30:00.000000000
 1	0 00:10:00.000000000	0 00:20:00.000000000
@@ -689,7 +689,7 @@ SELECT
   percentile_cont(0.5) WITHIN GROUP (ORDER BY dt)
 FROM intervals
 -- !query schema
-struct<median(dt):interval year to month,percentile(dt, 0.5, 1):interval year to month,percentile_cont(0.5) WITHIN GROUP (ORDER BY v):interval year to month>
+struct<median(dt):interval year to month,percentile(dt, 0.5, 1):interval year to month,percentile_cont(0.5) WITHIN GROUP (ORDER BY dt):interval year to month>
 -- !query output
 1-8	1-8	1-8
 
@@ -704,7 +704,7 @@ FROM intervals
 GROUP BY k
 ORDER BY k
 -- !query schema
-struct<k:int,median(ym):interval day to second,percentile(ym, 0.5, 1):interval day to second,percentile_cont(0.5) WITHIN GROUP (ORDER BY v):interval day to second>
+struct<k:int,median(ym):interval day to second,percentile(ym, 0.5, 1):interval day to second,percentile_cont(0.5) WITHIN GROUP (ORDER BY ym):interval day to second>
 -- !query output
 0	0 00:00:20.000000000	0 00:00:20.000000000	0 00:00:20.000000000
 1	0 00:00:15.000000000	0 00:00:15.000000000	0 00:00:15.000000000
@@ -723,7 +723,7 @@ FROM intervals
 GROUP BY k
 ORDER BY k
 -- !query schema
-struct<k:int,median(dt2):interval day to second,percentile(dt2, 0.5, 1):interval day to second,percentile_cont(0.5) WITHIN GROUP (ORDER BY v):interval day to second>
+struct<k:int,median(dt2):interval day to second,percentile(dt2, 0.5, 1):interval day to second,percentile_cont(0.5) WITHIN GROUP (ORDER BY dt2):interval day to second>
 -- !query output
 0	0 00:20:00.000000000	0 00:20:00.000000000	0 00:20:00.000000000
 1	0 00:15:00.000000000	0 00:15:00.000000000	0 00:15:00.000000000
diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part4.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part4.sql.out
index 45f00b36f16..1aaa514eb13 100644
--- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part4.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part4.sql.out
@@ -2,7 +2,7 @@
 -- !query
 select percentile_cont(0.5) within group (order by b) from aggtest
 -- !query schema
-struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY v):double>
+struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY b):double>
 -- !query output
 53.44850015640259
 
@@ -10,7 +10,7 @@ struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY v):double>
 -- !query
 select percentile_cont(0.5) within group (order by b), sum(b) from aggtest
 -- !query schema
-struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY v):double,sum(b):double>
+struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY b):double,sum(b):double>
 -- !query output
 53.44850015640259	431.77260909229517
 
@@ -18,7 +18,7 @@ struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY v):double,sum(b):double>
 -- !query
 select percentile_cont(0.5) within group (order by thousand) from tenk1
 -- !query schema
-struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY v):double>
+struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY thousand):double>
 -- !query output
 499.5
 
@@ -26,6 +26,6 @@ struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY v):double>
 -- !query
 select percentile_disc(0.5) within group (order by thousand) from tenk1
 -- !query schema
-struct<percentile_disc(0.5) WITHIN GROUP (ORDER BY v):double>
+struct<percentile_disc(0.5) WITHIN GROUP (ORDER BY thousand):double>
 -- !query output
 499.0
diff --git a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part4.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part4.sql.out
index 45f00b36f16..1aaa514eb13 100644
--- a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part4.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part4.sql.out
@@ -2,7 +2,7 @@
 -- !query
 select percentile_cont(0.5) within group (order by b) from aggtest
 -- !query schema
-struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY v):double>
+struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY b):double>
 -- !query output
 53.44850015640259
 
@@ -10,7 +10,7 @@ struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY v):double>
 -- !query
 select percentile_cont(0.5) within group (order by b), sum(b) from aggtest
 -- !query schema
-struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY v):double,sum(b):double>
+struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY b):double,sum(b):double>
 -- !query output
 53.44850015640259	431.77260909229517
 
@@ -18,7 +18,7 @@ struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY v):double,sum(b):double>
 -- !query
 select percentile_cont(0.5) within group (order by thousand) from tenk1
 -- !query schema
-struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY v):double>
+struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY thousand):double>
 -- !query output
 499.5
 
@@ -26,6 +26,6 @@ struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY v):double>
 -- !query
 select percentile_disc(0.5) within group (order by thousand) from tenk1
 -- !query schema
-struct<percentile_disc(0.5) WITHIN GROUP (ORDER BY v):double>
+struct<percentile_disc(0.5) WITHIN GROUP (ORDER BY thousand):double>
 -- !query output
 499.0


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org