You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2022/06/03 05:23:56 UTC
[spark] branch master updated: [SPARK-39320][SQL] Support aggregate function `MEDIAN`
This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 9e6f2dd7268 [SPARK-39320][SQL] Support aggregate function `MEDIAN`
9e6f2dd7268 is described below
commit 9e6f2dd72686a9ac44fd4573b5a408f8a8e55fe1
Author: Jiaan Geng <be...@163.com>
AuthorDate: Fri Jun 3 08:23:22 2022 +0300
[SPARK-39320][SQL] Support aggregate function `MEDIAN`
### What changes were proposed in this pull request?
Many mainstream database supports aggregate function `MEDIAN`.
**Syntax:**
Aggregate function
`MEDIAN( <expr> )`
Window function
`MEDIAN( <expr> ) OVER ( [ PARTITION BY <expr2> ] )`
**Arguments:**
expr: The expression must evaluate to a numeric data type (INTEGER, FLOAT, DECIMAL, or equivalent).
**Examples**:
```
select k, median(v) from aggr group by k order by k;
+---+-----------+
| K | MEDIAN(V) |
|---+-----------|
| 1 | 20.00000 |
| 2 | 22.50000 |
| 3 | NULL |
+---+-----------+
```
### Why are the changes needed?
The mainstream database supports `MEDIAN` show below:
**Snowflake**
https://docs.snowflake.com/en/sql-reference/functions/median.html
**Oracle**
https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/MEDIAN.html#GUID-DE15705A-AC18-4416-8487-B9E1D70CE01A
**ClickHouse**
https://clickhouse.com/docs/en/sql-reference/aggregate-functions/reference/median
**Redshift**
https://docs.aws.amazon.com/redshift/latest/dg/r_MEDIAN.html
**Teradata**
https://docs.teradata.com/r/Teradata-VantageTM-SQL-Functions-Expressions-and-Predicates/March-2019/Ordered-Analytical/Window-Aggregate-Functions/MEDIAN
**DB2**
https://www.ibm.com/docs/en/db2/11.5?topic=functions-median
**Vertica**
https://www.vertica.com/docs/9.2.x/HTML/Content/Authoring/SQLReferenceManual/Functions/Analytic/MEDIANAnalytic.htm?tocpath=SQL%20Reference%20Manual%7CSQL%20Functions%7CAnalytic%20Functions%7C_____20
**H2**
http://www.h2database.com/html/functions-aggregate.html#median
**Sybase**
https://infocenter.sybase.com/help/index.jsp?topic=/com.sybase.infocenter.dc01776.1601/doc/html/san1278453109663.html
**Exasol**
https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/median.htm
**Yellowbrick**
https://www.yellowbrick.com/docs/5.2/ybd_sqlref/median.html
**Mariadb**
https://mariadb.com/kb/en/median/
**Singlestore**
https://docs.singlestore.com/db/v7.6/en/reference/sql-reference/aggregate-functions/median.html
**InfluxDB**
https://docs.influxdata.com/flux/v0.x/stdlib/universe/median/
### Does this PR introduce _any_ user-facing change?
'No'.
New feature.
### How was this patch tested?
New tests.
Closes #36714 from beliefer/SPARK-39320.
Authored-by: Jiaan Geng <be...@163.com>
Signed-off-by: Max Gekk <ma...@gmail.com>
---
.../sql/catalyst/analysis/CheckAnalysis.scala | 5 +-
.../sql/catalyst/analysis/FunctionRegistry.scala | 1 +
.../expressions/aggregate/percentiles.scala | 26 +-
.../sql-functions/sql-expression-schema.md | 1 +
.../test/resources/sql-tests/inputs/group-by.sql | 29 --
.../resources/sql-tests/inputs/percentiles.sql | 212 +++++++++++
.../src/test/resources/sql-tests/inputs/window.sql | 112 ------
.../resources/sql-tests/results/group-by.sql.out | 70 +---
.../sql-tests/results/percentiles.sql.out | 417 +++++++++++++++++++++
.../resources/sql-tests/results/window.sql.out | 229 +----------
10 files changed, 661 insertions(+), 441 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
index ed2e9ba2b6b..7635918279a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -21,7 +21,7 @@ import scala.collection.mutable
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.SubExprUtils._
-import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, PercentileCont, PercentileDisc}
+import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Median, PercentileCont, PercentileDisc}
import org.apache.spark.sql.catalyst.optimizer.{BooleanSimplification, DecorrelateInnerQuery, InlineCTE}
import org.apache.spark.sql.catalyst.plans._
import org.apache.spark.sql.catalyst.plans.logical._
@@ -243,7 +243,8 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog {
// Only allow window functions with an aggregate expression or an offset window
// function or a Pandas window UDF.
w.windowFunction match {
- case agg @ AggregateExpression(_: PercentileCont | _: PercentileDisc, _, _, _, _)
+ case agg @ AggregateExpression(
+ _: PercentileCont | _: PercentileDisc | _: Median, _, _, _, _)
if w.windowSpec.orderSpec.nonEmpty || w.windowSpec.frameSpecification !=
SpecifiedWindowFrame(RowFrame, UnboundedPreceding, UnboundedFollowing) =>
failAnalysis(
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index 2155fb2efeb..09d3cd30cab 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -477,6 +477,7 @@ object FunctionRegistry {
expression[Min]("min"),
expression[MinBy]("min_by"),
expression[Percentile]("percentile"),
+ expression[Median]("median"),
expression[Skewness]("skewness"),
expression[ApproximatePercentile]("percentile_approx"),
expression[ApproximatePercentile]("approx_percentile", true),
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala
index d55f0b7a831..3acb2f2cc97 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala
@@ -24,7 +24,7 @@ import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{TypeCheckFailure, TypeCheckSuccess}
import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.catalyst.trees.{BinaryLike, TernaryLike}
+import org.apache.spark.sql.catalyst.trees.{BinaryLike, TernaryLike, UnaryLike}
import org.apache.spark.sql.catalyst.util._
import org.apache.spark.sql.errors.QueryExecutionErrors
import org.apache.spark.sql.types._
@@ -359,6 +359,30 @@ case class Percentile(
)
}
+@ExpressionDescription(
+ usage = "_FUNC_(col) - Returns the median of numeric or ansi interval column `col`.",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(col) FROM VALUES (0), (10) AS tab(col);
+ 5.0
+ > SELECT _FUNC_(col) FROM VALUES (INTERVAL '0' MONTH), (INTERVAL '10' MONTH) AS tab(col);
+ 5.0
+ """,
+ group = "agg_funcs",
+ since = "3.4.0")
+case class Median(child: Expression)
+ extends AggregateFunction
+ with RuntimeReplaceableAggregate
+ with ImplicitCastInputTypes
+ with UnaryLike[Expression] {
+ private lazy val percentile = new Percentile(child, Literal(0.5, DoubleType))
+ override def replacement: Expression = percentile
+ override def nodeName: String = "median"
+ override def inputTypes: Seq[AbstractDataType] = percentile.inputTypes.take(1)
+ override protected def withNewChildInternal(
+ newChild: Expression): Median = this.copy(child = newChild)
+}
+
/**
* Return a percentile value based on a continuous distribution of
* numeric or ansi interval column at the given percentage (specified in ORDER BY clause).
diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
index 2d4eef39405..3bfc342939f 100644
--- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
+++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
@@ -368,6 +368,7 @@
| org.apache.spark.sql.catalyst.expressions.aggregate.Last | last_value | SELECT last_value(col) FROM VALUES (10), (5), (20) AS tab(col) | struct<last_value(col):int> |
| org.apache.spark.sql.catalyst.expressions.aggregate.Max | max | SELECT max(col) FROM VALUES (10), (50), (20) AS tab(col) | struct<max(col):int> |
| org.apache.spark.sql.catalyst.expressions.aggregate.MaxBy | max_by | SELECT max_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y) | struct<max_by(x, y):string> |
+| org.apache.spark.sql.catalyst.expressions.aggregate.Median | median | SELECT median(col) FROM VALUES (0), (10) AS tab(col) | struct<median(col):double> |
| org.apache.spark.sql.catalyst.expressions.aggregate.Min | min | SELECT min(col) FROM VALUES (10), (-1), (20) AS tab(col) | struct<min(col):int> |
| org.apache.spark.sql.catalyst.expressions.aggregate.MinBy | min_by | SELECT min_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y) | struct<min_by(x, y):string> |
| org.apache.spark.sql.catalyst.expressions.aggregate.Percentile | percentile | SELECT percentile(col, 0.3) FROM VALUES (0), (10) AS tab(col) | struct<percentile(col, 0.3, 1):double> |
diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql
index 5fba8616641..187a843e509 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql
@@ -7,9 +7,6 @@
CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2), (null, 1), (3, null), (null, null)
AS testData(a, b);
-CREATE OR REPLACE TEMPORARY VIEW aggr AS SELECT * FROM VALUES
-(0, 0), (0, 10), (0, 20), (0, 30), (0, 40), (1, 10), (1, 20), (2, 10), (2, 20), (2, 25), (2, 30), (3, 60), (4, null)
-AS aggr(k, v);
-- Aggregate with empty GroupBy expressions.
SELECT a, COUNT(b) FROM testData;
@@ -247,29 +244,3 @@ SELECT
FROM VALUES
(1,4),(2,3),(1,4),(2,4) AS v(a,b)
GROUP BY a;
-
--- SPARK-37676: Support ANSI Aggregation Function: percentile_cont
-SELECT
- percentile_cont(0.25) WITHIN GROUP (ORDER BY v),
- percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC)
-FROM aggr;
-SELECT
- k,
- percentile_cont(0.25) WITHIN GROUP (ORDER BY v),
- percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC)
-FROM aggr
-GROUP BY k
-ORDER BY k;
-
--- SPARK-37691: Support ANSI Aggregation Function: percentile_disc
-SELECT
- percentile_disc(0.25) WITHIN GROUP (ORDER BY v),
- percentile_disc(0.25) WITHIN GROUP (ORDER BY v DESC)
-FROM aggr;
-SELECT
- k,
- percentile_disc(0.25) WITHIN GROUP (ORDER BY v),
- percentile_disc(0.25) WITHIN GROUP (ORDER BY v DESC)
-FROM aggr
-GROUP BY k
-ORDER BY k;
diff --git a/sql/core/src/test/resources/sql-tests/inputs/percentiles.sql b/sql/core/src/test/resources/sql-tests/inputs/percentiles.sql
new file mode 100644
index 00000000000..db584df0d8a
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/percentiles.sql
@@ -0,0 +1,212 @@
+-- Test data.
+CREATE OR REPLACE TEMPORARY VIEW aggr AS SELECT * FROM VALUES
+(0, 0), (0, 10), (0, 20), (0, 30), (0, 40), (1, 10), (1, 20), (2, 10), (2, 20), (2, 25), (2, 30), (3, 60), (4, null)
+AS aggr(k, v);
+
+CREATE OR REPLACE TEMPORARY VIEW basic_pays AS SELECT * FROM VALUES
+('Diane Murphy','Accounting',8435),
+('Mary Patterson','Accounting',9998),
+('Jeff Firrelli','Accounting',8992),
+('William Patterson','Accounting',8870),
+('Gerard Bondur','Accounting',11472),
+('Anthony Bow','Accounting',6627),
+('Leslie Jennings','IT',8113),
+('Leslie Thompson','IT',5186),
+('Julie Firrelli','Sales',9181),
+('Steve Patterson','Sales',9441),
+('Foon Yue Tseng','Sales',6660),
+('George Vanauf','Sales',10563),
+('Loui Bondur','SCM',10449),
+('Gerard Hernandez','SCM',6949),
+('Pamela Castillo','SCM',11303),
+('Larry Bott','SCM',11798),
+('Barry Jones','SCM',10586)
+AS basic_pays(employee_name, department, salary);
+
+SELECT
+ percentile_cont(0.25) WITHIN GROUP (ORDER BY v),
+ percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC)
+FROM aggr;
+
+SELECT
+ k,
+ percentile_cont(0.25) WITHIN GROUP (ORDER BY v),
+ percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC)
+FROM aggr
+GROUP BY k
+ORDER BY k;
+
+SELECT
+ percentile_disc(0.25) WITHIN GROUP (ORDER BY v),
+ percentile_disc(0.25) WITHIN GROUP (ORDER BY v DESC)
+FROM aggr;
+
+SELECT
+ k,
+ percentile_disc(0.25) WITHIN GROUP (ORDER BY v),
+ percentile_disc(0.25) WITHIN GROUP (ORDER BY v DESC)
+FROM aggr
+GROUP BY k
+ORDER BY k;
+
+SELECT
+ median(v),
+ percentile(v, 0.5),
+ percentile_cont(0.5) WITHIN GROUP (ORDER BY v)
+FROM aggr;
+
+SELECT
+ k,
+ median(v),
+ percentile(v, 0.5),
+ percentile_cont(0.5) WITHIN GROUP (ORDER BY v)
+FROM aggr
+GROUP BY k
+ORDER BY k;
+
+SELECT
+ employee_name,
+ department,
+ salary,
+ percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department),
+ percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department),
+ percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department),
+ percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department)
+FROM basic_pays
+ORDER BY salary;
+
+SELECT
+ employee_name,
+ department,
+ salary,
+ percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ORDER BY salary),
+ percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ORDER BY salary)
+FROM basic_pays
+ORDER BY salary;
+
+SELECT
+ employee_name,
+ department,
+ salary,
+ percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ORDER BY salary),
+ percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ORDER BY salary)
+FROM basic_pays
+ORDER BY salary;
+
+SELECT
+ employee_name,
+ department,
+ salary,
+ median(salary) OVER (PARTITION BY department ORDER BY salary)
+FROM basic_pays
+ORDER BY salary;
+
+SELECT
+ employee_name,
+ department,
+ salary,
+ percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING),
+ percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING)
+FROM basic_pays
+ORDER BY salary;
+
+SELECT
+ employee_name,
+ department,
+ salary,
+ percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING),
+ percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING)
+FROM basic_pays
+ORDER BY salary;
+
+SELECT
+ employee_name,
+ department,
+ salary,
+ median(salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING)
+FROM basic_pays
+ORDER BY salary;
+
+SELECT
+ employee_name,
+ department,
+ salary,
+ percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER w,
+ percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER w,
+ percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w,
+ percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w
+FROM basic_pays
+WINDOW w AS (PARTITION BY department)
+ORDER BY salary;
+
+SELECT
+ employee_name,
+ department,
+ salary,
+ median(salary) OVER w,
+ percentile_cont(0.5) WITHIN GROUP (ORDER BY salary) OVER w,
+ percentile_disc(0.5) WITHIN GROUP (ORDER BY salary) OVER w,
+ percentile_cont(0.5) WITHIN GROUP (ORDER BY salary DESC) OVER w,
+ percentile_disc(0.5) WITHIN GROUP (ORDER BY salary DESC) OVER w
+FROM basic_pays
+WHERE salary > 8900
+WINDOW w AS (PARTITION BY department)
+ORDER BY salary;
+
+SELECT
+ employee_name,
+ department,
+ salary,
+ percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER w,
+ percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w
+FROM basic_pays
+WINDOW w AS (PARTITION BY department ORDER BY salary)
+ORDER BY salary;
+
+SELECT
+ employee_name,
+ department,
+ salary,
+ percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER w,
+ percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w
+FROM basic_pays
+WINDOW w AS (PARTITION BY department ORDER BY salary)
+ORDER BY salary;
+
+SELECT
+ employee_name,
+ department,
+ salary,
+ median(salary) OVER w
+FROM basic_pays
+WINDOW w AS (PARTITION BY department ORDER BY salary)
+ORDER BY salary;
+
+SELECT
+ employee_name,
+ department,
+ salary,
+ percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER w,
+ percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w
+FROM basic_pays
+WINDOW w AS (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING)
+ORDER BY salary;
+
+SELECT
+ employee_name,
+ department,
+ salary,
+ percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER w,
+ percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w
+FROM basic_pays
+WINDOW w AS (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING)
+ORDER BY salary;
+
+SELECT
+ employee_name,
+ department,
+ salary,
+ median(salary) OVER w
+FROM basic_pays
+WINDOW w AS (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING)
+ORDER BY salary;
diff --git a/sql/core/src/test/resources/sql-tests/inputs/window.sql b/sql/core/src/test/resources/sql-tests/inputs/window.sql
index 66c6c7ba172..8f8963c5558 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/window.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/window.sql
@@ -465,115 +465,3 @@ SELECT
SUM(salary) OVER w sum_salary
FROM
basic_pays;
-
-SELECT
- employee_name,
- department,
- salary,
- percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department),
- percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department),
- percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department),
- percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department)
-FROM basic_pays
-ORDER BY salary;
-
-SELECT
- employee_name,
- department,
- salary,
- percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ORDER BY salary),
- percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ORDER BY salary)
-FROM basic_pays
-ORDER BY salary;
-
-SELECT
- employee_name,
- department,
- salary,
- percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ORDER BY salary),
- percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ORDER BY salary)
-FROM basic_pays
-ORDER BY salary;
-
-SELECT
- employee_name,
- department,
- salary,
- percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING),
- percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING)
-FROM basic_pays
-ORDER BY salary;
-
-SELECT
- employee_name,
- department,
- salary,
- percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING),
- percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING)
-FROM basic_pays
-ORDER BY salary;
-
-SELECT
- employee_name,
- department,
- salary,
- percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER w,
- percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER w,
- percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w,
- percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w
-FROM basic_pays
-WINDOW w AS (PARTITION BY department)
-ORDER BY salary;
-
-SELECT
- employee_name,
- department,
- salary,
- percentile_cont(0.5) WITHIN GROUP (ORDER BY salary) OVER w,
- percentile_disc(0.5) WITHIN GROUP (ORDER BY salary) OVER w,
- percentile_cont(0.5) WITHIN GROUP (ORDER BY salary DESC) OVER w,
- percentile_disc(0.5) WITHIN GROUP (ORDER BY salary DESC) OVER w
-FROM basic_pays
-WHERE salary > 8900
-WINDOW w AS (PARTITION BY department)
-ORDER BY salary;
-
-SELECT
- employee_name,
- department,
- salary,
- percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER w,
- percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w
-FROM basic_pays
-WINDOW w AS (PARTITION BY department ORDER BY salary)
-ORDER BY salary;
-
-SELECT
- employee_name,
- department,
- salary,
- percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER w,
- percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w
-FROM basic_pays
-WINDOW w AS (PARTITION BY department ORDER BY salary)
-ORDER BY salary;
-
-SELECT
- employee_name,
- department,
- salary,
- percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER w,
- percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w
-FROM basic_pays
-WINDOW w AS (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING)
-ORDER BY salary;
-
-SELECT
- employee_name,
- department,
- salary,
- percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER w,
- percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w
-FROM basic_pays
-WINDOW w AS (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING)
-ORDER BY salary;
diff --git a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out
index 5c87f754dbf..50f933dfc44 100644
--- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 88
+-- Number of queries: 83
-- !query
@@ -12,16 +12,6 @@ struct<>
--- !query
-CREATE OR REPLACE TEMPORARY VIEW aggr AS SELECT * FROM VALUES
-(0, 0), (0, 10), (0, 20), (0, 30), (0, 40), (1, 10), (1, 20), (2, 10), (2, 20), (2, 25), (2, 30), (3, 60), (4, null)
-AS aggr(k, v)
--- !query schema
-struct<>
--- !query output
-
-
-
-- !query
SELECT a, COUNT(b) FROM testData
-- !query schema
@@ -858,61 +848,3 @@ struct<a:int,collect_list(b):array<int>,collect_list(b):array<int>>
-- !query output
1 [4,4] [4,4]
2 [3,4] [3,4]
-
-
--- !query
-SELECT
- percentile_cont(0.25) WITHIN GROUP (ORDER BY v),
- percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC)
-FROM aggr
--- !query schema
-struct<percentile_cont(0.25) WITHIN GROUP (ORDER BY v):double,percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC):double>
--- !query output
-10.0 30.0
-
-
--- !query
-SELECT
- k,
- percentile_cont(0.25) WITHIN GROUP (ORDER BY v),
- percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC)
-FROM aggr
-GROUP BY k
-ORDER BY k
--- !query schema
-struct<k:int,percentile_cont(0.25) WITHIN GROUP (ORDER BY v):double,percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC):double>
--- !query output
-0 10.0 30.0
-1 12.5 17.5
-2 17.5 26.25
-3 60.0 60.0
-4 NULL NULL
-
-
--- !query
-SELECT
- percentile_disc(0.25) WITHIN GROUP (ORDER BY v),
- percentile_disc(0.25) WITHIN GROUP (ORDER BY v DESC)
-FROM aggr
--- !query schema
-struct<percentile_disc(0.25) WITHIN GROUP (ORDER BY v):double,percentile_disc(0.25) WITHIN GROUP (ORDER BY v DESC):double>
--- !query output
-10.0 30.0
-
-
--- !query
-SELECT
- k,
- percentile_disc(0.25) WITHIN GROUP (ORDER BY v),
- percentile_disc(0.25) WITHIN GROUP (ORDER BY v DESC)
-FROM aggr
-GROUP BY k
-ORDER BY k
--- !query schema
-struct<k:int,percentile_disc(0.25) WITHIN GROUP (ORDER BY v):double,percentile_disc(0.25) WITHIN GROUP (ORDER BY v DESC):double>
--- !query output
-0 10.0 30.0
-1 10.0 20.0
-2 10.0 30.0
-3 60.0 60.0
-4 NULL NULL
diff --git a/sql/core/src/test/resources/sql-tests/results/percentiles.sql.out b/sql/core/src/test/resources/sql-tests/results/percentiles.sql.out
new file mode 100644
index 00000000000..65acc59b9cb
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/percentiles.sql.out
@@ -0,0 +1,417 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 23
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY VIEW aggr AS SELECT * FROM VALUES
+(0, 0), (0, 10), (0, 20), (0, 30), (0, 40), (1, 10), (1, 20), (2, 10), (2, 20), (2, 25), (2, 30), (3, 60), (4, null)
+AS aggr(k, v)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY VIEW basic_pays AS SELECT * FROM VALUES
+('Diane Murphy','Accounting',8435),
+('Mary Patterson','Accounting',9998),
+('Jeff Firrelli','Accounting',8992),
+('William Patterson','Accounting',8870),
+('Gerard Bondur','Accounting',11472),
+('Anthony Bow','Accounting',6627),
+('Leslie Jennings','IT',8113),
+('Leslie Thompson','IT',5186),
+('Julie Firrelli','Sales',9181),
+('Steve Patterson','Sales',9441),
+('Foon Yue Tseng','Sales',6660),
+('George Vanauf','Sales',10563),
+('Loui Bondur','SCM',10449),
+('Gerard Hernandez','SCM',6949),
+('Pamela Castillo','SCM',11303),
+('Larry Bott','SCM',11798),
+('Barry Jones','SCM',10586)
+AS basic_pays(employee_name, department, salary)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT
+ percentile_cont(0.25) WITHIN GROUP (ORDER BY v),
+ percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC)
+FROM aggr
+-- !query schema
+struct<percentile_cont(0.25) WITHIN GROUP (ORDER BY v):double,percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC):double>
+-- !query output
+10.0 30.0
+
+
+-- !query
+SELECT
+ k,
+ percentile_cont(0.25) WITHIN GROUP (ORDER BY v),
+ percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC)
+FROM aggr
+GROUP BY k
+ORDER BY k
+-- !query schema
+struct<k:int,percentile_cont(0.25) WITHIN GROUP (ORDER BY v):double,percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC):double>
+-- !query output
+0 10.0 30.0
+1 12.5 17.5
+2 17.5 26.25
+3 60.0 60.0
+4 NULL NULL
+
+
+-- !query
+SELECT
+ percentile_disc(0.25) WITHIN GROUP (ORDER BY v),
+ percentile_disc(0.25) WITHIN GROUP (ORDER BY v DESC)
+FROM aggr
+-- !query schema
+struct<percentile_disc(0.25) WITHIN GROUP (ORDER BY v):double,percentile_disc(0.25) WITHIN GROUP (ORDER BY v DESC):double>
+-- !query output
+10.0 30.0
+
+
+-- !query
+SELECT
+ k,
+ percentile_disc(0.25) WITHIN GROUP (ORDER BY v),
+ percentile_disc(0.25) WITHIN GROUP (ORDER BY v DESC)
+FROM aggr
+GROUP BY k
+ORDER BY k
+-- !query schema
+struct<k:int,percentile_disc(0.25) WITHIN GROUP (ORDER BY v):double,percentile_disc(0.25) WITHIN GROUP (ORDER BY v DESC):double>
+-- !query output
+0 10.0 30.0
+1 10.0 20.0
+2 10.0 30.0
+3 60.0 60.0
+4 NULL NULL
+
+
+-- !query
+SELECT
+ median(v),
+ percentile(v, 0.5),
+ percentile_cont(0.5) WITHIN GROUP (ORDER BY v)
+FROM aggr
+-- !query schema
+struct<median(v):double,percentile(v, 0.5, 1):double,percentile_cont(0.5) WITHIN GROUP (ORDER BY v):double>
+-- !query output
+20.0 20.0 20.0
+
+
+-- !query
+SELECT
+ k,
+ median(v),
+ percentile(v, 0.5),
+ percentile_cont(0.5) WITHIN GROUP (ORDER BY v)
+FROM aggr
+GROUP BY k
+ORDER BY k
+-- !query schema
+struct<k:int,median(v):double,percentile(v, 0.5, 1):double,percentile_cont(0.5) WITHIN GROUP (ORDER BY v):double>
+-- !query output
+0 20.0 20.0 20.0
+1 15.0 15.0 15.0
+2 22.5 22.5 22.5
+3 60.0 60.0 60.0
+4 NULL NULL NULL
+
+
+-- !query
+SELECT
+ employee_name,
+ department,
+ salary,
+ percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department),
+ percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department),
+ percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department),
+ percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department)
+FROM basic_pays
+ORDER BY salary
+-- !query schema
+struct<employee_name:string,department:string,salary:int,percentile_cont(0.25) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_disc(0.25) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,p [...]
+-- !query output
+Leslie Thompson IT 5186 5917.75 5186.0 7381.25 8113.0
+Anthony Bow Accounting 6627 8543.75 8435.0 9746.5 9998.0
+Foon Yue Tseng Sales 6660 8550.75 6660.0 9721.5 10563.0
+Gerard Hernandez SCM 6949 10449.0 10449.0 11303.0 11303.0
+Leslie Jennings IT 8113 5917.75 5186.0 7381.25 8113.0
+Diane Murphy Accounting 8435 8543.75 8435.0 9746.5 9998.0
+William Patterson Accounting 8870 8543.75 8435.0 9746.5 9998.0
+Jeff Firrelli Accounting 8992 8543.75 8435.0 9746.5 9998.0
+Julie Firrelli Sales 9181 8550.75 6660.0 9721.5 10563.0
+Steve Patterson Sales 9441 8550.75 6660.0 9721.5 10563.0
+Mary Patterson Accounting 9998 8543.75 8435.0 9746.5 9998.0
+Loui Bondur SCM 10449 10449.0 10449.0 11303.0 11303.0
+George Vanauf Sales 10563 8550.75 6660.0 9721.5 10563.0
+Barry Jones SCM 10586 10449.0 10449.0 11303.0 11303.0
+Pamela Castillo SCM 11303 10449.0 10449.0 11303.0 11303.0
+Gerard Bondur Accounting 11472 8543.75 8435.0 9746.5 9998.0
+Larry Bott SCM 11798 10449.0 10449.0 11303.0 11303.0
+
+
+-- !query
+SELECT
+ employee_name,
+ department,
+ salary,
+ percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ORDER BY salary),
+ percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ORDER BY salary)
+FROM basic_pays
+ORDER BY salary
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+Cannot specify order by or frame for 'percentile_cont'.
+
+
+-- !query
+SELECT
+ employee_name,
+ department,
+ salary,
+ percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ORDER BY salary),
+ percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ORDER BY salary)
+FROM basic_pays
+ORDER BY salary
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+Cannot specify order by or frame for 'percentile_disc'.
+
+
+-- !query
+SELECT
+ employee_name,
+ department,
+ salary,
+ median(salary) OVER (PARTITION BY department ORDER BY salary)
+FROM basic_pays
+ORDER BY salary
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+Cannot specify order by or frame for 'median'.
+
+
+-- !query
+SELECT
+ employee_name,
+ department,
+ salary,
+ percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING),
+ percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING)
+FROM basic_pays
+ORDER BY salary
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+Cannot specify order by or frame for 'percentile_cont'.
+
+
+-- !query
+SELECT
+ employee_name,
+ department,
+ salary,
+ percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING),
+ percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING)
+FROM basic_pays
+ORDER BY salary
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+Cannot specify order by or frame for 'percentile_disc'.
+
+
+-- !query
+SELECT
+ employee_name,
+ department,
+ salary,
+ median(salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING)
+FROM basic_pays
+ORDER BY salary
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+Cannot specify order by or frame for 'median'.
+
+
+-- !query
+SELECT
+ employee_name,
+ department,
+ salary,
+ percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER w,
+ percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER w,
+ percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w,
+ percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w
+FROM basic_pays
+WINDOW w AS (PARTITION BY department)
+ORDER BY salary
+-- !query schema
+struct<employee_name:string,department:string,salary:int,percentile_cont(0.25) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_disc(0.25) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,p [...]
+-- !query output
+Leslie Thompson IT 5186 5917.75 5186.0 7381.25 8113.0
+Anthony Bow Accounting 6627 8543.75 8435.0 9746.5 9998.0
+Foon Yue Tseng Sales 6660 8550.75 6660.0 9721.5 10563.0
+Gerard Hernandez SCM 6949 10449.0 10449.0 11303.0 11303.0
+Leslie Jennings IT 8113 5917.75 5186.0 7381.25 8113.0
+Diane Murphy Accounting 8435 8543.75 8435.0 9746.5 9998.0
+William Patterson Accounting 8870 8543.75 8435.0 9746.5 9998.0
+Jeff Firrelli Accounting 8992 8543.75 8435.0 9746.5 9998.0
+Julie Firrelli Sales 9181 8550.75 6660.0 9721.5 10563.0
+Steve Patterson Sales 9441 8550.75 6660.0 9721.5 10563.0
+Mary Patterson Accounting 9998 8543.75 8435.0 9746.5 9998.0
+Loui Bondur SCM 10449 10449.0 10449.0 11303.0 11303.0
+George Vanauf Sales 10563 8550.75 6660.0 9721.5 10563.0
+Barry Jones SCM 10586 10449.0 10449.0 11303.0 11303.0
+Pamela Castillo SCM 11303 10449.0 10449.0 11303.0 11303.0
+Gerard Bondur Accounting 11472 8543.75 8435.0 9746.5 9998.0
+Larry Bott SCM 11798 10449.0 10449.0 11303.0 11303.0
+
+
+-- !query
+SELECT
+ employee_name,
+ department,
+ salary,
+ median(salary) OVER w,
+ percentile_cont(0.5) WITHIN GROUP (ORDER BY salary) OVER w,
+ percentile_disc(0.5) WITHIN GROUP (ORDER BY salary) OVER w,
+ percentile_cont(0.5) WITHIN GROUP (ORDER BY salary DESC) OVER w,
+ percentile_disc(0.5) WITHIN GROUP (ORDER BY salary DESC) OVER w
+FROM basic_pays
+WHERE salary > 8900
+WINDOW w AS (PARTITION BY department)
+ORDER BY salary
+-- !query schema
+struct<employee_name:string,department:string,salary:int,median(salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_cont(0.5) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_disc(0.5) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_cont(0.5) WITHIN GROUP (ORDER [...]
+-- !query output
+Jeff Firrelli Accounting 8992 9998.0 9998.0 9998.0 9998.0 9998.0
+Julie Firrelli Sales 9181 9441.0 9441.0 9441.0 9441.0 9441.0
+Steve Patterson Sales 9441 9441.0 9441.0 9441.0 9441.0 9441.0
+Mary Patterson Accounting 9998 9998.0 9998.0 9998.0 9998.0 9998.0
+Loui Bondur SCM 10449 10944.5 10944.5 10586.0 10944.5 11303.0
+George Vanauf Sales 10563 9441.0 9441.0 9441.0 9441.0 9441.0
+Barry Jones SCM 10586 10944.5 10944.5 10586.0 10944.5 11303.0
+Pamela Castillo SCM 11303 10944.5 10944.5 10586.0 10944.5 11303.0
+Gerard Bondur Accounting 11472 9998.0 9998.0 9998.0 9998.0 9998.0
+Larry Bott SCM 11798 10944.5 10944.5 10586.0 10944.5 11303.0
+
+
+-- !query
+SELECT
+ employee_name,
+ department,
+ salary,
+ percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER w,
+ percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w
+FROM basic_pays
+WINDOW w AS (PARTITION BY department ORDER BY salary)
+ORDER BY salary
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+Cannot specify order by or frame for 'percentile_cont'.
+
+
+-- !query
+SELECT
+ employee_name,
+ department,
+ salary,
+ percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER w,
+ percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w
+FROM basic_pays
+WINDOW w AS (PARTITION BY department ORDER BY salary)
+ORDER BY salary
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+Cannot specify order by or frame for 'percentile_disc'.
+
+
+-- !query
+SELECT
+ employee_name,
+ department,
+ salary,
+ median(salary) OVER w
+FROM basic_pays
+WINDOW w AS (PARTITION BY department ORDER BY salary)
+ORDER BY salary
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+Cannot specify order by or frame for 'median'.
+
+
+-- !query
+SELECT
+ employee_name,
+ department,
+ salary,
+ percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER w,
+ percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w
+FROM basic_pays
+WINDOW w AS (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING)
+ORDER BY salary
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+Cannot specify order by or frame for 'percentile_cont'.
+
+
+-- !query
+SELECT
+ employee_name,
+ department,
+ salary,
+ percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER w,
+ percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w
+FROM basic_pays
+WINDOW w AS (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING)
+ORDER BY salary
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+Cannot specify order by or frame for 'percentile_disc'.
+
+
+-- !query
+SELECT
+ employee_name,
+ department,
+ salary,
+ median(salary) OVER w
+FROM basic_pays
+WINDOW w AS (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING)
+ORDER BY salary
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+Cannot specify order by or frame for 'median'.
diff --git a/sql/core/src/test/resources/sql-tests/results/window.sql.out b/sql/core/src/test/resources/sql-tests/results/window.sql.out
index 955ee0c5e6f..5d1f7a44f0d 100644
--- a/sql/core/src/test/resources/sql-tests/results/window.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/window.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 66
+-- Number of queries: 55
-- !query
@@ -1223,230 +1223,3 @@ struct<>
-- !query output
org.apache.spark.sql.AnalysisException
Window specification w is not defined in the WINDOW clause.
-
-
--- !query
-SELECT
- employee_name,
- department,
- salary,
- percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department),
- percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department),
- percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department),
- percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department)
-FROM basic_pays
-ORDER BY salary
--- !query schema
-struct<employee_name:string,department:string,salary:int,percentile_cont(0.25) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_disc(0.25) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,p [...]
--- !query output
-Leslie Thompson IT 5186 5917.75 5186.0 7381.25 8113.0
-Anthony Bow Accounting 6627 8543.75 8435.0 9746.5 9998.0
-Foon Yue Tseng Sales 6660 8550.75 6660.0 9721.5 10563.0
-Gerard Hernandez SCM 6949 10449.0 10449.0 11303.0 11303.0
-Leslie Jennings IT 8113 5917.75 5186.0 7381.25 8113.0
-Diane Murphy Accounting 8435 8543.75 8435.0 9746.5 9998.0
-William Patterson Accounting 8870 8543.75 8435.0 9746.5 9998.0
-Jeff Firrelli Accounting 8992 8543.75 8435.0 9746.5 9998.0
-Julie Firrelli Sales 9181 8550.75 6660.0 9721.5 10563.0
-Steve Patterson Sales 9441 8550.75 6660.0 9721.5 10563.0
-Mary Patterson Accounting 9998 8543.75 8435.0 9746.5 9998.0
-Loui Bondur SCM 10449 10449.0 10449.0 11303.0 11303.0
-George Vanauf Sales 10563 8550.75 6660.0 9721.5 10563.0
-Barry Jones SCM 10586 10449.0 10449.0 11303.0 11303.0
-Pamela Castillo SCM 11303 10449.0 10449.0 11303.0 11303.0
-Gerard Bondur Accounting 11472 8543.75 8435.0 9746.5 9998.0
-Larry Bott SCM 11798 10449.0 10449.0 11303.0 11303.0
-
-
--- !query
-SELECT
- employee_name,
- department,
- salary,
- percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ORDER BY salary),
- percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ORDER BY salary)
-FROM basic_pays
-ORDER BY salary
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.AnalysisException
-Cannot specify order by or frame for 'percentile_cont'.
-
-
--- !query
-SELECT
- employee_name,
- department,
- salary,
- percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ORDER BY salary),
- percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ORDER BY salary)
-FROM basic_pays
-ORDER BY salary
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.AnalysisException
-Cannot specify order by or frame for 'percentile_disc'.
-
-
--- !query
-SELECT
- employee_name,
- department,
- salary,
- percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING),
- percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING)
-FROM basic_pays
-ORDER BY salary
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.AnalysisException
-Cannot specify order by or frame for 'percentile_cont'.
-
-
--- !query
-SELECT
- employee_name,
- department,
- salary,
- percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING),
- percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING)
-FROM basic_pays
-ORDER BY salary
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.AnalysisException
-Cannot specify order by or frame for 'percentile_disc'.
-
-
--- !query
-SELECT
- employee_name,
- department,
- salary,
- percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER w,
- percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER w,
- percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w,
- percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w
-FROM basic_pays
-WINDOW w AS (PARTITION BY department)
-ORDER BY salary
--- !query schema
-struct<employee_name:string,department:string,salary:int,percentile_cont(0.25) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_disc(0.25) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,p [...]
--- !query output
-Leslie Thompson IT 5186 5917.75 5186.0 7381.25 8113.0
-Anthony Bow Accounting 6627 8543.75 8435.0 9746.5 9998.0
-Foon Yue Tseng Sales 6660 8550.75 6660.0 9721.5 10563.0
-Gerard Hernandez SCM 6949 10449.0 10449.0 11303.0 11303.0
-Leslie Jennings IT 8113 5917.75 5186.0 7381.25 8113.0
-Diane Murphy Accounting 8435 8543.75 8435.0 9746.5 9998.0
-William Patterson Accounting 8870 8543.75 8435.0 9746.5 9998.0
-Jeff Firrelli Accounting 8992 8543.75 8435.0 9746.5 9998.0
-Julie Firrelli Sales 9181 8550.75 6660.0 9721.5 10563.0
-Steve Patterson Sales 9441 8550.75 6660.0 9721.5 10563.0
-Mary Patterson Accounting 9998 8543.75 8435.0 9746.5 9998.0
-Loui Bondur SCM 10449 10449.0 10449.0 11303.0 11303.0
-George Vanauf Sales 10563 8550.75 6660.0 9721.5 10563.0
-Barry Jones SCM 10586 10449.0 10449.0 11303.0 11303.0
-Pamela Castillo SCM 11303 10449.0 10449.0 11303.0 11303.0
-Gerard Bondur Accounting 11472 8543.75 8435.0 9746.5 9998.0
-Larry Bott SCM 11798 10449.0 10449.0 11303.0 11303.0
-
-
--- !query
-SELECT
- employee_name,
- department,
- salary,
- percentile_cont(0.5) WITHIN GROUP (ORDER BY salary) OVER w,
- percentile_disc(0.5) WITHIN GROUP (ORDER BY salary) OVER w,
- percentile_cont(0.5) WITHIN GROUP (ORDER BY salary DESC) OVER w,
- percentile_disc(0.5) WITHIN GROUP (ORDER BY salary DESC) OVER w
-FROM basic_pays
-WHERE salary > 8900
-WINDOW w AS (PARTITION BY department)
-ORDER BY salary
--- !query schema
-struct<employee_name:string,department:string,salary:int,percentile_cont(0.5) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_disc(0.5) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_cont(0.5) WITHIN GROUP (ORDER BY v DESC) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,perc [...]
--- !query output
-Jeff Firrelli Accounting 8992 9998.0 9998.0 9998.0 9998.0
-Julie Firrelli Sales 9181 9441.0 9441.0 9441.0 9441.0
-Steve Patterson Sales 9441 9441.0 9441.0 9441.0 9441.0
-Mary Patterson Accounting 9998 9998.0 9998.0 9998.0 9998.0
-Loui Bondur SCM 10449 10944.5 10586.0 10944.5 11303.0
-George Vanauf Sales 10563 9441.0 9441.0 9441.0 9441.0
-Barry Jones SCM 10586 10944.5 10586.0 10944.5 11303.0
-Pamela Castillo SCM 11303 10944.5 10586.0 10944.5 11303.0
-Gerard Bondur Accounting 11472 9998.0 9998.0 9998.0 9998.0
-Larry Bott SCM 11798 10944.5 10586.0 10944.5 11303.0
-
-
--- !query
-SELECT
- employee_name,
- department,
- salary,
- percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER w,
- percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w
-FROM basic_pays
-WINDOW w AS (PARTITION BY department ORDER BY salary)
-ORDER BY salary
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.AnalysisException
-Cannot specify order by or frame for 'percentile_cont'.
-
-
--- !query
-SELECT
- employee_name,
- department,
- salary,
- percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER w,
- percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w
-FROM basic_pays
-WINDOW w AS (PARTITION BY department ORDER BY salary)
-ORDER BY salary
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.AnalysisException
-Cannot specify order by or frame for 'percentile_disc'.
-
-
--- !query
-SELECT
- employee_name,
- department,
- salary,
- percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER w,
- percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w
-FROM basic_pays
-WINDOW w AS (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING)
-ORDER BY salary
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.AnalysisException
-Cannot specify order by or frame for 'percentile_cont'.
-
-
--- !query
-SELECT
- employee_name,
- department,
- salary,
- percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER w,
- percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w
-FROM basic_pays
-WINDOW w AS (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING)
-ORDER BY salary
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.AnalysisException
-Cannot specify order by or frame for 'percentile_disc'.
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org