You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by xi...@apache.org on 2023/06/22 17:25:38 UTC
[pinot] branch master updated: [multistage] Add support for RANK and DENSE_RANK ranking window functions (#10700)
This is an automated email from the ASF dual-hosted git repository.
xiangfu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 3a326b0656 [multistage] Add support for RANK and DENSE_RANK ranking window functions (#10700)
3a326b0656 is described below
commit 3a326b06569a6b88362d27eb61995c38ac5237c1
Author: Sonam Mandal <so...@linkedin.com>
AuthorDate: Thu Jun 22 10:25:33 2023 -0700
[multistage] Add support for RANK and DENSE_RANK ranking window functions (#10700)
* Add support for RANK and DENSE_RANK window functions
* Empty-Commit
* Fix tests after rebase
* Empty-Commit
---
.../rules/PinotWindowExchangeNodeInsertRule.java | 7 +-
.../pinot/query/planner/plannode/WindowNode.java | 6 +-
.../pinot/query/QueryEnvironmentTestBase.java | 5 +
.../resources/queries/WindowFunctionPlans.json | 874 ++++++++++-
.../runtime/operator/WindowAggregateOperator.java | 64 +-
.../operator/WindowAggregateOperatorTest.java | 71 +-
.../test/resources/queries/WindowFunctions.json | 1574 +++++++++++++++++++-
7 files changed, 2574 insertions(+), 27 deletions(-)
diff --git a/pinot-query-planner/src/main/java/org/apache/calcite/rel/rules/PinotWindowExchangeNodeInsertRule.java b/pinot-query-planner/src/main/java/org/apache/calcite/rel/rules/PinotWindowExchangeNodeInsertRule.java
index da9a8d98ed..b3f1176705 100644
--- a/pinot-query-planner/src/main/java/org/apache/calcite/rel/rules/PinotWindowExchangeNodeInsertRule.java
+++ b/pinot-query-planner/src/main/java/org/apache/calcite/rel/rules/PinotWindowExchangeNodeInsertRule.java
@@ -52,7 +52,9 @@ import org.apache.calcite.tools.RelBuilderFactory;
* Special rule for Pinot, this rule is fixed to always insert an exchange or sort exchange below the WINDOW node.
* TODO:
* 1. Add support for more than one window group
- * 2. Add support for functions other than aggregation functions (AVG, COUNT, MAX, MIN, SUM, BOOL_AND, BOOL_OR)
+ * 2. Add support for functions other than:
+ * a. Aggregation functions (AVG, COUNT, MAX, MIN, SUM, BOOL_AND, BOOL_OR)
+ * b. Ranking functions (ROW_NUMBER, RANK, DENSE_RANK)
* 3. Add support for custom frames
*/
public class PinotWindowExchangeNodeInsertRule extends RelOptRule {
@@ -62,7 +64,8 @@ public class PinotWindowExchangeNodeInsertRule extends RelOptRule {
// Supported window functions
// OTHER_FUNCTION supported are: BOOL_AND, BOOL_OR
private static final Set<SqlKind> SUPPORTED_WINDOW_FUNCTION_KIND = ImmutableSet.of(SqlKind.SUM, SqlKind.SUM0,
- SqlKind.MIN, SqlKind.MAX, SqlKind.COUNT, SqlKind.ROW_NUMBER, SqlKind.OTHER_FUNCTION);
+ SqlKind.MIN, SqlKind.MAX, SqlKind.COUNT, SqlKind.ROW_NUMBER, SqlKind.RANK, SqlKind.DENSE_RANK,
+ SqlKind.OTHER_FUNCTION);
public PinotWindowExchangeNodeInsertRule(RelBuilderFactory factory) {
super(operand(LogicalWindow.class, any()), factory, null);
diff --git a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/plannode/WindowNode.java b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/plannode/WindowNode.java
index 810859ae22..d51da18d8d 100644
--- a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/plannode/WindowNode.java
+++ b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/plannode/WindowNode.java
@@ -53,11 +53,11 @@ public class WindowNode extends AbstractPlanNode {
/**
* Enum to denote the type of window frame
- * ROW - ROW type window frame
+ * ROWS - ROWS type window frame
* RANGE - RANGE type window frame
*/
public enum WindowFrameType {
- ROW,
+ ROWS,
RANGE
}
@@ -95,7 +95,7 @@ public class WindowNode extends AbstractPlanNode {
_lowerBound = Integer.MIN_VALUE;
// Upper bound can only be unbounded following or current row for now
_upperBound = windowGroup.upperBound.isUnbounded() ? Integer.MAX_VALUE : 0;
- _windowFrameType = windowGroup.isRows ? WindowFrameType.ROW : WindowFrameType.RANGE;
+ _windowFrameType = windowGroup.isRows ? WindowFrameType.ROWS : WindowFrameType.RANGE;
// TODO: Constants are used to store constants needed such as the frame literals. For now just save this, need to
// extract the constant values into bounds as a part of frame support.
diff --git a/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryEnvironmentTestBase.java b/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryEnvironmentTestBase.java
index c5b328e3f2..1054aa25ff 100644
--- a/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryEnvironmentTestBase.java
+++ b/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryEnvironmentTestBase.java
@@ -117,6 +117,11 @@ public class QueryEnvironmentTestBase {
new Object[]{"SELECT a.col1, SUM(a.col3) OVER (ORDER BY a.col2, a.col1), MIN(a.col3) OVER (ORDER BY a.col2, "
+ "a.col1) FROM a"},
new Object[]{"SELECT a.col1, ROW_NUMBER() OVER(PARTITION BY a.col2 ORDER BY a.col3) FROM a"},
+ new Object[]{"SELECT RANK() OVER(PARTITION BY a.col2 ORDER BY a.col2) FROM a"},
+ new Object[]{"SELECT col1, total, rank FROM (SELECT a.col1 as col1, count(*) as total, "
+ + "RANK() OVER(ORDER BY count(*) DESC) AS rank FROM a GROUP BY a.col1) WHERE rank < 5"},
+ new Object[]{"SELECT RANK() OVER(PARTITION BY a.col2 ORDER BY a.col1) FROM a"},
+ new Object[]{"SELECT DENSE_RANK() OVER(ORDER BY a.col1) FROM a"},
new Object[]{"SELECT a.col1, SUM(a.col3) OVER (ORDER BY a.col2), MIN(a.col3) OVER (ORDER BY a.col2) FROM a"},
new Object[]{"SELECT /*+ skipLeafStageGroupByAggregation */ a.col1, SUM(a.col3) FROM a WHERE a.col3 >= 0"
+ " AND a.col2 = 'a' GROUP BY a.col1"},
diff --git a/pinot-query-planner/src/test/resources/queries/WindowFunctionPlans.json b/pinot-query-planner/src/test/resources/queries/WindowFunctionPlans.json
index 2f75b4b2ce..c342447cd6 100644
--- a/pinot-query-planner/src/test/resources/queries/WindowFunctionPlans.json
+++ b/pinot-query-planner/src/test/resources/queries/WindowFunctionPlans.json
@@ -1238,6 +1238,19 @@
"\n"
]
},
+ {
+ "description": "single OVER(ORDER BY) only rank",
+ "sql": "EXPLAIN PLAN FOR SELECT RANK() OVER(ORDER BY a.col2) FROM a",
+ "output": [
+ "Execution Plan",
+ "\nLogicalProject($0=[$1])",
+ "\n LogicalWindow(window#0=[window(order by [0] aggs [RANK()])])",
+ "\n PinotLogicalSortExchange(distribution=[hash], collation=[[0]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalProject(col2=[$1])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "single OVER(ORDER BY) only with select alias",
"sql": "EXPLAIN PLAN FOR SELECT SUM(a.col3) OVER(ORDER BY a.col2) AS sum FROM a",
@@ -1265,6 +1278,19 @@
"\n"
]
},
+ {
+ "description": "single OVER(ORDER BY) and select col dense_rank",
+ "sql": "EXPLAIN PLAN FOR SELECT a.col1, DENSE_RANK() OVER(ORDER BY a.col2 DESC) FROM a",
+ "output": [
+ "Execution Plan",
+ "\nLogicalProject(col1=[$0], $1=[$2])",
+ "\n LogicalWindow(window#0=[window(order by [1 DESC] aggs [DENSE_RANK()])])",
+ "\n PinotLogicalSortExchange(distribution=[hash], collation=[[1 DESC]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalProject(col1=[$0], col2=[$1])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "single OVER(ORDER BY) row_number and select col",
"sql": "EXPLAIN PLAN FOR SELECT a.col1, ROW_NUMBER() OVER(ORDER BY a.col2) FROM a",
@@ -1294,6 +1320,22 @@
"\n"
]
},
+ {
+ "description": "single OVER(ORDER BY) rank and select col with global order by on different column as inside over",
+ "sql": "EXPLAIN PLAN FOR SELECT a.col1, RANK() OVER(ORDER BY a.col2) FROM a ORDER BY a.col1",
+ "output": [
+ "Execution Plan",
+ "\nLogicalSort(sort0=[$0], dir0=[ASC], offset=[0])",
+ "\n PinotLogicalSortExchange(distribution=[hash], collation=[[0]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalSort(sort0=[$0], dir0=[ASC])",
+ "\n LogicalProject(col1=[$0], $1=[$2])",
+ "\n LogicalWindow(window#0=[window(order by [1] aggs [RANK()])])",
+ "\n PinotLogicalSortExchange(distribution=[hash], collation=[[1]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalProject(col1=[$0], col2=[$1])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "single OVER(ORDER BY) row_number and select col with global order by on same column as inside over",
"sql": "EXPLAIN PLAN FOR SELECT a.col1, ROW_NUMBER() OVER(ORDER BY a.col2) FROM a ORDER BY a.col2",
@@ -1310,6 +1352,22 @@
"\n"
]
},
+ {
+ "description": "single OVER(ORDER BY) dense_rank and select col with global order by on same column as inside over",
+ "sql": "EXPLAIN PLAN FOR SELECT a.col1, DENSE_RANK() OVER(ORDER BY a.col2) FROM a ORDER BY a.col2",
+ "output": [
+ "Execution Plan",
+ "\nLogicalSort(sort0=[$2], dir0=[ASC], offset=[0])",
+ "\n PinotLogicalSortExchange(distribution=[hash], collation=[[2]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalSort(sort0=[$2], dir0=[ASC])",
+ "\n LogicalProject(col1=[$0], EXPR$1=[$2], col2=[$1])",
+ "\n LogicalWindow(window#0=[window(order by [1] aggs [DENSE_RANK()])])",
+ "\n PinotLogicalSortExchange(distribution=[hash], collation=[[1]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalProject(col1=[$0], col2=[$1])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "single OVER(ORDER BY) row_number and select col with order by on row number",
"sql": "EXPLAIN PLAN FOR SELECT a.col1, ROW_NUMBER() OVER(ORDER BY a.col2) as row_number FROM a ORDER BY row_number DESC",
@@ -1326,6 +1384,38 @@
"\n"
]
},
+ {
+ "description": "single OVER(ORDER BY) RANK and select col with order by on row number",
+ "sql": "EXPLAIN PLAN FOR SELECT a.col1, RANK() OVER(ORDER BY a.col2) as rank FROM a ORDER BY rank DESC",
+ "output": [
+ "Execution Plan",
+ "\nLogicalSort(sort0=[$1], dir0=[DESC], offset=[0])",
+ "\n PinotLogicalSortExchange(distribution=[hash], collation=[[1 DESC]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalSort(sort0=[$1], dir0=[DESC])",
+ "\n LogicalProject(col1=[$0], $1=[$2])",
+ "\n LogicalWindow(window#0=[window(order by [1] aggs [RANK()])])",
+ "\n PinotLogicalSortExchange(distribution=[hash], collation=[[1]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalProject(col1=[$0], col2=[$1])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
+ {
+ "description": "single OVER(ORDER BY) dense_rank and select col with order by on row number",
+ "sql": "EXPLAIN PLAN FOR SELECT a.col1, DENSE_RANK() OVER(ORDER BY a.col2) as dense_rank FROM a ORDER BY dense_rank DESC",
+ "output": [
+ "Execution Plan",
+ "\nLogicalSort(sort0=[$1], dir0=[DESC], offset=[0])",
+ "\n PinotLogicalSortExchange(distribution=[hash], collation=[[1 DESC]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalSort(sort0=[$1], dir0=[DESC])",
+ "\n LogicalProject(col1=[$0], $1=[$2])",
+ "\n LogicalWindow(window#0=[window(order by [1] aggs [DENSE_RANK()])])",
+ "\n PinotLogicalSortExchange(distribution=[hash], collation=[[1]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalProject(col1=[$0], col2=[$1])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "single OVER(ORDER BY) and select col with select alias",
"sql": "EXPLAIN PLAN FOR SELECT a.col1 AS value1, AVG(a.col3) OVER(ORDER BY a.col2) AS avg FROM a",
@@ -1353,6 +1443,20 @@
"\n"
]
},
+ {
+ "description": "single OVER(ORDER BY) rank and select col with select alias",
+ "sql": "EXPLAIN PLAN FOR SELECT a.col1 AS value1, RANK() OVER(ORDER BY a.col2) AS rank FROM a",
+ "notes": "TODO: Look into why aliases are getting ignored in the final plan",
+ "output": [
+ "Execution Plan",
+ "\nLogicalProject(col1=[$0], $1=[$2])",
+ "\n LogicalWindow(window#0=[window(order by [1] aggs [RANK()])])",
+ "\n PinotLogicalSortExchange(distribution=[hash], collation=[[1]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalProject(col1=[$0], col2=[$1])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "single OVER(ORDER BY) with default frame",
"sql": "EXPLAIN PLAN FOR SELECT MAX(a.col3) OVER(ORDER BY a.col1 RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM a",
@@ -1414,6 +1518,22 @@
"\n"
]
},
+ {
+ "description": "single OVER(ORDER BY) rank and select col with LIMIT",
+ "sql": "EXPLAIN PLAN FOR SELECT a.col1, RANK() OVER(ORDER BY a.col2) FROM a LIMIT 10",
+ "output": [
+ "Execution Plan",
+ "\nLogicalSort(offset=[0], fetch=[10])",
+ "\n PinotLogicalSortExchange(distribution=[hash], collation=[[]], isSortOnSender=[false], isSortOnReceiver=[false])",
+ "\n LogicalSort(fetch=[10])",
+ "\n LogicalProject(col1=[$0], $1=[$2])",
+ "\n LogicalWindow(window#0=[window(order by [1] aggs [RANK()])])",
+ "\n PinotLogicalSortExchange(distribution=[hash], collation=[[1]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalProject(col1=[$0], col2=[$1])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "single OVER(ORDER BY) and select col with global order by with LIMIT",
"sql": "EXPLAIN PLAN FOR SELECT a.col2, MIN(a.col3) OVER(ORDER BY a.col1 DESC) FROM a ORDER BY a.col1 LIMIT 10",
@@ -1430,6 +1550,22 @@
"\n"
]
},
+ {
+ "description": "single OVER(ORDER BY) dense_rank and select col with global order by with LIMIT",
+ "sql": "EXPLAIN PLAN FOR SELECT a.col2, DENSE_RANK() OVER(ORDER BY a.col1 DESC) FROM a ORDER BY a.col3 LIMIT 10",
+ "output": [
+ "Execution Plan",
+ "\nLogicalSort(sort0=[$2], dir0=[ASC], offset=[0], fetch=[10])",
+ "\n PinotLogicalSortExchange(distribution=[hash], collation=[[2]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalSort(sort0=[$2], dir0=[ASC], fetch=[10])",
+ "\n LogicalProject(col2=[$1], EXPR$1=[$3], col3=[$2])",
+ "\n LogicalWindow(window#0=[window(order by [0 DESC] aggs [DENSE_RANK()])])",
+ "\n PinotLogicalSortExchange(distribution=[hash], collation=[[0 DESC]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalProject(col1=[$0], col2=[$1], col3=[$2])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "single OVER(ORDER BY) and transform col",
"sql": "EXPLAIN PLAN FOR SELECT SUBSTR(a.col1, 0, 2), COUNT(a.col2) OVER(ORDER BY a.col3) FROM a",
@@ -1443,6 +1579,19 @@
"\n"
]
},
+ {
+ "description": "single OVER(ORDER BY) rank and transform col",
+ "sql": "EXPLAIN PLAN FOR SELECT SUBSTR(a.col1, 0, 2), RANK() OVER(ORDER BY a.col3) FROM a",
+ "output": [
+ "Execution Plan",
+ "\nLogicalProject($0=[$1], $1=[$2])",
+ "\n LogicalWindow(window#0=[window(order by [0] aggs [RANK()])])",
+ "\n PinotLogicalSortExchange(distribution=[hash], collation=[[0]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalProject(col3=[$2], $1=[SUBSTR($0, 0, 2)])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "single OVER(ORDER BY) select col and filter",
"sql": "EXPLAIN PLAN FOR SELECT a.col2, AVG(a.col3) OVER(ORDER BY a.col2) FROM a WHERE a.col3 > 10 AND a.col3 <= 500",
@@ -1457,6 +1606,19 @@
"\n"
]
},
+ {
+ "description": "single OVER(ORDER BY) dense_rank select col and filter",
+ "sql": "EXPLAIN PLAN FOR SELECT a.col2, DENSE_RANK() OVER(ORDER BY a.col2) FROM a WHERE a.col3 > 10 AND a.col3 <= 500",
+ "output": [
+ "Execution Plan",
+ "\nLogicalWindow(window#0=[window(order by [0] aggs [DENSE_RANK()])])",
+ "\n PinotLogicalSortExchange(distribution=[hash], collation=[[0]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalProject(col2=[$1])",
+ "\n LogicalFilter(condition=[AND(>($2, 10), <=($2, 500))])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "single OVER(ORDER BY) with select transform and filter",
"sql": "EXPLAIN PLAN FOR SELECT CONCAT(a.col1, '-', a.col2), AVG(a.col3) OVER(ORDER BY a.col2) FROM a where a.col1 NOT IN ('foo', 'bar') OR a.col3 >= 42",
@@ -1485,6 +1647,20 @@
"\n"
]
},
+ {
+ "description": "single OVER(ORDER BY) rank with select transform and filter",
+ "sql": "EXPLAIN PLAN FOR SELECT CONCAT(a.col1, '-', a.col2), RANK() OVER(ORDER BY a.col2) FROM a where a.col1 NOT IN ('foo', 'bar') OR a.col3 >= 42",
+ "output": [
+ "Execution Plan",
+ "\nLogicalProject($0=[$1], $1=[$2])",
+ "\n LogicalWindow(window#0=[window(order by [0] aggs [RANK()])])",
+ "\n PinotLogicalSortExchange(distribution=[hash], collation=[[0]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalProject(col2=[$1], $1=[CONCAT($0, '-', $1)])",
+ "\n LogicalFilter(condition=[OR(AND(<>($0, 'bar'), <>($0, 'foo')), >=($2, 42))])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "single OVER(ORDER BY) with transform on order by key",
"sql": "EXPLAIN PLAN FOR SELECT AVG(a.col3) OVER(ORDER BY CONCAT(a.col1, '-', a.col2)) FROM a",
@@ -1511,6 +1687,19 @@
"\n"
]
},
+ {
+ "description": "single OVER(ORDER BY) dense_rank with transform on order by key",
+ "sql": "EXPLAIN PLAN FOR SELECT DENSE_RANK() OVER(ORDER BY CONCAT(a.col1, '-', a.col2)) FROM a",
+ "output": [
+ "Execution Plan",
+ "\nLogicalProject($0=[$1])",
+ "\n LogicalWindow(window#0=[window(order by [0] aggs [DENSE_RANK()])])",
+ "\n PinotLogicalSortExchange(distribution=[hash], collation=[[0]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalProject($0=[CONCAT($0, '-', $1)])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "multiple OVER(ORDER BY)s on the same key only",
"sql": "EXPLAIN PLAN FOR SELECT MAX(a.col3) OVER(ORDER BY a.col1), COUNT(a.col2) OVER(ORDER BY a.col1) FROM a",
@@ -1524,6 +1713,19 @@
"\n"
]
},
+ {
+ "description": "multiple OVER(ORDER BY)s on the same key only - rank functions",
+ "sql": "EXPLAIN PLAN FOR SELECT RANK() OVER(ORDER BY a.col1), DENSE_RANK() OVER(ORDER BY a.col1) FROM a",
+ "output": [
+ "Execution Plan",
+ "\nLogicalProject($0=[$1], $1=[$2])",
+ "\n LogicalWindow(window#0=[window(order by [0] aggs [RANK(), DENSE_RANK()])])",
+ "\n PinotLogicalSortExchange(distribution=[hash], collation=[[0]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalProject(col1=[$0])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "multiple OVER(ORDER BY)s on the same key only with select alias",
"sql": "EXPLAIN PLAN FOR SELECT MAX(a.col3) OVER(ORDER BY a.col1) AS max, COUNT(a.col2) OVER(ORDER BY a.col1) AS count FROM a",
@@ -1551,6 +1753,19 @@
"\n"
]
},
+ {
+ "description": "multiple OVER(ORDER BY)s on the same key and select col - ranking functions",
+ "sql": "EXPLAIN PLAN FOR SELECT a.col3, DENSE_RANK() OVER(ORDER BY a.col1), RANK() OVER(ORDER BY a.col1) FROM a",
+ "output": [
+ "Execution Plan",
+ "\nLogicalProject(col3=[$1], $1=[$2], $2=[$3])",
+ "\n LogicalWindow(window#0=[window(order by [0] aggs [DENSE_RANK(), RANK()])])",
+ "\n PinotLogicalSortExchange(distribution=[hash], collation=[[0]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalProject(col1=[$0], col3=[$2])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "multiple OVER(ORDER BY)s on the same key and select col with select alias",
"sql": "EXPLAIN PLAN FOR SELECT a.col1 AS value1, AVG(a.col3) OVER(ORDER BY a.col1) AS avg, MIN(a.col3) OVER(ORDER BY a.col1) AS min FROM a",
@@ -1564,6 +1779,19 @@
"\n"
]
},
+ {
+ "description": "multiple OVER(ORDER BY)s on the same key and select col with select alias, one ranking function",
+ "sql": "EXPLAIN PLAN FOR SELECT a.col1 AS value1, DENSE_RANK() OVER(ORDER BY a.col1) AS dense_rank, MIN(a.col3) OVER(ORDER BY a.col1) AS min FROM a",
+ "output": [
+ "Execution Plan",
+ "\nLogicalProject(col1=[$0], $1=[$2], $2=[$3])",
+ "\n LogicalWindow(window#0=[window(order by [0] aggs [DENSE_RANK(), MIN($1)])])",
+ "\n PinotLogicalSortExchange(distribution=[hash], collation=[[0]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalProject(col1=[$0], col3=[$2])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "multiple OVER(ORDER BY)s on the same key with default frame for one and not the other",
"sql": "EXPLAIN PLAN FOR SELECT COUNT(a.col3) OVER(ORDER BY a.col2 RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), MIN(a.col3) OVER(ORDER BY a.col2) FROM a",
@@ -1577,6 +1805,19 @@
"\n"
]
},
+ {
+ "description": "multiple OVER(ORDER BY)s on the same key with default frame for one and not the other - one ranking function",
+ "sql": "EXPLAIN PLAN FOR SELECT COUNT(a.col3) OVER(ORDER BY a.col2 RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), RANK() OVER(ORDER BY a.col2) FROM a",
+ "output": [
+ "Execution Plan",
+ "\nLogicalProject($0=[$2], $1=[$3])",
+ "\n LogicalWindow(window#0=[window(order by [0] aggs [COUNT($1), RANK()])])",
+ "\n PinotLogicalSortExchange(distribution=[hash], collation=[[0]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalProject(col2=[$1], col3=[$2])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "multiple OVER(ORDER BY)s on the same key and select col with global order by",
"sql": "EXPLAIN PLAN FOR SELECT a.col1, SUM(a.col3) OVER(ORDER BY a.col2, a.col1 DESC), AVG(a.col3) OVER(ORDER BY a.col2, a.col1 DESC) FROM a ORDER BY a.col1 DESC",
@@ -1593,6 +1834,22 @@
"\n"
]
},
+ {
+ "description": "multiple OVER(ORDER BY)s on the same key and select col with global order by ranking functions",
+ "sql": "EXPLAIN PLAN FOR SELECT a.col1, RANK() OVER(ORDER BY a.col2, a.col1 DESC), AVG(a.col3) OVER(ORDER BY a.col2, a.col1 DESC) FROM a ORDER BY a.col1 DESC",
+ "output": [
+ "Execution Plan",
+ "\nLogicalSort(sort0=[$0], dir0=[DESC], offset=[0])",
+ "\n PinotLogicalSortExchange(distribution=[hash], collation=[[0 DESC]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalSort(sort0=[$0], dir0=[DESC])",
+ "\n LogicalProject(col1=[$0], EXPR$1=[$3], EXPR$2=[/(CAST($4):DOUBLE NOT NULL, $5)])",
+ "\n LogicalWindow(window#0=[window(order by [1, 0 DESC] aggs [RANK(), SUM($2), COUNT($2)])])",
+ "\n PinotLogicalSortExchange(distribution=[hash], collation=[[1, 0 DESC]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalProject(col1=[$0], col2=[$1], col3=[$2])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "multiple OVER(ORDER BY)s on the same key and select col with LIMIT",
"sql": "EXPLAIN PLAN FOR SELECT a.col1, AVG(a.col3) OVER(ORDER BY a.col1), MIN(a.col3) OVER(ORDER BY a.col1) FROM a LIMIT 10",
@@ -1625,6 +1882,22 @@
"\n"
]
},
+ {
+ "description": "multiple OVER(ORDER BY)s on the same key and select col with global order by with LIMIT - ranking functions",
+ "sql": "EXPLAIN PLAN FOR SELECT a.col1, DENSE_RANK() OVER(ORDER BY a.col2, a.col1 DESC), RANK() OVER(ORDER BY a.col2, a.col1 DESC) FROM a ORDER BY a.col2, a.col1 DESC LIMIT 10",
+ "output": [
+ "Execution Plan",
+ "\nLogicalSort(sort0=[$3], sort1=[$0], dir0=[ASC], dir1=[DESC], offset=[0], fetch=[10])",
+ "\n PinotLogicalSortExchange(distribution=[hash], collation=[[3, 0 DESC]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalSort(sort0=[$3], sort1=[$0], dir0=[ASC], dir1=[DESC], fetch=[10])",
+ "\n LogicalProject(col1=[$0], EXPR$1=[$2], EXPR$2=[$3], col2=[$1])",
+ "\n LogicalWindow(window#0=[window(order by [1, 0 DESC] aggs [DENSE_RANK(), RANK()])])",
+ "\n PinotLogicalSortExchange(distribution=[hash], collation=[[1, 0 DESC]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalProject(col1=[$0], col2=[$1])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "multiple OVER(ORDER BY)s on the same key and transform col",
"sql": "EXPLAIN PLAN FOR SELECT REVERSE(a.col1), SUM(a.col3) OVER(ORDER BY a.col2), MAX(a.col3) OVER(ORDER BY a.col2) FROM a",
@@ -1652,6 +1925,19 @@
"\n"
]
},
+ {
+ "description": "multiple OVER(ORDER BY)s ranking functinos on the same key and transform col",
+ "sql": "EXPLAIN PLAN FOR SELECT REVERSE(a.col1), DENSE_RANK() OVER(ORDER BY a.col2), RANK() OVER(ORDER BY a.col2) FROM a",
+ "output": [
+ "Execution Plan",
+ "\nLogicalProject($0=[$1], $1=[$2], $2=[$3])",
+ "\n LogicalWindow(window#0=[window(order by [0] aggs [DENSE_RANK(), RANK()])])",
+ "\n PinotLogicalSortExchange(distribution=[hash], collation=[[0]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalProject(col2=[$1], $1=[REVERSE($0)])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "multiple OVER(ORDER BY)s on the same key select col and filter",
"sql": "EXPLAIN PLAN FOR SELECT a.col1, AVG(a.col3) OVER(ORDER BY a.col1), COUNT(a.col1) OVER(ORDER BY a.col1) FROM a WHERE a.col3 > 42 AND a.col1 IN ('vader', 'chewbacca', 'yoda')",
@@ -1681,7 +1967,21 @@
]
},
{
- "description": "multiple OVER(ORDER BY) with transform on partition key",
+ "description": "multiple OVER(ORDER BY)s on the same key with select transform and filter - ranking functions",
+ "sql": "EXPLAIN PLAN FOR SELECT REVERSE(CONCAT(a.col1, ' ', a.col2)), RANK() OVER(ORDER BY a.col1), DENSE_RANK() OVER(ORDER BY a.col1) FROM a where a.col2 NOT IN ('foo', 'bar', 'baz')",
+ "output": [
+ "Execution Plan",
+ "\nLogicalProject($0=[$1], $1=[$2], $2=[$3])",
+ "\n LogicalWindow(window#0=[window(order by [0] aggs [RANK(), DENSE_RANK()])])",
+ "\n PinotLogicalSortExchange(distribution=[hash], collation=[[0]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalProject(col1=[$0], $1=[REVERSE(CONCAT($0, ' ', $1))])",
+ "\n LogicalFilter(condition=[AND(<>($1, 'bar'), <>($1, 'baz'), <>($1, 'foo'))])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
+ {
+ "description": "multiple OVER(ORDER BY) with transform on order by key",
"sql": "EXPLAIN PLAN FOR SELECT AVG(a.col3) OVER(ORDER BY REVERSE(CONCAT(a.col1, '-', a.col2))), COUNT(a.col1) OVER(ORDER BY REVERSE(CONCAT(a.col1, '-', a.col2))) FROM a",
"output": [
"Execution Plan",
@@ -1693,6 +1993,19 @@
"\n"
]
},
+ {
+ "description": "multiple OVER(ORDER BY) with transform on order by key - ranking functions",
+ "sql": "EXPLAIN PLAN FOR SELECT DENSE_RANK() OVER(ORDER BY REVERSE(CONCAT(a.col1, '-', a.col2))), COUNT(a.col1) OVER(ORDER BY REVERSE(CONCAT(a.col1, '-', a.col2))) FROM a",
+ "output": [
+ "Execution Plan",
+ "\nLogicalProject($0=[$2], $1=[$3])",
+ "\n LogicalWindow(window#0=[window(order by [1] aggs [DENSE_RANK(), COUNT($0)])])",
+ "\n PinotLogicalSortExchange(distribution=[hash], collation=[[1]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalProject(col1=[$0], $1=[REVERSE(CONCAT($0, '-', $1))])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "single OVER(PARTITION BY k1 ORDER BY k1) only",
"sql": "EXPLAIN PLAN FOR SELECT SUM(a.col3) OVER(PARTITION BY a.col2 ORDER BY a.col2) FROM a",
@@ -1706,6 +2019,19 @@
"\n"
]
},
+ {
+ "description": "single OVER(PARTITION BY k1 ORDER BY k1) only",
+ "sql": "EXPLAIN PLAN FOR SELECT RANK() OVER(PARTITION BY a.col2 ORDER BY a.col2) FROM a",
+ "output": [
+ "Execution Plan",
+ "\nLogicalProject($0=[$1])",
+ "\n LogicalWindow(window#0=[window(partition {0} order by [0] aggs [RANK()])])",
+ "\n PinotLogicalExchange(distribution=[hash[0]])",
+ "\n LogicalProject(col2=[$1])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "single OVER(PARTITION BY k1 ORDER BY k1) only with alias",
"sql": "EXPLAIN PLAN FOR SELECT SUM(a.col3) OVER(PARTITION BY a.col2 ORDER BY a.col2) AS sum FROM a",
@@ -1760,6 +2086,19 @@
"\n"
]
},
+ {
+ "description": "single OVER(PARTITION BY k1 ORDER BY k1) dense_rank and select col",
+ "sql": "EXPLAIN PLAN FOR SELECT a.col1, DENSE_RANK() OVER(PARTITION BY a.col2 ORDER BY a.col2) FROM a",
+ "output": [
+ "Execution Plan",
+ "\nLogicalProject(col1=[$0], $1=[$2])",
+ "\n LogicalWindow(window#0=[window(partition {1} order by [1] aggs [DENSE_RANK()])])",
+ "\n PinotLogicalExchange(distribution=[hash[1]])",
+ "\n LogicalProject(col1=[$0], col2=[$1])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "single OVER(PARTITION BY k1 ORDER BY k1) and select col with select alias",
"sql": "EXPLAIN PLAN FOR SELECT a.col1 AS value1, AVG(a.col3) OVER(PARTITION BY a.col2 ORDER BY a.col2) AS avg FROM a",
@@ -1773,6 +2112,20 @@
"\n"
]
},
+ {
+ "description": "single OVER(PARTITION BY k1 ORDER BY k1) and select col with select alias - ranking function",
+ "sql": "EXPLAIN PLAN FOR SELECT a.col1 AS value1, RANK() OVER(PARTITION BY a.col2 ORDER BY a.col2) AS rank FROM a",
+ "notes": "TODO: Look into why aliases are getting ignored in the final plan",
+ "output": [
+ "Execution Plan",
+ "\nLogicalProject(col1=[$0], $1=[$2])",
+ "\n LogicalWindow(window#0=[window(partition {1} order by [1] aggs [RANK()])])",
+ "\n PinotLogicalExchange(distribution=[hash[1]])",
+ "\n LogicalProject(col1=[$0], col2=[$1])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "single OVER(PARTITION BY k1 ORDER BY k1) with default frame",
"sql": "EXPLAIN PLAN FOR SELECT MAX(a.col3) OVER(PARTITION BY a.col1 ORDER BY a.col1 RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM a",
@@ -1802,6 +2155,22 @@
"\n"
]
},
+ {
+ "description": "single OVER(PARTITION BY k1 ORDER BY k1) dense_rank and select col with global order by",
+ "sql": "EXPLAIN PLAN FOR SELECT a.col2, DENSE_RANK() OVER(PARTITION BY a.col1 ORDER BY a.col1) FROM a ORDER BY a.col1",
+ "output": [
+ "Execution Plan",
+ "\nLogicalSort(sort0=[$2], dir0=[ASC], offset=[0])",
+ "\n PinotLogicalSortExchange(distribution=[hash], collation=[[2]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalSort(sort0=[$2], dir0=[ASC])",
+ "\n LogicalProject(col2=[$1], EXPR$1=[$2], col1=[$0])",
+ "\n LogicalWindow(window#0=[window(partition {0} order by [0] aggs [DENSE_RANK()])])",
+ "\n PinotLogicalExchange(distribution=[hash[0]])",
+ "\n LogicalProject(col1=[$0], col2=[$1])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "single OVER(PARTITION BY k1 ORDER BY k1) and select col with LIMIT",
"sql": "EXPLAIN PLAN FOR SELECT a.col1, AVG(a.col3) OVER(PARTITION BY a.col2 ORDER BY a.col2) FROM a LIMIT 10",
@@ -1850,6 +2219,22 @@
"\n"
]
},
+ {
+ "description": "single OVER(PARTITION BY k1 ORDER BY k1) rank and select col with global order by with LIMIT",
+ "sql": "EXPLAIN PLAN FOR SELECT a.col2, RANK() OVER(PARTITION BY a.col1 ORDER BY a.col1) FROM a ORDER BY a.col1 LIMIT 10",
+ "output": [
+ "Execution Plan",
+ "\nLogicalSort(sort0=[$2], dir0=[ASC], offset=[0], fetch=[10])",
+ "\n PinotLogicalSortExchange(distribution=[hash], collation=[[2]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalSort(sort0=[$2], dir0=[ASC], fetch=[10])",
+ "\n LogicalProject(col2=[$1], EXPR$1=[$2], col1=[$0])",
+ "\n LogicalWindow(window#0=[window(partition {0} order by [0] aggs [RANK()])])",
+ "\n PinotLogicalExchange(distribution=[hash[0]])",
+ "\n LogicalProject(col1=[$0], col2=[$1])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "single OVER(PARTITION BY k1 ORDER BY k1) and transform col",
"sql": "EXPLAIN PLAN FOR SELECT SUBSTR(a.col1, 0, 2), COUNT(a.col2) OVER(PARTITION BY a.col3 ORDER BY a.col3) FROM a",
@@ -1890,6 +2275,19 @@
"\n"
]
},
+ {
+ "description": "single OVER(PARTITION BY k1 ORDER BY k1) dense_rank select col and filter",
+ "sql": "EXPLAIN PLAN FOR SELECT a.col2, DENSE_RANK() OVER(PARTITION BY a.col2 ORDER BY a.col2) FROM a WHERE a.col3 > 10 AND a.col3 <= 500",
+ "output": [
+ "Execution Plan",
+ "\nLogicalWindow(window#0=[window(partition {0} order by [0] aggs [DENSE_RANK()])])",
+ "\n PinotLogicalExchange(distribution=[hash[0]])",
+ "\n LogicalProject(col2=[$1])",
+ "\n LogicalFilter(condition=[AND(>($2, 10), <=($2, 500))])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "single OVER(PARTITION BY k1 ORDER BY k1) with select transform and filter",
"sql": "EXPLAIN PLAN FOR SELECT CONCAT(a.col1, '-', a.col2), AVG(a.col3) OVER(PARTITION BY a.col2 ORDER BY a.col2) FROM a where a.col1 NOT IN ('foo', 'bar') OR a.col3 >= 42",
@@ -1904,6 +2302,20 @@
"\n"
]
},
+ {
+ "description": "single OVER(PARTITION BY k1 ORDER BY k1) rank with select transform and filter",
+ "sql": "EXPLAIN PLAN FOR SELECT CONCAT(a.col1, '-', a.col2), RANK() OVER(PARTITION BY a.col2 ORDER BY a.col2) FROM a where a.col1 NOT IN ('foo', 'bar') OR a.col3 >= 42",
+ "output": [
+ "Execution Plan",
+ "\nLogicalProject($0=[$1], $1=[$2])",
+ "\n LogicalWindow(window#0=[window(partition {0} order by [0] aggs [RANK()])])",
+ "\n PinotLogicalExchange(distribution=[hash[0]])",
+ "\n LogicalProject(col2=[$1], $1=[CONCAT($0, '-', $1)])",
+ "\n LogicalFilter(condition=[OR(AND(<>($0, 'bar'), <>($0, 'foo')), >=($2, 42))])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "single OVER(PARTITION BY k1 ORDER BY k1) with transform on partition key",
"sql": "EXPLAIN PLAN FOR SELECT AVG(a.col3) OVER(PARTITION BY CONCAT(a.col1, '-', a.col2) ORDER BY CONCAT(a.col1, '-', a.col2)) FROM a",
@@ -1917,6 +2329,19 @@
"\n"
]
},
+ {
+ "description": "single OVER(PARTITION BY k1 ORDER BY k1) with transform on partition key",
+ "sql": "EXPLAIN PLAN FOR SELECT DENSE_RANK() OVER(PARTITION BY CONCAT(a.col1, '-', a.col2) ORDER BY CONCAT(a.col1, '-', a.col2)) FROM a",
+ "output": [
+ "Execution Plan",
+ "\nLogicalProject($0=[$1])",
+ "\n LogicalWindow(window#0=[window(partition {0} order by [0] aggs [DENSE_RANK()])])",
+ "\n PinotLogicalExchange(distribution=[hash[0]])",
+ "\n LogicalProject($0=[CONCAT($0, '-', $1)])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "single OVER(PARTITION BY k1 ORDER BY k1) but order by has different direction and select col",
"sql": "EXPLAIN PLAN FOR SELECT a.col1, AVG(a.col3) OVER(PARTITION BY a.col2 ORDER BY a.col2 DESC) FROM a",
@@ -1982,6 +2407,19 @@
"\n"
]
},
+ {
+ "description": "multiple OVER(PARTITION BY k1 ORDER BY k1)s on the same key only - ranking function",
+ "sql": "EXPLAIN PLAN FOR SELECT DENSE_RANK() OVER(PARTITION BY a.col1 ORDER BY a.col1), COUNT(a.col2) OVER(PARTITION BY a.col1 ORDER BY a.col1) FROM a",
+ "output": [
+ "Execution Plan",
+ "\nLogicalProject($0=[$2], $1=[$3])",
+ "\n LogicalWindow(window#0=[window(partition {0} order by [0] aggs [DENSE_RANK(), COUNT($1)])])",
+ "\n PinotLogicalExchange(distribution=[hash[0]])",
+ "\n LogicalProject(col1=[$0], col2=[$1])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "multiple OVER(PARTITION BY k1 ORDER BY k1)s on the same key only with select alias",
"sql": "EXPLAIN PLAN FOR SELECT MAX(a.col3) OVER(PARTITION BY a.col1 ORDER BY a.col1) AS max, COUNT(a.col2) OVER(PARTITION BY a.col1 ORDER BY a.col1) AS count FROM a",
@@ -2009,6 +2447,18 @@
"\n"
]
},
+ {
+ "description": "multiple OVER(PARTITION BY k1 ORDER BY k1)s on the same key and select col - ranking function",
+ "sql": "EXPLAIN PLAN FOR SELECT a.col1, RANK() OVER(PARTITION BY a.col1 ORDER BY a.col1), DENSE_RANK() OVER(PARTITION BY a.col1 ORDER BY a.col1) FROM a",
+ "output": [
+ "Execution Plan",
+ "\nLogicalWindow(window#0=[window(partition {0} order by [0] aggs [RANK(), DENSE_RANK()])])",
+ "\n PinotLogicalExchange(distribution=[hash[0]])",
+ "\n LogicalProject(col1=[$0])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "multiple OVER(PARTITION BY k1 ORDER BY k1)s on the same key and select col with select alias",
"sql": "EXPLAIN PLAN FOR SELECT a.col1 AS value1, AVG(a.col3) OVER(PARTITION BY a.col1 ORDER BY a.col1) AS avg, MIN(a.col3) OVER(PARTITION BY a.col1 ORDER BY a.col1) AS min FROM a",
@@ -2035,6 +2485,19 @@
"\n"
]
},
+ {
+ "description": "multiple OVER(PARTITION BY k1 ORDER BY k1)s on the same key with default frame for one and not the other ranking function",
+ "sql": "EXPLAIN PLAN FOR SELECT COUNT(a.col3) OVER(PARTITION BY a.col2 ORDER BY a.col2 RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), DENSE_RANK() OVER(PARTITION BY a.col2 ORDER BY a.col2) FROM a",
+ "output": [
+ "Execution Plan",
+ "\nLogicalProject($0=[$2], $1=[$3])",
+ "\n LogicalWindow(window#0=[window(partition {0} order by [0] aggs [COUNT($1), DENSE_RANK()])])",
+ "\n PinotLogicalExchange(distribution=[hash[0]])",
+ "\n LogicalProject(col2=[$1], col3=[$2])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "multiple OVER(PARTITION BY k1 ORDER BY k1)s on the same key and select col with global order by",
"sql": "EXPLAIN PLAN FOR SELECT a.col1, SUM(a.col3) OVER(PARTITION BY a.col2, a.col1 ORDER BY a.col2, a.col1), AVG(a.col3) OVER(PARTITION BY a.col2, a.col1 ORDER BY a.col2, a.col1) FROM a ORDER BY a.col2, a.col1",
@@ -2051,6 +2514,22 @@
"\n"
]
},
+ {
+ "description": "multiple OVER(PARTITION BY k1 ORDER BY k1)s on the same key and select col with global order by ranking functions",
+ "sql": "EXPLAIN PLAN FOR SELECT a.col1, DENSE_RANK() OVER(PARTITION BY a.col2, a.col1 ORDER BY a.col2, a.col1), RANK() OVER(PARTITION BY a.col2, a.col1 ORDER BY a.col2, a.col1) FROM a ORDER BY a.col2, a.col1",
+ "output": [
+ "Execution Plan",
+ "\nLogicalSort(sort0=[$3], sort1=[$0], dir0=[ASC], dir1=[ASC], offset=[0])",
+ "\n PinotLogicalSortExchange(distribution=[hash], collation=[[3, 0]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalSort(sort0=[$3], sort1=[$0], dir0=[ASC], dir1=[ASC])",
+ "\n LogicalProject(col1=[$0], EXPR$1=[$2], EXPR$2=[$3], col2=[$1])",
+ "\n LogicalWindow(window#0=[window(partition {0, 1} order by [1, 0] aggs [DENSE_RANK(), RANK()])])",
+ "\n PinotLogicalExchange(distribution=[hash[0, 1]])",
+ "\n LogicalProject(col1=[$0], col2=[$1])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "multiple OVER(PARTITION BY k1 ORDER BY k1)s on the same key and select col with LIMIT",
"sql": "EXPLAIN PLAN FOR SELECT a.col1, AVG(a.col3) OVER(PARTITION BY a.col1 ORDER BY a.col1), MIN(a.col3) OVER(PARTITION BY a.col1 ORDER BY a.col1) FROM a LIMIT 10",
@@ -2083,6 +2562,22 @@
"\n"
]
},
+ {
+ "description": "multiple OVER(PARTITION BY k1 ORDER BY k1)s on the same key and select col with global order by with LIMIT",
+ "sql": "EXPLAIN PLAN FOR SELECT a.col1, RANK() OVER(PARTITION BY a.col2, a.col1 ORDER BY a.col2, a.col1), DENSE_RANK() OVER(PARTITION BY a.col2, a.col1 ORDER BY a.col2, a.col1) FROM a ORDER BY a.col2, a.col1 LIMIT 10",
+ "output": [
+ "Execution Plan",
+ "\nLogicalSort(sort0=[$3], sort1=[$0], dir0=[ASC], dir1=[ASC], offset=[0], fetch=[10])",
+ "\n PinotLogicalSortExchange(distribution=[hash], collation=[[3, 0]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalSort(sort0=[$3], sort1=[$0], dir0=[ASC], dir1=[ASC], fetch=[10])",
+ "\n LogicalProject(col1=[$0], EXPR$1=[$2], EXPR$2=[$3], col2=[$1])",
+ "\n LogicalWindow(window#0=[window(partition {0, 1} order by [1, 0] aggs [RANK(), DENSE_RANK()])])",
+ "\n PinotLogicalExchange(distribution=[hash[0, 1]])",
+ "\n LogicalProject(col1=[$0], col2=[$1])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "multiple OVER(PARTITION BY k1 ORDER BY k1)s on the same key and transform col",
"sql": "EXPLAIN PLAN FOR SELECT REVERSE(a.col1), SUM(a.col3) OVER(PARTITION BY a.col2 ORDER BY a.col2), MAX(a.col3) OVER(PARTITION BY a.col2 ORDER BY a.col2) FROM a",
@@ -2104,19 +2599,46 @@
"\nLogicalProject(col1=[$0], EXPR$1=[/(CAST($2):DOUBLE NOT NULL, $3)], EXPR$2=[$4])",
"\n LogicalWindow(window#0=[window(partition {0} order by [0] aggs [SUM($1), COUNT($1), COUNT($0)])])",
"\n PinotLogicalExchange(distribution=[hash[0]])",
- "\n LogicalProject(col1=[$0], col3=[$2])",
- "\n LogicalFilter(condition=[AND(>($2, 42), OR(=($0, 'chewbacca':VARCHAR(9)), =($0, 'vader':VARCHAR(9)), =($0, 'yoda':VARCHAR(9))))])",
+ "\n LogicalProject(col1=[$0], col3=[$2])",
+ "\n LogicalFilter(condition=[AND(>($2, 42), OR(=($0, 'chewbacca':VARCHAR(9)), =($0, 'vader':VARCHAR(9)), =($0, 'yoda':VARCHAR(9))))])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
+ {
+ "description": "multiple OVER(PARTITION BY k1 ORDER BY k1)s on the same key select col and filter ranking functions",
+ "sql": "EXPLAIN PLAN FOR SELECT a.col1, RANK() OVER(PARTITION BY a.col1 ORDER BY a.col1), DENSE_RANK() OVER(PARTITION BY a.col1 ORDER BY a.col1) FROM a WHERE a.col3 > 42 AND a.col1 IN ('vader', 'chewbacca', 'yoda')",
+ "output": [
+ "Execution Plan",
+ "\nLogicalWindow(window#0=[window(partition {0} order by [0] aggs [RANK(), DENSE_RANK()])])",
+ "\n PinotLogicalExchange(distribution=[hash[0]])",
+ "\n LogicalProject(col1=[$0])",
+ "\n LogicalFilter(condition=[AND(>($2, 42), OR(=($0, 'chewbacca':VARCHAR(9)), =($0, 'vader':VARCHAR(9)), =($0, 'yoda':VARCHAR(9))))])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
+ {
+ "description": "multiple OVER(PARTITION BY k1 ORDER BY k1)s on the same key with select transform and filter",
+ "sql": "EXPLAIN PLAN FOR SELECT REVERSE(CONCAT(a.col1, ' ', a.col2)), MIN(a.col3) OVER(PARTITION BY a.col1 ORDER BY a.col1), MAX(a.col3) OVER(PARTITION BY a.col1 ORDER BY a.col1) FROM a where a.col2 NOT IN ('foo', 'bar', 'baz')",
+ "output": [
+ "Execution Plan",
+ "\nLogicalProject($0=[$2], $1=[$3], $2=[$4])",
+ "\n LogicalWindow(window#0=[window(partition {0} order by [0] aggs [MIN($1), MAX($1)])])",
+ "\n PinotLogicalExchange(distribution=[hash[0]])",
+ "\n LogicalProject(col1=[$0], col3=[$2], $2=[REVERSE(CONCAT($0, ' ', $1))])",
+ "\n LogicalFilter(condition=[AND(<>($1, 'bar'), <>($1, 'baz'), <>($1, 'foo'))])",
"\n LogicalTableScan(table=[[a]])",
"\n"
]
},
{
- "description": "multiple OVER(PARTITION BY k1 ORDER BY k1)s on the same key with select transform and filter",
- "sql": "EXPLAIN PLAN FOR SELECT REVERSE(CONCAT(a.col1, ' ', a.col2)), MIN(a.col3) OVER(PARTITION BY a.col1 ORDER BY a.col1), MAX(a.col3) OVER(PARTITION BY a.col1 ORDER BY a.col1) FROM a where a.col2 NOT IN ('foo', 'bar', 'baz')",
+ "description": "multiple OVER(PARTITION BY k1 ORDER BY k1)s on the same key with select transform and filter ranking functions",
+ "sql": "EXPLAIN PLAN FOR SELECT REVERSE(CONCAT(a.col1, ' ', a.col2)), DENSE_RANK() OVER(PARTITION BY a.col1 ORDER BY a.col1), MAX(a.col3) OVER(PARTITION BY a.col1 ORDER BY a.col1) FROM a where a.col2 NOT IN ('foo', 'bar', 'baz')",
"output": [
"Execution Plan",
"\nLogicalProject($0=[$2], $1=[$3], $2=[$4])",
- "\n LogicalWindow(window#0=[window(partition {0} order by [0] aggs [MIN($1), MAX($1)])])",
+ "\n LogicalWindow(window#0=[window(partition {0} order by [0] aggs [DENSE_RANK(), MAX($1)])])",
"\n PinotLogicalExchange(distribution=[hash[0]])",
"\n LogicalProject(col1=[$0], col3=[$2], $2=[REVERSE(CONCAT($0, ' ', $1))])",
"\n LogicalFilter(condition=[AND(<>($1, 'bar'), <>($1, 'baz'), <>($1, 'foo'))])",
@@ -2151,6 +2673,19 @@
"\n"
]
},
+ {
+ "description": "multiple OVER(PARTITION BY k1 ORDER BY k1) ranking functions with transform on partition key",
+ "sql": "EXPLAIN PLAN FOR SELECT RANK() OVER(PARTITION BY REVERSE(CONCAT(a.col1, '-', a.col2)) ORDER BY REVERSE(CONCAT(a.col1, '-', a.col2))), DENSE_RANK() OVER(PARTITION BY REVERSE(CONCAT(a.col1, '-', a.col2)) ORDER BY REVERSE(CONCAT(a.col1, '-', a.col2))) FROM a",
+ "output": [
+ "Execution Plan",
+ "\nLogicalProject($0=[$1], $1=[$2])",
+ "\n LogicalWindow(window#0=[window(partition {0} order by [0] aggs [RANK(), DENSE_RANK()])])",
+ "\n PinotLogicalExchange(distribution=[hash[0]])",
+ "\n LogicalProject($0=[REVERSE(CONCAT($0, '-', $1))])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "multiple OVER(PARTITION BY k1 ORDER BY k1)s on the same key but order by has different direction and select col",
"sql": "EXPLAIN PLAN FOR SELECT a.col1, AVG(a.col3) OVER(PARTITION BY a.col1 ORDER BY a.col1 DESC), MIN(a.col3) OVER(PARTITION BY a.col1 ORDER BY a.col1 DESC) FROM a",
@@ -2203,6 +2738,19 @@
"\n"
]
},
+ {
+ "description": "single OVER(PARTITION BY k1 ORDER BY k2) ranking function only",
+ "sql": "EXPLAIN PLAN FOR SELECT RANK() OVER(PARTITION BY a.col2 ORDER BY a.col1) FROM a",
+ "output": [
+ "Execution Plan",
+ "\nLogicalProject($0=[$2])",
+ "\n LogicalWindow(window#0=[window(partition {1} order by [0] aggs [RANK()])])",
+ "\n PinotLogicalSortExchange(distribution=[hash[1]], collation=[[0]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalProject(col1=[$0], col2=[$1])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "single OVER(PARTITION BY k1 ORDER BY k2) only with select alias",
"sql": "EXPLAIN PLAN FOR SELECT AVG(a.col3) OVER(PARTITION BY a.col2 ORDER BY a.col1) AS avg FROM a",
@@ -2255,6 +2803,19 @@
"\n"
]
},
+ {
+ "description": "single OVER(PARTITION BY k1 ORDER BY k2) dense_rank and select col",
+ "sql": "EXPLAIN PLAN FOR SELECT a.col1, DENSE_RANK() OVER(PARTITION BY a.col2 ORDER BY a.col1) FROM a",
+ "output": [
+ "Execution Plan",
+ "\nLogicalProject(col1=[$0], $1=[$2])",
+ "\n LogicalWindow(window#0=[window(partition {1} order by [0] aggs [DENSE_RANK()])])",
+ "\n PinotLogicalSortExchange(distribution=[hash[1]], collation=[[0]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalProject(col1=[$0], col2=[$1])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "single OVER(PARTITION BY k1 ORDER BY k2) and select col with select alias",
"sql": "EXPLAIN PLAN FOR SELECT a.col1 AS value1, AVG(a.col3) OVER(PARTITION BY a.col2 ORDER BY a.col1) AS avg FROM a",
@@ -2313,6 +2874,22 @@
"\n"
]
},
+ {
+ "description": "single OVER(PARTITION BY k1 ORDER BY k2) rank and select col with global order by",
+ "sql": "EXPLAIN PLAN FOR SELECT a.col2, RANK() OVER(PARTITION BY a.col1 ORDER BY a.col2) FROM a ORDER BY a.col1",
+ "output": [
+ "Execution Plan",
+ "\nLogicalSort(sort0=[$2], dir0=[ASC], offset=[0])",
+ "\n PinotLogicalSortExchange(distribution=[hash], collation=[[2]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalSort(sort0=[$2], dir0=[ASC])",
+ "\n LogicalProject(col2=[$1], EXPR$1=[$2], col1=[$0])",
+ "\n LogicalWindow(window#0=[window(partition {0} order by [1] aggs [RANK()])])",
+ "\n PinotLogicalSortExchange(distribution=[hash[0]], collation=[[1]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalProject(col1=[$0], col2=[$1])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "single OVER(PARTITION BY k1 ORDER BY k2) and select col with LIMIT",
"sql": "EXPLAIN PLAN FOR SELECT a.col1, AVG(a.col3) OVER(PARTITION BY a.col2 ORDER BY a.col1) FROM a LIMIT 10",
@@ -2345,6 +2922,22 @@
"\n"
]
},
+ {
+ "description": "single OVER(PARTITION BY k1 ORDER BY k2) dense_rank and select col with global order by with LIMIT",
+ "sql": "EXPLAIN PLAN FOR SELECT a.col2, DENSE_RANK() OVER(PARTITION BY a.col1 ORDER BY a.col2) FROM a ORDER BY a.col1 LIMIT 10",
+ "output": [
+ "Execution Plan",
+ "\nLogicalSort(sort0=[$2], dir0=[ASC], offset=[0], fetch=[10])",
+ "\n PinotLogicalSortExchange(distribution=[hash], collation=[[2]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalSort(sort0=[$2], dir0=[ASC], fetch=[10])",
+ "\n LogicalProject(col2=[$1], EXPR$1=[$2], col1=[$0])",
+ "\n LogicalWindow(window#0=[window(partition {0} order by [1] aggs [DENSE_RANK()])])",
+ "\n PinotLogicalSortExchange(distribution=[hash[0]], collation=[[1]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalProject(col1=[$0], col2=[$1])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "single OVER(PARTITION BY k1 ORDER BY k2) and transform col",
"sql": "EXPLAIN PLAN FOR SELECT SUBSTR(a.col1, 0, 2), COUNT(a.col2) OVER(PARTITION BY a.col3 ORDER BY a.col1) FROM a",
@@ -2372,6 +2965,20 @@
"\n"
]
},
+ {
+ "description": "single OVER(PARTITION BY k1 ORDER BY k2) rank select col and filter",
+ "sql": "EXPLAIN PLAN FOR SELECT a.col2, RANK() OVER(PARTITION BY a.col1 ORDER BY a.col2) FROM a WHERE a.col3 > 10 AND a.col3 <= 500",
+ "output": [
+ "Execution Plan",
+ "\nLogicalProject(col2=[$1], $1=[$2])",
+ "\n LogicalWindow(window#0=[window(partition {0} order by [1] aggs [RANK()])])",
+ "\n PinotLogicalSortExchange(distribution=[hash[0]], collation=[[1]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalProject(col1=[$0], col2=[$1])",
+ "\n LogicalFilter(condition=[AND(>($2, 10), <=($2, 500))])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "single OVER(PARTITION BY k1 ORDER BY k2) with select transform and filter",
"sql": "EXPLAIN PLAN FOR SELECT CONCAT(a.col1, '-', a.col2), AVG(a.col3) OVER(PARTITION BY a.col2 ORDER BY a.col1) FROM a where a.col1 NOT IN ('foo', 'bar') OR a.col3 >= 42",
@@ -2400,6 +3007,20 @@
"\n"
]
},
+ {
+ "description": "single OVER(PARTITION BY k1 ORDER BY k2) rank with select transform and filter",
+ "sql": "EXPLAIN PLAN FOR SELECT CONCAT(a.col1, '-', a.col2), RANK() OVER(PARTITION BY a.col2 ORDER BY a.col1) FROM a where a.col1 NOT IN ('foo', 'bar') OR a.col3 >= 42",
+ "output": [
+ "Execution Plan",
+ "\nLogicalProject($0=[$2], $1=[$3])",
+ "\n LogicalWindow(window#0=[window(partition {1} order by [0] aggs [RANK()])])",
+ "\n PinotLogicalSortExchange(distribution=[hash[1]], collation=[[0]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalProject(col1=[$0], col2=[$1], $2=[CONCAT($0, '-', $1)])",
+ "\n LogicalFilter(condition=[OR(AND(<>($0, 'bar'), <>($0, 'foo')), >=($2, 42))])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "single OVER(PARTITION BY k1 ORDER BY k2) with transform on partition key and order key",
"sql": "EXPLAIN PLAN FOR SELECT AVG(a.col3) OVER(PARTITION BY CONCAT(a.col1, '-', a.col2) ORDER BY REVERSE(a.col2)) FROM a",
@@ -2426,6 +3047,19 @@
"\n"
]
},
+ {
+ "description": "single OVER(PARTITION BY k1 ORDER BY k2) dense_rank with transform on partition key and order key",
+ "sql": "EXPLAIN PLAN FOR SELECT DENSE_RANK() OVER(PARTITION BY CONCAT(a.col1, '-', a.col2) ORDER BY REVERSE(a.col2)) FROM a",
+ "output": [
+ "Execution Plan",
+ "\nLogicalProject($0=[$2])",
+ "\n LogicalWindow(window#0=[window(partition {1} order by [0] aggs [DENSE_RANK()])])",
+ "\n PinotLogicalSortExchange(distribution=[hash[1]], collation=[[0]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalProject($0=[REVERSE($1)], $1=[CONCAT($0, '-', $1)])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "multiple OVER(PARTITION BY k1 ORDER BY k2)s on the same key only (single window group)",
"sql": "EXPLAIN PLAN FOR SELECT MAX(a.col3) OVER(PARTITION BY a.col1 ORDER BY a.col3), COUNT(a.col2) OVER(PARTITION BY a.col1 ORDER BY a.col3) FROM a",
@@ -2439,6 +3073,19 @@
"\n"
]
},
+ {
+ "description": "multiple OVER(PARTITION BY k1 ORDER BY k2)s on the same key only (single window group) - ranking functions",
+ "sql": "EXPLAIN PLAN FOR SELECT MAX(a.col3) OVER(PARTITION BY a.col1 ORDER BY a.col3), DENSE_RANK() OVER(PARTITION BY a.col1 ORDER BY a.col3) FROM a",
+ "output": [
+ "Execution Plan",
+ "\nLogicalProject($0=[$2], $1=[$3])",
+ "\n LogicalWindow(window#0=[window(partition {0} order by [1] aggs [MAX($1), DENSE_RANK()])])",
+ "\n PinotLogicalSortExchange(distribution=[hash[0]], collation=[[1]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalProject(col1=[$0], col3=[$2])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "multiple OVER(PARTITION BY k1 ORDER BY k2)s on the same key only (single window group) with select alias",
"sql": "EXPLAIN PLAN FOR SELECT MAX(a.col3) OVER(PARTITION BY a.col1 ORDER BY a.col3) AS max, COUNT(a.col2) OVER(PARTITION BY a.col1 ORDER BY a.col3) AS count FROM a",
@@ -2466,6 +3113,19 @@
"\n"
]
},
+ {
+ "description": "multiple OVER(PARTITION BY k1 ORDER BY k2)s on the same key and select col (single window group) - ranking functions",
+ "sql": "EXPLAIN PLAN FOR SELECT a.col1, DENSE_RANK() OVER(PARTITION BY a.col1 ORDER BY a.col2), RANK() OVER(PARTITION BY a.col1 ORDER BY a.col2) FROM a",
+ "output": [
+ "Execution Plan",
+ "\nLogicalProject(col1=[$0], $1=[$2], $2=[$3])",
+ "\n LogicalWindow(window#0=[window(partition {0} order by [1] aggs [DENSE_RANK(), RANK()])])",
+ "\n PinotLogicalSortExchange(distribution=[hash[0]], collation=[[1]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalProject(col1=[$0], col2=[$1])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "multiple OVER(PARTITION BY k1 ORDER BY k2)s on the same key and select col (single window group) with select alias",
"sql": "EXPLAIN PLAN FOR SELECT a.col1 AS value1, AVG(a.col3) OVER(PARTITION BY a.col1 ORDER BY a.col2) AS avg, MIN(a.col3) OVER(PARTITION BY a.col1 ORDER BY a.col2) AS min FROM a",
@@ -2492,6 +3152,19 @@
"\n"
]
},
+ {
+ "description": "multiple OVER(PARTITION BY k1 ORDER BY k2)s on the same key with default frame for one and not the other (single window group) - ranking function",
+ "sql": "EXPLAIN PLAN FOR SELECT COUNT(a.col3) OVER(PARTITION BY a.col1 ORDER BY a.col2 RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), DENSE_RANK() OVER(PARTITION BY a.col1 ORDER BY a.col2) FROM a",
+ "output": [
+ "Execution Plan",
+ "\nLogicalProject($0=[$3], $1=[$4])",
+ "\n LogicalWindow(window#0=[window(partition {0} order by [1] aggs [COUNT($2), DENSE_RANK()])])",
+ "\n PinotLogicalSortExchange(distribution=[hash[0]], collation=[[1]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalProject(col1=[$0], col2=[$1], col3=[$2])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "multiple OVER(PARTITION BY k1 ORDER BY k2)s on the same key and select col with global order by (single window group)",
"sql": "EXPLAIN PLAN FOR SELECT a.col1, SUM(a.col3) OVER(PARTITION BY a.col2, a.col1 ORDER BY a.col3, a.col1), AVG(a.col3) OVER(PARTITION BY a.col2, a.col1 ORDER BY a.col3, a.col1) FROM a ORDER BY a.col2, a.col1 DESC",
@@ -2508,6 +3181,22 @@
"\n"
]
},
+ {
+ "description": "multiple OVER(PARTITION BY k1 ORDER BY k2)s on the same key and select col with global order by (single window group) - ranking functions",
+ "sql": "EXPLAIN PLAN FOR SELECT a.col1, RANK() OVER(PARTITION BY a.col2, a.col1 ORDER BY a.col3, a.col1), DENSE_RANK() OVER(PARTITION BY a.col2, a.col1 ORDER BY a.col3, a.col1) FROM a ORDER BY a.col2, a.col1 DESC",
+ "output": [
+ "Execution Plan",
+ "\nLogicalSort(sort0=[$3], sort1=[$0], dir0=[ASC], dir1=[DESC], offset=[0])",
+ "\n PinotLogicalSortExchange(distribution=[hash], collation=[[3, 0 DESC]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalSort(sort0=[$3], sort1=[$0], dir0=[ASC], dir1=[DESC])",
+ "\n LogicalProject(col1=[$0], EXPR$1=[$3], EXPR$2=[$4], col2=[$1])",
+ "\n LogicalWindow(window#0=[window(partition {0, 1} order by [2, 0] aggs [RANK(), DENSE_RANK()])])",
+ "\n PinotLogicalSortExchange(distribution=[hash[0, 1]], collation=[[2, 0]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalProject(col1=[$0], col2=[$1], col3=[$2])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "multiple OVER(PARTITION BY k1 ORDER BY k2)s on the same key and select col with LIMIT (single window group)",
"sql": "EXPLAIN PLAN FOR SELECT a.col1, AVG(a.col3) OVER(PARTITION BY a.col1 ORDER BY a.col2), MIN(a.col3) OVER(PARTITION BY a.col1 ORDER BY a.col2) FROM a LIMIT 10",
@@ -2540,6 +3229,22 @@
"\n"
]
},
+ {
+ "description": "multiple OVER(PARTITION BY k1 ORDER BY k2)s on the same key and select col with global order by with LIMIT (single window group) - ranking functions",
+ "sql": "EXPLAIN PLAN FOR SELECT a.col1, DENSE_RANK() OVER(PARTITION BY a.col2, a.col1 ORDER BY a.col3, a.col1), RANK() OVER(PARTITION BY a.col2, a.col1 ORDER BY a.col3, a.col1) FROM a ORDER BY a.col2, a.col1 DESC LIMIT 10",
+ "output": [
+ "Execution Plan",
+ "\nLogicalSort(sort0=[$3], sort1=[$0], dir0=[ASC], dir1=[DESC], offset=[0], fetch=[10])",
+ "\n PinotLogicalSortExchange(distribution=[hash], collation=[[3, 0 DESC]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalSort(sort0=[$3], sort1=[$0], dir0=[ASC], dir1=[DESC], fetch=[10])",
+ "\n LogicalProject(col1=[$0], EXPR$1=[$3], EXPR$2=[$4], col2=[$1])",
+ "\n LogicalWindow(window#0=[window(partition {0, 1} order by [2, 0] aggs [DENSE_RANK(), RANK()])])",
+ "\n PinotLogicalSortExchange(distribution=[hash[0, 1]], collation=[[2, 0]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalProject(col1=[$0], col2=[$1], col3=[$2])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "multiple OVER(PARTITION BY k1 ORDER BY k2)s on the same key and transform col (single window group)",
"sql": "EXPLAIN PLAN FOR SELECT REVERSE(a.col1), SUM(a.col3) OVER(PARTITION BY a.col2 ORDER BY a.col1), MAX(a.col3) OVER(PARTITION BY a.col2 ORDER BY a.col1) FROM a",
@@ -2581,6 +3286,20 @@
"\n"
]
},
+ {
+ "description": "multiple OVER(PARTITION BY k1 ORDER BY k2)s on the same key select col and filter (single window group) - ranking functions",
+ "sql": "EXPLAIN PLAN FOR SELECT a.col1, RANK() OVER(PARTITION BY a.col1 ORDER BY a.col2), DENSE_RANK() OVER(PARTITION BY a.col1 ORDER BY a.col2) FROM a WHERE a.col3 > 42 AND a.col1 IN ('vader', 'chewbacca', 'yoda')",
+ "output": [
+ "Execution Plan",
+ "\nLogicalProject(col1=[$0], $1=[$2], $2=[$3])",
+ "\n LogicalWindow(window#0=[window(partition {0} order by [1] aggs [RANK(), DENSE_RANK()])])",
+ "\n PinotLogicalSortExchange(distribution=[hash[0]], collation=[[1]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalProject(col1=[$0], col2=[$1])",
+ "\n LogicalFilter(condition=[AND(>($2, 42), OR(=($0, 'chewbacca':VARCHAR(9)), =($0, 'vader':VARCHAR(9)), =($0, 'yoda':VARCHAR(9))))])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "multiple OVER(PARTITION BY k1 ORDER BY k2)s on the same key with select transform and filter (single window group)",
"sql": "EXPLAIN PLAN FOR SELECT REVERSE(CONCAT(a.col1, ' ', a.col2)), MIN(a.col3) OVER(PARTITION BY a.col1 ORDER BY a.col2), MAX(a.col3) OVER(PARTITION BY a.col1 ORDER BY a.col2) FROM a where a.col2 NOT IN ('foo', 'bar', 'baz')",
@@ -2595,6 +3314,20 @@
"\n"
]
},
+ {
+ "description": "multiple OVER(PARTITION BY k1 ORDER BY k2)s on the same key with select transform and filter (single window group) - ranking function",
+ "sql": "EXPLAIN PLAN FOR SELECT REVERSE(CONCAT(a.col1, ' ', a.col2)), DENSE_RANK() OVER(PARTITION BY a.col1 ORDER BY a.col2), RANK() OVER(PARTITION BY a.col1 ORDER BY a.col2) FROM a where a.col2 NOT IN ('foo', 'bar', 'baz')",
+ "output": [
+ "Execution Plan",
+ "\nLogicalProject($0=[$2], $1=[$3], $2=[$4])",
+ "\n LogicalWindow(window#0=[window(partition {0} order by [1] aggs [DENSE_RANK(), RANK()])])",
+ "\n PinotLogicalSortExchange(distribution=[hash[0]], collation=[[1]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalProject(col1=[$0], col2=[$1], $2=[REVERSE(CONCAT($0, ' ', $1))])",
+ "\n LogicalFilter(condition=[AND(<>($1, 'bar'), <>($1, 'baz'), <>($1, 'foo'))])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "multiple OVER(PARTITION BY k1 ORDER BY k2) with transform on partition key (single window group)",
"sql": "EXPLAIN PLAN FOR SELECT AVG(a.col3) OVER(PARTITION BY REVERSE(CONCAT(a.col1, '-', a.col2)) ORDER BY CONCAT(a.col1, '-', a.col2)), COUNT(a.col1) OVER(PARTITION BY REVERSE(CONCAT(a.col1, '-', a.col2)) ORDER BY CONCAT(a.col1, '-', a.col2)) FROM a",
@@ -2608,6 +3341,19 @@
"\n"
]
},
+ {
+ "description": "multiple OVER(PARTITION BY k1 ORDER BY k2) with transform on partition key (single window group) - ranking functions",
+ "sql": "EXPLAIN PLAN FOR SELECT AVG(a.col3) OVER(PARTITION BY REVERSE(CONCAT(a.col1, '-', a.col2)) ORDER BY CONCAT(a.col1, '-', a.col2)), RANK() OVER(PARTITION BY REVERSE(CONCAT(a.col1, '-', a.col2)) ORDER BY CONCAT(a.col1, '-', a.col2)) FROM a",
+ "output": [
+ "Execution Plan",
+ "\nLogicalProject(EXPR$0=[/(CAST($3):DOUBLE NOT NULL, $4)], EXPR$1=[$5])",
+ "\n LogicalWindow(window#0=[window(partition {2} order by [1] aggs [SUM($0), COUNT($0), RANK()])])",
+ "\n PinotLogicalSortExchange(distribution=[hash[2]], collation=[[1]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalProject(col3=[$2], $1=[CONCAT($0, '-', $1)], $2=[REVERSE(CONCAT($0, '-', $1))])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "Window function with JOIN example",
"sql": "EXPLAIN PLAN FOR SELECT a.col1, b.col1, SUM(a.col3) OVER (PARTITION BY a.col1) FROM a JOIN b ON a.col1 = b.col2",
@@ -2657,6 +3403,21 @@
"\n"
]
},
+ {
+ "description": "Window function using ranking function with GROUP BY example with aggregation used within ORDER BY clause in OVER",
+ "sql": "EXPLAIN PLAN FOR SELECT a.col1, COUNT(*), DENSE_RANK() OVER(ORDER BY COUNT(*) desc, a.col1 asc), RANK() OVER(ORDER BY COUNT(*) desc, a.col1 asc) from a GROUP BY a.col1, a.col3",
+ "output": [
+ "Execution Plan",
+ "\nLogicalWindow(window#0=[window(order by [1 DESC, 0] aggs [DENSE_RANK(), RANK()])])",
+ "\n PinotLogicalSortExchange(distribution=[hash], collation=[[1 DESC, 0]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalProject(col1=[$0], EXPR$1=[$2])",
+ "\n LogicalAggregate(group=[{0, 1}], EXPR$1=[$SUM0($2)])",
+ "\n PinotLogicalExchange(distribution=[hash[0, 1]])",
+ "\n LogicalAggregate(group=[{0, 2}], EXPR$1=[COUNT()])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "Window function with GROUP BY example with aggregation used within ORDER BY clause in OVER with PARTITION BY",
"sql": "EXPLAIN PLAN FOR SELECT a.col1, COUNT(*), MAX(a.col3) OVER(PARTITION BY a.col1 ORDER BY COUNT(*) desc, a.col1 asc) from a GROUP BY a.col1, a.col3",
@@ -2686,6 +3447,20 @@
"\n"
]
},
+ {
+ "description": "Window function CTE: rank/dense_rank WITH statement having OVER with PARTITION BY ORDER BY",
+ "sql": "EXPLAIN PLAN FOR WITH windowfunc AS (SELECT a.col1, RANK() OVER(PARTITION BY a.col2 ORDER BY a.col3) as rank, DENSE_RANK() OVER(PARTITION BY a.col2 ORDER BY a.col3) as dense_rank from a) SELECT a.col1, a.rank, a.dense_rank FROM windowfunc AS a where a.dense_rank < 5",
+ "output": [
+ "Execution Plan",
+ "\nLogicalProject(col1=[$0], $1=[$3], $2=[$4])",
+ "\n LogicalFilter(condition=[<($4, 5)])",
+ "\n LogicalWindow(window#0=[window(partition {1} order by [2] aggs [RANK(), DENSE_RANK()])])",
+ "\n PinotLogicalSortExchange(distribution=[hash[1]], collation=[[2]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalProject(col1=[$0], col2=[$1], col3=[$2])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "Window function subquery: row_number having OVER with PARTITION BY ORDER BY",
"sql": "EXPLAIN PLAN FOR SELECT row_number, col2, col3 FROM (SELECT ROW_NUMBER() OVER(PARTITION BY a.col2 ORDER BY a.col3 DESC) as row_number, a.col2, a.col3 FROM a) WHERE row_number <= 10",
@@ -2700,6 +3475,21 @@
"\n"
]
},
+ {
+ "description": "Window function subquery: rank having OVER with PARTITION BY ORDER BY and a GROUP BY",
+ "sql": "EXPLAIN PLAN FOR SELECT col1, total, rank FROM (SELECT a.col1 as col1, count(*) as total, RANK() OVER(ORDER BY count(*) DESC) AS rank FROM a GROUP BY a.col1) WHERE rank = 1",
+ "output": [
+ "Execution Plan",
+ "\nLogicalFilter(condition=[=($2, 1)])",
+ "\n LogicalWindow(window#0=[window(order by [1 DESC] aggs [RANK()])])",
+ "\n PinotLogicalSortExchange(distribution=[hash], collation=[[1 DESC]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalAggregate(group=[{0}], total=[$SUM0($1)])",
+ "\n PinotLogicalExchange(distribution=[hash[0]])",
+ "\n LogicalAggregate(group=[{0}], total=[COUNT()])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n"
+ ]
+ },
{
"description": "Window function subquery with join using row_number",
"sql": "EXPLAIN PLAN FOR SELECT row_number, col2, col3 FROM (SELECT a.col2 as col2, a.col3 as col3, ROW_NUMBER() OVER(PARTITION BY a.col2 ORDER BY a.col3 DESC) as row_number FROM a INNER JOIN b ON a.col1 = b.col2 WHERE a.col3 > 100 AND b.col1 IN ('douglas adams', 'brandon sanderson')) where row_number = 1",
@@ -2721,15 +3511,43 @@
"\n LogicalTableScan(table=[[b]])",
"\n"
]
+ },
+ {
+ "description": "Window function subquery with join using dense_rank",
+ "sql": "EXPLAIN PLAN FOR SELECT dense_rank, col2, col3 FROM (SELECT a.col2 as col2, a.col3 as col3, DENSE_RANK() OVER(PARTITION BY a.col2 ORDER BY a.col3 DESC) as dense_rank FROM a INNER JOIN b ON a.col1 = b.col2 WHERE a.col3 > 100 AND b.col1 IN ('douglas adams', 'brandon sanderson')) where dense_rank > 15",
+ "output": [
+ "Execution Plan",
+ "\nLogicalProject(dense_rank=[$2], col2=[$0], col3=[$1])",
+ "\n LogicalFilter(condition=[>($2, 15)])",
+ "\n LogicalWindow(window#0=[window(partition {0} order by [1 DESC] aggs [DENSE_RANK()])])",
+ "\n PinotLogicalSortExchange(distribution=[hash[0]], collation=[[1 DESC]], isSortOnSender=[false], isSortOnReceiver=[true])",
+ "\n LogicalProject(col2=[$1], col3=[$2])",
+ "\n LogicalJoin(condition=[=($0, $3)], joinType=[inner])",
+ "\n PinotLogicalExchange(distribution=[hash[0]])",
+ "\n LogicalProject(col1=[$0], col2=[$1], col3=[$2])",
+ "\n LogicalFilter(condition=[>($2, 100)])",
+ "\n LogicalTableScan(table=[[a]])",
+ "\n PinotLogicalExchange(distribution=[hash[0]])",
+ "\n LogicalProject(col2=[$1])",
+ "\n LogicalFilter(condition=[OR(=($0, 'brandon sanderson':VARCHAR(17)), =($0, 'douglas adams':VARCHAR(17)))])",
+ "\n LogicalTableScan(table=[[b]])",
+ "\n"
+ ]
}
]
},
"exception_throwing_window_function_planning_tests": {
"queries": [
{
- "description": "unsupported window functions such as row_number()",
+ "description": "unsupported window functions such as NTILE()",
+ "notes": "not yet supported",
+ "sql": "EXPLAIN PLAN FOR SELECT NTILE(5) OVER(PARTITION BY a.col1 ORDER BY a.col3) FROM a",
+ "expectedException": "Error explain query plan for.*"
+ },
+ {
+ "description": "unsupported window functions such as LAG()",
"notes": "not yet supported",
- "sql": "EXPLAIN PLAN FOR SELECT RANK() OVER(PARTITION BY a.col1 ORDER BY a.col3) FROM a",
+ "sql": "EXPLAIN PLAN FOR SELECT LAG(a.col2) OVER(PARTITION BY a.col1 ORDER BY a.col3) FROM a",
"expectedException": "Error explain query plan for.*"
},
{
@@ -2846,6 +3664,46 @@
"description": "Apache Calcite failures with ROW_NUMBER() window functions - passing argument to ROW_NUMBER() should fail",
"sql": "EXPLAIN PLAN FOR SELECT ROW_NUMBER(a.col3) OVER(PARTITION BY a.col2) FROM a",
"expectedException": "Error explain query plan for.*"
+ },
+ {
+ "description": "Apache Calcite failures with RANK() window functions - needs ORDER BY",
+ "sql": "EXPLAIN PLAN FOR SELECT RANK() OVER(PARTITION BY a.col2) FROM a",
+ "expectedException": "Error explain query plan for.*"
+ },
+ {
+ "description": "Apache Calcite failures with RANK() window functions - needs ORDER BY",
+ "sql": "EXPLAIN PLAN FOR SELECT RANK() OVER() FROM a",
+ "expectedException": "Error explain query plan for.*"
+ },
+ {
+ "description": "Apache Calcite failures with DENSE_RANK() window functions - needs ORDER BY",
+ "sql": "EXPLAIN PLAN FOR SELECT DENSE_RANK() OVER(PARTITION BY a.col2) FROM a",
+ "expectedException": "Error explain query plan for.*"
+ },
+ {
+ "description": "Apache Calcite failures with DENSE_RANK() window functions - needs ORDER BY",
+ "sql": "EXPLAIN PLAN FOR SELECT DENSE_RANK() OVER() FROM a",
+ "expectedException": "Error explain query plan for.*"
+ },
+ {
+ "description": "Apache Calcite failures with DENSE_RANK() window functions - passing argument to RANK() should fail",
+ "sql": "EXPLAIN PLAN FOR SELECT RANK(a.col3) OVER(ORDER BY a.col1) FROM a",
+ "expectedException": "Error explain query plan for.*"
+ },
+ {
+ "description": "Apache Calcite failures with DENSE_RANK() window functions - passing argument to DENSE_RANK() should fail",
+ "sql": "EXPLAIN PLAN FOR SELECT DENSE_RANK(a.col3) OVER(ORDER BY a.col1) FROM a",
+ "expectedException": "Error explain query plan for.*"
+ },
+ {
+ "description": "Apache Calcite failures with RANK() window functions - cannot take default frame specification, even though same as actual frame if no frame specified",
+ "sql": "EXPLAIN PLAN FOR SELECT RANK() OVER(ORDER BY a.col1 RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM a",
+ "expectedException": "Error explain query plan for.*"
+ },
+ {
+ "description": "Apache Calcite failures with DENSE_RANK() window functions - cannot take default frame specification, even though same as actual frame if no frame specified",
+ "sql": "EXPLAIN PLAN FOR SELECT DENSE_RANK() OVER(ORDER BY a.col1 RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM a",
+ "expectedException": "Error explain query plan for.*"
}
]
}
diff --git a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/WindowAggregateOperator.java b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/WindowAggregateOperator.java
index 9791a72d42..0bb1433a00 100644
--- a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/WindowAggregateOperator.java
+++ b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/WindowAggregateOperator.java
@@ -55,7 +55,7 @@ import org.slf4j.LoggerFactory;
*
* The window functions supported today are:
* Aggregation: SUM/COUNT/MIN/MAX/AVG/BOOL_OR/BOOL_AND aggregations [RANGE window type only]
- * Ranking: ROW_NUMBER ranking functions [ROWS window type only]
+ * Ranking: ROW_NUMBER [ROWS window type only], RANK, DENSE_RANK [RANGE window type only] ranking functions
* Value: [none]
*
* Unlike the AggregateOperator which will output one row per group, the WindowAggregateOperator
@@ -82,6 +82,8 @@ public class WindowAggregateOperator extends MultiStageOperator {
// List of window functions which can only be applied as ROWS window frame type
private static final Set<String> ROWS_ONLY_FUNCTION_NAMES = ImmutableSet.of("ROW_NUMBER");
+ // List of ranking window functions whose output depends on the ordering of input rows and not on the actual values
+ private static final Set<String> RANKING_FUNCTION_NAMES = ImmutableSet.of("RANK", "DENSE_RANK");
private final MultiStageOperator _inputOperator;
private final List<RexExpression> _groupSet;
@@ -191,7 +193,7 @@ public class WindowAggregateOperator extends MultiStageOperator {
}
if (ROWS_ONLY_FUNCTION_NAMES.contains(functionName)) {
- Preconditions.checkState(_windowFrame.getWindowFrameType() == WindowNode.WindowFrameType.ROW
+ Preconditions.checkState(_windowFrame.getWindowFrameType() == WindowNode.WindowFrameType.ROWS
&& _windowFrame.isUpperBoundCurrentRow(),
String.format("%s must be of ROW frame type and have CURRENT ROW as the upper bound", functionName));
} else {
@@ -225,12 +227,13 @@ public class WindowAggregateOperator extends MultiStageOperator {
List<Object[]> rows = new ArrayList<>(_numRows);
if (_windowFrame.getWindowFrameType() == WindowNode.WindowFrameType.RANGE) {
// All aggregation window functions only support RANGE type today (SUM/AVG/MIN/MAX/COUNT/BOOL_AND/BOOL_OR)
+ // RANK and DENSE_RANK ranking window functions also only support RANGE type today
for (Map.Entry<Key, List<Object[]>> e : _partitionRows.entrySet()) {
Key partitionKey = e.getKey();
List<Object[]> rowList = e.getValue();
for (Object[] existingRow : rowList) {
Object[] row = new Object[existingRow.length + _aggCalls.size()];
- Key orderKey = _isPartitionByOnly ? emptyOrderKey
+ Key orderKey = (_isPartitionByOnly && CollectionUtils.isEmpty(_orderSetInfo.getOrderSet())) ? emptyOrderKey
: AggregationUtils.extractRowKey(existingRow, _orderSetInfo.getOrderSet());
System.arraycopy(existingRow, 0, row, 0, existingRow.length);
for (int i = 0; i < _windowAccumulators.length; i++) {
@@ -298,7 +301,7 @@ public class WindowAggregateOperator extends MultiStageOperator {
_partitionRows.computeIfAbsent(key, k -> new ArrayList<>()).add(row);
// Only need to accumulate the aggregate function values for RANGE type. ROW type can be calculated as
// we output the rows since the aggregation value depends on the neighboring rows.
- Key orderKey = _isPartitionByOnly ? emptyOrderKey
+ Key orderKey = (_isPartitionByOnly && CollectionUtils.isEmpty(_orderSetInfo.getOrderSet())) ? emptyOrderKey
: AggregationUtils.extractRowKey(row, _orderSetInfo.getOrderSet());
int aggCallsSize = _aggCalls.size();
for (int i = 0; i < aggCallsSize; i++) {
@@ -412,14 +415,46 @@ public class WindowAggregateOperator extends MultiStageOperator {
}
}
+ private static class MergeRank implements AggregationUtils.Merger {
+
+ @Override
+ public Long init(Object other, DataSchema.ColumnDataType dataType) {
+ return 1L;
+ }
+
+ @Override
+ public Long merge(Object left, Object right) {
+ // RANK always increase by the number of duplicate entries seen for the given ORDER BY key.
+ return ((Number) left).longValue() + ((Number) right).longValue();
+ }
+ }
+
+ private static class MergeDenseRank implements AggregationUtils.Merger {
+
+ @Override
+ public Long init(Object other, DataSchema.ColumnDataType dataType) {
+ return 1L;
+ }
+
+ @Override
+ public Long merge(Object left, Object right) {
+ long rightValueInLong = ((Number) right).longValue();
+ // DENSE_RANK always increase the rank by 1, irrespective of the number of duplicate ORDER BY keys seen
+ return (rightValueInLong == 0L) ? ((Number) left).longValue() : ((Number) left).longValue() + 1L;
+ }
+ }
+
private static class WindowAggregateAccumulator extends AggregationUtils.Accumulator {
private static final Map<String, Function<DataSchema.ColumnDataType, AggregationUtils.Merger>> WIN_AGG_MERGERS =
ImmutableMap.<String, Function<DataSchema.ColumnDataType, AggregationUtils.Merger>>builder()
.putAll(AggregationUtils.Accumulator.MERGERS)
.put("ROW_NUMBER", cdt -> new MergeRowNumber())
+ .put("RANK", cdt -> new MergeRank())
+ .put("DENSE_RANK", cdt -> new MergeDenseRank())
.build();
private final boolean _isPartitionByOnly;
+ private final boolean _isRankingWindowFunction;
// Fields needed only for RANGE frame type queries (ORDER BY)
private final Map<Key, OrderKeyResult> _orderByResults = new HashMap<>();
@@ -429,6 +464,7 @@ public class WindowAggregateOperator extends MultiStageOperator {
DataSchema inputSchema, OrderSetInfo orderSetInfo) {
super(aggCall, merger, functionName, inputSchema);
_isPartitionByOnly = CollectionUtils.isEmpty(orderSetInfo.getOrderSet()) || orderSetInfo.isPartitionByOnly();
+ _isRankingWindowFunction = RANKING_FUNCTION_NAMES.contains(functionName);
}
/**
@@ -452,7 +488,8 @@ public class WindowAggregateOperator extends MultiStageOperator {
* RANGE key and not to the row ordering. This should only be called for RANGE type queries.
*/
public void accumulateRangeResults(Key key, Key orderKey, Object[] row) {
- if (_isPartitionByOnly) {
+ // Ranking functions don't use the row value, thus cannot reuse the AggregationUtils accumulate function for them
+ if (_isPartitionByOnly && !_isRankingWindowFunction) {
accumulate(key, row);
return;
}
@@ -464,15 +501,21 @@ public class WindowAggregateOperator extends MultiStageOperator {
: _orderByResults.get(key).getOrderByResults().get(previousOrderKeyIfPresent);
Object value = _inputRef == -1 ? _literal : row[_inputRef];
+ // The ranking functions do not depend on the actual value of the data, but are calculated based on the
+ // position of the data ordered by the ORDER BY key. Thus they need to be handled differently and require setting
+ // whether the rank has changed or not and if changed then by how much.
_orderByResults.putIfAbsent(key, new OrderKeyResult());
if (currentRes == null) {
+ value = _isRankingWindowFunction ? 0 : value;
_orderByResults.get(key).addOrderByResult(orderKey, _merger.init(value, _dataType));
} else {
Object mergedResult;
if (orderKey.equals(previousOrderKeyIfPresent)) {
+ value = _isRankingWindowFunction ? 0 : value;
mergedResult = _merger.merge(currentRes, value);
} else {
Object previousValue = _orderByResults.get(key).getOrderByResults().get(previousOrderKeyIfPresent);
+ value = _isRankingWindowFunction ? _orderByResults.get(key).getCountOfDuplicateOrderByKeys() : value;
mergedResult = _merger.merge(previousValue, value);
}
_orderByResults.get(key).addOrderByResult(orderKey, mergedResult);
@@ -480,7 +523,7 @@ public class WindowAggregateOperator extends MultiStageOperator {
}
public Object getRangeResultForKeys(Key key, Key orderKey) {
- if (_isPartitionByOnly) {
+ if (_isPartitionByOnly && !_isRankingWindowFunction) {
return _results.get(key);
} else {
return _orderByResults.get(key).getOrderByResults().get(orderKey);
@@ -494,16 +537,21 @@ public class WindowAggregateOperator extends MultiStageOperator {
static class OrderKeyResult {
final Map<Key, Object> _orderByResults;
Key _previousOrderByKey;
+ // Store the counts of duplicate ORDER BY keys seen for this PARTITION BY key for calculating RANK/DENSE_RANK
+ long _countOfDuplicateOrderByKeys;
OrderKeyResult() {
_orderByResults = new HashMap<>();
_previousOrderByKey = null;
+ _countOfDuplicateOrderByKeys = 0;
}
public void addOrderByResult(Key orderByKey, Object value) {
// We expect to get the rows in order based on the ORDER BY key so it is safe to blindly assign the
// current key as the previous key
_orderByResults.put(orderByKey, value);
+ _countOfDuplicateOrderByKeys = (_previousOrderByKey != null && _previousOrderByKey.equals(orderByKey))
+ ? _countOfDuplicateOrderByKeys + 1 : 1;
_previousOrderByKey = orderByKey;
}
@@ -514,6 +562,10 @@ public class WindowAggregateOperator extends MultiStageOperator {
public Key getPreviousOrderByKey() {
return _previousOrderByKey;
}
+
+ public long getCountOfDuplicateOrderByKeys() {
+ return _countOfDuplicateOrderByKeys;
+ }
}
}
}
diff --git a/pinot-query-runtime/src/test/java/org/apache/pinot/query/runtime/operator/WindowAggregateOperatorTest.java b/pinot-query-runtime/src/test/java/org/apache/pinot/query/runtime/operator/WindowAggregateOperatorTest.java
index fc54b8941e..8d5eff64da 100644
--- a/pinot-query-runtime/src/test/java/org/apache/pinot/query/runtime/operator/WindowAggregateOperatorTest.java
+++ b/pinot-query-runtime/src/test/java/org/apache/pinot/query/runtime/operator/WindowAggregateOperatorTest.java
@@ -389,12 +389,12 @@ public class WindowAggregateOperatorTest {
}
@Test(expectedExceptions = IllegalStateException.class, expectedExceptionsMessageRegExp = ".*Unexpected aggregation "
- + "function name: RANK.*")
+ + "function name: NTILE.*")
public void testShouldThrowOnUnknownRankAggFunction() {
- // TODO: Remove this test when support is added for RANK functions
+ // TODO: Remove this test when support is added for NTILE function
// Given:
List<RexExpression> calls = ImmutableList.of(
- new RexExpression.FunctionCall(SqlKind.RANK, FieldSpec.DataType.INT, "RANK", ImmutableList.of()));
+ new RexExpression.FunctionCall(SqlKind.RANK, FieldSpec.DataType.INT, "NTILE", ImmutableList.of()));
List<RexExpression> group = ImmutableList.of(new RexExpression.InputRef(0));
DataSchema outSchema = new DataSchema(new String[]{"unknown"}, new DataSchema.ColumnDataType[]{DOUBLE});
DataSchema inSchema = new DataSchema(new String[]{"unknown"}, new DataSchema.ColumnDataType[]{DOUBLE});
@@ -406,6 +406,67 @@ public class WindowAggregateOperatorTest {
WindowNode.WindowFrameType.RANGE, Collections.emptyList(), outSchema, inSchema);
}
+ @Test
+ public void testRankDenseRankRankingFunctions() {
+ // Given:
+ List<RexExpression> calls = ImmutableList.of(
+ new RexExpression.FunctionCall(SqlKind.RANK, FieldSpec.DataType.INT, "RANK", ImmutableList.of()),
+ new RexExpression.FunctionCall(SqlKind.DENSE_RANK, FieldSpec.DataType.INT, "DENSE_RANK", ImmutableList.of()));
+ List<RexExpression> group = ImmutableList.of(new RexExpression.InputRef(0));
+ List<RexExpression> order = ImmutableList.of(new RexExpression.InputRef(1));
+
+ DataSchema inSchema = new DataSchema(new String[]{"group", "arg"}, new DataSchema.ColumnDataType[]{INT, STRING});
+ // Input should be in sorted order on the order by key as SortExchange will handle pre-sorting the data
+ Mockito.when(_input.nextBlock())
+ .thenReturn(OperatorTestUtil.block(inSchema, new Object[]{3, "and"}, new Object[]{2, "bar"},
+ new Object[]{2, "foo"}, new Object[]{1, "foo"}))
+ .thenReturn(OperatorTestUtil.block(inSchema, new Object[]{1, "foo"}, new Object[]{2, "foo"},
+ new Object[]{1, "numb"}, new Object[]{2, "the"}, new Object[]{3, "true"}))
+ .thenReturn(TransferableBlockUtils.getEndOfStreamTransferableBlock());
+
+ DataSchema outSchema = new DataSchema(new String[]{"group", "arg", "rank", "dense_rank"},
+ new DataSchema.ColumnDataType[]{INT, STRING, LONG, LONG});
+
+ // When:
+ WindowAggregateOperator operator =
+ new WindowAggregateOperator(OperatorTestUtil.getDefaultContext(), _input, group, order,
+ Collections.emptyList(), Collections.emptyList(), calls, Integer.MIN_VALUE, 0,
+ WindowNode.WindowFrameType.RANGE, Collections.emptyList(), outSchema, inSchema);
+
+ TransferableBlock result = operator.getNextBlock();
+ while (result.isNoOpBlock()) {
+ result = operator.getNextBlock();
+ }
+ TransferableBlock eosBlock = operator.getNextBlock();
+ List<Object[]> resultRows = result.getContainer();
+ Map<Integer, List<Object[]>> expectedPartitionToRowsMap = new HashMap<>();
+ expectedPartitionToRowsMap.put(1, Arrays.asList(new Object[]{1, "foo", 1L, 1L}, new Object[]{1, "foo", 1L, 1L},
+ new Object[]{1, "numb", 3L, 2L}));
+ expectedPartitionToRowsMap.put(2, Arrays.asList(new Object[]{2, "bar", 1L, 1L}, new Object[]{2, "foo", 2L, 2L},
+ new Object[]{2, "foo", 2L, 2L}, new Object[]{2, "the", 4L, 3L}));
+ expectedPartitionToRowsMap.put(3, Arrays.asList(new Object[]{3, "and", 1L, 1L}, new Object[]{3, "true", 2L, 2L}));
+
+ Integer previousPartitionKey = null;
+ Map<Integer, List<Object[]>> resultsPartitionToRowsMap = new HashMap<>();
+ for (Object[] row : resultRows) {
+ Integer currentPartitionKey = (Integer) row[0];
+ if (!currentPartitionKey.equals(previousPartitionKey)) {
+ Assert.assertFalse(resultsPartitionToRowsMap.containsKey(currentPartitionKey));
+ }
+ resultsPartitionToRowsMap.computeIfAbsent(currentPartitionKey, k -> new ArrayList<>()).add(row);
+ previousPartitionKey = currentPartitionKey;
+ }
+
+ resultsPartitionToRowsMap.forEach((key, value) -> {
+ List<Object[]> expectedRows = expectedPartitionToRowsMap.get(key);
+ Assert.assertEquals(value.size(), expectedRows.size());
+ for (int i = 0; i < value.size(); i++) {
+ Assert.assertEquals(value.get(i), expectedRows.get(i));
+ }
+ });
+ Assert.assertTrue(eosBlock.isEndOfStreamBlock(), "Second block is EOS (done processing)");
+ }
+
@Test
public void testRowNumberRankingFunction() {
// Given:
@@ -430,7 +491,7 @@ public class WindowAggregateOperatorTest {
WindowAggregateOperator operator =
new WindowAggregateOperator(OperatorTestUtil.getDefaultContext(), _input, group, order,
Collections.emptyList(), Collections.emptyList(), calls, Integer.MIN_VALUE, 0,
- WindowNode.WindowFrameType.ROW, Collections.emptyList(), outSchema, inSchema);
+ WindowNode.WindowFrameType.ROWS, Collections.emptyList(), outSchema, inSchema);
TransferableBlock result = operator.getNextBlock();
while (result.isNoOpBlock()) {
@@ -561,7 +622,7 @@ public class WindowAggregateOperatorTest {
WindowAggregateOperator operator =
new WindowAggregateOperator(OperatorTestUtil.getDefaultContext(), _input, group, Collections.emptyList(),
Collections.emptyList(), Collections.emptyList(), calls, Integer.MIN_VALUE, Integer.MAX_VALUE,
- WindowNode.WindowFrameType.ROW, Collections.emptyList(), outSchema, inSchema);
+ WindowNode.WindowFrameType.ROWS, Collections.emptyList(), outSchema, inSchema);
}
@Test
diff --git a/pinot-query-runtime/src/test/resources/queries/WindowFunctions.json b/pinot-query-runtime/src/test/resources/queries/WindowFunctions.json
index afc989fded..dd25411d26 100644
--- a/pinot-query-runtime/src/test/resources/queries/WindowFunctions.json
+++ b/pinot-query-runtime/src/test/resources/queries/WindowFunctions.json
@@ -140,6 +140,29 @@
[768]
]
},
+ {
+ "description": "Single OVER(ORDER BY) rank",
+ "sql": "SELECT RANK() OVER(ORDER BY string_col) FROM {tbl}",
+ "keepOutputRowOrder": true,
+ "outputs": [
+ [1],
+ [1],
+ [1],
+ [1],
+ [1],
+ [6],
+ [6],
+ [8],
+ [8],
+ [8],
+ [8],
+ [12],
+ [13],
+ [13],
+ [15],
+ [16]
+ ]
+ },
{
"description": "Single empty OVER() sum with select columns",
"sql": "SELECT string_col, SUM(int_col) OVER(), int_col FROM {tbl}",
@@ -208,6 +231,52 @@
["h", 16, 150]
]
},
+ {
+ "description": "Single OVER(ORDER BY) rank with select columns (two ORDER BY columns for deterministic output)",
+ "sql": "SELECT string_col, RANK() OVER(ORDER BY string_col, int_col), int_col FROM {tbl}",
+ "keepOutputRowOrder": true,
+ "outputs": [
+ ["a", 1, 2],
+ ["a", 1, 2],
+ ["a", 3, 42],
+ ["a", 3, 42],
+ ["a", 3, 42],
+ ["b", 6, 3],
+ ["b", 7, 100],
+ ["c", 8, 2],
+ ["c", 9, 3],
+ ["c", 10, 101],
+ ["c", 11, 150],
+ ["d", 12, 42],
+ ["e", 13, 42],
+ ["e", 13, 42],
+ ["g", 15, 3],
+ ["h", 16, 150]
+ ]
+ },
+ {
+ "description": "Single OVER(ORDER BY) dense_rank with select columns (two ORDER BY columns for deterministic output)",
+ "sql": "SELECT string_col, DENSE_RANK() OVER(ORDER BY string_col, int_col), int_col FROM {tbl}",
+ "keepOutputRowOrder": true,
+ "outputs": [
+ ["a", 1, 2],
+ ["a", 1, 2],
+ ["a", 2, 42],
+ ["a", 2, 42],
+ ["a", 2, 42],
+ ["b", 3, 3],
+ ["b", 4, 100],
+ ["c", 5, 2],
+ ["c", 6, 3],
+ ["c", 7, 101],
+ ["c", 8, 150],
+ ["d", 9, 42],
+ ["e", 10, 42],
+ ["e", 10, 42],
+ ["g", 11, 3],
+ ["h", 12, 150]
+ ]
+ },
{
"description": "Single OVER(ORDER BY) sum with one DESC column with select columns (two ORDER BY columns for deterministic output)",
"sql": "SELECT string_col, SUM(int_col) OVER(ORDER BY string_col DESC, int_col), int_col FROM {tbl}",
@@ -277,6 +346,29 @@
["a", 16, 2]
]
},
+ {
+ "description": "Single OVER(ORDER BY) dense_rank with two DESC columns with select columns (two ORDER BY columns for deterministic output)",
+ "sql": "SELECT string_col, DENSE_RANK() OVER(ORDER BY string_col DESC, int_col DESC), int_col FROM {tbl}",
+ "keepOutputRowOrder": true,
+ "outputs": [
+ ["h", 1, 150],
+ ["g", 2, 3],
+ ["e", 3, 42],
+ ["e", 3, 42],
+ ["d", 4, 42],
+ ["c", 5, 150],
+ ["c", 6, 101],
+ ["c", 7, 3],
+ ["c", 8, 2],
+ ["b", 9, 100],
+ ["b", 10, 3],
+ ["a", 11, 42],
+ ["a", 11, 42],
+ ["a", 11, 42],
+ ["a", 12, 2],
+ ["a", 12, 2]
+ ]
+ },
{
"description": "Single OVER(ORDER BY) sum with second DESC column with select columns (two ORDER BY columns for deterministic output)",
"sql": "SELECT string_col, SUM(int_col) OVER(ORDER BY string_col, int_col DESC), int_col FROM {tbl}",
@@ -462,6 +554,145 @@
["h", 16]
]
},
+ {
+ "description": "Single OVER(ORDER BY) row_number with select col with global order by",
+ "sql": "SELECT string_col, ROW_NUMBER() OVER(ORDER BY string_col) as row_number FROM {tbl} ORDER BY row_number",
+ "keepOutputRowOrder": true,
+ "outputs": [
+ ["a", 1],
+ ["a", 2],
+ ["a", 3],
+ ["a", 4],
+ ["a", 5],
+ ["b", 6],
+ ["b", 7],
+ ["c", 8],
+ ["c", 9],
+ ["c", 10],
+ ["c", 11],
+ ["d", 12],
+ ["e", 13],
+ ["e", 14],
+ ["g", 15],
+ ["h", 16]
+ ]
+ },
+ {
+ "description": "Single OVER(ORDER BY) rank with select col with global order by",
+ "sql": "SELECT string_col, RANK() OVER(ORDER BY string_col, int_col) as rank FROM {tbl} ORDER BY rank, string_col",
+ "keepOutputRowOrder": true,
+ "outputs": [
+ ["a", 1],
+ ["a", 1],
+ ["a", 3],
+ ["a", 3],
+ ["a", 3],
+ ["b", 6],
+ ["b", 7],
+ ["c", 8],
+ ["c", 9],
+ ["c", 10],
+ ["c", 11],
+ ["d", 12],
+ ["e", 13],
+ ["e", 13],
+ ["g", 15],
+ ["h", 16]
+ ]
+ },
+ {
+ "description": "Single OVER(ORDER BY) rank with select col with global order by",
+ "sql": "SELECT string_col, RANK() OVER(PARTITION BY string_col ORDER BY string_col, int_col) as rank FROM {tbl} ORDER BY rank, string_col",
+ "keepOutputRowOrder": true,
+ "outputs": [
+ ["a", 1],
+ ["a", 1],
+ ["b", 1],
+ ["c", 1],
+ ["d", 1],
+ ["e", 1],
+ ["e", 1],
+ ["g", 1],
+ ["h", 1],
+ ["b", 2],
+ ["c", 2],
+ ["a", 3],
+ ["a", 3],
+ ["a", 3],
+ ["c", 3],
+ ["c", 4]
+ ]
+ },
+ {
+ "description": "Single OVER(ORDER BY) dense_rank with select col with global order by",
+ "sql": "SELECT string_col, DENSE_RANK() OVER(ORDER BY string_col, int_col) as dense_rank FROM {tbl} ORDER BY dense_rank, string_col",
+ "keepOutputRowOrder": true,
+ "outputs": [
+ ["a", 1],
+ ["a", 1],
+ ["a", 2],
+ ["a", 2],
+ ["a", 2],
+ ["b", 3],
+ ["b", 4],
+ ["c", 5],
+ ["c", 6],
+ ["c", 7],
+ ["c", 8],
+ ["d", 9],
+ ["e", 10],
+ ["e", 10],
+ ["g", 11],
+ ["h", 12]
+ ]
+ },
+ {
+ "description": "Single OVER(ORDER BY) dense_rank with select col with global order by",
+ "sql": "SELECT string_col, DENSE_RANK() OVER(PARTITION BY string_col ORDER BY string_col, int_col) as dense_rank FROM {tbl} ORDER BY dense_rank, string_col",
+ "keepOutputRowOrder": true,
+ "outputs": [
+ ["a", 1],
+ ["a", 1],
+ ["b", 1],
+ ["c", 1],
+ ["d", 1],
+ ["e", 1],
+ ["e", 1],
+ ["g", 1],
+ ["h", 1],
+ ["a", 2],
+ ["a", 2],
+ ["a", 2],
+ ["b", 2],
+ ["c", 2],
+ ["c", 3],
+ ["c", 4]
+ ]
+ },
+ {
+ "description": "Single OVER(ORDER BY) rank with select col with global order by",
+ "sql": "SELECT string_col, RANK() OVER(ORDER BY string_col) FROM {tbl} ORDER BY string_col",
+ "keepOutputRowOrder": false,
+ "comments": "Can't enable keeping the row order as the ordering is only based on the string_col and can change on sorting",
+ "outputs": [
+ ["a", 1],
+ ["a", 1],
+ ["a", 1],
+ ["a", 1],
+ ["a", 1],
+ ["b", 6],
+ ["b", 6],
+ ["c", 8],
+ ["c", 8],
+ ["c", 8],
+ ["c", 8],
+ ["d", 12],
+ ["e", 13],
+ ["e", 13],
+ ["g", 15],
+ ["h", 16]
+ ]
+ },
{
"description": "Single empty OVER() count with select col with global order by with LIMIT",
"sql": "SELECT string_col, COUNT(int_col) OVER() FROM {tbl} ORDER BY string_col LIMIT 5",
@@ -557,6 +788,14 @@
["a", 1]
]
},
+ {
+ "description": "Single OVER(ORDER BY) dense_rank with select col and filter",
+ "sql": "SELECT string_col, DENSE_RANK() OVER(ORDER BY bool_col) FROM {tbl} WHERE string_col = 'a' AND bool_col = false",
+ "keepOutputRowOrder": true,
+ "outputs": [
+ ["a", 1]
+ ]
+ },
{
"description": "Single empty OVER() with select col and filter which matches no rows",
"sql": "SELECT string_col, COUNT(bool_col) OVER() FROM {tbl} WHERE string_col = 'a' AND bool_col = false AND int_col > 200",
@@ -634,6 +873,19 @@
[400, 6]
]
},
+ {
+ "description": "Single OVER(ORDER BY) rank with select col and filter (two ORDER BY columns for deterministic output)",
+ "sql": "SELECT double_col, RANK() OVER(ORDER BY string_col, double_col) FROM {tbl} WHERE string_col IN ('b', 'c')",
+ "keepOutputRowOrder": true,
+ "outputs": [
+ [1, 1],
+ [100, 2],
+ [1.01, 3],
+ [1.5, 4],
+ [100, 5],
+ [400, 6]
+ ]
+ },
{
"description": "Single empty OVER() with select transform and filter",
"sql": "SELECT CONCAT(string_col, bool_col, '-'), MAX(int_col) OVER() FROM {tbl} where int_col < 50 OR double_col = 1.01",
@@ -693,6 +945,46 @@
["g-true", 13]
]
},
+ {
+ "description": "Single OVER(ORDER BY) rank with select transform and filter (two ORDER BY columns for deterministic output)",
+ "sql": "SELECT CONCAT(string_col, bool_col, '-'), RANK() OVER(ORDER BY string_col, bool_col) FROM {tbl} where int_col < 50 OR double_col = 1.01",
+ "keepOutputRowOrder": true,
+ "outputs": [
+ ["a-false", 1],
+ ["a-true", 2],
+ ["a-true", 2],
+ ["a-true", 2],
+ ["a-true", 2],
+ ["b-false", 6],
+ ["c-false", 7],
+ ["c-false", 7],
+ ["c-true", 9],
+ ["d-false", 10],
+ ["e-false", 11],
+ ["e-true", 12],
+ ["g-true", 13]
+ ]
+ },
+ {
+ "description": "Single OVER(ORDER BY) dense_rank with select transform and filter (two ORDER BY columns for deterministic output)",
+ "sql": "SELECT CONCAT(string_col, bool_col, '-'), DENSE_RANK() OVER(ORDER BY string_col, bool_col) FROM {tbl} where int_col < 50 OR double_col = 1.01",
+ "keepOutputRowOrder": true,
+ "outputs": [
+ ["a-false", 1],
+ ["a-true", 2],
+ ["a-true", 2],
+ ["a-true", 2],
+ ["a-true", 2],
+ ["b-false", 3],
+ ["c-false", 4],
+ ["c-false", 4],
+ ["c-true", 5],
+ ["d-false", 6],
+ ["e-false", 7],
+ ["e-true", 8],
+ ["g-true", 9]
+ ]
+ },
{
"description": "Single empty OVER() with group by",
"sql": "SELECT MAX({tbl}.int_col) OVER() FROM {tbl} GROUP BY int_col",
@@ -731,6 +1023,19 @@
[6]
]
},
+ {
+ "description": "Single OVER(ORDER BY) rank with group by",
+ "sql": "SELECT RANK() OVER(ORDER BY {tbl}.int_col) FROM {tbl} GROUP BY int_col",
+ "keepOutputRowOrder": true,
+ "outputs": [
+ [1],
+ [2],
+ [3],
+ [4],
+ [5],
+ [6]
+ ]
+ },
{
"description": "Single empty OVER() with select col and group by",
"sql": "SELECT string_col, MIN({tbl}.int_col) OVER() FROM {tbl} GROUP BY string_col, int_col",
@@ -806,6 +1111,19 @@
[300, 6]
]
},
+ {
+ "description": "Single OVER(ORDER BY) dense_rank with agg col and group by",
+ "sql": "SELECT SUM(int_col), DENSE_RANK() OVER(ORDER BY {tbl}.int_col) FROM {tbl} GROUP BY int_col",
+ "keepOutputRowOrder": true,
+ "outputs": [
+ [6, 1],
+ [9, 2],
+ [252, 3],
+ [100, 4],
+ [101, 5],
+ [300, 6]
+ ]
+ },
{
"description": "Single empty OVER() with select col, agg col and group by",
"sql": "SELECT int_col, SUM(int_col), SUM({tbl}.int_col) OVER() FROM {tbl} GROUP BY int_col",
@@ -870,6 +1188,19 @@
[150, 300, 6]
]
},
+ {
+ "description": "Single OVER(ORDER BY) rank with select col, agg col and group by with global order by",
+ "sql": "SELECT int_col, SUM(int_col), RANK() OVER(ORDER BY {tbl}.int_col) FROM {tbl} GROUP BY int_col ORDER BY int_col",
+ "keepOutputRowOrder": true,
+ "outputs": [
+ [2, 6, 1],
+ [3, 9, 2],
+ [42, 252, 3],
+ [100, 100, 4],
+ [101, 101, 5],
+ [150, 300, 6]
+ ]
+ },
{
"description": "Single empty OVER() with select col, agg col and group by with a filter",
"sql": "SELECT int_col, SUM(int_col), SUM({tbl}.int_col) OVER() FROM {tbl} WHERE int_col < 100 GROUP BY int_col",
@@ -899,6 +1230,16 @@
[42, 252, 3]
]
},
+ {
+ "description": "Single OVER(ORDER BY) dense_rank with select col, agg col and group by with a filter",
+ "sql": "SELECT int_col, SUM(int_col), DENSE_RANK() OVER(ORDER BY {tbl}.int_col) FROM {tbl} WHERE int_col < 100 GROUP BY int_col",
+ "keepOutputRowOrder": true,
+ "outputs": [
+ [2, 6, 1],
+ [3, 9, 2],
+ [42, 252, 3]
+ ]
+ },
{
"description": "Single empty OVER() with select col, agg col and group by with a filter that matches no rows",
"sql": "SELECT int_col, SUM(int_col), SUM({tbl}.int_col) OVER() FROM {tbl} WHERE int_col > 200 GROUP BY int_col",
@@ -955,6 +1296,29 @@
[768, 16]
]
},
+ {
+ "description": "Multiple OVER(ORDER BY)s ranking functinos",
+ "sql": "SELECT RANK() OVER(ORDER BY string_col), DENSE_RANK() OVER(ORDER BY string_col) FROM {tbl}",
+ "keepOutputRowOrder": true,
+ "outputs": [
+ [1, 1],
+ [1, 1],
+ [1, 1],
+ [1, 1],
+ [1, 1],
+ [6, 2],
+ [6, 2],
+ [8, 3],
+ [8, 3],
+ [8, 3],
+ [8, 3],
+ [12, 4],
+ [13, 5],
+ [13, 5],
+ [15, 6],
+ [16, 7]
+ ]
+ },
{
"description": "Multiple empty OVER()s with select columns",
"sql": "SELECT string_col, SUM(int_col) OVER(), int_col, MAX(double_col) OVER() FROM {tbl}",
@@ -1000,6 +1364,29 @@
["h", 768, 150, 400]
]
},
+ {
+ "description": "Multiple OVER(ORDER BY)s with select columns (two ORDER BY columns for deterministic output) - ranking functions",
+ "sql": "SELECT string_col, RANK() OVER(ORDER BY string_col, int_col), int_col, DENSE_RANK() OVER(ORDER BY string_col, int_col) FROM {tbl}",
+ "keepOutputRowOrder": true,
+ "outputs": [
+ ["a", 1, 2, 1],
+ ["a", 1, 2, 1],
+ ["a", 3, 42, 2],
+ ["a", 3, 42, 2],
+ ["a", 3, 42, 2],
+ ["b", 6, 3, 3],
+ ["b", 7, 100, 4],
+ ["c", 8, 2, 5],
+ ["c", 9, 3, 6],
+ ["c", 10, 101, 7],
+ ["c", 11, 150, 8],
+ ["d", 12, 42, 9],
+ ["e", 13, 42, 10],
+ ["e", 13, 42, 10],
+ ["g", 15, 3, 11],
+ ["h", 16, 150, 12]
+ ]
+ },
{
"description": "Multiple OVER(ORDER BY)s with one DESC column with select columns (two ORDER BY columns for deterministic output)",
"sql": "SELECT string_col, SUM(int_col) OVER(ORDER BY string_col DESC, int_col), int_col, MAX(double_col) OVER(ORDER BY string_col DESC, int_col) FROM {tbl}",
@@ -1023,6 +1410,29 @@
["a", 768, 42, 400]
]
},
+ {
+ "description": "Multiple OVER(ORDER BY)s with one DESC column with select columns (two ORDER BY columns for deterministic output) - ranking functions",
+ "sql": "SELECT string_col, DENSE_RANK() OVER(ORDER BY string_col DESC, int_col), int_col, MAX(double_col) OVER(ORDER BY string_col DESC, int_col) FROM {tbl}",
+ "keepOutputRowOrder": true,
+ "outputs": [
+ ["h", 1, 150, 1.53],
+ ["g", 2, 3, 100],
+ ["e", 3, 42, 100],
+ ["e", 3, 42, 100],
+ ["d", 4, 42, 100],
+ ["c", 5, 2, 400],
+ ["c", 6, 3, 400],
+ ["c", 7, 101, 400],
+ ["c", 8, 150, 400],
+ ["b", 9, 3, 400],
+ ["b", 10, 100, 400],
+ ["a", 11, 2, 400],
+ ["a", 11, 2, 400],
+ ["a", 12, 42, 400],
+ ["a", 12, 42, 400],
+ ["a", 12, 42, 400]
+ ]
+ },
{
"description": "Multiple OVER(ORDER BY)s with two DESC columns with select columns (two ORDER BY columns for deterministic output)",
"sql": "SELECT string_col, SUM(int_col) OVER(ORDER BY string_col DESC, int_col DESC), int_col, MAX(double_col) OVER(ORDER BY string_col DESC, int_col DESC) FROM {tbl}",
@@ -1069,6 +1479,29 @@
["h", 768, 150, 400]
]
},
+ {
+ "description": "Multiple OVER(ORDER BY)s with second DESC column with select columns (two ORDER BY columns for deterministic output) with alias - ranking functions",
+ "sql": "SELECT string_col, SUM(int_col) OVER(ORDER BY string_col, int_col DESC) as sum, int_col, RANK() OVER(ORDER BY string_col, int_col DESC) as rank FROM {tbl}",
+ "keepOutputRowOrder": true,
+ "outputs": [
+ ["a", 126, 42, 1],
+ ["a", 126, 42, 1],
+ ["a", 126, 42, 1],
+ ["a", 130, 2, 4],
+ ["a", 130, 2, 4],
+ ["b", 230, 100, 6],
+ ["b", 233, 3, 7],
+ ["c", 383, 150, 8],
+ ["c", 484, 101, 9],
+ ["c", 487, 3, 10],
+ ["c", 489, 2, 11],
+ ["d", 531, 42, 12],
+ ["e", 615, 42, 13],
+ ["e", 615, 42, 13],
+ ["g", 618, 3, 15],
+ ["h", 768, 150, 16]
+ ]
+ },
{
"description": "Multiple empty OVER()s with select columns with alias",
"sql": "SELECT string_col AS str, MAX(double_col) OVER() AS max, int_col, SUM(int_col) OVER() AS sum FROM {tbl}",
@@ -1161,6 +1594,30 @@
[false, 2, 1.53, 106.69]
]
},
+ {
+ "description": "Multiple OVER(ORDER BY)s with select columns and default frame on one but not the other (cannot use RANGE with multiple ORDER BY columns so not checking row order) - ranking functions",
+ "sql": "SELECT bool_col, MIN(int_col) OVER(ORDER BY int_col RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), double_col, RANK() OVER(ORDER BY int_col) FROM {tbl}",
+ "comments": "Calcite validation fails if RANGE is used with multiple ORDER BY columns. We don't support ROWS yet. Without frame specification the default is RANGE though.",
+ "keepOutputRowOrder": false,
+ "outputs": [
+ [true, 2, 300, 1],
+ [true, 2, 400, 1],
+ [false, 2, 400, 1],
+ [true, 2, 100, 4],
+ [true, 2, 100, 4],
+ [false, 2, 100, 4],
+ [true, 2, 50.5, 7],
+ [false, 2, 42, 7],
+ [true, 2, 75, 7],
+ [false, 2, 42, 7],
+ [true, 2, 50.5, 7],
+ [false, 2, 42, 7],
+ [false, 2, 1, 13],
+ [false, 2, 1.01, 14],
+ [false, 2, 1.5, 15],
+ [false, 2, 1.53, 15]
+ ]
+ },
{
"description": "Multiple empty OVER()s with select col with global order by",
"sql": "SELECT string_col, AVG(double_col) OVER(), COUNT(string_col) OVER() FROM {tbl} ORDER BY string_col",
@@ -1207,6 +1664,29 @@
["h", 106.69, 16]
]
},
+ {
+ "description": "Multiple OVER(ORDER BY)s with select col with global order by - ranking functions",
+ "sql": "SELECT string_col, RANK() OVER(ORDER BY string_col), DENSE_RANK() OVER(ORDER BY string_col) FROM {tbl} ORDER BY string_col",
+ "keepOutputRowOrder": true,
+ "outputs": [
+ ["a", 1, 1],
+ ["a", 1, 1],
+ ["a", 1, 1],
+ ["a", 1, 1],
+ ["a", 1, 1],
+ ["b", 6, 2],
+ ["b", 6, 2],
+ ["c", 8, 3],
+ ["c", 8, 3],
+ ["c", 8, 3],
+ ["c", 8, 3],
+ ["d", 12, 4],
+ ["e", 13, 5],
+ ["e", 13, 5],
+ ["g", 15, 6],
+ ["h", 16, 7]
+ ]
+ },
{
"description": "Multiple empty OVER()s with select col with global order by with LIMIT",
"sql": "SELECT string_col, COUNT(bool_col) OVER(), AVG(int_col) OVER() FROM {tbl} ORDER BY string_col LIMIT 5",
@@ -1234,6 +1714,20 @@
["b", 7, 33.2857143]
]
},
+ {
+ "description": "Multiple OVER(ORDER BY)s with select col with global order by with LIMIT - ranking functions",
+ "sql": "SELECT string_col, DENSE_RANK() OVER(ORDER BY string_col), AVG(int_col) OVER(ORDER BY string_col) FROM {tbl} ORDER BY string_col LIMIT 6",
+ "comments": "Cannot use LIMIT without ORDER BY since the results can change and we cannot verify exact row outputs",
+ "keepOutputRowOrder": true,
+ "outputs": [
+ ["a", 1, 26],
+ ["a", 1, 26],
+ ["a", 1, 26],
+ ["a", 1, 26],
+ ["a", 1, 26],
+ ["b", 2, 33.2857143]
+ ]
+ },
{
"description": "Multiple empty OVER()s and transform col",
"sql": "SELECT UPPER(CONCAT(string_col, bool_col, '-')), AVG(int_col) OVER(), MIN(double_col) OVER() FROM {tbl}",
@@ -1279,6 +1773,29 @@
["h-false", 48, 1]
]
},
+ {
+ "description": "Multiple OVER(ORDER BY)s and transform col (two ORDER BY columns for deterministic output) - ranking functions",
+ "sql": "SELECT CONCAT(string_col, bool_col, '-'), RANK() OVER(ORDER BY string_col, bool_col), DENSE_RANK() OVER(ORDER BY string_col, bool_col) FROM {tbl}",
+ "keepOutputRowOrder": true,
+ "outputs": [
+ ["a-false", 1, 1],
+ ["a-true", 2, 2],
+ ["a-true", 2, 2],
+ ["a-true", 2, 2],
+ ["a-true", 2, 2],
+ ["b-false", 6, 3],
+ ["b-false", 6, 3],
+ ["c-false", 8, 4],
+ ["c-false", 8, 4],
+ ["c-false", 8, 4],
+ ["c-true", 11, 5],
+ ["d-false", 12, 6],
+ ["e-false", 13, 7],
+ ["e-true", 14, 8],
+ ["g-true", 15, 9],
+ ["h-false", 16, 10]
+ ]
+ },
{
"description": "Multiple empty OVER()s with select col and filter",
"sql": "SELECT string_col, COUNT(bool_col) OVER(), MIN(double_col) OVER() FROM {tbl} WHERE string_col = 'a' AND bool_col != false",
@@ -1320,6 +1837,17 @@
["a", 4, 4]
]
},
+ {
+ "description": "Multiple OVER(ORDER BY)s ranking functions with select col and filter",
+ "sql": "SELECT string_col, RANK() OVER(ORDER BY string_col), DENSE_RANK() OVER(ORDER BY string_col) FROM {tbl} WHERE string_col = 'a' AND bool_col != false",
+ "keepOutputRowOrder": true,
+ "outputs": [
+ ["a", 1, 1],
+ ["a", 1, 1],
+ ["a", 1, 1],
+ ["a", 1, 1]
+ ]
+ },
{
"description": "Multiple empty OVER()s with select col and filter that matches no rows",
"sql": "SELECT string_col, COUNT(bool_col) OVER(), MIN(double_col) OVER() FROM {tbl} WHERE string_col = 'a' AND bool_col != false AND int_col > 200",
@@ -1371,6 +1899,19 @@
[359, 100.585]
]
},
+ {
+ "description": "Multiple OVER(ORDER BY) with filter - ranking functions",
+ "sql": "SELECT DENSE_RANK() OVER(ORDER BY string_col), AVG(double_col) OVER(ORDER BY string_col) FROM {tbl} WHERE string_col NOT IN ('a', 'd', 'e', 'g', 'h')",
+ "keepOutputRowOrder": true,
+ "outputs": [
+ [1, 50.5],
+ [1, 50.5],
+ [2, 100.585],
+ [2, 100.585],
+ [2, 100.585],
+ [2, 100.585]
+ ]
+ },
{
"description": "Multiple OVER(ORDER BY)s with select col and filter (two ORDER BY columns for deterministic output)",
"sql": "SELECT double_col, SUM(int_col) OVER(ORDER BY string_col, double_col), AVG(double_col) OVER(ORDER BY string_col, double_col) FROM {tbl} WHERE string_col NOT IN ('a', 'd', 'e', 'g', 'h')",
@@ -1384,6 +1925,19 @@
[400, 359, 100.585]
]
},
+ {
+ "description": "Multiple OVER(ORDER BY)s with select col and filter (two ORDER BY columns for deterministic output) - ranking functions",
+ "sql": "SELECT double_col, SUM(int_col) OVER(ORDER BY string_col, double_col), RANK() OVER(ORDER BY string_col, double_col) FROM {tbl} WHERE string_col NOT IN ('a', 'd', 'e', 'g', 'h')",
+ "keepOutputRowOrder": true,
+ "outputs": [
+ [1, 100, 1],
+ [100, 103, 2],
+ [1.01, 204, 3],
+ [1.5, 354, 4],
+ [100, 357, 5],
+ [400, 359, 6]
+ ]
+ },
{
"description": "Multiple empty OVER() with select transform and filter",
"sql": "SELECT LENGTH(CONCAT(string_col, bool_col, '-')), MAX(int_col) OVER(), COUNT(double_col) OVER() FROM {tbl} where int_col < 50 OR double_col = 1.01",
@@ -1423,6 +1977,26 @@
[6, 101, 13]
]
},
+ {
+ "description": "Multiple OVER(ORDER BY)s with select transform and filter - ranking functions",
+ "sql": "SELECT LENGTH(CONCAT(string_col, bool_col, '-')), RANK() OVER(ORDER BY string_col, bool_col), DENSE_RANK() OVER(ORDER BY string_col, bool_col) FROM {tbl} where int_col < 50 OR double_col = 1.01",
+ "keepOutputRowOrder": true,
+ "outputs": [
+ [7, 1, 1],
+ [6, 2, 2],
+ [6, 2, 2],
+ [6, 2, 2],
+ [6, 2, 2],
+ [7, 6, 3],
+ [7, 7, 4],
+ [7, 7, 4],
+ [6, 9, 5],
+ [7, 10, 6],
+ [7, 11, 7],
+ [6, 12, 8],
+ [6, 13, 9]
+ ]
+ },
{
"description": "Multiple empty OVER()s with group by",
"sql": "SELECT MAX({tbl}.double_col) OVER(), COUNT({tbl}.double_col) OVER() FROM {tbl} GROUP BY double_col",
@@ -1456,6 +2030,23 @@
[400, 10]
]
},
+ {
+ "description": "Multiple OVER(ORDER BY)s with group by - ranking functions",
+ "sql": "SELECT DENSE_RANK() OVER(ORDER BY {tbl}.double_col), COUNT({tbl}.double_col) OVER(ORDER BY {tbl}.double_col) FROM {tbl} GROUP BY double_col",
+ "keepOutputRowOrder": true,
+ "outputs": [
+ [1, 1],
+ [2, 2],
+ [3, 3],
+ [4, 4],
+ [5, 5],
+ [6, 6],
+ [7, 7],
+ [8, 8],
+ [9, 9],
+ [10, 10]
+ ]
+ },
{
"description": "Multiple empty OVER()s with select col and group by",
"sql": "SELECT string_col, MIN({tbl}.double_col) OVER(), SUM({tbl}.double_col) OVER() FROM {tbl} GROUP BY string_col, double_col",
@@ -1501,6 +2092,29 @@
["h", 1, 1707.04]
]
},
+ {
+ "description": "Multiple OVER(ORDER BY)s with select col and group by - ranking functions",
+ "sql": "SELECT string_col, RANK() OVER(ORDER BY {tbl}.string_col), SUM({tbl}.double_col) OVER(ORDER BY {tbl}.string_col) FROM {tbl} GROUP BY string_col, double_col",
+ "keepOutputRowOrder": true,
+ "outputs": [
+ ["a", 1, 867.5],
+ ["a", 1, 867.5],
+ ["a", 1, 867.5],
+ ["a", 1, 867.5],
+ ["a", 1, 867.5],
+ ["b", 6, 968.5],
+ ["b", 6, 968.5],
+ ["c", 8, 1471.01],
+ ["c", 8, 1471.01],
+ ["c", 8, 1471.01],
+ ["c", 8, 1471.01],
+ ["d", 12, 1513.01],
+ ["e", 13, 1605.51],
+ ["e", 13, 1605.51],
+ ["g", 15, 1705.51],
+ ["h", 16, 1707.04]
+ ]
+ },
{
"description": "Multiple empty OVER()s with agg col and group by",
"sql": "SELECT SUM(double_col), SUM({tbl}.double_col) OVER(), AVG({tbl}.double_col) OVER() FROM {tbl} GROUP BY double_col",
@@ -1534,6 +2148,23 @@
[800, 972.54, 97.254]
]
},
+ {
+ "description": "Multiple OVER(ORDER BY)s with agg col and group by - ranking functions",
+ "sql": "SELECT SUM(double_col), SUM({tbl}.double_col) OVER(ORDER BY {tbl}.double_col), DENSE_RANK() OVER(ORDER BY {tbl}.double_col) FROM {tbl} GROUP BY double_col",
+ "keepOutputRowOrder": true,
+ "outputs": [
+ [1, 1, 1],
+ [1.01, 2.01, 2],
+ [1.5, 3.51, 3],
+ [1.53, 5.04, 4],
+ [126, 47.04, 5],
+ [101, 97.54, 6],
+ [75, 172.54, 7],
+ [300, 272.54, 8],
+ [300, 572.54, 9],
+ [800, 972.54, 10]
+ ]
+ },
{
"description": "Multiple empty OVER()s with select col, agg col and group by",
"sql": "SELECT double_col, SUM(double_col), SUM({tbl}.double_col) OVER(), AVG({tbl}.double_col) OVER() FROM {tbl} GROUP BY double_col",
@@ -1567,6 +2198,23 @@
[400, 800, 972.54, 97.254]
]
},
+ {
+ "description": "Multiple OVER(ORDER BY)s with select col, agg col and group by - ranking function",
+ "sql": "SELECT double_col, SUM(double_col), SUM({tbl}.double_col) OVER(ORDER BY {tbl}.double_col), RANK() OVER(ORDER BY {tbl}.double_col) FROM {tbl} GROUP BY double_col",
+ "keepOutputRowOrder": true,
+ "outputs": [
+ [1, 1, 1, 1],
+ [1.01, 1.01, 2.01, 2],
+ [1.5, 1.5, 3.51, 3],
+ [1.53, 1.53, 5.04, 4],
+ [42, 126, 47.04, 5],
+ [50.5, 101, 97.54, 6],
+ [75, 75, 172.54, 7],
+ [100, 300, 272.54, 8],
+ [300, 300, 572.54, 9],
+ [400, 800, 972.54, 10]
+ ]
+ },
{
"description": "Multiple empty OVER() with select col, agg col and group by with global order by",
"sql": "SELECT double_col, SUM(double_col), SUM({tbl}.double_col) OVER(), AVG({tbl}.double_col) OVER() FROM {tbl} GROUP BY double_col ORDER BY double_col",
@@ -1619,6 +2267,23 @@
[400, 800, 10, 10]
]
},
+ {
+ "description": "Multiple OVER(ORDER BY)s ranking functions with select col, agg col and group by with global order by",
+ "sql": "SELECT double_col, SUM(double_col), RANK() OVER(ORDER BY {tbl}.double_col), DENSE_RANK() OVER(ORDER BY {tbl}.double_col) FROM {tbl} GROUP BY double_col ORDER BY double_col",
+ "keepOutputRowOrder": true,
+ "outputs": [
+ [1, 1, 1, 1],
+ [1.01, 1.01, 2, 2],
+ [1.5, 1.5, 3, 3],
+ [1.53, 1.53, 4, 4],
+ [42, 126, 5, 5],
+ [50.5, 101, 6, 6],
+ [75, 75, 7, 7],
+ [100, 300, 8, 8],
+ [300, 300, 9, 9],
+ [400, 800, 10, 10]
+ ]
+ },
{
"description": "Multiple empty OVER()s with select col, agg col and group by with a filter",
"sql": "SELECT double_col, SUM(double_col), SUM({tbl}.double_col) OVER(), AVG({tbl}.double_col) OVER() FROM {tbl} WHERE double_col > 100 GROUP BY double_col",
@@ -1705,6 +2370,14 @@
["a", true, true],
["a", true, true]
]
+ },
+ {
+ "description": "Window function subquery: rank having OVER with PARTITION BY ORDER BY and a GROUP BY",
+ "sql": "SELECT string_col, total, rank, dense_rank FROM(SELECT string_col, count(*) as total, RANK() OVER(ORDER BY count(*) DESC) as rank, DENSE_RANK() OVER(ORDER BY count(*) DESC) as dense_rank FROM {tbl} GROUP BY string_col) WHERE rank < 2",
+ "keepOutputRowOrder": true,
+ "outputs": [
+ ["a", 5, 1, 1]
+ ]
}
]
},
@@ -1806,6 +2479,30 @@
[150]
]
},
+ {
+ "description": "Single OVER(PARTITION BY k1 ORDER BY k2) ranking functions",
+ "sql": "SELECT RANK() OVER(PARTITION BY string_col ORDER BY int_col) FROM {tbl}",
+ "comments": "Cannot enforce a global ordering as partitions aren't ordered, just keys within a partition are",
+ "keepOutputRowOrder": false,
+ "outputs": [
+ [1],
+ [1],
+ [3],
+ [3],
+ [3],
+ [1],
+ [2],
+ [1],
+ [2],
+ [3],
+ [4],
+ [1],
+ [1],
+ [1],
+ [1],
+ [1]
+ ]
+ },
{
"description": "Single OVER(PARTITION BY) sum with select columns",
"sql": "SELECT string_col, SUM(int_col) OVER(PARTITION BY string_col), int_col FROM {tbl}",
@@ -1898,6 +2595,30 @@
["h", 1, 150]
]
},
+ {
+ "description": "Single OVER(PARTITION BY k1 ORDER BY k2) ranking functions with select columns",
+ "sql": "SELECT string_col, DENSE_RANK() OVER(PARTITION BY string_col ORDER BY int_col), int_col FROM {tbl}",
+ "comments": "Cannot enforce a global ordering as partitions aren't ordered, just keys within a partition are",
+ "keepOutputRowOrder": false,
+ "outputs": [
+ ["a", 1, 2],
+ ["a", 1, 2],
+ ["a", 2, 42],
+ ["a", 2, 42],
+ ["a", 2, 42],
+ ["b", 1, 3],
+ ["b", 2, 100],
+ ["c", 1, -101],
+ ["c", 2, 2],
+ ["c", 3, 3],
+ ["c", 4, 150],
+ ["d", 1, 42],
+ ["e", 1, 42],
+ ["e", 1, 42],
+ ["g", 1, 3],
+ ["h", 1, 150]
+ ]
+ },
{
"description": "Single OVER(PARTITION BY k1 ORDER BY k2, k3) sum with select columns",
"sql": "SELECT string_col, SUM(int_col) OVER(PARTITION BY string_col ORDER BY int_col, bool_col), int_col, bool_col FROM {tbl}",
@@ -1970,6 +2691,30 @@
["h", 1, 150, false]
]
},
+ {
+ "description": "Single OVER(PARTITION BY k1 ORDER BY k2, k3 DESC) rank with select columns",
+ "sql": "SELECT string_col, RANK() OVER(PARTITION BY string_col ORDER BY int_col, bool_col DESC), int_col, bool_col FROM {tbl}",
+ "comments": "Cannot enforce a global ordering as partitions aren't ordered, just keys within a partition are",
+ "keepOutputRowOrder": false,
+ "outputs": [
+ ["a", 1, 2, true],
+ ["a", 1, 2, true],
+ ["a", 3, 42, true],
+ ["a", 3, 42, true],
+ ["a", 5, 42, false],
+ ["b", 1, 3, false],
+ ["b", 2, 100, false],
+ ["c", 1, -101, false],
+ ["c", 2, 2, false],
+ ["c", 3, 3, true],
+ ["c", 4, 150, false],
+ ["d", 1, 42, false],
+ ["e", 1, 42, true],
+ ["e", 2, 42, false],
+ ["g", 1, 3, true],
+ ["h", 1, 150, false]
+ ]
+ },
{
"description": "Single OVER(PARTITION BY k1 ORDER BY k2 DESC, k3 DESC) sum with select columns",
"sql": "SELECT string_col, SUM(int_col) OVER(PARTITION BY string_col ORDER BY int_col DESC, bool_col DESC), int_col, bool_col FROM {tbl}",
@@ -2040,6 +2785,28 @@
["c", 54, 2]
]
},
+ {
+ "description": "Single OVER(PARTITION BY k1 ORDER BY k1) sum with select columns - ranking functions",
+ "sql": "SELECT string_col, DENSE_RANK() OVER(PARTITION BY string_col ORDER BY string_col), int_col FROM {tbl}",
+ "outputs": [
+ ["a", 1, 2],
+ ["a", 1, 2],
+ ["a", 1, 42],
+ ["a", 1, 42],
+ ["a", 1, 42],
+ ["b", 1, 3],
+ ["b", 1, 100],
+ ["e", 1, 42],
+ ["e", 1, 42],
+ ["d", 1, 42],
+ ["h", 1, 150],
+ ["g", 1, 3],
+ ["c", 1, -101],
+ ["c", 1, 150],
+ ["c", 1, 3],
+ ["c", 1, 2]
+ ]
+ },
{
"description": "Single OVER(PARTITION BY k1 ORDER BY k1) sum with select columns with order by DESC",
"sql": "SELECT string_col, SUM(int_col) OVER(PARTITION BY string_col ORDER BY string_col DESC), int_col FROM {tbl}",
@@ -2084,6 +2851,28 @@
["c", 4]
]
},
+ {
+ "description": "Single OVER(PARTITION BY k1 ORDER BY k1) rank with select columns with order by DESC",
+ "sql": "SELECT string_col, RANK() OVER(PARTITION BY string_col ORDER BY string_col DESC) FROM {tbl}",
+ "outputs": [
+ ["a", 1],
+ ["a", 1],
+ ["a", 1],
+ ["a", 1],
+ ["a", 1],
+ ["b", 1],
+ ["b", 1],
+ ["e", 1],
+ ["e", 1],
+ ["d", 1],
+ ["h", 1],
+ ["g", 1],
+ ["c", 1],
+ ["c", 1],
+ ["c", 1],
+ ["c", 1]
+ ]
+ },
{
"description": "Single OVER(PARTITION BY) max with select columns with alias",
"sql": "SELECT string_col AS str, MAX(double_col) OVER(PARTITION BY string_col) AS max, int_col FROM {tbl}",
@@ -2154,6 +2943,30 @@
["g", 1, 3]
]
},
+ {
+ "description": "Single OVER(PARTITION BY k1 ORDER BY k2) dense_rank with select columns with alias",
+ "sql": "SELECT string_col AS str, DENSE_RANK() OVER(PARTITION BY string_col ORDER BY int_col) AS dense_rank, int_col FROM {tbl}",
+ "comments": "Cannot enforce a global ordering as partitions aren't ordered, just keys within a partition are",
+ "keepOutputRowOrder": false,
+ "outputs": [
+ ["a", 1, 2],
+ ["a", 1, 2],
+ ["a", 2, 42],
+ ["a", 2, 42],
+ ["a", 2, 42],
+ ["b", 1, 3],
+ ["b", 2, 100],
+ ["e", 1, 42],
+ ["e", 1, 42],
+ ["d", 1, 42],
+ ["c", 1, -101],
+ ["c", 2, 2],
+ ["c", 3, 3],
+ ["c", 4, 150],
+ ["h", 1, 150],
+ ["g", 1, 3]
+ ]
+ },
{
"description": "Single OVER(PARTITION BY) min with select columns and default frame",
"sql": "SELECT bool_col, MIN(int_col) OVER(PARTITION BY bool_col ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), double_col FROM {tbl}",
@@ -2172,9 +2985,33 @@
[false, -101, 42],
[false, -101, 42],
[false, -101, 1.01],
- [false, -101, 1.5],
- [false, -101, -1.53],
- [false, -101, 400]
+ [false, -101, 1.5],
+ [false, -101, -1.53],
+ [false, -101, 400]
+ ]
+ },
+ {
+ "description": "Single OVER(PARTITION BY k1 ORDER BY k2) min with select columns and default frame",
+ "sql": "SELECT bool_col, MIN(int_col) OVER(PARTITION BY bool_col ORDER BY int_col DESC RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), double_col FROM {tbl}",
+ "comments": "Calcite validation fails if more than 1 ORDER BY column is used with RANGE. ROWS is not yet supported. Cannot enforce a global ordering as partitions aren't ordered, just keys within a partition are",
+ "keepOutputRowOrder": false,
+ "outputs": [
+ [true, 2, 300],
+ [true, 2, 400],
+ [true, 3, 100],
+ [true, 42, 50.5],
+ [true, 42, 75],
+ [true, 42, 50.5],
+ [true, 3, 100],
+ [false, 3, 100],
+ [false, 100, 1],
+ [false, 42, 42],
+ [false, 42, 42],
+ [false, 42, 42],
+ [false, -101, 1.01],
+ [false, 150, 1.5],
+ [false, 150, -1.53],
+ [false, 2, 400]
]
},
{
@@ -2247,6 +3084,29 @@
["h", 1]
]
},
+ {
+ "description": "Single OVER(PARTITION BY) row_number with select col with global order by",
+ "sql": "SELECT string_col, ROW_NUMBER() OVER(PARTITION BY string_col) as row_number FROM {tbl} ORDER BY string_col, row_number",
+ "keepOutputRowOrder": true,
+ "outputs": [
+ ["a", 1],
+ ["a", 2],
+ ["a", 3],
+ ["a", 4],
+ ["a", 5],
+ ["b", 1],
+ ["b", 2],
+ ["c", 1],
+ ["c", 2],
+ ["c", 3],
+ ["c", 4],
+ ["d", 1],
+ ["e", 1],
+ ["e", 2],
+ ["g", 1],
+ ["h", 1]
+ ]
+ },
{
"description": "Single OVER(PARTITION BY k1 ORDER BY k2) avg with select col with global order by (use two global ORDER BY keys for deterministic ordering)",
"sql": "SELECT string_col, AVG(double_col) OVER(PARTITION BY string_col ORDER BY int_col) FROM {tbl} ORDER BY string_col, int_col",
@@ -2270,6 +3130,29 @@
["h", -1.53]
]
},
+ {
+ "description": "Single OVER(PARTITION BY k1 ORDER BY k2) avg with select col with global order by (use two global ORDER BY keys for deterministic ordering) - ranking functions",
+ "sql": "SELECT string_col, RANK() OVER(PARTITION BY string_col ORDER BY int_col) FROM {tbl} ORDER BY string_col, int_col",
+ "keepOutputRowOrder": true,
+ "outputs": [
+ ["a", 1],
+ ["a", 1],
+ ["a", 3],
+ ["a", 3],
+ ["a", 3],
+ ["b", 1],
+ ["b", 2],
+ ["c", 1],
+ ["c", 2],
+ ["c", 3],
+ ["c", 4],
+ ["d", 1],
+ ["e", 1],
+ ["e", 1],
+ ["g", 1],
+ ["h", 1]
+ ]
+ },
{
"description": "Single OVER(PARTITION BY (2 keys)) avg with select col with global order by",
"sql": "SELECT string_col, bool_col, AVG(double_col) OVER(PARTITION BY string_col, bool_col) FROM {tbl} ORDER BY string_col, bool_col",
@@ -2339,6 +3222,29 @@
["h", false, 1]
]
},
+ {
+ "description": "Single OVER(PARTITION BY (2 keys) ORDER BY different key) dense_rank with select col with global order by (added int_col to global order by for deterministic results)",
+ "sql": "SELECT string_col, bool_col, DENSE_RANK() OVER(PARTITION BY string_col, bool_col ORDER BY int_col) as dense_rank FROM {tbl} ORDER BY string_col, bool_col, int_col, dense_rank",
+ "keepOutputRowOrder": true,
+ "outputs": [
+ ["a", false, 1],
+ ["a", true, 1],
+ ["a", true, 1],
+ ["a", true, 2],
+ ["a", true, 2],
+ ["b", false, 1],
+ ["b", false, 2],
+ ["c", false, 1],
+ ["c", false, 2],
+ ["c", false, 3],
+ ["c", true, 1],
+ ["d", false, 1],
+ ["e", false, 1],
+ ["e", true, 1],
+ ["g", true, 1],
+ ["h", false, 1]
+ ]
+ },
{
"description": "Single OVER(PARTITION BY) count with select col with global order by with LIMIT",
"sql": "SELECT string_col, COUNT(int_col) OVER(PARTITION BY string_col) FROM {tbl} ORDER BY string_col LIMIT 6",
@@ -2368,6 +3274,21 @@
["b", 2]
]
},
+ {
+ "description": "Single OVER(PARTITION BY k1 ORDER BY k2) ranking functions with select col with global order by with LIMIT (added int_col to ORDER BY list for deterministic results)",
+ "sql": "SELECT string_col, RANK() OVER(PARTITION BY string_col ORDER BY int_col) FROM {tbl} ORDER BY string_col, int_col LIMIT 7",
+ "comments": "Cannot use LIMIT without ORDER BY since the results can change and we cannot verify exact row outputs",
+ "keepOutputRowOrder": true,
+ "outputs": [
+ ["a", 1],
+ ["a", 1],
+ ["a", 3],
+ ["a", 3],
+ ["a", 3],
+ ["b", 1],
+ ["b", 2]
+ ]
+ },
{
"description": "Single OVER(PARTITION BY) and transform col",
"sql": "SELECT CONCAT(string_col, bool_col, '-'), AVG(int_col) OVER(PARTITION BY string_col) FROM {tbl}",
@@ -2438,6 +3359,30 @@
["g-true", 1]
]
},
+ {
+ "description": "Single OVER(PARTITION BY k1 ORDER BY k2) rank and transform col",
+ "sql": "SELECT CONCAT(string_col, bool_col, '-'), RANK() OVER(PARTITION BY string_col ORDER BY bool_col) FROM {tbl}",
+ "comments": "Cannot enforce a global ordering as partitions aren't ordered, just keys within a partition are",
+ "keepOutputRowOrder": false,
+ "outputs": [
+ ["a-false", 1],
+ ["a-true", 2],
+ ["a-true", 2],
+ ["a-true", 2],
+ ["a-true", 2],
+ ["b-false", 1],
+ ["b-false", 1],
+ ["c-false", 1],
+ ["c-false", 1],
+ ["c-false", 1],
+ ["c-true", 4],
+ ["d-false", 1],
+ ["e-false", 1],
+ ["e-true", 2],
+ ["h-false", 1],
+ ["g-true", 1]
+ ]
+ },
{
"description": "Single OVER(PARTITION BY) with select col and filter",
"sql": "SELECT string_col, COUNT(bool_col) OVER(PARTITION BY string_col) FROM {tbl} WHERE string_col = 'a' AND bool_col = false",
@@ -2461,6 +3406,15 @@
["a", 1]
]
},
+ {
+ "description": "Single OVER(PARTITION BY k1 ORDER BY k2) rank with select col and filter",
+ "sql": "SELECT string_col, RANK() OVER(PARTITION BY string_col ORDER BY bool_col) FROM {tbl} WHERE string_col = 'a' AND bool_col = false",
+ "comments": "Cannot enforce a global ordering as partitions aren't ordered, just keys within a partition are",
+ "keepOutputRowOrder": false,
+ "outputs": [
+ ["a", 1]
+ ]
+ },
{
"description": "Single OVER(PARTITION BY) with select col and filter that matches no rows",
"sql": "SELECT string_col, COUNT(bool_col) OVER(PARTITION BY string_col) FROM {tbl} WHERE string_col = 'a' AND bool_col = false AND int_col > 200",
@@ -2515,6 +3469,20 @@
[100, 3]
]
},
+ {
+ "description": "Single OVER(PARTITION BY k1 ORDER BY k2) with select col and filter - ranking functions",
+ "sql": "SELECT double_col, DENSE_RANK() OVER(PARTITION BY bool_col, string_col ORDER BY int_col) FROM {tbl} WHERE string_col IN ('b', 'c')",
+ "comments": "Cannot enforce a global ordering as partitions aren't ordered, just keys within a partition are",
+ "keepOutputRowOrder": false,
+ "outputs": [
+ [100, 1],
+ [1, 2],
+ [1.01, 1],
+ [400, 2],
+ [1.5, 3],
+ [100, 1]
+ ]
+ },
{
"description": "Single OVER(PARTITION BY k1 ORDER BY k1) with select col and filter",
"sql": "SELECT double_col, SUM(int_col) OVER(PARTITION BY bool_col, string_col ORDER BY bool_col, string_col) FROM {tbl} WHERE string_col IN ('b', 'c')",
@@ -2536,6 +3504,15 @@
[400, 1]
]
},
+ {
+ "description": "Single OVER(PARTITION BY k1 ORDER BY k1) rank with select col and filter",
+ "sql": "SELECT double_col, RANK() OVER(PARTITION BY bool_col, string_col ORDER BY bool_col, string_col) FROM {tbl} WHERE string_col IN ('b', 'c') AND int_col < 100 AND int_col > 0",
+ "outputs": [
+ [100, 1],
+ [100, 1],
+ [400, 1]
+ ]
+ },
{
"description": "Single OVER(PARTITION BY k1 ORDER BY k1) with select col and filter where ORDER BY is DESC",
"sql": "SELECT double_col, SUM(int_col) OVER(PARTITION BY bool_col, string_col ORDER BY bool_col, string_col DESC) FROM {tbl} WHERE string_col IN ('b', 'c')",
@@ -2625,6 +3602,28 @@
["g-true", 1]
]
},
+ {
+ "description": "Single OVER(PARTITION BY k1 ORDER BY k2) dense_rank with select transform and filter",
+ "sql": "SELECT CONCAT(string_col, bool_col, '-'), DENSE_RANK() OVER(PARTITION BY string_col, int_col ORDER BY bool_col) FROM {tbl} where int_col < 50 OR double_col = 1",
+ "comments": "Cannot enforce a global ordering as partitions aren't ordered, just keys within a partition are",
+ "keepOutputRowOrder": false,
+ "outputs": [
+ ["a-true", 1],
+ ["a-true", 1],
+ ["a-false", 1],
+ ["a-true", 2],
+ ["a-true", 2],
+ ["b-false", 1],
+ ["b-false", 1],
+ ["c-false", 1],
+ ["c-false", 1],
+ ["c-true", 1],
+ ["d-false", 1],
+ ["e-false", 1],
+ ["e-true", 2],
+ ["g-true", 1]
+ ]
+ },
{
"description": "Single OVER(PARTITION BY) with group by",
"sql": "SELECT MAX({tbl}.int_col) OVER(PARTITION BY {tbl}.string_col) FROM {tbl} GROUP BY string_col, int_col",
@@ -2663,6 +3662,26 @@
[150]
]
},
+ {
+ "description": "Single OVER(PARTITION BY k1 ORDER BY k2) with group by - ranking functions",
+ "sql": "SELECT RANK() OVER(PARTITION BY {tbl}.string_col ORDER BY {tbl}.int_col) FROM {tbl} GROUP BY string_col, int_col",
+ "comments": "Cannot enforce a global ordering as partitions aren't ordered, just keys within a partition are",
+ "keepOutputRowOrder": false,
+ "outputs": [
+ [1],
+ [2],
+ [1],
+ [2],
+ [1],
+ [2],
+ [3],
+ [4],
+ [1],
+ [1],
+ [1],
+ [1]
+ ]
+ },
{
"description": "Single OVER(PARTITION BY) with select col and group by",
"sql": "SELECT string_col, MIN({tbl}.int_col) OVER(PARTITION BY {tbl}.string_col) FROM {tbl} GROUP BY string_col, int_col",
@@ -2719,6 +3738,26 @@
["h", 150]
]
},
+ {
+ "description": "Single OVER(PARTITION BY k1 ORDER BY k2) with select col and group by - ranking functions",
+ "sql": "SELECT string_col, RANK() OVER(PARTITION BY {tbl}.string_col ORDER BY {tbl}.int_col) FROM {tbl} GROUP BY string_col, int_col",
+ "comments": "Cannot enforce a global ordering as partitions aren't ordered, just keys within a partition are",
+ "keepOutputRowOrder": false,
+ "outputs": [
+ ["a", 1],
+ ["a", 2],
+ ["b", 1],
+ ["b", 2],
+ ["c", 1],
+ ["c", 2],
+ ["c", 3],
+ ["c", 4],
+ ["d", 1],
+ ["e", 1],
+ ["g", 1],
+ ["h", 1]
+ ]
+ },
{
"description": "Single OVER(PARTITION BY) with agg col and group by",
"sql": "SELECT SUM(int_col), SUM({tbl}.int_col) OVER(PARTITION BY {tbl}.int_col) FROM {tbl} GROUP BY int_col",
@@ -2771,6 +3810,26 @@
[150, 1]
]
},
+ {
+ "description": "Single OVER(PARTITION BY k1 ORDER by k2) dense_rank with agg col and group by",
+ "sql": "SELECT SUM(int_col), DENSE_RANK() OVER(PARTITION BY {tbl}.string_col ORDER BY {tbl}.int_col) FROM {tbl} GROUP BY string_col, int_col",
+ "comments": "Cannot enforce a global ordering as partitions aren't ordered, just keys within a partition are",
+ "keepOutputRowOrder": false,
+ "outputs": [
+ [4, 1],
+ [126, 2],
+ [3, 1],
+ [100, 2],
+ [-101, 1],
+ [2, 2],
+ [3, 3],
+ [150, 4],
+ [42, 1],
+ [84, 1],
+ [3, 1],
+ [150, 1]
+ ]
+ },
{
"description": "Single OVER(PARTITION BY) with select col, agg col and group by",
"sql": "SELECT int_col, SUM(int_col), SUM({tbl}.int_col) OVER(PARTITION BY {tbl}.int_col) FROM {tbl} GROUP BY int_col",
@@ -2803,6 +3862,26 @@
[150, 150, 150]
]
},
+ {
+ "description": "Single OVER(PARTITION BY k1 ORDER by k2) rank with select col, agg col and group by",
+ "sql": "SELECT int_col, SUM(int_col), RANK() OVER(PARTITION BY {tbl}.string_col ORDER BY {tbl}.int_col) FROM {tbl} GROUP BY string_col, int_col",
+ "comments": "Cannot enforce a global ordering as partitions aren't ordered, just keys within a partition are",
+ "keepOutputRowOrder": false,
+ "outputs": [
+ [2, 4, 1],
+ [42, 126, 2],
+ [3, 3, 1],
+ [100, 100, 2],
+ [-101, -101, 1],
+ [2, 2, 2],
+ [3, 3, 3],
+ [150, 150, 4],
+ [42, 42, 1],
+ [42, 84, 1],
+ [3, 3, 1],
+ [150, 150, 1]
+ ]
+ },
{
"description": "Single OVER(PARTITION BY) with select col, agg col and group by with global order by",
"sql": "SELECT int_col, SUM(int_col), SUM({tbl}.int_col) OVER(PARTITION BY {tbl}.int_col) FROM {tbl} GROUP BY int_col ORDER BY int_col",
@@ -2854,6 +3933,25 @@
[150, 150, 1]
]
},
+ {
+ "description": "Single OVER(PARTITION BY k1 ORDER by k2) dense_rank with select col, agg col and group by with global order by",
+ "sql": "SELECT int_col, SUM(int_col), DENSE_RANK() OVER(PARTITION BY {tbl}.string_col ORDER BY {tbl}.int_col) FROM {tbl} GROUP BY string_col, int_col ORDER BY string_col, int_col",
+ "keepOutputRowOrder": true,
+ "outputs": [
+ [2, 4, 1],
+ [42, 126, 2],
+ [3, 3, 1],
+ [100, 100, 2],
+ [-101, -101, 1],
+ [2, 2, 2],
+ [3, 3, 3],
+ [150, 150, 4],
+ [42, 42, 1],
+ [42, 84, 1],
+ [3, 3, 1],
+ [150, 150, 1]
+ ]
+ },
{
"description": "Single OVER(PARTITION BY) with select col, agg col and group by with a filter",
"sql": "SELECT int_col, SUM(int_col), SUM({tbl}.int_col) OVER(PARTITION BY {tbl}.int_col) FROM {tbl} WHERE int_col >= 100 GROUP BY int_col",
@@ -2879,6 +3977,15 @@
[150, 150, 300]
]
},
+ {
+ "description": "Single OVER(PARTITION BY k1 ORDER BY k2) rank with select col, agg col and group by with a filter",
+ "sql": "SELECT int_col, SUM(int_col), RANK() OVER(PARTITION BY {tbl}.int_col ORDER BY {tbl}.string_col) FROM {tbl} WHERE int_col >= 100 GROUP BY string_col, int_col",
+ "outputs": [
+ [100, 100, 1],
+ [150, 150, 1],
+ [150, 150, 2]
+ ]
+ },
{
"description": "Single OVER(PARTITION BY) with select col, agg col and group by with a filter that matches no rows",
"sql": "SELECT int_col, SUM(int_col), SUM({tbl}.int_col) OVER(PARTITION BY {tbl}.int_col) FROM {tbl} WHERE int_col > 200 GROUP BY int_col",
@@ -2935,6 +4042,30 @@
[150, 1]
]
},
+ {
+ "description": "Multiple OVER(PARTITION BY k1 ORDER BY k2)s - ranking functions",
+ "sql": "SELECT RANK() OVER(PARTITION BY string_col ORDER BY int_col), COUNT(string_col) OVER(PARTITION BY string_col ORDER BY int_col) FROM {tbl}",
+ "comments": "Cannot enforce a global ordering as partitions aren't ordered, just keys within a partition are",
+ "keepOutputRowOrder": false,
+ "outputs": [
+ [1, 2],
+ [1, 2],
+ [3, 5],
+ [3, 5],
+ [3, 5],
+ [1, 1],
+ [2, 2],
+ [1, 1],
+ [2, 2],
+ [3, 3],
+ [4, 4],
+ [1, 1],
+ [1, 2],
+ [1, 2],
+ [1, 1],
+ [1, 1]
+ ]
+ },
{
"description": "Multiple OVER(PARTITION BY)s with select columns",
"sql": "SELECT string_col, SUM(int_col) OVER(PARTITION BY string_col), int_col, MAX(double_col) OVER(PARTITION BY string_col) FROM {tbl}",
@@ -3005,6 +4136,29 @@
["h", 1, 150, 1]
]
},
+ {
+ "description": "Multiple OVER(PARTITION BY k1 ORDER BY k2)s ranking functions with select columns",
+ "sql": "SELECT string_col, DENSE_RANK() OVER(PARTITION BY string_col ORDER BY int_col), int_col, RANK() OVER(PARTITION BY string_col ORDER BY int_col) FROM {tbl}",
+ "keepOutputRowOrder": false,
+ "outputs": [
+ ["a", 1, 2, 1],
+ ["a", 1, 2, 1],
+ ["a", 2, 42, 3],
+ ["a", 2, 42, 3],
+ ["a", 2, 42, 3],
+ ["b", 1, 3, 1],
+ ["b", 2, 100, 2],
+ ["c", 1, -101, 1],
+ ["c", 2, 2, 2],
+ ["c", 3, 3, 3],
+ ["c", 4, 150, 4],
+ ["d", 1, 42, 1],
+ ["e", 1, 42, 1],
+ ["e", 1, 42, 1],
+ ["g", 1, 3, 1],
+ ["h", 1, 150, 1]
+ ]
+ },
{
"description": "Multiple OVER(PARTITION BY k1 ORDER BY k2, k3)s with select columns",
"sql": "SELECT string_col, SUM(int_col) OVER(PARTITION BY string_col ORDER BY int_col, bool_col), int_col, MAX(double_col) OVER(PARTITION BY string_col ORDER BY int_col, bool_col) FROM {tbl}",
@@ -3053,6 +4207,30 @@
["h", 150, 150, -1.53]
]
},
+ {
+ "description": "Multiple OVER(PARTITION BY k1 ORDER BY k2, k3 DESC)s with select columns - ranking functions",
+ "sql": "SELECT string_col, RANK() OVER(PARTITION BY string_col ORDER BY int_col, bool_col DESC), int_col, DENSE_RANK() OVER(PARTITION BY string_col ORDER BY int_col, bool_col DESC) FROM {tbl}",
+ "comments": "Cannot enforce a global ordering as partitions aren't ordered, just keys within a partition are",
+ "keepOutputRowOrder": false,
+ "outputs": [
+ ["a", 1, 2, 1],
+ ["a", 1, 2, 1],
+ ["a", 3, 42, 2],
+ ["a", 3, 42, 2],
+ ["a", 5, 42, 3],
+ ["b", 1, 3, 1],
+ ["b", 2, 100, 2],
+ ["c", 1, -101, 1],
+ ["c", 2, 2, 2],
+ ["c", 3, 3, 3],
+ ["c", 4, 150, 4],
+ ["d", 1, 42, 1],
+ ["e", 1, 42, 1],
+ ["e", 2, 42, 2],
+ ["g", 1, 3, 1],
+ ["h", 1, 150, 1]
+ ]
+ },
{
"description": "Multiple OVER(PARTITION BY k1 ORDER BY k2 DESC, k3 DESC)s with select columns",
"sql": "SELECT string_col, SUM(int_col) OVER(PARTITION BY string_col ORDER BY int_col DESC, bool_col DESC), int_col, MAX(double_col) OVER(PARTITION BY string_col ORDER BY int_col DESC, bool_col DESC) FROM {tbl}",
@@ -3147,6 +4325,30 @@
["c", true, false]
]
},
+ {
+ "description": "Multiple OVER(PARTITION BY k1 ORDER BY k2)s with select col using bool aggregation and ranking function",
+ "sql": "SELECT string_col, RANK() OVER(PARTITION BY string_col ORDER BY bool_col DESC), BOOL_AND(bool_col) OVER(PARTITION BY string_col ORDER BY bool_col DESC) FROM {tbl}",
+ "comments": "Cannot enforce a global ordering as partitions aren't ordered, just keys within a partition are",
+ "keepOutputRowOrder": false,
+ "outputs": [
+ ["a", 1, true],
+ ["a", 1, true],
+ ["a", 1, true],
+ ["a", 1, true],
+ ["a", 5, false],
+ ["b", 1, false],
+ ["b", 1, false],
+ ["d", 1, false],
+ ["e", 1, true],
+ ["e", 2, false],
+ ["h", 1, false],
+ ["g", 1, true],
+ ["c", 1, true],
+ ["c", 2, false],
+ ["c", 2, false],
+ ["c", 2, false]
+ ]
+ },
{
"description": "Multiple OVER(PARTITION BY)s with select columns with alias",
"sql": "SELECT string_col AS str, MAX(double_col) OVER(PARTITION BY string_col) AS max, int_col, MIN(int_col) OVER(PARTITION BY string_col) AS sum FROM {tbl}",
@@ -3240,6 +4442,54 @@
[false, -101, 1.01, 69.7755556]
]
},
+ {
+ "description": "Multiple OVER(PARTITION BY k1 ORDER BY k2)s with select columns and default frame on one but not the other with ranking functions",
+ "sql": "SELECT bool_col, MIN(int_col) OVER(PARTITION BY bool_col ORDER BY int_col DESC RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), double_col, RANK() OVER(PARTITION BY bool_col ORDER BY int_col DESC) FROM {tbl}",
+ "comments": "Calcite validation fails if more than 1 ORDER BY column is used with RANGE. ROWS is not yet supported. Cannot enforce a global ordering as partitions aren't ordered, just keys within a partition are",
+ "keepOutputRowOrder": false,
+ "outputs": [
+ [true, 42, 75, 1],
+ [true, 42, 50.5, 1],
+ [true, 42, 50.5, 1],
+ [true, 3, 100, 4],
+ [true, 3, 100, 4],
+ [true, 2, 300, 6],
+ [true, 2, 400, 6],
+ [false, 150, 1.5, 1],
+ [false, 150, -1.53, 1],
+ [false, 100, 1, 3],
+ [false, 42, 42, 4],
+ [false, 42, 42, 4],
+ [false, 42, 42, 4],
+ [false, 3, 100, 7],
+ [false, 2, 400, 8],
+ [false, -101, 1.01, 9]
+ ]
+ },
+ {
+ "description": "Multiple OVER(PARTITION BY k1 ORDER BY k2)s with select columns and default frame on one but not the other with ranking functions",
+ "sql": "SELECT bool_col, MIN(int_col) OVER(PARTITION BY bool_col ORDER BY int_col DESC RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), double_col, DENSE_RANK() OVER(PARTITION BY bool_col ORDER BY int_col DESC) FROM {tbl}",
+ "comments": "Calcite validation fails if more than 1 ORDER BY column is used with RANGE. ROWS is not yet supported. Cannot enforce a global ordering as partitions aren't ordered, just keys within a partition are",
+ "keepOutputRowOrder": false,
+ "outputs": [
+ [true, 42, 75, 1],
+ [true, 42, 50.5, 1],
+ [true, 42, 50.5, 1],
+ [true, 3, 100, 2],
+ [true, 3, 100, 2],
+ [true, 2, 300, 3],
+ [true, 2, 400, 3],
+ [false, 150, 1.5, 1],
+ [false, 150, -1.53, 1],
+ [false, 100, 1, 2],
+ [false, 42, 42, 3],
+ [false, 42, 42, 3],
+ [false, 42, 42, 3],
+ [false, 3, 100, 4],
+ [false, 2, 400, 5],
+ [false, -101, 1.01, 6]
+ ]
+ },
{
"description": "Multiple OVER(PARTITION BY)s with select col with global order by",
"sql": "SELECT string_col, AVG(double_col) OVER(PARTITION BY string_col), COUNT(string_col) OVER(PARTITION BY string_col) FROM {tbl} ORDER BY string_col",
@@ -3286,6 +4536,29 @@
["h", -1.53, 1]
]
},
+ {
+ "description": "Multiple OVER(PARTITION BY k1 ORDER BY k2)s with select col with global order by (use two global ORDER BY keys for deterministic ordering) ranking functions",
+ "sql": "SELECT string_col, DENSE_RANK() OVER(PARTITION BY string_col ORDER BY int_col), RANK() OVER(PARTITION BY string_col ORDER BY int_col) FROM {tbl} ORDER BY string_col, int_col",
+ "keepOutputRowOrder": true,
+ "outputs": [
+ ["a", 1, 1],
+ ["a", 1, 1],
+ ["a", 2, 3],
+ ["a", 2, 3],
+ ["a", 2, 3],
+ ["b", 1, 1],
+ ["b", 2, 2],
+ ["c", 1, 1],
+ ["c", 2, 2],
+ ["c", 3, 3],
+ ["c", 4, 4],
+ ["d", 1, 1],
+ ["e", 1, 1],
+ ["e", 1, 1],
+ ["g", 1, 1],
+ ["h", 1, 1]
+ ]
+ },
{
"description": "Single OVER(PARTITION BY (2 keys))s avg with select col with global order by",
"sql": "SELECT string_col, bool_col, AVG(double_col) OVER(PARTITION BY string_col, bool_col), COUNT(string_col) OVER(PARTITION BY bool_col, string_col) FROM {tbl} ORDER BY string_col, bool_col",
@@ -3332,6 +4605,29 @@
["h", false, -1.53, 1]
]
},
+ {
+ "description": "Single OVER(PARTITION BY (2 keys) ORDER BY different key)s avg with select col with global order by (added int_col to global order by for deterministic results) - ranking functions",
+ "sql": "SELECT string_col, bool_col, RANK() OVER(PARTITION BY string_col, bool_col ORDER BY int_col), DENSE_RANK() OVER(PARTITION BY bool_col, string_col ORDER BY int_col) FROM {tbl} ORDER BY string_col, bool_col, int_col",
+ "keepOutputRowOrder": true,
+ "outputs": [
+ ["a", false, 1, 1],
+ ["a", true, 1, 1],
+ ["a", true, 1, 1],
+ ["a", true, 3, 2],
+ ["a", true, 3, 2],
+ ["b", false, 1, 1],
+ ["b", false, 2, 2],
+ ["c", false, 1, 1],
+ ["c", false, 2, 2],
+ ["c", false, 3, 3],
+ ["c", true, 1, 1],
+ ["d", false, 1, 1],
+ ["e", false, 1, 1],
+ ["e", true, 1, 1],
+ ["g", true, 1, 1],
+ ["h", false, 1, 1]
+ ]
+ },
{
"description": "Multiple OVER(PARTITION BY)s with select col with global order by with LIMIT",
"sql": "SELECT string_col, COUNT(bool_col) OVER(PARTITION BY string_col), AVG(int_col) OVER(PARTITION BY string_col) FROM {tbl} ORDER BY string_col LIMIT 6",
@@ -3361,6 +4657,21 @@
["b", 2, 51.5]
]
},
+ {
+ "description": "Multiple OVER(PARTITION BY k1 ORDER BY k2)s with select col with global order by with LIMIT ranking functions",
+ "sql": "SELECT string_col, RANK() OVER(PARTITION BY string_col ORDER BY int_col), AVG(int_col) OVER(PARTITION BY string_col ORDER BY int_col) FROM {tbl} ORDER BY string_col, int_col LIMIT 7",
+ "comments": "Cannot use LIMIT without ORDER BY since the results can change and we cannot verify exact row outputs",
+ "keepOutputRowOrder": true,
+ "outputs": [
+ ["a", 1, 2],
+ ["a", 1, 2],
+ ["a", 3, 26],
+ ["a", 3, 26],
+ ["a", 3, 26],
+ ["b", 1, 3],
+ ["b", 2, 51.5]
+ ]
+ },
{
"description": "Multiple OVER(PARTITION BY)s and transform col",
"sql": "SELECT UPPER(CONCAT(string_col, bool_col, '-')), AVG(int_col) OVER(PARTITION BY string_col), MIN(double_col) OVER(PARTITION BY string_col) FROM {tbl}",
@@ -3407,6 +4718,30 @@
["H-FALSE", 150, -1.53]
]
},
+ {
+ "description": "Multiple OVER(PARTITION BY k1 ORDER BY k2)s and transform col - ranking functions",
+ "sql": "SELECT UPPER(CONCAT(string_col, bool_col, '-')), RANK() OVER(PARTITION BY string_col ORDER BY bool_col), DENSE_RANK() OVER(PARTITION BY string_col ORDER BY bool_col) FROM {tbl}",
+ "comments": "Cannot enforce a global ordering as partitions aren't ordered, just keys within a partition are",
+ "keepOutputRowOrder": false,
+ "outputs": [
+ ["A-FALSE", 1, 1],
+ ["A-TRUE", 2, 2],
+ ["A-TRUE", 2, 2],
+ ["A-TRUE", 2, 2],
+ ["A-TRUE", 2, 2],
+ ["B-FALSE", 1, 1],
+ ["B-FALSE", 1, 1],
+ ["C-FALSE", 1, 1],
+ ["C-FALSE", 1, 1],
+ ["C-FALSE", 1, 1],
+ ["C-TRUE", 4, 2],
+ ["D-FALSE", 1, 1],
+ ["E-FALSE", 1, 1],
+ ["E-TRUE", 2, 2],
+ ["G-TRUE", 1, 1],
+ ["H-FALSE", 1, 1]
+ ]
+ },
{
"description": "Multiple OVER(PARTITION BY)s with select col and filter",
"sql": "SELECT string_col, COUNT(bool_col) OVER(PARTITION BY string_col), MIN(double_col) OVER(PARTITION BY string_col) FROM {tbl} WHERE string_col = 'a' AND bool_col != false",
@@ -3429,6 +4764,18 @@
["a", 4, 50.5]
]
},
+ {
+ "description": "Multiple OVER(PARTITION BY k1 ORDER BY k2)s with select col and filter - ranking functinos",
+ "sql": "SELECT string_col, RANK() OVER(PARTITION BY string_col ORDER BY bool_col), DENSE_RANK() OVER(PARTITION BY string_col ORDER BY bool_col) FROM {tbl} WHERE string_col = 'a' AND bool_col != false",
+ "comments": "Cannot enforce a global ordering as partitions aren't ordered, just keys within a partition are",
+ "keepOutputRowOrder": false,
+ "outputs": [
+ ["a", 1, 1],
+ ["a", 1, 1],
+ ["a", 1, 1],
+ ["a", 1, 1]
+ ]
+ },
{
"description": "Multiple OVER(PARTITION BY)s with select col and filter that matches no rows",
"sql": "SELECT string_col, COUNT(bool_col) OVER(PARTITION BY string_col), AVG(int_col) OVER(PARTITION BY string_col) FROM {tbl} WHERE string_col = 'a' AND bool_col = false AND int_col > 200",
@@ -3483,6 +4830,20 @@
[100, 3, 100]
]
},
+ {
+ "description": "Multiple OVER(PARTITION BY k1 ORDER BY k2)s with select col and filter - ranking functions",
+ "sql": "SELECT double_col, RANK() OVER(PARTITION BY bool_col, string_col ORDER BY int_col), DENSE_RANK() OVER(PARTITION BY bool_col, string_col ORDER BY int_col) FROM {tbl} WHERE string_col NOT IN ('a', 'd', 'e', 'g', 'h')",
+ "comments": "Cannot enforce a global ordering as partitions aren't ordered, just keys within a partition are",
+ "keepOutputRowOrder": false,
+ "outputs": [
+ [100, 1, 1],
+ [1, 2, 2],
+ [1.01, 1, 1],
+ [400, 2, 2],
+ [1.5, 3, 3],
+ [100, 1, 1]
+ ]
+ },
{
"description": "Multiple OVER(PARTITION BY)s with select transform and filter",
"sql": "SELECT LENGTH(CONCAT(string_col, bool_col, '-')), MAX(int_col) OVER(PARTITION BY string_col, int_col), COUNT(double_col) OVER(PARTITION BY string_col, int_col) FROM {tbl} where int_col < 50 OR double_col = 1",
@@ -3525,6 +4886,28 @@
[6, 3, 1]
]
},
+ {
+ "description": "Multiple OVER(PARTITION BY k1 ORDER BY k2)s with select transform and filter - ranking functions",
+ "sql": "SELECT LENGTH(CONCAT(string_col, bool_col, '-')), RANK() OVER(PARTITION BY string_col, int_col ORDER BY bool_col), DENSE_RANK() OVER(PARTITION BY string_col, int_col ORDER BY bool_col) FROM {tbl} where int_col < 50 OR double_col = 1",
+ "comments": "Cannot enforce a global ordering as partitions aren't ordered, just keys within a partition are",
+ "keepOutputRowOrder": false,
+ "outputs": [
+ [6, 1, 1],
+ [6, 1, 1],
+ [7, 1, 1],
+ [6, 2, 2],
+ [6, 2, 2],
+ [7, 1, 1],
+ [7, 1, 1],
+ [7, 1, 1],
+ [7, 1, 1],
+ [6, 1, 1],
+ [7, 1, 1],
+ [7, 1, 1],
+ [6, 2, 2],
+ [6, 1, 1]
+ ]
+ },
{
"description": "Multiple OVER(PARTITION BY)s with group by",
"sql": "SELECT MAX({tbl}.int_col) OVER(PARTITION BY {tbl}.string_col), COUNT({tbl}.int_col) OVER(PARTITION BY {tbl}.string_col) FROM {tbl} GROUP BY string_col, int_col",
@@ -3563,6 +4946,26 @@
[150, 1]
]
},
+ {
+ "description": "Multiple OVER(PARTITION BY k1 ORDER BY k2)s with group by - ranking functions",
+ "sql": "SELECT MAX({tbl}.int_col) OVER(PARTITION BY {tbl}.string_col ORDER BY {tbl}.int_col), DENSE_RANK() OVER(PARTITION BY {tbl}.string_col ORDER BY {tbl}.int_col) FROM {tbl} GROUP BY string_col, int_col",
+ "comments": "Cannot enforce a global ordering as partitions aren't ordered, just keys within a partition are",
+ "keepOutputRowOrder": false,
+ "outputs": [
+ [2, 1],
+ [42, 2],
+ [3, 1],
+ [100, 2],
+ [-101, 1],
+ [2, 2],
+ [3, 3],
+ [150, 4],
+ [42, 1],
+ [42, 1],
+ [3, 1],
+ [150, 1]
+ ]
+ },
{
"description": "Multiple OVER(PARTITION BY)s with select col and group by",
"sql": "SELECT string_col, MIN({tbl}.int_col) OVER(PARTITION BY {tbl}.string_col), SUM({tbl}.int_col) OVER(PARTITION BY {tbl}.string_col) FROM {tbl} GROUP BY string_col, int_col",
@@ -3601,6 +5004,26 @@
["h", 150, 150]
]
},
+ {
+ "description": "Multiple OVER(PARTITION BY k1 ORDER BY k2)s with select col and group by - ranking functions",
+ "sql": "SELECT string_col, RANK() OVER(PARTITION BY {tbl}.string_col ORDER BY {tbl}.int_col), SUM({tbl}.int_col) OVER(PARTITION BY {tbl}.string_col ORDER BY {tbl}.int_col) FROM {tbl} GROUP BY string_col, int_col",
+ "comments": "Cannot enforce a global ordering as partitions aren't ordered, just keys within a partition are",
+ "keepOutputRowOrder": false,
+ "outputs": [
+ ["a", 1, 2],
+ ["a", 2, 44],
+ ["b", 1, 3],
+ ["b", 2, 103],
+ ["c", 1, -101],
+ ["c", 2, -99],
+ ["c", 3, -96],
+ ["c", 4, 54],
+ ["d", 1, 42],
+ ["e", 1, 42],
+ ["g", 1, 3],
+ ["h", 1, 150]
+ ]
+ },
{
"description": "Multiple OVER(PARTITION BY)s with agg col and group by",
"sql": "SELECT SUM(int_col), SUM({tbl}.int_col) OVER(PARTITION BY {tbl}.int_col), AVG({tbl}.int_col) OVER(PARTITION BY {tbl}.int_col) FROM {tbl} GROUP BY int_col",
@@ -3633,6 +5056,26 @@
[150, 150, 150]
]
},
+ {
+ "description": "Multiple OVER(PARTITION BY k1 ORDER by k2)s with agg col and group by - ranking functions",
+ "sql": "SELECT SUM(int_col), SUM({tbl}.int_col) OVER(PARTITION BY {tbl}.string_col ORDER BY {tbl}.int_col), RANK() OVER(PARTITION BY {tbl}.string_col ORDER BY {tbl}.int_col) FROM {tbl} GROUP BY string_col, int_col",
+ "comments": "Cannot enforce a global ordering as partitions aren't ordered, just keys within a partition are",
+ "keepOutputRowOrder": false,
+ "outputs": [
+ [4, 2, 1],
+ [126, 44, 2],
+ [3, 3, 1],
+ [100, 103, 2],
+ [-101, -101, 1],
+ [2, -99, 2],
+ [3, -96, 3],
+ [150, 54, 4],
+ [42, 42, 1],
+ [84, 42, 1],
+ [3, 3, 1],
+ [150, 150, 1]
+ ]
+ },
{
"description": "Multiple OVER(PARTITION BY)s with select col, agg col and group by",
"sql": "SELECT int_col, SUM(int_col), SUM({tbl}.int_col) OVER(PARTITION BY {tbl}.int_col), AVG({tbl}.int_col) OVER(PARTITION BY {tbl}.int_col) FROM {tbl} GROUP BY int_col",
@@ -3697,6 +5140,25 @@
[150, 150, 150, 150]
]
},
+ {
+ "description": "Multiple OVER(PARTITION BY k1 ORDER by k2)s with select col, agg col and group by with global order by - ranking functions",
+ "sql": "SELECT int_col, SUM(int_col), DENSE_RANK() OVER(PARTITION BY {tbl}.string_col ORDER BY {tbl}.int_col), RANK() OVER(PARTITION BY {tbl}.string_col ORDER BY {tbl}.int_col) FROM {tbl} GROUP BY string_col, int_col ORDER BY string_col, int_col",
+ "keepOutputRowOrder": true,
+ "outputs": [
+ [2, 4, 1, 1],
+ [42, 126, 2, 2],
+ [3, 3, 1, 1],
+ [100, 100, 2, 2],
+ [-101, -101, 1, 1],
+ [2, 2, 2, 2],
+ [3, 3, 3, 3],
+ [150, 150, 4, 4],
+ [42, 42, 1, 1],
+ [42, 84, 1, 1],
+ [3, 3, 1, 1],
+ [150, 150, 1, 1]
+ ]
+ },
{
"description": "Multiple OVER(PARTITION BY)s with select col, agg col and group by with a filter",
"sql": "SELECT int_col, SUM(int_col), SUM({tbl}.int_col) OVER(PARTITION BY {tbl}.int_col), MIN({tbl}.int_col) OVER(PARTITION BY {tbl}.int_col) FROM {tbl} WHERE int_col >= 100 GROUP BY int_col",
@@ -3778,6 +5240,112 @@
[3, "c", 3],
[4, "c", 150]
]
+ },
+ {
+ "description": "Subquery with RANK window function to get all values with RANK < value",
+ "sql": "SELECT rank, string_col, int_col FROM (SELECT RANK() OVER(PARTITION BY string_col ORDER BY int_col) AS rank, string_col, int_col from {tbl}) WHERE rank <= 2",
+ "comments": "Cannot enforce a global ordering as partitions aren't ordered, just keys within a partition are",
+ "keepOutputRowOrder": false,
+ "outputs": [
+ [1, "a", 2],
+ [1, "a", 2],
+ [1, "b", 3],
+ [2, "b", 100],
+ [1, "c", -101],
+ [2, "c", 2],
+ [1, "d", 42],
+ [1, "e", 42],
+ [1, "e", 42],
+ [1, "g", 3],
+ [1, "h", 150]
+ ]
+ },
+ {
+ "description": "Subquery with DENSE_RANK window function to get all values with RANK < value",
+ "sql": "SELECT dense_rank, string_col, int_col FROM (SELECT DENSE_RANK() OVER(PARTITION BY string_col ORDER BY int_col) AS dense_rank, string_col, int_col from {tbl}) WHERE dense_rank <= 2",
+ "comments": "Cannot enforce a global ordering as partitions aren't ordered, just keys within a partition are",
+ "keepOutputRowOrder": false,
+ "outputs": [
+ [1, "a", 2],
+ [1, "a", 2],
+ [2, "a", 42],
+ [2, "a", 42],
+ [2, "a", 42],
+ [1, "b", 3],
+ [2, "b", 100],
+ [1, "c", -101],
+ [2, "c", 2],
+ [1, "d", 42],
+ [1, "e", 42],
+ [1, "e", 42],
+ [1, "g", 3],
+ [1, "h", 150]
+ ]
+ },
+ {
+ "description": "Subquery with RANK window function to get all values with RANK < value where ORDER BY is DESC",
+ "sql": "SELECT rank, string_col, int_col FROM (SELECT RANK() OVER(PARTITION BY string_col ORDER BY int_col DESC) AS rank, string_col, int_col from {tbl}) WHERE rank <= 2",
+ "comments": "Cannot enforce a global ordering as partitions aren't ordered, just keys within a partition are",
+ "keepOutputRowOrder": false,
+ "outputs": [
+ [1, "a", 42],
+ [1, "a", 42],
+ [1, "a", 42],
+ [1, "b", 100],
+ [2, "b", 3],
+ [1, "c", 150],
+ [2, "c", 3],
+ [1, "d", 42],
+ [1, "e", 42],
+ [1, "e", 42],
+ [1, "g", 3],
+ [1, "h", 150]
+ ]
+ },
+ {
+ "description": "Subquery with DENSE_RANK window function to get all values with DENSE_RANK < value where ORDER BY is DESC",
+ "sql": "SELECT dense_rank, string_col, int_col FROM (SELECT DENSE_RANK() OVER(PARTITION BY string_col ORDER BY int_col DESC) AS dense_rank, string_col, int_col from {tbl}) WHERE dense_rank <= 2",
+ "comments": "Cannot enforce a global ordering as partitions aren't ordered, just keys within a partition are",
+ "keepOutputRowOrder": false,
+ "outputs": [
+ [1, "a", 42],
+ [1, "a", 42],
+ [1, "a", 42],
+ [2, "a", 2],
+ [2, "a", 2],
+ [1, "b", 100],
+ [2, "b", 3],
+ [1, "c", 150],
+ [2, "c", 3],
+ [1, "d", 42],
+ [1, "e", 42],
+ [1, "e", 42],
+ [1, "g", 3],
+ [1, "h", 150]
+ ]
+ },
+ {
+ "description": "CTE with RANK window function to get all values with RANK < value",
+ "sql": "WITH windowfunc AS (SELECT RANK() OVER(PARTITION BY string_col ORDER BY int_col) AS rank, string_col, int_col from {tbl}) SELECT rank, string_col, int_col FROM windowfunc WHERE rank > 2",
+ "comments": "Cannot enforce a global ordering as partitions aren't ordered, just keys within a partition are",
+ "keepOutputRowOrder": false,
+ "outputs": [
+ [3, "a", 42],
+ [3, "a", 42],
+ [3, "a", 42],
+ [3, "c", 3],
+ [4, "c", 150]
+ ]
+ },
+ {
+ "description": "CTE with DENSE_RANK window function to get all values with DENSE_RANK < value",
+ "sql": "WITH windowfunc AS (SELECT DENSE_RANK() OVER(PARTITION BY string_col ORDER BY int_col) AS dense_rank, string_col, int_col from {tbl}) SELECT dense_rank, string_col, int_col FROM windowfunc WHERE dense_rank > 2",
+ "comments": "Cannot enforce a global ordering as partitions aren't ordered, just keys within a partition are",
+ "keepOutputRowOrder": false,
+ "outputs": [
+ [3, "c", 3],
+ [4, "c", 150]
+ ]
}
]
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org