You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2020/07/06 15:48:30 UTC
[impala] 01/02: IMPALA-8954: Uncorrelated scalar subqueries in the
select list
This is an automated email from the ASF dual-hosted git repository.
tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 388ad555d7df9e2711ea0ab81dbae4235c54b3cc
Author: Shant Hovsepian <sh...@cloudera.com>
AuthorDate: Fri May 29 16:26:07 2020 -0400
IMPALA-8954: Uncorrelated scalar subqueries in the select list
Extend StmtRewriter with the ability to rewrite scalar subqueries in the
select list into cross joins. Currently the subquery must pass plan-time
checks to determine that it returns a single row which may miss cases
that may be valid at runtime or with more complex evaluation of the
predicate expressions in the planner. Support for correlated subqueries
will be a follow on change.
Testing:
* Added new analyzer tests, updated previous subquery tests
* test_queries.py::TestQueries::test_subquery
* Added test_tpcds_q9 to e2e and planner tests
Change-Id: Ibcf55d26889aa01d69bb85f18c9241dda095fb66
Reviewed-on: http://gerrit.cloudera.org:8080/16007
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Tim Armstrong <ta...@cloudera.com>
---
.../org/apache/impala/analysis/SelectList.java | 5 +
.../org/apache/impala/analysis/SelectStmt.java | 43 +-
.../org/apache/impala/analysis/StmtRewriter.java | 169 +++-
.../impala/analysis/AnalyzeSubqueriesTest.java | 23 +-
testdata/datasets/tpcds/tpcds_schema_template.sql | 2 +-
.../queries/PlannerTest/subquery-rewrite.test | 134 +++
.../queries/PlannerTest/tpcds-all.test | 916 +++++++++++++++++++++
.../queries/QueryTest/subquery.test | 173 ++++
testdata/workloads/tpcds/queries/count.test | 9 +-
.../tpcds/queries/tpcds-decimal_v2-q9.test | 54 ++
testdata/workloads/tpcds/queries/tpcds-q9.test | 54 ++
tests/query_test/test_tpcds_queries.py | 6 +
tests/util/parse_util.py | 2 +-
13 files changed, 1572 insertions(+), 18 deletions(-)
diff --git a/fe/src/main/java/org/apache/impala/analysis/SelectList.java b/fe/src/main/java/org/apache/impala/analysis/SelectList.java
index 3ac3830..9ed28c0 100644
--- a/fe/src/main/java/org/apache/impala/analysis/SelectList.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SelectList.java
@@ -94,9 +94,14 @@ public class SelectList {
public void rewriteExprs(ExprRewriter rewriter, Analyzer analyzer)
throws AnalysisException {
+ List<Subquery> subqueryExprs = new ArrayList<>();
for (SelectListItem item: items_) {
if (item.isStar()) continue;
item.setExpr(rewriter.rewrite(item.getExpr(), analyzer));
+ item.getExpr().collect(Subquery.class, subqueryExprs);
+ }
+ for (Subquery s : subqueryExprs) {
+ s.getStatement().rewriteExprs(rewriter);
}
}
diff --git a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
index 00323de..5618694 100644
--- a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
@@ -274,9 +274,23 @@ public class SelectStmt extends QueryStmt {
// Analyze the resultExpr before generating a label to ensure enforcement
// of expr child and depth limits (toColumn() label may call toSql()).
item.getExpr().analyze(analyzer_);
- if (item.getExpr().contains(Predicates.instanceOf(Subquery.class))) {
- throw new AnalysisException(
- "Subqueries are not supported in the select list.");
+ // Check for scalar subquery types which are not supported
+ List<Subquery> subqueryExprs = new ArrayList<>();
+ item.getExpr().collect(Subquery.class, subqueryExprs);
+ for (Subquery s : subqueryExprs) {
+ Preconditions.checkState(s.getStatement() instanceof SelectStmt);
+ if (!s.returnsScalarColumn()) {
+ throw new AnalysisException("A non-scalar subquery is not supported in "
+ + "the expression: " + item.getExpr().toSql());
+ }
+ if (s.getStatement().isRuntimeScalar()) {
+ throw new AnalysisException(
+ "A subquery which may return more than one row is not supported in "
+ + "the expression: " + item.getExpr().toSql());
+ }
+ Preconditions.checkState(((SelectStmt) s.getStatement()).returnsSingleRow(),
+ "Invariant violated: Only subqueries that are guaranteed to return a "
+ + "single row are supported: " + item.getExpr().toSql());
}
resultExprs_.add(item.getExpr());
String label = item.toColumnLabel(i, analyzer_.useHiveColLabels());
@@ -1197,12 +1211,17 @@ public class SelectStmt extends QueryStmt {
if (fromClauseOnly) {
fromClause_.collectFromClauseTableRefs(tblRefs);
} else {
+ // Collect TableRefs in all subqueries.
fromClause_.collectTableRefs(tblRefs);
- }
- if (!fromClauseOnly && whereClause_ != null) {
- // Collect TableRefs in WHERE-clause subqueries.
List<Subquery> subqueries = new ArrayList<>();
- whereClause_.collect(Subquery.class, subqueries);
+ if (whereClause_ != null) {
+ whereClause_.collect(Subquery.class, subqueries);
+ }
+ for (SelectListItem item : selectList_.getItems()) {
+ if (item.isStar()) continue;
+ item.getExpr().collect(Subquery.class, subqueries);
+ }
+
for (Subquery sq: subqueries) {
sq.getStatement().collectTableRefs(tblRefs, fromClauseOnly);
}
@@ -1211,8 +1230,6 @@ public class SelectStmt extends QueryStmt {
@Override
public void collectInlineViews(Set<FeView> inlineViews) {
- // Impala currently supports sub queries only in FROM, WHERE & WITH clauses. Hence,
- // this function does not carry out any checks on HAVING clause.
super.collectInlineViews(inlineViews);
List<TableRef> fromTblRefs = getTableRefs();
Preconditions.checkNotNull(inlineViews);
@@ -1236,6 +1253,14 @@ public class SelectStmt extends QueryStmt {
whereSubQueries.get(0).getStatement().collectInlineViews(inlineViews);
}
}
+ List<Subquery> selectListSubQueries = Lists.newArrayList();
+ for (SelectListItem item : selectList_.getItems()) {
+ if (item.isStar()) continue;
+ item.getExpr().collect(Subquery.class, selectListSubQueries);
+ }
+ for (Subquery sq : selectListSubQueries) {
+ sq.getStatement().collectInlineViews(inlineViews);
+ }
}
@Override
diff --git a/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java b/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java
index 850b629..c9fb004 100644
--- a/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java
+++ b/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java
@@ -934,8 +934,9 @@ public class StmtRewriter {
/**
* Rewrite all the subqueries of a SelectStmt in place. Subqueries are currently
- * supported in FROM and WHERE clauses. The rewrite is performed in place and not in a
- * clone of SelectStmt because it requires the stmt to be analyzed.
+ * supported in the FROM clause, WHERE clause and SELECT list. The rewrite is
+ * performed in place and not in a clone of SelectStmt because it requires the stmt to
+ * be analyzed.
*/
@Override
protected void rewriteSelectStmtHook(SelectStmt stmt, Analyzer analyzer)
@@ -952,6 +953,7 @@ public class StmtRewriter {
}
rewriteWhereClauseSubqueries(stmt, analyzer);
}
+ rewriteSelectListSubqueries(stmt, analyzer);
}
/**
@@ -1096,5 +1098,168 @@ public class StmtRewriter {
smap.put(subquery, newSubquery);
return expr.substitute(smap, analyzer, false);
}
+
+ /**
+ * Rewrite subqueries of a stmt's SELECT list. Scalar subqueries are the only type
+ * of subquery supported in the select list. Scalar subqueries return a single column
+ * and at most 1 row, a runtime error should be thrown if more than one row is
+ * returned. Generally these subqueries can be evaluated once for every row of the
+ * outer query however for performance reasons we want to rewrite evaluation to use
+ * joins where possible.
+ *
+ * 1) Uncorrelated Scalar Aggregate Query
+ *
+ * SELECT T1.a, (SELECT avg(T2.a) from T2) FROM T1;
+ *
+ * This is implemented by flattening into a join.
+ *
+ * SELECT T1.a, $a$1.$c$1 FROM T1, (SELECT avg(T2.a) $c$1 FROM T2) $a$1
+ *
+ * Currently we only support very simple subqueries which return a single aggregate
+ * function with no group by columns unless a LIMIT 1 is given. TODO: IMPALA-1285
+ *
+ * 2) Correlated Scalar Aggregate
+ *
+ * TODO: IMPALA-8955
+ * SELECT id, (SELECT count(*) FROM T2 WHERE id=a.id ) FROM T1 a
+ *
+ * This can be flattened with a LEFT OUTER JOIN
+ *
+ * SELECT T1.a, $a$1.$c$1 FROM T1 LEFT OUTER JOIN
+ * (SELECT id, count(*) $c$1 FROM T2 GROUP BY id) $a$1 ON T1.id = $a$1.id
+ *
+ * 3) Correlated Scalar
+ *
+ * TODO: IMPALA-6315
+ * SELECT id, (SELECT cost FROM T2 WHERE id=a.id ) FROM T1 a
+ *
+ * In this case there is no aggregate function to guarantee only a single row is
+ * returned per group so a run time cardinality check must be applied. An exception
+ * would be if the correlated predicates had primary key constraints.
+ *
+ * 4) Runtime Scalar Subqueries
+ *
+ * TODO: IMPALA-5100
+ * We do have a {@link CardinalityCheckNode} for runtime checks however queries
+ * can't always be rewritten into an NLJ without special care. For example with
+ * conditional expression like below:
+ *
+ * SELECT T1.a,
+ * IF((SELECT max(T2.a) from T2 > 10,
+ * (SELECT T2.a from T2 WHERE id=T1.id),
+ * (SELECT T3.a from T2 WHERE if=T1.id)
+ * FROM T1;
+ *
+ * If rewritten to joins with cardinality checks then both legs of the conditional
+ * expression would be evaluated regardless of the condition. If the false case
+ * were to return a runtime error while when the true doesn't and the condition
+ * evaluates to true then we'd have incorrect behavior.
+ */
+ private void rewriteSelectListSubqueries(SelectStmt stmt, Analyzer analyzer)
+ throws AnalysisException {
+ Preconditions.checkNotNull(stmt);
+ Preconditions.checkNotNull(analyzer);
+ final int numTableRefs = stmt.fromClause_.size();
+ final boolean parentHasAgg = stmt.hasMultiAggInfo();
+ // Track any new inline views so we later ensure they are rewritten if needed.
+ // An improvement would be to have a pre/post order abstract rewriter class.
+ final List<InlineViewRef> newViews = new ArrayList<>();
+ for (SelectListItem selectItem : stmt.getSelectList().getItems()) {
+ if (selectItem.isStar()) {
+ continue;
+ }
+
+ final Expr expr = selectItem.getExpr();
+ final List<Subquery> subqueries = new ArrayList<>();
+ // Use collect as opposed to collectAll in order to allow nested subqueries to be
+ // rewritten as needed. For example a subquery in the select list which contains
+ // its own subquery in the where clause.
+ expr.collect(Predicates.instanceOf(Subquery.class), subqueries);
+ if (subqueries.size() == 0) {
+ continue;
+ }
+ final ExprSubstitutionMap smap = new ExprSubstitutionMap();
+ for (Subquery sq : subqueries) {
+ final SelectStmt subqueryStmt = (SelectStmt) sq.getStatement();
+ // TODO: Handle correlated subqueries IMPALA-8955
+ if (isCorrelated(subqueryStmt)) {
+ throw new AnalysisException("A correlated scalar subquery is not supported "
+ + "in the expression: " + expr.toSql());
+ }
+ Preconditions.checkState(sq.getType().isScalarType());
+
+ // Existential subqueries in Impala aren't really execution time expressions,
+ // they are either checked at plan time or expected to be handled by the
+ // subquery rewrite into a join. In the case of the select list we will only
+ // support plan time evaluation.
+ boolean replacedExists = false;
+ final List<ExistsPredicate> existsPredicates = new ArrayList<>();
+ expr.collect(ExistsPredicate.class, existsPredicates);
+ for (ExistsPredicate ep : existsPredicates) {
+ // Check to see if the current subquery is the child of an exists predicate.
+ if (ep.contains(sq)) {
+ final BoolLiteral boolLiteral = replaceExistsPredicate(ep);
+ if (boolLiteral != null) {
+ boolLiteral.analyze(analyzer);
+ smap.put(ep, boolLiteral);
+ replacedExists = true;
+ break;
+ } else {
+ throw new AnalysisException(
+ "Unsupported subquery with runtime scalar check: " + ep.toSql());
+ }
+ }
+ }
+ if (replacedExists) {
+ continue;
+ }
+
+ List<String> colLabels = new ArrayList<>();
+ for (int i = 0; i < subqueryStmt.getColLabels().size(); ++i) {
+ colLabels.add(subqueryStmt.getColumnAliasGenerator().getNextAlias());
+ }
+ // Create a new inline view from the subquery stmt aliasing the columns.
+ InlineViewRef inlineView = new InlineViewRef(
+ stmt.getTableAliasGenerator().getNextAlias(), subqueryStmt, colLabels);
+ inlineView.reset();
+ inlineView.analyze(analyzer);
+
+ // For uncorrelated scalar subqueries we rewrite with a CROSS_JOIN. This makes
+ // it simpler to further optimize by merging subqueries without worrying about
+ // join ordering as in IMPALA-9796. For correlated subqueries we'd want to
+ // rewrite to a LOJ.
+ inlineView.setJoinOp(JoinOperator.CROSS_JOIN);
+ stmt.fromClause_.add(inlineView);
+ newViews.add(inlineView);
+
+ SlotRef slotRef = new SlotRef(Lists.newArrayList(
+ inlineView.getUniqueAlias(), inlineView.getColLabels().get(0)));
+ slotRef.analyze(analyzer);
+ Expr substitute = slotRef;
+ // Need to wrap the expression with a no-op aggregate function if the stmt does
+ // any aggregation, using MAX() given no explicit function to return any value
+ // in a group.
+ if (parentHasAgg) {
+ final FunctionCallExpr aggWrapper =
+ new FunctionCallExpr("max", Lists.newArrayList((Expr) slotRef));
+ aggWrapper.analyze(analyzer);
+ substitute = aggWrapper;
+ }
+ // Substitute original subquery expression with a reference to the inline view.
+ smap.put(sq, substitute);
+ }
+ // Update select list with any new slot references.
+ selectItem.setExpr(expr.substitute(smap, analyzer, false));
+ }
+ // Rewrite any new views
+ for (InlineViewRef v : newViews) {
+ rewriteQueryStatement(v.getViewStmt(), v.getAnalyzer());
+ }
+ // Only applies to the original list of TableRefs, not any as a result of the
+ // rewrite.
+ if (!newViews.isEmpty()) {
+ replaceUnqualifiedStarItems(stmt, numTableRefs);
+ }
+ }
}
}
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeSubqueriesTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeSubqueriesTest.java
index d297c48..a845718 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeSubqueriesTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeSubqueriesTest.java
@@ -1207,6 +1207,20 @@ public class AnalyzeSubqueriesTest extends AnalyzerTest {
"(select id from functional.alltypes t2 where t1.int_col = t2.int_col)",
"Unsupported correlated subquery with runtime scalar check: " +
"SELECT id FROM functional.alltypes t2 WHERE t1.int_col = t2.int_col");
+
+ // Scalar subqueries in the select list
+ AnalyzesOk("select id, 10 + (select max(int_col) from functional.alltypestiny) "
+ + "from functional.alltypestiny");
+ AnalyzesOk("select id, (select count(*) from functional.alltypestiny where int_col "
+ + "< (select max(int_col) from functional.alltypes)) from functional.dimtbl");
+ AnalysisError("select id, (select id, count(*) from functional.alltypestiny "
+ + "group by 1) from functional.dimtbl",
+ "A non-scalar subquery is not supported in the expression: "
+ + "(SELECT id, count(*) FROM functional.alltypestiny GROUP BY id)");
+ AnalysisError("select id, (select count(*) from functional.alltypestiny b where "
+ + "id=a.id ) from functional.alltypes a",
+ "A correlated scalar subquery is not supported in the expression: "
+ + "(SELECT count(*) FROM functional.alltypestiny b WHERE id = a.id)");
}
@Test
@@ -1339,7 +1353,7 @@ public class AnalyzeSubqueriesTest extends AnalyzerTest {
}
@Test
- public void TestIllegalSubquery() throws AnalysisException {
+ public void testIllegalSubquery() throws AnalysisException {
// Predicate with a child subquery in the HAVING clause
AnalysisError("select id, count(*) from functional.alltypestiny t group by " +
"id having count(*) > (select count(*) from functional.alltypesagg)",
@@ -1349,9 +1363,10 @@ public class AnalyzeSubqueriesTest extends AnalyzerTest {
"Subqueries are not supported in the HAVING clause.");
// Subquery in the select list
- AnalysisError("select id, (select int_col from functional.alltypestiny) " +
- "from functional.alltypestiny",
- "Subqueries are not supported in the select list.");
+ AnalysisError("select id, (select int_col from functional.alltypestiny) "
+ + "from functional.alltypestiny",
+ "A subquery which may return more than one row is not supported in the " +
+ "expression: " + "(SELECT int_col FROM functional.alltypestiny)");
// Subquery in the GROUP BY clause
AnalysisError("select id, count(*) from functional.alltypestiny " +
diff --git a/testdata/datasets/tpcds/tpcds_schema_template.sql b/testdata/datasets/tpcds/tpcds_schema_template.sql
index 079e21a..78f9f7c 100644
--- a/testdata/datasets/tpcds/tpcds_schema_template.sql
+++ b/testdata/datasets/tpcds/tpcds_schema_template.sql
@@ -1138,7 +1138,7 @@ tpcds
---- BASE_TABLE_NAME
reason
---- COLUMNS
-r_reason_sk bigint
+r_reason_sk int
r_reason_id string
r_reason_desc string
---- ROW_FORMAT
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test b/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
index 8335ff3..bcffd73 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
@@ -3296,3 +3296,137 @@ PLAN-ROOT SINK
runtime filters: RF000 -> int_col
row-size=89B cardinality=7.30K
====
+# Scalar subquery in select list
+SELECT alltypesagg.string_col, alltypesagg.date_string_col, dimtbl.name,
+SUM(CASE WHEN dimtbl.zip < 94613 THEN 1 ELSE 0 END) / CAST((SELECT COUNT(*) FROM functional.dimtbl WHERE dimtbl.zip < 94613) AS decimal(10, 6)) AS close,
+SUM(CASE WHEN dimtbl.zip >= 94613 THEN 1 ELSE 0 END) / CAST((SELECT COUNT(*) FROM functional.dimtbl WHERE dimtbl.zip >= 94613) AS decimal(10, 6)) AS remote
+FROM functional.alltypesagg JOIN functional.dimtbl ON (alltypesagg.id = dimtbl.id)
+GROUP BY 1, 2, 3
+ORDER BY alltypesagg.string_col DESC
+---- PLAN
+PLAN-ROOT SINK
+|
+10:SORT
+| order by: alltypesagg.string_col DESC
+| row-size=84B cardinality=11
+|
+09:AGGREGATE [FINALIZE]
+| output: sum(CASE WHEN dimtbl.zip < 94613 THEN 1 ELSE 0 END), max(count(*)), sum(CASE WHEN dimtbl.zip >= 94613 THEN 1 ELSE 0 END), max(count(*))
+| group by: alltypesagg.string_col, alltypesagg.date_string_col, dimtbl.name
+| row-size=84B cardinality=11
+|
+08:NESTED LOOP JOIN [CROSS JOIN]
+| row-size=84B cardinality=11
+|
+|--05:AGGREGATE [FINALIZE]
+| | output: count(*)
+| | row-size=8B cardinality=1
+| |
+| 04:SCAN HDFS [functional.dimtbl]
+| HDFS partitions=1/1 files=1 size=171B
+| predicates: dimtbl.zip >= 94613
+| row-size=4B cardinality=1
+|
+07:NESTED LOOP JOIN [CROSS JOIN]
+| row-size=76B cardinality=11
+|
+|--03:AGGREGATE [FINALIZE]
+| | output: count(*)
+| | row-size=8B cardinality=1
+| |
+| 02:SCAN HDFS [functional.dimtbl]
+| HDFS partitions=1/1 files=1 size=171B
+| predicates: dimtbl.zip < 94613
+| row-size=4B cardinality=1
+|
+06:HASH JOIN [INNER JOIN]
+| hash predicates: alltypesagg.id = dimtbl.id
+| runtime filters: RF000 <- dimtbl.id
+| row-size=68B cardinality=11
+|
+|--01:SCAN HDFS [functional.dimtbl]
+| HDFS partitions=1/1 files=1 size=171B
+| row-size=29B cardinality=10
+|
+00:SCAN HDFS [functional.alltypesagg]
+ HDFS partitions=11/11 files=11 size=814.73KB
+ runtime filters: RF000 -> alltypesagg.id
+ row-size=39B cardinality=11.00K
+---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
+20:MERGING-EXCHANGE [UNPARTITIONED]
+| order by: alltypesagg.string_col DESC
+|
+10:SORT
+| order by: alltypesagg.string_col DESC
+| row-size=84B cardinality=11
+|
+19:AGGREGATE [FINALIZE]
+| output: sum:merge(CASE WHEN dimtbl.zip < 94613 THEN 1 ELSE 0 END), max:merge(`$a$1`.`$c$1`), sum:merge(CASE WHEN dimtbl.zip >= 94613 THEN 1 ELSE 0 END), max:merge(`$a$2`.`$c$1`)
+| group by: alltypesagg.string_col, alltypesagg.date_string_col, dimtbl.name
+| row-size=84B cardinality=11
+|
+18:EXCHANGE [HASH(alltypesagg.string_col,alltypesagg.date_string_col,dimtbl.name)]
+|
+09:AGGREGATE [STREAMING]
+| output: sum(CASE WHEN dimtbl.zip < 94613 THEN 1 ELSE 0 END), max(count(*)), sum(CASE WHEN dimtbl.zip >= 94613 THEN 1 ELSE 0 END), max(count(*))
+| group by: alltypesagg.string_col, alltypesagg.date_string_col, dimtbl.name
+| row-size=84B cardinality=11
+|
+08:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+| row-size=84B cardinality=11
+|
+|--17:EXCHANGE [BROADCAST]
+| |
+| 16:AGGREGATE [FINALIZE]
+| | output: count:merge(*)
+| | row-size=8B cardinality=1
+| |
+| 15:EXCHANGE [UNPARTITIONED]
+| |
+| 05:AGGREGATE
+| | output: count(*)
+| | row-size=8B cardinality=1
+| |
+| 04:SCAN HDFS [functional.dimtbl]
+| HDFS partitions=1/1 files=1 size=171B
+| predicates: dimtbl.zip >= 94613
+| row-size=4B cardinality=1
+|
+07:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+| row-size=76B cardinality=11
+|
+|--14:EXCHANGE [BROADCAST]
+| |
+| 13:AGGREGATE [FINALIZE]
+| | output: count:merge(*)
+| | row-size=8B cardinality=1
+| |
+| 12:EXCHANGE [UNPARTITIONED]
+| |
+| 03:AGGREGATE
+| | output: count(*)
+| | row-size=8B cardinality=1
+| |
+| 02:SCAN HDFS [functional.dimtbl]
+| HDFS partitions=1/1 files=1 size=171B
+| predicates: dimtbl.zip < 94613
+| row-size=4B cardinality=1
+|
+06:HASH JOIN [INNER JOIN, BROADCAST]
+| hash predicates: alltypesagg.id = dimtbl.id
+| runtime filters: RF000 <- dimtbl.id
+| row-size=68B cardinality=11
+|
+|--11:EXCHANGE [BROADCAST]
+| |
+| 01:SCAN HDFS [functional.dimtbl]
+| HDFS partitions=1/1 files=1 size=171B
+| row-size=29B cardinality=10
+|
+00:SCAN HDFS [functional.alltypesagg]
+ HDFS partitions=11/11 files=11 size=814.73KB
+ runtime filters: RF000 -> alltypesagg.id
+ row-size=39B cardinality=11.00K
+====
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test
index 8d79757..9057a3f 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test
@@ -8788,3 +8788,919 @@ PLAN-ROOT SINK
runtime filters: RF004 -> ss_sold_time_sk, RF002 -> ss_hdemo_sk
row-size=12B cardinality=2.88M
====
+# TPCDS-Q9
+select case when (select count(*)
+ from store_sales
+ where ss_quantity between 1 and 20) > 74129
+ then (select avg(ss_ext_discount_amt)
+ from store_sales
+ where ss_quantity between 1 and 20)
+ else (select avg(ss_net_profit)
+ from store_sales
+ where ss_quantity between 1 and 20) end bucket1,
+ case when (select count(*)
+ from store_sales
+ where ss_quantity between 21 and 40) > 122840
+ then (select avg(ss_ext_discount_amt)
+ from store_sales
+ where ss_quantity between 21 and 40)
+ else (select avg(ss_net_profit)
+ from store_sales
+ where ss_quantity between 21 and 40) end bucket2,
+ case when (select count(*)
+ from store_sales
+ where ss_quantity between 41 and 60) > 56580
+ then (select avg(ss_ext_discount_amt)
+ from store_sales
+ where ss_quantity between 41 and 60)
+ else (select avg(ss_net_profit)
+ from store_sales
+ where ss_quantity between 41 and 60) end bucket3,
+ case when (select count(*)
+ from store_sales
+ where ss_quantity between 61 and 80) > 10097
+ then (select avg(ss_ext_discount_amt)
+ from store_sales
+ where ss_quantity between 61 and 80)
+ else (select avg(ss_net_profit)
+ from store_sales
+ where ss_quantity between 61 and 80) end bucket4,
+ case when (select count(*)
+ from store_sales
+ where ss_quantity between 81 and 100) > 165306
+ then (select avg(ss_ext_discount_amt)
+ from store_sales
+ where ss_quantity between 81 and 100)
+ else (select avg(ss_net_profit)
+ from store_sales
+ where ss_quantity between 81 and 100) end bucket5
+from reason
+where r_reason_sk = 1
+---- PLAN
+Max Per-Host Resource Reservation: Memory=120.01MB Threads=17
+Per-Host Resource Estimates: Memory=2.05GB
+PLAN-ROOT SINK
+|
+45:NESTED LOOP JOIN [CROSS JOIN]
+| row-size=124B cardinality=1
+|
+|--30:AGGREGATE [FINALIZE]
+| | output: avg(ss_net_profit)
+| | row-size=8B cardinality=1
+| |
+| 29:SCAN HDFS [tpcds.store_sales]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_quantity <= 100, ss_quantity >= 81
+| row-size=8B cardinality=288.04K
+|
+44:NESTED LOOP JOIN [CROSS JOIN]
+| row-size=116B cardinality=1
+|
+|--28:AGGREGATE [FINALIZE]
+| | output: avg(ss_ext_discount_amt)
+| | row-size=8B cardinality=1
+| |
+| 27:SCAN HDFS [tpcds.store_sales]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_quantity <= 100, ss_quantity >= 81
+| row-size=8B cardinality=288.04K
+|
+43:NESTED LOOP JOIN [CROSS JOIN]
+| row-size=108B cardinality=1
+|
+|--26:AGGREGATE [FINALIZE]
+| | output: count(*)
+| | row-size=8B cardinality=1
+| |
+| 25:SCAN HDFS [tpcds.store_sales]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_quantity <= 100, ss_quantity >= 81
+| row-size=4B cardinality=288.04K
+|
+42:NESTED LOOP JOIN [CROSS JOIN]
+| row-size=100B cardinality=1
+|
+|--24:AGGREGATE [FINALIZE]
+| | output: avg(ss_net_profit)
+| | row-size=8B cardinality=1
+| |
+| 23:SCAN HDFS [tpcds.store_sales]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_quantity <= 80, ss_quantity >= 61
+| row-size=8B cardinality=288.04K
+|
+41:NESTED LOOP JOIN [CROSS JOIN]
+| row-size=92B cardinality=1
+|
+|--22:AGGREGATE [FINALIZE]
+| | output: avg(ss_ext_discount_amt)
+| | row-size=8B cardinality=1
+| |
+| 21:SCAN HDFS [tpcds.store_sales]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_quantity <= 80, ss_quantity >= 61
+| row-size=8B cardinality=288.04K
+|
+40:NESTED LOOP JOIN [CROSS JOIN]
+| row-size=84B cardinality=1
+|
+|--20:AGGREGATE [FINALIZE]
+| | output: count(*)
+| | row-size=8B cardinality=1
+| |
+| 19:SCAN HDFS [tpcds.store_sales]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_quantity <= 80, ss_quantity >= 61
+| row-size=4B cardinality=288.04K
+|
+39:NESTED LOOP JOIN [CROSS JOIN]
+| row-size=76B cardinality=1
+|
+|--18:AGGREGATE [FINALIZE]
+| | output: avg(ss_net_profit)
+| | row-size=8B cardinality=1
+| |
+| 17:SCAN HDFS [tpcds.store_sales]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_quantity <= 60, ss_quantity >= 41
+| row-size=8B cardinality=288.04K
+|
+38:NESTED LOOP JOIN [CROSS JOIN]
+| row-size=68B cardinality=1
+|
+|--16:AGGREGATE [FINALIZE]
+| | output: avg(ss_ext_discount_amt)
+| | row-size=8B cardinality=1
+| |
+| 15:SCAN HDFS [tpcds.store_sales]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_quantity <= 60, ss_quantity >= 41
+| row-size=8B cardinality=288.04K
+|
+37:NESTED LOOP JOIN [CROSS JOIN]
+| row-size=60B cardinality=1
+|
+|--14:AGGREGATE [FINALIZE]
+| | output: count(*)
+| | row-size=8B cardinality=1
+| |
+| 13:SCAN HDFS [tpcds.store_sales]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_quantity <= 60, ss_quantity >= 41
+| row-size=4B cardinality=288.04K
+|
+36:NESTED LOOP JOIN [CROSS JOIN]
+| row-size=52B cardinality=1
+|
+|--12:AGGREGATE [FINALIZE]
+| | output: avg(ss_net_profit)
+| | row-size=8B cardinality=1
+| |
+| 11:SCAN HDFS [tpcds.store_sales]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_quantity <= 40, ss_quantity >= 21
+| row-size=8B cardinality=288.04K
+|
+35:NESTED LOOP JOIN [CROSS JOIN]
+| row-size=44B cardinality=1
+|
+|--10:AGGREGATE [FINALIZE]
+| | output: avg(ss_ext_discount_amt)
+| | row-size=8B cardinality=1
+| |
+| 09:SCAN HDFS [tpcds.store_sales]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_quantity <= 40, ss_quantity >= 21
+| row-size=8B cardinality=288.04K
+|
+34:NESTED LOOP JOIN [CROSS JOIN]
+| row-size=36B cardinality=1
+|
+|--08:AGGREGATE [FINALIZE]
+| | output: count(*)
+| | row-size=8B cardinality=1
+| |
+| 07:SCAN HDFS [tpcds.store_sales]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_quantity <= 40, ss_quantity >= 21
+| row-size=4B cardinality=288.04K
+|
+33:NESTED LOOP JOIN [CROSS JOIN]
+| row-size=28B cardinality=1
+|
+|--06:AGGREGATE [FINALIZE]
+| | output: avg(ss_net_profit)
+| | row-size=8B cardinality=1
+| |
+| 05:SCAN HDFS [tpcds.store_sales]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_quantity <= 20, ss_quantity >= 1
+| row-size=8B cardinality=288.04K
+|
+32:NESTED LOOP JOIN [CROSS JOIN]
+| row-size=20B cardinality=1
+|
+|--04:AGGREGATE [FINALIZE]
+| | output: avg(ss_ext_discount_amt)
+| | row-size=8B cardinality=1
+| |
+| 03:SCAN HDFS [tpcds.store_sales]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_quantity <= 20, ss_quantity >= 1
+| row-size=8B cardinality=288.04K
+|
+31:NESTED LOOP JOIN [CROSS JOIN]
+| row-size=12B cardinality=1
+|
+|--00:SCAN HDFS [tpcds.reason]
+| HDFS partitions=1/1 files=1 size=1.31KB
+| predicates: r_reason_sk = 1
+| row-size=4B cardinality=1
+|
+02:AGGREGATE [FINALIZE]
+| output: count(*)
+| row-size=8B cardinality=1
+|
+01:SCAN HDFS [tpcds.store_sales]
+ HDFS partitions=1824/1824 files=1824 size=346.60MB
+ predicates: ss_quantity <= 20, ss_quantity >= 1
+ row-size=4B cardinality=288.04K
+---- DISTRIBUTEDPLAN
+Max Per-Host Resource Reservation: Memory=120.01MB Threads=47
+Per-Host Resource Estimates: Memory=2.20GB
+PLAN-ROOT SINK
+|
+45:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+| row-size=124B cardinality=1
+|
+|--90:EXCHANGE [UNPARTITIONED]
+| |
+| 89:AGGREGATE [FINALIZE]
+| | output: avg:merge(ss_net_profit)
+| | row-size=8B cardinality=1
+| |
+| 88:EXCHANGE [UNPARTITIONED]
+| |
+| 30:AGGREGATE
+| | output: avg(ss_net_profit)
+| | row-size=8B cardinality=1
+| |
+| 29:SCAN HDFS [tpcds.store_sales]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_quantity <= 100, ss_quantity >= 81
+| row-size=8B cardinality=288.04K
+|
+44:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+| row-size=116B cardinality=1
+|
+|--87:EXCHANGE [UNPARTITIONED]
+| |
+| 86:AGGREGATE [FINALIZE]
+| | output: avg:merge(ss_ext_discount_amt)
+| | row-size=8B cardinality=1
+| |
+| 85:EXCHANGE [UNPARTITIONED]
+| |
+| 28:AGGREGATE
+| | output: avg(ss_ext_discount_amt)
+| | row-size=8B cardinality=1
+| |
+| 27:SCAN HDFS [tpcds.store_sales]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_quantity <= 100, ss_quantity >= 81
+| row-size=8B cardinality=288.04K
+|
+43:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+| row-size=108B cardinality=1
+|
+|--84:EXCHANGE [UNPARTITIONED]
+| |
+| 83:AGGREGATE [FINALIZE]
+| | output: count:merge(*)
+| | row-size=8B cardinality=1
+| |
+| 82:EXCHANGE [UNPARTITIONED]
+| |
+| 26:AGGREGATE
+| | output: count(*)
+| | row-size=8B cardinality=1
+| |
+| 25:SCAN HDFS [tpcds.store_sales]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_quantity <= 100, ss_quantity >= 81
+| row-size=4B cardinality=288.04K
+|
+42:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+| row-size=100B cardinality=1
+|
+|--81:EXCHANGE [UNPARTITIONED]
+| |
+| 80:AGGREGATE [FINALIZE]
+| | output: avg:merge(ss_net_profit)
+| | row-size=8B cardinality=1
+| |
+| 79:EXCHANGE [UNPARTITIONED]
+| |
+| 24:AGGREGATE
+| | output: avg(ss_net_profit)
+| | row-size=8B cardinality=1
+| |
+| 23:SCAN HDFS [tpcds.store_sales]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_quantity <= 80, ss_quantity >= 61
+| row-size=8B cardinality=288.04K
+|
+41:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+| row-size=92B cardinality=1
+|
+|--78:EXCHANGE [UNPARTITIONED]
+| |
+| 77:AGGREGATE [FINALIZE]
+| | output: avg:merge(ss_ext_discount_amt)
+| | row-size=8B cardinality=1
+| |
+| 76:EXCHANGE [UNPARTITIONED]
+| |
+| 22:AGGREGATE
+| | output: avg(ss_ext_discount_amt)
+| | row-size=8B cardinality=1
+| |
+| 21:SCAN HDFS [tpcds.store_sales]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_quantity <= 80, ss_quantity >= 61
+| row-size=8B cardinality=288.04K
+|
+40:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+| row-size=84B cardinality=1
+|
+|--75:EXCHANGE [UNPARTITIONED]
+| |
+| 74:AGGREGATE [FINALIZE]
+| | output: count:merge(*)
+| | row-size=8B cardinality=1
+| |
+| 73:EXCHANGE [UNPARTITIONED]
+| |
+| 20:AGGREGATE
+| | output: count(*)
+| | row-size=8B cardinality=1
+| |
+| 19:SCAN HDFS [tpcds.store_sales]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_quantity <= 80, ss_quantity >= 61
+| row-size=4B cardinality=288.04K
+|
+39:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+| row-size=76B cardinality=1
+|
+|--72:EXCHANGE [UNPARTITIONED]
+| |
+| 71:AGGREGATE [FINALIZE]
+| | output: avg:merge(ss_net_profit)
+| | row-size=8B cardinality=1
+| |
+| 70:EXCHANGE [UNPARTITIONED]
+| |
+| 18:AGGREGATE
+| | output: avg(ss_net_profit)
+| | row-size=8B cardinality=1
+| |
+| 17:SCAN HDFS [tpcds.store_sales]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_quantity <= 60, ss_quantity >= 41
+| row-size=8B cardinality=288.04K
+|
+38:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+| row-size=68B cardinality=1
+|
+|--69:EXCHANGE [UNPARTITIONED]
+| |
+| 68:AGGREGATE [FINALIZE]
+| | output: avg:merge(ss_ext_discount_amt)
+| | row-size=8B cardinality=1
+| |
+| 67:EXCHANGE [UNPARTITIONED]
+| |
+| 16:AGGREGATE
+| | output: avg(ss_ext_discount_amt)
+| | row-size=8B cardinality=1
+| |
+| 15:SCAN HDFS [tpcds.store_sales]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_quantity <= 60, ss_quantity >= 41
+| row-size=8B cardinality=288.04K
+|
+37:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+| row-size=60B cardinality=1
+|
+|--66:EXCHANGE [UNPARTITIONED]
+| |
+| 65:AGGREGATE [FINALIZE]
+| | output: count:merge(*)
+| | row-size=8B cardinality=1
+| |
+| 64:EXCHANGE [UNPARTITIONED]
+| |
+| 14:AGGREGATE
+| | output: count(*)
+| | row-size=8B cardinality=1
+| |
+| 13:SCAN HDFS [tpcds.store_sales]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_quantity <= 60, ss_quantity >= 41
+| row-size=4B cardinality=288.04K
+|
+36:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+| row-size=52B cardinality=1
+|
+|--63:EXCHANGE [UNPARTITIONED]
+| |
+| 62:AGGREGATE [FINALIZE]
+| | output: avg:merge(ss_net_profit)
+| | row-size=8B cardinality=1
+| |
+| 61:EXCHANGE [UNPARTITIONED]
+| |
+| 12:AGGREGATE
+| | output: avg(ss_net_profit)
+| | row-size=8B cardinality=1
+| |
+| 11:SCAN HDFS [tpcds.store_sales]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_quantity <= 40, ss_quantity >= 21
+| row-size=8B cardinality=288.04K
+|
+35:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+| row-size=44B cardinality=1
+|
+|--60:EXCHANGE [UNPARTITIONED]
+| |
+| 59:AGGREGATE [FINALIZE]
+| | output: avg:merge(ss_ext_discount_amt)
+| | row-size=8B cardinality=1
+| |
+| 58:EXCHANGE [UNPARTITIONED]
+| |
+| 10:AGGREGATE
+| | output: avg(ss_ext_discount_amt)
+| | row-size=8B cardinality=1
+| |
+| 09:SCAN HDFS [tpcds.store_sales]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_quantity <= 40, ss_quantity >= 21
+| row-size=8B cardinality=288.04K
+|
+34:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+| row-size=36B cardinality=1
+|
+|--57:EXCHANGE [UNPARTITIONED]
+| |
+| 56:AGGREGATE [FINALIZE]
+| | output: count:merge(*)
+| | row-size=8B cardinality=1
+| |
+| 55:EXCHANGE [UNPARTITIONED]
+| |
+| 08:AGGREGATE
+| | output: count(*)
+| | row-size=8B cardinality=1
+| |
+| 07:SCAN HDFS [tpcds.store_sales]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_quantity <= 40, ss_quantity >= 21
+| row-size=4B cardinality=288.04K
+|
+33:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+| row-size=28B cardinality=1
+|
+|--54:EXCHANGE [UNPARTITIONED]
+| |
+| 53:AGGREGATE [FINALIZE]
+| | output: avg:merge(ss_net_profit)
+| | row-size=8B cardinality=1
+| |
+| 52:EXCHANGE [UNPARTITIONED]
+| |
+| 06:AGGREGATE
+| | output: avg(ss_net_profit)
+| | row-size=8B cardinality=1
+| |
+| 05:SCAN HDFS [tpcds.store_sales]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_quantity <= 20, ss_quantity >= 1
+| row-size=8B cardinality=288.04K
+|
+32:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+| row-size=20B cardinality=1
+|
+|--51:EXCHANGE [UNPARTITIONED]
+| |
+| 50:AGGREGATE [FINALIZE]
+| | output: avg:merge(ss_ext_discount_amt)
+| | row-size=8B cardinality=1
+| |
+| 49:EXCHANGE [UNPARTITIONED]
+| |
+| 04:AGGREGATE
+| | output: avg(ss_ext_discount_amt)
+| | row-size=8B cardinality=1
+| |
+| 03:SCAN HDFS [tpcds.store_sales]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_quantity <= 20, ss_quantity >= 1
+| row-size=8B cardinality=288.04K
+|
+31:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+| row-size=12B cardinality=1
+|
+|--48:EXCHANGE [UNPARTITIONED]
+| |
+| 00:SCAN HDFS [tpcds.reason]
+| HDFS partitions=1/1 files=1 size=1.31KB
+| predicates: r_reason_sk = 1
+| row-size=4B cardinality=1
+|
+47:AGGREGATE [FINALIZE]
+| output: count:merge(*)
+| row-size=8B cardinality=1
+|
+46:EXCHANGE [UNPARTITIONED]
+|
+02:AGGREGATE
+| output: count(*)
+| row-size=8B cardinality=1
+|
+01:SCAN HDFS [tpcds.store_sales]
+ HDFS partitions=1824/1824 files=1824 size=346.60MB
+ predicates: ss_quantity <= 20, ss_quantity >= 1
+ row-size=4B cardinality=288.04K
+---- PARALLELPLANS
+Max Per-Host Resource Reservation: Memory=240.01MB Threads=61
+Per-Host Resource Estimates: Memory=946MB
+PLAN-ROOT SINK
+|
+45:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+| join table id: 00
+| row-size=124B cardinality=1
+|
+|--JOIN BUILD
+| | join-table-id=00 plan-id=01 cohort-id=01
+| |
+| 90:EXCHANGE [UNPARTITIONED]
+| |
+| 89:AGGREGATE [FINALIZE]
+| | output: avg:merge(ss_net_profit)
+| | row-size=8B cardinality=1
+| |
+| 88:EXCHANGE [UNPARTITIONED]
+| |
+| 30:AGGREGATE
+| | output: avg(ss_net_profit)
+| | row-size=8B cardinality=1
+| |
+| 29:SCAN HDFS [tpcds.store_sales]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_quantity <= 100, ss_quantity >= 81
+| row-size=8B cardinality=288.04K
+|
+44:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+| join table id: 01
+| row-size=116B cardinality=1
+|
+|--JOIN BUILD
+| | join-table-id=01 plan-id=02 cohort-id=01
+| |
+| 87:EXCHANGE [UNPARTITIONED]
+| |
+| 86:AGGREGATE [FINALIZE]
+| | output: avg:merge(ss_ext_discount_amt)
+| | row-size=8B cardinality=1
+| |
+| 85:EXCHANGE [UNPARTITIONED]
+| |
+| 28:AGGREGATE
+| | output: avg(ss_ext_discount_amt)
+| | row-size=8B cardinality=1
+| |
+| 27:SCAN HDFS [tpcds.store_sales]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_quantity <= 100, ss_quantity >= 81
+| row-size=8B cardinality=288.04K
+|
+43:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+| join table id: 02
+| row-size=108B cardinality=1
+|
+|--JOIN BUILD
+| | join-table-id=02 plan-id=03 cohort-id=01
+| |
+| 84:EXCHANGE [UNPARTITIONED]
+| |
+| 83:AGGREGATE [FINALIZE]
+| | output: count:merge(*)
+| | row-size=8B cardinality=1
+| |
+| 82:EXCHANGE [UNPARTITIONED]
+| |
+| 26:AGGREGATE
+| | output: count(*)
+| | row-size=8B cardinality=1
+| |
+| 25:SCAN HDFS [tpcds.store_sales]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_quantity <= 100, ss_quantity >= 81
+| row-size=4B cardinality=288.04K
+|
+42:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+| join table id: 03
+| row-size=100B cardinality=1
+|
+|--JOIN BUILD
+| | join-table-id=03 plan-id=04 cohort-id=01
+| |
+| 81:EXCHANGE [UNPARTITIONED]
+| |
+| 80:AGGREGATE [FINALIZE]
+| | output: avg:merge(ss_net_profit)
+| | row-size=8B cardinality=1
+| |
+| 79:EXCHANGE [UNPARTITIONED]
+| |
+| 24:AGGREGATE
+| | output: avg(ss_net_profit)
+| | row-size=8B cardinality=1
+| |
+| 23:SCAN HDFS [tpcds.store_sales]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_quantity <= 80, ss_quantity >= 61
+| row-size=8B cardinality=288.04K
+|
+41:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+| join table id: 04
+| row-size=92B cardinality=1
+|
+|--JOIN BUILD
+| | join-table-id=04 plan-id=05 cohort-id=01
+| |
+| 78:EXCHANGE [UNPARTITIONED]
+| |
+| 77:AGGREGATE [FINALIZE]
+| | output: avg:merge(ss_ext_discount_amt)
+| | row-size=8B cardinality=1
+| |
+| 76:EXCHANGE [UNPARTITIONED]
+| |
+| 22:AGGREGATE
+| | output: avg(ss_ext_discount_amt)
+| | row-size=8B cardinality=1
+| |
+| 21:SCAN HDFS [tpcds.store_sales]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_quantity <= 80, ss_quantity >= 61
+| row-size=8B cardinality=288.04K
+|
+40:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+| join table id: 05
+| row-size=84B cardinality=1
+|
+|--JOIN BUILD
+| | join-table-id=05 plan-id=06 cohort-id=01
+| |
+| 75:EXCHANGE [UNPARTITIONED]
+| |
+| 74:AGGREGATE [FINALIZE]
+| | output: count:merge(*)
+| | row-size=8B cardinality=1
+| |
+| 73:EXCHANGE [UNPARTITIONED]
+| |
+| 20:AGGREGATE
+| | output: count(*)
+| | row-size=8B cardinality=1
+| |
+| 19:SCAN HDFS [tpcds.store_sales]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_quantity <= 80, ss_quantity >= 61
+| row-size=4B cardinality=288.04K
+|
+39:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+| join table id: 06
+| row-size=76B cardinality=1
+|
+|--JOIN BUILD
+| | join-table-id=06 plan-id=07 cohort-id=01
+| |
+| 72:EXCHANGE [UNPARTITIONED]
+| |
+| 71:AGGREGATE [FINALIZE]
+| | output: avg:merge(ss_net_profit)
+| | row-size=8B cardinality=1
+| |
+| 70:EXCHANGE [UNPARTITIONED]
+| |
+| 18:AGGREGATE
+| | output: avg(ss_net_profit)
+| | row-size=8B cardinality=1
+| |
+| 17:SCAN HDFS [tpcds.store_sales]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_quantity <= 60, ss_quantity >= 41
+| row-size=8B cardinality=288.04K
+|
+38:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+| join table id: 07
+| row-size=68B cardinality=1
+|
+|--JOIN BUILD
+| | join-table-id=07 plan-id=08 cohort-id=01
+| |
+| 69:EXCHANGE [UNPARTITIONED]
+| |
+| 68:AGGREGATE [FINALIZE]
+| | output: avg:merge(ss_ext_discount_amt)
+| | row-size=8B cardinality=1
+| |
+| 67:EXCHANGE [UNPARTITIONED]
+| |
+| 16:AGGREGATE
+| | output: avg(ss_ext_discount_amt)
+| | row-size=8B cardinality=1
+| |
+| 15:SCAN HDFS [tpcds.store_sales]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_quantity <= 60, ss_quantity >= 41
+| row-size=8B cardinality=288.04K
+|
+37:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+| join table id: 08
+| row-size=60B cardinality=1
+|
+|--JOIN BUILD
+| | join-table-id=08 plan-id=09 cohort-id=01
+| |
+| 66:EXCHANGE [UNPARTITIONED]
+| |
+| 65:AGGREGATE [FINALIZE]
+| | output: count:merge(*)
+| | row-size=8B cardinality=1
+| |
+| 64:EXCHANGE [UNPARTITIONED]
+| |
+| 14:AGGREGATE
+| | output: count(*)
+| | row-size=8B cardinality=1
+| |
+| 13:SCAN HDFS [tpcds.store_sales]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_quantity <= 60, ss_quantity >= 41
+| row-size=4B cardinality=288.04K
+|
+36:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+| join table id: 09
+| row-size=52B cardinality=1
+|
+|--JOIN BUILD
+| | join-table-id=09 plan-id=10 cohort-id=01
+| |
+| 63:EXCHANGE [UNPARTITIONED]
+| |
+| 62:AGGREGATE [FINALIZE]
+| | output: avg:merge(ss_net_profit)
+| | row-size=8B cardinality=1
+| |
+| 61:EXCHANGE [UNPARTITIONED]
+| |
+| 12:AGGREGATE
+| | output: avg(ss_net_profit)
+| | row-size=8B cardinality=1
+| |
+| 11:SCAN HDFS [tpcds.store_sales]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_quantity <= 40, ss_quantity >= 21
+| row-size=8B cardinality=288.04K
+|
+35:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+| join table id: 10
+| row-size=44B cardinality=1
+|
+|--JOIN BUILD
+| | join-table-id=10 plan-id=11 cohort-id=01
+| |
+| 60:EXCHANGE [UNPARTITIONED]
+| |
+| 59:AGGREGATE [FINALIZE]
+| | output: avg:merge(ss_ext_discount_amt)
+| | row-size=8B cardinality=1
+| |
+| 58:EXCHANGE [UNPARTITIONED]
+| |
+| 10:AGGREGATE
+| | output: avg(ss_ext_discount_amt)
+| | row-size=8B cardinality=1
+| |
+| 09:SCAN HDFS [tpcds.store_sales]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_quantity <= 40, ss_quantity >= 21
+| row-size=8B cardinality=288.04K
+|
+34:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+| join table id: 11
+| row-size=36B cardinality=1
+|
+|--JOIN BUILD
+| | join-table-id=11 plan-id=12 cohort-id=01
+| |
+| 57:EXCHANGE [UNPARTITIONED]
+| |
+| 56:AGGREGATE [FINALIZE]
+| | output: count:merge(*)
+| | row-size=8B cardinality=1
+| |
+| 55:EXCHANGE [UNPARTITIONED]
+| |
+| 08:AGGREGATE
+| | output: count(*)
+| | row-size=8B cardinality=1
+| |
+| 07:SCAN HDFS [tpcds.store_sales]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_quantity <= 40, ss_quantity >= 21
+| row-size=4B cardinality=288.04K
+|
+33:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+| join table id: 12
+| row-size=28B cardinality=1
+|
+|--JOIN BUILD
+| | join-table-id=12 plan-id=13 cohort-id=01
+| |
+| 54:EXCHANGE [UNPARTITIONED]
+| |
+| 53:AGGREGATE [FINALIZE]
+| | output: avg:merge(ss_net_profit)
+| | row-size=8B cardinality=1
+| |
+| 52:EXCHANGE [UNPARTITIONED]
+| |
+| 06:AGGREGATE
+| | output: avg(ss_net_profit)
+| | row-size=8B cardinality=1
+| |
+| 05:SCAN HDFS [tpcds.store_sales]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_quantity <= 20, ss_quantity >= 1
+| row-size=8B cardinality=288.04K
+|
+32:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+| join table id: 13
+| row-size=20B cardinality=1
+|
+|--JOIN BUILD
+| | join-table-id=13 plan-id=14 cohort-id=01
+| |
+| 51:EXCHANGE [UNPARTITIONED]
+| |
+| 50:AGGREGATE [FINALIZE]
+| | output: avg:merge(ss_ext_discount_amt)
+| | row-size=8B cardinality=1
+| |
+| 49:EXCHANGE [UNPARTITIONED]
+| |
+| 04:AGGREGATE
+| | output: avg(ss_ext_discount_amt)
+| | row-size=8B cardinality=1
+| |
+| 03:SCAN HDFS [tpcds.store_sales]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_quantity <= 20, ss_quantity >= 1
+| row-size=8B cardinality=288.04K
+|
+31:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+| join table id: 14
+| row-size=12B cardinality=1
+|
+|--JOIN BUILD
+| | join-table-id=14 plan-id=15 cohort-id=01
+| |
+| 48:EXCHANGE [UNPARTITIONED]
+| |
+| 00:SCAN HDFS [tpcds.reason]
+| HDFS partitions=1/1 files=1 size=1.31KB
+| predicates: r_reason_sk = 1
+| row-size=4B cardinality=1
+|
+47:AGGREGATE [FINALIZE]
+| output: count:merge(*)
+| row-size=8B cardinality=1
+|
+46:EXCHANGE [UNPARTITIONED]
+|
+02:AGGREGATE
+| output: count(*)
+| row-size=8B cardinality=1
+|
+01:SCAN HDFS [tpcds.store_sales]
+ HDFS partitions=1824/1824 files=1824 size=346.60MB
+ predicates: ss_quantity <= 20, ss_quantity >= 1
+ row-size=4B cardinality=288.04K
+====
diff --git a/testdata/workloads/functional-query/queries/QueryTest/subquery.test b/testdata/workloads/functional-query/queries/QueryTest/subquery.test
index c53d29c..000c8cf 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/subquery.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/subquery.test
@@ -1039,3 +1039,176 @@ where int_col = (select f.id from (
---- TYPES
BIGINT
====
+---- QUERY
+# Basic scalar in select list
+select id, 1+(select min(id) from functional.alltypessmall)
+ from functional.alltypestiny
+order by id
+---- RESULTS
+0,1
+1,1
+2,1
+3,1
+4,1
+5,1
+6,1
+7,1
+---- TYPES
+INT, BIGINT
+====
+---- QUERY
+# No aggregate however limit 1
+select id, 1+(select id from functional.alltypessmall where id=0 group by id limit 1)
+ from functional.alltypestiny
+order by id
+---- RESULTS
+0,1
+1,1
+2,1
+3,1
+4,1
+5,1
+6,1
+7,1
+---- TYPES
+INT, BIGINT
+====
+---- QUERY
+# outer join
+select s.id, (select count(id) from alltypes) from alltypestiny t
+ left outer join alltypessmall s on t.id = s.id where s.bool_col
+order by id
+---- RESULTS
+0,7300
+2,7300
+4,7300
+6,7300
+---- TYPES
+INT, BIGINT
+====
+---- QUERY
+# correlated scalar currently unsupported
+select id, (select count(id) from functional.alltypessmall where id=t.id)
+ from functional.alltypestiny t
+order by id
+---- RESULTS
+---- CATCH
+A correlated scalar subquery is not supported in the expression:
+====
+---- QUERY
+# Uncorrelated Scalar Aggregate in select list combined with aggregation in parent query
+select
+max(a.id),
+(select max(id) from functional.alltypestiny),
+(select min(id) from functional.alltypestiny)
+from functional.alltypessmall a
+---- RESULTS
+99,7,0
+---- TYPES
+INT, INT, INT
+====
+---- QUERY
+# Empty tables or false predicate should result in NULLs
+# Count aggregates on empty return 0
+select id,
+ (select sum(f2) from functional.emptytable),
+ (select count(id) from functional.alltypestiny where id > 100)
+from functional.alltypestiny
+order by id
+---- RESULTS
+0,NULL,0
+1,NULL,0
+2,NULL,0
+3,NULL,0
+4,NULL,0
+5,NULL,0
+6,NULL,0
+7,NULL,0
+---- TYPES
+INT, BIGINT, BIGINT
+====
+---- QUERY
+# nested subquery over a view with an outer predicate on the subquery column
+select count(id) from
+ (select id,
+ (select count(*) as cnt from functional.alltypestiny where int_col
+ <= (select max(int_col) from functional.alltypes_view)) as c from functional.dimtbl) T
+ where t.c <10
+---- RESULTS
+10
+---- TYPES
+BIGINT
+====
+---- QUERY
+# Empty tables should result in NULLs
+select id, id in (select min(id) from functional.alltypessmall)
+ from functional.alltypestiny
+order by id
+---- RESULTS
+0,true
+1,false
+2,false
+3,false
+4,false
+5,false
+6,false
+7,false
+---- TYPES
+INT, BOOLEAN
+====
+---- QUERY
+# With a join in the subquery
+select id,
+ (select count(*) from functional.alltypessmall join functional.alltypestiny using (id))
+ from functional.alltypestiny
+order by id
+---- RESULTS
+0,8
+1,8
+2,8
+3,8
+4,8
+5,8
+6,8
+7,8
+---- TYPES
+INT, BIGINT
+====
+---- QUERY
+# EXISTS predicates are handled at plantime so we need to special case their rewrites
+select id,
+ exists (select max(id) from dimtbl)
+ from functional.alltypestiny
+order by id
+---- RESULTS
+0,true
+1,true
+2,true
+3,true
+4,true
+5,true
+6,true
+7,true
+---- TYPES
+INT, BOOLEAN
+====
+---- QUERY
+# Multiple levels of exists
+SELECT id,
+NOT EXISTS (SELECT MAX(f2) FROM emptytable),
+EXISTS (SELECT MAX(id) FROM DIMTBL WHERE EXISTS (SELECT MAX(id) FROM alltypestiny)),
+(SELECT 1
+FROM alltypestiny t1
+WHERE EXISTS
+ (SELECT 1
+ FROM alltypestiny t2
+ WHERE t1.id = t2.id AND t1.int_col = t2.int_col
+ GROUP BY t2.id
+ HAVING count(1) = 1) LIMIT 1)
+FROM alltypessmall WHERE id < 2
+---- RESULTS
+0,false,true,1
+1,false,true,1
+---- TYPES
+INT, BOOLEAN, BOOLEAN, TINYINT
+====
diff --git a/testdata/workloads/tpcds/queries/count.test b/testdata/workloads/tpcds/queries/count.test
index 213c7a7..8ae49d4 100644
--- a/testdata/workloads/tpcds/queries/count.test
+++ b/testdata/workloads/tpcds/queries/count.test
@@ -159,4 +159,11 @@ select count(*) from web_site;
30
---- TYPES
BIGINT
-====
\ No newline at end of file
+====
+---- QUERY: TPCDS-COUNT-REASON
+select count(*) from reason;
+---- RESULTS
+35
+---- TYPES
+BIGINT
+====
diff --git a/testdata/workloads/tpcds/queries/tpcds-decimal_v2-q9.test b/testdata/workloads/tpcds/queries/tpcds-decimal_v2-q9.test
new file mode 100644
index 0000000..82f4527
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/tpcds-decimal_v2-q9.test
@@ -0,0 +1,54 @@
+====
+---- QUERY: TPCDS-Q9
+select case when (select count(*)
+ from store_sales
+ where ss_quantity between 1 and 20) > 74129
+ then (select avg(ss_ext_discount_amt)
+ from store_sales
+ where ss_quantity between 1 and 20)
+ else (select avg(ss_net_profit)
+ from store_sales
+ where ss_quantity between 1 and 20) end bucket1 ,
+ case when (select count(*)
+ from store_sales
+ where ss_quantity between 21 and 40) > 122840
+ then (select avg(ss_ext_discount_amt)
+ from store_sales
+ where ss_quantity between 21 and 40)
+ else (select avg(ss_net_profit)
+ from store_sales
+ where ss_quantity between 21 and 40) end bucket2,
+ case when (select count(*)
+ from store_sales
+ where ss_quantity between 41 and 60) > 56580
+ then (select avg(ss_ext_discount_amt)
+ from store_sales
+ where ss_quantity between 41 and 60)
+ else (select avg(ss_net_profit)
+ from store_sales
+ where ss_quantity between 41 and 60) end bucket3,
+ case when (select count(*)
+ from store_sales
+ where ss_quantity between 61 and 80) > 10097
+ then (select avg(ss_ext_discount_amt)
+ from store_sales
+ where ss_quantity between 61 and 80)
+ else (select avg(ss_net_profit)
+ from store_sales
+ where ss_quantity between 61 and 80) end bucket4,
+ case when (select count(*)
+ from store_sales
+ where ss_quantity between 81 and 100) > 165306
+ then (select avg(ss_ext_discount_amt)
+ from store_sales
+ where ss_quantity between 81 and 100)
+ else (select avg(ss_net_profit)
+ from store_sales
+ where ss_quantity between 81 and 100) end bucket5
+from reason
+where r_reason_sk = 1
+---- RESULTS
+39.645413,115.898138,191.634713,267.188000,341.986915
+---- TYPES
+DECIMAL, DECIMAL, DECIMAL, DECIMAL, DECIMAL
+====
diff --git a/testdata/workloads/tpcds/queries/tpcds-q9.test b/testdata/workloads/tpcds/queries/tpcds-q9.test
new file mode 100644
index 0000000..881cb68
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/tpcds-q9.test
@@ -0,0 +1,54 @@
+====
+---- QUERY: TPCDS-Q9
+select case when (select count(*)
+ from store_sales
+ where ss_quantity between 1 and 20) > 74129
+ then (select avg(ss_ext_discount_amt)
+ from store_sales
+ where ss_quantity between 1 and 20)
+ else (select avg(ss_net_profit)
+ from store_sales
+ where ss_quantity between 1 and 20) end bucket1,
+ case when (select count(*)
+ from store_sales
+ where ss_quantity between 21 and 40) > 122840
+ then (select avg(ss_ext_discount_amt)
+ from store_sales
+ where ss_quantity between 21 and 40)
+ else (select avg(ss_net_profit)
+ from store_sales
+ where ss_quantity between 21 and 40) end bucket2,
+ case when (select count(*)
+ from store_sales
+ where ss_quantity between 41 and 60) > 56580
+ then (select avg(ss_ext_discount_amt)
+ from store_sales
+ where ss_quantity between 41 and 60)
+ else (select avg(ss_net_profit)
+ from store_sales
+ where ss_quantity between 41 and 60) end bucket3,
+ case when (select count(*)
+ from store_sales
+ where ss_quantity between 61 and 80) > 10097
+ then (select avg(ss_ext_discount_amt)
+ from store_sales
+ where ss_quantity between 61 and 80)
+ else (select avg(ss_net_profit)
+ from store_sales
+ where ss_quantity between 61 and 80) end bucket4,
+ case when (select count(*)
+ from store_sales
+ where ss_quantity between 81 and 100) > 165306
+ then (select avg(ss_ext_discount_amt)
+ from store_sales
+ where ss_quantity between 81 and 100)
+ else (select avg(ss_net_profit)
+ from store_sales
+ where ss_quantity between 81 and 100) end bucket5
+from reason
+where r_reason_sk = 1
+---- RESULTS
+39.64,115.89,191.63,267.18,341.98
+---- TYPES
+DECIMAL, DECIMAL, DECIMAL, DECIMAL, DECIMAL
+====
diff --git a/tests/query_test/test_tpcds_queries.py b/tests/query_test/test_tpcds_queries.py
index 512fbf5..41fa1a7 100644
--- a/tests/query_test/test_tpcds_queries.py
+++ b/tests/query_test/test_tpcds_queries.py
@@ -73,6 +73,9 @@ class TestTpcdsQuery(ImpalaTestSuite):
def test_tpcds_q8(self, vector):
self.run_test_case(self.get_workload() + '-q8', vector)
+ def test_tpcds_q9(self, vector):
+ self.run_test_case(self.get_workload() + '-q9', vector)
+
def test_tpcds_q10a(self, vector):
self.run_test_case(self.get_workload() + '-q10a', vector)
@@ -342,6 +345,9 @@ class TestTpcdsDecimalV2Query(ImpalaTestSuite):
def test_tpcds_q8(self, vector):
self.run_test_case(self.get_workload() + '-decimal_v2-q8', vector)
+ def test_tpcds_q9(self, vector):
+ self.run_test_case(self.get_workload() + '-decimal_v2-q9', vector)
+
def test_tpcds_q10a(self, vector):
self.run_test_case(self.get_workload() + '-decimal_v2-q10a', vector)
diff --git a/tests/util/parse_util.py b/tests/util/parse_util.py
index 4aeb00b..448b6ec 100644
--- a/tests/util/parse_util.py
+++ b/tests/util/parse_util.py
@@ -22,7 +22,7 @@ from datetime import datetime
# changed, and the stress test loses the ability to run the full set of queries. Set
# these constants and assert that when a workload is used, all the queries we expect to
# use are there.
-EXPECTED_TPCDS_QUERIES_COUNT = 84
+EXPECTED_TPCDS_QUERIES_COUNT = 85
EXPECTED_TPCH_NESTED_QUERIES_COUNT = 22
EXPECTED_TPCH_QUERIES_COUNT = 22
# Add the number of stress test specific queries, i.e. in files like '*-stress-*.test'