You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2020/07/06 15:48:31 UTC
[impala] 02/02: IMPALA-9784,
IMPALA-9905: Uncorrelated subqueries in HAVING.
This is an automated email from the ASF dual-hosted git repository.
tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 2dca55695ef3c208ece543ad36bece2a985cf8da
Author: Shant Hovsepian <sh...@cloudera.com>
AuthorDate: Tue Jun 2 09:28:09 2020 -0400
IMPALA-9784, IMPALA-9905: Uncorrelated subqueries in HAVING.
Support rewriting subqueries in the HAVING clause by nesting the
aggregation query and pulling up the subquery predicates into the outer
WHERE clause.
Testing:
* New analyzer tests
* New functional subquery tests
* Added Q23, Q24 and Q44 to the tpcds workload
* Ran subquery rewrite tests
Change-Id: I124a58a09a1a47e1222a22d84b54fe7d07844461
Reviewed-on: http://gerrit.cloudera.org:8080/16052
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
---
.../org/apache/impala/analysis/SelectStmt.java | 34 +-
.../org/apache/impala/analysis/StmtRewriter.java | 135 +++++-
.../impala/analysis/AnalyzeSubqueriesTest.java | 25 +-
.../queries/PlannerTest/subquery-rewrite.test | 71 +++
.../queries/PlannerTest/tpcds-all.test | 527 +++++++++++++++++++++
.../queries/QueryTest/subquery.test | 105 ++++
testdata/workloads/tpcds/queries/tpcds-q23-1.test | 56 +++
testdata/workloads/tpcds/queries/tpcds-q23-2.test | 65 +++
testdata/workloads/tpcds/queries/tpcds-q24-1.test | 58 +++
testdata/workloads/tpcds/queries/tpcds-q24-2.test | 59 +++
testdata/workloads/tpcds/queries/tpcds-q44.test | 64 +++
tests/query_test/test_tpcds_queries.py | 15 +
12 files changed, 1192 insertions(+), 22 deletions(-)
diff --git a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
index 5618694..ab4a425 100644
--- a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
@@ -621,10 +621,12 @@ public class SelectStmt extends QueryStmt {
// Analyze the HAVING clause first so we can check if it contains aggregates.
// We need to analyze/register it even if we are not computing aggregates.
if (havingClause_ == null) return;
- // can't contain subqueries
- if (havingClause_.contains(Predicates.instanceOf(Subquery.class))) {
+ List<Expr> subqueries = new ArrayList<>();
+ havingClause_.collectAll(Predicates.instanceOf(Subquery.class), subqueries);
+ if (subqueries.size() > 1) {
throw new AnalysisException(
- "Subqueries are not supported in the HAVING clause.");
+ "Multiple subqueries are not supported in expression: "
+ + havingClause_.toSql());
}
// Resolve (top-level) aliases and analyzes
havingPred_ = resolveReferenceExpr(havingClause_, "HAVING", analyzer_,
@@ -818,10 +820,6 @@ public class SelectStmt extends QueryStmt {
LOG.trace("post-agg selectListExprs: " + Expr.debugString(resultExprs_));
}
if (havingPred_ != null) {
- // Make sure the predicate in the HAVING clause does not contain a
- // subquery.
- Preconditions.checkState(!havingPred_.contains(
- Predicates.instanceOf(Subquery.class)));
havingPred_ = havingPred_.substitute(combinedSmap, analyzer_, false);
analyzer_.registerConjuncts(havingPred_, true);
if (LOG.isTraceEnabled()) {
@@ -1069,16 +1067,16 @@ public class SelectStmt extends QueryStmt {
Preconditions.checkState(isAnalyzed());
selectList_.rewriteExprs(rewriter, analyzer_);
for (TableRef ref: fromClause_.getTableRefs()) ref.rewriteExprs(rewriter, analyzer_);
+ List<Subquery> subqueryExprs = new ArrayList<>();
if (whereClause_ != null) {
whereClause_ = rewriter.rewrite(whereClause_, analyzer_);
- // Also rewrite exprs in the statements of subqueries.
- List<Subquery> subqueryExprs = new ArrayList<>();
whereClause_.collect(Subquery.class, subqueryExprs);
- for (Subquery s: subqueryExprs) s.getStatement().rewriteExprs(rewriter);
}
if (havingClause_ != null) {
havingClause_ = rewriteCheckOrdinalResult(rewriter, havingClause_);
+ havingClause_.collect(Subquery.class, subqueryExprs);
}
+ for (Subquery s : subqueryExprs) s.getStatement().rewriteExprs(rewriter);
if (groupingExprs_ != null) {
for (int i = 0; i < groupingExprs_.size(); ++i) {
groupingExprs_.set(i, rewriteCheckOrdinalResult(
@@ -1217,6 +1215,9 @@ public class SelectStmt extends QueryStmt {
if (whereClause_ != null) {
whereClause_.collect(Subquery.class, subqueries);
}
+ if (havingClause_ != null) {
+ havingClause_.collect(Subquery.class, subqueries);
+ }
for (SelectListItem item : selectList_.getItems()) {
if (item.isStar()) continue;
item.getExpr().collect(Subquery.class, subqueries);
@@ -1253,12 +1254,15 @@ public class SelectStmt extends QueryStmt {
whereSubQueries.get(0).getStatement().collectInlineViews(inlineViews);
}
}
- List<Subquery> selectListSubQueries = Lists.newArrayList();
+ List<Subquery> subqueries = Lists.newArrayList();
for (SelectListItem item : selectList_.getItems()) {
if (item.isStar()) continue;
- item.getExpr().collect(Subquery.class, selectListSubQueries);
+ item.getExpr().collect(Subquery.class, subqueries);
+ }
+ if (havingClause_ != null) {
+ havingClause_.collect(Subquery.class, subqueries);
}
- for (Subquery sq : selectListSubQueries) {
+ for (Subquery sq : subqueries) {
sq.getStatement().collectInlineViews(inlineViews);
}
}
@@ -1291,6 +1295,9 @@ public class SelectStmt extends QueryStmt {
*
* This function may produce false negatives because the cardinality of the
* result set also depends on the data a stmt is processing.
+ *
+ * TODO: IMPALA-1285 to cover more cases that can be determinded at plan time such has a
+ * group by clause where all grouping expressions are bound to constant expressions.
*/
public boolean returnsSingleRow() {
Preconditions.checkState(isAnalyzed());
@@ -1302,6 +1309,7 @@ public class SelectStmt extends QueryStmt {
if (hasMultiAggInfo() && !hasGroupByClause() && !selectList_.isDistinct()) {
return true;
}
+
// Select from an inline view that returns at most one row.
List<TableRef> tableRefs = fromClause_.getTableRefs();
if (tableRefs.size() == 1 && tableRefs.get(0) instanceof InlineViewRef) {
diff --git a/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java b/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java
index c9fb004..f98afa4 100644
--- a/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java
+++ b/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java
@@ -307,6 +307,7 @@ public class StmtRewriter {
boolean updateSelectList = false;
SelectStmt subqueryStmt = (SelectStmt) expr.getSubquery().getStatement();
boolean isScalarSubquery = expr.getSubquery().isScalarSubquery();
+ boolean isScalarColumn = expr.getSubquery().returnsScalarColumn();
boolean isRuntimeScalar = subqueryStmt.isRuntimeScalar();
// Create a new inline view from the subquery stmt. The inline view will be added
// to the stmt's table refs later. Explicitly set the inline view's column labels
@@ -460,11 +461,17 @@ public class StmtRewriter {
}
if (!hasEqJoinPred && !inlineView.isCorrelated()) {
- // TODO: Remove this when independent subquery evaluation is implemented.
// TODO: Requires support for non-equi joins.
+ // TODO: Remove this when independent subquery evaluation is implemented.
+ // TODO: IMPALA-5100 to cover all cases, we do let through runtime scalars with
+ // group by clauses to allow for subqueries where we haven't implemented plan time
+ // expression evaluation to ensure only a single row is returned. This may expose
+ // runtime errors in the presence of multiple runtime scalar subqueries until we
+ // implement independent evaluation.
boolean hasGroupBy = ((SelectStmt) inlineView.getViewStmt()).hasGroupByClause();
- if ((!isScalarSubquery && !isRuntimeScalar) ||
- (hasGroupBy && !stmt.selectList_.isDistinct())) {
+ if ((!isScalarSubquery && !isRuntimeScalar)
+ || (hasGroupBy && !stmt.selectList_.isDistinct() && !isScalarColumn
+ && !isRuntimeScalar)) {
throw new AnalysisException(
"Unsupported predicate with subquery: " + expr.toSql());
}
@@ -941,6 +948,15 @@ public class StmtRewriter {
@Override
protected void rewriteSelectStmtHook(SelectStmt stmt, Analyzer analyzer)
throws AnalysisException {
+ // Rewrite all the subqueries in the HAVING clause.
+ if (stmt.hasHavingClause() && stmt.havingClause_.getSubquery() != null) {
+ if (hasSubqueryInDisjunction(stmt.havingClause_)) {
+ throw new AnalysisException("Subqueries in OR predicates are not supported: "
+ + stmt.havingClause_.toSql());
+ }
+ rewriteHavingClauseSubqueries(stmt, analyzer);
+ }
+
// Rewrite all the subqueries in the WHERE clause.
if (stmt.hasWhereClause()) {
// Push negation to leaf operands.
@@ -1261,5 +1277,118 @@ public class StmtRewriter {
replaceUnqualifiedStarItems(stmt, numTableRefs);
}
}
+
+ /**
+ * Rewrite subqueries of a stmt's HAVING clause. The stmt is rewritten into two
+ * separate statements; an inner statement which performs all sql operations that
+ * evaluated before the HAVING clause and an outer statement which projects the inner
+ * stmt's results with the HAVING clause rewritten as a WHERE clause and also performs
+ * the remainder of the sql operations (ORDER BY, LIMIT). We then rely on the WHERE
+ * clause rewrite rule to handle the subqueries that were originally in the HAVING
+ * clause.
+ *
+ * SELECT a, sum(b) FROM T1 GROUP BY a HAVING count(b) > (SELECT max(c) FROM T2)
+ * ORDER BY 2 LIMIT 10
+ *
+ * Inner Stmt becomes:
+ *
+ * SELECT a, sum(b), count(b) FROM T1 GROUP BY a
+ *
+ * Notice we augment the select list with any aggregates in the HAVING clause that are
+ * missing in the original select list.
+ *
+ * Outer Stmt becomes:
+ *
+ * SELECT $a$1.$c$1 a, $a$1.$c$2 sum(b) FROM
+ * (SELECT a, sum(b), count(b) FROM T1 GROUP BY a) $a$1 ($c$1, $c$2, $c$3) WHERE
+ * $a$1.$c$3 > (SELECT max(c) FROM T2) ORDER BY 2 LIMIT 10
+ *
+ * The query should would then be rewritten by the caller using
+ * rewriteWhereClauseSubqueries()
+ *
+ */
+ private void rewriteHavingClauseSubqueries(SelectStmt stmt, Analyzer analyzer)
+ throws AnalysisException {
+ // Generate the inner query from the current statement pulling up the order by,
+ // limit, and any aggregates in the having clause that aren't projected in the
+ // select list.
+ final SelectStmt innerStmt = stmt.clone();
+ final List<FunctionCallExpr> aggExprs = stmt.hasMultiAggInfo() ?
+ stmt.getMultiAggInfo().getAggExprs() :
+ new ArrayList<>();
+ for (FunctionCallExpr agg : aggExprs) {
+ boolean contains = false;
+ for (SelectListItem selectListItem : stmt.getSelectList().getItems()) {
+ contains = selectListItem.getExpr().equals(agg);
+ if (contains) {
+ break;
+ }
+ }
+ if (!contains) {
+ innerStmt.selectList_.getItems().add(
+ new SelectListItem(agg.clone().reset(), null));
+ }
+ }
+
+ // Remove clauses that will go into the outer statement.
+ innerStmt.havingClause_ = null;
+ innerStmt.limitElement_ = new LimitElement(null, null);
+ if (innerStmt.hasOrderByClause()) {
+ innerStmt.orderByElements_ = null;
+ }
+ innerStmt.reset();
+
+ // Used in the substitution map, as post analyze() exprs won't match.
+ final List<SelectListItem> preAnalyzeSelectList =
+ innerStmt.getSelectList().clone().getItems();
+ final ExprSubstitutionMap smap = new ExprSubstitutionMap();
+ List<String> colLabels =
+ Lists.newArrayListWithCapacity(innerStmt.getSelectList().getItems().size());
+
+ for (int i = 0; i < innerStmt.getSelectList().getItems().size(); ++i) {
+ String colAlias = stmt.getColumnAliasGenerator().getNextAlias();
+ colLabels.add(colAlias);
+ }
+
+ final String innerAlias = stmt.getTableAliasGenerator().getNextAlias();
+ final InlineViewRef innerView = new InlineViewRef(innerAlias, innerStmt, colLabels);
+ innerView.analyze(analyzer);
+
+ // Rewrite the new inline view.
+ rewriteSelectStatement(
+ (SelectStmt) innerView.getViewStmt(), innerView.getViewStmt().getAnalyzer());
+
+ for (int i = 0; i < preAnalyzeSelectList.size(); ++i) {
+ final Expr slot = new SlotRef(Lists.newArrayList(innerAlias, colLabels.get(i)));
+ slot.analyze(analyzer);
+ smap.put(preAnalyzeSelectList.get(i).getExpr(), slot);
+ }
+
+ // Create the new outer statement's select list.
+ final List<SelectListItem> outerSelectList = new ArrayList<>();
+ for (int i = 0; i < stmt.getSelectList().getItems().size(); ++i) {
+ // Project the original select list items and labels
+ final SelectListItem si = new SelectListItem(
+ stmt.getSelectList().getItems().get(i).getExpr().clone().reset().substitute(
+ smap, analyzer, false),
+ stmt.getColLabels().get(i));
+ si.getExpr().analyze(analyzer);
+ outerSelectList.add(si);
+ }
+
+ // Clear out the old stmt properties.
+ stmt.whereClause_ = stmt.havingClause_.reset().substitute(smap, analyzer, false);
+ stmt.whereClause_.analyze(analyzer);
+ stmt.havingClause_ = null;
+ stmt.groupingExprs_ = null;
+ stmt.selectList_.getItems().clear();
+ stmt.selectList_.getItems().addAll(outerSelectList);
+ stmt.fromClause_.getTableRefs().clear();
+ stmt.fromClause_.add(innerView);
+
+ stmt.analyze(analyzer);
+ if (LOG.isTraceEnabled())
+ LOG.trace("Rewritten HAVING Clause SQL: " + stmt.toSql(REWRITTEN));
+ }
}
}
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeSubqueriesTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeSubqueriesTest.java
index a845718..7412709 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeSubqueriesTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeSubqueriesTest.java
@@ -1355,12 +1355,16 @@ public class AnalyzeSubqueriesTest extends AnalyzerTest {
@Test
public void testIllegalSubquery() throws AnalysisException {
// Predicate with a child subquery in the HAVING clause
- AnalysisError("select id, count(*) from functional.alltypestiny t group by " +
- "id having count(*) > (select count(*) from functional.alltypesagg)",
- "Subqueries are not supported in the HAVING clause.");
- AnalysisError("select id, count(*) from functional.alltypestiny t group by " +
- "id having (select count(*) from functional.alltypesagg) > 10",
- "Subqueries are not supported in the HAVING clause.");
+ AnalysisError("select id, count(*) from functional.alltypestiny t group by id " +
+ "having count(*) > (select count(*) from functional.alltypesagg where id = t.id)",
+ "Unsupported correlated subquery: SELECT count(*) FROM functional.alltypesagg " +
+ "WHERE id = t.id");
+ AnalysisError("select id, count(*) from functional.alltypestiny t group by id "
+ + "having (select count(*) from functional.alltypesagg) > 10 and count(*) < "
+ + "(select count(*) from functional.alltypesagg)",
+ "Multiple subqueries are not supported in expression: (SELECT count(*) FROM "
+ + "functional.alltypesagg) > 10 AND count(*) < (SELECT count(*) FROM "
+ + "functional.alltypesagg");
// Subquery in the select list
AnalysisError("select id, (select int_col from functional.alltypestiny) "
@@ -1498,4 +1502,13 @@ public class AnalyzeSubqueriesTest extends AnalyzerTest {
AnalyzesOk("select 1 from functional.alltypes where " +
"coalesce(null, (select bool_col from functional.alltypes where id = 0))");
}
+
+ @Test
+ public void testHavingSubqueries() throws AnalysisException {
+ // Predicate with a child subquery in the HAVING clause
+ AnalyzesOk("select id, count(*) from functional.alltypestiny t group by "
+ + "id having count(*) > (select count(*) from functional.alltypesagg)");
+ AnalyzesOk("select id, count(*) from functional.alltypestiny t group by "
+ + "id having (select count(*) from functional.alltypesagg) > 10");
+ }
}
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test b/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
index bcffd73..cb44adc 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
@@ -3429,4 +3429,75 @@ PLAN-ROOT SINK
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> alltypesagg.id
row-size=39B cardinality=11.00K
+=======
+# IMPALA-9905 Cardinality Check for subqueries with grouping expressions and
+# scalar return types
+select bigint_col from functional.alltypes where int_col >=
+ 3 * (select max(int_col)
+ from functional.alltypestiny where id = 1 group by id
+ )
+---- PLAN
+PLAN-ROOT SINK
+|
+04:NESTED LOOP JOIN [INNER JOIN]
+| predicates: int_col >= 3 * max(int_col)
+| row-size=20B cardinality=7.30K
+|
+|--03:CARDINALITY CHECK
+| | limit: 1
+| | row-size=8B cardinality=1
+| |
+| 02:AGGREGATE [FINALIZE]
+| | output: max(int_col)
+| | group by: id
+| | limit: 2
+| | row-size=8B cardinality=1
+| |
+| 01:SCAN HDFS [functional.alltypestiny]
+| HDFS partitions=4/4 files=4 size=460B
+| predicates: id = 1
+| row-size=8B cardinality=1
+|
+00:SCAN HDFS [functional.alltypes]
+ HDFS partitions=24/24 files=24 size=478.45KB
+ row-size=12B cardinality=7.30K
+---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
+09:EXCHANGE [UNPARTITIONED]
+|
+04:NESTED LOOP JOIN [INNER JOIN, BROADCAST]
+| predicates: int_col >= 3 * max(int_col)
+| row-size=20B cardinality=7.30K
+|
+|--08:EXCHANGE [BROADCAST]
+| |
+| 03:CARDINALITY CHECK
+| | limit: 1
+| | row-size=8B cardinality=1
+| |
+| 07:EXCHANGE [UNPARTITIONED]
+| | limit: 2
+| |
+| 06:AGGREGATE [FINALIZE]
+| | output: max:merge(int_col)
+| | group by: id
+| | limit: 2
+| | row-size=8B cardinality=1
+| |
+| 05:EXCHANGE [HASH(id)]
+| |
+| 02:AGGREGATE [STREAMING]
+| | output: max(int_col)
+| | group by: id
+| | row-size=8B cardinality=1
+| |
+| 01:SCAN HDFS [functional.alltypestiny]
+| HDFS partitions=4/4 files=4 size=460B
+| predicates: id = 1
+| row-size=8B cardinality=1
+|
+00:SCAN HDFS [functional.alltypes]
+ HDFS partitions=24/24 files=24 size=478.45KB
+ row-size=12B cardinality=7.30K
====
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test
index 9057a3f..d7d2953 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test
@@ -9704,3 +9704,530 @@ PLAN-ROOT SINK
predicates: ss_quantity <= 20, ss_quantity >= 1
row-size=4B cardinality=288.04K
====
+# TPCDS-Q44
+SELECT asceding.rnk,
+ i1.i_product_name best_performing,
+ i2.i_product_name worst_performing
+FROM
+ (SELECT *
+ FROM
+ (SELECT item_sk,
+ rank() OVER (
+ ORDER BY rank_col ASC) rnk
+ FROM
+ (SELECT ss_item_sk item_sk,
+ avg(ss_net_profit) rank_col
+ FROM store_sales ss1
+ WHERE ss_store_sk = 4
+ GROUP BY ss_item_sk
+ HAVING avg(ss_net_profit) > 0.9*
+ (SELECT avg(ss_net_profit) rank_col
+ FROM store_sales
+ WHERE ss_store_sk = 4
+ AND ss_addr_sk IS NULL
+ GROUP BY ss_store_sk))V1)V11
+ WHERE rnk < 11) asceding,
+ (SELECT *
+ FROM
+ (SELECT item_sk,
+ rank() OVER (
+ ORDER BY rank_col DESC) rnk
+ FROM
+ (SELECT ss_item_sk item_sk,
+ avg(ss_net_profit) rank_col
+ FROM store_sales ss1
+ WHERE ss_store_sk = 4
+ GROUP BY ss_item_sk
+ HAVING avg(ss_net_profit) > 0.9*
+ (SELECT avg(ss_net_profit) rank_col
+ FROM store_sales
+ WHERE ss_store_sk = 4
+ AND ss_addr_sk IS NULL
+ GROUP BY ss_store_sk))V2)V21
+ WHERE rnk < 11) descending,
+ item i1,
+ item i2
+WHERE asceding.rnk = descending.rnk
+ AND i1.i_item_sk=asceding.item_sk
+ AND i2.i_item_sk=descending.item_sk
+ORDER BY asceding.rnk
+LIMIT 100
+---- PLAN
+Max Per-Host Resource Reservation: Memory=65.75MB Threads=7
+Per-Host Resource Estimates: Memory=626MB
+PLAN-ROOT SINK
+|
+23:TOP-N [LIMIT=100]
+| order by: rnk ASC
+| row-size=68B cardinality=100
+|
+22:HASH JOIN [INNER JOIN]
+| hash predicates: i2.i_item_sk = ss_item_sk
+| runtime filters: RF000 <- ss_item_sk
+| row-size=148B cardinality=1.80K
+|
+|--21:HASH JOIN [INNER JOIN]
+| | hash predicates: rank() = rank()
+| | row-size=110B cardinality=1.80K
+| |
+| |--17:SELECT
+| | | predicates: rank() < 11
+| | | row-size=36B cardinality=1.80K
+| | |
+| | 16:ANALYTIC
+| | | functions: rank()
+| | | order by: avg(ss_net_profit) DESC
+| | | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+| | | row-size=36B cardinality=17.98K
+| | |
+| | 15:SORT
+| | | order by: avg(ss_net_profit) DESC
+| | | row-size=28B cardinality=17.98K
+| | |
+| | 14:NESTED LOOP JOIN [INNER JOIN]
+| | | predicates: avg(ss_net_profit) > 0.9 * avg(ss_net_profit)
+| | | row-size=28B cardinality=17.98K
+| | |
+| | |--13:CARDINALITY CHECK
+| | | | limit: 1
+| | | | row-size=12B cardinality=1
+| | | |
+| | | 12:AGGREGATE [FINALIZE]
+| | | | output: avg(ss_net_profit)
+| | | | group by: ss_store_sk
+| | | | limit: 2
+| | | | row-size=12B cardinality=2
+| | | |
+| | | 11:SCAN HDFS [tpcds.store_sales]
+| | | HDFS partitions=1824/1824 files=1824 size=346.60MB
+| | | predicates: ss_addr_sk IS NULL, ss_store_sk = 4
+| | | row-size=12B cardinality=53.06K
+| | |
+| | 10:AGGREGATE [FINALIZE]
+| | | output: avg(ss_net_profit)
+| | | group by: ss_item_sk
+| | | row-size=16B cardinality=17.98K
+| | |
+| | 09:SCAN HDFS [tpcds.store_sales ss1]
+| | HDFS partitions=1824/1824 files=1824 size=346.60MB
+| | predicates: ss_store_sk = 4
+| | row-size=16B cardinality=480.07K
+| |
+| 20:HASH JOIN [INNER JOIN]
+| | hash predicates: i1.i_item_sk = ss_item_sk
+| | runtime filters: RF002 <- ss_item_sk
+| | row-size=74B cardinality=1.80K
+| |
+| |--08:SELECT
+| | | predicates: rank() < 11
+| | | row-size=36B cardinality=1.80K
+| | |
+| | 07:ANALYTIC
+| | | functions: rank()
+| | | order by: avg(ss_net_profit) ASC
+| | | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+| | | row-size=36B cardinality=17.98K
+| | |
+| | 06:SORT
+| | | order by: avg(ss_net_profit) ASC
+| | | row-size=28B cardinality=17.98K
+| | |
+| | 05:NESTED LOOP JOIN [INNER JOIN]
+| | | predicates: avg(ss_net_profit) > 0.9 * avg(ss_net_profit)
+| | | row-size=28B cardinality=17.98K
+| | |
+| | |--04:CARDINALITY CHECK
+| | | | limit: 1
+| | | | row-size=12B cardinality=1
+| | | |
+| | | 03:AGGREGATE [FINALIZE]
+| | | | output: avg(ss_net_profit)
+| | | | group by: ss_store_sk
+| | | | limit: 2
+| | | | row-size=12B cardinality=2
+| | | |
+| | | 02:SCAN HDFS [tpcds.store_sales]
+| | | HDFS partitions=1824/1824 files=1824 size=346.60MB
+| | | predicates: ss_addr_sk IS NULL, ss_store_sk = 4
+| | | row-size=12B cardinality=53.06K
+| | |
+| | 01:AGGREGATE [FINALIZE]
+| | | output: avg(ss_net_profit)
+| | | group by: ss_item_sk
+| | | row-size=16B cardinality=17.98K
+| | |
+| | 00:SCAN HDFS [tpcds.store_sales ss1]
+| | HDFS partitions=1824/1824 files=1824 size=346.60MB
+| | predicates: ss_store_sk = 4
+| | row-size=16B cardinality=480.07K
+| |
+| 18:SCAN HDFS [tpcds.item i1]
+| HDFS partitions=1/1 files=1 size=4.82MB
+| runtime filters: RF002 -> i1.i_item_sk
+| row-size=38B cardinality=18.00K
+|
+19:SCAN HDFS [tpcds.item i2]
+ HDFS partitions=1/1 files=1 size=4.82MB
+ runtime filters: RF000 -> i2.i_item_sk
+ row-size=38B cardinality=18.00K
+---- DISTRIBUTEDPLAN
+Max Per-Host Resource Reservation: Memory=90.56MB Threads=21
+Per-Host Resource Estimates: Memory=685MB
+PLAN-ROOT SINK
+|
+41:MERGING-EXCHANGE [UNPARTITIONED]
+| order by: rnk ASC
+| limit: 100
+|
+23:TOP-N [LIMIT=100]
+| order by: rnk ASC
+| row-size=68B cardinality=100
+|
+22:HASH JOIN [INNER JOIN, BROADCAST]
+| hash predicates: i2.i_item_sk = ss_item_sk
+| runtime filters: RF000 <- ss_item_sk
+| row-size=148B cardinality=1.80K
+|
+|--40:EXCHANGE [BROADCAST]
+| |
+| 21:HASH JOIN [INNER JOIN, BROADCAST]
+| | hash predicates: rank() = rank()
+| | row-size=110B cardinality=1.80K
+| |
+| |--39:EXCHANGE [UNPARTITIONED]
+| | |
+| | 20:HASH JOIN [INNER JOIN, BROADCAST]
+| | | hash predicates: ss_item_sk = i1.i_item_sk
+| | | row-size=74B cardinality=1.80K
+| | |
+| | |--38:EXCHANGE [UNPARTITIONED]
+| | | |
+| | | 18:SCAN HDFS [tpcds.item i1]
+| | | HDFS partitions=1/1 files=1 size=4.82MB
+| | | row-size=38B cardinality=18.00K
+| | |
+| | 08:SELECT
+| | | predicates: rank() < 11
+| | | row-size=36B cardinality=1.80K
+| | |
+| | 07:ANALYTIC
+| | | functions: rank()
+| | | order by: avg(ss_net_profit) ASC
+| | | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+| | | row-size=36B cardinality=17.98K
+| | |
+| | 37:MERGING-EXCHANGE [UNPARTITIONED]
+| | | order by: avg(ss_net_profit) ASC
+| | |
+| | 06:SORT
+| | | order by: avg(ss_net_profit) ASC
+| | | row-size=28B cardinality=17.98K
+| | |
+| | 05:NESTED LOOP JOIN [INNER JOIN, BROADCAST]
+| | | predicates: avg(ss_net_profit) > 0.9 * avg(ss_net_profit)
+| | | row-size=28B cardinality=17.98K
+| | |
+| | |--36:EXCHANGE [BROADCAST]
+| | | |
+| | | 04:CARDINALITY CHECK
+| | | | limit: 1
+| | | | row-size=12B cardinality=1
+| | | |
+| | | 35:EXCHANGE [UNPARTITIONED]
+| | | | limit: 2
+| | | |
+| | | 34:AGGREGATE [FINALIZE]
+| | | | output: avg:merge(ss_net_profit)
+| | | | group by: ss_store_sk
+| | | | limit: 2
+| | | | row-size=12B cardinality=2
+| | | |
+| | | 33:EXCHANGE [HASH(ss_store_sk)]
+| | | |
+| | | 03:AGGREGATE [STREAMING]
+| | | | output: avg(ss_net_profit)
+| | | | group by: ss_store_sk
+| | | | row-size=12B cardinality=6
+| | | |
+| | | 02:SCAN HDFS [tpcds.store_sales]
+| | | HDFS partitions=1824/1824 files=1824 size=346.60MB
+| | | predicates: ss_addr_sk IS NULL, ss_store_sk = 4
+| | | row-size=12B cardinality=53.06K
+| | |
+| | 32:AGGREGATE [FINALIZE]
+| | | output: avg:merge(ss_net_profit)
+| | | group by: ss_item_sk
+| | | row-size=16B cardinality=17.98K
+| | |
+| | 31:EXCHANGE [HASH(ss_item_sk)]
+| | |
+| | 01:AGGREGATE [STREAMING]
+| | | output: avg(ss_net_profit)
+| | | group by: ss_item_sk
+| | | row-size=16B cardinality=17.98K
+| | |
+| | 00:SCAN HDFS [tpcds.store_sales ss1]
+| | HDFS partitions=1824/1824 files=1824 size=346.60MB
+| | predicates: ss_store_sk = 4
+| | row-size=16B cardinality=480.07K
+| |
+| 17:SELECT
+| | predicates: rank() < 11
+| | row-size=36B cardinality=1.80K
+| |
+| 16:ANALYTIC
+| | functions: rank()
+| | order by: avg(ss_net_profit) DESC
+| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+| | row-size=36B cardinality=17.98K
+| |
+| 30:MERGING-EXCHANGE [UNPARTITIONED]
+| | order by: avg(ss_net_profit) DESC
+| |
+| 15:SORT
+| | order by: avg(ss_net_profit) DESC
+| | row-size=28B cardinality=17.98K
+| |
+| 14:NESTED LOOP JOIN [INNER JOIN, BROADCAST]
+| | predicates: avg(ss_net_profit) > 0.9 * avg(ss_net_profit)
+| | row-size=28B cardinality=17.98K
+| |
+| |--29:EXCHANGE [BROADCAST]
+| | |
+| | 13:CARDINALITY CHECK
+| | | limit: 1
+| | | row-size=12B cardinality=1
+| | |
+| | 28:EXCHANGE [UNPARTITIONED]
+| | | limit: 2
+| | |
+| | 27:AGGREGATE [FINALIZE]
+| | | output: avg:merge(ss_net_profit)
+| | | group by: ss_store_sk
+| | | limit: 2
+| | | row-size=12B cardinality=2
+| | |
+| | 26:EXCHANGE [HASH(ss_store_sk)]
+| | |
+| | 12:AGGREGATE [STREAMING]
+| | | output: avg(ss_net_profit)
+| | | group by: ss_store_sk
+| | | row-size=12B cardinality=6
+| | |
+| | 11:SCAN HDFS [tpcds.store_sales]
+| | HDFS partitions=1824/1824 files=1824 size=346.60MB
+| | predicates: ss_addr_sk IS NULL, ss_store_sk = 4
+| | row-size=12B cardinality=53.06K
+| |
+| 25:AGGREGATE [FINALIZE]
+| | output: avg:merge(ss_net_profit)
+| | group by: ss_item_sk
+| | row-size=16B cardinality=17.98K
+| |
+| 24:EXCHANGE [HASH(ss_item_sk)]
+| |
+| 10:AGGREGATE [STREAMING]
+| | output: avg(ss_net_profit)
+| | group by: ss_item_sk
+| | row-size=16B cardinality=17.98K
+| |
+| 09:SCAN HDFS [tpcds.store_sales ss1]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_store_sk = 4
+| row-size=16B cardinality=480.07K
+|
+19:SCAN HDFS [tpcds.item i2]
+ HDFS partitions=1/1 files=1 size=4.82MB
+ runtime filters: RF000 -> i2.i_item_sk
+ row-size=38B cardinality=18.00K
+---- PARALLELPLANS
+Max Per-Host Resource Reservation: Memory=157.12MB Threads=28
+Per-Host Resource Estimates: Memory=368MB
+PLAN-ROOT SINK
+|
+41:MERGING-EXCHANGE [UNPARTITIONED]
+| order by: rnk ASC
+| limit: 100
+|
+23:TOP-N [LIMIT=100]
+| order by: rnk ASC
+| row-size=68B cardinality=100
+|
+22:HASH JOIN [INNER JOIN, BROADCAST]
+| hash predicates: i2.i_item_sk = ss_item_sk
+| row-size=148B cardinality=1.80K
+|
+|--JOIN BUILD
+| | join-table-id=00 plan-id=01 cohort-id=01
+| | build expressions: ss_item_sk
+| | runtime filters: RF000 <- ss_item_sk
+| |
+| 40:EXCHANGE [BROADCAST]
+| |
+| 21:HASH JOIN [INNER JOIN, BROADCAST]
+| | hash predicates: rank() = rank()
+| | row-size=110B cardinality=1.80K
+| |
+| |--JOIN BUILD
+| | | join-table-id=01 plan-id=02 cohort-id=02
+| | | build expressions: rank()
+| | |
+| | 39:EXCHANGE [UNPARTITIONED]
+| | |
+| | 20:HASH JOIN [INNER JOIN, BROADCAST]
+| | | hash predicates: ss_item_sk = i1.i_item_sk
+| | | row-size=74B cardinality=1.80K
+| | |
+| | |--JOIN BUILD
+| | | | join-table-id=02 plan-id=03 cohort-id=03
+| | | | build expressions: i1.i_item_sk
+| | | |
+| | | 38:EXCHANGE [UNPARTITIONED]
+| | | |
+| | | 18:SCAN HDFS [tpcds.item i1]
+| | | HDFS partitions=1/1 files=1 size=4.82MB
+| | | row-size=38B cardinality=18.00K
+| | |
+| | 08:SELECT
+| | | predicates: rank() < 11
+| | | row-size=36B cardinality=1.80K
+| | |
+| | 07:ANALYTIC
+| | | functions: rank()
+| | | order by: avg(ss_net_profit) ASC
+| | | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+| | | row-size=36B cardinality=17.98K
+| | |
+| | 37:MERGING-EXCHANGE [UNPARTITIONED]
+| | | order by: avg(ss_net_profit) ASC
+| | |
+| | 06:SORT
+| | | order by: avg(ss_net_profit) ASC
+| | | row-size=28B cardinality=17.98K
+| | |
+| | 05:NESTED LOOP JOIN [INNER JOIN, BROADCAST]
+| | | join table id: 03
+| | | predicates: avg(ss_net_profit) > 0.9 * avg(ss_net_profit)
+| | | row-size=28B cardinality=17.98K
+| | |
+| | |--JOIN BUILD
+| | | | join-table-id=03 plan-id=04 cohort-id=03
+| | | |
+| | | 36:EXCHANGE [BROADCAST]
+| | | |
+| | | 04:CARDINALITY CHECK
+| | | | limit: 1
+| | | | row-size=12B cardinality=1
+| | | |
+| | | 35:EXCHANGE [UNPARTITIONED]
+| | | | limit: 2
+| | | |
+| | | 34:AGGREGATE [FINALIZE]
+| | | | output: avg:merge(ss_net_profit)
+| | | | group by: ss_store_sk
+| | | | limit: 2
+| | | | row-size=12B cardinality=2
+| | | |
+| | | 33:EXCHANGE [HASH(ss_store_sk)]
+| | | |
+| | | 03:AGGREGATE [STREAMING]
+| | | | output: avg(ss_net_profit)
+| | | | group by: ss_store_sk
+| | | | row-size=12B cardinality=6
+| | | |
+| | | 02:SCAN HDFS [tpcds.store_sales]
+| | | HDFS partitions=1824/1824 files=1824 size=346.60MB
+| | | predicates: ss_addr_sk IS NULL, ss_store_sk = 4
+| | | row-size=12B cardinality=53.06K
+| | |
+| | 32:AGGREGATE [FINALIZE]
+| | | output: avg:merge(ss_net_profit)
+| | | group by: ss_item_sk
+| | | row-size=16B cardinality=17.98K
+| | |
+| | 31:EXCHANGE [HASH(ss_item_sk)]
+| | |
+| | 01:AGGREGATE [STREAMING]
+| | | output: avg(ss_net_profit)
+| | | group by: ss_item_sk
+| | | row-size=16B cardinality=17.98K
+| | |
+| | 00:SCAN HDFS [tpcds.store_sales ss1]
+| | HDFS partitions=1824/1824 files=1824 size=346.60MB
+| | predicates: ss_store_sk = 4
+| | row-size=16B cardinality=480.07K
+| |
+| 17:SELECT
+| | predicates: rank() < 11
+| | row-size=36B cardinality=1.80K
+| |
+| 16:ANALYTIC
+| | functions: rank()
+| | order by: avg(ss_net_profit) DESC
+| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+| | row-size=36B cardinality=17.98K
+| |
+| 30:MERGING-EXCHANGE [UNPARTITIONED]
+| | order by: avg(ss_net_profit) DESC
+| |
+| 15:SORT
+| | order by: avg(ss_net_profit) DESC
+| | row-size=28B cardinality=17.98K
+| |
+| 14:NESTED LOOP JOIN [INNER JOIN, BROADCAST]
+| | join table id: 04
+| | predicates: avg(ss_net_profit) > 0.9 * avg(ss_net_profit)
+| | row-size=28B cardinality=17.98K
+| |
+| |--JOIN BUILD
+| | | join-table-id=04 plan-id=05 cohort-id=02
+| | |
+| | 29:EXCHANGE [BROADCAST]
+| | |
+| | 13:CARDINALITY CHECK
+| | | limit: 1
+| | | row-size=12B cardinality=1
+| | |
+| | 28:EXCHANGE [UNPARTITIONED]
+| | | limit: 2
+| | |
+| | 27:AGGREGATE [FINALIZE]
+| | | output: avg:merge(ss_net_profit)
+| | | group by: ss_store_sk
+| | | limit: 2
+| | | row-size=12B cardinality=2
+| | |
+| | 26:EXCHANGE [HASH(ss_store_sk)]
+| | |
+| | 12:AGGREGATE [STREAMING]
+| | | output: avg(ss_net_profit)
+| | | group by: ss_store_sk
+| | | row-size=12B cardinality=6
+| | |
+| | 11:SCAN HDFS [tpcds.store_sales]
+| | HDFS partitions=1824/1824 files=1824 size=346.60MB
+| | predicates: ss_addr_sk IS NULL, ss_store_sk = 4
+| | row-size=12B cardinality=53.06K
+| |
+| 25:AGGREGATE [FINALIZE]
+| | output: avg:merge(ss_net_profit)
+| | group by: ss_item_sk
+| | row-size=16B cardinality=17.98K
+| |
+| 24:EXCHANGE [HASH(ss_item_sk)]
+| |
+| 10:AGGREGATE [STREAMING]
+| | output: avg(ss_net_profit)
+| | group by: ss_item_sk
+| | row-size=16B cardinality=17.98K
+| |
+| 09:SCAN HDFS [tpcds.store_sales ss1]
+| HDFS partitions=1824/1824 files=1824 size=346.60MB
+| predicates: ss_store_sk = 4
+| row-size=16B cardinality=480.07K
+|
+19:SCAN HDFS [tpcds.item i2]
+ HDFS partitions=1/1 files=1 size=4.82MB
+ runtime filters: RF000 -> i2.i_item_sk
+ row-size=38B cardinality=18.00K
+====
diff --git a/testdata/workloads/functional-query/queries/QueryTest/subquery.test b/testdata/workloads/functional-query/queries/QueryTest/subquery.test
index 000c8cf..8565f2a 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/subquery.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/subquery.test
@@ -1212,3 +1212,108 @@ FROM alltypessmall WHERE id < 2
---- TYPES
INT, BOOLEAN, BOOLEAN, TINYINT
====
+---- QUERY
+# Having with subquery
+select id, sum(int_col+smallint_col)
+ from alltypestiny group by id
+ having sum(int_col+smallint_col) in
+ (select sum(int_col) from alltypessmall group by id)
+order by id
+---- RESULTS
+0,0
+1,2
+2,0
+3,2
+4,0
+5,2
+6,0
+7,2
+---- TYPES
+INT, BIGINT
+====
+---- QUERY
+# Having with not in subquery with nested having
+select id, sum(int_col+smallint_col)
+ from alltypestiny group by id
+ having sum(int_col+smallint_col) not in
+ (select sum(int_col) from alltypessmall group by id having
+ sum(int_col) < 2)
+order by id
+---- RESULTS
+1,2
+3,2
+5,2
+7,2
+---- TYPES
+INT, BIGINT
+====
+---- QUERY
+# Having with subquery with join and subquery in where
+select id, sum(int_col)
+ from alltypestiny
+ where id in (select id from alltypestiny where id % 2 = 0)
+ group by id having sum(int_col) in
+ (select sum(a.int_col) from alltypessmall a, alltypestiny b where
+ a.id = b.id and b.int_col != 1 group by a.id)
+order by id
+---- RESULTS
+0,0
+2,0
+4,0
+6,0
+---- TYPES
+INT, BIGINT
+====
+---- QUERY
+# Having with subquery with subquery in select
+select id, sum(int_col)
+ from alltypestiny
+ group by id having sum(int_col) >
+ (select min(int_col)+(select min(int_col) from alltypessmall)
+ from alltypestiny)
+order by id
+---- RESULTS
+1,1
+3,1
+5,1
+7,1
+---- TYPES
+INT, BIGINT
+====
+---- QUERY
+# Having with exists subquery
+select id, sum(int_col+smallint_col)
+ from alltypestiny group by id
+ having not exists
+ (select sum(int_col) from alltypessmall group by id)
+order by id
+---- RESULTS
+---- TYPES
+INT, BIGINT
+====
+---- QUERY
+# Subquery in having and where clauses
+select id, sum(int_col) from alltypestiny where
+ id > (select min(id)+1 from alltypessmall) group by id
+ having sum(int_col) in (select sum(int_col) from alltypessmall group by id)
+ order by id
+---- RESULTS
+2,0
+3,1
+4,0
+5,1
+6,0
+7,1
+---- TYPES
+INT, BIGINT
+====
+---- QUERY
+# Having subquery with a runtime scalar error
+select id, sum(int_col) from alltypestiny where
+ id > (select min(id)+1 from alltypessmall) group by id
+ having sum(int_col) >= (select sum(int_col) from alltypessmall group by id)
+ order by id
+---- RESULTS
+---- CATCH
+Subquery must not return more than one row:
+====
diff --git a/testdata/workloads/tpcds/queries/tpcds-q23-1.test b/testdata/workloads/tpcds/queries/tpcds-q23-1.test
new file mode 100644
index 0000000..4a6abf5
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/tpcds-q23-1.test
@@ -0,0 +1,56 @@
+====
+---- QUERY: TPCDS-Q23-1
+with frequent_ss_items as
+ (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt
+ from store_sales
+ ,date_dim
+ ,item
+ where ss_sold_date_sk = d_date_sk
+ and ss_item_sk = i_item_sk
+ and d_year in (2000,2000+1,2000+2,2000+3)
+ group by substr(i_item_desc,1,30),i_item_sk,d_date
+ having count(*) >4),
+ max_store_sales as
+ (select max(csales) tpcds_cmax
+ from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales
+ from store_sales
+ ,customer
+ ,date_dim
+ where ss_customer_sk = c_customer_sk
+ and ss_sold_date_sk = d_date_sk
+ and d_year in (2000,2000+1,2000+2,2000+3)
+ group by c_customer_sk) x),
+ best_ss_customer as
+ (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales
+ from store_sales
+ ,customer
+ where ss_customer_sk = c_customer_sk
+ group by c_customer_sk
+ having sum(ss_quantity*ss_sales_price) > (50/100.0) * (select
+ *
+from
+ max_store_sales))
+ select sum(sales)
+ from (select cs_quantity*cs_list_price sales
+ from catalog_sales
+ ,date_dim
+ where d_year = 2000
+ and d_moy = 2
+ and cs_sold_date_sk = d_date_sk
+ and cs_item_sk in (select item_sk from frequent_ss_items)
+ and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer)
+ union all
+ select ws_quantity*ws_list_price sales
+ from web_sales
+ ,date_dim
+ where d_year = 2000
+ and d_moy = 2
+ and ws_sold_date_sk = d_date_sk
+ and ws_item_sk in (select item_sk from frequent_ss_items)
+ and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer)) y
+ limit 100;
+---- RESULTS
+17030.91
+---- TYPES
+DECIMAL
+====
diff --git a/testdata/workloads/tpcds/queries/tpcds-q23-2.test b/testdata/workloads/tpcds/queries/tpcds-q23-2.test
new file mode 100644
index 0000000..ec093d9
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/tpcds-q23-2.test
@@ -0,0 +1,65 @@
+====
+---- QUERY: TPCDS-Q23-2
+with frequent_ss_items as
+ (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt
+ from store_sales
+ ,date_dim
+ ,item
+ where ss_sold_date_sk = d_date_sk
+ and ss_item_sk = i_item_sk
+ and d_year in (2000,2000 + 1,2000 + 2,2000 + 3)
+ group by substr(i_item_desc,1,30),i_item_sk,d_date
+ having count(*) >4),
+ max_store_sales as
+ (select max(csales) tpcds_cmax
+ from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales
+ from store_sales
+ ,customer
+ ,date_dim
+ where ss_customer_sk = c_customer_sk
+ and ss_sold_date_sk = d_date_sk
+ and d_year in (2000,2000+1,2000+2,2000+3)
+ group by c_customer_sk) x),
+ best_ss_customer as
+ (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales
+ from store_sales
+ ,customer
+ where ss_customer_sk = c_customer_sk
+ group by c_customer_sk
+ having sum(ss_quantity*ss_sales_price) > (50/100.0) * (select
+ *
+ from max_store_sales))
+ select c_last_name,c_first_name,sales
+ from (select c_last_name,c_first_name,sum(cs_quantity*cs_list_price) sales
+ from catalog_sales
+ ,customer
+ ,date_dim
+ where d_year = 2000
+ and d_moy = 2
+ and cs_sold_date_sk = d_date_sk
+ and cs_item_sk in (select item_sk from frequent_ss_items)
+ and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer)
+ and cs_bill_customer_sk = c_customer_sk
+ group by c_last_name,c_first_name
+ union all
+ select c_last_name,c_first_name,sum(ws_quantity*ws_list_price) sales
+ from web_sales
+ ,customer
+ ,date_dim
+ where d_year = 2000
+ and d_moy = 2
+ and ws_sold_date_sk = d_date_sk
+ and ws_item_sk in (select item_sk from frequent_ss_items)
+ and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer)
+ and ws_bill_customer_sk = c_customer_sk
+ group by c_last_name,c_first_name) y
+ order by c_last_name,c_first_name,sales
+ limit 100;
+---- RESULTS
+'Brown','Monika',6031.52
+'Collins','Gordon',727.57
+'Green','Jesse',9672.96
+'NULL','Robert',598.86
+---- TYPES
+STRING, STRING, DECIMAL
+====
diff --git a/testdata/workloads/tpcds/queries/tpcds-q24-1.test b/testdata/workloads/tpcds/queries/tpcds-q24-1.test
new file mode 100644
index 0000000..25d0dd5
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/tpcds-q24-1.test
@@ -0,0 +1,58 @@
+====
+---- QUERY: TPCDS-Q24-1
+with ssales as
+(select c_last_name
+ ,c_first_name
+ ,s_store_name
+ ,ca_state
+ ,s_state
+ ,i_color
+ ,i_current_price
+ ,i_manager_id
+ ,i_units
+ ,i_size
+ ,sum(ss_net_paid) netpaid
+from store_sales
+ ,store_returns
+ ,store
+ ,item
+ ,customer
+ ,customer_address
+where ss_ticket_number = sr_ticket_number
+ and ss_item_sk = sr_item_sk
+ and ss_customer_sk = c_customer_sk
+ and ss_item_sk = i_item_sk
+ and ss_store_sk = s_store_sk
+ and c_current_addr_sk = ca_address_sk
+ and c_birth_country <> upper(ca_country)
+ and s_zip = ca_zip
+and s_market_id = 8
+group by c_last_name
+ ,c_first_name
+ ,s_store_name
+ ,ca_state
+ ,s_state
+ ,i_color
+ ,i_current_price
+ ,i_manager_id
+ ,i_units
+ ,i_size)
+select c_last_name
+ ,c_first_name
+ ,s_store_name
+ ,sum(netpaid) paid
+from ssales
+where i_color = 'peach'
+group by c_last_name
+ ,c_first_name
+ ,s_store_name
+having sum(netpaid) > (select 0.05*avg(netpaid)
+ from ssales)
+order by c_last_name, c_first_name, s_store_name;
+---- RESULTS
+'Martins','Cara','bar',241.96
+'Smallwood','Rhonda','bar',3089.28
+'Terry','Sandra','bar',509.42
+---- TYPES
+STRING, STRING, STRING, DECIMAL
+====
diff --git a/testdata/workloads/tpcds/queries/tpcds-q24-2.test b/testdata/workloads/tpcds/queries/tpcds-q24-2.test
new file mode 100644
index 0000000..1ee1fc9
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/tpcds-q24-2.test
@@ -0,0 +1,59 @@
+====
+---- QUERY: TPCDS-Q24-2
+with ssales as
+(select c_last_name
+ ,c_first_name
+ ,s_store_name
+ ,ca_state
+ ,s_state
+ ,i_color
+ ,i_current_price
+ ,i_manager_id
+ ,i_units
+ ,i_size
+ ,sum(ss_net_paid) netpaid
+from store_sales
+ ,store_returns
+ ,store
+ ,item
+ ,customer
+ ,customer_address
+where ss_ticket_number = sr_ticket_number
+ and ss_item_sk = sr_item_sk
+ and ss_customer_sk = c_customer_sk
+ and ss_item_sk = i_item_sk
+ and ss_store_sk = s_store_sk
+ and c_current_addr_sk = ca_address_sk
+ and c_birth_country <> upper(ca_country)
+ and s_zip = ca_zip
+ and s_market_id = 8
+group by c_last_name
+ ,c_first_name
+ ,s_store_name
+ ,ca_state
+ ,s_state
+ ,i_color
+ ,i_current_price
+ ,i_manager_id
+ ,i_units
+ ,i_size)
+select c_last_name
+ ,c_first_name
+ ,s_store_name
+ ,sum(netpaid) paid
+from ssales
+where i_color = 'saddle'
+group by c_last_name
+ ,c_first_name
+ ,s_store_name
+having sum(netpaid) > (select 0.05*avg(netpaid)
+ from ssales)
+order by c_last_name, c_first_name, s_store_name;
+---- RESULTS
+'Hamlin','Heather','able',149.65
+'Martin','Harold','bar',5834.88
+'Nall','Mike','able',999.70
+'Southern','Jeannie','bar',446.31
+---- TYPES
+STRING, STRING, STRING, DECIMAL
+====
diff --git a/testdata/workloads/tpcds/queries/tpcds-q44.test b/testdata/workloads/tpcds/queries/tpcds-q44.test
new file mode 100644
index 0000000..c11036b
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/tpcds-q44.test
@@ -0,0 +1,64 @@
+====
+---- QUERY: TPCDS-Q44
+SELECT asceding.rnk,
+ i1.i_product_name best_performing,
+ i2.i_product_name worst_performing
+FROM
+ (SELECT *
+ FROM
+ (SELECT item_sk,
+ rank() OVER (
+ ORDER BY rank_col ASC) rnk
+ FROM
+ (SELECT ss_item_sk item_sk,
+ avg(ss_net_profit) rank_col
+ FROM store_sales ss1
+ WHERE ss_store_sk = 4
+ GROUP BY ss_item_sk
+ HAVING avg(ss_net_profit) > 0.9*
+ (SELECT avg(ss_net_profit) rank_col
+ FROM store_sales
+ WHERE ss_store_sk = 4
+ AND ss_addr_sk IS NULL
+ GROUP BY ss_store_sk))V1)V11
+ WHERE rnk < 11) asceding,
+ (SELECT *
+ FROM
+ (SELECT item_sk,
+ rank() OVER (
+ ORDER BY rank_col DESC) rnk
+ FROM
+ (SELECT ss_item_sk item_sk,
+ avg(ss_net_profit) rank_col
+ FROM store_sales ss1
+ WHERE ss_store_sk = 4
+ GROUP BY ss_item_sk
+ HAVING avg(ss_net_profit) > 0.9*
+ (SELECT avg(ss_net_profit) rank_col
+ FROM store_sales
+ WHERE ss_store_sk = 4
+ AND ss_addr_sk IS NULL
+ GROUP BY ss_store_sk))V2)V21
+ WHERE rnk < 11) descending,
+ item i1,
+ item i2
+WHERE asceding.rnk = descending.rnk
+ AND i1.i_item_sk=asceding.item_sk
+ AND i2.i_item_sk=descending.item_sk
+ORDER BY asceding.rnk
+LIMIT 100;
+---- RESULTS
+1,'oughtantiprin st','callyeingbarcallyought'
+2,'barcallyprioughtought','bareseationcallyought'
+3,'ableeingantiable','barn stcallycally'
+4,'n stesebarn st','eingoughtn stn st'
+5,'antioughtationbarought','callycallybarantiought'
+6,'callyeseationantiought','NULL'
+7,'priableeseableought','eseableablepriought'
+8,'ableoughtableeseought','ationoughtantianti'
+9,'esebarableeseought','callyn stantieseought'
+10,'eingoughtn station','barcallyableought'
+---- TYPES
+BIGINT, STRING, STRING
+====
+
diff --git a/tests/query_test/test_tpcds_queries.py b/tests/query_test/test_tpcds_queries.py
index 41fa1a7..b8a8504 100644
--- a/tests/query_test/test_tpcds_queries.py
+++ b/tests/query_test/test_tpcds_queries.py
@@ -109,6 +109,18 @@ class TestTpcdsQuery(ImpalaTestSuite):
def test_tpcds_q21(self, vector):
self.run_test_case(self.get_workload() + '-q21', vector)
+ def test_tpcds_q23_1(self, vector):
+ self.run_test_case(self.get_workload() + '-q23-1', vector)
+
+ def test_tpcds_q23_2(self, vector):
+ self.run_test_case(self.get_workload() + '-q23-2', vector)
+
+ def test_tpcds_q24_1(self, vector):
+ self.run_test_case(self.get_workload() + '-q24-1', vector)
+
+ def test_tpcds_q24_2(self, vector):
+ self.run_test_case(self.get_workload() + '-q24-2', vector)
+
def test_tpcds_q25(self, vector):
self.run_test_case(self.get_workload() + '-q25', vector)
@@ -151,6 +163,9 @@ class TestTpcdsQuery(ImpalaTestSuite):
def test_tpcds_q43(self, vector):
self.run_test_case(self.get_workload() + '-q43', vector)
+ def test_tpcds_q44(self, vector):
+ self.run_test_case(self.get_workload() + '-q44', vector)
+
def test_tpcds_q46(self, vector):
self.run_test_case(self.get_workload() + '-q46', vector)