You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2020/07/06 15:48:29 UTC

[impala] branch master updated (49af396 -> 2dca556)

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git.


    from 49af396  IMPALA-9916: Fix DiskIoMgrTest.MetricsOfWriteIoError failed due to data race in TSAN build
     new 388ad55  IMPALA-8954: Uncorrelated scalar subqueries in the select list
     new 2dca556  IMPALA-9784, IMPALA-9905: Uncorrelated subqueries in HAVING.

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../org/apache/impala/analysis/SelectList.java     |    5 +
 .../org/apache/impala/analysis/SelectStmt.java     |   71 +-
 .../org/apache/impala/analysis/StmtRewriter.java   |  304 ++++-
 .../impala/analysis/AnalyzeSubqueriesTest.java     |   48 +-
 testdata/datasets/tpcds/tpcds_schema_template.sql  |    2 +-
 .../queries/PlannerTest/subquery-rewrite.test      |  205 +++
 .../queries/PlannerTest/tpcds-all.test             | 1443 ++++++++++++++++++++
 .../queries/QueryTest/subquery.test                |  278 ++++
 testdata/workloads/tpcds/queries/count.test        |    9 +-
 .../tpcds/queries/tpcds-decimal_v2-q9.test         |   54 +
 testdata/workloads/tpcds/queries/tpcds-q23-1.test  |   56 +
 testdata/workloads/tpcds/queries/tpcds-q23-2.test  |   65 +
 testdata/workloads/tpcds/queries/tpcds-q24-1.test  |   58 +
 testdata/workloads/tpcds/queries/tpcds-q24-2.test  |   59 +
 testdata/workloads/tpcds/queries/tpcds-q44.test    |   64 +
 testdata/workloads/tpcds/queries/tpcds-q9.test     |   54 +
 tests/query_test/test_tpcds_queries.py             |   21 +
 tests/util/parse_util.py                           |    2 +-
 18 files changed, 2761 insertions(+), 37 deletions(-)
 create mode 100644 testdata/workloads/tpcds/queries/tpcds-decimal_v2-q9.test
 create mode 100644 testdata/workloads/tpcds/queries/tpcds-q23-1.test
 create mode 100644 testdata/workloads/tpcds/queries/tpcds-q23-2.test
 create mode 100644 testdata/workloads/tpcds/queries/tpcds-q24-1.test
 create mode 100644 testdata/workloads/tpcds/queries/tpcds-q24-2.test
 create mode 100644 testdata/workloads/tpcds/queries/tpcds-q44.test
 create mode 100644 testdata/workloads/tpcds/queries/tpcds-q9.test


[impala] 02/02: IMPALA-9784, IMPALA-9905: Uncorrelated subqueries in HAVING.

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 2dca55695ef3c208ece543ad36bece2a985cf8da
Author: Shant Hovsepian <sh...@cloudera.com>
AuthorDate: Tue Jun 2 09:28:09 2020 -0400

    IMPALA-9784, IMPALA-9905: Uncorrelated subqueries in HAVING.
    
    Support rewriting subqueries in the HAVING clause by nesting the
    aggregation query and pulling up the subquery predicates into the outer
    WHERE clause.
    
    Testing:
      * New analyzer tests
      * New functional subquery tests
      * Added Q23, Q24 and Q44 to the tpcds workload
      * Ran subquery rewrite tests
    
    Change-Id: I124a58a09a1a47e1222a22d84b54fe7d07844461
    Reviewed-on: http://gerrit.cloudera.org:8080/16052
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Tim Armstrong <ta...@cloudera.com>
---
 .../org/apache/impala/analysis/SelectStmt.java     |  34 +-
 .../org/apache/impala/analysis/StmtRewriter.java   | 135 +++++-
 .../impala/analysis/AnalyzeSubqueriesTest.java     |  25 +-
 .../queries/PlannerTest/subquery-rewrite.test      |  71 +++
 .../queries/PlannerTest/tpcds-all.test             | 527 +++++++++++++++++++++
 .../queries/QueryTest/subquery.test                | 105 ++++
 testdata/workloads/tpcds/queries/tpcds-q23-1.test  |  56 +++
 testdata/workloads/tpcds/queries/tpcds-q23-2.test  |  65 +++
 testdata/workloads/tpcds/queries/tpcds-q24-1.test  |  58 +++
 testdata/workloads/tpcds/queries/tpcds-q24-2.test  |  59 +++
 testdata/workloads/tpcds/queries/tpcds-q44.test    |  64 +++
 tests/query_test/test_tpcds_queries.py             |  15 +
 12 files changed, 1192 insertions(+), 22 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
index 5618694..ab4a425 100644
--- a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
@@ -621,10 +621,12 @@ public class SelectStmt extends QueryStmt {
       // Analyze the HAVING clause first so we can check if it contains aggregates.
       // We need to analyze/register it even if we are not computing aggregates.
       if (havingClause_ == null) return;
-      // can't contain subqueries
-      if (havingClause_.contains(Predicates.instanceOf(Subquery.class))) {
+      List<Expr> subqueries = new ArrayList<>();
+      havingClause_.collectAll(Predicates.instanceOf(Subquery.class), subqueries);
+      if (subqueries.size() > 1) {
         throw new AnalysisException(
-            "Subqueries are not supported in the HAVING clause.");
+            "Multiple subqueries are not supported in expression: "
+            + havingClause_.toSql());
       }
       // Resolve (top-level) aliases and analyzes
       havingPred_ = resolveReferenceExpr(havingClause_, "HAVING", analyzer_,
@@ -818,10 +820,6 @@ public class SelectStmt extends QueryStmt {
         LOG.trace("post-agg selectListExprs: " + Expr.debugString(resultExprs_));
       }
       if (havingPred_ != null) {
-        // Make sure the predicate in the HAVING clause does not contain a
-        // subquery.
-        Preconditions.checkState(!havingPred_.contains(
-            Predicates.instanceOf(Subquery.class)));
         havingPred_ = havingPred_.substitute(combinedSmap, analyzer_, false);
         analyzer_.registerConjuncts(havingPred_, true);
         if (LOG.isTraceEnabled()) {
@@ -1069,16 +1067,16 @@ public class SelectStmt extends QueryStmt {
     Preconditions.checkState(isAnalyzed());
     selectList_.rewriteExprs(rewriter, analyzer_);
     for (TableRef ref: fromClause_.getTableRefs()) ref.rewriteExprs(rewriter, analyzer_);
+    List<Subquery> subqueryExprs = new ArrayList<>();
     if (whereClause_ != null) {
       whereClause_ = rewriter.rewrite(whereClause_, analyzer_);
-      // Also rewrite exprs in the statements of subqueries.
-      List<Subquery> subqueryExprs = new ArrayList<>();
       whereClause_.collect(Subquery.class, subqueryExprs);
-      for (Subquery s: subqueryExprs) s.getStatement().rewriteExprs(rewriter);
     }
     if (havingClause_ != null) {
       havingClause_ = rewriteCheckOrdinalResult(rewriter, havingClause_);
+      havingClause_.collect(Subquery.class, subqueryExprs);
     }
+    for (Subquery s : subqueryExprs) s.getStatement().rewriteExprs(rewriter);
     if (groupingExprs_ != null) {
       for (int i = 0; i < groupingExprs_.size(); ++i) {
         groupingExprs_.set(i, rewriteCheckOrdinalResult(
@@ -1217,6 +1215,9 @@ public class SelectStmt extends QueryStmt {
       if (whereClause_ != null) {
         whereClause_.collect(Subquery.class, subqueries);
       }
+      if (havingClause_ != null) {
+        havingClause_.collect(Subquery.class, subqueries);
+      }
       for (SelectListItem item : selectList_.getItems()) {
         if (item.isStar()) continue;
         item.getExpr().collect(Subquery.class, subqueries);
@@ -1253,12 +1254,15 @@ public class SelectStmt extends QueryStmt {
         whereSubQueries.get(0).getStatement().collectInlineViews(inlineViews);
       }
     }
-    List<Subquery> selectListSubQueries = Lists.newArrayList();
+    List<Subquery> subqueries = Lists.newArrayList();
     for (SelectListItem item : selectList_.getItems()) {
       if (item.isStar()) continue;
-      item.getExpr().collect(Subquery.class, selectListSubQueries);
+      item.getExpr().collect(Subquery.class, subqueries);
+    }
+    if (havingClause_ != null) {
+      havingClause_.collect(Subquery.class, subqueries);
     }
-    for (Subquery sq : selectListSubQueries) {
+    for (Subquery sq : subqueries) {
       sq.getStatement().collectInlineViews(inlineViews);
     }
   }
@@ -1291,6 +1295,9 @@ public class SelectStmt extends QueryStmt {
    *
    * This function may produce false negatives because the cardinality of the
    * result set also depends on the data a stmt is processing.
+   *
+   * TODO: IMPALA-1285 to cover more cases that can be determinded at plan time such has a
+   * group by clause where all grouping expressions are bound to constant expressions.
    */
   public boolean returnsSingleRow() {
     Preconditions.checkState(isAnalyzed());
@@ -1302,6 +1309,7 @@ public class SelectStmt extends QueryStmt {
     if (hasMultiAggInfo() && !hasGroupByClause() && !selectList_.isDistinct()) {
       return true;
     }
+
     // Select from an inline view that returns at most one row.
     List<TableRef> tableRefs = fromClause_.getTableRefs();
     if (tableRefs.size() == 1 && tableRefs.get(0) instanceof InlineViewRef) {
diff --git a/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java b/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java
index c9fb004..f98afa4 100644
--- a/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java
+++ b/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java
@@ -307,6 +307,7 @@ public class StmtRewriter {
       boolean updateSelectList = false;
       SelectStmt subqueryStmt = (SelectStmt) expr.getSubquery().getStatement();
       boolean isScalarSubquery = expr.getSubquery().isScalarSubquery();
+      boolean isScalarColumn = expr.getSubquery().returnsScalarColumn();
       boolean isRuntimeScalar = subqueryStmt.isRuntimeScalar();
       // Create a new inline view from the subquery stmt. The inline view will be added
       // to the stmt's table refs later. Explicitly set the inline view's column labels
@@ -460,11 +461,17 @@ public class StmtRewriter {
       }
 
       if (!hasEqJoinPred && !inlineView.isCorrelated()) {
-        // TODO: Remove this when independent subquery evaluation is implemented.
         // TODO: Requires support for non-equi joins.
+        // TODO: Remove this when independent subquery evaluation is implemented.
+        // TODO: IMPALA-5100 to cover all cases, we do let through runtime scalars with
+        // group by clauses to allow for subqueries where we haven't implemented plan time
+        // expression evaluation to ensure only a single row is returned. This may expose
+        // runtime errors in the presence of multiple runtime scalar subqueries until we
+        // implement independent evaluation.
         boolean hasGroupBy = ((SelectStmt) inlineView.getViewStmt()).hasGroupByClause();
-        if ((!isScalarSubquery && !isRuntimeScalar) ||
-            (hasGroupBy && !stmt.selectList_.isDistinct())) {
+        if ((!isScalarSubquery && !isRuntimeScalar)
+            || (hasGroupBy && !stmt.selectList_.isDistinct() && !isScalarColumn
+                   && !isRuntimeScalar)) {
           throw new AnalysisException(
               "Unsupported predicate with subquery: " + expr.toSql());
         }
@@ -941,6 +948,15 @@ public class StmtRewriter {
     @Override
     protected void rewriteSelectStmtHook(SelectStmt stmt, Analyzer analyzer)
         throws AnalysisException {
+      // Rewrite all the subqueries in the HAVING clause.
+      if (stmt.hasHavingClause() && stmt.havingClause_.getSubquery() != null) {
+        if (hasSubqueryInDisjunction(stmt.havingClause_)) {
+          throw new AnalysisException("Subqueries in OR predicates are not supported: "
+              + stmt.havingClause_.toSql());
+        }
+        rewriteHavingClauseSubqueries(stmt, analyzer);
+      }
+
       // Rewrite all the subqueries in the WHERE clause.
       if (stmt.hasWhereClause()) {
         // Push negation to leaf operands.
@@ -1261,5 +1277,118 @@ public class StmtRewriter {
         replaceUnqualifiedStarItems(stmt, numTableRefs);
       }
     }
+
+    /**
+     * Rewrite subqueries of a stmt's HAVING clause. The stmt is rewritten into two
+     * separate statements; an inner statement which performs all sql operations that
+     * evaluated before the HAVING clause and an outer statement which projects the inner
+     * stmt's results with the HAVING clause rewritten as a WHERE clause and also performs
+     * the remainder of the sql operations (ORDER BY, LIMIT). We then rely on the WHERE
+     * clause rewrite rule to handle the subqueries that were originally in the HAVING
+     * clause.
+     *
+     * SELECT a, sum(b) FROM T1 GROUP BY a HAVING count(b) > (SELECT max(c) FROM T2)
+     * ORDER BY 2 LIMIT 10
+     *
+     * Inner Stmt becomes:
+     *
+     * SELECT a, sum(b), count(b) FROM T1 GROUP BY a
+     *
+     * Notice we augment the select list with any aggregates in the HAVING clause that are
+     * missing in the original select list.
+     *
+     * Outer Stmt becomes:
+     *
+     * SELECT $a$1.$c$1 a, $a$1.$c$2 sum(b) FROM
+     * (SELECT a, sum(b), count(b) FROM T1 GROUP BY a) $a$1 ($c$1, $c$2, $c$3) WHERE
+     * $a$1.$c$3 > (SELECT max(c) FROM T2) ORDER BY 2 LIMIT 10
+     *
+     * The query should would then be rewritten by the caller using
+     * rewriteWhereClauseSubqueries()
+     *
+     */
+    private void rewriteHavingClauseSubqueries(SelectStmt stmt, Analyzer analyzer)
+        throws AnalysisException {
+      // Generate the inner query from the current statement pulling up the order by,
+      // limit, and any aggregates in the having clause that aren't projected in the
+      // select list.
+      final SelectStmt innerStmt = stmt.clone();
+      final List<FunctionCallExpr> aggExprs = stmt.hasMultiAggInfo() ?
+          stmt.getMultiAggInfo().getAggExprs() :
+          new ArrayList<>();
+      for (FunctionCallExpr agg : aggExprs) {
+        boolean contains = false;
+        for (SelectListItem selectListItem : stmt.getSelectList().getItems()) {
+          contains = selectListItem.getExpr().equals(agg);
+          if (contains) {
+            break;
+          }
+        }
+        if (!contains) {
+          innerStmt.selectList_.getItems().add(
+              new SelectListItem(agg.clone().reset(), null));
+        }
+      }
+
+      // Remove clauses that will go into the outer statement.
+      innerStmt.havingClause_ = null;
+      innerStmt.limitElement_ = new LimitElement(null, null);
+      if (innerStmt.hasOrderByClause()) {
+        innerStmt.orderByElements_ = null;
+      }
+      innerStmt.reset();
+
+      // Used in the substitution map, as post analyze() exprs won't match.
+      final List<SelectListItem> preAnalyzeSelectList =
+          innerStmt.getSelectList().clone().getItems();
+      final ExprSubstitutionMap smap = new ExprSubstitutionMap();
+      List<String> colLabels =
+          Lists.newArrayListWithCapacity(innerStmt.getSelectList().getItems().size());
+
+      for (int i = 0; i < innerStmt.getSelectList().getItems().size(); ++i) {
+        String colAlias = stmt.getColumnAliasGenerator().getNextAlias();
+        colLabels.add(colAlias);
+      }
+
+      final String innerAlias = stmt.getTableAliasGenerator().getNextAlias();
+      final InlineViewRef innerView = new InlineViewRef(innerAlias, innerStmt, colLabels);
+      innerView.analyze(analyzer);
+
+      // Rewrite the new inline view.
+      rewriteSelectStatement(
+          (SelectStmt) innerView.getViewStmt(), innerView.getViewStmt().getAnalyzer());
+
+      for (int i = 0; i < preAnalyzeSelectList.size(); ++i) {
+        final Expr slot = new SlotRef(Lists.newArrayList(innerAlias, colLabels.get(i)));
+        slot.analyze(analyzer);
+        smap.put(preAnalyzeSelectList.get(i).getExpr(), slot);
+      }
+
+      // Create the new outer statement's select list.
+      final List<SelectListItem> outerSelectList = new ArrayList<>();
+      for (int i = 0; i < stmt.getSelectList().getItems().size(); ++i) {
+        // Project the original select list items and labels
+        final SelectListItem si = new SelectListItem(
+            stmt.getSelectList().getItems().get(i).getExpr().clone().reset().substitute(
+                smap, analyzer, false),
+            stmt.getColLabels().get(i));
+        si.getExpr().analyze(analyzer);
+        outerSelectList.add(si);
+      }
+
+      // Clear out the old stmt properties.
+      stmt.whereClause_ = stmt.havingClause_.reset().substitute(smap, analyzer, false);
+      stmt.whereClause_.analyze(analyzer);
+      stmt.havingClause_ = null;
+      stmt.groupingExprs_ = null;
+      stmt.selectList_.getItems().clear();
+      stmt.selectList_.getItems().addAll(outerSelectList);
+      stmt.fromClause_.getTableRefs().clear();
+      stmt.fromClause_.add(innerView);
+
+      stmt.analyze(analyzer);
+      if (LOG.isTraceEnabled())
+        LOG.trace("Rewritten HAVING Clause SQL: " + stmt.toSql(REWRITTEN));
+    }
   }
 }
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeSubqueriesTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeSubqueriesTest.java
index a845718..7412709 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeSubqueriesTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeSubqueriesTest.java
@@ -1355,12 +1355,16 @@ public class AnalyzeSubqueriesTest extends AnalyzerTest {
   @Test
   public void testIllegalSubquery() throws AnalysisException {
     // Predicate with a child subquery in the HAVING clause
-    AnalysisError("select id, count(*) from functional.alltypestiny t group by " +
-        "id having count(*) > (select count(*) from functional.alltypesagg)",
-        "Subqueries are not supported in the HAVING clause.");
-    AnalysisError("select id, count(*) from functional.alltypestiny t group by " +
-        "id having (select count(*) from functional.alltypesagg) > 10",
-        "Subqueries are not supported in the HAVING clause.");
+    AnalysisError("select id, count(*) from functional.alltypestiny t group by id " +
+        "having count(*) > (select count(*) from functional.alltypesagg where id = t.id)",
+        "Unsupported correlated subquery: SELECT count(*) FROM functional.alltypesagg " +
+        "WHERE id = t.id");
+    AnalysisError("select id, count(*) from functional.alltypestiny t group by id "
+            + "having (select count(*) from functional.alltypesagg) > 10 and count(*) < "
+            + "(select count(*) from functional.alltypesagg)",
+        "Multiple subqueries are not supported in expression: (SELECT count(*) FROM "
+            + "functional.alltypesagg) > 10 AND count(*) < (SELECT count(*) FROM "
+            + "functional.alltypesagg");
 
     // Subquery in the select list
     AnalysisError("select id, (select int_col from functional.alltypestiny) "
@@ -1498,4 +1502,13 @@ public class AnalyzeSubqueriesTest extends AnalyzerTest {
     AnalyzesOk("select 1 from functional.alltypes where " +
         "coalesce(null, (select bool_col from functional.alltypes where id = 0))");
   }
+
+  @Test
+  public void testHavingSubqueries() throws AnalysisException {
+    // Predicate with a child subquery in the HAVING clause
+    AnalyzesOk("select id, count(*) from functional.alltypestiny t group by "
+        + "id having count(*) > (select count(*) from functional.alltypesagg)");
+    AnalyzesOk("select id, count(*) from functional.alltypestiny t group by "
+        + "id having (select count(*) from functional.alltypesagg) > 10");
+  }
 }
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test b/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
index bcffd73..cb44adc 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
@@ -3429,4 +3429,75 @@ PLAN-ROOT SINK
    HDFS partitions=11/11 files=11 size=814.73KB
    runtime filters: RF000 -> alltypesagg.id
    row-size=39B cardinality=11.00K
+=======
+# IMPALA-9905 Cardinality Check for subqueries with grouping expressions and
+# scalar return types
+select bigint_col from functional.alltypes where int_col >=
+  3 * (select max(int_col)
+   from functional.alltypestiny where id = 1 group by id
+  )
+---- PLAN
+PLAN-ROOT SINK
+|
+04:NESTED LOOP JOIN [INNER JOIN]
+|  predicates: int_col >= 3 * max(int_col)
+|  row-size=20B cardinality=7.30K
+|
+|--03:CARDINALITY CHECK
+|  |  limit: 1
+|  |  row-size=8B cardinality=1
+|  |
+|  02:AGGREGATE [FINALIZE]
+|  |  output: max(int_col)
+|  |  group by: id
+|  |  limit: 2
+|  |  row-size=8B cardinality=1
+|  |
+|  01:SCAN HDFS [functional.alltypestiny]
+|     HDFS partitions=4/4 files=4 size=460B
+|     predicates: id = 1
+|     row-size=8B cardinality=1
+|
+00:SCAN HDFS [functional.alltypes]
+   HDFS partitions=24/24 files=24 size=478.45KB
+   row-size=12B cardinality=7.30K
+---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
+09:EXCHANGE [UNPARTITIONED]
+|
+04:NESTED LOOP JOIN [INNER JOIN, BROADCAST]
+|  predicates: int_col >= 3 * max(int_col)
+|  row-size=20B cardinality=7.30K
+|
+|--08:EXCHANGE [BROADCAST]
+|  |
+|  03:CARDINALITY CHECK
+|  |  limit: 1
+|  |  row-size=8B cardinality=1
+|  |
+|  07:EXCHANGE [UNPARTITIONED]
+|  |  limit: 2
+|  |
+|  06:AGGREGATE [FINALIZE]
+|  |  output: max:merge(int_col)
+|  |  group by: id
+|  |  limit: 2
+|  |  row-size=8B cardinality=1
+|  |
+|  05:EXCHANGE [HASH(id)]
+|  |
+|  02:AGGREGATE [STREAMING]
+|  |  output: max(int_col)
+|  |  group by: id
+|  |  row-size=8B cardinality=1
+|  |
+|  01:SCAN HDFS [functional.alltypestiny]
+|     HDFS partitions=4/4 files=4 size=460B
+|     predicates: id = 1
+|     row-size=8B cardinality=1
+|
+00:SCAN HDFS [functional.alltypes]
+   HDFS partitions=24/24 files=24 size=478.45KB
+   row-size=12B cardinality=7.30K
 ====
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test
index 9057a3f..d7d2953 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test
@@ -9704,3 +9704,530 @@ PLAN-ROOT SINK
    predicates: ss_quantity <= 20, ss_quantity >= 1
    row-size=4B cardinality=288.04K
 ====
+# TPCDS-Q44
+SELECT asceding.rnk,
+       i1.i_product_name best_performing,
+       i2.i_product_name worst_performing
+FROM
+  (SELECT *
+   FROM
+     (SELECT item_sk,
+             rank() OVER (
+                          ORDER BY rank_col ASC) rnk
+      FROM
+        (SELECT ss_item_sk item_sk,
+                avg(ss_net_profit) rank_col
+         FROM store_sales ss1
+         WHERE ss_store_sk = 4
+         GROUP BY ss_item_sk
+         HAVING avg(ss_net_profit) > 0.9*
+           (SELECT avg(ss_net_profit) rank_col
+            FROM store_sales
+            WHERE ss_store_sk = 4
+              AND ss_addr_sk IS NULL
+            GROUP BY ss_store_sk))V1)V11
+   WHERE rnk < 11) asceding,
+  (SELECT *
+   FROM
+     (SELECT item_sk,
+             rank() OVER (
+                          ORDER BY rank_col DESC) rnk
+      FROM
+        (SELECT ss_item_sk item_sk,
+                avg(ss_net_profit) rank_col
+         FROM store_sales ss1
+         WHERE ss_store_sk = 4
+         GROUP BY ss_item_sk
+         HAVING avg(ss_net_profit) > 0.9*
+           (SELECT avg(ss_net_profit) rank_col
+            FROM store_sales
+            WHERE ss_store_sk = 4
+              AND ss_addr_sk IS NULL
+            GROUP BY ss_store_sk))V2)V21
+   WHERE rnk < 11) descending,
+     item i1,
+     item i2
+WHERE asceding.rnk = descending.rnk
+  AND i1.i_item_sk=asceding.item_sk
+  AND i2.i_item_sk=descending.item_sk
+ORDER BY asceding.rnk
+LIMIT 100
+---- PLAN
+Max Per-Host Resource Reservation: Memory=65.75MB Threads=7
+Per-Host Resource Estimates: Memory=626MB
+PLAN-ROOT SINK
+|
+23:TOP-N [LIMIT=100]
+|  order by: rnk ASC
+|  row-size=68B cardinality=100
+|
+22:HASH JOIN [INNER JOIN]
+|  hash predicates: i2.i_item_sk = ss_item_sk
+|  runtime filters: RF000 <- ss_item_sk
+|  row-size=148B cardinality=1.80K
+|
+|--21:HASH JOIN [INNER JOIN]
+|  |  hash predicates: rank() = rank()
+|  |  row-size=110B cardinality=1.80K
+|  |
+|  |--17:SELECT
+|  |  |  predicates: rank() < 11
+|  |  |  row-size=36B cardinality=1.80K
+|  |  |
+|  |  16:ANALYTIC
+|  |  |  functions: rank()
+|  |  |  order by: avg(ss_net_profit) DESC
+|  |  |  window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+|  |  |  row-size=36B cardinality=17.98K
+|  |  |
+|  |  15:SORT
+|  |  |  order by: avg(ss_net_profit) DESC
+|  |  |  row-size=28B cardinality=17.98K
+|  |  |
+|  |  14:NESTED LOOP JOIN [INNER JOIN]
+|  |  |  predicates: avg(ss_net_profit) > 0.9 * avg(ss_net_profit)
+|  |  |  row-size=28B cardinality=17.98K
+|  |  |
+|  |  |--13:CARDINALITY CHECK
+|  |  |  |  limit: 1
+|  |  |  |  row-size=12B cardinality=1
+|  |  |  |
+|  |  |  12:AGGREGATE [FINALIZE]
+|  |  |  |  output: avg(ss_net_profit)
+|  |  |  |  group by: ss_store_sk
+|  |  |  |  limit: 2
+|  |  |  |  row-size=12B cardinality=2
+|  |  |  |
+|  |  |  11:SCAN HDFS [tpcds.store_sales]
+|  |  |     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|  |  |     predicates: ss_addr_sk IS NULL, ss_store_sk = 4
+|  |  |     row-size=12B cardinality=53.06K
+|  |  |
+|  |  10:AGGREGATE [FINALIZE]
+|  |  |  output: avg(ss_net_profit)
+|  |  |  group by: ss_item_sk
+|  |  |  row-size=16B cardinality=17.98K
+|  |  |
+|  |  09:SCAN HDFS [tpcds.store_sales ss1]
+|  |     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|  |     predicates: ss_store_sk = 4
+|  |     row-size=16B cardinality=480.07K
+|  |
+|  20:HASH JOIN [INNER JOIN]
+|  |  hash predicates: i1.i_item_sk = ss_item_sk
+|  |  runtime filters: RF002 <- ss_item_sk
+|  |  row-size=74B cardinality=1.80K
+|  |
+|  |--08:SELECT
+|  |  |  predicates: rank() < 11
+|  |  |  row-size=36B cardinality=1.80K
+|  |  |
+|  |  07:ANALYTIC
+|  |  |  functions: rank()
+|  |  |  order by: avg(ss_net_profit) ASC
+|  |  |  window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+|  |  |  row-size=36B cardinality=17.98K
+|  |  |
+|  |  06:SORT
+|  |  |  order by: avg(ss_net_profit) ASC
+|  |  |  row-size=28B cardinality=17.98K
+|  |  |
+|  |  05:NESTED LOOP JOIN [INNER JOIN]
+|  |  |  predicates: avg(ss_net_profit) > 0.9 * avg(ss_net_profit)
+|  |  |  row-size=28B cardinality=17.98K
+|  |  |
+|  |  |--04:CARDINALITY CHECK
+|  |  |  |  limit: 1
+|  |  |  |  row-size=12B cardinality=1
+|  |  |  |
+|  |  |  03:AGGREGATE [FINALIZE]
+|  |  |  |  output: avg(ss_net_profit)
+|  |  |  |  group by: ss_store_sk
+|  |  |  |  limit: 2
+|  |  |  |  row-size=12B cardinality=2
+|  |  |  |
+|  |  |  02:SCAN HDFS [tpcds.store_sales]
+|  |  |     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|  |  |     predicates: ss_addr_sk IS NULL, ss_store_sk = 4
+|  |  |     row-size=12B cardinality=53.06K
+|  |  |
+|  |  01:AGGREGATE [FINALIZE]
+|  |  |  output: avg(ss_net_profit)
+|  |  |  group by: ss_item_sk
+|  |  |  row-size=16B cardinality=17.98K
+|  |  |
+|  |  00:SCAN HDFS [tpcds.store_sales ss1]
+|  |     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|  |     predicates: ss_store_sk = 4
+|  |     row-size=16B cardinality=480.07K
+|  |
+|  18:SCAN HDFS [tpcds.item i1]
+|     HDFS partitions=1/1 files=1 size=4.82MB
+|     runtime filters: RF002 -> i1.i_item_sk
+|     row-size=38B cardinality=18.00K
+|
+19:SCAN HDFS [tpcds.item i2]
+   HDFS partitions=1/1 files=1 size=4.82MB
+   runtime filters: RF000 -> i2.i_item_sk
+   row-size=38B cardinality=18.00K
+---- DISTRIBUTEDPLAN
+Max Per-Host Resource Reservation: Memory=90.56MB Threads=21
+Per-Host Resource Estimates: Memory=685MB
+PLAN-ROOT SINK
+|
+41:MERGING-EXCHANGE [UNPARTITIONED]
+|  order by: rnk ASC
+|  limit: 100
+|
+23:TOP-N [LIMIT=100]
+|  order by: rnk ASC
+|  row-size=68B cardinality=100
+|
+22:HASH JOIN [INNER JOIN, BROADCAST]
+|  hash predicates: i2.i_item_sk = ss_item_sk
+|  runtime filters: RF000 <- ss_item_sk
+|  row-size=148B cardinality=1.80K
+|
+|--40:EXCHANGE [BROADCAST]
+|  |
+|  21:HASH JOIN [INNER JOIN, BROADCAST]
+|  |  hash predicates: rank() = rank()
+|  |  row-size=110B cardinality=1.80K
+|  |
+|  |--39:EXCHANGE [UNPARTITIONED]
+|  |  |
+|  |  20:HASH JOIN [INNER JOIN, BROADCAST]
+|  |  |  hash predicates: ss_item_sk = i1.i_item_sk
+|  |  |  row-size=74B cardinality=1.80K
+|  |  |
+|  |  |--38:EXCHANGE [UNPARTITIONED]
+|  |  |  |
+|  |  |  18:SCAN HDFS [tpcds.item i1]
+|  |  |     HDFS partitions=1/1 files=1 size=4.82MB
+|  |  |     row-size=38B cardinality=18.00K
+|  |  |
+|  |  08:SELECT
+|  |  |  predicates: rank() < 11
+|  |  |  row-size=36B cardinality=1.80K
+|  |  |
+|  |  07:ANALYTIC
+|  |  |  functions: rank()
+|  |  |  order by: avg(ss_net_profit) ASC
+|  |  |  window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+|  |  |  row-size=36B cardinality=17.98K
+|  |  |
+|  |  37:MERGING-EXCHANGE [UNPARTITIONED]
+|  |  |  order by: avg(ss_net_profit) ASC
+|  |  |
+|  |  06:SORT
+|  |  |  order by: avg(ss_net_profit) ASC
+|  |  |  row-size=28B cardinality=17.98K
+|  |  |
+|  |  05:NESTED LOOP JOIN [INNER JOIN, BROADCAST]
+|  |  |  predicates: avg(ss_net_profit) > 0.9 * avg(ss_net_profit)
+|  |  |  row-size=28B cardinality=17.98K
+|  |  |
+|  |  |--36:EXCHANGE [BROADCAST]
+|  |  |  |
+|  |  |  04:CARDINALITY CHECK
+|  |  |  |  limit: 1
+|  |  |  |  row-size=12B cardinality=1
+|  |  |  |
+|  |  |  35:EXCHANGE [UNPARTITIONED]
+|  |  |  |  limit: 2
+|  |  |  |
+|  |  |  34:AGGREGATE [FINALIZE]
+|  |  |  |  output: avg:merge(ss_net_profit)
+|  |  |  |  group by: ss_store_sk
+|  |  |  |  limit: 2
+|  |  |  |  row-size=12B cardinality=2
+|  |  |  |
+|  |  |  33:EXCHANGE [HASH(ss_store_sk)]
+|  |  |  |
+|  |  |  03:AGGREGATE [STREAMING]
+|  |  |  |  output: avg(ss_net_profit)
+|  |  |  |  group by: ss_store_sk
+|  |  |  |  row-size=12B cardinality=6
+|  |  |  |
+|  |  |  02:SCAN HDFS [tpcds.store_sales]
+|  |  |     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|  |  |     predicates: ss_addr_sk IS NULL, ss_store_sk = 4
+|  |  |     row-size=12B cardinality=53.06K
+|  |  |
+|  |  32:AGGREGATE [FINALIZE]
+|  |  |  output: avg:merge(ss_net_profit)
+|  |  |  group by: ss_item_sk
+|  |  |  row-size=16B cardinality=17.98K
+|  |  |
+|  |  31:EXCHANGE [HASH(ss_item_sk)]
+|  |  |
+|  |  01:AGGREGATE [STREAMING]
+|  |  |  output: avg(ss_net_profit)
+|  |  |  group by: ss_item_sk
+|  |  |  row-size=16B cardinality=17.98K
+|  |  |
+|  |  00:SCAN HDFS [tpcds.store_sales ss1]
+|  |     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|  |     predicates: ss_store_sk = 4
+|  |     row-size=16B cardinality=480.07K
+|  |
+|  17:SELECT
+|  |  predicates: rank() < 11
+|  |  row-size=36B cardinality=1.80K
+|  |
+|  16:ANALYTIC
+|  |  functions: rank()
+|  |  order by: avg(ss_net_profit) DESC
+|  |  window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+|  |  row-size=36B cardinality=17.98K
+|  |
+|  30:MERGING-EXCHANGE [UNPARTITIONED]
+|  |  order by: avg(ss_net_profit) DESC
+|  |
+|  15:SORT
+|  |  order by: avg(ss_net_profit) DESC
+|  |  row-size=28B cardinality=17.98K
+|  |
+|  14:NESTED LOOP JOIN [INNER JOIN, BROADCAST]
+|  |  predicates: avg(ss_net_profit) > 0.9 * avg(ss_net_profit)
+|  |  row-size=28B cardinality=17.98K
+|  |
+|  |--29:EXCHANGE [BROADCAST]
+|  |  |
+|  |  13:CARDINALITY CHECK
+|  |  |  limit: 1
+|  |  |  row-size=12B cardinality=1
+|  |  |
+|  |  28:EXCHANGE [UNPARTITIONED]
+|  |  |  limit: 2
+|  |  |
+|  |  27:AGGREGATE [FINALIZE]
+|  |  |  output: avg:merge(ss_net_profit)
+|  |  |  group by: ss_store_sk
+|  |  |  limit: 2
+|  |  |  row-size=12B cardinality=2
+|  |  |
+|  |  26:EXCHANGE [HASH(ss_store_sk)]
+|  |  |
+|  |  12:AGGREGATE [STREAMING]
+|  |  |  output: avg(ss_net_profit)
+|  |  |  group by: ss_store_sk
+|  |  |  row-size=12B cardinality=6
+|  |  |
+|  |  11:SCAN HDFS [tpcds.store_sales]
+|  |     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|  |     predicates: ss_addr_sk IS NULL, ss_store_sk = 4
+|  |     row-size=12B cardinality=53.06K
+|  |
+|  25:AGGREGATE [FINALIZE]
+|  |  output: avg:merge(ss_net_profit)
+|  |  group by: ss_item_sk
+|  |  row-size=16B cardinality=17.98K
+|  |
+|  24:EXCHANGE [HASH(ss_item_sk)]
+|  |
+|  10:AGGREGATE [STREAMING]
+|  |  output: avg(ss_net_profit)
+|  |  group by: ss_item_sk
+|  |  row-size=16B cardinality=17.98K
+|  |
+|  09:SCAN HDFS [tpcds.store_sales ss1]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_store_sk = 4
+|     row-size=16B cardinality=480.07K
+|
+19:SCAN HDFS [tpcds.item i2]
+   HDFS partitions=1/1 files=1 size=4.82MB
+   runtime filters: RF000 -> i2.i_item_sk
+   row-size=38B cardinality=18.00K
+---- PARALLELPLANS
+Max Per-Host Resource Reservation: Memory=157.12MB Threads=28
+Per-Host Resource Estimates: Memory=368MB
+PLAN-ROOT SINK
+|
+41:MERGING-EXCHANGE [UNPARTITIONED]
+|  order by: rnk ASC
+|  limit: 100
+|
+23:TOP-N [LIMIT=100]
+|  order by: rnk ASC
+|  row-size=68B cardinality=100
+|
+22:HASH JOIN [INNER JOIN, BROADCAST]
+|  hash predicates: i2.i_item_sk = ss_item_sk
+|  row-size=148B cardinality=1.80K
+|
+|--JOIN BUILD
+|  |  join-table-id=00 plan-id=01 cohort-id=01
+|  |  build expressions: ss_item_sk
+|  |  runtime filters: RF000 <- ss_item_sk
+|  |
+|  40:EXCHANGE [BROADCAST]
+|  |
+|  21:HASH JOIN [INNER JOIN, BROADCAST]
+|  |  hash predicates: rank() = rank()
+|  |  row-size=110B cardinality=1.80K
+|  |
+|  |--JOIN BUILD
+|  |  |  join-table-id=01 plan-id=02 cohort-id=02
+|  |  |  build expressions: rank()
+|  |  |
+|  |  39:EXCHANGE [UNPARTITIONED]
+|  |  |
+|  |  20:HASH JOIN [INNER JOIN, BROADCAST]
+|  |  |  hash predicates: ss_item_sk = i1.i_item_sk
+|  |  |  row-size=74B cardinality=1.80K
+|  |  |
+|  |  |--JOIN BUILD
+|  |  |  |  join-table-id=02 plan-id=03 cohort-id=03
+|  |  |  |  build expressions: i1.i_item_sk
+|  |  |  |
+|  |  |  38:EXCHANGE [UNPARTITIONED]
+|  |  |  |
+|  |  |  18:SCAN HDFS [tpcds.item i1]
+|  |  |     HDFS partitions=1/1 files=1 size=4.82MB
+|  |  |     row-size=38B cardinality=18.00K
+|  |  |
+|  |  08:SELECT
+|  |  |  predicates: rank() < 11
+|  |  |  row-size=36B cardinality=1.80K
+|  |  |
+|  |  07:ANALYTIC
+|  |  |  functions: rank()
+|  |  |  order by: avg(ss_net_profit) ASC
+|  |  |  window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+|  |  |  row-size=36B cardinality=17.98K
+|  |  |
+|  |  37:MERGING-EXCHANGE [UNPARTITIONED]
+|  |  |  order by: avg(ss_net_profit) ASC
+|  |  |
+|  |  06:SORT
+|  |  |  order by: avg(ss_net_profit) ASC
+|  |  |  row-size=28B cardinality=17.98K
+|  |  |
+|  |  05:NESTED LOOP JOIN [INNER JOIN, BROADCAST]
+|  |  |  join table id: 03
+|  |  |  predicates: avg(ss_net_profit) > 0.9 * avg(ss_net_profit)
+|  |  |  row-size=28B cardinality=17.98K
+|  |  |
+|  |  |--JOIN BUILD
+|  |  |  |  join-table-id=03 plan-id=04 cohort-id=03
+|  |  |  |
+|  |  |  36:EXCHANGE [BROADCAST]
+|  |  |  |
+|  |  |  04:CARDINALITY CHECK
+|  |  |  |  limit: 1
+|  |  |  |  row-size=12B cardinality=1
+|  |  |  |
+|  |  |  35:EXCHANGE [UNPARTITIONED]
+|  |  |  |  limit: 2
+|  |  |  |
+|  |  |  34:AGGREGATE [FINALIZE]
+|  |  |  |  output: avg:merge(ss_net_profit)
+|  |  |  |  group by: ss_store_sk
+|  |  |  |  limit: 2
+|  |  |  |  row-size=12B cardinality=2
+|  |  |  |
+|  |  |  33:EXCHANGE [HASH(ss_store_sk)]
+|  |  |  |
+|  |  |  03:AGGREGATE [STREAMING]
+|  |  |  |  output: avg(ss_net_profit)
+|  |  |  |  group by: ss_store_sk
+|  |  |  |  row-size=12B cardinality=6
+|  |  |  |
+|  |  |  02:SCAN HDFS [tpcds.store_sales]
+|  |  |     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|  |  |     predicates: ss_addr_sk IS NULL, ss_store_sk = 4
+|  |  |     row-size=12B cardinality=53.06K
+|  |  |
+|  |  32:AGGREGATE [FINALIZE]
+|  |  |  output: avg:merge(ss_net_profit)
+|  |  |  group by: ss_item_sk
+|  |  |  row-size=16B cardinality=17.98K
+|  |  |
+|  |  31:EXCHANGE [HASH(ss_item_sk)]
+|  |  |
+|  |  01:AGGREGATE [STREAMING]
+|  |  |  output: avg(ss_net_profit)
+|  |  |  group by: ss_item_sk
+|  |  |  row-size=16B cardinality=17.98K
+|  |  |
+|  |  00:SCAN HDFS [tpcds.store_sales ss1]
+|  |     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|  |     predicates: ss_store_sk = 4
+|  |     row-size=16B cardinality=480.07K
+|  |
+|  17:SELECT
+|  |  predicates: rank() < 11
+|  |  row-size=36B cardinality=1.80K
+|  |
+|  16:ANALYTIC
+|  |  functions: rank()
+|  |  order by: avg(ss_net_profit) DESC
+|  |  window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+|  |  row-size=36B cardinality=17.98K
+|  |
+|  30:MERGING-EXCHANGE [UNPARTITIONED]
+|  |  order by: avg(ss_net_profit) DESC
+|  |
+|  15:SORT
+|  |  order by: avg(ss_net_profit) DESC
+|  |  row-size=28B cardinality=17.98K
+|  |
+|  14:NESTED LOOP JOIN [INNER JOIN, BROADCAST]
+|  |  join table id: 04
+|  |  predicates: avg(ss_net_profit) > 0.9 * avg(ss_net_profit)
+|  |  row-size=28B cardinality=17.98K
+|  |
+|  |--JOIN BUILD
+|  |  |  join-table-id=04 plan-id=05 cohort-id=02
+|  |  |
+|  |  29:EXCHANGE [BROADCAST]
+|  |  |
+|  |  13:CARDINALITY CHECK
+|  |  |  limit: 1
+|  |  |  row-size=12B cardinality=1
+|  |  |
+|  |  28:EXCHANGE [UNPARTITIONED]
+|  |  |  limit: 2
+|  |  |
+|  |  27:AGGREGATE [FINALIZE]
+|  |  |  output: avg:merge(ss_net_profit)
+|  |  |  group by: ss_store_sk
+|  |  |  limit: 2
+|  |  |  row-size=12B cardinality=2
+|  |  |
+|  |  26:EXCHANGE [HASH(ss_store_sk)]
+|  |  |
+|  |  12:AGGREGATE [STREAMING]
+|  |  |  output: avg(ss_net_profit)
+|  |  |  group by: ss_store_sk
+|  |  |  row-size=12B cardinality=6
+|  |  |
+|  |  11:SCAN HDFS [tpcds.store_sales]
+|  |     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|  |     predicates: ss_addr_sk IS NULL, ss_store_sk = 4
+|  |     row-size=12B cardinality=53.06K
+|  |
+|  25:AGGREGATE [FINALIZE]
+|  |  output: avg:merge(ss_net_profit)
+|  |  group by: ss_item_sk
+|  |  row-size=16B cardinality=17.98K
+|  |
+|  24:EXCHANGE [HASH(ss_item_sk)]
+|  |
+|  10:AGGREGATE [STREAMING]
+|  |  output: avg(ss_net_profit)
+|  |  group by: ss_item_sk
+|  |  row-size=16B cardinality=17.98K
+|  |
+|  09:SCAN HDFS [tpcds.store_sales ss1]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_store_sk = 4
+|     row-size=16B cardinality=480.07K
+|
+19:SCAN HDFS [tpcds.item i2]
+   HDFS partitions=1/1 files=1 size=4.82MB
+   runtime filters: RF000 -> i2.i_item_sk
+   row-size=38B cardinality=18.00K
+====
diff --git a/testdata/workloads/functional-query/queries/QueryTest/subquery.test b/testdata/workloads/functional-query/queries/QueryTest/subquery.test
index 000c8cf..8565f2a 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/subquery.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/subquery.test
@@ -1212,3 +1212,108 @@ FROM alltypessmall WHERE id < 2
 ---- TYPES
 INT, BOOLEAN, BOOLEAN, TINYINT
 ====
+---- QUERY
+# Having with subquery
+select id, sum(int_col+smallint_col)
+  from alltypestiny group by id
+  having sum(int_col+smallint_col) in
+    (select sum(int_col) from alltypessmall group by id)
+order by id
+---- RESULTS
+0,0
+1,2
+2,0
+3,2
+4,0
+5,2
+6,0
+7,2
+---- TYPES
+INT, BIGINT
+====
+---- QUERY
+# Having with not in subquery with nested having
+select id, sum(int_col+smallint_col)
+  from alltypestiny group by id
+  having sum(int_col+smallint_col) not in
+    (select sum(int_col) from alltypessmall group by id having
+    sum(int_col) < 2)
+order by id
+---- RESULTS
+1,2
+3,2
+5,2
+7,2
+---- TYPES
+INT, BIGINT
+====
+---- QUERY
+# Having with subquery with join and subquery in where
+select id, sum(int_col)
+  from alltypestiny
+  where id in (select id from alltypestiny where id % 2 = 0)
+  group by id having sum(int_col) in
+    (select sum(a.int_col) from alltypessmall a, alltypestiny b where
+    a.id = b.id and b.int_col != 1 group by a.id)
+order by id
+---- RESULTS
+0,0
+2,0
+4,0
+6,0
+---- TYPES
+INT, BIGINT
+====
+---- QUERY
+# Having with subquery with subquery in select
+select id, sum(int_col)
+  from alltypestiny
+  group by id having sum(int_col) >
+    (select min(int_col)+(select min(int_col) from alltypessmall)
+      from alltypestiny)
+order by id
+---- RESULTS
+1,1
+3,1
+5,1
+7,1
+---- TYPES
+INT, BIGINT
+====
+---- QUERY
+# Having with exists subquery
+select id, sum(int_col+smallint_col)
+  from alltypestiny group by id
+  having not exists
+    (select sum(int_col) from alltypessmall group by id)
+order by id
+---- RESULTS
+---- TYPES
+INT, BIGINT
+====
+---- QUERY
+# Subquery in having and where clauses
+select id, sum(int_col) from alltypestiny where
+ id > (select min(id)+1 from alltypessmall) group by id
+ having sum(int_col) in (select sum(int_col) from alltypessmall group by id)
+ order by id
+---- RESULTS
+2,0
+3,1
+4,0
+5,1
+6,0
+7,1
+---- TYPES
+INT, BIGINT
+====
+---- QUERY
+# Having subquery with a runtime scalar error
+select id, sum(int_col) from alltypestiny where
+ id > (select min(id)+1 from alltypessmall) group by id
+ having sum(int_col) >= (select sum(int_col) from alltypessmall group by id)
+ order by id
+---- RESULTS
+---- CATCH
+Subquery must not return more than one row:
+====
diff --git a/testdata/workloads/tpcds/queries/tpcds-q23-1.test b/testdata/workloads/tpcds/queries/tpcds-q23-1.test
new file mode 100644
index 0000000..4a6abf5
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/tpcds-q23-1.test
@@ -0,0 +1,56 @@
+====
+---- QUERY: TPCDS-Q23-1
+with frequent_ss_items as
+ (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt
+  from store_sales
+      ,date_dim
+      ,item
+  where ss_sold_date_sk = d_date_sk
+    and ss_item_sk = i_item_sk
+    and d_year in (2000,2000+1,2000+2,2000+3)
+  group by substr(i_item_desc,1,30),i_item_sk,d_date
+  having count(*) >4),
+ max_store_sales as
+ (select max(csales) tpcds_cmax
+  from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales
+        from store_sales
+            ,customer
+            ,date_dim
+        where ss_customer_sk = c_customer_sk
+         and ss_sold_date_sk = d_date_sk
+         and d_year in (2000,2000+1,2000+2,2000+3)
+        group by c_customer_sk) x),
+ best_ss_customer as
+ (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales
+  from store_sales
+      ,customer
+  where ss_customer_sk = c_customer_sk
+  group by c_customer_sk
+  having sum(ss_quantity*ss_sales_price) > (50/100.0) * (select
+  *
+from
+ max_store_sales))
+  select  sum(sales)
+ from (select cs_quantity*cs_list_price sales
+       from catalog_sales
+           ,date_dim
+       where d_year = 2000
+         and d_moy = 2
+         and cs_sold_date_sk = d_date_sk
+         and cs_item_sk in (select item_sk from frequent_ss_items)
+         and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer)
+      union all
+      select ws_quantity*ws_list_price sales
+       from web_sales
+           ,date_dim
+       where d_year = 2000
+         and d_moy = 2
+         and ws_sold_date_sk = d_date_sk
+         and ws_item_sk in (select item_sk from frequent_ss_items)
+         and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer)) y
+ limit 100;
+---- RESULTS
+17030.91
+---- TYPES
+DECIMAL
+====
diff --git a/testdata/workloads/tpcds/queries/tpcds-q23-2.test b/testdata/workloads/tpcds/queries/tpcds-q23-2.test
new file mode 100644
index 0000000..ec093d9
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/tpcds-q23-2.test
@@ -0,0 +1,65 @@
+====
+---- QUERY: TPCDS-Q23-2
+with frequent_ss_items as
+ (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt
+  from store_sales
+      ,date_dim
+      ,item
+  where ss_sold_date_sk = d_date_sk
+    and ss_item_sk = i_item_sk
+    and d_year in (2000,2000 + 1,2000 + 2,2000 + 3)
+  group by substr(i_item_desc,1,30),i_item_sk,d_date
+  having count(*) >4),
+ max_store_sales as
+ (select max(csales) tpcds_cmax
+  from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales
+        from store_sales
+            ,customer
+            ,date_dim
+        where ss_customer_sk = c_customer_sk
+         and ss_sold_date_sk = d_date_sk
+         and d_year in (2000,2000+1,2000+2,2000+3)
+        group by c_customer_sk) x),
+ best_ss_customer as
+ (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales
+  from store_sales
+      ,customer
+  where ss_customer_sk = c_customer_sk
+  group by c_customer_sk
+  having sum(ss_quantity*ss_sales_price) > (50/100.0) * (select
+  *
+ from max_store_sales))
+  select  c_last_name,c_first_name,sales
+ from (select c_last_name,c_first_name,sum(cs_quantity*cs_list_price) sales
+        from catalog_sales
+            ,customer
+            ,date_dim
+        where d_year = 2000
+         and d_moy = 2
+         and cs_sold_date_sk = d_date_sk
+         and cs_item_sk in (select item_sk from frequent_ss_items)
+         and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer)
+         and cs_bill_customer_sk = c_customer_sk
+       group by c_last_name,c_first_name
+      union all
+      select c_last_name,c_first_name,sum(ws_quantity*ws_list_price) sales
+       from web_sales
+           ,customer
+           ,date_dim
+       where d_year = 2000
+         and d_moy = 2
+         and ws_sold_date_sk = d_date_sk
+         and ws_item_sk in (select item_sk from frequent_ss_items)
+         and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer)
+         and ws_bill_customer_sk = c_customer_sk
+       group by c_last_name,c_first_name) y
+     order by c_last_name,c_first_name,sales
+  limit 100;
+---- RESULTS
+'Brown','Monika',6031.52
+'Collins','Gordon',727.57
+'Green','Jesse',9672.96
+'NULL','Robert',598.86
+---- TYPES
+STRING, STRING, DECIMAL
+====
diff --git a/testdata/workloads/tpcds/queries/tpcds-q24-1.test b/testdata/workloads/tpcds/queries/tpcds-q24-1.test
new file mode 100644
index 0000000..25d0dd5
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/tpcds-q24-1.test
@@ -0,0 +1,58 @@
+====
+---- QUERY: TPCDS-Q24-1
+with ssales as
+(select c_last_name
+      ,c_first_name
+      ,s_store_name
+      ,ca_state
+      ,s_state
+      ,i_color
+      ,i_current_price
+      ,i_manager_id
+      ,i_units
+      ,i_size
+      ,sum(ss_net_paid) netpaid
+from store_sales
+    ,store_returns
+    ,store
+    ,item
+    ,customer
+    ,customer_address
+where ss_ticket_number = sr_ticket_number
+  and ss_item_sk = sr_item_sk
+  and ss_customer_sk = c_customer_sk
+  and ss_item_sk = i_item_sk
+  and ss_store_sk = s_store_sk
+  and c_current_addr_sk = ca_address_sk
+  and c_birth_country <> upper(ca_country)
+  and s_zip = ca_zip
+and s_market_id = 8
+group by c_last_name
+        ,c_first_name
+        ,s_store_name
+        ,ca_state
+        ,s_state
+        ,i_color
+        ,i_current_price
+        ,i_manager_id
+        ,i_units
+        ,i_size)
+select c_last_name
+      ,c_first_name
+      ,s_store_name
+      ,sum(netpaid) paid
+from ssales
+where i_color = 'peach'
+group by c_last_name
+        ,c_first_name
+        ,s_store_name
+having sum(netpaid) > (select 0.05*avg(netpaid)
+                                 from ssales)
+order by c_last_name, c_first_name, s_store_name;
+---- RESULTS
+'Martins','Cara','bar',241.96
+'Smallwood','Rhonda','bar',3089.28
+'Terry','Sandra','bar',509.42
+---- TYPES
+STRING, STRING, STRING, DECIMAL
+====
diff --git a/testdata/workloads/tpcds/queries/tpcds-q24-2.test b/testdata/workloads/tpcds/queries/tpcds-q24-2.test
new file mode 100644
index 0000000..1ee1fc9
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/tpcds-q24-2.test
@@ -0,0 +1,59 @@
+====
+---- QUERY: TPCDS-Q24-2
+with ssales as
+(select c_last_name
+      ,c_first_name
+      ,s_store_name
+      ,ca_state
+      ,s_state
+      ,i_color
+      ,i_current_price
+      ,i_manager_id
+      ,i_units
+      ,i_size
+      ,sum(ss_net_paid) netpaid
+from store_sales
+    ,store_returns
+    ,store
+    ,item
+    ,customer
+    ,customer_address
+where ss_ticket_number = sr_ticket_number
+  and ss_item_sk = sr_item_sk
+  and ss_customer_sk = c_customer_sk
+  and ss_item_sk = i_item_sk
+  and ss_store_sk = s_store_sk
+  and c_current_addr_sk = ca_address_sk
+  and c_birth_country <> upper(ca_country)
+  and s_zip = ca_zip
+  and s_market_id = 8
+group by c_last_name
+        ,c_first_name
+        ,s_store_name
+        ,ca_state
+        ,s_state
+        ,i_color
+        ,i_current_price
+        ,i_manager_id
+        ,i_units
+        ,i_size)
+select c_last_name
+      ,c_first_name
+      ,s_store_name
+      ,sum(netpaid) paid
+from ssales
+where i_color = 'saddle'
+group by c_last_name
+        ,c_first_name
+        ,s_store_name
+having sum(netpaid) > (select 0.05*avg(netpaid)
+                           from ssales)
+order by c_last_name, c_first_name, s_store_name;
+---- RESULTS
+'Hamlin','Heather','able',149.65
+'Martin','Harold','bar',5834.88
+'Nall','Mike','able',999.70
+'Southern','Jeannie','bar',446.31
+---- TYPES
+STRING, STRING, STRING, DECIMAL
+====
diff --git a/testdata/workloads/tpcds/queries/tpcds-q44.test b/testdata/workloads/tpcds/queries/tpcds-q44.test
new file mode 100644
index 0000000..c11036b
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/tpcds-q44.test
@@ -0,0 +1,64 @@
+====
+---- QUERY: TPCDS-Q44
+SELECT asceding.rnk,
+       i1.i_product_name best_performing,
+       i2.i_product_name worst_performing
+FROM
+  (SELECT *
+   FROM
+     (SELECT item_sk,
+             rank() OVER (
+                          ORDER BY rank_col ASC) rnk
+      FROM
+        (SELECT ss_item_sk item_sk,
+                avg(ss_net_profit) rank_col
+         FROM store_sales ss1
+         WHERE ss_store_sk = 4
+         GROUP BY ss_item_sk
+         HAVING avg(ss_net_profit) > 0.9*
+           (SELECT avg(ss_net_profit) rank_col
+            FROM store_sales
+            WHERE ss_store_sk = 4
+              AND ss_addr_sk IS NULL
+            GROUP BY ss_store_sk))V1)V11
+   WHERE rnk < 11) asceding,
+  (SELECT *
+   FROM
+     (SELECT item_sk,
+             rank() OVER (
+                          ORDER BY rank_col DESC) rnk
+      FROM
+        (SELECT ss_item_sk item_sk,
+                avg(ss_net_profit) rank_col
+         FROM store_sales ss1
+         WHERE ss_store_sk = 4
+         GROUP BY ss_item_sk
+         HAVING avg(ss_net_profit) > 0.9*
+           (SELECT avg(ss_net_profit) rank_col
+            FROM store_sales
+            WHERE ss_store_sk = 4
+              AND ss_addr_sk IS NULL
+            GROUP BY ss_store_sk))V2)V21
+   WHERE rnk < 11) descending,
+     item i1,
+     item i2
+WHERE asceding.rnk = descending.rnk
+  AND i1.i_item_sk=asceding.item_sk
+  AND i2.i_item_sk=descending.item_sk
+ORDER BY asceding.rnk
+LIMIT 100;
+---- RESULTS
+1,'oughtantiprin st','callyeingbarcallyought'
+2,'barcallyprioughtought','bareseationcallyought'
+3,'ableeingantiable','barn stcallycally'
+4,'n stesebarn st','eingoughtn stn st'
+5,'antioughtationbarought','callycallybarantiought'
+6,'callyeseationantiought','NULL'
+7,'priableeseableought','eseableablepriought'
+8,'ableoughtableeseought','ationoughtantianti'
+9,'esebarableeseought','callyn stantieseought'
+10,'eingoughtn station','barcallyableought'
+---- TYPES
+BIGINT, STRING, STRING
+====
+
diff --git a/tests/query_test/test_tpcds_queries.py b/tests/query_test/test_tpcds_queries.py
index 41fa1a7..b8a8504 100644
--- a/tests/query_test/test_tpcds_queries.py
+++ b/tests/query_test/test_tpcds_queries.py
@@ -109,6 +109,18 @@ class TestTpcdsQuery(ImpalaTestSuite):
   def test_tpcds_q21(self, vector):
     self.run_test_case(self.get_workload() + '-q21', vector)
 
+  def test_tpcds_q23_1(self, vector):
+    self.run_test_case(self.get_workload() + '-q23-1', vector)
+
+  def test_tpcds_q23_2(self, vector):
+    self.run_test_case(self.get_workload() + '-q23-2', vector)
+
+  def test_tpcds_q24_1(self, vector):
+    self.run_test_case(self.get_workload() + '-q24-1', vector)
+
+  def test_tpcds_q24_2(self, vector):
+    self.run_test_case(self.get_workload() + '-q24-2', vector)
+
   def test_tpcds_q25(self, vector):
     self.run_test_case(self.get_workload() + '-q25', vector)
 
@@ -151,6 +163,9 @@ class TestTpcdsQuery(ImpalaTestSuite):
   def test_tpcds_q43(self, vector):
     self.run_test_case(self.get_workload() + '-q43', vector)
 
+  def test_tpcds_q44(self, vector):
+    self.run_test_case(self.get_workload() + '-q44', vector)
+
   def test_tpcds_q46(self, vector):
     self.run_test_case(self.get_workload() + '-q46', vector)
 


[impala] 01/02: IMPALA-8954: Uncorrelated scalar subqueries in the select list

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 388ad555d7df9e2711ea0ab81dbae4235c54b3cc
Author: Shant Hovsepian <sh...@cloudera.com>
AuthorDate: Fri May 29 16:26:07 2020 -0400

    IMPALA-8954: Uncorrelated scalar subqueries in the select list
    
    Extend StmtRewriter with the ability to rewrite scalar subqueries in the
    select list into cross joins. Currently the subquery must pass plan-time
    checks to determine that it returns a single row which may miss cases
    that may be valid at runtime or with more complex evaluation of the
    predicate expressions in the planner. Support for correlated subqueries
    will be a follow on change.
    
    Testing:
      * Added new analyzer tests, updated previous subquery tests
      * test_queries.py::TestQueries::test_subquery
      * Added test_tpcds_q9 to e2e and planner tests
    
    Change-Id: Ibcf55d26889aa01d69bb85f18c9241dda095fb66
    Reviewed-on: http://gerrit.cloudera.org:8080/16007
    Reviewed-by: Tim Armstrong <ta...@cloudera.com>
    Tested-by: Tim Armstrong <ta...@cloudera.com>
---
 .../org/apache/impala/analysis/SelectList.java     |   5 +
 .../org/apache/impala/analysis/SelectStmt.java     |  43 +-
 .../org/apache/impala/analysis/StmtRewriter.java   | 169 +++-
 .../impala/analysis/AnalyzeSubqueriesTest.java     |  23 +-
 testdata/datasets/tpcds/tpcds_schema_template.sql  |   2 +-
 .../queries/PlannerTest/subquery-rewrite.test      | 134 +++
 .../queries/PlannerTest/tpcds-all.test             | 916 +++++++++++++++++++++
 .../queries/QueryTest/subquery.test                | 173 ++++
 testdata/workloads/tpcds/queries/count.test        |   9 +-
 .../tpcds/queries/tpcds-decimal_v2-q9.test         |  54 ++
 testdata/workloads/tpcds/queries/tpcds-q9.test     |  54 ++
 tests/query_test/test_tpcds_queries.py             |   6 +
 tests/util/parse_util.py                           |   2 +-
 13 files changed, 1572 insertions(+), 18 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/analysis/SelectList.java b/fe/src/main/java/org/apache/impala/analysis/SelectList.java
index 3ac3830..9ed28c0 100644
--- a/fe/src/main/java/org/apache/impala/analysis/SelectList.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SelectList.java
@@ -94,9 +94,14 @@ public class SelectList {
 
   public void rewriteExprs(ExprRewriter rewriter, Analyzer analyzer)
       throws AnalysisException {
+    List<Subquery> subqueryExprs = new ArrayList<>();
     for (SelectListItem item: items_) {
       if (item.isStar()) continue;
       item.setExpr(rewriter.rewrite(item.getExpr(), analyzer));
+      item.getExpr().collect(Subquery.class, subqueryExprs);
+    }
+    for (Subquery s : subqueryExprs) {
+      s.getStatement().rewriteExprs(rewriter);
     }
   }
 
diff --git a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
index 00323de..5618694 100644
--- a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
@@ -274,9 +274,23 @@ public class SelectStmt extends QueryStmt {
           // Analyze the resultExpr before generating a label to ensure enforcement
           // of expr child and depth limits (toColumn() label may call toSql()).
           item.getExpr().analyze(analyzer_);
-          if (item.getExpr().contains(Predicates.instanceOf(Subquery.class))) {
-            throw new AnalysisException(
-                "Subqueries are not supported in the select list.");
+          // Check for scalar subquery types which are not supported
+          List<Subquery> subqueryExprs = new ArrayList<>();
+          item.getExpr().collect(Subquery.class, subqueryExprs);
+          for (Subquery s : subqueryExprs) {
+            Preconditions.checkState(s.getStatement() instanceof SelectStmt);
+            if (!s.returnsScalarColumn()) {
+              throw new AnalysisException("A non-scalar subquery is not supported in "
+                  + "the expression: " + item.getExpr().toSql());
+            }
+            if (s.getStatement().isRuntimeScalar()) {
+              throw new AnalysisException(
+                  "A subquery which may return more than one row is not supported in "
+                  + "the expression: " + item.getExpr().toSql());
+            }
+            Preconditions.checkState(((SelectStmt) s.getStatement()).returnsSingleRow(),
+                "Invariant violated: Only subqueries that are guaranteed to return a "
+                    + "single row are supported: " + item.getExpr().toSql());
           }
           resultExprs_.add(item.getExpr());
           String label = item.toColumnLabel(i, analyzer_.useHiveColLabels());
@@ -1197,12 +1211,17 @@ public class SelectStmt extends QueryStmt {
     if (fromClauseOnly) {
       fromClause_.collectFromClauseTableRefs(tblRefs);
     } else {
+      // Collect TableRefs in all subqueries.
       fromClause_.collectTableRefs(tblRefs);
-    }
-    if (!fromClauseOnly && whereClause_ != null) {
-      // Collect TableRefs in WHERE-clause subqueries.
       List<Subquery> subqueries = new ArrayList<>();
-      whereClause_.collect(Subquery.class, subqueries);
+      if (whereClause_ != null) {
+        whereClause_.collect(Subquery.class, subqueries);
+      }
+      for (SelectListItem item : selectList_.getItems()) {
+        if (item.isStar()) continue;
+        item.getExpr().collect(Subquery.class, subqueries);
+      }
+
       for (Subquery sq: subqueries) {
         sq.getStatement().collectTableRefs(tblRefs, fromClauseOnly);
       }
@@ -1211,8 +1230,6 @@ public class SelectStmt extends QueryStmt {
 
   @Override
   public void collectInlineViews(Set<FeView> inlineViews) {
-    // Impala currently supports sub queries only in FROM, WHERE & WITH clauses. Hence,
-    // this function does not carry out any checks on HAVING clause.
     super.collectInlineViews(inlineViews);
     List<TableRef> fromTblRefs = getTableRefs();
     Preconditions.checkNotNull(inlineViews);
@@ -1236,6 +1253,14 @@ public class SelectStmt extends QueryStmt {
         whereSubQueries.get(0).getStatement().collectInlineViews(inlineViews);
       }
     }
+    List<Subquery> selectListSubQueries = Lists.newArrayList();
+    for (SelectListItem item : selectList_.getItems()) {
+      if (item.isStar()) continue;
+      item.getExpr().collect(Subquery.class, selectListSubQueries);
+    }
+    for (Subquery sq : selectListSubQueries) {
+      sq.getStatement().collectInlineViews(inlineViews);
+    }
   }
 
   @Override
diff --git a/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java b/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java
index 850b629..c9fb004 100644
--- a/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java
+++ b/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java
@@ -934,8 +934,9 @@ public class StmtRewriter {
 
     /**
      * Rewrite all the subqueries of a SelectStmt in place. Subqueries are currently
-     * supported in FROM and WHERE clauses. The rewrite is performed in place and not in a
-     * clone of SelectStmt because it requires the stmt to be analyzed.
+     * supported in the FROM clause, WHERE clause and SELECT list. The rewrite is
+     * performed in place and not in a clone of SelectStmt because it requires the stmt to
+     * be analyzed.
      */
     @Override
     protected void rewriteSelectStmtHook(SelectStmt stmt, Analyzer analyzer)
@@ -952,6 +953,7 @@ public class StmtRewriter {
         }
         rewriteWhereClauseSubqueries(stmt, analyzer);
       }
+      rewriteSelectListSubqueries(stmt, analyzer);
     }
 
     /**
@@ -1096,5 +1098,168 @@ public class StmtRewriter {
       smap.put(subquery, newSubquery);
       return expr.substitute(smap, analyzer, false);
     }
+
+    /**
+     * Rewrite subqueries of a stmt's SELECT list. Scalar subqueries are the only type
+     * of subquery supported in the select list.  Scalar subqueries return a single column
+     * and at most 1 row, a runtime error should be thrown if more than one row is
+     * returned. Generally these subqueries can be evaluated once for every row of the
+     * outer query however for performance reasons we want to rewrite evaluation to use
+     * joins where possible.
+     *
+     * 1) Uncorrelated Scalar Aggregate Query
+     *
+     *    SELECT T1.a, (SELECT avg(T2.a) from T2) FROM T1;
+     *
+     *    This is implemented by flattening into a join.
+     *
+     *    SELECT T1.a, $a$1.$c$1 FROM T1, (SELECT avg(T2.a) $c$1 FROM T2) $a$1
+     *
+     *    Currently we only support very simple subqueries which return a single aggregate
+     *    function with no group by columns unless a LIMIT 1 is given. TODO: IMPALA-1285
+     *
+     * 2) Correlated Scalar Aggregate
+     *
+     *    TODO: IMPALA-8955
+     *    SELECT id, (SELECT count(*) FROM T2 WHERE id=a.id ) FROM T1 a
+     *
+     *    This can be flattened with a LEFT OUTER JOIN
+     *
+     *    SELECT T1.a, $a$1.$c$1 FROM T1 LEFT OUTER JOIN
+     *      (SELECT id, count(*) $c$1 FROM T2 GROUP BY id) $a$1 ON T1.id = $a$1.id
+     *
+     * 3) Correlated Scalar
+     *
+     *    TODO: IMPALA-6315
+     *    SELECT id, (SELECT cost FROM T2 WHERE id=a.id ) FROM T1 a
+     *
+     *    In this case there is no aggregate function to guarantee only a single row is
+     *    returned per group so a run time cardinality check must be applied. An exception
+     *    would be if the correlated predicates had primary key constraints.
+     *
+     * 4) Runtime Scalar Subqueries
+     *
+     *    TODO: IMPALA-5100
+     *    We do have a {@link CardinalityCheckNode} for runtime checks however queries
+     *    can't always be rewritten into an NLJ without special care. For example with
+     *    conditional expression like below:
+     *
+     *    SELECT T1.a,
+     *      IF((SELECT max(T2.a) from T2 > 10,
+     *         (SELECT T2.a from T2 WHERE id=T1.id),
+     *         (SELECT T3.a from T2 WHERE if=T1.id)
+     *    FROM T1;
+     *
+     *    If rewritten to joins with cardinality checks then both legs of the conditional
+     *    expression would be evaluated regardless of the condition. If the false case
+     *    were to return a runtime error while when the true doesn't and the condition
+     *    evaluates to true then we'd have incorrect behavior.
+     */
+    private void rewriteSelectListSubqueries(SelectStmt stmt, Analyzer analyzer)
+        throws AnalysisException {
+      Preconditions.checkNotNull(stmt);
+      Preconditions.checkNotNull(analyzer);
+      final int numTableRefs = stmt.fromClause_.size();
+      final boolean parentHasAgg = stmt.hasMultiAggInfo();
+      // Track any new inline views so we later ensure they are rewritten if needed.
+      // An improvement would be to have a pre/post order abstract rewriter class.
+      final List<InlineViewRef> newViews = new ArrayList<>();
+      for (SelectListItem selectItem : stmt.getSelectList().getItems()) {
+        if (selectItem.isStar()) {
+          continue;
+        }
+
+        final Expr expr = selectItem.getExpr();
+        final List<Subquery> subqueries = new ArrayList<>();
+        // Use collect as opposed to collectAll in order to allow nested subqueries to be
+        // rewritten as needed. For example a subquery in the select list which contains
+        // its own subquery in the where clause.
+        expr.collect(Predicates.instanceOf(Subquery.class), subqueries);
+        if (subqueries.size() == 0) {
+          continue;
+        }
+        final ExprSubstitutionMap smap = new ExprSubstitutionMap();
+        for (Subquery sq : subqueries) {
+          final SelectStmt subqueryStmt = (SelectStmt) sq.getStatement();
+          // TODO: Handle correlated subqueries IMPALA-8955
+          if (isCorrelated(subqueryStmt)) {
+            throw new AnalysisException("A correlated scalar subquery is not supported "
+                + "in the expression: " + expr.toSql());
+          }
+          Preconditions.checkState(sq.getType().isScalarType());
+
+          // Existential subqueries in Impala aren't really execution time expressions,
+          // they are either checked at plan time or expected to be handled by the
+          // subquery rewrite into a join. In the case of the select list we will only
+          // support plan time evaluation.
+          boolean replacedExists = false;
+          final List<ExistsPredicate> existsPredicates = new ArrayList<>();
+          expr.collect(ExistsPredicate.class, existsPredicates);
+          for (ExistsPredicate ep : existsPredicates) {
+            // Check to see if the current subquery is the child of an exists predicate.
+            if (ep.contains(sq)) {
+              final BoolLiteral boolLiteral = replaceExistsPredicate(ep);
+              if (boolLiteral != null) {
+                boolLiteral.analyze(analyzer);
+                smap.put(ep, boolLiteral);
+                replacedExists = true;
+                break;
+              } else {
+                throw new AnalysisException(
+                    "Unsupported subquery with runtime scalar check: " + ep.toSql());
+              }
+            }
+          }
+          if (replacedExists) {
+            continue;
+          }
+
+          List<String> colLabels = new ArrayList<>();
+          for (int i = 0; i < subqueryStmt.getColLabels().size(); ++i) {
+            colLabels.add(subqueryStmt.getColumnAliasGenerator().getNextAlias());
+          }
+          // Create a new inline view from the subquery stmt aliasing the columns.
+          InlineViewRef inlineView = new InlineViewRef(
+              stmt.getTableAliasGenerator().getNextAlias(), subqueryStmt, colLabels);
+          inlineView.reset();
+          inlineView.analyze(analyzer);
+
+          // For uncorrelated scalar subqueries we rewrite with a CROSS_JOIN. This makes
+          // it simpler to further optimize by merging subqueries without worrying about
+          // join ordering as in IMPALA-9796. For correlated subqueries we'd want to
+          // rewrite to a LOJ.
+          inlineView.setJoinOp(JoinOperator.CROSS_JOIN);
+          stmt.fromClause_.add(inlineView);
+          newViews.add(inlineView);
+
+          SlotRef slotRef = new SlotRef(Lists.newArrayList(
+              inlineView.getUniqueAlias(), inlineView.getColLabels().get(0)));
+          slotRef.analyze(analyzer);
+          Expr substitute = slotRef;
+          // Need to wrap the expression with a no-op aggregate function if the stmt does
+          // any aggregation, using MAX() given no explicit function to return any value
+          // in a group.
+          if (parentHasAgg) {
+            final FunctionCallExpr aggWrapper =
+                new FunctionCallExpr("max", Lists.newArrayList((Expr) slotRef));
+            aggWrapper.analyze(analyzer);
+            substitute = aggWrapper;
+          }
+          // Substitute original subquery expression with a reference to the inline view.
+          smap.put(sq, substitute);
+        }
+        // Update select list with any new slot references.
+        selectItem.setExpr(expr.substitute(smap, analyzer, false));
+      }
+      // Rewrite any new views
+      for (InlineViewRef v : newViews) {
+        rewriteQueryStatement(v.getViewStmt(), v.getAnalyzer());
+      }
+      // Only applies to the original list of TableRefs, not any as a result of the
+      // rewrite.
+      if (!newViews.isEmpty()) {
+        replaceUnqualifiedStarItems(stmt, numTableRefs);
+      }
+    }
   }
 }
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeSubqueriesTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeSubqueriesTest.java
index d297c48..a845718 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeSubqueriesTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeSubqueriesTest.java
@@ -1207,6 +1207,20 @@ public class AnalyzeSubqueriesTest extends AnalyzerTest {
         "(select id from functional.alltypes t2 where t1.int_col = t2.int_col)",
         "Unsupported correlated subquery with runtime scalar check: " +
         "SELECT id FROM functional.alltypes t2 WHERE t1.int_col = t2.int_col");
+
+    // Scalar subqueries in the select list
+    AnalyzesOk("select id, 10 + (select max(int_col) from functional.alltypestiny) "
+        + "from functional.alltypestiny");
+    AnalyzesOk("select id, (select count(*) from functional.alltypestiny where int_col "
+        + "< (select max(int_col) from functional.alltypes)) from functional.dimtbl");
+    AnalysisError("select id, (select id, count(*) from functional.alltypestiny "
+            + "group by 1) from functional.dimtbl",
+        "A non-scalar subquery is not supported in the expression: "
+            + "(SELECT id, count(*) FROM functional.alltypestiny GROUP BY id)");
+    AnalysisError("select id, (select count(*) from functional.alltypestiny b where "
+            + "id=a.id ) from functional.alltypes a",
+        "A correlated scalar subquery is not supported in the expression: "
+            + "(SELECT count(*) FROM functional.alltypestiny b WHERE id = a.id)");
   }
 
   @Test
@@ -1339,7 +1353,7 @@ public class AnalyzeSubqueriesTest extends AnalyzerTest {
   }
 
   @Test
-  public void TestIllegalSubquery() throws AnalysisException {
+  public void testIllegalSubquery() throws AnalysisException {
     // Predicate with a child subquery in the HAVING clause
     AnalysisError("select id, count(*) from functional.alltypestiny t group by " +
         "id having count(*) > (select count(*) from functional.alltypesagg)",
@@ -1349,9 +1363,10 @@ public class AnalyzeSubqueriesTest extends AnalyzerTest {
         "Subqueries are not supported in the HAVING clause.");
 
     // Subquery in the select list
-    AnalysisError("select id, (select int_col from functional.alltypestiny) " +
-        "from functional.alltypestiny",
-        "Subqueries are not supported in the select list.");
+    AnalysisError("select id, (select int_col from functional.alltypestiny) "
+            + "from functional.alltypestiny",
+        "A subquery which may return more than one row is not supported in the " +
+        "expression: " + "(SELECT int_col FROM functional.alltypestiny)");
 
     // Subquery in the GROUP BY clause
     AnalysisError("select id, count(*) from functional.alltypestiny " +
diff --git a/testdata/datasets/tpcds/tpcds_schema_template.sql b/testdata/datasets/tpcds/tpcds_schema_template.sql
index 079e21a..78f9f7c 100644
--- a/testdata/datasets/tpcds/tpcds_schema_template.sql
+++ b/testdata/datasets/tpcds/tpcds_schema_template.sql
@@ -1138,7 +1138,7 @@ tpcds
 ---- BASE_TABLE_NAME
 reason
 ---- COLUMNS
-r_reason_sk           bigint
+r_reason_sk           int
 r_reason_id           string
 r_reason_desc         string
 ---- ROW_FORMAT
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test b/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
index 8335ff3..bcffd73 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
@@ -3296,3 +3296,137 @@ PLAN-ROOT SINK
    runtime filters: RF000 -> int_col
    row-size=89B cardinality=7.30K
 ====
+# Scalar subquery in select list
+SELECT alltypesagg.string_col, alltypesagg.date_string_col, dimtbl.name,
+SUM(CASE WHEN dimtbl.zip < 94613 THEN 1 ELSE 0 END) / CAST((SELECT COUNT(*) FROM functional.dimtbl WHERE dimtbl.zip < 94613) AS decimal(10, 6)) AS close,
+SUM(CASE WHEN dimtbl.zip >= 94613 THEN 1 ELSE 0 END) / CAST((SELECT COUNT(*) FROM functional.dimtbl WHERE dimtbl.zip >= 94613) AS decimal(10, 6)) AS remote
+FROM functional.alltypesagg JOIN functional.dimtbl ON (alltypesagg.id = dimtbl.id)
+GROUP BY 1, 2, 3
+ORDER BY alltypesagg.string_col DESC
+---- PLAN
+PLAN-ROOT SINK
+|
+10:SORT
+|  order by: alltypesagg.string_col DESC
+|  row-size=84B cardinality=11
+|
+09:AGGREGATE [FINALIZE]
+|  output: sum(CASE WHEN dimtbl.zip < 94613 THEN 1 ELSE 0 END), max(count(*)), sum(CASE WHEN dimtbl.zip >= 94613 THEN 1 ELSE 0 END), max(count(*))
+|  group by: alltypesagg.string_col, alltypesagg.date_string_col, dimtbl.name
+|  row-size=84B cardinality=11
+|
+08:NESTED LOOP JOIN [CROSS JOIN]
+|  row-size=84B cardinality=11
+|
+|--05:AGGREGATE [FINALIZE]
+|  |  output: count(*)
+|  |  row-size=8B cardinality=1
+|  |
+|  04:SCAN HDFS [functional.dimtbl]
+|     HDFS partitions=1/1 files=1 size=171B
+|     predicates: dimtbl.zip >= 94613
+|     row-size=4B cardinality=1
+|
+07:NESTED LOOP JOIN [CROSS JOIN]
+|  row-size=76B cardinality=11
+|
+|--03:AGGREGATE [FINALIZE]
+|  |  output: count(*)
+|  |  row-size=8B cardinality=1
+|  |
+|  02:SCAN HDFS [functional.dimtbl]
+|     HDFS partitions=1/1 files=1 size=171B
+|     predicates: dimtbl.zip < 94613
+|     row-size=4B cardinality=1
+|
+06:HASH JOIN [INNER JOIN]
+|  hash predicates: alltypesagg.id = dimtbl.id
+|  runtime filters: RF000 <- dimtbl.id
+|  row-size=68B cardinality=11
+|
+|--01:SCAN HDFS [functional.dimtbl]
+|     HDFS partitions=1/1 files=1 size=171B
+|     row-size=29B cardinality=10
+|
+00:SCAN HDFS [functional.alltypesagg]
+   HDFS partitions=11/11 files=11 size=814.73KB
+   runtime filters: RF000 -> alltypesagg.id
+   row-size=39B cardinality=11.00K
+---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
+20:MERGING-EXCHANGE [UNPARTITIONED]
+|  order by: alltypesagg.string_col DESC
+|
+10:SORT
+|  order by: alltypesagg.string_col DESC
+|  row-size=84B cardinality=11
+|
+19:AGGREGATE [FINALIZE]
+|  output: sum:merge(CASE WHEN dimtbl.zip < 94613 THEN 1 ELSE 0 END), max:merge(`$a$1`.`$c$1`), sum:merge(CASE WHEN dimtbl.zip >= 94613 THEN 1 ELSE 0 END), max:merge(`$a$2`.`$c$1`)
+|  group by: alltypesagg.string_col, alltypesagg.date_string_col, dimtbl.name
+|  row-size=84B cardinality=11
+|
+18:EXCHANGE [HASH(alltypesagg.string_col,alltypesagg.date_string_col,dimtbl.name)]
+|
+09:AGGREGATE [STREAMING]
+|  output: sum(CASE WHEN dimtbl.zip < 94613 THEN 1 ELSE 0 END), max(count(*)), sum(CASE WHEN dimtbl.zip >= 94613 THEN 1 ELSE 0 END), max(count(*))
+|  group by: alltypesagg.string_col, alltypesagg.date_string_col, dimtbl.name
+|  row-size=84B cardinality=11
+|
+08:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+|  row-size=84B cardinality=11
+|
+|--17:EXCHANGE [BROADCAST]
+|  |
+|  16:AGGREGATE [FINALIZE]
+|  |  output: count:merge(*)
+|  |  row-size=8B cardinality=1
+|  |
+|  15:EXCHANGE [UNPARTITIONED]
+|  |
+|  05:AGGREGATE
+|  |  output: count(*)
+|  |  row-size=8B cardinality=1
+|  |
+|  04:SCAN HDFS [functional.dimtbl]
+|     HDFS partitions=1/1 files=1 size=171B
+|     predicates: dimtbl.zip >= 94613
+|     row-size=4B cardinality=1
+|
+07:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+|  row-size=76B cardinality=11
+|
+|--14:EXCHANGE [BROADCAST]
+|  |
+|  13:AGGREGATE [FINALIZE]
+|  |  output: count:merge(*)
+|  |  row-size=8B cardinality=1
+|  |
+|  12:EXCHANGE [UNPARTITIONED]
+|  |
+|  03:AGGREGATE
+|  |  output: count(*)
+|  |  row-size=8B cardinality=1
+|  |
+|  02:SCAN HDFS [functional.dimtbl]
+|     HDFS partitions=1/1 files=1 size=171B
+|     predicates: dimtbl.zip < 94613
+|     row-size=4B cardinality=1
+|
+06:HASH JOIN [INNER JOIN, BROADCAST]
+|  hash predicates: alltypesagg.id = dimtbl.id
+|  runtime filters: RF000 <- dimtbl.id
+|  row-size=68B cardinality=11
+|
+|--11:EXCHANGE [BROADCAST]
+|  |
+|  01:SCAN HDFS [functional.dimtbl]
+|     HDFS partitions=1/1 files=1 size=171B
+|     row-size=29B cardinality=10
+|
+00:SCAN HDFS [functional.alltypesagg]
+   HDFS partitions=11/11 files=11 size=814.73KB
+   runtime filters: RF000 -> alltypesagg.id
+   row-size=39B cardinality=11.00K
+====
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test
index 8d79757..9057a3f 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test
@@ -8788,3 +8788,919 @@ PLAN-ROOT SINK
    runtime filters: RF004 -> ss_sold_time_sk, RF002 -> ss_hdemo_sk
    row-size=12B cardinality=2.88M
 ====
+# TPCDS-Q9
+select case when (select count(*)
+                  from store_sales
+                  where ss_quantity between 1 and 20) > 74129
+            then (select avg(ss_ext_discount_amt)
+                  from store_sales
+                  where ss_quantity between 1 and 20)
+            else (select avg(ss_net_profit)
+                  from store_sales
+                  where ss_quantity between 1 and 20) end bucket1,
+       case when (select count(*)
+                  from store_sales
+                  where ss_quantity between 21 and 40) > 122840
+            then (select avg(ss_ext_discount_amt)
+                  from store_sales
+                  where ss_quantity between 21 and 40)
+            else (select avg(ss_net_profit)
+                  from store_sales
+                  where ss_quantity between 21 and 40) end bucket2,
+       case when (select count(*)
+                  from store_sales
+                  where ss_quantity between 41 and 60) > 56580
+            then (select avg(ss_ext_discount_amt)
+                  from store_sales
+                  where ss_quantity between 41 and 60)
+            else (select avg(ss_net_profit)
+                  from store_sales
+                  where ss_quantity between 41 and 60) end bucket3,
+       case when (select count(*)
+                  from store_sales
+                  where ss_quantity between 61 and 80) > 10097
+            then (select avg(ss_ext_discount_amt)
+                  from store_sales
+                  where ss_quantity between 61 and 80)
+            else (select avg(ss_net_profit)
+                  from store_sales
+                  where ss_quantity between 61 and 80) end bucket4,
+       case when (select count(*)
+                  from store_sales
+                  where ss_quantity between 81 and 100) > 165306
+            then (select avg(ss_ext_discount_amt)
+                  from store_sales
+                  where ss_quantity between 81 and 100)
+            else (select avg(ss_net_profit)
+                  from store_sales
+                  where ss_quantity between 81 and 100) end bucket5
+from reason
+where r_reason_sk = 1
+---- PLAN
+Max Per-Host Resource Reservation: Memory=120.01MB Threads=17
+Per-Host Resource Estimates: Memory=2.05GB
+PLAN-ROOT SINK
+|
+45:NESTED LOOP JOIN [CROSS JOIN]
+|  row-size=124B cardinality=1
+|
+|--30:AGGREGATE [FINALIZE]
+|  |  output: avg(ss_net_profit)
+|  |  row-size=8B cardinality=1
+|  |
+|  29:SCAN HDFS [tpcds.store_sales]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_quantity <= 100, ss_quantity >= 81
+|     row-size=8B cardinality=288.04K
+|
+44:NESTED LOOP JOIN [CROSS JOIN]
+|  row-size=116B cardinality=1
+|
+|--28:AGGREGATE [FINALIZE]
+|  |  output: avg(ss_ext_discount_amt)
+|  |  row-size=8B cardinality=1
+|  |
+|  27:SCAN HDFS [tpcds.store_sales]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_quantity <= 100, ss_quantity >= 81
+|     row-size=8B cardinality=288.04K
+|
+43:NESTED LOOP JOIN [CROSS JOIN]
+|  row-size=108B cardinality=1
+|
+|--26:AGGREGATE [FINALIZE]
+|  |  output: count(*)
+|  |  row-size=8B cardinality=1
+|  |
+|  25:SCAN HDFS [tpcds.store_sales]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_quantity <= 100, ss_quantity >= 81
+|     row-size=4B cardinality=288.04K
+|
+42:NESTED LOOP JOIN [CROSS JOIN]
+|  row-size=100B cardinality=1
+|
+|--24:AGGREGATE [FINALIZE]
+|  |  output: avg(ss_net_profit)
+|  |  row-size=8B cardinality=1
+|  |
+|  23:SCAN HDFS [tpcds.store_sales]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_quantity <= 80, ss_quantity >= 61
+|     row-size=8B cardinality=288.04K
+|
+41:NESTED LOOP JOIN [CROSS JOIN]
+|  row-size=92B cardinality=1
+|
+|--22:AGGREGATE [FINALIZE]
+|  |  output: avg(ss_ext_discount_amt)
+|  |  row-size=8B cardinality=1
+|  |
+|  21:SCAN HDFS [tpcds.store_sales]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_quantity <= 80, ss_quantity >= 61
+|     row-size=8B cardinality=288.04K
+|
+40:NESTED LOOP JOIN [CROSS JOIN]
+|  row-size=84B cardinality=1
+|
+|--20:AGGREGATE [FINALIZE]
+|  |  output: count(*)
+|  |  row-size=8B cardinality=1
+|  |
+|  19:SCAN HDFS [tpcds.store_sales]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_quantity <= 80, ss_quantity >= 61
+|     row-size=4B cardinality=288.04K
+|
+39:NESTED LOOP JOIN [CROSS JOIN]
+|  row-size=76B cardinality=1
+|
+|--18:AGGREGATE [FINALIZE]
+|  |  output: avg(ss_net_profit)
+|  |  row-size=8B cardinality=1
+|  |
+|  17:SCAN HDFS [tpcds.store_sales]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_quantity <= 60, ss_quantity >= 41
+|     row-size=8B cardinality=288.04K
+|
+38:NESTED LOOP JOIN [CROSS JOIN]
+|  row-size=68B cardinality=1
+|
+|--16:AGGREGATE [FINALIZE]
+|  |  output: avg(ss_ext_discount_amt)
+|  |  row-size=8B cardinality=1
+|  |
+|  15:SCAN HDFS [tpcds.store_sales]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_quantity <= 60, ss_quantity >= 41
+|     row-size=8B cardinality=288.04K
+|
+37:NESTED LOOP JOIN [CROSS JOIN]
+|  row-size=60B cardinality=1
+|
+|--14:AGGREGATE [FINALIZE]
+|  |  output: count(*)
+|  |  row-size=8B cardinality=1
+|  |
+|  13:SCAN HDFS [tpcds.store_sales]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_quantity <= 60, ss_quantity >= 41
+|     row-size=4B cardinality=288.04K
+|
+36:NESTED LOOP JOIN [CROSS JOIN]
+|  row-size=52B cardinality=1
+|
+|--12:AGGREGATE [FINALIZE]
+|  |  output: avg(ss_net_profit)
+|  |  row-size=8B cardinality=1
+|  |
+|  11:SCAN HDFS [tpcds.store_sales]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_quantity <= 40, ss_quantity >= 21
+|     row-size=8B cardinality=288.04K
+|
+35:NESTED LOOP JOIN [CROSS JOIN]
+|  row-size=44B cardinality=1
+|
+|--10:AGGREGATE [FINALIZE]
+|  |  output: avg(ss_ext_discount_amt)
+|  |  row-size=8B cardinality=1
+|  |
+|  09:SCAN HDFS [tpcds.store_sales]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_quantity <= 40, ss_quantity >= 21
+|     row-size=8B cardinality=288.04K
+|
+34:NESTED LOOP JOIN [CROSS JOIN]
+|  row-size=36B cardinality=1
+|
+|--08:AGGREGATE [FINALIZE]
+|  |  output: count(*)
+|  |  row-size=8B cardinality=1
+|  |
+|  07:SCAN HDFS [tpcds.store_sales]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_quantity <= 40, ss_quantity >= 21
+|     row-size=4B cardinality=288.04K
+|
+33:NESTED LOOP JOIN [CROSS JOIN]
+|  row-size=28B cardinality=1
+|
+|--06:AGGREGATE [FINALIZE]
+|  |  output: avg(ss_net_profit)
+|  |  row-size=8B cardinality=1
+|  |
+|  05:SCAN HDFS [tpcds.store_sales]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_quantity <= 20, ss_quantity >= 1
+|     row-size=8B cardinality=288.04K
+|
+32:NESTED LOOP JOIN [CROSS JOIN]
+|  row-size=20B cardinality=1
+|
+|--04:AGGREGATE [FINALIZE]
+|  |  output: avg(ss_ext_discount_amt)
+|  |  row-size=8B cardinality=1
+|  |
+|  03:SCAN HDFS [tpcds.store_sales]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_quantity <= 20, ss_quantity >= 1
+|     row-size=8B cardinality=288.04K
+|
+31:NESTED LOOP JOIN [CROSS JOIN]
+|  row-size=12B cardinality=1
+|
+|--00:SCAN HDFS [tpcds.reason]
+|     HDFS partitions=1/1 files=1 size=1.31KB
+|     predicates: r_reason_sk = 1
+|     row-size=4B cardinality=1
+|
+02:AGGREGATE [FINALIZE]
+|  output: count(*)
+|  row-size=8B cardinality=1
+|
+01:SCAN HDFS [tpcds.store_sales]
+   HDFS partitions=1824/1824 files=1824 size=346.60MB
+   predicates: ss_quantity <= 20, ss_quantity >= 1
+   row-size=4B cardinality=288.04K
+---- DISTRIBUTEDPLAN
+Max Per-Host Resource Reservation: Memory=120.01MB Threads=47
+Per-Host Resource Estimates: Memory=2.20GB
+PLAN-ROOT SINK
+|
+45:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+|  row-size=124B cardinality=1
+|
+|--90:EXCHANGE [UNPARTITIONED]
+|  |
+|  89:AGGREGATE [FINALIZE]
+|  |  output: avg:merge(ss_net_profit)
+|  |  row-size=8B cardinality=1
+|  |
+|  88:EXCHANGE [UNPARTITIONED]
+|  |
+|  30:AGGREGATE
+|  |  output: avg(ss_net_profit)
+|  |  row-size=8B cardinality=1
+|  |
+|  29:SCAN HDFS [tpcds.store_sales]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_quantity <= 100, ss_quantity >= 81
+|     row-size=8B cardinality=288.04K
+|
+44:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+|  row-size=116B cardinality=1
+|
+|--87:EXCHANGE [UNPARTITIONED]
+|  |
+|  86:AGGREGATE [FINALIZE]
+|  |  output: avg:merge(ss_ext_discount_amt)
+|  |  row-size=8B cardinality=1
+|  |
+|  85:EXCHANGE [UNPARTITIONED]
+|  |
+|  28:AGGREGATE
+|  |  output: avg(ss_ext_discount_amt)
+|  |  row-size=8B cardinality=1
+|  |
+|  27:SCAN HDFS [tpcds.store_sales]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_quantity <= 100, ss_quantity >= 81
+|     row-size=8B cardinality=288.04K
+|
+43:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+|  row-size=108B cardinality=1
+|
+|--84:EXCHANGE [UNPARTITIONED]
+|  |
+|  83:AGGREGATE [FINALIZE]
+|  |  output: count:merge(*)
+|  |  row-size=8B cardinality=1
+|  |
+|  82:EXCHANGE [UNPARTITIONED]
+|  |
+|  26:AGGREGATE
+|  |  output: count(*)
+|  |  row-size=8B cardinality=1
+|  |
+|  25:SCAN HDFS [tpcds.store_sales]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_quantity <= 100, ss_quantity >= 81
+|     row-size=4B cardinality=288.04K
+|
+42:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+|  row-size=100B cardinality=1
+|
+|--81:EXCHANGE [UNPARTITIONED]
+|  |
+|  80:AGGREGATE [FINALIZE]
+|  |  output: avg:merge(ss_net_profit)
+|  |  row-size=8B cardinality=1
+|  |
+|  79:EXCHANGE [UNPARTITIONED]
+|  |
+|  24:AGGREGATE
+|  |  output: avg(ss_net_profit)
+|  |  row-size=8B cardinality=1
+|  |
+|  23:SCAN HDFS [tpcds.store_sales]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_quantity <= 80, ss_quantity >= 61
+|     row-size=8B cardinality=288.04K
+|
+41:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+|  row-size=92B cardinality=1
+|
+|--78:EXCHANGE [UNPARTITIONED]
+|  |
+|  77:AGGREGATE [FINALIZE]
+|  |  output: avg:merge(ss_ext_discount_amt)
+|  |  row-size=8B cardinality=1
+|  |
+|  76:EXCHANGE [UNPARTITIONED]
+|  |
+|  22:AGGREGATE
+|  |  output: avg(ss_ext_discount_amt)
+|  |  row-size=8B cardinality=1
+|  |
+|  21:SCAN HDFS [tpcds.store_sales]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_quantity <= 80, ss_quantity >= 61
+|     row-size=8B cardinality=288.04K
+|
+40:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+|  row-size=84B cardinality=1
+|
+|--75:EXCHANGE [UNPARTITIONED]
+|  |
+|  74:AGGREGATE [FINALIZE]
+|  |  output: count:merge(*)
+|  |  row-size=8B cardinality=1
+|  |
+|  73:EXCHANGE [UNPARTITIONED]
+|  |
+|  20:AGGREGATE
+|  |  output: count(*)
+|  |  row-size=8B cardinality=1
+|  |
+|  19:SCAN HDFS [tpcds.store_sales]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_quantity <= 80, ss_quantity >= 61
+|     row-size=4B cardinality=288.04K
+|
+39:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+|  row-size=76B cardinality=1
+|
+|--72:EXCHANGE [UNPARTITIONED]
+|  |
+|  71:AGGREGATE [FINALIZE]
+|  |  output: avg:merge(ss_net_profit)
+|  |  row-size=8B cardinality=1
+|  |
+|  70:EXCHANGE [UNPARTITIONED]
+|  |
+|  18:AGGREGATE
+|  |  output: avg(ss_net_profit)
+|  |  row-size=8B cardinality=1
+|  |
+|  17:SCAN HDFS [tpcds.store_sales]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_quantity <= 60, ss_quantity >= 41
+|     row-size=8B cardinality=288.04K
+|
+38:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+|  row-size=68B cardinality=1
+|
+|--69:EXCHANGE [UNPARTITIONED]
+|  |
+|  68:AGGREGATE [FINALIZE]
+|  |  output: avg:merge(ss_ext_discount_amt)
+|  |  row-size=8B cardinality=1
+|  |
+|  67:EXCHANGE [UNPARTITIONED]
+|  |
+|  16:AGGREGATE
+|  |  output: avg(ss_ext_discount_amt)
+|  |  row-size=8B cardinality=1
+|  |
+|  15:SCAN HDFS [tpcds.store_sales]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_quantity <= 60, ss_quantity >= 41
+|     row-size=8B cardinality=288.04K
+|
+37:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+|  row-size=60B cardinality=1
+|
+|--66:EXCHANGE [UNPARTITIONED]
+|  |
+|  65:AGGREGATE [FINALIZE]
+|  |  output: count:merge(*)
+|  |  row-size=8B cardinality=1
+|  |
+|  64:EXCHANGE [UNPARTITIONED]
+|  |
+|  14:AGGREGATE
+|  |  output: count(*)
+|  |  row-size=8B cardinality=1
+|  |
+|  13:SCAN HDFS [tpcds.store_sales]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_quantity <= 60, ss_quantity >= 41
+|     row-size=4B cardinality=288.04K
+|
+36:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+|  row-size=52B cardinality=1
+|
+|--63:EXCHANGE [UNPARTITIONED]
+|  |
+|  62:AGGREGATE [FINALIZE]
+|  |  output: avg:merge(ss_net_profit)
+|  |  row-size=8B cardinality=1
+|  |
+|  61:EXCHANGE [UNPARTITIONED]
+|  |
+|  12:AGGREGATE
+|  |  output: avg(ss_net_profit)
+|  |  row-size=8B cardinality=1
+|  |
+|  11:SCAN HDFS [tpcds.store_sales]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_quantity <= 40, ss_quantity >= 21
+|     row-size=8B cardinality=288.04K
+|
+35:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+|  row-size=44B cardinality=1
+|
+|--60:EXCHANGE [UNPARTITIONED]
+|  |
+|  59:AGGREGATE [FINALIZE]
+|  |  output: avg:merge(ss_ext_discount_amt)
+|  |  row-size=8B cardinality=1
+|  |
+|  58:EXCHANGE [UNPARTITIONED]
+|  |
+|  10:AGGREGATE
+|  |  output: avg(ss_ext_discount_amt)
+|  |  row-size=8B cardinality=1
+|  |
+|  09:SCAN HDFS [tpcds.store_sales]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_quantity <= 40, ss_quantity >= 21
+|     row-size=8B cardinality=288.04K
+|
+34:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+|  row-size=36B cardinality=1
+|
+|--57:EXCHANGE [UNPARTITIONED]
+|  |
+|  56:AGGREGATE [FINALIZE]
+|  |  output: count:merge(*)
+|  |  row-size=8B cardinality=1
+|  |
+|  55:EXCHANGE [UNPARTITIONED]
+|  |
+|  08:AGGREGATE
+|  |  output: count(*)
+|  |  row-size=8B cardinality=1
+|  |
+|  07:SCAN HDFS [tpcds.store_sales]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_quantity <= 40, ss_quantity >= 21
+|     row-size=4B cardinality=288.04K
+|
+33:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+|  row-size=28B cardinality=1
+|
+|--54:EXCHANGE [UNPARTITIONED]
+|  |
+|  53:AGGREGATE [FINALIZE]
+|  |  output: avg:merge(ss_net_profit)
+|  |  row-size=8B cardinality=1
+|  |
+|  52:EXCHANGE [UNPARTITIONED]
+|  |
+|  06:AGGREGATE
+|  |  output: avg(ss_net_profit)
+|  |  row-size=8B cardinality=1
+|  |
+|  05:SCAN HDFS [tpcds.store_sales]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_quantity <= 20, ss_quantity >= 1
+|     row-size=8B cardinality=288.04K
+|
+32:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+|  row-size=20B cardinality=1
+|
+|--51:EXCHANGE [UNPARTITIONED]
+|  |
+|  50:AGGREGATE [FINALIZE]
+|  |  output: avg:merge(ss_ext_discount_amt)
+|  |  row-size=8B cardinality=1
+|  |
+|  49:EXCHANGE [UNPARTITIONED]
+|  |
+|  04:AGGREGATE
+|  |  output: avg(ss_ext_discount_amt)
+|  |  row-size=8B cardinality=1
+|  |
+|  03:SCAN HDFS [tpcds.store_sales]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_quantity <= 20, ss_quantity >= 1
+|     row-size=8B cardinality=288.04K
+|
+31:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+|  row-size=12B cardinality=1
+|
+|--48:EXCHANGE [UNPARTITIONED]
+|  |
+|  00:SCAN HDFS [tpcds.reason]
+|     HDFS partitions=1/1 files=1 size=1.31KB
+|     predicates: r_reason_sk = 1
+|     row-size=4B cardinality=1
+|
+47:AGGREGATE [FINALIZE]
+|  output: count:merge(*)
+|  row-size=8B cardinality=1
+|
+46:EXCHANGE [UNPARTITIONED]
+|
+02:AGGREGATE
+|  output: count(*)
+|  row-size=8B cardinality=1
+|
+01:SCAN HDFS [tpcds.store_sales]
+   HDFS partitions=1824/1824 files=1824 size=346.60MB
+   predicates: ss_quantity <= 20, ss_quantity >= 1
+   row-size=4B cardinality=288.04K
+---- PARALLELPLANS
+Max Per-Host Resource Reservation: Memory=240.01MB Threads=61
+Per-Host Resource Estimates: Memory=946MB
+PLAN-ROOT SINK
+|
+45:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+|  join table id: 00
+|  row-size=124B cardinality=1
+|
+|--JOIN BUILD
+|  |  join-table-id=00 plan-id=01 cohort-id=01
+|  |
+|  90:EXCHANGE [UNPARTITIONED]
+|  |
+|  89:AGGREGATE [FINALIZE]
+|  |  output: avg:merge(ss_net_profit)
+|  |  row-size=8B cardinality=1
+|  |
+|  88:EXCHANGE [UNPARTITIONED]
+|  |
+|  30:AGGREGATE
+|  |  output: avg(ss_net_profit)
+|  |  row-size=8B cardinality=1
+|  |
+|  29:SCAN HDFS [tpcds.store_sales]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_quantity <= 100, ss_quantity >= 81
+|     row-size=8B cardinality=288.04K
+|
+44:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+|  join table id: 01
+|  row-size=116B cardinality=1
+|
+|--JOIN BUILD
+|  |  join-table-id=01 plan-id=02 cohort-id=01
+|  |
+|  87:EXCHANGE [UNPARTITIONED]
+|  |
+|  86:AGGREGATE [FINALIZE]
+|  |  output: avg:merge(ss_ext_discount_amt)
+|  |  row-size=8B cardinality=1
+|  |
+|  85:EXCHANGE [UNPARTITIONED]
+|  |
+|  28:AGGREGATE
+|  |  output: avg(ss_ext_discount_amt)
+|  |  row-size=8B cardinality=1
+|  |
+|  27:SCAN HDFS [tpcds.store_sales]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_quantity <= 100, ss_quantity >= 81
+|     row-size=8B cardinality=288.04K
+|
+43:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+|  join table id: 02
+|  row-size=108B cardinality=1
+|
+|--JOIN BUILD
+|  |  join-table-id=02 plan-id=03 cohort-id=01
+|  |
+|  84:EXCHANGE [UNPARTITIONED]
+|  |
+|  83:AGGREGATE [FINALIZE]
+|  |  output: count:merge(*)
+|  |  row-size=8B cardinality=1
+|  |
+|  82:EXCHANGE [UNPARTITIONED]
+|  |
+|  26:AGGREGATE
+|  |  output: count(*)
+|  |  row-size=8B cardinality=1
+|  |
+|  25:SCAN HDFS [tpcds.store_sales]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_quantity <= 100, ss_quantity >= 81
+|     row-size=4B cardinality=288.04K
+|
+42:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+|  join table id: 03
+|  row-size=100B cardinality=1
+|
+|--JOIN BUILD
+|  |  join-table-id=03 plan-id=04 cohort-id=01
+|  |
+|  81:EXCHANGE [UNPARTITIONED]
+|  |
+|  80:AGGREGATE [FINALIZE]
+|  |  output: avg:merge(ss_net_profit)
+|  |  row-size=8B cardinality=1
+|  |
+|  79:EXCHANGE [UNPARTITIONED]
+|  |
+|  24:AGGREGATE
+|  |  output: avg(ss_net_profit)
+|  |  row-size=8B cardinality=1
+|  |
+|  23:SCAN HDFS [tpcds.store_sales]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_quantity <= 80, ss_quantity >= 61
+|     row-size=8B cardinality=288.04K
+|
+41:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+|  join table id: 04
+|  row-size=92B cardinality=1
+|
+|--JOIN BUILD
+|  |  join-table-id=04 plan-id=05 cohort-id=01
+|  |
+|  78:EXCHANGE [UNPARTITIONED]
+|  |
+|  77:AGGREGATE [FINALIZE]
+|  |  output: avg:merge(ss_ext_discount_amt)
+|  |  row-size=8B cardinality=1
+|  |
+|  76:EXCHANGE [UNPARTITIONED]
+|  |
+|  22:AGGREGATE
+|  |  output: avg(ss_ext_discount_amt)
+|  |  row-size=8B cardinality=1
+|  |
+|  21:SCAN HDFS [tpcds.store_sales]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_quantity <= 80, ss_quantity >= 61
+|     row-size=8B cardinality=288.04K
+|
+40:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+|  join table id: 05
+|  row-size=84B cardinality=1
+|
+|--JOIN BUILD
+|  |  join-table-id=05 plan-id=06 cohort-id=01
+|  |
+|  75:EXCHANGE [UNPARTITIONED]
+|  |
+|  74:AGGREGATE [FINALIZE]
+|  |  output: count:merge(*)
+|  |  row-size=8B cardinality=1
+|  |
+|  73:EXCHANGE [UNPARTITIONED]
+|  |
+|  20:AGGREGATE
+|  |  output: count(*)
+|  |  row-size=8B cardinality=1
+|  |
+|  19:SCAN HDFS [tpcds.store_sales]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_quantity <= 80, ss_quantity >= 61
+|     row-size=4B cardinality=288.04K
+|
+39:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+|  join table id: 06
+|  row-size=76B cardinality=1
+|
+|--JOIN BUILD
+|  |  join-table-id=06 plan-id=07 cohort-id=01
+|  |
+|  72:EXCHANGE [UNPARTITIONED]
+|  |
+|  71:AGGREGATE [FINALIZE]
+|  |  output: avg:merge(ss_net_profit)
+|  |  row-size=8B cardinality=1
+|  |
+|  70:EXCHANGE [UNPARTITIONED]
+|  |
+|  18:AGGREGATE
+|  |  output: avg(ss_net_profit)
+|  |  row-size=8B cardinality=1
+|  |
+|  17:SCAN HDFS [tpcds.store_sales]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_quantity <= 60, ss_quantity >= 41
+|     row-size=8B cardinality=288.04K
+|
+38:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+|  join table id: 07
+|  row-size=68B cardinality=1
+|
+|--JOIN BUILD
+|  |  join-table-id=07 plan-id=08 cohort-id=01
+|  |
+|  69:EXCHANGE [UNPARTITIONED]
+|  |
+|  68:AGGREGATE [FINALIZE]
+|  |  output: avg:merge(ss_ext_discount_amt)
+|  |  row-size=8B cardinality=1
+|  |
+|  67:EXCHANGE [UNPARTITIONED]
+|  |
+|  16:AGGREGATE
+|  |  output: avg(ss_ext_discount_amt)
+|  |  row-size=8B cardinality=1
+|  |
+|  15:SCAN HDFS [tpcds.store_sales]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_quantity <= 60, ss_quantity >= 41
+|     row-size=8B cardinality=288.04K
+|
+37:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+|  join table id: 08
+|  row-size=60B cardinality=1
+|
+|--JOIN BUILD
+|  |  join-table-id=08 plan-id=09 cohort-id=01
+|  |
+|  66:EXCHANGE [UNPARTITIONED]
+|  |
+|  65:AGGREGATE [FINALIZE]
+|  |  output: count:merge(*)
+|  |  row-size=8B cardinality=1
+|  |
+|  64:EXCHANGE [UNPARTITIONED]
+|  |
+|  14:AGGREGATE
+|  |  output: count(*)
+|  |  row-size=8B cardinality=1
+|  |
+|  13:SCAN HDFS [tpcds.store_sales]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_quantity <= 60, ss_quantity >= 41
+|     row-size=4B cardinality=288.04K
+|
+36:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+|  join table id: 09
+|  row-size=52B cardinality=1
+|
+|--JOIN BUILD
+|  |  join-table-id=09 plan-id=10 cohort-id=01
+|  |
+|  63:EXCHANGE [UNPARTITIONED]
+|  |
+|  62:AGGREGATE [FINALIZE]
+|  |  output: avg:merge(ss_net_profit)
+|  |  row-size=8B cardinality=1
+|  |
+|  61:EXCHANGE [UNPARTITIONED]
+|  |
+|  12:AGGREGATE
+|  |  output: avg(ss_net_profit)
+|  |  row-size=8B cardinality=1
+|  |
+|  11:SCAN HDFS [tpcds.store_sales]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_quantity <= 40, ss_quantity >= 21
+|     row-size=8B cardinality=288.04K
+|
+35:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+|  join table id: 10
+|  row-size=44B cardinality=1
+|
+|--JOIN BUILD
+|  |  join-table-id=10 plan-id=11 cohort-id=01
+|  |
+|  60:EXCHANGE [UNPARTITIONED]
+|  |
+|  59:AGGREGATE [FINALIZE]
+|  |  output: avg:merge(ss_ext_discount_amt)
+|  |  row-size=8B cardinality=1
+|  |
+|  58:EXCHANGE [UNPARTITIONED]
+|  |
+|  10:AGGREGATE
+|  |  output: avg(ss_ext_discount_amt)
+|  |  row-size=8B cardinality=1
+|  |
+|  09:SCAN HDFS [tpcds.store_sales]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_quantity <= 40, ss_quantity >= 21
+|     row-size=8B cardinality=288.04K
+|
+34:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+|  join table id: 11
+|  row-size=36B cardinality=1
+|
+|--JOIN BUILD
+|  |  join-table-id=11 plan-id=12 cohort-id=01
+|  |
+|  57:EXCHANGE [UNPARTITIONED]
+|  |
+|  56:AGGREGATE [FINALIZE]
+|  |  output: count:merge(*)
+|  |  row-size=8B cardinality=1
+|  |
+|  55:EXCHANGE [UNPARTITIONED]
+|  |
+|  08:AGGREGATE
+|  |  output: count(*)
+|  |  row-size=8B cardinality=1
+|  |
+|  07:SCAN HDFS [tpcds.store_sales]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_quantity <= 40, ss_quantity >= 21
+|     row-size=4B cardinality=288.04K
+|
+33:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+|  join table id: 12
+|  row-size=28B cardinality=1
+|
+|--JOIN BUILD
+|  |  join-table-id=12 plan-id=13 cohort-id=01
+|  |
+|  54:EXCHANGE [UNPARTITIONED]
+|  |
+|  53:AGGREGATE [FINALIZE]
+|  |  output: avg:merge(ss_net_profit)
+|  |  row-size=8B cardinality=1
+|  |
+|  52:EXCHANGE [UNPARTITIONED]
+|  |
+|  06:AGGREGATE
+|  |  output: avg(ss_net_profit)
+|  |  row-size=8B cardinality=1
+|  |
+|  05:SCAN HDFS [tpcds.store_sales]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_quantity <= 20, ss_quantity >= 1
+|     row-size=8B cardinality=288.04K
+|
+32:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+|  join table id: 13
+|  row-size=20B cardinality=1
+|
+|--JOIN BUILD
+|  |  join-table-id=13 plan-id=14 cohort-id=01
+|  |
+|  51:EXCHANGE [UNPARTITIONED]
+|  |
+|  50:AGGREGATE [FINALIZE]
+|  |  output: avg:merge(ss_ext_discount_amt)
+|  |  row-size=8B cardinality=1
+|  |
+|  49:EXCHANGE [UNPARTITIONED]
+|  |
+|  04:AGGREGATE
+|  |  output: avg(ss_ext_discount_amt)
+|  |  row-size=8B cardinality=1
+|  |
+|  03:SCAN HDFS [tpcds.store_sales]
+|     HDFS partitions=1824/1824 files=1824 size=346.60MB
+|     predicates: ss_quantity <= 20, ss_quantity >= 1
+|     row-size=8B cardinality=288.04K
+|
+31:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+|  join table id: 14
+|  row-size=12B cardinality=1
+|
+|--JOIN BUILD
+|  |  join-table-id=14 plan-id=15 cohort-id=01
+|  |
+|  48:EXCHANGE [UNPARTITIONED]
+|  |
+|  00:SCAN HDFS [tpcds.reason]
+|     HDFS partitions=1/1 files=1 size=1.31KB
+|     predicates: r_reason_sk = 1
+|     row-size=4B cardinality=1
+|
+47:AGGREGATE [FINALIZE]
+|  output: count:merge(*)
+|  row-size=8B cardinality=1
+|
+46:EXCHANGE [UNPARTITIONED]
+|
+02:AGGREGATE
+|  output: count(*)
+|  row-size=8B cardinality=1
+|
+01:SCAN HDFS [tpcds.store_sales]
+   HDFS partitions=1824/1824 files=1824 size=346.60MB
+   predicates: ss_quantity <= 20, ss_quantity >= 1
+   row-size=4B cardinality=288.04K
+====
diff --git a/testdata/workloads/functional-query/queries/QueryTest/subquery.test b/testdata/workloads/functional-query/queries/QueryTest/subquery.test
index c53d29c..000c8cf 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/subquery.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/subquery.test
@@ -1039,3 +1039,176 @@ where int_col = (select f.id from (
 ---- TYPES
 BIGINT
 ====
+---- QUERY
+# Basic scalar in select list
+select id, 1+(select min(id) from functional.alltypessmall)
+  from functional.alltypestiny
+order by id
+---- RESULTS
+0,1
+1,1
+2,1
+3,1
+4,1
+5,1
+6,1
+7,1
+---- TYPES
+INT, BIGINT
+====
+---- QUERY
+# No aggregate however limit 1
+select id, 1+(select id from functional.alltypessmall where id=0 group by id limit 1)
+  from functional.alltypestiny
+order by id
+---- RESULTS
+0,1
+1,1
+2,1
+3,1
+4,1
+5,1
+6,1
+7,1
+---- TYPES
+INT, BIGINT
+====
+---- QUERY
+# outer join
+select s.id, (select count(id) from alltypes) from alltypestiny t
+  left outer join alltypessmall s on t.id = s.id where s.bool_col
+order by id
+---- RESULTS
+0,7300
+2,7300
+4,7300
+6,7300
+---- TYPES
+INT, BIGINT
+====
+---- QUERY
+# correlated scalar currently unsupported
+select id, (select count(id) from functional.alltypessmall where id=t.id)
+  from functional.alltypestiny t
+order by id
+---- RESULTS
+---- CATCH
+A correlated scalar subquery is not supported in the expression:
+====
+---- QUERY
+# Uncorrelated Scalar Aggregate in select list combined with aggregation in parent query
+select
+max(a.id),
+(select max(id) from functional.alltypestiny),
+(select min(id) from functional.alltypestiny)
+from functional.alltypessmall a
+---- RESULTS
+99,7,0
+---- TYPES
+INT, INT, INT
+====
+---- QUERY
+# Empty tables or false predicate should result in NULLs
+# Count aggregates on empty return 0
+select id,
+  (select sum(f2) from functional.emptytable),
+  (select count(id) from functional.alltypestiny where id > 100)
+from functional.alltypestiny
+order by id
+---- RESULTS
+0,NULL,0
+1,NULL,0
+2,NULL,0
+3,NULL,0
+4,NULL,0
+5,NULL,0
+6,NULL,0
+7,NULL,0
+---- TYPES
+INT, BIGINT, BIGINT
+====
+---- QUERY
+# nested subquery over a view with an outer predicate on the subquery column
+select count(id) from
+  (select id,
+    (select count(*) as cnt from functional.alltypestiny where int_col
+    <= (select max(int_col) from functional.alltypes_view)) as c from functional.dimtbl) T
+  where t.c <10
+---- RESULTS
+10
+---- TYPES
+BIGINT
+====
+---- QUERY
+# Empty tables should result in NULLs
+select id, id in (select min(id) from functional.alltypessmall)
+  from functional.alltypestiny
+order by id
+---- RESULTS
+0,true
+1,false
+2,false
+3,false
+4,false
+5,false
+6,false
+7,false
+---- TYPES
+INT, BOOLEAN
+====
+---- QUERY
+# With a join in the subquery
+select id,
+  (select count(*) from functional.alltypessmall join functional.alltypestiny using (id))
+  from functional.alltypestiny
+order by id
+---- RESULTS
+0,8
+1,8
+2,8
+3,8
+4,8
+5,8
+6,8
+7,8
+---- TYPES
+INT, BIGINT
+====
+---- QUERY
+# EXISTS predicates are handled at plantime so we need to special case their rewrites
+select id,
+  exists (select max(id) from dimtbl)
+  from functional.alltypestiny
+order by id
+---- RESULTS
+0,true
+1,true
+2,true
+3,true
+4,true
+5,true
+6,true
+7,true
+---- TYPES
+INT, BOOLEAN
+====
+---- QUERY
+# Multiple levels of exists
+SELECT id,
+NOT EXISTS (SELECT MAX(f2) FROM emptytable),
+EXISTS (SELECT MAX(id) FROM DIMTBL WHERE EXISTS (SELECT MAX(id) FROM alltypestiny)),
+(SELECT 1
+FROM alltypestiny t1
+WHERE EXISTS
+  (SELECT 1
+   FROM alltypestiny t2
+   WHERE t1.id = t2.id AND t1.int_col = t2.int_col
+   GROUP BY t2.id
+   HAVING count(1) = 1) LIMIT 1)
+FROM alltypessmall WHERE id < 2
+---- RESULTS
+0,false,true,1
+1,false,true,1
+---- TYPES
+INT, BOOLEAN, BOOLEAN, TINYINT
+====
diff --git a/testdata/workloads/tpcds/queries/count.test b/testdata/workloads/tpcds/queries/count.test
index 213c7a7..8ae49d4 100644
--- a/testdata/workloads/tpcds/queries/count.test
+++ b/testdata/workloads/tpcds/queries/count.test
@@ -159,4 +159,11 @@ select count(*) from web_site;
 30
 ---- TYPES
 BIGINT
-====
\ No newline at end of file
+====
+---- QUERY: TPCDS-COUNT-REASON
+select count(*) from reason;
+---- RESULTS
+35
+---- TYPES
+BIGINT
+====
diff --git a/testdata/workloads/tpcds/queries/tpcds-decimal_v2-q9.test b/testdata/workloads/tpcds/queries/tpcds-decimal_v2-q9.test
new file mode 100644
index 0000000..82f4527
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/tpcds-decimal_v2-q9.test
@@ -0,0 +1,54 @@
+====
+---- QUERY: TPCDS-Q9
+select case when (select count(*)
+                  from store_sales
+                  where ss_quantity between 1 and 20) > 74129
+            then (select avg(ss_ext_discount_amt)
+                  from store_sales
+                  where ss_quantity between 1 and 20)
+            else (select avg(ss_net_profit)
+                  from store_sales
+                  where ss_quantity between 1 and 20) end bucket1 ,
+       case when (select count(*)
+                  from store_sales
+                  where ss_quantity between 21 and 40) > 122840
+            then (select avg(ss_ext_discount_amt)
+                  from store_sales
+                  where ss_quantity between 21 and 40)
+            else (select avg(ss_net_profit)
+                  from store_sales
+                  where ss_quantity between 21 and 40) end bucket2,
+       case when (select count(*)
+                  from store_sales
+                  where ss_quantity between 41 and 60) > 56580
+            then (select avg(ss_ext_discount_amt)
+                  from store_sales
+                  where ss_quantity between 41 and 60)
+            else (select avg(ss_net_profit)
+                  from store_sales
+                  where ss_quantity between 41 and 60) end bucket3,
+       case when (select count(*)
+                  from store_sales
+                  where ss_quantity between 61 and 80) > 10097
+            then (select avg(ss_ext_discount_amt)
+                  from store_sales
+                  where ss_quantity between 61 and 80)
+            else (select avg(ss_net_profit)
+                  from store_sales
+                  where ss_quantity between 61 and 80) end bucket4,
+       case when (select count(*)
+                  from store_sales
+                  where ss_quantity between 81 and 100) > 165306
+            then (select avg(ss_ext_discount_amt)
+                  from store_sales
+                  where ss_quantity between 81 and 100)
+            else (select avg(ss_net_profit)
+                  from store_sales
+                  where ss_quantity between 81 and 100) end bucket5
+from reason
+where r_reason_sk = 1
+---- RESULTS
+39.645413,115.898138,191.634713,267.188000,341.986915
+---- TYPES
+DECIMAL, DECIMAL, DECIMAL, DECIMAL, DECIMAL
+====
diff --git a/testdata/workloads/tpcds/queries/tpcds-q9.test b/testdata/workloads/tpcds/queries/tpcds-q9.test
new file mode 100644
index 0000000..881cb68
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/tpcds-q9.test
@@ -0,0 +1,54 @@
+====
+---- QUERY: TPCDS-Q9
+select case when (select count(*)
+                  from store_sales
+                  where ss_quantity between 1 and 20) > 74129
+            then (select avg(ss_ext_discount_amt)
+                  from store_sales
+                  where ss_quantity between 1 and 20)
+            else (select avg(ss_net_profit)
+                  from store_sales
+                  where ss_quantity between 1 and 20) end bucket1,
+       case when (select count(*)
+                  from store_sales
+                  where ss_quantity between 21 and 40) > 122840
+            then (select avg(ss_ext_discount_amt)
+                  from store_sales
+                  where ss_quantity between 21 and 40)
+            else (select avg(ss_net_profit)
+                  from store_sales
+                  where ss_quantity between 21 and 40) end bucket2,
+       case when (select count(*)
+                  from store_sales
+                  where ss_quantity between 41 and 60) > 56580
+            then (select avg(ss_ext_discount_amt)
+                  from store_sales
+                  where ss_quantity between 41 and 60)
+            else (select avg(ss_net_profit)
+                  from store_sales
+                  where ss_quantity between 41 and 60) end bucket3,
+       case when (select count(*)
+                  from store_sales
+                  where ss_quantity between 61 and 80) > 10097
+            then (select avg(ss_ext_discount_amt)
+                  from store_sales
+                  where ss_quantity between 61 and 80)
+            else (select avg(ss_net_profit)
+                  from store_sales
+                  where ss_quantity between 61 and 80) end bucket4,
+       case when (select count(*)
+                  from store_sales
+                  where ss_quantity between 81 and 100) > 165306
+            then (select avg(ss_ext_discount_amt)
+                  from store_sales
+                  where ss_quantity between 81 and 100)
+            else (select avg(ss_net_profit)
+                  from store_sales
+                  where ss_quantity between 81 and 100) end bucket5
+from reason
+where r_reason_sk = 1
+---- RESULTS
+39.64,115.89,191.63,267.18,341.98
+---- TYPES
+DECIMAL, DECIMAL, DECIMAL, DECIMAL, DECIMAL
+====
diff --git a/tests/query_test/test_tpcds_queries.py b/tests/query_test/test_tpcds_queries.py
index 512fbf5..41fa1a7 100644
--- a/tests/query_test/test_tpcds_queries.py
+++ b/tests/query_test/test_tpcds_queries.py
@@ -73,6 +73,9 @@ class TestTpcdsQuery(ImpalaTestSuite):
   def test_tpcds_q8(self, vector):
     self.run_test_case(self.get_workload() + '-q8', vector)
 
+  def test_tpcds_q9(self, vector):
+    self.run_test_case(self.get_workload() + '-q9', vector)
+
   def test_tpcds_q10a(self, vector):
     self.run_test_case(self.get_workload() + '-q10a', vector)
 
@@ -342,6 +345,9 @@ class TestTpcdsDecimalV2Query(ImpalaTestSuite):
   def test_tpcds_q8(self, vector):
     self.run_test_case(self.get_workload() + '-decimal_v2-q8', vector)
 
+  def test_tpcds_q9(self, vector):
+    self.run_test_case(self.get_workload() + '-decimal_v2-q9', vector)
+
   def test_tpcds_q10a(self, vector):
     self.run_test_case(self.get_workload() + '-decimal_v2-q10a', vector)
 
diff --git a/tests/util/parse_util.py b/tests/util/parse_util.py
index 4aeb00b..448b6ec 100644
--- a/tests/util/parse_util.py
+++ b/tests/util/parse_util.py
@@ -22,7 +22,7 @@ from datetime import datetime
 # changed, and the stress test loses the ability to run the full set of queries. Set
 # these constants and assert that when a workload is used, all the queries we expect to
 # use are there.
-EXPECTED_TPCDS_QUERIES_COUNT = 84
+EXPECTED_TPCDS_QUERIES_COUNT = 85
 EXPECTED_TPCH_NESTED_QUERIES_COUNT = 22
 EXPECTED_TPCH_QUERIES_COUNT = 22
 # Add the number of stress test specific queries, i.e. in files like '*-stress-*.test'