You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by GitBox <gi...@apache.org> on 2020/03/20 08:59:52 UTC

[GitHub] [incubator-doris] yangzhg commented on a change in pull request #3150: Support non-correlated subquery in having clause

yangzhg commented on a change in pull request #3150: Support non-correlated subquery in having clause
URL: https://github.com/apache/incubator-doris/pull/3150#discussion_r395505446
 
 

 ##########
 File path: fe/src/main/java/org/apache/doris/analysis/StmtRewriter.java
 ##########
 @@ -97,8 +103,128 @@ private static void rewriteSelectStatement(SelectStmt stmt, Analyzer analyzer)
             }
             rewriteWhereClauseSubqueries(stmt, analyzer);
         }
+        // Rewrite all subquery in the having clause
+        if (stmt.getHavingPred() != null && stmt.getHavingPred().getSubquery() != null) {
+            stmt = rewriteHavingClauseSubqueries(stmt, analyzer);
+        }
         stmt.sqlString_ = null;
         if (LOG.isDebugEnabled()) LOG.debug("rewritten stmt: " + stmt.toSql());
+        return stmt;
+    }
+
+    /**
+     * Rewrite having subquery.
+     * Step1: rewrite having subquery to where subquery
+     * Step2: rewrite where subquery
+     * <p>
+     * For example:
+     * select cs_item_sk, sum(cs_sales_price) from catalog_sales a group by cs_item_sk
+     * having sum(cs_sales_price) >
+     *        (select min(cs_sales_price) from catalog_sales b where a.cs_item_sk = b.cs_item_sk);
+     * <p>
+     * Step1: rewrite having subquery to where subquery
+     * Outer query is changed to inline view in rewritten query
+     * Inline view of outer query:
+     *     from (select cs_item_sk, sum(cs_sales_price) sum_cs_sales_price from catalog_sales group by cs_item_sk) a
+     * Rewritten subquery of expr:
+     *     where a.sum_cs_sales_price >
+     *           (select min(cs_sales_price) from catalog_sales b where a.cs_item_sk = b.cs_item_sk)
+     * Rewritten query:
+     *     select cs_item_sk, a.sum_cs_sales_price from
+     *     (select cs_item_sk, sum(cs_sales_price) sum_cs_sales_price from catalog_sales group by cs_item_sk) a
+     *     where a.sum_cs_sales_price >
+     *           (select min(cs_sales_price) from catalog_sales b where a.cs_item_sk = b.cs_item_sk)
+     * <p>
+     * Step2: rewrite where subquery
+     * Inline view of subquery:
+     *     from (select b.cs_item_sk, min(cs_sales_price) from catalog_sales b group by cs_item_sk) c
+     * Rewritten correlated predicate:
+     *     where c.cs_item_sk = a.cs_item_sk and a.sum_cs_sales_price > c.min(cs_sales_price)
+     *
+     * @param stmt
+     * @param analyzer
+     */
+    private static SelectStmt rewriteHavingClauseSubqueries(SelectStmt stmt, Analyzer analyzer) throws AnalysisException {
+        // extract having predicate
+        Expr havingPredicate = stmt.getHavingPred();
+        Preconditions.checkState(havingPredicate != null);
+        Preconditions.checkState(havingPredicate.getSubquery() != null);
+        // extract result of stmt
+        List<Expr> leftExprList = stmt.getResultExprs();
+        // extract table alias generator
+        TableAliasGenerator tableAliasGenerator = stmt.getTableAliasGenerator();
+
+        /*
+         * The outer query is changed to inline view without having predicate
+         * For example:
+         * Query: select cs_item_sk, sum(cs_sales_price) from catalog_sales a group by cs_item_sk having ...;
+         * Inline view:
+         *     from (select cs_item_sk $ColumnA, sum(cs_sales_price) $ColumnB from catalog_sales a group by cs_item_sk) $TableA
+         */
+        SelectStmt inlineViewQuery = (SelectStmt) stmt.clone();
+        inlineViewQuery.reset();
+        inlineViewQuery.removeHavingClause();
+        // add a new alias for all of columns in subquery
+        List<String> colAliasOfInlineView = Lists.newArrayList();
+        for (int i = 0; i < inlineViewQuery.getSelectList().getItems().size(); ++i) {
+            colAliasOfInlineView.add(inlineViewQuery.getColumnAliasGenerator().getNextAlias());
+        }
+        InlineViewRef inlineViewRef = new InlineViewRef(tableAliasGenerator.getNextAlias(), inlineViewQuery,
+                colAliasOfInlineView);
+        try {
+            inlineViewRef.analyze(analyzer);
+        } catch (UserException e) {
+            throw new AnalysisException(e.getMessage());
+        }
+        LOG.debug("Outer query is changed to " + inlineViewRef.tableRefToSql());
+
+        /*
+         * Columns which belong to outer query can substitute for output columns of inline view
+         * For example:
+         * Having predicate: sum(cs_sales_price) >
+         *                   (select min(cs_sales_price) from catalog_sales b where a.cs_item_sk = b.cs_item_sk);
+         * Columns which belong to outer query: sum(cs_sales_price), a.cs_item_sk
+         * SMap: <cs_item_sk $ColumnA> <sum(cs_sales_price) $ColumnB>
+         * After substitute: $ColumnB >
+         *                   (select min(cs_sales_price) from catalog_sales b where $ColumnA = b.cs_item_sk)
+         */
+        /*
+         * Prepare select list of new query.
+         * Generate a new select item for each original columns in select list
+         */
+        havingPredicate.reset();
+        ExprSubstitutionMap smap = new ExprSubstitutionMap();
+        SelectList newSelectList = new SelectList();
+        for (int i = 0; i < inlineViewQuery.getSelectList().getItems().size(); i++) {
+            Expr leftExpr = leftExprList.get(i);
+            Expr rightExpr = new SlotRef(inlineViewRef.getAliasAsName(), colAliasOfInlineView.get(i));
+            rightExpr.analyze(analyzer);
+            smap.put(leftExpr, rightExpr);
+            // construct outer query select list
+            SelectListItem selectListItem = new SelectListItem(rightExpr, stmt.getColLabels().get(i));
+            newSelectList.addItem(selectListItem);
+        }
+        Expr newWherePredicate = havingPredicate.substitute(smap, analyzer,false);
+        LOG.debug("Having predicate is changed to " + newWherePredicate.toSql());
+
+        // construct rewritten query
+        List<TableRef> newTableRefList = Lists.newArrayList();
+        newTableRefList.add(inlineViewRef);
+        FromClause newFromClause = new FromClause(newTableRefList);
+        SelectStmt result = new SelectStmt(newSelectList, newFromClause, newWherePredicate, null, null, null,
+                LimitElement.NO_LIMIT);
 
 Review comment:
   what if  the origin query is 
   ```
   select cs_item_sk, sum(cs_sales_price) from catalog_sales a group by cs_item_sk
        having sum(cs_sales_price) >
        (select min(cs_sales_price) from catalog_sales b where a.cs_item_sk = b.cs_item_sk) limit 1;
   ```
   on limit after rewrite
   what about order by ?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org