You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by GitBox <gi...@apache.org> on 2022/05/19 11:38:21 UTC

[GitHub] [incubator-doris] jackwener commented on a diff in pull request #9686: push predicate to subquery

jackwener commented on code in PR #9686:
URL: https://github.com/apache/incubator-doris/pull/9686#discussion_r876916819


##########
fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java:
##########
@@ -655,6 +666,224 @@ private void parse() throws AnalysisException, DdlException {
         redirectStatus = parsedStmt.getRedirectStatus();
     }
 
+    /**
+     * TPCH q17:
+     * select
+     *     sum(l_extendedprice) / 7.0 as avg_yearly
+     * from
+     *     lineitem,
+     *     part
+     * where
+     *     p_partkey = l_partkey
+     *     and p_brand = 'Brand#23'
+     *     and p_container = 'MED BOX'
+     *     and l_quantity < (
+     *         select
+     *             0.2 * avg(l_quantity)
+     *         from
+     *             lineitem
+     *         where
+     *             l_partkey = p_partkey
+     *     );
+     *
+     * after pushPredicateToSubquery(), the scan rows of lineitem in subquery will much less than old q17
+     * as well as the less row count for group node
+     *
+     * TPCH Q17(converted):
+     * select
+     *     sum(l_extendedprice) / 7.0 as avg_yearly
+     * from
+     *     lineitem,
+     *     part p1
+     * where
+     *     p_partkey = l_partkey
+     *     and p_brand = 'Brand#23'
+     *     and p_container = 'MED BOX'
+     *     and l_quantity < (
+     *         select
+     *             0.2 * avg(l_quantity)
+     *         from
+     *             lineitem,
+     *             part p2
+     *         where
+     *             l_partkey = p1.p_partkey
+     *             and p2.p_partkey = l_partkey
+     *             and p2.p_brand = 'Brand#23'
+     *             and p2.p_container = 'MED BOX'
+     *     );
+     */
+    private void pushPredicateToSubquery() throws UserException {
+        // only support 1-layer subqueries, we don't look for predicate in subquery's subquery etc...
+        // TODO: although we might push some predicate to subquery to minimize the original scan row count
+        // TODO: it will also introduce a new table and a new join node, which results more rows to scan, filter and join.
+        // TODO: in TPCH Q2 and Q17, the pushed table is relatively small, so get better performance after pushing.
+        // TODO: But normally, a CBO optimizer is needed to decide if the pushing is worth.
+        // TODO: At last, if table spool optimizer is implemented, we can always push the predicates.
+        if (parsedStmt instanceof SelectStmt) {
+            SelectStmt stmt = (SelectStmt) parsedStmt;
+            if (stmt.getTableRefs().size() < 2 || stmt.getWhereClause() == null) {
+                // no need to push predicate if table refs count is 1 or no where clause
+                return;
+            }
+            List<Expr> conjuncts = stmt.getWhereClause().getConjuncts();
+            List<Subquery> subqueries = findSubqueryInConjuncts(conjuncts);
+            for (Subquery query : subqueries) {
+                QueryStmt queryStmt = query.getStatement();
+                if (queryStmt instanceof SelectStmt) {
+                    SelectStmt selectStmt = (SelectStmt) queryStmt;
+                    Pair<SlotRef, SlotRef> correlatedSlotRefs = findAndModifyCorrelatedSlotsInSubquery(selectStmt);
+                    if (correlatedSlotRefs != null) {
+                        String tmpTableSuffix = "_" + UUID.randomUUID().toString();
+                        List<Expr> predicates = findSingleColumnPredicateToPush(conjuncts, correlatedSlotRefs.first, tmpTableSuffix);
+                        if (!predicates.isEmpty()) {
+                            // add a new tableRef in subquery
+                            TableName tableNameInSchema = new TableName(correlatedSlotRefs.first.getTableName().getDb(), correlatedSlotRefs.first.getTable().getName());
+                            TableRef tableRef = new TableRef(tableNameInSchema, correlatedSlotRefs.first.getTableName().getTbl() + tmpTableSuffix);
+
+                            List<TableRef> newTableRefs = selectStmt.getTableRefs();
+                            newTableRefs.add(tableRef);
+
+                            predicates.add(new BinaryPredicate(BinaryPredicate.Operator.EQ,
+                                    new SlotRef(new TableName(null, correlatedSlotRefs.first.getTableName().getTbl() + tmpTableSuffix),
+                                            correlatedSlotRefs.first.getColumnName()), correlatedSlotRefs.second));
+                            predicates.add(selectStmt.getWhereClause());
+                            selectStmt.setWhereClause(convertConjunctsToAndCompoundPredicate(predicates));
+
+                            Analyzer analyzer = new Analyzer(selectStmt.getAnalyzer().getParentAnalyzer());
+                            analyzer.setIsSubquery();
+                            selectStmt.reset();
+                            selectStmt.analyze(analyzer);
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    private Expr convertConjunctsToAndCompoundPredicate(List<Expr> conjuncts) {
+        List<Expr> targetConjuncts = Lists.newArrayList(conjuncts);
+        while (targetConjuncts.size() > 1) {
+            List<Expr> newTargetConjuncts = Lists.newArrayList();
+            for (int i = 0; i < targetConjuncts.size(); i += 2) {
+                Expr expr = i + 1 < targetConjuncts.size() ? new CompoundPredicate(CompoundPredicate.Operator.AND, targetConjuncts.get(i),
+                        targetConjuncts.get(i + 1)) : targetConjuncts.get(i);
+                newTargetConjuncts.add(expr);
+            }
+            targetConjuncts = newTargetConjuncts;
+        }
+        return targetConjuncts.get(0);
+    }

Review Comment:
   It's better to add it into `PredicateUtils`



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org