You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by GitBox <gi...@apache.org> on 2022/05/30 13:17:16 UTC

[GitHub] [incubator-doris] 924060929 commented on a diff in pull request #9807: [Enhancement](Nereids)rewrite framework used in Memo

924060929 commented on code in PR #9807:
URL: https://github.com/apache/incubator-doris/pull/9807#discussion_r884801218


##########
fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/rewrite/RewriteTopDownJob.java:
##########
@@ -21,20 +21,64 @@
 import org.apache.doris.nereids.jobs.Job;
 import org.apache.doris.nereids.jobs.JobType;
 import org.apache.doris.nereids.memo.Group;
+import org.apache.doris.nereids.memo.GroupExpression;
+import org.apache.doris.nereids.pattern.GroupExpressionMatching;
+import org.apache.doris.nereids.rules.Rule;
+import org.apache.doris.nereids.trees.TreeNode;
+
+import com.google.common.base.Preconditions;
+
+import java.util.List;
+import java.util.Objects;
 
 /**
  * Top down job for rewrite, use pattern match.
  */
-public class RewriteTopDownJob extends Job {
+public class RewriteTopDownJob<NODE_TYPE extends TreeNode> extends Job<NODE_TYPE> {
     private final Group group;
+    private final List<Rule<NODE_TYPE>> rules;
+    private final boolean childrenOptimized;
+
+    public RewriteTopDownJob(Group group, List<Rule<NODE_TYPE>> rules, PlannerContext context) {
+        this(group, rules, context, false);
+    }
 
-    public RewriteTopDownJob(Group group, PlannerContext context) {
-        super(JobType.TOP_DOWN_REWRITE, context);
-        this.group = group;
+    private RewriteTopDownJob(Group group, List<Rule<NODE_TYPE>> rules,
+            PlannerContext context, boolean childrenOptimized) {
+        super(JobType.BOTTOM_UP_REWRITE, context);
+        this.group = Objects.requireNonNull(group, "group cannot be null");
+        this.rules = Objects.requireNonNull(rules, "rules cannot be null");
+        this.childrenOptimized = childrenOptimized;
     }
 
     @Override
     public void execute() {
+        GroupExpression logicalExpression = group.getLogicalExpression();
+
+        List<Rule<NODE_TYPE>> validRules = getValidRules(logicalExpression, rules);
+        for (Rule<NODE_TYPE> rule : validRules) {
+            GroupExpressionMatching<NODE_TYPE> groupExpressionMatching
+                    = new GroupExpressionMatching<>(rule.getPattern(), logicalExpression);
+            for (NODE_TYPE before : groupExpressionMatching) {
+                List<NODE_TYPE> afters = rule.transform(before, context);
+                Preconditions.checkArgument(afters.size() == 1);
+                NODE_TYPE after = afters.get(0);
+                if (after != before) {
+                    context.getOptimizerContext().getMemo().copyIn(after, group, rule.isRewrite());
+                    pushTask(new RewriteTopDownJob<>(group, rules, context, false));
+                    return;
+                }
+            }
+            logicalExpression.setApplied(rule);
+        }
+
+        if (!childrenOptimized) {
+            for (Group childGroup : logicalExpression.children()) {
+                pushTask(new RewriteTopDownJob<>(childGroup, rules, context, false));
 
+                pushTask(new RewriteTopDownJob<>(group, rules, context, true));

Review Comment:
   Move this line before the loop



##########
fe/fe-core/src/main/java/org/apache/doris/nereids/trees/AbstractTreeNode.java:
##########
@@ -33,15 +36,43 @@
 
     protected final NodeType type;
     protected final List<TreeNode> children;
+    protected final GroupExpression groupExpression;
+
 
     public AbstractTreeNode(NodeType type, TreeNode... children) {
-        this.type = type;
-        this.children = ImmutableList.copyOf(children);
+        this(type, null, children);
     }
 
-    public AbstractTreeNode(NodeType type, List<NODE_TYPE> children) {
+    /**
+     * Constructor for plan node.
+     *
+     * @param type node type
+     * @param groupExpression group expression related to the operator of this node
+     * @param children children of this node
+     */
+    public AbstractTreeNode(NodeType type, GroupExpression groupExpression, TreeNode... children) {
         this.type = type;
-        this.children = ImmutableList.copyOf(children);
+        if (children.length != 0 && children[0] == null) {

Review Comment:
   The special handling of null is too tricky, we should use an dummy Plan denote empty children or don't care children for temporary



##########
fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Memo.java:
##########
@@ -17,65 +17,142 @@
 
 package org.apache.doris.nereids.memo;
 
+import org.apache.doris.nereids.trees.TreeNode;
+import org.apache.doris.nereids.trees.expressions.Expression;
 import org.apache.doris.nereids.trees.plans.Plan;
-import org.apache.doris.nereids.trees.plans.logical.LogicalPlan;
 
+import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
+import com.google.common.collect.Maps;
 
 import java.util.List;
-import java.util.Set;
+import java.util.Map;
 
 /**
  * Representation for memo in cascades optimizer.
+ *
+ * @param <NODE_TYPE> should be {@link Plan} or {@link Expression}
  */
-public class Memo {
+public class Memo<NODE_TYPE extends TreeNode> {
     private final List<Group> groups = Lists.newArrayList();
-    private final Set<GroupExpression> groupExpressions = Sets.newHashSet();
-    private Group rootSet;
+    // we could not use Set, because Set has no get method.
+    private final Map<GroupExpression, GroupExpression> groupExpressions = Maps.newHashMap();
+    private Group root;
 
-    public void initialize(LogicalPlan plan) {
-        rootSet = newGroupExpression(plan, null).getParent();
+    public void initialize(NODE_TYPE node) {
+        root = copyIn(node, null, false).getParent();
     }
 
-    public Group getRootSet() {
-        return rootSet;
+    public Group getRoot() {
+        return root;
     }
 
     /**
-     * Add plan to Memo.
+     * Add node to Memo.
      *
-     * @param plan   {@link Plan} to be added
-     * @param target target group to add plan. null to generate new Group
-     * @return Reference of plan in Memo
+     * @param node {@link Plan} or {@link Expression} to be added
+     * @param target target group to add node. null to generate new Group
+     * @param rewrite whether to rewrite the node to the target group
+     * @return Reference of node in Memo
      */
-    // TODO: need to merge PlanRefSet if new PlanRef is same with some one already in memo
-    public GroupExpression newGroupExpression(Plan<?, ?> plan, Group target) {
-        List<GroupExpression> childGroupExpr = Lists.newArrayList();
-        for (Plan<?, ?> childrenPlan : plan.children()) {
-            childGroupExpr.add(newGroupExpression(childrenPlan, null));
+    public GroupExpression copyIn(NODE_TYPE node, Group target, boolean rewrite) {
+        Preconditions.checkArgument(!rewrite || target != null);
+        List<Group> childrenGroups = Lists.newArrayList();
+        for (Object object : node.children()) {
+            NODE_TYPE child = (NODE_TYPE) object;
+            childrenGroups.add(copyIn(child, null, rewrite).getParent());
         }
-        GroupExpression newGroupExpression = new GroupExpression(plan);
-        for (GroupExpression childReference : childGroupExpr) {
-            newGroupExpression.addChild(childReference.getParent());
+        if (node.getGroupExpression() != null && groupExpressions.containsKey(node.getGroupExpression())) {
+            return node.getGroupExpression();
         }
-
-        return insertGroupExpression(newGroupExpression, target);
+        GroupExpression newGroupExpression = new GroupExpression(node.getOperator());
+        newGroupExpression.setChildren(childrenGroups);
+        return insertOrRewriteGroupExpression(newGroupExpression, target, rewrite);
+        // TODO: need to derive logical property if generate new group. currently we not copy logical plan into
     }
 
-    private GroupExpression insertGroupExpression(GroupExpression groupExpression, Group target) {
-        if (groupExpressions.contains(groupExpression)) {
-            return groupExpression;
+    /**
+     * Insert or rewrite groupExpression to target group.
+     * If group expression is already in memo and target group is not null, we merge two groups.
+     * If target is null, generate new group.
+     * If rewrite is true, rewrite the groupExpression to target group.
+     *
+     * @param groupExpression groupExpression to insert
+     * @param target target group to insert or rewrite groupExpression
+     * @param rewrite whether to rewrite the groupExpression to target group
+     * @return existing groupExpression in memo or newly generated groupExpression
+     */
+    private GroupExpression insertOrRewriteGroupExpression(
+            GroupExpression groupExpression, Group target, boolean rewrite) {
+        GroupExpression existedGroupExpression = groupExpressions.get(groupExpression);
+        if (existedGroupExpression != null) {
+            if (target != null && !target.getGroupId().equals(existedGroupExpression.getParent().getGroupId())) {
+                mergeGroup(target, existedGroupExpression.getParent());
+            }
+            return existedGroupExpression;
         }
-
-        groupExpressions.add(groupExpression);
-
         if (target != null) {
-            target.addGroupExpression(groupExpression);
+            if (rewrite) {
+                GroupExpression oldExpression = target.rewriteLogicalExpression(groupExpression);
+                groupExpressions.remove(oldExpression);
+            } else {
+                target.addGroupExpression(groupExpression);
+            }
         } else {
             Group group = new Group(groupExpression);
+            Preconditions.checkArgument(!groups.contains(group), "new group with already exist output");
             groups.add(group);
         }
+        groupExpressions.put(groupExpression, groupExpression);
         return groupExpression;
     }
+
+    /**
+     * Merge two groups.
+     * 1. find all group expression which has source as child
+     * 2. replace its child with destination
+     * 3. remove redundant group expression after replace child
+     * 4. move all group expression in source to destination
+     *
+     * @param source source group
+     * @param destination destination group
+     */
+    private void mergeGroup(Group source, Group destination) {
+        if (source.equals(destination)) {
+            return;
+        }
+        List<GroupExpression> needReplaceChild = Lists.newArrayList();
+        for (GroupExpression groupExpression : groupExpressions.values()) {
+            if (groupExpression.children().contains(source)) {
+                if (groupExpression.getParent().equals(destination)) {
+                    // cycle, we should not merge
+                    return;
+                }
+                needReplaceChild.add(groupExpression);
+            }
+        }
+        for (GroupExpression groupExpression : needReplaceChild) {

Review Comment:
   mergeGroup function look seems that the performance is poor, because iterative all GroupExpr, find and remove the GroupExpr. Maybe we should not save all groupExpr in the list?



##########
fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/ApplyRuleJob.java:
##########
@@ -53,20 +54,19 @@ public ApplyRuleJob(GroupExpression groupExpression, Rule<Plan> rule, PlannerCon
 
     @Override
     public void execute() throws AnalysisException {
-        if (groupExpression.hasExplored(rule)) {
+        if (groupExpression.hasApplied(rule)) {
             return;
         }
 
         // TODO: need to find all plan reference tree that match this pattern
-        PatternMatching patternMatching = new PatternMatching();
-        for (Plan<?, ?> plan : patternMatching) {
-            if (!rule.check(plan, context)) {
-                continue;
-            }
-            List<Plan> newPlanList = rule.transform(plan, context);
-            for (Plan newPlan : newPlanList) {
+        GroupExpressionMatching<Plan> groupExpressionMatching
+                = new GroupExpressionMatching(rule.getPattern(), groupExpression);
+        for (TreeNode treeNode : groupExpressionMatching) {
+            Plan plan = (Plan) treeNode;
+            List<Plan> newPlans = rule.transform(plan, context);
+            for (Plan newPlan : newPlans) {
                 GroupExpression newGroupExpression = context.getOptimizerContext().getMemo()
-                        .newGroupExpression(newPlan, groupExpression.getParent());
+                        .copyIn(newPlan, groupExpression.getParent(), rule.isRewrite());
                 // TODO need to check return is a new Reference, other wise will be into a dead loop
                 if (newPlan instanceof LogicalPlan) {
                     pushTask(new DeriveStatsJob(newGroupExpression, context));

Review Comment:
   missing 'return' if (exploredOnly == true) ?



##########
fe/fe-core/src/main/java/org/apache/doris/nereids/pattern/GroupExpressionMatching.java:
##########
@@ -0,0 +1,130 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.pattern;
+
+import org.apache.doris.nereids.memo.GroupExpression;
+import org.apache.doris.nereids.trees.TreeNode;
+import org.apache.doris.nereids.trees.Utils;
+
+import com.google.common.collect.Lists;
+
+import java.util.Iterator;
+import java.util.List;
+import java.util.NoSuchElementException;
+import java.util.Objects;
+
+/**
+ * Get all pattern matching subtree in query plan from a group expression.
+ */
+public class GroupExpressionMatching<NODE_TYPE extends TreeNode> implements Iterable<NODE_TYPE> {
+    private final Pattern pattern;
+    private final GroupExpression groupExpression;
+
+    public GroupExpressionMatching(Pattern pattern, GroupExpression groupExpression) {
+        this.pattern = Objects.requireNonNull(pattern);
+        this.groupExpression = Objects.requireNonNull(groupExpression);
+    }
+
+    @Override
+    public GroupExpressionIterator<NODE_TYPE> iterator() {
+        return new GroupExpressionIterator<>(pattern, groupExpression);
+    }
+
+    /**
+     * Iterator to get all subtrees.
+     */
+    public static class GroupExpressionIterator<NODE_TYPE extends TreeNode> implements Iterator<NODE_TYPE> {
+        private final List<NODE_TYPE> results;

Review Comment:
   Save all combination plan in the iterator maybe oom, because the exponential relationship between the number of plan combinations and the number of layers. We should optimize it later.



##########
fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java:
##########
@@ -23,18 +23,22 @@
 public enum RuleType {
     // binding rules
     BINDING_UNBOUND_RELATION_RULE,
+    BINDING_SENTINEL,

Review Comment:
   We should use a enum RuleClass to avoid sentinel. e.g.
   ```java
   enum RuleClass {
     BIND_RULE, REWRITE_RULE,  EXPLORATION_RULE...
   }
   
   public enum RuleType {
      BINDING_UNBOUND_RELATION_RULE(BIND_RULE),
      LOGICAL_JOIN_COMMUTATIVE(EXPLORATION_RULE)
      ...
   }
   ```
   
   



##########
fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/rewrite/RewriteBottomUpJob.java:
##########
@@ -17,24 +17,68 @@
 
 package org.apache.doris.nereids.jobs.rewrite;
 
+import org.apache.doris.common.AnalysisException;
 import org.apache.doris.nereids.PlannerContext;
 import org.apache.doris.nereids.jobs.Job;
 import org.apache.doris.nereids.jobs.JobType;
 import org.apache.doris.nereids.memo.Group;
+import org.apache.doris.nereids.memo.GroupExpression;
+import org.apache.doris.nereids.pattern.GroupExpressionMatching;
+import org.apache.doris.nereids.rules.Rule;
+import org.apache.doris.nereids.trees.TreeNode;
+
+import com.google.common.base.Preconditions;
+
+import java.util.List;
+import java.util.Objects;
 
 /**
  * Bottom up job for rewrite, use pattern match.
  */
-public class RewriteBottomUpJob extends Job {
+public class RewriteBottomUpJob<NODE_TYPE extends TreeNode> extends Job<NODE_TYPE> {
     private final Group group;
+    private final List<Rule<NODE_TYPE>> rules;
+    private final boolean childrenOptimized;
 
-    public RewriteBottomUpJob(Group group, PlannerContext context) {
+    public RewriteBottomUpJob(Group group, List<Rule<NODE_TYPE>> rules, PlannerContext context) {
+        this(group, rules, context, false);
+    }
+
+    private RewriteBottomUpJob(Group group, List<Rule<NODE_TYPE>> rules,
+            PlannerContext context, boolean childrenOptimized) {
         super(JobType.BOTTOM_UP_REWRITE, context);
-        this.group = group;
+        this.group = Objects.requireNonNull(group, "group cannot be null");
+        this.rules = Objects.requireNonNull(rules, "rules cannot be null");
+        this.childrenOptimized = childrenOptimized;
     }
 
     @Override
-    public void execute() {
+    public void execute() throws AnalysisException {
+        GroupExpression logicalExpression = group.getLogicalExpression();
+        if (!childrenOptimized) {
+            for (Group childGroup : logicalExpression.children()) {
+                pushTask(new RewriteBottomUpJob<>(childGroup, rules, context, false));
+
+                pushTask(new RewriteBottomUpJob<>(group, rules, context, true));

Review Comment:
   Move this line before the loop



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org