You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@calcite.apache.org by hy...@apache.org on 2020/07/01 14:55:49 UTC

[calcite] branch master updated: [CALCITE-4097] Avoid requesting unnecessary trait request when deriving traits

This is an automated email from the ASF dual-hosted git repository.

hyuan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/calcite.git


The following commit(s) were added to refs/heads/master by this push:
     new 640da7c  [CALCITE-4097] Avoid requesting unnecessary trait request when deriving traits
640da7c is described below

commit 640da7c6d85c3e83fe38fb45d7f23ef5e1000c4e
Author: Haisheng Yuan <h....@alibaba-inc.com>
AuthorDate: Tue Jun 30 20:12:25 2020 -0500

    [CALCITE-4097] Avoid requesting unnecessary trait request when deriving traits
    
    If the child subset is used to derive new traits for current relnode, the
    subset will be marked REQUIRED when registering the new derived relnode and
    later will add enforcers between other delivered subsets.  e.g. a MergeJoin
    request both inputs hash distributed by [a,b] sorted by [a,b]. If the left
    input R1 happens to be distributed by [a], the MergeJoin can derive new traits
    from this input and request both input to be distributed by [a] sorted by
    [a,b]. In case there is a alternative R2 with ANY distribution in the left
    input's RelSet, we end up with requesting hash distribution [a] on alternative
    R2, which is unnecessary and waste, because we request distribution by [a]
    because of R1 can deliver the exact same distribution and we don't need to
    enforce properties on other subsets that can't satisfy the specific trait
    requirement.
---
 .../apache/calcite/plan/volcano/OptimizeTask.java  | 29 +++++++++++++++++++++-
 .../org/apache/calcite/plan/volcano/RelSet.java    |  8 +++---
 .../org/apache/calcite/plan/volcano/RelSubset.java | 17 +++++++++++++
 3 files changed, 50 insertions(+), 4 deletions(-)

diff --git a/core/src/main/java/org/apache/calcite/plan/volcano/OptimizeTask.java b/core/src/main/java/org/apache/calcite/plan/volcano/OptimizeTask.java
index 71fb037..c72e2b8 100644
--- a/core/src/main/java/org/apache/calcite/plan/volcano/OptimizeTask.java
+++ b/core/src/main/java/org/apache/calcite/plan/volcano/OptimizeTask.java
@@ -199,7 +199,6 @@ abstract class OptimizeTask {
           RelSubset subset = input.set.subsets.get(j);
           if (!subset.isDelivered() || equalsSansConvention(
               subset.getTraitSet(), rel.getCluster().traitSet())) {
-            // TODO: should use matching type to determine
             // Ideally we should stop deriving new relnodes when the
             // subset's traitSet equals with input traitSet, but
             // in case someone manually builds a physical relnode
@@ -227,6 +226,34 @@ abstract class OptimizeTask {
           } else {
             RelNode newRel = rel.derive(subset.getTraitSet(), childId);
             if (newRel != null && !planner.isRegistered(newRel)) {
+              RelNode newInput = newRel.getInput(childId);
+              assert newInput instanceof RelSubset;
+              if (newInput == subset) {
+                // If the child subset is used to derive new traits for
+                // current relnode, the subset will be marked REQUIRED
+                // when registering the new derived relnode and later
+                // will add enforcers between other delivered subsets.
+                // e.g. a MergeJoin request both inputs hash distributed
+                // by [a,b] sorted by [a,b]. If the left input R1 happens to
+                // be distributed by [a], the MergeJoin can derive new
+                // traits from this input and request both input to be
+                // distributed by [a] sorted by [a,b]. In case there is a
+                // alternative R2 with ANY distribution in the left input's
+                // RelSet, we may end up with requesting hash distribution
+                // [a] on alternative R2, which is unnecessary and waste,
+                // because we request distribution by [a] because of R1 can
+                // deliver the exact same distribution and we don't need to
+                // enforce properties on other subsets that can't satisfy
+                // the specific trait requirement.
+                // Here we add a constraint that {@code newInput == subset},
+                // because if the delivered child subset is HASH[a], but
+                // we require HASH[a].SORT[a,b], we still need to enable
+                // property enforcement on the required subset. Otherwise,
+                // we need to restrict enforcement between HASH[a].SORT[a,b]
+                // and HASH[a] only, which will make things a little bit
+                // complicated. We might optimize it in the future.
+                subset.disableEnforcing();
+              }
               RelSubset relSubset = planner.register(newRel, node);
               assert relSubset.set == planner.getSubset(node).set;
             }
diff --git a/core/src/main/java/org/apache/calcite/plan/volcano/RelSet.java b/core/src/main/java/org/apache/calcite/plan/volcano/RelSet.java
index 833aeba..5d20104 100644
--- a/core/src/main/java/org/apache/calcite/plan/volcano/RelSet.java
+++ b/core/src/main/java/org/apache/calcite/plan/volcano/RelSet.java
@@ -236,9 +236,11 @@ class RelSet {
         to = subset;
       }
 
-      if (from == to || useAbstractConverter
-          && !from.getConvention().useAbstractConvertersForConversion(
-              from.getTraitSet(), to.getTraitSet())) {
+      if (from == to
+          || to.isEnforceDisabled()
+          || useAbstractConverter
+              && !from.getConvention().useAbstractConvertersForConversion(
+                  from.getTraitSet(), to.getTraitSet())) {
         continue;
       }
 
diff --git a/core/src/main/java/org/apache/calcite/plan/volcano/RelSubset.java b/core/src/main/java/org/apache/calcite/plan/volcano/RelSubset.java
index 72d423b..f77b5c7 100644
--- a/core/src/main/java/org/apache/calcite/plan/volcano/RelSubset.java
+++ b/core/src/main/java/org/apache/calcite/plan/volcano/RelSubset.java
@@ -120,6 +120,14 @@ public class RelSubset extends AbstractRelNode {
    */
   boolean triggerRule = false;
 
+  /**
+   * When the subset state is REQUIRED, whether enable property enforcing
+   * between this subset and other delivered subsets. When it is true,
+   * no enforcer operators will be added even if the other subset can't
+   * satisfy current subset's required traitSet.
+   */
+  private boolean enforceDisabled = false;
+
   //~ Constructors -----------------------------------------------------------
 
   RelSubset(
@@ -179,6 +187,15 @@ public class RelSubset extends AbstractRelNode {
     return (state & REQUIRED) == REQUIRED;
   }
 
+  void disableEnforcing() {
+    assert isDelivered();
+    enforceDisabled = true;
+  }
+
+  boolean isEnforceDisabled() {
+    return enforceDisabled;
+  }
+
   public RelNode getBest() {
     return best;
   }