You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by jn...@apache.org on 2015/05/10 04:41:03 UTC

[2/2] drill git commit: Address review comments. Modify the costing of the Cartesian logical join to fix CanNotPlan errors in some TPCDS queries that use views on text-format data.

Address review comments. Modify the costing of the Cartesian logical join to fix CanNotPlan errors in some TPCDS queries that use views on text-format data.

Code clean up.


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/a2963836
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/a2963836
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/a2963836

Branch: refs/heads/master
Commit: a296383632946a1f45a9b66d4638dab00a026d30
Parents: cd05500
Author: Jinfeng Ni <jn...@apache.org>
Authored: Sat May 9 15:29:00 2015 -0700
Committer: Jinfeng Ni <jn...@apache.org>
Committed: Sat May 9 16:42:43 2015 -0700

----------------------------------------------------------------------
 .../exec/planner/common/DrillJoinRelBase.java   | 63 ++++++++++----------
 .../exec/planner/logical/DrillRuleSets.java     | 13 ++--
 2 files changed, 35 insertions(+), 41 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/a2963836/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillJoinRelBase.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillJoinRelBase.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillJoinRelBase.java
index 63fd497..7d3ad0a 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillJoinRelBase.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillJoinRelBase.java
@@ -28,6 +28,7 @@ import org.apache.drill.exec.planner.cost.DrillCostBase;
 import org.apache.drill.exec.physical.impl.join.JoinUtils;
 import org.apache.drill.exec.physical.impl.join.JoinUtils.JoinCategory;
 import org.apache.drill.exec.planner.cost.DrillCostBase.DrillCostFactory;
+import org.apache.drill.exec.planner.cost.DrillRelOptCost;
 import org.apache.drill.exec.planner.physical.PrelUtil;
 import org.apache.calcite.rel.InvalidRelException;
 import org.apache.calcite.rel.core.Join;
@@ -122,38 +123,26 @@ public abstract class DrillJoinRelBase extends Join implements DrillRelNode {
   }
 
   protected  RelOptCost computeCartesianJoinCost(RelOptPlanner planner) {
-    double probeRowCount = RelMetadataQuery.getRowCount(this.getLeft());
-    double buildRowCount = RelMetadataQuery.getRowCount(this.getRight());
-
-    // cpu cost of hashing the join keys for the build side
-    double cpuCostBuild = DrillCostBase.HASH_CPU_COST * getRightKeys().size() * buildRowCount;
-    // cpu cost of hashing the join keys for the probe side
-    double cpuCostProbe = DrillCostBase.HASH_CPU_COST * getLeftKeys().size() * probeRowCount;
-
-    // cpu cost of evaluating each leftkey=rightkey join condition
-    double joinConditionCost = DrillCostBase.COMPARE_CPU_COST * this.getLeftKeys().size();
+    final double probeRowCount = RelMetadataQuery.getRowCount(this.getLeft());
+    final double buildRowCount = RelMetadataQuery.getRowCount(this.getRight());
 
-    double factor = PrelUtil.getPlannerSettings(planner).getOptions()
-        .getOption(ExecConstants.HASH_JOIN_TABLE_FACTOR_KEY).float_val;
-    long fieldWidth = PrelUtil.getPlannerSettings(planner).getOptions()
-        .getOption(ExecConstants.AVERAGE_FIELD_WIDTH_KEY).num_val;
-
-    // table + hashValues + links
-    double memCost =
-        (
-            (fieldWidth * this.getRightKeys().size()) +
-                IntHolder.WIDTH +
-                IntHolder.WIDTH
-        ) * buildRowCount * factor;
+    final DrillCostFactory costFactory = (DrillCostFactory) planner.getCostFactory();
 
-    double cpuCost = joinConditionCost * (probeRowCount * buildRowCount) // probe size determine the join condition comparison cost
-        + cpuCostBuild + cpuCostProbe ;
+    final double mulFactor = 10000; // This is a magic number,
+                                    // just to make sure Cartesian Join is more expensive
+                                    // than Non-Cartesian Join.
 
-    DrillCostFactory costFactory = (DrillCostFactory) planner.getCostFactory();
+    final int keySize = 1 ;  // Assume one join key when estimating the join cost.
+    final DrillCostBase cost = (DrillCostBase) computeHashJoinCostWithKeySize(planner, keySize).multiplyBy(mulFactor);
 
-    final double mulFactor = 100000; // This is a magic number, just to make sure CartesianJoin is more expensive than Non-CartesianJoin.
+    // The Cartesian join row count is the product of the two input row counts. The other cost components come from the DrillCost estimated above.
+    return costFactory.makeCost(
+        buildRowCount * probeRowCount,
+        cost.getCpu(),
+        cost.getIo(),
+        cost.getNetwork(),
+        cost.getMemory() );
 
-    return costFactory.makeCost(buildRowCount * probeRowCount, cpuCost * mulFactor, 0, 0, memCost * mulFactor);
   }
 
   protected RelOptCost computeLogicalJoinCost(RelOptPlanner planner) {
@@ -168,16 +157,26 @@ public abstract class DrillJoinRelBase extends Join implements DrillRelNode {
   }
 
   protected RelOptCost computeHashJoinCost(RelOptPlanner planner) {
+      return computeHashJoinCostWithKeySize(planner, this.getLeftKeys().size());
+  }
+
+  /**
+   *
+   * @param planner  : Optimization Planner.
+   * @param keySize  : the number of join keys in the join condition. The left key count should equal the right key count.
+   * @return         : RelOptCost
+   */
+  private RelOptCost computeHashJoinCostWithKeySize(RelOptPlanner planner, int keySize) {
     double probeRowCount = RelMetadataQuery.getRowCount(this.getLeft());
     double buildRowCount = RelMetadataQuery.getRowCount(this.getRight());
 
     // cpu cost of hashing the join keys for the build side
-    double cpuCostBuild = DrillCostBase.HASH_CPU_COST * getRightKeys().size() * buildRowCount;
+    double cpuCostBuild = DrillCostBase.HASH_CPU_COST * keySize * buildRowCount;
     // cpu cost of hashing the join keys for the probe side
-    double cpuCostProbe = DrillCostBase.HASH_CPU_COST * getLeftKeys().size() * probeRowCount;
+    double cpuCostProbe = DrillCostBase.HASH_CPU_COST * keySize * probeRowCount;
 
     // cpu cost of evaluating each leftkey=rightkey join condition
-    double joinConditionCost = DrillCostBase.COMPARE_CPU_COST * this.getLeftKeys().size();
+    double joinConditionCost = DrillCostBase.COMPARE_CPU_COST * keySize;
 
     double factor = PrelUtil.getPlannerSettings(planner).getOptions()
         .getOption(ExecConstants.HASH_JOIN_TABLE_FACTOR_KEY).float_val;
@@ -187,7 +186,7 @@ public abstract class DrillJoinRelBase extends Join implements DrillRelNode {
     // table + hashValues + links
     double memCost =
         (
-            (fieldWidth * this.getRightKeys().size()) +
+            (fieldWidth * keySize) +
                 IntHolder.WIDTH +
                 IntHolder.WIDTH
         ) * buildRowCount * factor;
@@ -198,8 +197,8 @@ public abstract class DrillJoinRelBase extends Join implements DrillRelNode {
     DrillCostFactory costFactory = (DrillCostFactory) planner.getCostFactory();
 
     return costFactory.makeCost(buildRowCount + probeRowCount, cpuCost, 0, 0, memCost);
-
   }
+
   private boolean hasScalarSubqueryInput() {
     if (JoinUtils.isScalarSubquery(this.getLeft())
         || JoinUtils.isScalarSubquery(this.getRight())) {

http://git-wip-us.apache.org/repos/asf/drill/blob/a2963836/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillRuleSets.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillRuleSets.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillRuleSets.java
index 212cffa..f7cfbf4 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillRuleSets.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillRuleSets.java
@@ -124,20 +124,15 @@ public class DrillRuleSets {
       DrillPushFilterPastProjectRule.INSTANCE,
       DrillFilterJoinRules.DRILL_FILTER_ON_JOIN,
       DrillFilterJoinRules.DRILL_JOIN,
-//      JoinPushThroughJoinRule.RIGHT,
-//      JoinPushThroughJoinRule.LEFT,
       // End support for WHERE style joins.
 
       FilterMergeRule.INSTANCE,
-//      SwapJoinRule.INSTANCE,
-      AggregateRemoveRule.INSTANCE,   // RemoveDistinctRule
-//      UnionToDistinctRule.INSTANCE,
-      ProjectRemoveRule.NAME_CALC_INSTANCE,     // RemoveTrivialProjectRule
-//      RemoveTrivialCalcRule.INSTANCE,
-      SortRemoveRule.INSTANCE,      //RemoveSortRule.INSTANCE,
+      AggregateRemoveRule.INSTANCE,
+      ProjectRemoveRule.NAME_CALC_INSTANCE,
+      SortRemoveRule.INSTANCE,
 
       DrillMergeProjectRule.getInstance(true, RelFactories.DEFAULT_PROJECT_FACTORY, context.getFunctionRegistry()),
-      AggregateExpandDistinctAggregatesRule.INSTANCE, //RemoveDistinctAggregateRule.INSTANCE, //
+      AggregateExpandDistinctAggregatesRule.INSTANCE,
       DrillReduceAggregatesRule.INSTANCE,
 
       /*