You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by jn...@apache.org on 2015/04/22 08:10:07 UTC

[04/13] drill git commit: DRILL-1384: Part 4 - Modify cost estimation for DrillProject and Drill Logical Join operator. Use cpu, io, network, meory cost in Drill cost compare method.

DRILL-1384: Part 4 - Modify cost estimation for DrillProject and Drill Logical Join operator. Use cpu, io, network, meory cost in Drill cost compare method.

Modify DrillProjectRelBase

Use a visitor to identify complex filed with named segment only in DrillProjectRelBase.

Modify cost estimation for Drill Logical Join operator. Use meory cost in Drill cost compare method.

Put the memory costing into DrillCostBase comparison.

Revise based on review comments.

Revise code based on review comments.


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/4a3ad5f5
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/4a3ad5f5
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/4a3ad5f5

Branch: refs/heads/master
Commit: 4a3ad5f533b9915e254ee55a70fde6de4e87f8dc
Parents: b9d4d10
Author: Jinfeng Ni <jn...@maprtech.com>
Authored: Mon Mar 30 10:50:05 2015 -0700
Committer: Jinfeng Ni <jn...@apache.org>
Committed: Tue Apr 21 16:29:37 2015 -0700

----------------------------------------------------------------------
 .../exec/planner/common/DrillJoinRelBase.java   |  42 ++++++-
 .../planner/common/DrillProjectRelBase.java     | 109 ++++++++++++++++++-
 .../drill/exec/planner/cost/DrillCostBase.java  |  26 +++--
 .../planner/logical/DrillFilterJoinRules.java   |  17 ---
 .../exec/planner/logical/DrillRuleSets.java     |   9 +-
 .../exec/planner/physical/HashJoinPrel.java     |  34 +-----
 6 files changed, 166 insertions(+), 71 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/4a3ad5f5/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillJoinRelBase.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillJoinRelBase.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillJoinRelBase.java
index 592e95b..8dc5cf1 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillJoinRelBase.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillJoinRelBase.java
@@ -22,6 +22,10 @@ import java.util.HashSet;
 import java.util.List;
 
 import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.drill.exec.ExecConstants;
+import org.apache.drill.exec.expr.holders.IntHolder;
+import org.apache.drill.exec.planner.cost.DrillCostBase;
 import org.apache.drill.exec.planner.cost.DrillCostBase.DrillCostFactory;
 import org.apache.drill.exec.planner.physical.PrelUtil;
 import org.apache.calcite.rel.InvalidRelException;
@@ -60,7 +64,10 @@ public abstract class DrillJoinRelBase extends Join implements DrillRelNode {
       return ((DrillCostFactory)planner.getCostFactory()).makeInfiniteCost();
     }
 
-    return super.computeSelfCost(planner);
+    // We do not know which join method, i.e HASH-join or MergeJoin, will be used in Logical Planning.
+    // Here, we assume to use Hash-join, since this is a more commonly-used Join method in Drill.
+    return computeHashJoinCost(planner);
+    // return super.computeSelfCost(planner);
   }
 
   @Override
@@ -91,4 +98,37 @@ public abstract class DrillJoinRelBase extends Join implements DrillRelNode {
     return this.rightKeys;
   }
 
+  protected RelOptCost computeHashJoinCost(RelOptPlanner planner) {
+    double probeRowCount = RelMetadataQuery.getRowCount(this.getLeft());
+    double buildRowCount = RelMetadataQuery.getRowCount(this.getRight());
+
+    // cpu cost of hashing the join keys for the build side
+    double cpuCostBuild = DrillCostBase.HASH_CPU_COST * getRightKeys().size() * buildRowCount;
+    // cpu cost of hashing the join keys for the probe side
+    double cpuCostProbe = DrillCostBase.HASH_CPU_COST * getLeftKeys().size() * probeRowCount;
+
+    // cpu cost of evaluating each leftkey=rightkey join condition
+    double joinConditionCost = DrillCostBase.COMPARE_CPU_COST * this.getLeftKeys().size();
+
+    double factor = PrelUtil.getPlannerSettings(planner).getOptions()
+        .getOption(ExecConstants.HASH_JOIN_TABLE_FACTOR_KEY).float_val;
+    long fieldWidth = PrelUtil.getPlannerSettings(planner).getOptions()
+        .getOption(ExecConstants.AVERAGE_FIELD_WIDTH_KEY).num_val;
+
+    // table + hashValues + links
+    double memCost =
+        (
+            (fieldWidth * this.getRightKeys().size()) +
+                IntHolder.WIDTH +
+                IntHolder.WIDTH
+        ) * buildRowCount * factor;
+
+    double cpuCost = joinConditionCost * (probeRowCount) // probe size determine the join condition comparison cost
+        + cpuCostBuild + cpuCostProbe ;
+
+    DrillCostFactory costFactory = (DrillCostFactory) planner.getCostFactory();
+
+    return costFactory.makeCost(buildRowCount + probeRowCount, cpuCost, 0, 0, memCost);
+
+  }
 }

http://git-wip-us.apache.org/repos/asf/drill/blob/4a3ad5f5/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillProjectRelBase.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillProjectRelBase.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillProjectRelBase.java
index 51b4e3f..42ef6ac 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillProjectRelBase.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillProjectRelBase.java
@@ -18,9 +18,20 @@
 package org.apache.drill.exec.planner.common;
 
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.List;
 
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexCorrelVariable;
+import org.apache.calcite.rex.RexDynamicParam;
+import org.apache.calcite.rex.RexFieldAccess;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.calcite.rex.RexLocalRef;
+import org.apache.calcite.rex.RexOver;
+import org.apache.calcite.rex.RexRangeRef;
+import org.apache.calcite.rex.RexVisitorImpl;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.calcite.sql.type.SqlTypeName;
 import org.apache.drill.common.expression.FieldReference;
 import org.apache.drill.common.expression.LogicalExpression;
 import org.apache.drill.common.logical.data.NamedExpression;
@@ -49,10 +60,13 @@ import com.google.common.collect.Lists;
  * Base class for logical and physical Project implemented in Drill
  */
 public abstract class DrillProjectRelBase extends Project implements DrillRelNode {
+  private final int nonSimpleFieldCount ;
+
   protected DrillProjectRelBase(Convention convention, RelOptCluster cluster, RelTraitSet traits, RelNode child, List<RexNode> exps,
       RelDataType rowType) {
     super(cluster, traits, child, exps, rowType, Flags.BOXED);
     assert getConvention() == convention;
+    nonSimpleFieldCount = this.getRowType().getFieldCount() - getSimpleFieldCount();
   }
 
   @Override
@@ -62,8 +76,9 @@ public abstract class DrillProjectRelBase extends Project implements DrillRelNod
     }
 
     // cost is proportional to the number of rows and number of columns being projected
-    double rowCount = RelMetadataQuery.getRowCount(this);
-    double cpuCost = DrillCostBase.PROJECT_CPU_COST * getRowType().getFieldCount();
+    double rowCount = nonSimpleFieldCount >0 ? RelMetadataQuery.getRowCount(this) : 0;
+    double cpuCost = DrillCostBase.PROJECT_CPU_COST * rowCount * nonSimpleFieldCount;
+
     DrillCostFactory costFactory = (DrillCostFactory)planner.getCostFactory();
     return costFactory.makeCost(rowCount, cpuCost, 0, 0);
   }
@@ -100,4 +115,92 @@ public abstract class DrillProjectRelBase extends Project implements DrillRelNod
     return expressions;
   }
 
+  private int getSimpleFieldCount() {
+    int cnt = 0;
+
+    final ComplexFieldWithNamedSegmentIdentifier complexFieldIdentifer = new ComplexFieldWithNamedSegmentIdentifier();
+    // SimpleField, either column name, or complex field reference with only named segment ==> no array segment
+    // a, a.b.c are simple fields.
+    // a[1].b.c, a.b[1], a.b.c[1] are not simple fields, since they all contain array segment.
+    //  a + b, a * 10 + b, etc are not simple fields, since they are expressions.
+    for (RexNode expr : this.getProjects()) {
+      if (expr instanceof RexInputRef) {
+        // Simple Field reference.
+        cnt ++;
+      } else if (expr instanceof RexCall && expr.accept(complexFieldIdentifer)) {
+        // Complex field with named segments only.
+        cnt ++;
+      }
+    }
+    return cnt;
+  }
+
+  private static class ComplexFieldWithNamedSegmentIdentifier extends RexVisitorImpl<Boolean> {
+    protected ComplexFieldWithNamedSegmentIdentifier() {
+      super(true);
+    }
+
+    @Override
+    public Boolean visitInputRef(RexInputRef inputRef) {
+      return true;
+    }
+
+    @Override
+    public Boolean visitLocalRef(RexLocalRef localRef) {
+      return doUnknown(localRef);
+    }
+
+    @Override
+    public Boolean visitLiteral(RexLiteral literal) {
+      return doUnknown(literal);
+    }
+
+    @Override
+    public Boolean visitOver(RexOver over) {
+      return doUnknown(over);
+    }
+
+    @Override
+    public Boolean visitCorrelVariable(RexCorrelVariable correlVariable) {
+      return doUnknown(correlVariable);
+    }
+
+    @Override
+    public Boolean visitCall(RexCall call) {
+      if (call.getOperator() == SqlStdOperatorTable.ITEM) {
+        final RexNode op0 = call.getOperands().get(0);
+        final RexNode op1 = call.getOperands().get(1);
+
+        if (op0 instanceof RexInputRef &&
+            op1 instanceof RexLiteral && ((RexLiteral) op1).getTypeName() == SqlTypeName.CHAR) {
+          return true;
+        } else if (op0 instanceof RexCall &&
+            op1 instanceof RexLiteral && ((RexLiteral) op1).getTypeName() == SqlTypeName.CHAR) {
+          return op0.accept(this);
+        }
+      }
+
+      return false;
+    }
+
+    @Override
+    public Boolean visitDynamicParam(RexDynamicParam dynamicParam) {
+      return doUnknown(dynamicParam);
+    }
+
+    @Override
+    public Boolean visitRangeRef(RexRangeRef rangeRef) {
+      return doUnknown(rangeRef);
+    }
+
+    @Override
+    public Boolean visitFieldAccess(RexFieldAccess fieldAccess) {
+      return doUnknown(fieldAccess);
+    }
+
+    private boolean doUnknown(Object o) {
+      return false;
+    }
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/drill/blob/4a3ad5f5/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillCostBase.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillCostBase.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillCostBase.java
index 0e032f9..229a1cb 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillCostBase.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillCostBase.java
@@ -57,6 +57,9 @@ public class DrillCostBase implements DrillRelOptCost {
   // add to the end of hash chain if no match found
   public static final int HASH_CPU_COST = 8 * BASE_CPU_COST;
 
+  // The ratio to convert memory cost into CPU cost.
+  public static final double MEMORY_TO_CPU_RATIO = 1.0;
+
   public static final int RANGE_PARTITION_CPU_COST = 12 * BASE_CPU_COST;
 
   // cost of comparing one field with another (ignoring data types for now)
@@ -165,7 +168,8 @@ public class DrillCostBase implements DrillRelOptCost {
       || (this.cpu == Double.POSITIVE_INFINITY)
       || (this.io == Double.POSITIVE_INFINITY)
       || (this.network == Double.POSITIVE_INFINITY)
-      || (this.rowCount == Double.POSITIVE_INFINITY);
+      || (this.rowCount == Double.POSITIVE_INFINITY)
+      || (this.memory == Double.POSITIVE_INFINITY) ;
   }
 
   @Override
@@ -179,7 +183,8 @@ public class DrillCostBase implements DrillRelOptCost {
           && (this.cpu == ((DrillCostBase) other).cpu)
           && (this.io == ((DrillCostBase) other).io)
           && (this.network == ((DrillCostBase) other).network)
-          && (this.rowCount == ((DrillCostBase) other).rowCount));
+          && (this.rowCount == ((DrillCostBase) other).rowCount)
+          && (this.memory == ((DrillCostBase) other).memory));
   }
 
   @Override
@@ -192,7 +197,8 @@ public class DrillCostBase implements DrillRelOptCost {
       || ((Math.abs(this.cpu - that.cpu) < RelOptUtil.EPSILON)
           && (Math.abs(this.io - that.io) < RelOptUtil.EPSILON)
           && (Math.abs(this.network - that.network) < RelOptUtil.EPSILON)
-          && (Math.abs(this.rowCount - that.rowCount) < RelOptUtil.EPSILON));
+          && (Math.abs(this.rowCount - that.rowCount) < RelOptUtil.EPSILON)
+          && (Math.abs(this.memory - that.memory) < RelOptUtil.EPSILON));
   }
 
   @Override
@@ -200,20 +206,16 @@ public class DrillCostBase implements DrillRelOptCost {
     DrillCostBase that = (DrillCostBase) other;
 
     return this == that
-      || ( (this.cpu + this.io + this.network) <=
-           (that.cpu + that.io + that.network)
-          && this.rowCount <= that.rowCount
-         );
+      || ( (this.cpu + this.io + this.network + this.memory * DrillCostBase.MEMORY_TO_CPU_RATIO)
+        <= (that.cpu + that.io + that.network + that.memory * DrillCostBase.MEMORY_TO_CPU_RATIO));
   }
 
   @Override
   public boolean isLt(RelOptCost other) {
     DrillCostBase that = (DrillCostBase) other;
 
-     return ( (this.cpu + this.io + this.network) <
-             (that.cpu + that.io + that.network)
-            && this.rowCount <= that.rowCount
-           );
+     return  ( (this.cpu + this.io + this.network + this.memory * DrillCostBase.MEMORY_TO_CPU_RATIO)
+         < (that.cpu + that.io + that.network + that.memory * DrillCostBase.MEMORY_TO_CPU_RATIO) );
   }
 
   @Override
@@ -249,7 +251,7 @@ public class DrillCostBase implements DrillRelOptCost {
     if (this == INFINITY) {
       return this;
     }
-    return new DrillCostBase(rowCount * factor, cpu * factor, io * factor, network * factor);
+    return new DrillCostBase(rowCount * factor, cpu * factor, io * factor, network * factor, memory * factor);
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/drill/blob/4a3ad5f5/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillFilterJoinRules.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillFilterJoinRules.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillFilterJoinRules.java
index d2edfbd..2affb0c 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillFilterJoinRules.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillFilterJoinRules.java
@@ -60,23 +60,6 @@ public class DrillFilterJoinRules {
             return true;
           }
 
-          // following logic applies to INNER join only.
-//          if (RelOptUtil.isEqui(join.getLeft(), join.getRight(), exp)) {
-//            return true;
-//          }
-//          if (exp instanceof RexCall) {
-//            final RexCall call = (RexCall) exp;
-//            final SqlKind kind = call.getKind();
-//            final List<RexNode> operands = call.getOperands();
-//
-//            if (  (kind == SqlKind.EQUALS
-//                || kind == SqlKind.IS_DISTINCT_FROM)
-//               && operands.get(0) instanceof RexInputRef
-//               && operands.get(1) instanceof RexInputRef) {
-//              return true;
-//            }
-//          }
-
           return false;
         }
       };

http://git-wip-us.apache.org/repos/asf/drill/blob/4a3ad5f5/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillRuleSets.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillRuleSets.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillRuleSets.java
index 68f9b20..d035def 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillRuleSets.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillRuleSets.java
@@ -23,7 +23,6 @@ import java.util.List;
 
 import org.apache.calcite.rel.rules.AggregateExpandDistinctAggregatesRule;
 import org.apache.calcite.rel.rules.AggregateRemoveRule;
-import org.apache.calcite.rel.rules.FilterJoinRule;
 import org.apache.calcite.rel.rules.JoinPushThroughJoinRule;
 import org.apache.calcite.rel.rules.ProjectRemoveRule;
 import org.apache.calcite.rel.rules.ReduceExpressionsRule;
@@ -120,10 +119,10 @@ public class DrillRuleSets {
         // Add support for WHERE style joins.
 //      PushFilterPastProjectRule.INSTANCE, // Replaced by DrillPushFilterPastProjectRule
       DrillPushFilterPastProjectRule.INSTANCE,
-      DrillFilterJoinRules.DRILL_FILTER_ON_JOIN, //FilterJoinRule.FILTER_ON_JOIN,   // PushFilterPastJoinRule
-      DrillFilterJoinRules.DRILL_JOIN, //FilterJoinRule.JOIN,             // PushFilterPastJoinRule
-      JoinPushThroughJoinRule.RIGHT,   // PushJoinThroughJoinRule
-      JoinPushThroughJoinRule.LEFT,    // PushJoinThroughJoinRule
+      DrillFilterJoinRules.DRILL_FILTER_ON_JOIN,
+      DrillFilterJoinRules.DRILL_JOIN,
+      JoinPushThroughJoinRule.RIGHT,
+      JoinPushThroughJoinRule.LEFT,
       // End support for WHERE style joins.
 
       //Add back rules

http://git-wip-us.apache.org/repos/asf/drill/blob/4a3ad5f5/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/HashJoinPrel.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/HashJoinPrel.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/HashJoinPrel.java
index 9db8e7d..aca55a0 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/HashJoinPrel.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/HashJoinPrel.java
@@ -22,17 +22,12 @@ import java.util.List;
 
 import org.apache.calcite.rel.core.Join;
 import org.apache.drill.common.logical.data.JoinCondition;
-import org.apache.drill.exec.ExecConstants;
-import org.apache.drill.exec.expr.holders.IntHolder;
 import org.apache.drill.exec.physical.base.PhysicalOperator;
 import org.apache.drill.exec.physical.config.HashJoinPOP;
-import org.apache.drill.exec.planner.cost.DrillCostBase;
-import org.apache.drill.exec.planner.cost.DrillCostBase.DrillCostFactory;
 import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode;
 import org.apache.calcite.rel.InvalidRelException;
 import org.apache.calcite.rel.core.JoinRelType;
 import org.apache.calcite.rel.RelNode;
-import org.apache.calcite.rel.metadata.RelMetadataQuery;
 import org.apache.calcite.plan.RelOptCluster;
 import org.apache.calcite.plan.RelOptCost;
 import org.apache.calcite.plan.RelOptPlanner;
@@ -72,34 +67,7 @@ public class HashJoinPrel  extends JoinPrel {
     if(PrelUtil.getSettings(getCluster()).useDefaultCosting()) {
       return super.computeSelfCost(planner).multiplyBy(.1);
     }
-    double probeRowCount = RelMetadataQuery.getRowCount(this.getLeft());
-    double buildRowCount = RelMetadataQuery.getRowCount(this.getRight());
-
-    // cpu cost of hashing the join keys for the build side
-    double cpuCostBuild = DrillCostBase.HASH_CPU_COST * getRightKeys().size() * buildRowCount;
-    // cpu cost of hashing the join keys for the probe side
-    double cpuCostProbe = DrillCostBase.HASH_CPU_COST * getLeftKeys().size() * probeRowCount;
-
-    // cpu cost of evaluating each leftkey=rightkey join condition
-    double joinConditionCost = DrillCostBase.COMPARE_CPU_COST * this.getLeftKeys().size();
-
-    double cpuCost = joinConditionCost * (buildRowCount + probeRowCount) + cpuCostBuild + cpuCostProbe;
-
-    double factor = PrelUtil.getPlannerSettings(planner).getOptions()
-      .getOption(ExecConstants.HASH_JOIN_TABLE_FACTOR_KEY).float_val;
-    long fieldWidth = PrelUtil.getPlannerSettings(planner).getOptions()
-      .getOption(ExecConstants.AVERAGE_FIELD_WIDTH_KEY).num_val;
-
-    // table + hashValues + links
-    double memCost =
-      (
-        (fieldWidth * this.getRightKeys().size()) +
-          IntHolder.WIDTH +
-          IntHolder.WIDTH
-      ) * buildRowCount * factor;
-
-    DrillCostFactory costFactory = (DrillCostFactory) planner.getCostFactory();
-    return costFactory.makeCost(buildRowCount + probeRowCount, cpuCost, 0, 0, memCost);
+    return computeHashJoinCost(planner);
   }
 
   @Override