You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by ja...@apache.org on 2014/06/20 22:24:43 UTC

[07/32] git commit: DRILL-1023: Fix issue where joins are over estimated causing excess parallelization. Use correct interfaces for retrieving row counts.

DRILL-1023: Fix issue where joins are over estimated causing excess parallelization.  Use correct interfaces for retrieving row counts.


Project: http://git-wip-us.apache.org/repos/asf/incubator-drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-drill/commit/a88ebb27
Tree: http://git-wip-us.apache.org/repos/asf/incubator-drill/tree/a88ebb27
Diff: http://git-wip-us.apache.org/repos/asf/incubator-drill/diff/a88ebb27

Branch: refs/heads/master
Commit: a88ebb27d405305f41a72efe2fbee7305dc25ba8
Parents: 0dec032
Author: Jacques Nadeau <ja...@apache.org>
Authored: Tue Jun 17 16:02:54 2014 -0700
Committer: Jacques Nadeau <ja...@apache.org>
Committed: Wed Jun 18 21:37:46 2014 -0700

----------------------------------------------------------------------
 .../exec/planner/common/DrillJoinRelBase.java    |  9 +++++++--
 .../exec/planner/physical/AggPruleBase.java      |  2 +-
 .../planner/physical/PhysicalPlanCreator.java    |  2 +-
 .../exec/planner/physical/PlannerSettings.java   |  6 ++++++
 .../visitor/ExcessiveExchangeIdentifier.java     |  3 +--
 .../exec/server/options/SystemOptionManager.java |  1 +
 .../exec/server/options/TypeValidators.java      | 19 +++++++++++++++++++
 7 files changed, 36 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/a88ebb27/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillJoinRelBase.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillJoinRelBase.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillJoinRelBase.java
index 80f767c..3b3aa1a 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillJoinRelBase.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillJoinRelBase.java
@@ -22,6 +22,7 @@ import java.util.HashSet;
 import java.util.List;
 
 import org.apache.drill.exec.planner.cost.DrillCostBase.DrillCostFactory;
+import org.apache.drill.exec.planner.physical.PrelUtil;
 import org.eigenbase.rel.InvalidRelException;
 import org.eigenbase.rel.JoinRelBase;
 import org.eigenbase.rel.JoinRelType;
@@ -41,10 +42,12 @@ import com.google.common.collect.Lists;
 public abstract class DrillJoinRelBase extends JoinRelBase implements DrillRelNode {
   protected List<Integer> leftKeys = Lists.newArrayList();
   protected List<Integer> rightKeys = Lists.newArrayList() ;
+  private final double joinRowFactor;
 
   public DrillJoinRelBase(RelOptCluster cluster, RelTraitSet traits, RelNode left, RelNode right, RexNode condition,
       JoinRelType joinType) throws InvalidRelException {
     super(cluster, traits, left, right, condition, joinType, Collections.<String> emptySet());
+    this.joinRowFactor = PrelUtil.getPlannerSettings(cluster.getPlanner()).getRowCountEstimateFactor();
   }
 
   @Override
@@ -55,8 +58,10 @@ public abstract class DrillJoinRelBase extends JoinRelBase implements DrillRelNo
     return super.computeSelfCost(planner);
   }
 
-
-
+  @Override
+  public double getRows() {
+    return joinRowFactor * Math.max(this.getLeft().getRows(), this.getRight().getRows());
+  }
 
   /**
    * Returns whether there are any elements in common between left and right.

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/a88ebb27/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/AggPruleBase.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/AggPruleBase.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/AggPruleBase.java
index 1b1cc94..7b7e3b7 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/AggPruleBase.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/AggPruleBase.java
@@ -62,7 +62,7 @@ public abstract class AggPruleBase extends Prule {
   protected boolean create2PhasePlan(RelOptRuleCall call, DrillAggregateRel aggregate) {
     PlannerSettings settings = PrelUtil.getPlannerSettings(call.getPlanner());
     RelNode child = call.rel(0).getInputs().get(0);
-    boolean smallInput = child.computeSelfCost(call.getPlanner()).getRows() < settings.getSliceTarget();
+    boolean smallInput = child.getRows() < settings.getSliceTarget();
     if (! settings.isMultiPhaseAggEnabled() || settings.isSingleMode() || smallInput) {
       return false;
     }

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/a88ebb27/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/PhysicalPlanCreator.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/PhysicalPlanCreator.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/PhysicalPlanCreator.java
index bf1e51a..130ac87 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/PhysicalPlanCreator.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/PhysicalPlanCreator.java
@@ -60,7 +60,7 @@ public class PhysicalPlanCreator {
 
   public PhysicalOperator addMetadata(Prel originalPrel, PhysicalOperator op){
     op.setOperatorId(opIdMap.get(originalPrel).getAsSingleInt());
-    op.setCost(originalPrel.computeSelfCost(originalPrel.getCluster().getPlanner()).getRows());
+    op.setCost(originalPrel.getRows());
     return op;
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/a88ebb27/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/PlannerSettings.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/PlannerSettings.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/PlannerSettings.java
index edad125..e10b620 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/PlannerSettings.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/PlannerSettings.java
@@ -24,6 +24,7 @@ import org.apache.drill.exec.server.options.OptionManager;
 import org.apache.drill.exec.server.options.OptionValidator;
 import org.apache.drill.exec.server.options.TypeValidators.BooleanValidator;
 import org.apache.drill.exec.server.options.TypeValidators.PositiveLongValidator;
+import org.apache.drill.exec.server.options.TypeValidators.RangeDoubleValidator;
 
 public class PlannerSettings implements FrameworkContext{
   static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(PlannerSettings.class);
@@ -41,6 +42,7 @@ public class PlannerSettings implements FrameworkContext{
   public static final OptionValidator MULTIPHASE = new BooleanValidator("planner.enable_multiphase_agg", true);
   public static final OptionValidator BROADCAST = new BooleanValidator("planner.enable_broadcast_join", true);
   public static final OptionValidator BROADCAST_THRESHOLD = new PositiveLongValidator("planner.broadcast_threshold", MAX_BROADCAST_THRESHOLD, 1000000);
+  public static final OptionValidator JOIN_ROW_COUNT_ESTIMATE_FACTOR = new RangeDoubleValidator("planner.join.row_count_estimate_factor", 0, 100, 1.0d);
 
   public OptionManager options = null;
 
@@ -56,6 +58,10 @@ public class PlannerSettings implements FrameworkContext{
     return numEndPoints;
   }
 
+  public double getRowCountEstimateFactor(){
+    return options.getOption(JOIN_ROW_COUNT_ESTIMATE_FACTOR.getOptionName()).float_val;
+  }
+
   public boolean useDefaultCosting() {
     return useDefaultCosting;
   }

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/a88ebb27/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/visitor/ExcessiveExchangeIdentifier.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/visitor/ExcessiveExchangeIdentifier.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/visitor/ExcessiveExchangeIdentifier.java
index ae4d661..168fd28 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/visitor/ExcessiveExchangeIdentifier.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/visitor/ExcessiveExchangeIdentifier.java
@@ -88,8 +88,7 @@ public class ExcessiveExchangeIdentifier extends BasePrelVisitor<Prel, Excessive
     private int maxWidth = Integer.MAX_VALUE;
 
     public void add(Prel prel){
-      RelOptCost cost = prel.computeSelfCost(prel.getCluster().getPlanner());
-      maxRows = Math.max(cost.getRows(), maxRows);
+      maxRows = Math.max(prel.getRows(), maxRows);
     }
 
     public void setSingular(){

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/a88ebb27/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java b/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
index c950c5f..8503197 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
@@ -53,6 +53,7 @@ public class SystemOptionManager implements OptionManager{
       PlannerSettings.MULTIPHASE,
       PlannerSettings.BROADCAST,
       PlannerSettings.BROADCAST_THRESHOLD,
+      PlannerSettings.JOIN_ROW_COUNT_ESTIMATE_FACTOR,
       ExecConstants.OUTPUT_FORMAT_VALIDATOR,
       ExecConstants.PARQUET_BLOCK_SIZE_VALIDATOR,
       ExecConstants.SLICE_TARGET_OPTION,

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/a88ebb27/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/TypeValidators.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/TypeValidators.java b/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/TypeValidators.java
index bc6a9d3..a90807c 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/TypeValidators.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/TypeValidators.java
@@ -45,6 +45,25 @@ public class TypeValidators {
     }
   }
 
+  public static class RangeDoubleValidator extends DoubleValidator {
+    private final double min;
+    private final double max;
+
+    public RangeDoubleValidator(String name, double def, double min, double max) {
+      super(name, def);
+      this.min = min;
+      this.max = max;
+    }
+
+    @Override
+    public void extraValidate(OptionValue v) throws ExpressionParsingException {
+      if (v.float_val > max || v.float_val < min)
+        throw new ExpressionParsingException(String.format("Option %s must be between %d and %d.", getOptionName(), min,
+            max));
+    }
+
+  }
+
   public static class BooleanValidator extends TypeValidator{
     public BooleanValidator(String name, boolean def){
       super(name, Kind.BOOLEAN, OptionValue.createBoolean(OptionType.SYSTEM, name, def));