You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@doris.apache.org by GitBox <gi...@apache.org> on 2019/08/05 15:09:18 UTC

[GitHub] [incubator-doris] chenhao7253886 commented on a change in pull request #1566: Add switch for cost optimization

chenhao7253886 commented on a change in pull request #1566: Add switch for cost optimization
URL: https://github.com/apache/incubator-doris/pull/1566#discussion_r310653200
 
 

 ##########
 File path: fe/src/main/java/org/apache/doris/planner/DistributedPlanner.java
 ##########
 @@ -287,59 +287,73 @@ private PlanFragment createHashJoinFragment(HashJoinNode node, PlanFragment righ
                                                 PlanFragment leftChildFragment, long perNodeMemLimit,
                                                 ArrayList<PlanFragment> fragments)
             throws UserException {
-        // broadcast: send the rightChildFragment's output to each node executing
-        // the leftChildFragment; the cost across all nodes is proportional to the
-        // total amount of data sent
-        PlanNode rhsTree = rightChildFragment.getPlanRoot();
-        long rhsDataSize = 0;
-        long broadcastCost = 0;
-        if (rhsTree.getCardinality() != -1 && leftChildFragment.getNumNodes() != -1) {
-            rhsDataSize = Math.round((double) rhsTree.getCardinality() * rhsTree.getAvgRowSize());
-            broadcastCost = rhsDataSize * leftChildFragment.getNumNodes();
-        }
-        LOG.info("broadcast: cost=" + Long.toString(broadcastCost));
-        LOG.info("card=" + Long.toString(rhsTree.getCardinality())
-                + " row_size=" + Float.toString(rhsTree.getAvgRowSize())
-                + " #nodes=" + Integer.toString(leftChildFragment.getNumNodes()));
-
-        // repartition: both left- and rightChildFragment are partitioned on the
-        // join exprs
-        // TODO: take existing partition of input fragments into account to avoid
-        // unnecessary repartitioning
-        PlanNode lhsTree = leftChildFragment.getPlanRoot();
-        long partitionCost = 0;
-        if (lhsTree.getCardinality() != -1 && rhsTree.getCardinality() != -1) {
-            partitionCost = Math.round(
-                    (double) lhsTree.getCardinality() * lhsTree.getAvgRowSize() + (double) rhsTree
-                            .getCardinality() * rhsTree.getAvgRowSize());
-        }
-        LOG.info("partition: cost=" + Long.toString(partitionCost));
-        LOG.info(
-                "lhs card=" + Long.toString(lhsTree.getCardinality()) + " row_size=" + Float.toString(
-                        lhsTree.getAvgRowSize()));
-        LOG.info(
-                "rhs card=" + Long.toString(rhsTree.getCardinality()) + " row_size=" + Float.toString(
-                        rhsTree.getAvgRowSize()));
-        LOG.info(rhsTree.getExplainString());
-
         boolean doBroadcast;
-        // we do a broadcast join if
-        // - we're explicitly told to do so
-        // - or if it's cheaper and we weren't explicitly told to do a partitioned join
-        // - and we're not doing a full or right outer join (those require the left-hand
-        //   side to be partitioned for correctness)
-        // - and the expected size of the hash tbl doesn't exceed perNodeMemLimit
-        // we do a "<=" comparison of the costs so that we default to broadcast joins if
-        // we're unable to estimate the cost
-        if (node.getJoinOp() != JoinOperator.RIGHT_OUTER_JOIN
-                && node.getJoinOp() != JoinOperator.FULL_OUTER_JOIN
-                && (perNodeMemLimit == 0 || Math.round(
-                (double) rhsDataSize * PlannerContext.HASH_TBL_SPACE_OVERHEAD) <= perNodeMemLimit)
-                && (node.getInnerRef().isBroadcastJoin() || (!node.getInnerRef().isPartitionJoin()
-                && broadcastCost <= partitionCost))) {
-            doBroadcast = true;
+        if (!ctx_.getRootAnalyzer().getContext().getSessionVariable().isDisableCostOptimization()) {
+            // broadcast: send the rightChildFragment's output to each node executing
+            // the leftChildFragment; the cost across all nodes is proportional to the
+            // total amount of data sent
+            PlanNode rhsTree = rightChildFragment.getPlanRoot();
+            long rhsDataSize = 0;
+            long broadcastCost = 0;
+            if (rhsTree.getCardinality() != -1 && leftChildFragment.getNumNodes() != -1) {
+                rhsDataSize = Math.round((double) rhsTree.getCardinality() * rhsTree.getAvgRowSize());
+                broadcastCost = rhsDataSize * leftChildFragment.getNumNodes();
+            }
+            LOG.info("broadcast: cost=" + Long.toString(broadcastCost));
+            LOG.info("card=" + Long.toString(rhsTree.getCardinality())
+                    + " row_size=" + Float.toString(rhsTree.getAvgRowSize())
+                    + " #nodes=" + Integer.toString(leftChildFragment.getNumNodes()));
+
+            // repartition: both left- and rightChildFragment are partitioned on the
+            // join exprs
+            // TODO: take existing partition of input fragments into account to avoid
+            // unnecessary repartitioning
+            PlanNode lhsTree = leftChildFragment.getPlanRoot();
+            long partitionCost = 0;
+            if (lhsTree.getCardinality() != -1 && rhsTree.getCardinality() != -1) {
+                partitionCost = Math.round(
+                        (double) lhsTree.getCardinality() * lhsTree.getAvgRowSize() + (double) rhsTree
+                                .getCardinality() * rhsTree.getAvgRowSize());
+            }
+            LOG.info("partition: cost=" + Long.toString(partitionCost));
+            LOG.info(
+                    "lhs card=" + Long.toString(lhsTree.getCardinality()) + " row_size=" + Float.toString(
+                            lhsTree.getAvgRowSize()));
+            LOG.info(
+                    "rhs card=" + Long.toString(rhsTree.getCardinality()) + " row_size=" + Float.toString(
+                            rhsTree.getAvgRowSize()));
+            LOG.info(rhsTree.getExplainString());
+
+            // we do a broadcast join if
+            // - we're explicitly told to do so
+            // - or if it's cheaper and we weren't explicitly told to do a partitioned join
+            // - and we're not doing a full or right outer join (those require the left-hand
+            //   side to be partitioned for correctness)
+            // - and the expected size of the hash tbl doesn't exceed perNodeMemLimit
+            // we do a "<=" comparison of the costs so that we default to broadcast joins if
+            // we're unable to estimate the cost
+            if (node.getJoinOp() != JoinOperator.RIGHT_OUTER_JOIN
+                    && node.getJoinOp() != JoinOperator.FULL_OUTER_JOIN
+                    && node.getJoinOp() != JoinOperator.RIGHT_ANTI_JOIN
+                    && node.getJoinOp() != JoinOperator.RIGHT_SEMI_JOIN
+                    && (perNodeMemLimit == 0 || Math.round(
+                    (double) rhsDataSize * PlannerContext.HASH_TBL_SPACE_OVERHEAD) <= perNodeMemLimit)
+                    && (node.getInnerRef().isBroadcastJoin() || (!node.getInnerRef().isPartitionJoin()
+                    && broadcastCost <= partitionCost))) {
+                doBroadcast = true;
+            } else {
+                doBroadcast = false;
+            }
         } else {
-            doBroadcast = false;
+            if (node.getJoinOp() != JoinOperator.RIGHT_OUTER_JOIN
+                    && node.getJoinOp() != JoinOperator.RIGHT_ANTI_JOIN
+                    && node.getJoinOp() != JoinOperator.RIGHT_SEMI_JOIN
+                    && node.getJoinOp() != JoinOperator.FULL_OUTER_JOIN
+                    && !node.getInnerRef().isPartitionJoin()) {
 
 Review comment:
   I think it doesn't need to be considered, because I expect the join type to follow the hint even though it may exceed the upper limit.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@doris.apache.org
For additional commands, e-mail: dev-help@doris.apache.org