You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2020/07/14 07:58:55 UTC

[spark] branch branch-3.0 updated: [SPARK-32220][SQL][3.0][FOLLOW-UP] SHUFFLE_REPLICATE_NL Hint should not change Non-Cartesian Product join result

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 4e33454  [SPARK-32220][SQL][3.0][FOLLOW-UP] SHUFFLE_REPLICATE_NL Hint should not change Non-Cartesian Product join result
4e33454 is described below

commit 4e33454add5528e1214ffa2a1f09500ede11f1ab
Author: angerszhu <an...@gmail.com>
AuthorDate: Tue Jul 14 00:56:47 2020 -0700

    [SPARK-32220][SQL][3.0][FOLLOW-UP] SHUFFLE_REPLICATE_NL Hint should not change Non-Cartesian Product join result
    
    ### What changes were proposed in this pull request?
    follow comment https://github.com/apache/spark/pull/29035#discussion_r453468999
    Explain for pr
    
    ### Why are the changes needed?
    add comment
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    Not need
    
    Closes #29093 from AngersZhuuuu/SPARK-32220-3.0-FOLLOWUP.
    
    Authored-by: angerszhu <an...@gmail.com>
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
 .../org/apache/spark/sql/execution/SparkStrategies.scala   | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
index e041c54..28d1ccb 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
@@ -244,7 +244,7 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
       //   4. Pick cartesian product if join type is inner like.
       //   5. Pick broadcast nested loop join as the final solution. It may OOM but we don't have
       //      other choice.
-      case p @ ExtractEquiJoinKeys(joinType, leftKeys, rightKeys, condition, left, right, hint) =>
+      case j @ ExtractEquiJoinKeys(joinType, leftKeys, rightKeys, nonEquiCond, left, right, hint) =>
         def createBroadcastHashJoin(buildLeft: Boolean, buildRight: Boolean) = {
           val wantToBuildLeft = canBuildLeft(joinType) && buildLeft
           val wantToBuildRight = canBuildRight(joinType) && buildRight
@@ -254,7 +254,7 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
               rightKeys,
               joinType,
               buildSide,
-              condition,
+              nonEquiCond,
               planLater(left),
               planLater(right)))
           }
@@ -269,7 +269,7 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
               rightKeys,
               joinType,
               buildSide,
-              condition,
+              nonEquiCond,
               planLater(left),
               planLater(right)))
           }
@@ -278,7 +278,7 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
         def createSortMergeJoin() = {
           if (RowOrdering.isOrderable(leftKeys)) {
             Some(Seq(joins.SortMergeJoinExec(
-              leftKeys, rightKeys, joinType, condition, planLater(left), planLater(right))))
+              leftKeys, rightKeys, joinType, nonEquiCond, planLater(left), planLater(right))))
           } else {
             None
           }
@@ -286,7 +286,9 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
 
         def createCartesianProduct() = {
           if (joinType.isInstanceOf[InnerLike]) {
-            Some(Seq(joins.CartesianProductExec(planLater(left), planLater(right), p.condition)))
+            // `CartesianProductExec` can't implicitly evaluate equal join condition, here we should
+            // pass the original condition which includes both equal and non-equal conditions.
+            Some(Seq(joins.CartesianProductExec(planLater(left), planLater(right), j.condition)))
           } else {
             None
           }
@@ -311,7 +313,7 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
               // This join could be very slow or OOM
               val buildSide = getSmallerSide(left, right)
               Seq(joins.BroadcastNestedLoopJoinExec(
-                planLater(left), planLater(right), buildSide, joinType, condition))
+                planLater(left), planLater(right), buildSide, joinType, nonEquiCond))
             }
         }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org