You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2021/04/01 08:37:36 UTC

[spark] branch branch-2.4 updated: Revert "[SPARK-33935][SQL][2.4] Fix CBO cost function"

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-2.4 by this push:
     new 58a859a  Revert "[SPARK-33935][SQL][2.4] Fix CBO cost function"
58a859a is described below

commit 58a859a8b4ace17162f228babc5b42a7cf77df03
Author: Liang-Chi Hsieh <vi...@gmail.com>
AuthorDate: Thu Apr 1 17:36:48 2021 +0900

    Revert "[SPARK-33935][SQL][2.4] Fix CBO cost function"
    
    This reverts commit 3e6a6b76ef1b25f80133e6cb8561ba943f3b29de per the discussion at https://github.com/apache/spark/pull/30965#discussion_r605396265.
    
    Closes #32020 from viirya/revert-SPARK-33935.
    
    Authored-by: Liang-Chi Hsieh <vi...@gmail.com>
    Signed-off-by: HyukjinKwon <gu...@apache.org>
---
 .../sql/catalyst/optimizer/CostBasedJoinReorder.scala     | 13 ++++++++-----
 .../spark/sql/catalyst/optimizer/JoinReorderSuite.scala   | 15 ---------------
 .../optimizer/StarJoinCostBasedReorderSuite.scala         |  8 ++++----
 3 files changed, 12 insertions(+), 24 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala
index adff8b7..01634a9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala
@@ -344,11 +344,14 @@ object JoinReorderDP extends PredicateHelper with Logging {
     }
 
     def betterThan(other: JoinPlan, conf: SQLConf): Boolean = {
-      val thisCost = BigDecimal(this.planCost.card) * conf.joinReorderCardWeight +
-        BigDecimal(this.planCost.size) * (1 - conf.joinReorderCardWeight)
-      val otherCost = BigDecimal(other.planCost.card) * conf.joinReorderCardWeight +
-        BigDecimal(other.planCost.size) * (1 - conf.joinReorderCardWeight)
-      thisCost < otherCost
+      if (other.planCost.card == 0 || other.planCost.size == 0) {
+        false
+      } else {
+        val relativeRows = BigDecimal(this.planCost.card) / BigDecimal(other.planCost.card)
+        val relativeSize = BigDecimal(this.planCost.size) / BigDecimal(other.planCost.size)
+        relativeRows * conf.joinReorderCardWeight +
+          relativeSize * (1 - conf.joinReorderCardWeight) < 1
+      }
     }
   }
 }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinReorderSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinReorderSuite.scala
index b733bcd..38a70f0 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinReorderSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinReorderSuite.scala
@@ -20,7 +20,6 @@ package org.apache.spark.sql.catalyst.optimizer
 import org.apache.spark.sql.catalyst.dsl.expressions._
 import org.apache.spark.sql.catalyst.dsl.plans._
 import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap}
-import org.apache.spark.sql.catalyst.optimizer.JoinReorderDP.JoinPlan
 import org.apache.spark.sql.catalyst.plans.{Cross, Inner, PlanTest}
 import org.apache.spark.sql.catalyst.plans.logical.{ColumnStat, LocalRelation, LogicalPlan}
 import org.apache.spark.sql.catalyst.rules.RuleExecutor
@@ -316,18 +315,4 @@ class JoinReorderSuite extends PlanTest with StatsEstimationTestBase {
       case (a1, a2) => a1.semanticEquals(a2)
     }
   }
-
-  test("SPARK-33935: betterThan should be consistent") {
-    val plan1 = JoinPlan(null, null, null, Cost(300, 80))
-    val plan2 = JoinPlan(null, null, null, Cost(500, 30))
-
-    // cost1 = 300*0.7 + 80*0.3 = 234
-    // cost2 = 500*0.7 + 30*0.3 = 359
-
-    assert(!plan1.betterThan(plan1, conf))
-    assert(!plan2.betterThan(plan2, conf))
-
-    assert(plan1.betterThan(plan2, conf))
-    assert(!plan2.betterThan(plan1, conf))
-  }
 }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/StarJoinCostBasedReorderSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/StarJoinCostBasedReorderSuite.scala
index 24abd8e..baae934 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/StarJoinCostBasedReorderSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/StarJoinCostBasedReorderSuite.scala
@@ -293,12 +293,12 @@ class StarJoinCostBasedReorderSuite extends PlanTest with StatsEstimationTestBas
           (nameToAttr("f1_fk2") === nameToAttr("d2_pk")))
 
     val expected =
-      t3.join(t4, Inner, Some(nameToAttr("t3_c1") === nameToAttr("t4_c1")))
+      f1.join(d2, Inner, Some(nameToAttr("f1_fk2") === nameToAttr("d2_pk")))
+        .join(d1, Inner, Some(nameToAttr("f1_fk1") === nameToAttr("d1_pk")))
+        .join(t3.join(t4, Inner, Some(nameToAttr("t3_c1") === nameToAttr("t4_c1"))), Inner,
+          Some(nameToAttr("t3_c1") === nameToAttr("t4_c1")))
         .join(t1.join(t2, Inner, Some(nameToAttr("t1_c1") === nameToAttr("t2_c1"))), Inner,
           Some(nameToAttr("t1_c2") === nameToAttr("t4_c2")))
-        .join(f1
-          .join(d2, Inner, Some(nameToAttr("f1_fk2") === nameToAttr("d2_pk")))
-          .join(d1, Inner, Some(nameToAttr("f1_fk1") === nameToAttr("d1_pk"))))
         .select(outputsOf(d1, t1, t2, t3, t4, f1, d2): _*)
 
     assertEqualPlans(query, expected)

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org