You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2017/04/01 14:19:14 UTC

spark git commit: [SPARK-20186][SQL] BroadcastHint should use child's stats

Repository: spark
Updated Branches:
  refs/heads/master 89d6822f7 -> 2287f3d0b


[SPARK-20186][SQL] BroadcastHint should use child's stats

## What changes were proposed in this pull request?

`BroadcastHint` should use child's statistics and set `isBroadcastable` to true.

## How was this patch tested?

Added a new stats estimation test for `BroadcastHint`.

Author: wangzhenhua <wa...@huawei.com>

Closes #17504 from wzhfy/broadcastHintEstimation.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2287f3d0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2287f3d0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2287f3d0

Branch: refs/heads/master
Commit: 2287f3d0b85730995bedc489a017de5700d6e1e4
Parents: 89d6822
Author: wangzhenhua <wa...@huawei.com>
Authored: Sat Apr 1 22:19:08 2017 +0800
Committer: Wenchen Fan <we...@databricks.com>
Committed: Sat Apr 1 22:19:08 2017 +0800

----------------------------------------------------------------------
 .../plans/logical/basicLogicalOperators.scala   |  2 +-
 .../BasicStatsEstimationSuite.scala             | 21 +++++++++++++++++++-
 2 files changed, 21 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/2287f3d0/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
index 5cbf263..19db42c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
@@ -383,7 +383,7 @@ case class BroadcastHint(child: LogicalPlan) extends UnaryNode {
 
   // set isBroadcastable to true so the child will be broadcasted
   override def computeStats(conf: CatalystConf): Statistics =
-    super.computeStats(conf).copy(isBroadcastable = true)
+    child.stats(conf).copy(isBroadcastable = true)
 }
 
 /**

http://git-wip-us.apache.org/repos/asf/spark/blob/2287f3d0/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala
index e5dc811..0d92c1e 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala
@@ -35,6 +35,23 @@ class BasicStatsEstimationSuite extends StatsEstimationTestBase {
     // row count * (overhead + column size)
     size = Some(10 * (8 + 4)))
 
+  test("BroadcastHint estimation") {
+    val filter = Filter(Literal(true), plan)
+    val filterStatsCboOn = Statistics(sizeInBytes = 10 * (8 +4), isBroadcastable = false,
+      rowCount = Some(10), attributeStats = AttributeMap(Seq(attribute -> colStat)))
+    val filterStatsCboOff = Statistics(sizeInBytes = 10 * (8 +4), isBroadcastable = false)
+    checkStats(
+      filter,
+      expectedStatsCboOn = filterStatsCboOn,
+      expectedStatsCboOff = filterStatsCboOff)
+
+    val broadcastHint = BroadcastHint(filter)
+    checkStats(
+      broadcastHint,
+      expectedStatsCboOn = filterStatsCboOn.copy(isBroadcastable = true),
+      expectedStatsCboOff = filterStatsCboOff.copy(isBroadcastable = true))
+  }
+
   test("limit estimation: limit < child's rowCount") {
     val localLimit = LocalLimit(Literal(2), plan)
     val globalLimit = GlobalLimit(Literal(2), plan)
@@ -97,9 +114,11 @@ class BasicStatsEstimationSuite extends StatsEstimationTestBase {
       plan: LogicalPlan,
       expectedStatsCboOn: Statistics,
       expectedStatsCboOff: Statistics): Unit = {
-    assert(plan.stats(conf.copy(cboEnabled = true)) == expectedStatsCboOn)
     // Invalidate statistics
     plan.invalidateStatsCache()
+    assert(plan.stats(conf.copy(cboEnabled = true)) == expectedStatsCboOn)
+
+    plan.invalidateStatsCache()
     assert(plan.stats(conf.copy(cboEnabled = false)) == expectedStatsCboOff)
   }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org