You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2017/04/01 14:19:14 UTC
spark git commit: [SPARK-20186][SQL] BroadcastHint should use child's stats
Repository: spark
Updated Branches:
refs/heads/master 89d6822f7 -> 2287f3d0b
[SPARK-20186][SQL] BroadcastHint should use child's stats
## What changes were proposed in this pull request?
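`BroadcastHint` should use its child's statistics and set `isBroadcastable` to true. Previously it derived its statistics from `super.computeStats(conf)` instead of `child.stats(conf)`.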
## How was this patch tested?
Added a new stats estimation test for `BroadcastHint`.
Author: wangzhenhua <wa...@huawei.com>
Closes #17504 from wzhfy/broadcastHintEstimation.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2287f3d0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2287f3d0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2287f3d0
Branch: refs/heads/master
Commit: 2287f3d0b85730995bedc489a017de5700d6e1e4
Parents: 89d6822
Author: wangzhenhua <wa...@huawei.com>
Authored: Sat Apr 1 22:19:08 2017 +0800
Committer: Wenchen Fan <we...@databricks.com>
Committed: Sat Apr 1 22:19:08 2017 +0800
----------------------------------------------------------------------
.../plans/logical/basicLogicalOperators.scala | 2 +-
.../BasicStatsEstimationSuite.scala | 21 +++++++++++++++++++-
2 files changed, 21 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/2287f3d0/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
index 5cbf263..19db42c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
@@ -383,7 +383,7 @@ case class BroadcastHint(child: LogicalPlan) extends UnaryNode {
// set isBroadcastable to true so the child will be broadcasted
override def computeStats(conf: CatalystConf): Statistics =
- super.computeStats(conf).copy(isBroadcastable = true)
+ child.stats(conf).copy(isBroadcastable = true)
}
/**
http://git-wip-us.apache.org/repos/asf/spark/blob/2287f3d0/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala
index e5dc811..0d92c1e 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala
@@ -35,6 +35,23 @@ class BasicStatsEstimationSuite extends StatsEstimationTestBase {
// row count * (overhead + column size)
size = Some(10 * (8 + 4)))
+ test("BroadcastHint estimation") {
+ val filter = Filter(Literal(true), plan)
+ val filterStatsCboOn = Statistics(sizeInBytes = 10 * (8 +4), isBroadcastable = false,
+ rowCount = Some(10), attributeStats = AttributeMap(Seq(attribute -> colStat)))
+ val filterStatsCboOff = Statistics(sizeInBytes = 10 * (8 +4), isBroadcastable = false)
+ checkStats(
+ filter,
+ expectedStatsCboOn = filterStatsCboOn,
+ expectedStatsCboOff = filterStatsCboOff)
+
+ val broadcastHint = BroadcastHint(filter)
+ checkStats(
+ broadcastHint,
+ expectedStatsCboOn = filterStatsCboOn.copy(isBroadcastable = true),
+ expectedStatsCboOff = filterStatsCboOff.copy(isBroadcastable = true))
+ }
+
test("limit estimation: limit < child's rowCount") {
val localLimit = LocalLimit(Literal(2), plan)
val globalLimit = GlobalLimit(Literal(2), plan)
@@ -97,9 +114,11 @@ class BasicStatsEstimationSuite extends StatsEstimationTestBase {
plan: LogicalPlan,
expectedStatsCboOn: Statistics,
expectedStatsCboOff: Statistics): Unit = {
- assert(plan.stats(conf.copy(cboEnabled = true)) == expectedStatsCboOn)
// Invalidate statistics
plan.invalidateStatsCache()
+ assert(plan.stats(conf.copy(cboEnabled = true)) == expectedStatsCboOn)
+
+ plan.invalidateStatsCache()
assert(plan.stats(conf.copy(cboEnabled = false)) == expectedStatsCboOff)
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org