You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2017/07/06 11:12:22 UTC
spark git commit: [SPARK-21273][SQL][FOLLOW-UP] Add missing test cases back and revise code style
Repository: spark
Updated Branches:
refs/heads/master b8e4d567a -> d540dfbff
[SPARK-21273][SQL][FOLLOW-UP] Add missing test cases back and revise code style
## What changes were proposed in this pull request?
Add back the test cases that were missing and revise the code style.
This is a follow-up to the previous PR: https://github.com/apache/spark/pull/18479
## How was this patch tested?
Unit tests (the test cases added to BasicStatsEstimationSuite).
Please review http://spark.apache.org/contributing.html before opening a pull request.
Author: Wang Gengliang <lt...@gmail.com>
Closes #18548 from gengliangwang/stat_propagation_revise.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d540dfbf
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d540dfbf
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d540dfbf
Branch: refs/heads/master
Commit: d540dfbff33aa2f8571e0de149dfa3f4e7321113
Parents: b8e4d56
Author: Wang Gengliang <lt...@gmail.com>
Authored: Thu Jul 6 19:12:15 2017 +0800
Committer: Wenchen Fan <we...@databricks.com>
Committed: Thu Jul 6 19:12:15 2017 +0800
----------------------------------------------------------------------
.../plans/logical/LogicalPlanVisitor.scala | 2 +-
.../BasicStatsEstimationSuite.scala | 45 ++++++++++++++++++++
2 files changed, 46 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/d540dfbf/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanVisitor.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanVisitor.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanVisitor.scala
index b230458..2652f6d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanVisitor.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanVisitor.scala
@@ -38,10 +38,10 @@ trait LogicalPlanVisitor[T] {
case p: Range => visitRange(p)
case p: Repartition => visitRepartition(p)
case p: RepartitionByExpression => visitRepartitionByExpr(p)
+ case p: ResolvedHint => visitHint(p)
case p: Sample => visitSample(p)
case p: ScriptTransformation => visitScriptTransform(p)
case p: Union => visitUnion(p)
- case p: ResolvedHint => visitHint(p)
case p: LogicalPlan => default(p)
}
http://git-wip-us.apache.org/repos/asf/spark/blob/d540dfbf/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala
index 5fd21a0..913be6d 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala
@@ -78,6 +78,37 @@ class BasicStatsEstimationSuite extends PlanTest with StatsEstimationTestBase {
checkStats(globalLimit, stats)
}
+ test("sample estimation") {
+ val sample = Sample(0.0, 0.5, withReplacement = false, (math.random * 1000).toLong, plan)
+ checkStats(sample, Statistics(sizeInBytes = 60, rowCount = Some(5)))
+
+ // Child stats are built without a rowCount; the expected sample stats omit it as well
+ val childStats = Statistics(sizeInBytes = 120)
+ val childPlan = DummyLogicalPlan(childStats, childStats)
+ val sample2 =
+ Sample(0.0, 0.11, withReplacement = false, (math.random * 1000).toLong, childPlan)
+ checkStats(sample2, Statistics(sizeInBytes = 14))
+ }
+
+ test("estimate statistics when the conf changes") {
+ val expectedDefaultStats =
+ Statistics(
+ sizeInBytes = 40,
+ rowCount = Some(10),
+ attributeStats = AttributeMap(Seq(
+ AttributeReference("c1", IntegerType)() -> ColumnStat(10, Some(1), Some(10), 0, 4, 4))))
+ val expectedCboStats =
+ Statistics(
+ sizeInBytes = 4,
+ rowCount = Some(1),
+ attributeStats = AttributeMap(Seq(
+ AttributeReference("c1", IntegerType)() -> ColumnStat(1, Some(5), Some(5), 0, 4, 4))))
+
+ val plan = DummyLogicalPlan(defaultStats = expectedDefaultStats, cboStats = expectedCboStats)
+ checkStats(
+ plan, expectedStatsCboOn = expectedCboStats, expectedStatsCboOff = expectedDefaultStats)
+ }
+
/** Check estimated stats when cbo is turned on/off. */
private def checkStats(
plan: LogicalPlan,
@@ -99,3 +130,17 @@ class BasicStatsEstimationSuite extends PlanTest with StatsEstimationTestBase {
private def checkStats(plan: LogicalPlan, expectedStats: Statistics): Unit =
checkStats(plan, expectedStats, expectedStats)
}
+
+/**
+ * This class is used for unit-testing the cbo switch. It mimics a logical plan that returns
+ * either the default statistics or the cbo-estimated statistics, depending on the conf.
+ */
+private case class DummyLogicalPlan(
+ defaultStats: Statistics,
+ cboStats: Statistics)
+ extends LeafNode {
+
+ override def output: Seq[Attribute] = Nil
+
+ override def computeStats(): Statistics = if (conf.cboEnabled) cboStats else defaultStats
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org