You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2020/06/02 02:02:28 UTC
[spark] branch branch-3.0 updated: [SPARK-31870][SQL][TESTS] Fix
"Do not optimize skew join if additional shuffle" test having no skew join
This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 6dec082 [SPARK-31870][SQL][TESTS] Fix "Do not optimize skew join if additional shuffle" test having no skew join
6dec082 is described below
commit 6dec082185a714d4ed3818a2d2d4d011fcd372db
Author: manuzhang <ow...@gmail.com>
AuthorDate: Tue Jun 2 02:00:58 2020 +0000
[SPARK-31870][SQL][TESTS] Fix "Do not optimize skew join if additional shuffle" test having no skew join
### What changes were proposed in this pull request?
Fix the configurations and ensure there is a skew join in the test "Do not optimize skew join if additional shuffle".
### Why are the changes needed?
The existing "Do not optimize skew join if additional shuffle" test has no skew join at all, so its check that the skew join optimization is not applied passes trivially.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Fixed existing test.
Closes #28679 from manuzhang/spark-31870.
Authored-by: manuzhang <ow...@gmail.com>
Signed-off-by: Wenchen Fan <we...@databricks.com>
(cherry picked from commit 283814a426fb67289ca7923b9d13c5a897f9a98a)
Signed-off-by: Wenchen Fan <we...@databricks.com>
---
.../execution/adaptive/AdaptiveQueryExecSuite.scala | 20 ++++++++++++++------
1 file changed, 14 insertions(+), 6 deletions(-)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala
index fed7cdc..ac0267a 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala
@@ -597,21 +597,29 @@ class AdaptiveQueryExecSuite
withSQLConf(
SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true",
SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1",
- SQLConf.ADVISORY_PARTITION_SIZE_IN_BYTES.key -> "700") {
+ SQLConf.SKEW_JOIN_SKEWED_PARTITION_THRESHOLD.key -> "100",
+ SQLConf.ADVISORY_PARTITION_SIZE_IN_BYTES.key -> "100") {
withTempView("skewData1", "skewData2") {
spark
.range(0, 1000, 1, 10)
- .selectExpr("id % 2 as key1", "id as value1")
+ .selectExpr("id % 3 as key1", "id as value1")
.createOrReplaceTempView("skewData1")
spark
.range(0, 1000, 1, 10)
.selectExpr("id % 1 as key2", "id as value2")
.createOrReplaceTempView("skewData2")
- val (_, innerAdaptivePlan) = runAdaptiveAndVerifyResult(
- "SELECT key1 FROM skewData1 join skewData2 ON key1 = key2 group by key1")
+
+ def checkSkewJoin(query: String, optimizeSkewJoin: Boolean): Unit = {
+ val (_, innerAdaptivePlan) = runAdaptiveAndVerifyResult(query)
+ val innerSmj = findTopLevelSortMergeJoin(innerAdaptivePlan)
+ assert(innerSmj.size == 1 && innerSmj.head.isSkewJoin == optimizeSkewJoin)
+ }
+
+ checkSkewJoin(
+ "SELECT key1 FROM skewData1 JOIN skewData2 ON key1 = key2", true)
// Additional shuffle introduced, so disable the "OptimizeSkewedJoin" optimization
- val innerSmj = findTopLevelSortMergeJoin(innerAdaptivePlan)
- assert(innerSmj.size == 1 && !innerSmj.head.isSkewJoin)
+ checkSkewJoin(
+ "SELECT key1 FROM skewData1 JOIN skewData2 ON key1 = key2 GROUP BY key1", false)
}
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org