You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2020/06/02 02:02:28 UTC

[spark] branch branch-3.0 updated: [SPARK-31870][SQL][TESTS] Fix "Do not optimize skew join if additional shuffle" test having no skew join

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 6dec082  [SPARK-31870][SQL][TESTS] Fix "Do not optimize skew join if additional shuffle" test having no skew join
6dec082 is described below

commit 6dec082185a714d4ed3818a2d2d4d011fcd372db
Author: manuzhang <ow...@gmail.com>
AuthorDate: Tue Jun 2 02:00:58 2020 +0000

    [SPARK-31870][SQL][TESTS] Fix "Do not optimize skew join if additional shuffle" test having no skew join
    
    ### What changes were proposed in this pull request?
    Fix configurations and ensure there is skew join in the test "Do not optimize skew join if additional shuffle".
    
    ### Why are the changes needed?
    The existing "Do not optimize skew join if additional shuffle" test has no skew join at all.
    
    ### Does this PR introduce _any_ user-facing change?
    No.
    
    ### How was this patch tested?
    Fixed existing test.
    
    Closes #28679 from manuzhang/spark-31870.
    
    Authored-by: manuzhang <ow...@gmail.com>
    Signed-off-by: Wenchen Fan <we...@databricks.com>
    (cherry picked from commit 283814a426fb67289ca7923b9d13c5a897f9a98a)
    Signed-off-by: Wenchen Fan <we...@databricks.com>
---
 .../execution/adaptive/AdaptiveQueryExecSuite.scala  | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala
index fed7cdc..ac0267a 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala
@@ -597,21 +597,29 @@ class AdaptiveQueryExecSuite
     withSQLConf(
       SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true",
       SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1",
-      SQLConf.ADVISORY_PARTITION_SIZE_IN_BYTES.key -> "700") {
+      SQLConf.SKEW_JOIN_SKEWED_PARTITION_THRESHOLD.key -> "100",
+      SQLConf.ADVISORY_PARTITION_SIZE_IN_BYTES.key -> "100") {
       withTempView("skewData1", "skewData2") {
         spark
           .range(0, 1000, 1, 10)
-          .selectExpr("id % 2 as key1", "id as value1")
+          .selectExpr("id % 3 as key1", "id as value1")
           .createOrReplaceTempView("skewData1")
         spark
           .range(0, 1000, 1, 10)
           .selectExpr("id % 1 as key2", "id as value2")
           .createOrReplaceTempView("skewData2")
-        val (_, innerAdaptivePlan) = runAdaptiveAndVerifyResult(
-          "SELECT key1 FROM skewData1 join skewData2 ON key1 = key2 group by key1")
+
+        def checkSkewJoin(query: String, optimizeSkewJoin: Boolean): Unit = {
+          val (_, innerAdaptivePlan) = runAdaptiveAndVerifyResult(query)
+          val innerSmj = findTopLevelSortMergeJoin(innerAdaptivePlan)
+          assert(innerSmj.size == 1 && innerSmj.head.isSkewJoin == optimizeSkewJoin)
+        }
+
+        checkSkewJoin(
+          "SELECT key1 FROM skewData1 JOIN skewData2 ON key1 = key2", true)
         // Additional shuffle introduced, so disable the "OptimizeSkewedJoin" optimization
-        val innerSmj = findTopLevelSortMergeJoin(innerAdaptivePlan)
-        assert(innerSmj.size == 1 && !innerSmj.head.isSkewJoin)
+        checkSkewJoin(
+          "SELECT key1 FROM skewData1 JOIN skewData2 ON key1 = key2 GROUP BY key1", false)
       }
     }
   }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org