Posted to commits@spark.apache.org by gu...@apache.org on 2021/07/29 02:16:12 UTC

[spark] branch branch-3.1 updated: [SPARK-36272][SQL][TEST] Change shuffled hash join metrics test to check relative value of build size

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new b7dde9b  [SPARK-36272][SQL][TEST] Change shuffled hash join metrics test to check relative value of build size
b7dde9b is described below

commit b7dde9bd937e3f6fe16deccdc953e9c0a9f856a9
Author: Cheng Su <ch...@fb.com>
AuthorDate: Thu Jul 29 11:14:34 2021 +0900

    [SPARK-36272][SQL][TEST] Change shuffled hash join metrics test to check relative value of build size
    
    ### What changes were proposed in this pull request?
    
    This is a follow-up of https://github.com/apache/spark/pull/33447, where the unit test was disabled due to a failure after the memory settings were changed. I found the root cause: after https://github.com/apache/spark/pull/33447, the Spark memory page byte size used in unit tests changed from `67108864` to `33554432` [1]. So the shuffled hash join build size also changed accordingly, due to the [memory page byte size change](https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apach [...]
    
    [1]: I printed out the memory page byte size explicitly in the unit test - `org.apache.spark.SparkException: chengsu pageSizeBytes: 33554432!` in https://github.com/c21/spark/runs/3186680616?check_suite_focus=true.
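
    For reference, a minimal sketch (not part of this patch, and assuming an initialized `SparkEnv`) of how the active page size can be inspected from inside a test:

    ```scala
    import org.apache.spark.SparkEnv

    // Read the page size the memory manager is actually using; with the new
    // build settings this prints 33554432 instead of 67108864.
    val pageSizeBytes = SparkEnv.get.memoryManager.pageSizeBytes
    println(s"pageSizeBytes: $pageSizeBytes")
    ```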
    
    ### Why are the changes needed?
    
    Make the previously disabled unit test work again.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Changed the unit test itself.
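
    The essence of the new test (see the diff below) is to read `buildDataSize` from the executed plans at runtime and assert on the relative ordering, rather than on hard-coded byte counts, so the test no longer depends on the configured page size:

    ```scala
    // Compare the metric values relative to each other instead of against
    // absolute sizes that shift whenever the memory page size changes.
    val fojBuildSize = fojPlan.get.metrics("buildDataSize").value
    val rojBuildSize = rojPlan.get.metrics("buildDataSize").value
    assert(fojBuildSize > rojBuildSize && rojBuildSize > 0,
      "Build size of full outer join should be larger than the size of right outer join")
    ```

    The full outer join build side keeps an extra BitSet/OpenHashSet to track matched rows, which is why its build size is expected to be strictly larger than that of the right outer join.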
    
    Closes #33494 from c21/test.
    
    Authored-by: Cheng Su <ch...@fb.com>
    Signed-off-by: Hyukjin Kwon <gu...@apache.org>
    (cherry picked from commit 6a8dd3229ae70f0458306b5386898e4b8e717b7f)
    Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
 .../spark/sql/execution/metric/SQLMetricsSuite.scala | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala
index 8a8f65a..6628567 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala
@@ -365,18 +365,16 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils
       }
   }
 
-  // TODO (SPARK-36272): Reenable this after we figure out why the expected size doesn't
-  // match after we adjust building's memory settings.
-  ignore("SPARK-32629: ShuffledHashJoin(full outer) metrics") {
+  test("SPARK-32629: ShuffledHashJoin(full outer) metrics") {
     val uniqueLeftDf = Seq(("1", "1"), ("11", "11")).toDF("key", "value")
     val nonUniqueLeftDf = Seq(("1", "1"), ("1", "2"), ("11", "11")).toDF("key", "value")
     val rightDf = (1 to 10).map(i => (i.toString, i.toString)).toDF("key2", "value")
     Seq(
       // Test unique key on build side
-      (uniqueLeftDf, rightDf, 11, 134228048, 10, 134221824),
+      (uniqueLeftDf, rightDf, 11, 10),
       // Test non-unique key on build side
-      (nonUniqueLeftDf, rightDf, 12, 134228552, 11, 134221824)
-    ).foreach { case (leftDf, rightDf, fojRows, fojBuildSize, rojRows, rojBuildSize) =>
+      (nonUniqueLeftDf, rightDf, 12, 11)
+    ).foreach { case (leftDf, rightDf, fojRows, rojRows) =>
       val fojDf = leftDf.hint("shuffle_hash").join(
         rightDf, $"key" === $"key2", "full_outer")
       fojDf.collect()
@@ -384,8 +382,8 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils
         case s: ShuffledHashJoinExec => s
       }
       assert(fojPlan.isDefined, "The query plan should have shuffled hash join")
-      testMetricsInSparkPlanOperator(fojPlan.get,
-        Map("numOutputRows" -> fojRows, "buildDataSize" -> fojBuildSize))
+      testMetricsInSparkPlanOperator(fojPlan.get, Map("numOutputRows" -> fojRows))
+      val fojBuildSize = fojPlan.get.metrics("buildDataSize").value
 
       // Test right outer join as well to verify build data size to be different
       // from full outer join. This makes sure we take extra BitSet/OpenHashSet
@@ -397,8 +395,10 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils
         case s: ShuffledHashJoinExec => s
       }
       assert(rojPlan.isDefined, "The query plan should have shuffled hash join")
-      testMetricsInSparkPlanOperator(rojPlan.get,
-        Map("numOutputRows" -> rojRows, "buildDataSize" -> rojBuildSize))
+      testMetricsInSparkPlanOperator(rojPlan.get, Map("numOutputRows" -> rojRows))
+      val rojBuildSize = rojPlan.get.metrics("buildDataSize").value
+      assert(fojBuildSize > rojBuildSize && rojBuildSize > 0,
+        "Build size of full outer join should be larger than the size of right outer join")
     }
   }
 

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org