You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2021/07/29 02:16:12 UTC
[spark] branch branch-3.1 updated: [SPARK-36272][SQL][TEST] Change
shuffled hash join metrics test to check relative value of build size
This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new b7dde9b [SPARK-36272][SQL][TEST] Change shuffled hash join metrics test to check relative value of build size
b7dde9b is described below
commit b7dde9bd937e3f6fe16deccdc953e9c0a9f856a9
Author: Cheng Su <ch...@fb.com>
AuthorDate: Thu Jul 29 11:14:34 2021 +0900
[SPARK-36272][SQL][TEST] Change shuffled hash join metrics test to check relative value of build size
### What changes were proposed in this pull request?
This is a follow-up to https://github.com/apache/spark/pull/33447, in which the unit test was disabled because it started failing after the memory settings changed. I found the root cause: after https://github.com/apache/spark/pull/33447, the Spark memory page size used in unit tests changed from `67108864` to `33554432` bytes [1]. The shuffled hash join build size therefore changed as well, due to the [memory page byte size change](https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apach [...]
[1]: I printed out the memory page byte size explicitly in the unit test - `org.apache.spark.SparkException: chengsu pageSizeBytes: 33554432!` in https://github.com/c21/spark/runs/3186680616?check_suite_focus=true.
### Why are the changes needed?
Make the previously disabled unit test work again.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Changed the unit test itself.
Closes #33494 from c21/test.
Authored-by: Cheng Su <ch...@fb.com>
Signed-off-by: Hyukjin Kwon <gu...@apache.org>
(cherry picked from commit 6a8dd3229ae70f0458306b5386898e4b8e717b7f)
Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
.../spark/sql/execution/metric/SQLMetricsSuite.scala | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala
index 8a8f65a..6628567 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala
@@ -365,18 +365,16 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils
}
}
- // TODO (SPARK-36272): Reenable this after we figure out why the expected size doesn't
- // match after we adjust building's memory settings.
- ignore("SPARK-32629: ShuffledHashJoin(full outer) metrics") {
+ test("SPARK-32629: ShuffledHashJoin(full outer) metrics") {
val uniqueLeftDf = Seq(("1", "1"), ("11", "11")).toDF("key", "value")
val nonUniqueLeftDf = Seq(("1", "1"), ("1", "2"), ("11", "11")).toDF("key", "value")
val rightDf = (1 to 10).map(i => (i.toString, i.toString)).toDF("key2", "value")
Seq(
// Test unique key on build side
- (uniqueLeftDf, rightDf, 11, 134228048, 10, 134221824),
+ (uniqueLeftDf, rightDf, 11, 10),
// Test non-unique key on build side
- (nonUniqueLeftDf, rightDf, 12, 134228552, 11, 134221824)
- ).foreach { case (leftDf, rightDf, fojRows, fojBuildSize, rojRows, rojBuildSize) =>
+ (nonUniqueLeftDf, rightDf, 12, 11)
+ ).foreach { case (leftDf, rightDf, fojRows, rojRows) =>
val fojDf = leftDf.hint("shuffle_hash").join(
rightDf, $"key" === $"key2", "full_outer")
fojDf.collect()
@@ -384,8 +382,8 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils
case s: ShuffledHashJoinExec => s
}
assert(fojPlan.isDefined, "The query plan should have shuffled hash join")
- testMetricsInSparkPlanOperator(fojPlan.get,
- Map("numOutputRows" -> fojRows, "buildDataSize" -> fojBuildSize))
+ testMetricsInSparkPlanOperator(fojPlan.get, Map("numOutputRows" -> fojRows))
+ val fojBuildSize = fojPlan.get.metrics("buildDataSize").value
// Test right outer join as well to verify build data size to be different
// from full outer join. This makes sure we take extra BitSet/OpenHashSet
@@ -397,8 +395,10 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils
case s: ShuffledHashJoinExec => s
}
assert(rojPlan.isDefined, "The query plan should have shuffled hash join")
- testMetricsInSparkPlanOperator(rojPlan.get,
- Map("numOutputRows" -> rojRows, "buildDataSize" -> rojBuildSize))
+ testMetricsInSparkPlanOperator(rojPlan.get, Map("numOutputRows" -> rojRows))
+ val rojBuildSize = rojPlan.get.metrics("buildDataSize").value
+ assert(fojBuildSize > rojBuildSize && rojBuildSize > 0,
+ "Build size of full outer join should be larger than the size of right outer join")
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org