You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ru...@apache.org on 2023/03/06 08:17:44 UTC
[spark] branch branch-3.4 updated: [SPARK-42677][SQL][TESTS] Fix the invalid tests for broadcast hint
This is an automated email from the ASF dual-hosted git repository.
ruifengz pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.4 by this push:
new 06c38ba1008 [SPARK-42677][SQL][TESTS] Fix the invalid tests for broadcast hint
06c38ba1008 is described below
commit 06c38ba100810bc2fd7a318420d03576f13ba470
Author: Jiaan Geng <be...@163.com>
AuthorDate: Mon Mar 6 16:16:49 2023 +0800
[SPARK-42677][SQL][TESTS] Fix the invalid tests for broadcast hint
### What changes were proposed in this pull request?
Currently, many of the test cases for the broadcast hint are invalid: the test data is smaller than the auto-broadcast join threshold, so the join is planned as a broadcast join even without the hint.
### Why are the changes needed?
Fix the invalid tests for broadcast hint.
### Does this PR introduce _any_ user-facing change?
'No'.
This change only modifies test cases.
### How was this patch tested?
Tested with the corrected test cases.
Closes #40293 from beliefer/SPARK-42677.
Authored-by: Jiaan Geng <be...@163.com>
Signed-off-by: Ruifeng Zheng <ru...@apache.org>
(cherry picked from commit db171551d39dc8876cf170bb6e86b2340c768465)
Signed-off-by: Ruifeng Zheng <ru...@apache.org>
---
.../org/apache/spark/sql/DataFrameJoinSuite.scala | 38 +++++++++++++---------
1 file changed, 22 insertions(+), 16 deletions(-)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
index e4f6b4cb40c..56e9520fdab 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
@@ -195,22 +195,28 @@ class DataFrameJoinSuite extends QueryTest
val df1 = Seq((1, "1"), (2, "2")).toDF("key", "value")
val df2 = Seq((1, "1"), (2, "2")).toDF("key", "value")
- // equijoin - should be converted into broadcast join
- val plan1 = df1.join(broadcast(df2), "key").queryExecution.sparkPlan
- assert(plan1.collect { case p: BroadcastHashJoinExec => p }.size === 1)
-
- // no join key -- should not be a broadcast join
- val plan2 = df1.crossJoin(broadcast(df2)).queryExecution.sparkPlan
- assert(plan2.collect { case p: BroadcastHashJoinExec => p }.size === 0)
-
- // planner should not crash without a join
- broadcast(df1).queryExecution.sparkPlan
-
- // SPARK-12275: no physical plan for BroadcastHint in some condition
- withTempPath { path =>
- df1.write.parquet(path.getCanonicalPath)
- val pf1 = spark.read.parquet(path.getCanonicalPath)
- assert(df1.crossJoin(broadcast(pf1)).count() === 4)
+ withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") {
+ // equijoin - should not be converted into broadcast join without hint
+ val plan1 = df1.join(df2, "key").queryExecution.sparkPlan
+ assert(plan1.collect { case p: BroadcastHashJoinExec => p }.size === 0)
+
+ // equijoin - should be converted into broadcast join with hint
+ val plan2 = df1.join(broadcast(df2), "key").queryExecution.sparkPlan
+ assert(plan2.collect { case p: BroadcastHashJoinExec => p }.size === 1)
+
+ // no join key -- should not be a broadcast join
+ val plan3 = df1.crossJoin(broadcast(df2)).queryExecution.sparkPlan
+ assert(plan3.collect { case p: BroadcastHashJoinExec => p }.size === 0)
+
+ // planner should not crash without a join
+ broadcast(df1).queryExecution.sparkPlan
+
+ // SPARK-12275: no physical plan for BroadcastHint in some condition
+ withTempPath { path =>
+ df1.write.parquet(path.getCanonicalPath)
+ val pf1 = spark.read.parquet(path.getCanonicalPath)
+ assert(df1.crossJoin(broadcast(pf1)).count() === 4)
+ }
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org