You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2020/09/01 05:55:57 UTC
[spark] branch branch-3.0 updated: [SPARK-32659][SQL][FOLLOWUP]
Improve test for pruning DPP on non-atomic type
This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 98a1247 [SPARK-32659][SQL][FOLLOWUP] Improve test for pruning DPP on non-atomic type
98a1247 is described below
commit 98a1247f379170afbd9a64a3a51d6235ff3df02e
Author: Yuming Wang <yu...@ebay.com>
AuthorDate: Tue Sep 1 05:51:04 2020 +0000
[SPARK-32659][SQL][FOLLOWUP] Improve test for pruning DPP on non-atomic type
### What changes were proposed in this pull request?
Improve test for pruning DPP on non-atomic type:
- Avoid creating new partitioned tables, which may take 30 seconds.
- Add a test for the `array` type.
### Why are the changes needed?
Improve test.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
N/A
Closes #29595 from wangyum/SPARK-32659-test.
Authored-by: Yuming Wang <yu...@ebay.com>
Signed-off-by: Wenchen Fan <we...@databricks.com>
(cherry picked from commit a701bc79e3bb936e8fb4a4fbe11e9e0bd0ccd8ac)
Signed-off-by: Wenchen Fan <we...@databricks.com>
---
.../spark/sql/DynamicPartitionPruningSuite.scala | 68 +++++++++-------------
1 file changed, 26 insertions(+), 42 deletions(-)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala
index bf91bdb7..47c5c9ea 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala
@@ -19,7 +19,8 @@ package org.apache.spark.sql
import org.scalatest.GivenWhenThen
-import org.apache.spark.sql.catalyst.expressions.{CodegenObjectFactoryMode, DynamicPruningExpression, Expression}
+import org.apache.spark.sql.catalyst.expressions.{DynamicPruningExpression, Expression}
+import org.apache.spark.sql.catalyst.expressions.CodegenObjectFactoryMode._
import org.apache.spark.sql.catalyst.plans.ExistenceJoin
import org.apache.spark.sql.execution._
import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanExec, AdaptiveSparkPlanHelper}
@@ -1311,48 +1312,31 @@ abstract class DynamicPartitionPruningSuiteBase
}
test("SPARK-32659: Fix the data issue when pruning DPP on non-atomic type") {
- withSQLConf(
- SQLConf.DYNAMIC_PARTITION_PRUNING_FALLBACK_FILTER_RATIO.key -> "2", // Make sure insert DPP
- SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "false") {
- withTable("df1", "df2") {
- spark.range(1000)
- .select(col("id"), col("id").as("k"))
- .write
- .partitionBy("k")
- .format(tableFormat)
- .mode("overwrite")
- .saveAsTable("df1")
-
- spark.range(100)
- .select(col("id"), col("id").as("k"))
- .write
- .partitionBy("k")
- .format(tableFormat)
- .mode("overwrite")
- .saveAsTable("df2")
-
- Seq(CodegenObjectFactoryMode.NO_CODEGEN,
- CodegenObjectFactoryMode.CODEGEN_ONLY).foreach { mode =>
- Seq(true, false).foreach { pruning =>
- withSQLConf(
- SQLConf.CODEGEN_FACTORY_MODE.key -> mode.toString,
- SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> s"$pruning") {
- val df = sql(
- """
- |SELECT df1.id, df2.k
- |FROM df1
- | JOIN df2
- | ON struct(df1.k) = struct(df2.k)
- | AND df2.id < 2
- |""".stripMargin)
- if (pruning) {
- checkPartitionPruningPredicate(df, true, false)
- } else {
- checkPartitionPruningPredicate(df, false, false)
- }
-
- checkAnswer(df, Row(0, 0) :: Row(1, 1) :: Nil)
+ Seq(NO_CODEGEN, CODEGEN_ONLY).foreach { mode =>
+ Seq(true, false).foreach { pruning =>
+ withSQLConf(
+ SQLConf.CODEGEN_FACTORY_MODE.key -> mode.toString,
+ SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> s"$pruning") {
+ Seq("struct", "array").foreach { dataType =>
+ val df = sql(
+ s"""
+ |SELECT f.date_id, f.product_id, f.units_sold, f.store_id FROM fact_stats f
+ |JOIN dim_stats s
+ |ON $dataType(f.store_id) = $dataType(s.store_id) WHERE s.country = 'DE'
+ """.stripMargin)
+
+ if (pruning) {
+ checkPartitionPruningPredicate(df, false, true)
+ } else {
+ checkPartitionPruningPredicate(df, false, false)
}
+
+ checkAnswer(df,
+ Row(1030, 2, 10, 3) ::
+ Row(1040, 2, 50, 3) ::
+ Row(1050, 2, 50, 3) ::
+ Row(1060, 2, 50, 3) :: Nil
+ )
}
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org