Posted to commits@kyuubi.apache.org by ch...@apache.org on 2023/03/16 02:13:40 UTC
[kyuubi] branch branch-1.7 updated: [KYUUBI #4525][KSHC] Partitioning predicates should take effect to filter data
This is an automated email from the ASF dual-hosted git repository.
chengpan pushed a commit to branch branch-1.7
in repository https://gitbox.apache.org/repos/asf/kyuubi.git
The following commit(s) were added to refs/heads/branch-1.7 by this push:
new 3d10e8f00 [KYUUBI #4525][KSHC] Partitioning predicates should take effect to filter data
3d10e8f00 is described below
commit 3d10e8f007c899bde609231da555c9d50b999b62
Author: Yikf <yi...@apache.org>
AuthorDate: Thu Mar 16 10:12:44 2023 +0800
[KYUUBI #4525][KSHC] Partitioning predicates should take effect to filter data
### _Why are the changes needed?_
This PR aims to close https://github.com/apache/kyuubi/issues/4525.
The root cause of this problem is that Apache Spark pushes predicates down in `V2ScanRelationPushDown`, but the Spark Hive connector does not apply the pushed-down predicates when reading data, so partitioning predicates have no filtering effect.
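For context, here is a minimal, self-contained sketch of the DataSource V2 contract involved; it is not the KSHC code itself (HiveScanBuilder extends Spark's file-scan machinery), and names like `SimpleScanBuilder`, `SimpleScan`, and `retained` are illustrative assumptions. It shows the hand-off this commit fixes: the builder must carry the pushed predicates into the `Scan` it builds instead of dropping them.
```scala
import org.apache.spark.sql.connector.read.{Scan, ScanBuilder, SupportsPushDownFilters}
import org.apache.spark.sql.sources.Filter
import org.apache.spark.sql.types.StructType

// Illustrative only: a ScanBuilder that keeps the filters Spark pushes down during
// V2ScanRelationPushDown and forwards them to the Scan it builds.
class SimpleScanBuilder(schema: StructType) extends ScanBuilder with SupportsPushDownFilters {
  private var retained: Array[Filter] = Array.empty

  // Spark hands the candidate predicates here; returning them all tells Spark to
  // also re-evaluate them after the scan, which is the conservative choice.
  override def pushFilters(filters: Array[Filter]): Array[Filter] = {
    retained = filters
    filters
  }

  override def pushedFilters(): Array[Filter] = retained

  // The important step: carry the retained predicates into the Scan so the read
  // path can actually use them (e.g. for partition pruning).
  override def build(): Scan = new SimpleScan(schema, retained)
}

class SimpleScan(schema: StructType, filters: Array[Filter]) extends Scan {
  override def readSchema(): StructType = schema
  // A real Scan would consult `filters` when planning its input partitions.
}
```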
### _How was this patch tested?_
- [x] Add some test cases that check the changes thoroughly, including negative and positive cases if possible
- [ ] Add screenshots for manual tests if appropriate
- [x] [Run test](https://kyuubi.readthedocs.io/en/master/develop_tools/testing.html#running-tests) locally before making a pull request
Closes #4528 from Yikf/KYUUBI-4525.
Closes #4525
a65a1873f [Yikf] Partitioning predicates should take effect to filter data
Authored-by: Yikf <yi...@apache.org>
Signed-off-by: Cheng Pan <ch...@apache.org>
(cherry picked from commit 41e9505722ffe69a83fe43cce60cfbbb445e2a35)
Signed-off-by: Cheng Pan <ch...@apache.org>
---
.../connector/hive/read/HiveScanBuilder.scala | 4 +++-
.../spark/connector/hive/HiveQuerySuite.scala | 24 ++++++++++++++++++++++
2 files changed, 27 insertions(+), 1 deletion(-)
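For anyone verifying the change by hand, a hedged sketch follows. It assumes a Spark session with the connector registered as catalog `hive` and the partitioned `employee` table from the new test case below; only standard Spark APIs are used.
```scala
// Hypothetical manual check, mirroring the new test in HiveQuerySuite.
val df = spark.sql("SELECT * FROM hive.default.employee WHERE year = '2022'")

// Before this commit the pushed partition predicate was dropped by the scan, so rows
// from every partition came back; after it, only the 2022 partition is returned.
df.show()

// The physical plan should show the scan restricted by the partition predicate.
df.explain(true)
```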
diff --git a/extensions/spark/kyuubi-spark-connector-hive/src/main/scala/org/apache/kyuubi/spark/connector/hive/read/HiveScanBuilder.scala b/extensions/spark/kyuubi-spark-connector-hive/src/main/scala/org/apache/kyuubi/spark/connector/hive/read/HiveScanBuilder.scala
index 8e90cc3ab..89836e712 100644
--- a/extensions/spark/kyuubi-spark-connector-hive/src/main/scala/org/apache/kyuubi/spark/connector/hive/read/HiveScanBuilder.scala
+++ b/extensions/spark/kyuubi-spark-connector-hive/src/main/scala/org/apache/kyuubi/spark/connector/hive/read/HiveScanBuilder.scala
@@ -37,6 +37,8 @@ case class HiveScanBuilder(
catalogTable = table,
dataSchema = dataSchema,
readDataSchema = readDataSchema(),
- readPartitionSchema = readPartitionSchema())
+ readPartitionSchema = readPartitionSchema(),
+ partitionFilters = partitionFilters,
+ dataFilters = dataFilters)
}
}
diff --git a/extensions/spark/kyuubi-spark-connector-hive/src/test/scala/org/apache/kyuubi/spark/connector/hive/HiveQuerySuite.scala b/extensions/spark/kyuubi-spark-connector-hive/src/test/scala/org/apache/kyuubi/spark/connector/hive/HiveQuerySuite.scala
index e61325647..16ea03234 100644
--- a/extensions/spark/kyuubi-spark-connector-hive/src/test/scala/org/apache/kyuubi/spark/connector/hive/HiveQuerySuite.scala
+++ b/extensions/spark/kyuubi-spark-connector-hive/src/test/scala/org/apache/kyuubi/spark/connector/hive/HiveQuerySuite.scala
@@ -107,6 +107,30 @@ class HiveQuerySuite extends KyuubiHiveTest {
}
}
+ test("[KYUUBI #4525] Partitioning predicates should take effect to filter data") {
+ withSparkSession(Map("hive.exec.dynamic.partition.mode" -> "nonstrict")) { spark =>
+ val table = "hive.default.employee"
+ withTempPartitionedTable(spark, table) {
+ spark.sql(
+ s"""
+ | INSERT OVERWRITE
+ | $table
+ | VALUES("yi", "2022", "0808"),("yi", "2023", "0316")
+ |""".stripMargin).collect()
+
+ checkQueryResult(
+ s"select * from $table where year = '2022'",
+ spark,
+ Array(Row.apply("yi", "2022", "0808")))
+
+ checkQueryResult(
+ s"select * from $table where year = '2023'",
+ spark,
+ Array(Row.apply("yi", "2023", "0316")))
+ }
+ }
+ }
+
test("Partitioned table insert and all static insert") {
withSparkSession() { spark =>
val table = "hive.default.employee"