Posted to commits@kyuubi.apache.org by ch...@apache.org on 2023/03/16 02:13:40 UTC

[kyuubi] branch branch-1.7 updated: [KYUUBI #4525][KSHC] Partitioning predicates should take effect to filter data

This is an automated email from the ASF dual-hosted git repository.

chengpan pushed a commit to branch branch-1.7
in repository https://gitbox.apache.org/repos/asf/kyuubi.git


The following commit(s) were added to refs/heads/branch-1.7 by this push:
     new 3d10e8f00 [KYUUBI #4525][KSHC] Partitioning predicates should take effect to filter data
3d10e8f00 is described below

commit 3d10e8f007c899bde609231da555c9d50b999b62
Author: Yikf <yi...@apache.org>
AuthorDate: Thu Mar 16 10:12:44 2023 +0800

    [KYUUBI #4525][KSHC] Partitioning predicates should take effect to filter data
    
    ### _Why are the changes needed?_
    
    This PR aims to close https://github.com/apache/kyuubi/issues/4525.
    
    The root cause of this problem is that Apache Spark performs predicate push-down in `V2ScanRelationPushDown`, but the spark-hive-connector does not apply the pushed-down predicates when filtering data.
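    
    For readers unfamiliar with the mechanism, a minimal sketch of the DSv2 contract that `V2ScanRelationPushDown` drives is shown below (the `Example*` names are illustrative only; the Spark interfaces are real):
    
    ```scala
    import org.apache.spark.sql.connector.read.{Scan, ScanBuilder, SupportsPushDownFilters}
    import org.apache.spark.sql.sources.Filter
    import org.apache.spark.sql.types.StructType
    
    // Illustrative only: a scan that receives the pushed filters.
    class ExampleScan(pushed: Array[Filter]) extends Scan {
      override def readSchema(): StructType = new StructType()
    }
    
    class ExampleScanBuilder extends ScanBuilder with SupportsPushDownFilters {
      private var pushed: Array[Filter] = Array.empty
    
      // V2ScanRelationPushDown calls this with the predicates it can push down;
      // the return value is the subset Spark must still evaluate after the scan.
      override def pushFilters(filters: Array[Filter]): Array[Filter] = {
        pushed = filters
        filters // conservatively keep everything as post-scan filters too
      }
    
      override def pushedFilters(): Array[Filter] = pushed
    
      // The pushed filters only take effect if the builder forwards them into
      // the Scan it builds; dropping them here reproduces the reported bug.
      override def build(): Scan = new ExampleScan(pushed)
    }
    ```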
    
    ### _How was this patch tested?_
    - [x] Add some test cases that check the changes thoroughly, including negative and positive cases if possible
    
    - [ ] Add screenshots for manual tests if appropriate
    
    - [x] [Run test](https://kyuubi.readthedocs.io/en/master/develop_tools/testing.html#running-tests) locally before making a pull request
    
    Closes #4528 from Yikf/KYUUBI-4525.
    
    Closes #4525
    
    a65a1873f [Yikf] Partitioning predicates should take effect to filter data
    
    Authored-by: Yikf <yi...@apache.org>
    Signed-off-by: Cheng Pan <ch...@apache.org>
    (cherry picked from commit 41e9505722ffe69a83fe43cce60cfbbb445e2a35)
    Signed-off-by: Cheng Pan <ch...@apache.org>
---
 .../connector/hive/read/HiveScanBuilder.scala      |  4 +++-
 .../spark/connector/hive/HiveQuerySuite.scala      | 24 ++++++++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/extensions/spark/kyuubi-spark-connector-hive/src/main/scala/org/apache/kyuubi/spark/connector/hive/read/HiveScanBuilder.scala b/extensions/spark/kyuubi-spark-connector-hive/src/main/scala/org/apache/kyuubi/spark/connector/hive/read/HiveScanBuilder.scala
index 8e90cc3ab..89836e712 100644
--- a/extensions/spark/kyuubi-spark-connector-hive/src/main/scala/org/apache/kyuubi/spark/connector/hive/read/HiveScanBuilder.scala
+++ b/extensions/spark/kyuubi-spark-connector-hive/src/main/scala/org/apache/kyuubi/spark/connector/hive/read/HiveScanBuilder.scala
@@ -37,6 +37,8 @@ case class HiveScanBuilder(
       catalogTable = table,
       dataSchema = dataSchema,
       readDataSchema = readDataSchema(),
-      readPartitionSchema = readPartitionSchema())
+      readPartitionSchema = readPartitionSchema(),
+      partitionFilters = partitionFilters,
+      dataFilters = dataFilters)
   }
 }
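
For context: `HiveScanBuilder` extends Spark's `FileScanBuilder`, which in recent Spark versions already splits the pushed predicates into `partitionFilters` and `dataFilters`; the fix above simply forwards them into the scan. As a rough sketch of what a scan can then do with the partition filters, assuming the partitions were listed from the metastore (the `prunedPartitions` helper and its inputs are hypothetical; `ExternalCatalogUtils.prunePartitionsByFilter` is Spark's own utility):

```scala
import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTablePartition, ExternalCatalogUtils}
import org.apache.spark.sql.catalyst.expressions.Expression

// Hypothetical pruning step inside a scan; `table` and `allPartitions`
// stand in for whatever the connector resolves from the metastore.
def prunedPartitions(
    table: CatalogTable,
    allPartitions: Seq[CatalogTablePartition],
    partitionFilters: Seq[Expression]): Seq[CatalogTablePartition] = {
  // Keep only partitions whose values satisfy the pushed predicates,
  // so non-matching partitions are never read at all.
  ExternalCatalogUtils.prunePartitionsByFilter(
    table,
    allPartitions,
    partitionFilters,
    java.util.TimeZone.getDefault.getID)
}
```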
diff --git a/extensions/spark/kyuubi-spark-connector-hive/src/test/scala/org/apache/kyuubi/spark/connector/hive/HiveQuerySuite.scala b/extensions/spark/kyuubi-spark-connector-hive/src/test/scala/org/apache/kyuubi/spark/connector/hive/HiveQuerySuite.scala
index e61325647..16ea03234 100644
--- a/extensions/spark/kyuubi-spark-connector-hive/src/test/scala/org/apache/kyuubi/spark/connector/hive/HiveQuerySuite.scala
+++ b/extensions/spark/kyuubi-spark-connector-hive/src/test/scala/org/apache/kyuubi/spark/connector/hive/HiveQuerySuite.scala
@@ -107,6 +107,30 @@ class HiveQuerySuite extends KyuubiHiveTest {
     }
   }
 
+  test("[KYUUBI #4525] Partitioning predicates should take effect to filter data") {
+    withSparkSession(Map("hive.exec.dynamic.partition.mode" -> "nonstrict")) { spark =>
+      val table = "hive.default.employee"
+      withTempPartitionedTable(spark, table) {
+        spark.sql(
+          s"""
+             | INSERT OVERWRITE
+             | $table
+             | VALUES("yi", "2022", "0808"),("yi", "2023", "0316")
+             |""".stripMargin).collect()
+
+        checkQueryResult(
+          s"select * from $table where year = '2022'",
+          spark,
+          Array(Row.apply("yi", "2022", "0808")))
+
+        checkQueryResult(
+          s"select * from $table where year = '2023'",
+          spark,
+          Array(Row.apply("yi", "2023", "0316")))
+      }
+    }
+  }
+
   test("Partitioned table insert and all static insert") {
     withSparkSession() { spark =>
       val table = "hive.default.employee"
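
Beyond the result assertions in the new test, a quick manual check that the pruning takes effect (not part of this patch) is to inspect the physical plan and confirm the partition filter appears on the scan:

```scala
// Assumes the same `hive.default.employee` table as in the test above;
// with the fix, the scan should list only the matching partition.
spark.sql("SELECT * FROM hive.default.employee WHERE year = '2022'").explain(true)
```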