You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2021/02/26 12:30:10 UTC

[spark] branch branch-3.1 updated: [SPARK-34550][SQL] Skip InSet null value during push filter to Hive metastore

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new 6ee00a9  [SPARK-34550][SQL] Skip InSet null value during push filter to Hive metastore
6ee00a9 is described below

commit 6ee00a99605f3cd7a403f24ad1bae31d5701a56d
Author: ulysses-you <ul...@gmail.com>
AuthorDate: Fri Feb 26 21:29:14 2021 +0900

    [SPARK-34550][SQL] Skip InSet null value during push filter to Hive metastore
    
    ### What changes were proposed in this pull request?
    
    Skip `InSet` null value during push filter to Hive metastore.
    
    ### Why are the changes needed?
    
    If `InSet` contains a null value, we should skip it and push other values to metastore. To keep same behavior with `In`.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Add test.
    
    Closes #31659 from ulysses-you/SPARK-34550.
    
    Authored-by: ulysses-you <ul...@gmail.com>
    Signed-off-by: HyukjinKwon <gu...@apache.org>
    (cherry picked from commit 82267acfe8c78a70d56a6ae6ab9a1135c0dc0836)
    Signed-off-by: HyukjinKwon <gu...@apache.org>
---
 .../scala/org/apache/spark/sql/hive/client/HiveShim.scala |  4 ++--
 .../org/apache/spark/sql/hive/client/FiltersSuite.scala   | 15 +++++++++++++++
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala
index 8ccb17c..db67480 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala
@@ -700,7 +700,7 @@ private[client] class Shim_v0_13 extends Shim_v0_12 {
       }
 
       def unapply(values: Set[Any]): Option[Seq[String]] = {
-        val extractables = values.toSeq.map(valueToLiteralString.lift)
+        val extractables = values.filter(_ != null).toSeq.map(valueToLiteralString.lift)
         if (extractables.nonEmpty && extractables.forall(_.isDefined)) {
           Some(extractables.map(_.get))
         } else {
@@ -715,7 +715,7 @@ private[client] class Shim_v0_13 extends Shim_v0_12 {
       }
 
       def unapply(values: Set[Any]): Option[Seq[String]] = {
-        val extractables = values.toSeq.map(valueToLiteralString.lift)
+        val extractables = values.filter(_ != null).toSeq.map(valueToLiteralString.lift)
         if (extractables.nonEmpty && extractables.forall(_.isDefined)) {
           Some(extractables.map(_.get))
         } else {
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/FiltersSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/FiltersSuite.scala
index 6962f9d..79b34bd 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/FiltersSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/FiltersSuite.scala
@@ -187,5 +187,20 @@ class FiltersSuite extends SparkFunSuite with Logging with PlanTest {
     }
   }
 
+  test("SPARK-34538: Skip InSet null value during push filter to Hive metastore") {
+    withSQLConf(SQLConf.HIVE_METASTORE_PARTITION_PRUNING_INSET_THRESHOLD.key -> "3") {
+      val intFilter = InSet(a("p", IntegerType), Set(null, 1, 2))
+      val intConverted = shim.convertFilters(testTable, Seq(intFilter), conf.sessionLocalTimeZone)
+      assert(intConverted == "(p = 1 or p = 2)")
+    }
+
+    withSQLConf(SQLConf.HIVE_METASTORE_PARTITION_PRUNING_INSET_THRESHOLD.key -> "3") {
+      val dateFilter = InSet(a("p", DateType), Set(null,
+        Literal(Date.valueOf("2020-01-01")).eval(), Literal(Date.valueOf("2021-01-01")).eval()))
+      val dateConverted = shim.convertFilters(testTable, Seq(dateFilter), conf.sessionLocalTimeZone)
+      assert(dateConverted == "(p = 2020-01-01 or p = 2021-01-01)")
+    }
+  }
+
   private def a(name: String, dataType: DataType) = AttributeReference(name, dataType)()
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org