Posted to commits@spark.apache.org by hu...@apache.org on 2022/06/09 06:09:17 UTC

[spark] branch master updated: [SPARK-39417][SQL] Handle Null partition values in PartitioningUtils

This is an automated email from the ASF dual-hosted git repository.

huaxingao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new dcfd9f01289 [SPARK-39417][SQL] Handle Null partition values in PartitioningUtils
dcfd9f01289 is described below

commit dcfd9f01289f26c1a25e97432710a13772b3ad4c
Author: Prashant Singh <ps...@amazon.com>
AuthorDate: Wed Jun 8 23:08:44 2022 -0700

    [SPARK-39417][SQL] Handle Null partition values in PartitioningUtils
    
    ### What changes were proposed in this pull request?
    
    Inside `removeLeadingZerosFromNumberTypePartition`, we should not unconditionally cast the value returned by `castPartValueToDesiredType` to a string: it returns null when the partition value is null and has already been replaced by `DEFAULT_PARTITION_NAME`.
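    
    As a minimal, hypothetical sketch (not the actual Spark code), the failure mode and the null-safe pattern the fix adopts look like this; the stand-in `castPartValue` only mimics `castPartValueToDesiredType` returning null for the `DEFAULT_PARTITION_NAME` placeholder:
    
    ```scala
    object NullSafeToStringSketch extends App {
      // Hypothetical stand-in: the real castPartValueToDesiredType returns
      // null when the raw value is the default (null) partition placeholder.
      def castPartValue(value: String): Any =
        if (value == "__HIVE_DEFAULT_PARTITION__") null else value.toLong
    
      // Before the fix: calling .toString directly on the result throws an
      // NPE for null partition values.
      //   castPartValue("__HIVE_DEFAULT_PARTITION__").toString   // NPE
    
      // After the fix: wrap the result in Option so null propagates instead.
      val safe: String =
        Option(castPartValue("__HIVE_DEFAULT_PARTITION__"))
          .map(_.toString)
          .orNull
      assert(safe == null)  // no NPE; null flows through
    }
    ```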
    
    ### Why are the changes needed?
    
    For null partition values, `removeLeadingZerosFromNumberTypePartition` would throw an NPE and the query would fail.
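    
    Concretely, a call like the following (mirroring the unit test added below; the partition value is assumed to have already been normalized to `DEFAULT_PARTITION_NAME`, i.e. `__HIVE_DEFAULT_PARTITION__`) would throw an NPE before this patch and returns the path fragment after it:
    
    ```scala
    import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils
    import org.apache.spark.sql.execution.datasources.PartitioningUtils
    import org.apache.spark.sql.types.StructType
    
    // Null partition values reach getPathFragment already replaced by
    // DEFAULT_PARTITION_NAME, so the int column carries the placeholder.
    val spec = Map("p_int" -> ExternalCatalogUtils.DEFAULT_PARTITION_NAME)
    val schema = new StructType().add("p_int", "int")
    
    // Before: NPE inside removeLeadingZerosFromNumberTypePartition.
    // After: "p_int=__HIVE_DEFAULT_PARTITION__"
    val fragment = PartitioningUtils.getPathFragment(spec, schema)
    ```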
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Added a unit test, which would fail with an NPE without this fix.
    
    Closes #36810 from singhpk234/psinghvk/fix-npe.
    
    Authored-by: Prashant Singh <ps...@amazon.com>
    Signed-off-by: huaxingao <hu...@apple.com>
---
 .../spark/sql/execution/datasources/PartitioningUtils.scala       | 2 +-
 .../datasources/parquet/ParquetPartitionDiscoverySuite.scala      | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
index 166fc852899..e856bb5b9c2 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
@@ -359,7 +359,7 @@ object PartitioningUtils extends SQLConfHelper{
   def removeLeadingZerosFromNumberTypePartition(value: String, dataType: DataType): String =
     dataType match {
       case ByteType | ShortType | IntegerType | LongType | FloatType | DoubleType =>
-        castPartValueToDesiredType(dataType, value, null).toString
+        Option(castPartValueToDesiredType(dataType, value, null)).map(_.toString).orNull
       case _ => value
     }
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala
index b5947a4f820..fb5595322f7 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala
@@ -1259,6 +1259,14 @@ class ParquetV2PartitionDiscoverySuite extends ParquetPartitionDiscoverySuite {
     assert("p_int=10/p_float=1.0" === path)
   }
 
+  test("SPARK-39417: Null partition value") {
+    // null partition value is replaced by DEFAULT_PARTITION_NAME before hitting getPathFragment.
+    val spec = Map("p_int"-> ExternalCatalogUtils.DEFAULT_PARTITION_NAME)
+    val schema = new StructType().add("p_int", "int")
+    val path = PartitioningUtils.getPathFragment(spec, schema)
+    assert(s"p_int=${ExternalCatalogUtils.DEFAULT_PARTITION_NAME}" === path)
+  }
+
   test("read partitioned table - partition key included in Parquet file") {
     withTempDir { base =>
       for {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org