You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by hu...@apache.org on 2022/06/09 06:09:17 UTC
[spark] branch master updated: [SPARK-39417][SQL] Handle Null partition values in PartitioningUtils
This is an automated email from the ASF dual-hosted git repository.
huaxingao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new dcfd9f01289 [SPARK-39417][SQL] Handle Null partition values in PartitioningUtils
dcfd9f01289 is described below
commit dcfd9f01289f26c1a25e97432710a13772b3ad4c
Author: Prashant Singh <ps...@amazon.com>
AuthorDate: Wed Jun 8 23:08:44 2022 -0700
[SPARK-39417][SQL] Handle Null partition values in PartitioningUtils
### What changes were proposed in this pull request?
We should not try casting everything returned by `removeLeadingZerosFromNumberTypePartition` to string, as it returns null when the partition has a null value that has already been replaced by `DEFAULT_PARTITION_NAME`
### Why are the changes needed?
For null partitions where `removeLeadingZerosFromNumberTypePartition` is called, it would throw an NPE and hence the query would fail.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Added a UT, which would fail with an NPE otherwise.
Closes #36810 from singhpk234/psinghvk/fix-npe.
Authored-by: Prashant Singh <ps...@amazon.com>
Signed-off-by: huaxingao <hu...@apple.com>
---
.../spark/sql/execution/datasources/PartitioningUtils.scala | 2 +-
.../datasources/parquet/ParquetPartitionDiscoverySuite.scala | 8 ++++++++
2 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
index 166fc852899..e856bb5b9c2 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
@@ -359,7 +359,7 @@ object PartitioningUtils extends SQLConfHelper{
def removeLeadingZerosFromNumberTypePartition(value: String, dataType: DataType): String =
dataType match {
case ByteType | ShortType | IntegerType | LongType | FloatType | DoubleType =>
- castPartValueToDesiredType(dataType, value, null).toString
+ Option(castPartValueToDesiredType(dataType, value, null)).map(_.toString).orNull
case _ => value
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala
index b5947a4f820..fb5595322f7 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala
@@ -1259,6 +1259,14 @@ class ParquetV2PartitionDiscoverySuite extends ParquetPartitionDiscoverySuite {
assert("p_int=10/p_float=1.0" === path)
}
+ test("SPARK-39417: Null partition value") {
+ // null partition value is replaced by DEFAULT_PARTITION_NAME before hitting getPathFragment.
+ val spec = Map("p_int"-> ExternalCatalogUtils.DEFAULT_PARTITION_NAME)
+ val schema = new StructType().add("p_int", "int")
+ val path = PartitioningUtils.getPathFragment(spec, schema)
+ assert(s"p_int=${ExternalCatalogUtils.DEFAULT_PARTITION_NAME}" === path)
+ }
+
test("read partitioned table - partition key included in Parquet file") {
withTempDir { base =>
for {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org