You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2021/02/24 13:34:40 UTC
[spark] branch branch-3.1 updated: [SPARK-34515][SQL] Fix NPE if
InSet contains null value during getPartitionsByFilter
This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new 8daa317 [SPARK-34515][SQL] Fix NPE if InSet contains null value during getPartitionsByFilter
8daa317 is described below
commit 8daa317b77892af6ab6ff19f2147eb722fcc1d78
Author: ulysses-you <ul...@gmail.com>
AuthorDate: Wed Feb 24 21:32:19 2021 +0800
[SPARK-34515][SQL] Fix NPE if InSet contains null value during getPartitionsByFilter
### What changes were proposed in this pull request?
Skip null values when rewriting `InSet` to `>= and <=` in getPartitionsByFilter.
### Why are the changes needed?
Spark will convert `InSet` to `>= and <=` if its number of values exceeds `spark.sql.hive.metastorePartitionPruningInSetThreshold` during partition pruning. In this case, if the values contain a null, we will get the following exception
```
java.lang.NullPointerException
at org.apache.spark.unsafe.types.UTF8String.compareTo(UTF8String.java:1389)
at org.apache.spark.unsafe.types.UTF8String.compareTo(UTF8String.java:50)
at scala.math.LowPriorityOrderingImplicits$$anon$3.compare(Ordering.scala:153)
at java.util.TimSort.countRunAndMakeAscending(TimSort.java:355)
at java.util.TimSort.sort(TimSort.java:220)
at java.util.Arrays.sort(Arrays.java:1438)
at scala.collection.SeqLike.sorted(SeqLike.scala:659)
at scala.collection.SeqLike.sorted$(SeqLike.scala:647)
at scala.collection.AbstractSeq.sorted(Seq.scala:45)
at org.apache.spark.sql.hive.client.Shim_v0_13.convert$1(HiveShim.scala:772)
at org.apache.spark.sql.hive.client.Shim_v0_13.$anonfun$convertFilters$4(HiveShim.scala:826)
at scala.collection.immutable.Stream.flatMap(Stream.scala:489)
at org.apache.spark.sql.hive.client.Shim_v0_13.convertFilters(HiveShim.scala:826)
at org.apache.spark.sql.hive.client.Shim_v0_13.getPartitionsByFilter(HiveShim.scala:848)
at org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$getPartitionsByFilter$1(HiveClientImpl.scala:750)
```
### Does this PR introduce _any_ user-facing change?
Yes, bug fix.
### How was this patch tested?
Add test.
Closes #31632 from ulysses-you/SPARK-34515.
Authored-by: ulysses-you <ul...@gmail.com>
Signed-off-by: Wenchen Fan <we...@databricks.com>
(cherry picked from commit 999d3b89b6df14a5ccb94ffc2ffadb82964e9f7d)
Signed-off-by: Wenchen Fan <we...@databricks.com>
---
.../main/scala/org/apache/spark/sql/hive/client/HiveShim.scala | 4 +++-
.../scala/org/apache/spark/sql/hive/client/FiltersSuite.scala | 8 ++++++++
2 files changed, 11 insertions(+), 1 deletion(-)
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala
index ed08864..8ccb17c 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala
@@ -769,7 +769,9 @@ private[client] class Shim_v0_13 extends Shim_v0_12 {
case InSet(child, values) if useAdvanced && values.size > inSetThreshold =>
val dataType = child.dataType
- val sortedValues = values.toSeq.sorted(TypeUtils.getInterpretedOrdering(dataType))
+ // Skip null here is safe, more details could see at ExtractableLiterals.
+ val sortedValues = values.filter(_ != null).toSeq
+ .sorted(TypeUtils.getInterpretedOrdering(dataType))
convert(And(GreaterThanOrEqual(child, Literal(sortedValues.head, dataType)),
LessThanOrEqual(child, Literal(sortedValues.last, dataType))))
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/FiltersSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/FiltersSuite.scala
index 12ed0e5..6962f9d 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/FiltersSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/FiltersSuite.scala
@@ -179,5 +179,13 @@ class FiltersSuite extends SparkFunSuite with Logging with PlanTest {
}
}
+ test("SPARK-34515: Fix NPE if InSet contains null value during getPartitionsByFilter") {
+ withSQLConf(SQLConf.HIVE_METASTORE_PARTITION_PRUNING_INSET_THRESHOLD.key -> "2") {
+ val filter = InSet(a("p", IntegerType), Set(null, 1, 2))
+ val converted = shim.convertFilters(testTable, Seq(filter), conf.sessionLocalTimeZone)
+ assert(converted == "(p >= 1 and p <= 2)")
+ }
+ }
+
private def a(name: String, dataType: DataType) = AttributeReference(name, dataType)()
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org