You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2020/03/17 18:50:46 UTC
[spark] branch branch-3.0 updated: [SPARK-31171][SQL] size(null)
should return null under ansi mode
This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new b9371e3 [SPARK-31171][SQL] size(null) should return null under ansi mode
b9371e3 is described below
commit b9371e38abbc351106453b14172d6919be0eca82
Author: Wenchen Fan <we...@databricks.com>
AuthorDate: Tue Mar 17 11:48:54 2020 -0700
[SPARK-31171][SQL] size(null) should return null under ansi mode
Make `size(null)` return null under ANSI mode, regardless of the `spark.sql.legacy.sizeOfNull` config.
In https://github.com/apache/spark/pull/27834, we change the result of `size(null)` to be -1 to match the 2.4 behavior and avoid breaking changes.
However, it's true that the "return -1" behavior is error-prone when being used with aggregate functions. The current ANSI mode controls a bunch of "better behaviors" like failing on overflow. We don't enable these "better behaviors" by default because they are too breaking. The "return null" behavior of `size(null)` is a good fit for the ANSI mode.
No, as ANSI mode is off by default.
New tests.
Closes #27936 from cloud-fan/null.
Authored-by: Wenchen Fan <we...@databricks.com>
Signed-off-by: Dongjoon Hyun <do...@apache.org>
(cherry picked from commit dc5ebc2d5b8122121d89a9175737bea95ae10126)
Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
.../main/scala/org/apache/spark/sql/internal/SQLConf.scala | 9 ++++++---
.../catalyst/expressions/CollectionExpressionsSuite.scala | 6 ++++++
.../scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala | 12 ++++++++++++
3 files changed, 24 insertions(+), 3 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index e49593e..1331350 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -1940,8 +1940,8 @@ object SQLConf {
val LEGACY_SIZE_OF_NULL = buildConf("spark.sql.legacy.sizeOfNull")
.internal()
- .doc("If it is set to true, size of null returns -1. This behavior was inherited from Hive. " +
- "The size function returns null for null input if the flag is disabled.")
+ .doc(s"If it is set to false, or ${ANSI_ENABLED.key} is true, then size of null returns " +
+ "null. Otherwise, it returns -1, which was inherited from Hive.")
.booleanConf
.createWithDefault(true)
@@ -2759,7 +2759,10 @@ class SQLConf extends Serializable with Logging {
def csvColumnPruning: Boolean = getConf(SQLConf.CSV_PARSER_COLUMN_PRUNING)
- def legacySizeOfNull: Boolean = getConf(SQLConf.LEGACY_SIZE_OF_NULL)
+ def legacySizeOfNull: Boolean = {
+ // size(null) should return null under ansi mode.
+ getConf(SQLConf.LEGACY_SIZE_OF_NULL) && !getConf(ANSI_ENABLED)
+ }
def isReplEagerEvalEnabled: Boolean = getConf(SQLConf.REPL_EAGER_EVAL_ENABLED)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala
index 3cfc66f..173f248 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala
@@ -74,6 +74,12 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper
withSQLConf(SQLConf.LEGACY_SIZE_OF_NULL.key -> "false") {
testSize(sizeOfNull = null)
}
+ // size(null) should return null under ansi mode.
+ withSQLConf(
+ SQLConf.LEGACY_SIZE_OF_NULL.key -> "true",
+ SQLConf.ANSI_ENABLED.key -> "true") {
+ testSize(sizeOfNull = null)
+ }
}
test("MapKeys/MapValues") {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
index a613c33..c41eb98 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
@@ -490,6 +490,12 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession {
withSQLConf(SQLConf.LEGACY_SIZE_OF_NULL.key -> "false") {
testSizeOfArray(sizeOfNull = null)
}
+ // size(null) should return null under ansi mode.
+ withSQLConf(
+ SQLConf.LEGACY_SIZE_OF_NULL.key -> "true",
+ SQLConf.ANSI_ENABLED.key -> "true") {
+ testSizeOfArray(sizeOfNull = null)
+ }
}
test("dataframe arrays_zip function") {
@@ -569,6 +575,12 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession {
withSQLConf(SQLConf.LEGACY_SIZE_OF_NULL.key -> "false") {
testSizeOfMap(sizeOfNull = null)
}
+ // size(null) should return null under ansi mode.
+ withSQLConf(
+ SQLConf.LEGACY_SIZE_OF_NULL.key -> "true",
+ SQLConf.ANSI_ENABLED.key -> "true") {
+ testSizeOfMap(sizeOfNull = null)
+ }
}
test("map_keys/map_values function") {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org