You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by rx...@apache.org on 2016/10/20 02:36:24 UTC
spark git commit: [SPARK-17989][SQL] Check ascendingOrder type in
sort_array function rather than throwing ClassCastException
Repository: spark
Updated Branches:
refs/heads/master 444c2d22e -> 4b2011ec9
[SPARK-17989][SQL] Check ascendingOrder type in sort_array function rather than throwing ClassCastException
## What changes were proposed in this pull request?
This PR proposes to check the second argument, `ascendingOrder` rather than throwing `ClassCastException` exception message.
```sql
select sort_array(array('b', 'd'), '1');
```
**Before**
```
16/10/19 13:16:08 ERROR SparkSQLDriver: Failed in [select sort_array(array('b', 'd'), '1')]
java.lang.ClassCastException: org.apache.spark.unsafe.types.UTF8String cannot be cast to java.lang.Boolean
at scala.runtime.BoxesRunTime.unboxToBoolean(BoxesRunTime.java:85)
at org.apache.spark.sql.catalyst.expressions.SortArray.nullSafeEval(collectionOperations.scala:185)
at org.apache.spark.sql.catalyst.expressions.BinaryExpression.eval(Expression.scala:416)
at org.apache.spark.sql.catalyst.optimizer.ConstantFolding$$anonfun$apply$1$$anonfun$applyOrElse$1.applyOrElse(expressions.scala:50)
at org.apache.spark.sql.catalyst.optimizer.ConstantFolding$$anonfun$apply$1$$anonfun$applyOrElse$1.applyOrElse(expressions.scala:43)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:292)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:292)
at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:74)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:291)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:297)
```
**After**
```
Error in query: cannot resolve 'sort_array(array('b', 'd'), '1')' due to data type mismatch: Sort order in second argument requires a boolean literal.; line 1 pos 7;
```
## How was this patch tested?
Unit test in `DataFrameFunctionsSuite`.
Author: hyukjinkwon <gu...@gmail.com>
Closes #15532 from HyukjinKwon/SPARK-17989.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4b2011ec
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4b2011ec
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4b2011ec
Branch: refs/heads/master
Commit: 4b2011ec9da1245923b5cbd883240fef0dbf3ef0
Parents: 444c2d2
Author: hyukjinkwon <gu...@gmail.com>
Authored: Wed Oct 19 19:36:21 2016 -0700
Committer: Reynold Xin <rx...@databricks.com>
Committed: Wed Oct 19 19:36:21 2016 -0700
----------------------------------------------------------------------
.../expressions/collectionOperations.scala | 8 +++++++-
.../test/resources/sql-tests/inputs/array.sql | 6 ++++++
.../resources/sql-tests/results/array.sql.out | 21 +++++++++++++++++---
3 files changed, 31 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/4b2011ec/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
index c020029..f56bb39 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
@@ -124,7 +124,13 @@ case class SortArray(base: Expression, ascendingOrder: Expression)
override def checkInputDataTypes(): TypeCheckResult = base.dataType match {
case ArrayType(dt, _) if RowOrdering.isOrderable(dt) =>
- TypeCheckResult.TypeCheckSuccess
+ ascendingOrder match {
+ case Literal(_: Boolean, BooleanType) =>
+ TypeCheckResult.TypeCheckSuccess
+ case _ =>
+ TypeCheckResult.TypeCheckFailure(
+ "Sort order in second argument requires a boolean literal.")
+ }
case ArrayType(dt, _) =>
TypeCheckResult.TypeCheckFailure(
s"$prettyName does not support sorting array of type ${dt.simpleString}")
http://git-wip-us.apache.org/repos/asf/spark/blob/4b2011ec/sql/core/src/test/resources/sql-tests/inputs/array.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/inputs/array.sql b/sql/core/src/test/resources/sql-tests/inputs/array.sql
index 4038a0d..984321a 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/array.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/array.sql
@@ -71,6 +71,12 @@ select
sort_array(timestamp_array)
from primitive_arrays;
+-- sort_array with an invalid string literal for the argument of sort order.
+select sort_array(array('b', 'd'), '1');
+
+-- sort_array with an invalid null literal casted as boolean for the argument of sort order.
+select sort_array(array('b', 'd'), cast(NULL as boolean));
+
-- size
select
size(boolean_array),
http://git-wip-us.apache.org/repos/asf/spark/blob/4b2011ec/sql/core/src/test/resources/sql-tests/results/array.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/array.sql.out b/sql/core/src/test/resources/sql-tests/results/array.sql.out
index 4a1d149..499a3d5 100644
--- a/sql/core/src/test/resources/sql-tests/results/array.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/array.sql.out
@@ -124,8 +124,23 @@ struct<sort_array(boolean_array, true):array<boolean>,sort_array(tinyint_array,
-- !query 8 output
[true] [1,2] [1,2] [1,2] [1,2] [9223372036854775808,9223372036854775809] [1.0,2.0] [1.0,2.0] [2016-03-13,2016-03-14] [2016-11-12 20:54:00.0,2016-11-15 20:54:00.0]
-
-- !query 9
+select sort_array(array('b', 'd'), '1')
+-- !query 9 schema
+struct<>
+-- !query 9 output
+org.apache.spark.sql.AnalysisException
+cannot resolve 'sort_array(array('b', 'd'), '1')' due to data type mismatch: Sort order in second argument requires a boolean literal.; line 1 pos 7
+
+-- !query 10
+select sort_array(array('b', 'd'), cast(NULL as boolean))
+-- !query 10 schema
+struct<>
+-- !query 10 output
+org.apache.spark.sql.AnalysisException
+cannot resolve 'sort_array(array('b', 'd'), CAST(NULL AS BOOLEAN))' due to data type mismatch: Sort order in second argument requires a boolean literal.; line 1 pos 7
+
+-- !query 11
select
size(boolean_array),
size(tinyint_array),
@@ -138,7 +153,7 @@ select
size(date_array),
size(timestamp_array)
from primitive_arrays
--- !query 9 schema
+-- !query 11 schema
struct<size(boolean_array):int,size(tinyint_array):int,size(smallint_array):int,size(int_array):int,size(bigint_array):int,size(decimal_array):int,size(double_array):int,size(float_array):int,size(date_array):int,size(timestamp_array):int>
--- !query 9 output
+-- !query 11 output
1 2 2 2 2 2 2 2 2 2
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org