You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by rx...@apache.org on 2016/10/20 02:36:24 UTC

spark git commit: [SPARK-17989][SQL] Check ascendingOrder type in sort_array function rather than throwing ClassCastException

Repository: spark
Updated Branches:
  refs/heads/master 444c2d22e -> 4b2011ec9


[SPARK-17989][SQL] Check ascendingOrder type in sort_array function rather than throwing ClassCastException

## What changes were proposed in this pull request?

This PR proposes to check the type of the second argument, `ascendingOrder`, rather than throwing a `ClassCastException`.

```sql
select sort_array(array('b', 'd'), '1');
```

**Before**

```
16/10/19 13:16:08 ERROR SparkSQLDriver: Failed in [select sort_array(array('b', 'd'), '1')]
java.lang.ClassCastException: org.apache.spark.unsafe.types.UTF8String cannot be cast to java.lang.Boolean
	at scala.runtime.BoxesRunTime.unboxToBoolean(BoxesRunTime.java:85)
	at org.apache.spark.sql.catalyst.expressions.SortArray.nullSafeEval(collectionOperations.scala:185)
	at org.apache.spark.sql.catalyst.expressions.BinaryExpression.eval(Expression.scala:416)
	at org.apache.spark.sql.catalyst.optimizer.ConstantFolding$$anonfun$apply$1$$anonfun$applyOrElse$1.applyOrElse(expressions.scala:50)
	at org.apache.spark.sql.catalyst.optimizer.ConstantFolding$$anonfun$apply$1$$anonfun$applyOrElse$1.applyOrElse(expressions.scala:43)
	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:292)
	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:292)
	at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:74)
	at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:291)
	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:297)
```

**After**

```
Error in query: cannot resolve 'sort_array(array('b', 'd'), '1')' due to data type mismatch: Sort order in second argument requires a boolean literal.; line 1 pos 7;
```

## How was this patch tested?

Test cases added to the SQL query test file `sql-tests/inputs/array.sql` (expected output in `sql-tests/results/array.sql.out`).

Author: hyukjinkwon <gu...@gmail.com>

Closes #15532 from HyukjinKwon/SPARK-17989.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4b2011ec
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4b2011ec
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4b2011ec

Branch: refs/heads/master
Commit: 4b2011ec9da1245923b5cbd883240fef0dbf3ef0
Parents: 444c2d2
Author: hyukjinkwon <gu...@gmail.com>
Authored: Wed Oct 19 19:36:21 2016 -0700
Committer: Reynold Xin <rx...@databricks.com>
Committed: Wed Oct 19 19:36:21 2016 -0700

----------------------------------------------------------------------
 .../expressions/collectionOperations.scala      |  8 +++++++-
 .../test/resources/sql-tests/inputs/array.sql   |  6 ++++++
 .../resources/sql-tests/results/array.sql.out   | 21 +++++++++++++++++---
 3 files changed, 31 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/4b2011ec/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
index c020029..f56bb39 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
@@ -124,7 +124,13 @@ case class SortArray(base: Expression, ascendingOrder: Expression)
 
   override def checkInputDataTypes(): TypeCheckResult = base.dataType match {
     case ArrayType(dt, _) if RowOrdering.isOrderable(dt) =>
-      TypeCheckResult.TypeCheckSuccess
+      ascendingOrder match {
+        case Literal(_: Boolean, BooleanType) =>
+          TypeCheckResult.TypeCheckSuccess
+        case _ =>
+          TypeCheckResult.TypeCheckFailure(
+            "Sort order in second argument requires a boolean literal.")
+      }
     case ArrayType(dt, _) =>
       TypeCheckResult.TypeCheckFailure(
         s"$prettyName does not support sorting array of type ${dt.simpleString}")

http://git-wip-us.apache.org/repos/asf/spark/blob/4b2011ec/sql/core/src/test/resources/sql-tests/inputs/array.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/inputs/array.sql b/sql/core/src/test/resources/sql-tests/inputs/array.sql
index 4038a0d..984321a 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/array.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/array.sql
@@ -71,6 +71,12 @@ select
   sort_array(timestamp_array)
 from primitive_arrays;
 
+-- sort_array with an invalid string literal for the argument of sort order.
+select sort_array(array('b', 'd'), '1');
+
+-- sort_array with an invalid null literal casted as boolean for the argument of sort order.
+select sort_array(array('b', 'd'), cast(NULL as boolean));
+
 -- size
 select
   size(boolean_array),

http://git-wip-us.apache.org/repos/asf/spark/blob/4b2011ec/sql/core/src/test/resources/sql-tests/results/array.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/array.sql.out b/sql/core/src/test/resources/sql-tests/results/array.sql.out
index 4a1d149..499a3d5 100644
--- a/sql/core/src/test/resources/sql-tests/results/array.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/array.sql.out
@@ -124,8 +124,23 @@ struct<sort_array(boolean_array, true):array<boolean>,sort_array(tinyint_array,
 -- !query 8 output
 [true]	[1,2]	[1,2]	[1,2]	[1,2]	[9223372036854775808,9223372036854775809]	[1.0,2.0]	[1.0,2.0]	[2016-03-13,2016-03-14]	[2016-11-12 20:54:00.0,2016-11-15 20:54:00.0]
 
-
 -- !query 9
+select sort_array(array('b', 'd'), '1')
+-- !query 9 schema
+struct<>
+-- !query 9 output
+org.apache.spark.sql.AnalysisException
+cannot resolve 'sort_array(array('b', 'd'), '1')' due to data type mismatch: Sort order in second argument requires a boolean literal.; line 1 pos 7
+
+-- !query 10
+select sort_array(array('b', 'd'), cast(NULL as boolean))
+-- !query 10 schema
+struct<>
+-- !query 10 output
+org.apache.spark.sql.AnalysisException
+cannot resolve 'sort_array(array('b', 'd'), CAST(NULL AS BOOLEAN))' due to data type mismatch: Sort order in second argument requires a boolean literal.; line 1 pos 7
+
+-- !query 11
 select
   size(boolean_array),
   size(tinyint_array),
@@ -138,7 +153,7 @@ select
   size(date_array),
   size(timestamp_array)
 from primitive_arrays
--- !query 9 schema
+-- !query 11 schema
 struct<size(boolean_array):int,size(tinyint_array):int,size(smallint_array):int,size(int_array):int,size(bigint_array):int,size(decimal_array):int,size(double_array):int,size(float_array):int,size(date_array):int,size(timestamp_array):int>
--- !query 9 output
+-- !query 11 output
 1	2	2	2	2	2	2	2	2	2


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org