Posted to commits@spark.apache.org by hv...@apache.org on 2016/08/18 11:44:18 UTC

spark git commit: [SPARK-17117][SQL] 1 / NULL should not fail analysis

Repository: spark
Updated Branches:
  refs/heads/master 412dba63b -> 68f5087d2


[SPARK-17117][SQL] 1 / NULL should not fail analysis

## What changes were proposed in this pull request?
This patch fixes the problem described in SPARK-17117, i.e. "SELECT 1 / NULL" throws an analysis exception:

```
org.apache.spark.sql.AnalysisException: cannot resolve '(1 / NULL)' due to data type mismatch: differing types in '(1 / NULL)' (int and null).
```

The problem is that the type-coercion rule for division did not take the null type into account.
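
To make the intended behavior concrete, here is a minimal Catalyst sketch mirroring the unit test added to TypeCoercionSuite in the diff below (a sketch only, assuming a Spark 2.x build with Catalyst on the classpath; `nullLit` is just a local name used for illustration):

```
import org.apache.spark.sql.catalyst.expressions.{Cast, Divide, Literal}
import org.apache.spark.sql.types.{DoubleType, NullType}

val nullLit = Literal.create(null, NullType)

// A division with a null literal on the right, as in the new test case ...
val before = Divide(Literal(1L), nullLit)

// ... and the shape the coercion rules should produce: both operands cast
// to double, so the expression resolves and simply evaluates to NULL.
val after = Divide(Cast(Literal(1L), DoubleType), Cast(nullLit, DoubleType))
```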

## How was this patch tested?
A unit test for the type coercion, and a few end-to-end test cases using SQLQueryTestSuite.

Author: petermaxlee <pe...@gmail.com>

Closes #14695 from petermaxlee/SPARK-17117.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/68f5087d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/68f5087d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/68f5087d

Branch: refs/heads/master
Commit: 68f5087d2107d6afec5d5745f0cb0e9e3bdd6a0b
Parents: 412dba6
Author: petermaxlee <pe...@gmail.com>
Authored: Thu Aug 18 13:44:13 2016 +0200
Committer: Herman van Hovell <hv...@databricks.com>
Committed: Thu Aug 18 13:44:13 2016 +0200

----------------------------------------------------------------------
 .../sql/catalyst/analysis/TypeCoercion.scala    |  7 +-
 .../catalyst/analysis/TypeCoercionSuite.scala   |  9 ++-
 .../resources/sql-tests/inputs/arithmetic.sql   | 12 ++-
 .../sql-tests/results/arithmetic.sql.out        | 84 +++++++++++++++-----
 4 files changed, 89 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/68f5087d/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
index 021952e..21e96aa 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
@@ -543,11 +543,14 @@ object TypeCoercion {
       // Decimal and Double remain the same
       case d: Divide if d.dataType == DoubleType => d
       case d: Divide if d.dataType.isInstanceOf[DecimalType] => d
-      case Divide(left, right) if isNumeric(left) && isNumeric(right) =>
+      case Divide(left, right) if isNumericOrNull(left) && isNumericOrNull(right) =>
         Divide(Cast(left, DoubleType), Cast(right, DoubleType))
     }
 
-    private def isNumeric(ex: Expression): Boolean = ex.dataType.isInstanceOf[NumericType]
+    private def isNumericOrNull(ex: Expression): Boolean = {
+      // We need to handle null types in case a query contains null literals.
+      ex.dataType.isInstanceOf[NumericType] || ex.dataType == NullType
+    }
   }
 
   /**
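
The key point in the hunk above is that NullType is not a NumericType, so the old `isNumeric` guard never fired for a null literal and the Divide was left with mismatched operand types. A quick way to see that, as a sketch in a Spark shell with Catalyst on the classpath:

```
import org.apache.spark.sql.types.{IntegerType, NullType, NumericType}

// IntegerType is a NumericType, NullType is not, which is why the old
// guard skipped "1 / NULL" and left the division uncoerced.
IntegerType.isInstanceOf[NumericType]  // true
NullType.isInstanceOf[NumericType]     // false
```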

http://git-wip-us.apache.org/repos/asf/spark/blob/68f5087d/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
index a13c45f..9560563 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.analysis
 
 import java.sql.Timestamp
 
-import org.apache.spark.sql.catalyst.analysis.TypeCoercion.{Division, FunctionArgumentConversion}
+import org.apache.spark.sql.catalyst.analysis.TypeCoercion._
 import org.apache.spark.sql.catalyst.dsl.expressions._
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans.PlanTest
@@ -730,6 +730,13 @@ class TypeCoercionSuite extends PlanTest {
     // the right expression to Decimal.
     ruleTest(rules, sum(Divide(Decimal(4.0), 3)), sum(Divide(Decimal(4.0), 3)))
   }
+
+  test("SPARK-17117 null type coercion in divide") {
+    val rules = Seq(FunctionArgumentConversion, Division, ImplicitTypeCasts)
+    val nullLit = Literal.create(null, NullType)
+    ruleTest(rules, Divide(1L, nullLit), Divide(Cast(1L, DoubleType), Cast(nullLit, DoubleType)))
+    ruleTest(rules, Divide(nullLit, 1L), Divide(Cast(nullLit, DoubleType), Cast(1L, DoubleType)))
+  }
 }
 
 

http://git-wip-us.apache.org/repos/asf/spark/blob/68f5087d/sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql b/sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql
index cbe4041..f62b10c 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql
@@ -16,11 +16,19 @@ select + + 100;
 select - - max(key) from testdata;
 select + - key from testdata where key = 33;
 
+-- div
+select 5 / 2;
+select 5 / 0;
+select 5 / null;
+select null / 5;
+select 5 div 2;
+select 5 div 0;
+select 5 div null;
+select null div 5;
+
 -- other arithmetics
 select 1 + 2;
 select 1 - 2;
 select 2 * 5;
-select 5 / 2;
-select 5 div 2;
 select 5 % 3;
 select pmod(-7, 3);

http://git-wip-us.apache.org/repos/asf/spark/blob/68f5087d/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out b/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out
index f2b40a0..6abe048 100644
--- a/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 22
+-- Number of queries: 28
 
 
 -- !query 0
@@ -123,35 +123,35 @@ struct<(- key):int>
 
 
 -- !query 15
-select 1 + 2
+select 5 / 2
 -- !query 15 schema
-struct<(1 + 2):int>
+struct<(CAST(5 AS DOUBLE) / CAST(2 AS DOUBLE)):double>
 -- !query 15 output
-3
+2.5
 
 
 -- !query 16
-select 1 - 2
+select 5 / 0
 -- !query 16 schema
-struct<(1 - 2):int>
+struct<(CAST(5 AS DOUBLE) / CAST(0 AS DOUBLE)):double>
 -- !query 16 output
--1
+NULL
 
 
 -- !query 17
-select 2 * 5
+select 5 / null
 -- !query 17 schema
-struct<(2 * 5):int>
+struct<(CAST(5 AS DOUBLE) / CAST(NULL AS DOUBLE)):double>
 -- !query 17 output
-10
+NULL
 
 
 -- !query 18
-select 5 / 2
+select null / 5
 -- !query 18 schema
-struct<(CAST(5 AS DOUBLE) / CAST(2 AS DOUBLE)):double>
+struct<(CAST(NULL AS DOUBLE) / CAST(5 AS DOUBLE)):double>
 -- !query 18 output
-2.5
+NULL
 
 
 -- !query 19
@@ -163,16 +163,64 @@ struct<CAST((CAST(5 AS DOUBLE) / CAST(2 AS DOUBLE)) AS BIGINT):bigint>
 
 
 -- !query 20
-select 5 % 3
+select 5 div 0
 -- !query 20 schema
-struct<(5 % 3):int>
+struct<CAST((CAST(5 AS DOUBLE) / CAST(0 AS DOUBLE)) AS BIGINT):bigint>
 -- !query 20 output
-2
+NULL
 
 
 -- !query 21
-select pmod(-7, 3)
+select 5 div null
 -- !query 21 schema
-struct<pmod(-7, 3):int>
+struct<CAST((CAST(5 AS DOUBLE) / CAST(NULL AS DOUBLE)) AS BIGINT):bigint>
 -- !query 21 output
+NULL
+
+
+-- !query 22
+select null div 5
+-- !query 22 schema
+struct<CAST((CAST(NULL AS DOUBLE) / CAST(5 AS DOUBLE)) AS BIGINT):bigint>
+-- !query 22 output
+NULL
+
+
+-- !query 23
+select 1 + 2
+-- !query 23 schema
+struct<(1 + 2):int>
+-- !query 23 output
+3
+
+
+-- !query 24
+select 1 - 2
+-- !query 24 schema
+struct<(1 - 2):int>
+-- !query 24 output
+-1
+
+
+-- !query 25
+select 2 * 5
+-- !query 25 schema
+struct<(2 * 5):int>
+-- !query 25 output
+10
+
+
+-- !query 26
+select 5 % 3
+-- !query 26 schema
+struct<(5 % 3):int>
+-- !query 26 output
+2
+
+
+-- !query 27
+select pmod(-7, 3)
+-- !query 27 schema
+struct<pmod(-7, 3):int>
+-- !query 27 output
 2

