You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by li...@apache.org on 2017/11/15 17:02:57 UTC

spark git commit: [SPARK-22469][SQL] Accuracy problem in comparison with string and numeric

Repository: spark
Updated Branches:
  refs/heads/master aa88b8dbb -> bc0848b4c


[SPARK-22469][SQL] Accuracy problem in comparison with string and numeric

## What changes were proposed in this pull request?
This fixes a problem caused by #15880:
`select '1.5' > 0.5;` returns NULL in Spark but true in Hive.

When comparing a string with a numeric (decimal) value, cast both sides to double, as Hive does.

Author: liutang123 <li...@yeah.net>

Closes #19692 from liutang123/SPARK-22469.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bc0848b4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bc0848b4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bc0848b4

Branch: refs/heads/master
Commit: bc0848b4c1ab84ccef047363a70fd11df240dbbf
Parents: aa88b8d
Author: liutang123 <li...@yeah.net>
Authored: Wed Nov 15 09:02:54 2017 -0800
Committer: gatorsmile <ga...@gmail.com>
Committed: Wed Nov 15 09:02:54 2017 -0800

----------------------------------------------------------------------
 .../sql/catalyst/analysis/TypeCoercion.scala    |   7 +
 .../catalyst/analysis/TypeCoercionSuite.scala   |   3 +
 .../sql-tests/inputs/predicate-functions.sql    |   5 +
 .../results/predicate-functions.sql.out         | 140 ++++++++++++-------
 4 files changed, 105 insertions(+), 50 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/bc0848b4/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
index 532d22d..074eda5 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
@@ -137,6 +137,13 @@ object TypeCoercion {
     case (DateType, TimestampType) => Some(StringType)
     case (StringType, NullType) => Some(StringType)
     case (NullType, StringType) => Some(StringType)
+
+    // There is no proper decimal type we can pick,
+    // using double type is the best we can do.
+    // See SPARK-22469 for details.
+    case (n: DecimalType, s: StringType) => Some(DoubleType)
+    case (s: StringType, n: DecimalType) => Some(DoubleType)
+
     case (l: StringType, r: AtomicType) if r != StringType => Some(r)
     case (l: AtomicType, r: StringType) if (l != StringType) => Some(l)
     case (l, r) => None

http://git-wip-us.apache.org/repos/asf/spark/blob/bc0848b4/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
index 793e04f..5dcd653 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
@@ -1152,6 +1152,9 @@ class TypeCoercionSuite extends AnalysisTest {
     ruleTest(PromoteStrings,
       EqualTo(Literal(Array(1, 2)), Literal("123")),
       EqualTo(Literal(Array(1, 2)), Literal("123")))
+    ruleTest(PromoteStrings,
+      GreaterThan(Literal("1.5"), Literal(BigDecimal("0.5"))),
+      GreaterThan(Cast(Literal("1.5"), DoubleType), Cast(Literal(BigDecimal("0.5")), DoubleType)))
   }
 
   test("cast WindowFrame boundaries to the type they operate upon") {

http://git-wip-us.apache.org/repos/asf/spark/blob/bc0848b4/sql/core/src/test/resources/sql-tests/inputs/predicate-functions.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/inputs/predicate-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/predicate-functions.sql
index 3b3d4ad..e99d5ce 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/predicate-functions.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/predicate-functions.sql
@@ -2,12 +2,14 @@
 select 1 = 1;
 select 1 = '1';
 select 1.0 = '1';
+select 1.5 = '1.51';
 
 -- GreaterThan
 select 1 > '1';
 select 2 > '1.0';
 select 2 > '2.0';
 select 2 > '2.2';
+select '1.5' > 0.5;
 select to_date('2009-07-30 04:17:52') > to_date('2009-07-30 04:17:52');
 select to_date('2009-07-30 04:17:52') > '2009-07-30 04:17:52';
  
@@ -16,6 +18,7 @@ select 1 >= '1';
 select 2 >= '1.0';
 select 2 >= '2.0';
 select 2.0 >= '2.2';
+select '1.5' >= 0.5;
 select to_date('2009-07-30 04:17:52') >= to_date('2009-07-30 04:17:52');
 select to_date('2009-07-30 04:17:52') >= '2009-07-30 04:17:52';
  
@@ -24,6 +27,7 @@ select 1 < '1';
 select 2 < '1.0';
 select 2 < '2.0';
 select 2.0 < '2.2';
+select 0.5 < '1.5';
 select to_date('2009-07-30 04:17:52') < to_date('2009-07-30 04:17:52');
 select to_date('2009-07-30 04:17:52') < '2009-07-30 04:17:52';
  
@@ -32,5 +36,6 @@ select 1 <= '1';
 select 2 <= '1.0';
 select 2 <= '2.0';
 select 2.0 <= '2.2';
+select 0.5 <= '1.5';
 select to_date('2009-07-30 04:17:52') <= to_date('2009-07-30 04:17:52');
 select to_date('2009-07-30 04:17:52') <= '2009-07-30 04:17:52';

http://git-wip-us.apache.org/repos/asf/spark/blob/bc0848b4/sql/core/src/test/resources/sql-tests/results/predicate-functions.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/predicate-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/predicate-functions.sql.out
index 8e7e04c..8cd0d51 100644
--- a/sql/core/src/test/resources/sql-tests/results/predicate-functions.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/predicate-functions.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 27
+-- Number of queries: 31
 
 
 -- !query 0
@@ -21,12 +21,20 @@ true
 -- !query 2
 select 1.0 = '1'
 -- !query 2 schema
-struct<(1.0 = CAST(1 AS DECIMAL(2,1))):boolean>
+struct<(CAST(1.0 AS DOUBLE) = CAST(1 AS DOUBLE)):boolean>
 -- !query 2 output
 true
 
 
 -- !query 3
+select 1.5 = '1.51'
+-- !query 3 schema
+struct<(CAST(1.5 AS DOUBLE) = CAST(1.51 AS DOUBLE)):boolean>
+-- !query 3 output
+false
+
+
+-- !query 3
 select 1 > '1'
 -- !query 3 schema
 struct<(1 > CAST(1 AS INT)):boolean>
@@ -59,160 +67,192 @@ false
 
 
 -- !query 7
-select to_date('2009-07-30 04:17:52') > to_date('2009-07-30 04:17:52')
+select '1.5' > 0.5
 -- !query 7 schema
-struct<(to_date('2009-07-30 04:17:52') > to_date('2009-07-30 04:17:52')):boolean>
+struct<(CAST(1.5 AS DOUBLE) > CAST(0.5 AS DOUBLE)):boolean>
 -- !query 7 output
-false
+true
 
 
 -- !query 8
-select to_date('2009-07-30 04:17:52') > '2009-07-30 04:17:52'
+select to_date('2009-07-30 04:17:52') > to_date('2009-07-30 04:17:52')
 -- !query 8 schema
-struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) > 2009-07-30 04:17:52):boolean>
+struct<(to_date('2009-07-30 04:17:52') > to_date('2009-07-30 04:17:52')):boolean>
 -- !query 8 output
 false
 
 
 -- !query 9
-select 1 >= '1'
+select to_date('2009-07-30 04:17:52') > '2009-07-30 04:17:52'
 -- !query 9 schema
-struct<(1 >= CAST(1 AS INT)):boolean>
+struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) > 2009-07-30 04:17:52):boolean>
 -- !query 9 output
-true
+false
 
 
 -- !query 10
-select 2 >= '1.0'
+select 1 >= '1'
 -- !query 10 schema
-struct<(2 >= CAST(1.0 AS INT)):boolean>
+struct<(1 >= CAST(1 AS INT)):boolean>
 -- !query 10 output
 true
 
 
 -- !query 11
-select 2 >= '2.0'
+select 2 >= '1.0'
 -- !query 11 schema
-struct<(2 >= CAST(2.0 AS INT)):boolean>
+struct<(2 >= CAST(1.0 AS INT)):boolean>
 -- !query 11 output
 true
 
 
 -- !query 12
-select 2.0 >= '2.2'
+select 2 >= '2.0'
 -- !query 12 schema
-struct<(2.0 >= CAST(2.2 AS DECIMAL(2,1))):boolean>
+struct<(2 >= CAST(2.0 AS INT)):boolean>
 -- !query 12 output
-false
+true
 
 
 -- !query 13
-select to_date('2009-07-30 04:17:52') >= to_date('2009-07-30 04:17:52')
+select 2.0 >= '2.2'
 -- !query 13 schema
-struct<(to_date('2009-07-30 04:17:52') >= to_date('2009-07-30 04:17:52')):boolean>
+struct<(CAST(2.0 AS DOUBLE) >= CAST(2.2 AS DOUBLE)):boolean>
 -- !query 13 output
-true
+false
 
 
 -- !query 14
-select to_date('2009-07-30 04:17:52') >= '2009-07-30 04:17:52'
+select '1.5' >= 0.5
 -- !query 14 schema
-struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) >= 2009-07-30 04:17:52):boolean>
+struct<(CAST(1.5 AS DOUBLE) >= CAST(0.5 AS DOUBLE)):boolean>
 -- !query 14 output
-false
+true
 
 
 -- !query 15
-select 1 < '1'
+select to_date('2009-07-30 04:17:52') >= to_date('2009-07-30 04:17:52')
 -- !query 15 schema
-struct<(1 < CAST(1 AS INT)):boolean>
+struct<(to_date('2009-07-30 04:17:52') >= to_date('2009-07-30 04:17:52')):boolean>
 -- !query 15 output
-false
+true
 
 
 -- !query 16
-select 2 < '1.0'
+select to_date('2009-07-30 04:17:52') >= '2009-07-30 04:17:52'
 -- !query 16 schema
-struct<(2 < CAST(1.0 AS INT)):boolean>
+struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) >= 2009-07-30 04:17:52):boolean>
 -- !query 16 output
 false
 
 
 -- !query 17
-select 2 < '2.0'
+select 1 < '1'
 -- !query 17 schema
-struct<(2 < CAST(2.0 AS INT)):boolean>
+struct<(1 < CAST(1 AS INT)):boolean>
 -- !query 17 output
 false
 
 
 -- !query 18
-select 2.0 < '2.2'
+select 2 < '1.0'
 -- !query 18 schema
-struct<(2.0 < CAST(2.2 AS DECIMAL(2,1))):boolean>
+struct<(2 < CAST(1.0 AS INT)):boolean>
 -- !query 18 output
-true
+false
 
 
 -- !query 19
-select to_date('2009-07-30 04:17:52') < to_date('2009-07-30 04:17:52')
+select 2 < '2.0'
 -- !query 19 schema
-struct<(to_date('2009-07-30 04:17:52') < to_date('2009-07-30 04:17:52')):boolean>
+struct<(2 < CAST(2.0 AS INT)):boolean>
 -- !query 19 output
 false
 
 
 -- !query 20
-select to_date('2009-07-30 04:17:52') < '2009-07-30 04:17:52'
+select 2.0 < '2.2'
 -- !query 20 schema
-struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) < 2009-07-30 04:17:52):boolean>
+struct<(CAST(2.0 AS DOUBLE) < CAST(2.2 AS DOUBLE)):boolean>
 -- !query 20 output
 true
 
 
 -- !query 21
-select 1 <= '1'
+select 0.5 < '1.5'
 -- !query 21 schema
-struct<(1 <= CAST(1 AS INT)):boolean>
+struct<(CAST(0.5 AS DOUBLE) < CAST(1.5 AS DOUBLE)):boolean>
 -- !query 21 output
 true
 
 
 -- !query 22
-select 2 <= '1.0'
+select to_date('2009-07-30 04:17:52') < to_date('2009-07-30 04:17:52')
 -- !query 22 schema
-struct<(2 <= CAST(1.0 AS INT)):boolean>
+struct<(to_date('2009-07-30 04:17:52') < to_date('2009-07-30 04:17:52')):boolean>
 -- !query 22 output
 false
 
 
 -- !query 23
-select 2 <= '2.0'
+select to_date('2009-07-30 04:17:52') < '2009-07-30 04:17:52'
 -- !query 23 schema
-struct<(2 <= CAST(2.0 AS INT)):boolean>
+struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) < 2009-07-30 04:17:52):boolean>
 -- !query 23 output
 true
 
 
 -- !query 24
-select 2.0 <= '2.2'
+select 1 <= '1'
 -- !query 24 schema
-struct<(2.0 <= CAST(2.2 AS DECIMAL(2,1))):boolean>
+struct<(1 <= CAST(1 AS INT)):boolean>
 -- !query 24 output
 true
 
 
 -- !query 25
-select to_date('2009-07-30 04:17:52') <= to_date('2009-07-30 04:17:52')
+select 2 <= '1.0'
 -- !query 25 schema
-struct<(to_date('2009-07-30 04:17:52') <= to_date('2009-07-30 04:17:52')):boolean>
+struct<(2 <= CAST(1.0 AS INT)):boolean>
 -- !query 25 output
-true
+false
 
 
 -- !query 26
-select to_date('2009-07-30 04:17:52') <= '2009-07-30 04:17:52'
+select 2 <= '2.0'
 -- !query 26 schema
-struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) <= 2009-07-30 04:17:52):boolean>
+struct<(2 <= CAST(2.0 AS INT)):boolean>
 -- !query 26 output
 true
+
+
+-- !query 27
+select 2.0 <= '2.2'
+-- !query 27 schema
+struct<(CAST(2.0 AS DOUBLE) <= CAST(2.2 AS DOUBLE)):boolean>
+-- !query 27 output
+true
+
+
+-- !query 28
+select 0.5 <= '1.5'
+-- !query 28 schema
+struct<(CAST(0.5 AS DOUBLE) <= CAST(1.5 AS DOUBLE)):boolean>
+-- !query 28 output
+true
+
+
+-- !query 29
+select to_date('2009-07-30 04:17:52') <= to_date('2009-07-30 04:17:52')
+-- !query 29 schema
+struct<(to_date('2009-07-30 04:17:52') <= to_date('2009-07-30 04:17:52')):boolean>
+-- !query 29 output
+true
+
+
+-- !query 30
+select to_date('2009-07-30 04:17:52') <= '2009-07-30 04:17:52'
+-- !query 30 schema
+struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) <= 2009-07-30 04:17:52):boolean>
+-- !query 30 output
+true


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org