You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by rx...@apache.org on 2016/10/05 06:48:33 UTC

spark git commit: [SPARK-17258][SQL] Parse scientific decimal literals as decimals

Repository: spark
Updated Branches:
  refs/heads/master c9fe10d4e -> 89516c1c4


[SPARK-17258][SQL] Parse scientific decimal literals as decimals

## What changes were proposed in this pull request?
Currently Spark SQL parses regular decimal literals (e.g. `10.00`) as decimals and scientific decimal literals (e.g. `10.0e10`) as doubles. The difference between the two confuses most users. This PR unifies the parsing behavior and also parses scientific decimal literals as decimals.

The implications for tests are limited to a single Hive compatibility test.

## How was this patch tested?
Updated tests in `ExpressionParserSuite` and `SQLQueryTestSuite`.

Author: Herman van Hovell <hv...@databricks.com>

Closes #14828 from hvanhovell/SPARK-17258.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/89516c1c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/89516c1c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/89516c1c

Branch: refs/heads/master
Commit: 89516c1c4a167249b0c82f60a62edb45ede3bd2c
Parents: c9fe10d
Author: Herman van Hovell <hv...@databricks.com>
Authored: Tue Oct 4 23:48:26 2016 -0700
Committer: Reynold Xin <rx...@databricks.com>
Committed: Tue Oct 4 23:48:26 2016 -0700

----------------------------------------------------------------------
 .../apache/spark/sql/catalyst/parser/SqlBase.g4 |  7 +-----
 .../spark/sql/catalyst/parser/AstBuilder.scala  |  8 -------
 .../catalyst/parser/ExpressionParserSuite.scala | 24 ++++++++++----------
 .../resources/sql-tests/inputs/literals.sql     |  8 ++++---
 .../sql-tests/results/arithmetic.sql.out        |  2 +-
 .../sql-tests/results/literals.sql.out          | 24 +++++++++++++-------
 .../hive/execution/HiveCompatibilitySuite.scala |  4 +++-
 7 files changed, 38 insertions(+), 39 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/89516c1c/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index c336a0c..87719d9 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -653,7 +653,6 @@ quotedIdentifier
 
 number
     : MINUS? DECIMAL_VALUE            #decimalLiteral
-    | MINUS? SCIENTIFIC_DECIMAL_VALUE #scientificDecimalLiteral
     | MINUS? INTEGER_VALUE            #integerLiteral
     | MINUS? BIGINT_LITERAL           #bigIntLiteral
     | MINUS? SMALLINT_LITERAL         #smallIntLiteral
@@ -944,12 +943,8 @@ INTEGER_VALUE
     ;
 
 DECIMAL_VALUE
-    : DECIMAL_DIGITS {isValidDecimal()}?
-    ;
-
-SCIENTIFIC_DECIMAL_VALUE
     : DIGIT+ EXPONENT
-    | DECIMAL_DIGITS EXPONENT {isValidDecimal()}?
+    | DECIMAL_DIGITS EXPONENT? {isValidDecimal()}?
     ;
 
 DOUBLE_LITERAL

http://git-wip-us.apache.org/repos/asf/spark/blob/89516c1c/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index cd0c70a..bf3f302 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -1283,14 +1283,6 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
   }
 
   /**
-   * Create a double literal for a number denoted in scientific notation.
-   */
-  override def visitScientificDecimalLiteral(
-      ctx: ScientificDecimalLiteralContext): Literal = withOrigin(ctx) {
-    Literal(ctx.getText.toDouble)
-  }
-
-  /**
    * Create a decimal literal for a regular decimal number.
    */
   override def visitDecimalLiteral(ctx: DecimalLiteralContext): Literal = withOrigin(ctx) {

http://git-wip-us.apache.org/repos/asf/spark/blob/89516c1c/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
index 3718ac5..0fb1138 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
@@ -352,6 +352,10 @@ class ExpressionParserSuite extends PlanTest {
   }
 
   test("literals") {
+    def testDecimal(value: String): Unit = {
+      assertEqual(value, Literal(BigDecimal(value).underlying))
+    }
+
     // NULL
     assertEqual("null", Literal(null))
 
@@ -362,20 +366,18 @@ class ExpressionParserSuite extends PlanTest {
     // Integral should have the narrowest possible type
     assertEqual("787324", Literal(787324))
     assertEqual("7873247234798249234", Literal(7873247234798249234L))
-    assertEqual("78732472347982492793712334",
-      Literal(BigDecimal("78732472347982492793712334").underlying()))
+    testDecimal("78732472347982492793712334")
 
     // Decimal
-    assertEqual("7873247234798249279371.2334",
-      Literal(BigDecimal("7873247234798249279371.2334").underlying()))
+    testDecimal("7873247234798249279371.2334")
 
     // Scientific Decimal
-    assertEqual("9.0e1", 90d)
-    assertEqual(".9e+2", 90d)
-    assertEqual("0.9e+2", 90d)
-    assertEqual("900e-1", 90d)
-    assertEqual("900.0E-1", 90d)
-    assertEqual("9.e+1", 90d)
+    testDecimal("9.0e1")
+    testDecimal(".9e+2")
+    testDecimal("0.9e+2")
+    testDecimal("900e-1")
+    testDecimal("900.0E-1")
+    testDecimal("9.e+1")
     intercept(".e3")
 
     // Tiny Int Literal
@@ -395,8 +397,6 @@ class ExpressionParserSuite extends PlanTest {
     assertEqual("10.0D", Literal(10.0D))
     intercept("-1.8E308D", s"does not fit in range")
     intercept("1.8E308D", s"does not fit in range")
-    // TODO we need to figure out if we should throw an exception here!
-    assertEqual("1E309", Literal(Double.PositiveInfinity))
 
     // BigDecimal Literal
     assertEqual("90912830918230182310293801923652346786BD",

http://git-wip-us.apache.org/repos/asf/spark/blob/89516c1c/sql/core/src/test/resources/sql-tests/inputs/literals.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/inputs/literals.sql b/sql/core/src/test/resources/sql-tests/inputs/literals.sql
index 40dceb1..37b4b76 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/literals.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/literals.sql
@@ -50,14 +50,14 @@ select 1D, 1.2D, 1e10, 1.5e5, .10D, 0.10D, .1e5, .9e+2, 0.9e+2, 900e-1, 9.e+1;
 select -1D, -1.2D, -1e10, -1.5e5, -.10D, -0.10D, -.1e5;
 -- negative double
 select .e3;
--- inf and -inf
+-- very large decimals (overflowing double).
 select 1E309, -1E309;
 
 -- decimal parsing
 select 0.3, -0.8, .5, -.18, 0.1111, .1111;
 
--- super large scientific notation numbers should still be valid doubles
-select 123456789012345678901234567890123456789e10, 123456789012345678901234567890123456789.1e10;
+-- super large scientific notation double literals should still be valid doubles
+select 123456789012345678901234567890123456789e10d, 123456789012345678901234567890123456789.1e10d;
 
 -- string
 select "Hello Peter!", 'hello lee!';
@@ -103,3 +103,5 @@ select x'2379ACFe';
 -- invalid hexadecimal binary literal
 select X'XuZ';
 
+-- Hive literal_double test.
+SELECT 3.14, -3.14, 3.14e8, 3.14e-8, -3.14e8, -3.14e-8, 3.14e+8, 3.14E8, 3.14E-8;

http://git-wip-us.apache.org/repos/asf/spark/blob/89516c1c/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out b/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out
index 6abe048..ce42c01 100644
--- a/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out
@@ -29,7 +29,7 @@ struct<-5.2:decimal(2,1)>
 -- !query 3
 select +6.8e0
 -- !query 3 schema
-struct<6.8:double>
+struct<6.8:decimal(2,1)>
 -- !query 3 output
 6.8
 

http://git-wip-us.apache.org/repos/asf/spark/blob/89516c1c/sql/core/src/test/resources/sql-tests/results/literals.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/literals.sql.out b/sql/core/src/test/resources/sql-tests/results/literals.sql.out
index e2d8dae..95d4413 100644
--- a/sql/core/src/test/resources/sql-tests/results/literals.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/literals.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 42
+-- Number of queries: 43
 
 
 -- !query 0
@@ -167,17 +167,17 @@ select 1234567890123456789012345678901234567890.0
 -- !query 17
 select 1D, 1.2D, 1e10, 1.5e5, .10D, 0.10D, .1e5, .9e+2, 0.9e+2, 900e-1, 9.e+1
 -- !query 17 schema
-struct<1.0:double,1.2:double,1.0E10:double,150000.0:double,0.1:double,0.1:double,10000.0:double,90.0:double,90.0:double,90.0:double,90.0:double>
+struct<1.0:double,1.2:double,1E+10:decimal(1,-10),1.5E+5:decimal(2,-4),0.1:double,0.1:double,1E+4:decimal(1,-4),9E+1:decimal(1,-1),9E+1:decimal(1,-1),90.0:decimal(3,1),9E+1:decimal(1,-1)>
 -- !query 17 output
-1.0	1.2	1.0E10	150000.0	0.1	0.1	10000.0	90.0	90.0	90.0	90.0
+1.0	1.2	10000000000	150000	0.1	0.1	10000	90	90	90	90
 
 
 -- !query 18
 select -1D, -1.2D, -1e10, -1.5e5, -.10D, -0.10D, -.1e5
 -- !query 18 schema
-struct<-1.0:double,-1.2:double,-1.0E10:double,-150000.0:double,-0.1:double,-0.1:double,-10000.0:double>
+struct<-1.0:double,-1.2:double,-1E+10:decimal(1,-10),-1.5E+5:decimal(2,-4),-0.1:double,-0.1:double,-1E+4:decimal(1,-4)>
 -- !query 18 output
--1.0	-1.2	-1.0E10	-150000.0	-0.1	-0.1	-10000.0
+-1.0	-1.2	-10000000000	-150000	-0.1	-0.1	-10000
 
 
 -- !query 19
@@ -197,9 +197,9 @@ select .e3
 -- !query 20
 select 1E309, -1E309
 -- !query 20 schema
-struct<Infinity:double,-Infinity:double>
+struct<1E+309:decimal(1,-309),-1E+309:decimal(1,-309)>
 -- !query 20 output
-Infinity	-Infinity
+1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000	-1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
 
 
 -- !query 21
@@ -211,7 +211,7 @@ struct<0.3:decimal(1,1),-0.8:decimal(1,1),0.5:decimal(1,1),-0.18:decimal(2,2),0.
 
 
 -- !query 22
-select 123456789012345678901234567890123456789e10, 123456789012345678901234567890123456789.1e10
+select 123456789012345678901234567890123456789e10d, 123456789012345678901234567890123456789.1e10d
 -- !query 22 schema
 struct<1.2345678901234568E48:double,1.2345678901234568E48:double>
 -- !query 22 output
@@ -408,3 +408,11 @@ contains illegal character for hexBinary: 0XuZ(line 1, pos 7)
 == SQL ==
 select X'XuZ'
 -------^^^
+
+
+-- !query 42
+SELECT 3.14, -3.14, 3.14e8, 3.14e-8, -3.14e8, -3.14e-8, 3.14e+8, 3.14E8, 3.14E-8
+-- !query 42 schema
+struct<3.14:decimal(3,2),-3.14:decimal(3,2),3.14E+8:decimal(3,-6),3.14E-8:decimal(10,10),-3.14E+8:decimal(3,-6),-3.14E-8:decimal(10,10),3.14E+8:decimal(3,-6),3.14E+8:decimal(3,-6),3.14E-8:decimal(10,10)>
+-- !query 42 output
+3.14	-3.14	314000000	0.0000000314	-314000000	-0.0000000314	314000000	314000000	0.0000000314

http://git-wip-us.apache.org/repos/asf/spark/blob/89516c1c/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
index bebcb8f..f5d10de 100644
--- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
+++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
@@ -555,6 +555,9 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
     "varchar_2",
     "varchar_join1",
 
+    // This test assumes we parse scientific decimals as doubles (we parse them as decimals)
+    "literal_double",
+
     // These tests are duplicates of joinXYZ
     "auto_join0",
     "auto_join1",
@@ -832,7 +835,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
     "leftsemijoin_mr",
     "limit_pushdown_negative",
     "lineage1",
-    "literal_double",
     "literal_ints",
     "literal_string",
     "load_dyn_part1",


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org