Posted to commits@spark.apache.org by do...@apache.org on 2020/06/20 18:15:29 UTC

[spark] branch branch-3.0 updated: [SPARK-32021][SQL][3.0] Increase precision of seconds and fractions of `make_interval`

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 2f3618c  [SPARK-32021][SQL][3.0] Increase precision of seconds and fractions of `make_interval`
2f3618c is described below

commit 2f3618cdf9cfa813d7a44ab9a7ab24035cb1fd33
Author: Max Gekk <ma...@gmail.com>
AuthorDate: Sat Jun 20 11:12:03 2020 -0700

    [SPARK-32021][SQL][3.0] Increase precision of seconds and fractions of `make_interval`
    
    ### What changes were proposed in this pull request?
    Change the precision of the seconds parameter and its fraction from 8 to 18 digits (`Decimal.MAX_LONG_DIGITS`), so that `make_interval` can construct intervals up to the maximum allowed microseconds value (a 64-bit long).
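
    For a rough sense of the ranges involved, here is a minimal sketch using Spark's `Decimal` class (illustrative only, not code from this patch):

        import org.apache.spark.sql.types.Decimal

        // DECIMAL(8,6) leaves only 2 integral digits, so the old seconds
        // argument topped out at 99.999999 seconds.
        Decimal(BigDecimal("99.999999"), 8, 6)

        // DECIMAL(18,6), where 18 = Decimal.MAX_LONG_DIGITS, leaves 12
        // integral digits and admits values such as the one tested below.
        Decimal(BigDecimal("123456789012.123456"), Decimal.MAX_LONG_DIGITS, 6)

        // Values that still do not fit raise ArithmeticException:
        // Decimal(BigDecimal("1234567890123456789"), 18, 6)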
    
    ### Why are the changes needed?
    To improve the user experience of Spark SQL: with DECIMAL(8,6), the seconds argument of `make_interval` was limited to less than 100 seconds, far below the range of the interval's underlying 64-bit microseconds field.
    
    ### Does this PR introduce _any_ user-facing change?
    Yes. The seconds argument of `make_interval` is now cast to DECIMAL(18,6) instead of DECIMAL(8,6), so far larger seconds values are accepted.
    
    ### How was this patch tested?
    - Add tests to `IntervalExpressionsSuite`
    - Add an example to the `MakeInterval` expression
    - Add tests to `interval.sql` (a quick interactive check follows below)
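
    As a quick interactive check (a hypothetical spark-shell session on a build of this branch; the query and its output are taken from the new `interval.sql` tests below):

        spark.sql("select make_interval(1, 2, 3, 4, 0, 0, 123456789012.123456)")
          .show(truncate = false)
        // 1 years 2 months 25 days 34293552 hours 30 minutes 12.123456 seconds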
    
    Authored-by: Max Gekk <max.gekk@gmail.com>
    Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
    (cherry picked from commit 66ba35666a49023cd8da30cf84aff5c6fb1d8799)
    
    Closes #28878 from MaxGekk/make_interval-sec-precision-3.0.
    
    Authored-by: Max Gekk <ma...@gmail.com>
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
 .../catalyst/expressions/intervalExpressions.scala  |  8 +++++---
 .../expressions/IntervalExpressionsSuite.scala      |  9 ++++++++-
 .../sql-functions/sql-expression-schema.md          |  4 ++--
 .../test/resources/sql-tests/inputs/interval.sql    |  2 ++
 .../sql-tests/results/ansi/interval.sql.out         | 21 +++++++++++++++++++--
 .../resources/sql-tests/results/interval.sql.out    | 20 ++++++++++++++++++--
 6 files changed, 54 insertions(+), 10 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala
index 1a569a7..8cfea1e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala
@@ -149,6 +149,8 @@ case class DivideInterval(
        100 years 11 months 8 days 12 hours 30 minutes 1.001001 seconds
       > SELECT _FUNC_(100, null, 3);
        NULL
+      > SELECT _FUNC_(0, 1, 0, 1, 0, 0, 100.000001);
+       1 months 1 days 1 minutes 40.000001 seconds
   """,
   since = "3.0.0")
 // scalastyle:on line.size.limit
@@ -169,7 +171,7 @@ case class MakeInterval(
       days: Expression,
       hours: Expression,
       mins: Expression) = {
-    this(years, months, weeks, days, hours, mins, Literal(Decimal(0, 8, 6)))
+    this(years, months, weeks, days, hours, mins, Literal(Decimal(0, Decimal.MAX_LONG_DIGITS, 6)))
   }
   def this(
       years: Expression,
@@ -191,7 +193,7 @@ case class MakeInterval(
   // Accept `secs` as DecimalType to avoid losing precision of microseconds while converting
   // them to the fractional part of `secs`.
   override def inputTypes: Seq[AbstractDataType] = Seq(IntegerType, IntegerType, IntegerType,
-    IntegerType, IntegerType, IntegerType, DecimalType(8, 6))
+    IntegerType, IntegerType, IntegerType, DecimalType(Decimal.MAX_LONG_DIGITS, 6))
   override def dataType: DataType = CalendarIntervalType
   override def nullable: Boolean = true
 
@@ -211,7 +213,7 @@ case class MakeInterval(
         day.asInstanceOf[Int],
         hour.asInstanceOf[Int],
         min.asInstanceOf[Int],
-        sec.map(_.asInstanceOf[Decimal]).getOrElse(Decimal(0, 8, 6)))
+        sec.map(_.asInstanceOf[Decimal]).getOrElse(Decimal(0, Decimal.MAX_LONG_DIGITS, 6)))
     } catch {
       case _: ArithmeticException => null
     }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/IntervalExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/IntervalExpressionsSuite.scala
index 8c972a9..6b7be4f 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/IntervalExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/IntervalExpressionsSuite.scala
@@ -190,7 +190,8 @@ class IntervalExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
         micros: Int = 0): Unit = {
       val secFrac = DateTimeTestUtils.secFrac(seconds, millis, micros)
       val intervalExpr = MakeInterval(Literal(years), Literal(months), Literal(weeks),
-        Literal(days), Literal(hours), Literal(minutes), Literal(Decimal(secFrac, 8, 6)))
+        Literal(days), Literal(hours), Literal(minutes),
+        Literal(Decimal(secFrac, Decimal.MAX_LONG_DIGITS, 6)))
       val totalMonths = years * MONTHS_PER_YEAR + months
       val totalDays = weeks * DAYS_PER_WEEK + days
       val totalMicros = secFrac + minutes * MICROS_PER_MINUTE + hours * MICROS_PER_HOUR
@@ -206,5 +207,11 @@ class IntervalExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
     check(years = 10000, micros = -1)
     check(-9999, -11, 0, -31, -23, -59, -59, -999, -999)
     check(years = -10000, micros = 1)
+    check(
+      hours = Int.MaxValue,
+      minutes = Int.MaxValue,
+      seconds = Int.MaxValue,
+      millis = Int.MaxValue,
+      micros = Int.MaxValue)
   }
 }
diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
index 8040d0d..f8b1190 100644
--- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
+++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
@@ -163,7 +163,7 @@
 | org.apache.spark.sql.catalyst.expressions.Lower | lcase | SELECT lcase('SparkSql') | struct<lcase(SparkSql):string> |
 | org.apache.spark.sql.catalyst.expressions.Lower | lower | SELECT lower('SparkSql') | struct<lower(SparkSql):string> |
 | org.apache.spark.sql.catalyst.expressions.MakeDate | make_date | SELECT make_date(2013, 7, 15) | struct<make_date(2013, 7, 15):date> |
-| org.apache.spark.sql.catalyst.expressions.MakeInterval | make_interval | SELECT make_interval(100, 11, 1, 1, 12, 30, 01.001001) | struct<make_interval(100, 11, 1, 1, 12, 30, CAST(1.001001 AS DECIMAL(8,6))):interval> |
+| org.apache.spark.sql.catalyst.expressions.MakeInterval | make_interval | SELECT make_interval(100, 11, 1, 1, 12, 30, 01.001001) | struct<make_interval(100, 11, 1, 1, 12, 30, CAST(1.001001 AS DECIMAL(18,6))):interval> |
 | org.apache.spark.sql.catalyst.expressions.MakeTimestamp | make_timestamp | SELECT make_timestamp(2014, 12, 28, 6, 30, 45.887) | struct<make_timestamp(2014, 12, 28, 6, 30, CAST(45.887 AS DECIMAL(8,6))):timestamp> |
 | org.apache.spark.sql.catalyst.expressions.MapConcat | map_concat | SELECT map_concat(map(1, 'a', 2, 'b'), map(3, 'c')) | struct<map_concat(map(1, a, 2, b), map(3, c)):map<int,string>> |
 | org.apache.spark.sql.catalyst.expressions.MapEntries | map_entries | SELECT map_entries(map(1, 'a', 2, 'b')) | struct<map_entries(map(1, a, 2, b)):array<struct<key:int,value:string>>> |
@@ -333,4 +333,4 @@
 | org.apache.spark.sql.catalyst.expressions.xml.XPathList | xpath | SELECT xpath('<a><b>b1</b><b>b2</b><b>b3</b><c>c1</c><c>c2</c></a>','a/b/text()') | struct<xpath(<a><b>b1</b><b>b2</b><b>b3</b><c>c1</c><c>c2</c></a>, a/b/text()):array<string>> |
 | org.apache.spark.sql.catalyst.expressions.xml.XPathLong | xpath_long | SELECT xpath_long('<a><b>1</b><b>2</b></a>', 'sum(a/b)') | struct<xpath_long(<a><b>1</b><b>2</b></a>, sum(a/b)):bigint> |
 | org.apache.spark.sql.catalyst.expressions.xml.XPathShort | xpath_short | SELECT xpath_short('<a><b>1</b><b>2</b></a>', 'sum(a/b)') | struct<xpath_short(<a><b>1</b><b>2</b></a>, sum(a/b)):smallint> |
-| org.apache.spark.sql.catalyst.expressions.xml.XPathString | xpath_string | SELECT xpath_string('<a><b>b</b><c>cc</c></a>','a/c') | struct<xpath_string(<a><b>b</b><c>cc</c></a>, a/c):string> |
+| org.apache.spark.sql.catalyst.expressions.xml.XPathString | xpath_string | SELECT xpath_string('<a><b>b</b><c>cc</c></a>','a/c') | struct<xpath_string(<a><b>b</b><c>cc</c></a>, a/c):string> |
\ No newline at end of file
diff --git a/sql/core/src/test/resources/sql-tests/inputs/interval.sql b/sql/core/src/test/resources/sql-tests/inputs/interval.sql
index 600df2c..a7e1afe 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/interval.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/interval.sql
@@ -29,6 +29,8 @@ select make_interval(1, 2, 3, 4);
 select make_interval(1, 2, 3, 4, 5);
 select make_interval(1, 2, 3, 4, 5, 6);
 select make_interval(1, 2, 3, 4, 5, 6, 7.008009);
+select make_interval(1, 2, 3, 4, 0, 0, 123456789012.123456);
+select make_interval(0, 0, 0, 0, 0, 0, 1234567890123456789);
 
 -- cast string to intervals
 select cast('1 second' as interval);
diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
index 6a2b301..e83444d 100644
--- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 90
+-- Number of queries: 92
 
 
 -- !query
@@ -178,12 +178,29 @@ struct<make_interval(1, 2, 3, 4, 5, 6, 0.000000):interval>
 -- !query
 select make_interval(1, 2, 3, 4, 5, 6, 7.008009)
 -- !query schema
-struct<make_interval(1, 2, 3, 4, 5, 6, CAST(7.008009 AS DECIMAL(8,6))):interval>
+struct<make_interval(1, 2, 3, 4, 5, 6, CAST(7.008009 AS DECIMAL(18,6))):interval>
 -- !query output
 1 years 2 months 25 days 5 hours 6 minutes 7.008009 seconds
 
 
 -- !query
+select make_interval(1, 2, 3, 4, 0, 0, 123456789012.123456)
+-- !query schema
+struct<make_interval(1, 2, 3, 4, 0, 0, 123456789012.123456):interval>
+-- !query output
+1 years 2 months 25 days 34293552 hours 30 minutes 12.123456 seconds
+
+
+-- !query
+select make_interval(0, 0, 0, 0, 0, 0, 1234567890123456789)
+-- !query schema
+struct<>
+-- !query output
+java.lang.ArithmeticException
+Decimal(expanded,1234567890123456789,20,0}) cannot be represented as Decimal(18, 6).
+
+
+-- !query
 select cast('1 second' as interval)
 -- !query schema
 struct<CAST(1 second AS INTERVAL):interval>
diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
index 94562ca..4cdc669 100644
--- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 90
+-- Number of queries: 92
 
 
 -- !query
@@ -173,12 +173,28 @@ struct<make_interval(1, 2, 3, 4, 5, 6, 0.000000):interval>
 -- !query
 select make_interval(1, 2, 3, 4, 5, 6, 7.008009)
 -- !query schema
-struct<make_interval(1, 2, 3, 4, 5, 6, CAST(7.008009 AS DECIMAL(8,6))):interval>
+struct<make_interval(1, 2, 3, 4, 5, 6, CAST(7.008009 AS DECIMAL(18,6))):interval>
 -- !query output
 1 years 2 months 25 days 5 hours 6 minutes 7.008009 seconds
 
 
 -- !query
+select make_interval(1, 2, 3, 4, 0, 0, 123456789012.123456)
+-- !query schema
+struct<make_interval(1, 2, 3, 4, 0, 0, 123456789012.123456):interval>
+-- !query output
+1 years 2 months 25 days 34293552 hours 30 minutes 12.123456 seconds
+
+
+-- !query
+select make_interval(0, 0, 0, 0, 0, 0, 1234567890123456789)
+-- !query schema
+struct<make_interval(0, 0, 0, 0, 0, 0, CAST(1234567890123456789 AS DECIMAL(18,6))):interval>
+-- !query output
+NULL
+
+
+-- !query
 select cast('1 second' as interval)
 -- !query schema
 struct<CAST(1 second AS INTERVAL):interval>

