You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2020/06/20 18:15:29 UTC
[spark] branch branch-3.0 updated: [SPARK-32021][SQL][3.0] Increase
precision of seconds and fractions of `make_interval`
This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 2f3618c [SPARK-32021][SQL][3.0] Increase precision of seconds and fractions of `make_interval`
2f3618c is described below
commit 2f3618cdf9cfa813d7a44ab9a7ab24035cb1fd33
Author: Max Gekk <ma...@gmail.com>
AuthorDate: Sat Jun 20 11:12:03 2020 -0700
[SPARK-32021][SQL][3.0] Increase precision of seconds and fractions of `make_interval`
### What changes were proposed in this pull request?
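Increase the decimal precision of the seconds argument of `make_interval` (whole seconds plus the fractional part) from 8 to 18 digits, i.e. from `DECIMAL(8,6)` to `DECIMAL(18,6)`, so that intervals up to the maximum allowed microseconds value (a `long`) can be constructed.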
### Why are the changes needed?
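To improve the UX of Spark SQL: `DECIMAL(8,6)` leaves only two digits for whole seconds, so a value such as `100.000001` does not fit in the seconds argument.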
### Does this PR introduce _any_ user-facing change?
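Yes. The seconds argument of `make_interval` is now cast to `DECIMAL(18,6)` instead of `DECIMAL(8,6)`, so larger seconds values are accepted, e.g. `make_interval(0, 1, 0, 1, 0, 0, 100.000001)` returns `1 months 1 days 1 minutes 40.000001 seconds`.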
### How was this patch tested?
- Add tests to IntervalExpressionsSuite
- Add an example to the `MakeInterval` expression
- Add tests to `interval.sql`
Authored-by: Max Gekk max.gekkgmail.com
Signed-off-by: Dongjoon Hyun dongjoonapache.org
(cherry picked from commit 66ba35666a49023cd8da30cf84aff5c6fb1d8799)
Closes #28878 from MaxGekk/make_interval-sec-precision-3.0.
Authored-by: Max Gekk <ma...@gmail.com>
Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
.../catalyst/expressions/intervalExpressions.scala | 8 +++++---
.../expressions/IntervalExpressionsSuite.scala | 9 ++++++++-
.../sql-functions/sql-expression-schema.md | 4 ++--
.../test/resources/sql-tests/inputs/interval.sql | 2 ++
.../sql-tests/results/ansi/interval.sql.out | 21 +++++++++++++++++++--
.../resources/sql-tests/results/interval.sql.out | 20 ++++++++++++++++++--
6 files changed, 54 insertions(+), 10 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala
index 1a569a7..8cfea1e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala
@@ -149,6 +149,8 @@ case class DivideInterval(
100 years 11 months 8 days 12 hours 30 minutes 1.001001 seconds
> SELECT _FUNC_(100, null, 3);
NULL
+ > SELECT _FUNC_(0, 1, 0, 1, 0, 0, 100.000001);
+ 1 months 1 days 1 minutes 40.000001 seconds
""",
since = "3.0.0")
// scalastyle:on line.size.limit
@@ -169,7 +171,7 @@ case class MakeInterval(
days: Expression,
hours: Expression,
mins: Expression) = {
- this(years, months, weeks, days, hours, mins, Literal(Decimal(0, 8, 6)))
+ this(years, months, weeks, days, hours, mins, Literal(Decimal(0, Decimal.MAX_LONG_DIGITS, 6)))
}
def this(
years: Expression,
@@ -191,7 +193,7 @@ case class MakeInterval(
// Accept `secs` as DecimalType to avoid loosing precision of microseconds while converting
// them to the fractional part of `secs`.
override def inputTypes: Seq[AbstractDataType] = Seq(IntegerType, IntegerType, IntegerType,
- IntegerType, IntegerType, IntegerType, DecimalType(8, 6))
+ IntegerType, IntegerType, IntegerType, DecimalType(Decimal.MAX_LONG_DIGITS, 6))
override def dataType: DataType = CalendarIntervalType
override def nullable: Boolean = true
@@ -211,7 +213,7 @@ case class MakeInterval(
day.asInstanceOf[Int],
hour.asInstanceOf[Int],
min.asInstanceOf[Int],
- sec.map(_.asInstanceOf[Decimal]).getOrElse(Decimal(0, 8, 6)))
+ sec.map(_.asInstanceOf[Decimal]).getOrElse(Decimal(0, Decimal.MAX_LONG_DIGITS, 6)))
} catch {
case _: ArithmeticException => null
}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/IntervalExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/IntervalExpressionsSuite.scala
index 8c972a9..6b7be4f 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/IntervalExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/IntervalExpressionsSuite.scala
@@ -190,7 +190,8 @@ class IntervalExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
micros: Int = 0): Unit = {
val secFrac = DateTimeTestUtils.secFrac(seconds, millis, micros)
val intervalExpr = MakeInterval(Literal(years), Literal(months), Literal(weeks),
- Literal(days), Literal(hours), Literal(minutes), Literal(Decimal(secFrac, 8, 6)))
+ Literal(days), Literal(hours), Literal(minutes),
+ Literal(Decimal(secFrac, Decimal.MAX_LONG_DIGITS, 6)))
val totalMonths = years * MONTHS_PER_YEAR + months
val totalDays = weeks * DAYS_PER_WEEK + days
val totalMicros = secFrac + minutes * MICROS_PER_MINUTE + hours * MICROS_PER_HOUR
@@ -206,5 +207,11 @@ class IntervalExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
check(years = 10000, micros = -1)
check(-9999, -11, 0, -31, -23, -59, -59, -999, -999)
check(years = -10000, micros = 1)
+ check(
+ hours = Int.MaxValue,
+ minutes = Int.MaxValue,
+ seconds = Int.MaxValue,
+ millis = Int.MaxValue,
+ micros = Int.MaxValue)
}
}
diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
index 8040d0d..f8b1190 100644
--- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
+++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
@@ -163,7 +163,7 @@
| org.apache.spark.sql.catalyst.expressions.Lower | lcase | SELECT lcase('SparkSql') | struct<lcase(SparkSql):string> |
| org.apache.spark.sql.catalyst.expressions.Lower | lower | SELECT lower('SparkSql') | struct<lower(SparkSql):string> |
| org.apache.spark.sql.catalyst.expressions.MakeDate | make_date | SELECT make_date(2013, 7, 15) | struct<make_date(2013, 7, 15):date> |
-| org.apache.spark.sql.catalyst.expressions.MakeInterval | make_interval | SELECT make_interval(100, 11, 1, 1, 12, 30, 01.001001) | struct<make_interval(100, 11, 1, 1, 12, 30, CAST(1.001001 AS DECIMAL(8,6))):interval> |
+| org.apache.spark.sql.catalyst.expressions.MakeInterval | make_interval | SELECT make_interval(100, 11, 1, 1, 12, 30, 01.001001) | struct<make_interval(100, 11, 1, 1, 12, 30, CAST(1.001001 AS DECIMAL(18,6))):interval> |
| org.apache.spark.sql.catalyst.expressions.MakeTimestamp | make_timestamp | SELECT make_timestamp(2014, 12, 28, 6, 30, 45.887) | struct<make_timestamp(2014, 12, 28, 6, 30, CAST(45.887 AS DECIMAL(8,6))):timestamp> |
| org.apache.spark.sql.catalyst.expressions.MapConcat | map_concat | SELECT map_concat(map(1, 'a', 2, 'b'), map(3, 'c')) | struct<map_concat(map(1, a, 2, b), map(3, c)):map<int,string>> |
| org.apache.spark.sql.catalyst.expressions.MapEntries | map_entries | SELECT map_entries(map(1, 'a', 2, 'b')) | struct<map_entries(map(1, a, 2, b)):array<struct<key:int,value:string>>> |
@@ -333,4 +333,4 @@
| org.apache.spark.sql.catalyst.expressions.xml.XPathList | xpath | SELECT xpath('<a><b>b1</b><b>b2</b><b>b3</b><c>c1</c><c>c2</c></a>','a/b/text()') | struct<xpath(<a><b>b1</b><b>b2</b><b>b3</b><c>c1</c><c>c2</c></a>, a/b/text()):array<string>> |
| org.apache.spark.sql.catalyst.expressions.xml.XPathLong | xpath_long | SELECT xpath_long('<a><b>1</b><b>2</b></a>', 'sum(a/b)') | struct<xpath_long(<a><b>1</b><b>2</b></a>, sum(a/b)):bigint> |
| org.apache.spark.sql.catalyst.expressions.xml.XPathShort | xpath_short | SELECT xpath_short('<a><b>1</b><b>2</b></a>', 'sum(a/b)') | struct<xpath_short(<a><b>1</b><b>2</b></a>, sum(a/b)):smallint> |
-| org.apache.spark.sql.catalyst.expressions.xml.XPathString | xpath_string | SELECT xpath_string('<a><b>b</b><c>cc</c></a>','a/c') | struct<xpath_string(<a><b>b</b><c>cc</c></a>, a/c):string> |
+| org.apache.spark.sql.catalyst.expressions.xml.XPathString | xpath_string | SELECT xpath_string('<a><b>b</b><c>cc</c></a>','a/c') | struct<xpath_string(<a><b>b</b><c>cc</c></a>, a/c):string> |
\ No newline at end of file
diff --git a/sql/core/src/test/resources/sql-tests/inputs/interval.sql b/sql/core/src/test/resources/sql-tests/inputs/interval.sql
index 600df2c..a7e1afe 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/interval.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/interval.sql
@@ -29,6 +29,8 @@ select make_interval(1, 2, 3, 4);
select make_interval(1, 2, 3, 4, 5);
select make_interval(1, 2, 3, 4, 5, 6);
select make_interval(1, 2, 3, 4, 5, 6, 7.008009);
+select make_interval(1, 2, 3, 4, 0, 0, 123456789012.123456);
+select make_interval(0, 0, 0, 0, 0, 0, 1234567890123456789);
-- cast string to intervals
select cast('1 second' as interval);
diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
index 6a2b301..e83444d 100644
--- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 90
+-- Number of queries: 92
-- !query
@@ -178,12 +178,29 @@ struct<make_interval(1, 2, 3, 4, 5, 6, 0.000000):interval>
-- !query
select make_interval(1, 2, 3, 4, 5, 6, 7.008009)
-- !query schema
-struct<make_interval(1, 2, 3, 4, 5, 6, CAST(7.008009 AS DECIMAL(8,6))):interval>
+struct<make_interval(1, 2, 3, 4, 5, 6, CAST(7.008009 AS DECIMAL(18,6))):interval>
-- !query output
1 years 2 months 25 days 5 hours 6 minutes 7.008009 seconds
-- !query
+select make_interval(1, 2, 3, 4, 0, 0, 123456789012.123456)
+-- !query schema
+struct<make_interval(1, 2, 3, 4, 0, 0, 123456789012.123456):interval>
+-- !query output
+1 years 2 months 25 days 34293552 hours 30 minutes 12.123456 seconds
+
+
+-- !query
+select make_interval(0, 0, 0, 0, 0, 0, 1234567890123456789)
+-- !query schema
+struct<>
+-- !query output
+java.lang.ArithmeticException
+Decimal(expanded,1234567890123456789,20,0}) cannot be represented as Decimal(18, 6).
+
+
+-- !query
select cast('1 second' as interval)
-- !query schema
struct<CAST(1 second AS INTERVAL):interval>
diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
index 94562ca..4cdc669 100644
--- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 90
+-- Number of queries: 92
-- !query
@@ -173,12 +173,28 @@ struct<make_interval(1, 2, 3, 4, 5, 6, 0.000000):interval>
-- !query
select make_interval(1, 2, 3, 4, 5, 6, 7.008009)
-- !query schema
-struct<make_interval(1, 2, 3, 4, 5, 6, CAST(7.008009 AS DECIMAL(8,6))):interval>
+struct<make_interval(1, 2, 3, 4, 5, 6, CAST(7.008009 AS DECIMAL(18,6))):interval>
-- !query output
1 years 2 months 25 days 5 hours 6 minutes 7.008009 seconds
-- !query
+select make_interval(1, 2, 3, 4, 0, 0, 123456789012.123456)
+-- !query schema
+struct<make_interval(1, 2, 3, 4, 0, 0, 123456789012.123456):interval>
+-- !query output
+1 years 2 months 25 days 34293552 hours 30 minutes 12.123456 seconds
+
+
+-- !query
+select make_interval(0, 0, 0, 0, 0, 0, 1234567890123456789)
+-- !query schema
+struct<make_interval(0, 0, 0, 0, 0, 0, CAST(1234567890123456789 AS DECIMAL(18,6))):interval>
+-- !query output
+NULL
+
+
+-- !query
select cast('1 second' as interval)
-- !query schema
struct<CAST(1 second AS INTERVAL):interval>
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org