You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2019/11/01 14:08:09 UTC
[spark] branch branch-2.4 updated: [SPARK-29651][SQL][2.4] Fix
parsing of interval seconds fraction
This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-2.4 by this push:
new 9b17a68 [SPARK-29651][SQL][2.4] Fix parsing of interval seconds fraction
9b17a68 is described below
commit 9b17a682e6ad25f396a2c9ccfbe7c402a403ba4a
Author: Maxim Gekk <ma...@gmail.com>
AuthorDate: Fri Nov 1 22:07:26 2019 +0800
[SPARK-29651][SQL][2.4] Fix parsing of interval seconds fraction
### What changes were proposed in this pull request?
In the PR, I propose to extract parsing of the seconds interval units to the private method `parseNanos` in `CalendarInterval` and modify the code to correctly parse the fractional part of the seconds unit of intervals in the cases:
- When the fractional part has less than 9 digits
- The seconds unit is negative
This is a back port of the commit https://github.com/apache/spark/commit/3206a9987001d78cf2f48509a93d73af86f51cfe.
### Why are the changes needed?
The changes are needed to fix the issues:
```sql
spark-sql> select interval 10.123456 seconds;
interval 10 seconds 123 microseconds
```
The correct result must be `interval 10 seconds 123 milliseconds 456 microseconds`
```sql
spark-sql> select interval -10.123456789 seconds;
interval -9 seconds -876 milliseconds -544 microseconds
```
The whole interval should be negated instead, so the correct result is `interval -10 seconds -123 milliseconds -456 microseconds`, taking into account the truncation of the nanosecond fraction to microsecond precision.
### Does this PR introduce any user-facing change?
Yes. After the changes:
```sql
spark-sql> select interval 10.123456 seconds;
interval 10 seconds 123 milliseconds 456 microseconds
spark-sql> select interval -10.123456789 seconds;
interval -10 seconds -123 milliseconds -456 microseconds
```
### How was this patch tested?
By existing test suite, `literals.sql` and new tests in `ExpressionParserSuite`.
Closes #26355 from MaxGekk/fix-interval-nanos-parsing-2.4.
Authored-by: Maxim Gekk <ma...@gmail.com>
Signed-off-by: Wenchen Fan <we...@databricks.com>
---
.../spark/unsafe/types/CalendarInterval.java | 27 ++++++++++++++++++----
.../spark/unsafe/types/CalendarIntervalSuite.java | 2 +-
.../catalyst/parser/ExpressionParserSuite.scala | 15 +++++++++++-
.../resources/sql-tests/results/literals.sql.out | 4 ++--
4 files changed, 39 insertions(+), 9 deletions(-)
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java
index 1818fef..b75a1f5 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java
@@ -19,6 +19,7 @@ package org.apache.spark.unsafe.types;
import java.io.Serializable;
import java.util.Locale;
+import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -177,11 +178,10 @@ public final class CalendarInterval implements Serializable {
long minutes = toLongWithRange("minute", m.group(4), 0, 59);
long seconds = toLongWithRange("second", m.group(5), 0, 59);
// Hive allow nanosecond precision interval
- String nanoStr = m.group(7) == null ? null : (m.group(7) + "000000000").substring(0, 9);
- long nanos = toLongWithRange("nanosecond", nanoStr, 0L, 999999999L);
+ long secondsFraction = parseNanos(m.group(7), seconds < 0);
result = new CalendarInterval(0, sign * (
days * MICROS_PER_DAY + hours * MICROS_PER_HOUR + minutes * MICROS_PER_MINUTE +
- seconds * MICROS_PER_SECOND + nanos / 1000L));
+ seconds * MICROS_PER_SECOND + secondsFraction));
} catch (Exception e) {
throw new IllegalArgumentException(
"Error parsing interval day-time string: " + e.getMessage(), e);
@@ -270,8 +270,8 @@ public final class CalendarInterval implements Serializable {
} else if (parts.length == 2) {
long seconds = parts[0].equals("") ? 0L : toLongWithRange("second", parts[0],
Long.MIN_VALUE / MICROS_PER_SECOND, Long.MAX_VALUE / MICROS_PER_SECOND);
- long nanos = toLongWithRange("nanosecond", parts[1], 0L, 999999999L);
- return seconds * MICROS_PER_SECOND + nanos / 1000L;
+ long secondsFraction = parseNanos(parts[1], seconds < 0);
+ return seconds * MICROS_PER_SECOND + secondsFraction;
} else {
throw new IllegalArgumentException(
@@ -357,4 +357,21 @@ public final class CalendarInterval implements Serializable {
sb.append(' ').append(value).append(' ').append(unit).append('s');
}
}
+
+ // Parses a string with nanoseconds, truncates the result and returns microseconds
+ private static long parseNanos(String nanosStr, boolean isNegative) {
+ if (nanosStr != null) {
+ int maxNanosLen = 9;
+ String alignedStr = nanosStr;
+ if (nanosStr.length() < maxNanosLen) {
+ alignedStr = (nanosStr + "000000000").substring(0, maxNanosLen);
+ }
+ long nanos = toLongWithRange("nanosecond", alignedStr, 0L, 999999999L);
+ long micros = TimeUnit.NANOSECONDS.toMicros(nanos);
+ micros = isNegative ? -micros : micros;
+ return micros;
+ } else {
+ return 0;
+ }
+ }
}
diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java
index c125ba5..2454c7d 100644
--- a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java
+++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java
@@ -201,7 +201,7 @@ public class CalendarIntervalSuite {
assertEquals(fromSingleUnitString("day", input), i);
input = "1999.38888";
- i = new CalendarInterval(0, 1999 * MICROS_PER_SECOND + 38);
+ i = new CalendarInterval(0, 1999 * MICROS_PER_SECOND + 388 * MICROS_PER_MILLI + 880);
assertEquals(fromSingleUnitString("second", input), i);
try {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
index 1eec9e7..2bc41da 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
@@ -23,6 +23,7 @@ import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, _}
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.aggregate.{First, Last}
import org.apache.spark.sql.catalyst.plans.PlanTest
+import org.apache.spark.sql.catalyst.util.DateTimeUtils
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.CalendarInterval
@@ -608,7 +609,19 @@ class ExpressionParserSuite extends PlanTest {
// Hive nanosecond notation.
assertEqual("interval 13.123456789 seconds", intervalLiteral("second", "13.123456789"))
- assertEqual("interval -13.123456789 second", intervalLiteral("second", "-13.123456789"))
+ assertEqual(
+ "interval -13.123456789 second",
+ Literal(new CalendarInterval(
+ 0,
+ -13 * DateTimeUtils.MICROS_PER_SECOND - 123 * DateTimeUtils.MICROS_PER_MILLIS - 456)))
+ assertEqual(
+ "interval 13.123456 second",
+ Literal(new CalendarInterval(
+ 0,
+ 13 * DateTimeUtils.MICROS_PER_SECOND + 123 * DateTimeUtils.MICROS_PER_MILLIS + 456)))
+ assertEqual(
+ "interval 1.001 second",
+ Literal(CalendarInterval.fromString("interval 1 second 1 millisecond")))
// Non Existing unit
intercept("interval 10 nanoseconds", "No interval can be constructed")
diff --git a/sql/core/src/test/resources/sql-tests/results/literals.sql.out b/sql/core/src/test/resources/sql-tests/results/literals.sql.out
index 7f30161..c3f55e1 100644
--- a/sql/core/src/test/resources/sql-tests/results/literals.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/literals.sql.out
@@ -323,9 +323,9 @@ select timestamp '2016-33-11 20:54:00.000'
-- !query 34
select interval 13.123456789 seconds, interval -13.123456789 second
-- !query 34 schema
-struct<interval 13 seconds 123 milliseconds 456 microseconds:calendarinterval,interval -12 seconds -876 milliseconds -544 microseconds:calendarinterval>
+struct<interval 13 seconds 123 milliseconds 456 microseconds:calendarinterval,interval -13 seconds -123 milliseconds -456 microseconds:calendarinterval>
-- !query 34 output
-interval 13 seconds 123 milliseconds 456 microseconds interval -12 seconds -876 milliseconds -544 microseconds
+interval 13 seconds 123 milliseconds 456 microseconds interval -13 seconds -123 milliseconds -456 microseconds
-- !query 35
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org