Posted to commits@spark.apache.org by we...@apache.org on 2019/11/01 14:08:09 UTC

[spark] branch branch-2.4 updated: [SPARK-29651][SQL][2.4] Fix parsing of interval seconds fraction

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-2.4 by this push:
     new 9b17a68  [SPARK-29651][SQL][2.4] Fix parsing of interval seconds fraction
9b17a68 is described below

commit 9b17a682e6ad25f396a2c9ccfbe7c402a403ba4a
Author: Maxim Gekk <ma...@gmail.com>
AuthorDate: Fri Nov 1 22:07:26 2019 +0800

    [SPARK-29651][SQL][2.4] Fix parsing of interval seconds fraction
    
    ### What changes were proposed in this pull request?
    In this PR, I propose to extract the parsing of the seconds unit of intervals into the private method `parseNanos` in `CalendarInterval`, and to modify the code so that the fractional part of the seconds unit is parsed correctly in two cases (a sketch of the logic follows the list):
    - When the fractional part has fewer than 9 digits
    - When the seconds unit is negative
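
    A minimal standalone sketch of the padding-and-truncation logic (the class and method names here are illustrative, not part of the patch; the real implementation is the `parseNanos` method in the diff below):
    ```java
    import java.util.concurrent.TimeUnit;

    public class FractionDemo {
      // Right-pads the fractional digits to 9 (nanosecond precision),
      // truncates the value to microseconds, and applies the sign of
      // the seconds unit.
      static long fractionToMicros(String fraction, boolean isNegative) {
        if (fraction == null) return 0L;
        // "123456" -> "123456000", so .123456 reads as 123456000 nanoseconds
        String aligned = (fraction + "000000000").substring(0, 9);
        long nanos = Long.parseLong(aligned);
        long micros = TimeUnit.NANOSECONDS.toMicros(nanos); // drops sub-microsecond digits
        return isNegative ? -micros : micros;
      }

      public static void main(String[] args) {
        System.out.println(fractionToMicros("123456", false));   // 123456
        System.out.println(fractionToMicros("123456789", true)); // -123456
      }
    }
    ```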
    
    This is a backport of commit https://github.com/apache/spark/commit/3206a9987001d78cf2f48509a93d73af86f51cfe.
    
    ### Why are the changes needed?
    The changes are needed to fix the following issues:
    ```sql
    spark-sql> select interval 10.123456 seconds;
    interval 10 seconds 123 microseconds
    ```
    The correct result must be `interval 10 seconds 123 milliseconds 456 microseconds`.
    ```sql
    spark-sql> select interval -10.123456789 seconds;
    interval -9 seconds -876 milliseconds -544 microseconds
    ```
    Here the whole interval should be negated, so the result must be `interval -10 seconds -123 milliseconds -456 microseconds`, taking the truncation to microseconds into account.
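
    Concretely, the fixed semantics read `-10.123456789` as `-(10 * 1000000 + 123456789 / 1000)` microseconds, i.e. `-10123456` microseconds: the fractional part is first truncated from nanoseconds to microseconds (dropping the trailing `789`), and only then is the sign applied to the whole interval.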
    
    ### Does this PR introduce any user-facing change?
    Yes. After the changes:
    ```sql
    spark-sql> select interval 10.123456 seconds;
    interval 10 seconds 123 milliseconds 456 microseconds
    spark-sql> select interval -10.123456789 seconds;
    interval -10 seconds -123 milliseconds -456 microseconds
    ```
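
    As a quick sanity check, the display format above round-trips through `CalendarInterval.fromString` (a sketch, assuming a Spark 2.4 build on the classpath; `IntervalCheck` is just an illustrative wrapper):
    ```java
    import org.apache.spark.unsafe.types.CalendarInterval;

    public class IntervalCheck {
      public static void main(String[] args) {
        // fromString parses the "interval ..." display format shown above
        CalendarInterval i = CalendarInterval.fromString(
            "interval 10 seconds 123 milliseconds 456 microseconds");
        System.out.println(i.microseconds); // 10123456
      }
    }
    ```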
    
    ### How was this patch tested?
    By the existing test suite (`literals.sql`) and by new tests in `ExpressionParserSuite`.
    
    Closes #26355 from MaxGekk/fix-interval-nanos-parsing-2.4.
    
    Authored-by: Maxim Gekk <ma...@gmail.com>
    Signed-off-by: Wenchen Fan <we...@databricks.com>
---
 .../spark/unsafe/types/CalendarInterval.java       | 27 ++++++++++++++++++----
 .../spark/unsafe/types/CalendarIntervalSuite.java  |  2 +-
 .../catalyst/parser/ExpressionParserSuite.scala    | 15 +++++++++++-
 .../resources/sql-tests/results/literals.sql.out   |  4 ++--
 4 files changed, 39 insertions(+), 9 deletions(-)

diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java
index 1818fef..b75a1f5 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java
@@ -19,6 +19,7 @@ package org.apache.spark.unsafe.types;
 
 import java.io.Serializable;
 import java.util.Locale;
+import java.util.concurrent.TimeUnit;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
@@ -177,11 +178,10 @@ public final class CalendarInterval implements Serializable {
         long minutes = toLongWithRange("minute", m.group(4), 0, 59);
         long seconds = toLongWithRange("second", m.group(5), 0, 59);
         // Hive allow nanosecond precision interval
-        String nanoStr = m.group(7) == null ? null : (m.group(7) + "000000000").substring(0, 9);
-        long nanos = toLongWithRange("nanosecond", nanoStr, 0L, 999999999L);
+        long secondsFraction = parseNanos(m.group(7), seconds < 0);
         result = new CalendarInterval(0, sign * (
           days * MICROS_PER_DAY + hours * MICROS_PER_HOUR + minutes * MICROS_PER_MINUTE +
-          seconds * MICROS_PER_SECOND + nanos / 1000L));
+          seconds * MICROS_PER_SECOND + secondsFraction));
       } catch (Exception e) {
         throw new IllegalArgumentException(
           "Error parsing interval day-time string: " + e.getMessage(), e);
@@ -270,8 +270,8 @@ public final class CalendarInterval implements Serializable {
     } else if (parts.length == 2) {
       long seconds = parts[0].equals("") ? 0L : toLongWithRange("second", parts[0],
         Long.MIN_VALUE / MICROS_PER_SECOND, Long.MAX_VALUE / MICROS_PER_SECOND);
-      long nanos = toLongWithRange("nanosecond", parts[1], 0L, 999999999L);
-      return seconds * MICROS_PER_SECOND + nanos / 1000L;
+      long secondsFraction = parseNanos(parts[1], seconds < 0);
+      return seconds * MICROS_PER_SECOND + secondsFraction;
 
     } else {
       throw new IllegalArgumentException(
@@ -357,4 +357,21 @@ public final class CalendarInterval implements Serializable {
       sb.append(' ').append(value).append(' ').append(unit).append('s');
     }
   }
+
+  // Parses a string with nanoseconds, truncates the result and returns microseconds
+  private static long parseNanos(String nanosStr, boolean isNegative) {
+    if (nanosStr != null) {
+      int maxNanosLen = 9;
+      String alignedStr = nanosStr;
+      if (nanosStr.length() < maxNanosLen) {
+        alignedStr = (nanosStr + "000000000").substring(0, maxNanosLen);
+      }
+      long nanos = toLongWithRange("nanosecond", alignedStr, 0L, 999999999L);
+      long micros = TimeUnit.NANOSECONDS.toMicros(nanos);
+      micros = isNegative ? -micros : micros;
+      return micros;
+    } else {
+      return 0;
+    }
+  }
 }
diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java
index c125ba5..2454c7d 100644
--- a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java
+++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java
@@ -201,7 +201,7 @@ public class CalendarIntervalSuite {
     assertEquals(fromSingleUnitString("day", input), i);
 
     input = "1999.38888";
-    i = new CalendarInterval(0, 1999 * MICROS_PER_SECOND + 38);
+    i = new CalendarInterval(0, 1999 * MICROS_PER_SECOND + 388 * MICROS_PER_MILLI + 880);
     assertEquals(fromSingleUnitString("second", input), i);
 
     try {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
index 1eec9e7..2bc41da 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
@@ -23,6 +23,7 @@ import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, _}
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.aggregate.{First, Last}
 import org.apache.spark.sql.catalyst.plans.PlanTest
+import org.apache.spark.sql.catalyst.util.DateTimeUtils
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.types.CalendarInterval
@@ -608,7 +609,19 @@ class ExpressionParserSuite extends PlanTest {
 
     // Hive nanosecond notation.
     assertEqual("interval 13.123456789 seconds", intervalLiteral("second", "13.123456789"))
-    assertEqual("interval -13.123456789 second", intervalLiteral("second", "-13.123456789"))
+    assertEqual(
+      "interval -13.123456789 second",
+      Literal(new CalendarInterval(
+        0,
+        -13 * DateTimeUtils.MICROS_PER_SECOND - 123 * DateTimeUtils.MICROS_PER_MILLIS - 456)))
+    assertEqual(
+      "interval 13.123456 second",
+      Literal(new CalendarInterval(
+        0,
+        13 * DateTimeUtils.MICROS_PER_SECOND + 123 * DateTimeUtils.MICROS_PER_MILLIS + 456)))
+    assertEqual(
+      "interval 1.001 second",
+      Literal(CalendarInterval.fromString("interval 1 second 1 millisecond")))
 
     // Non Existing unit
     intercept("interval 10 nanoseconds", "No interval can be constructed")
diff --git a/sql/core/src/test/resources/sql-tests/results/literals.sql.out b/sql/core/src/test/resources/sql-tests/results/literals.sql.out
index 7f30161..c3f55e1 100644
--- a/sql/core/src/test/resources/sql-tests/results/literals.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/literals.sql.out
@@ -323,9 +323,9 @@ select timestamp '2016-33-11 20:54:00.000'
 -- !query 34
 select interval 13.123456789 seconds, interval -13.123456789 second
 -- !query 34 schema
-struct<interval 13 seconds 123 milliseconds 456 microseconds:calendarinterval,interval -12 seconds -876 milliseconds -544 microseconds:calendarinterval>
+struct<interval 13 seconds 123 milliseconds 456 microseconds:calendarinterval,interval -13 seconds -123 milliseconds -456 microseconds:calendarinterval>
 -- !query 34 output
-interval 13 seconds 123 milliseconds 456 microseconds	interval -12 seconds -876 milliseconds -544 microseconds
+interval 13 seconds 123 milliseconds 456 microseconds	interval -13 seconds -123 milliseconds -456 microseconds
 
 
 -- !query 35

