You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2020/06/03 13:31:27 UTC

[spark] branch branch-3.0 updated: [SPARK-31896][SQL] Handle am-pm timestamp parsing when hour is missing

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 47edae9  [SPARK-31896][SQL] Handle am-pm timestamp parsing when hour is missing
47edae9 is described below

commit 47edae9d848d745d0aef99cfd893d055c8fc6457
Author: Kent Yao <ya...@hotmail.com>
AuthorDate: Wed Jun 3 13:30:22 2020 +0000

    [SPARK-31896][SQL] Handle am-pm timestamp parsing when hour is missing
    
    ### What changes were proposed in this pull request?
    
    This PR set the hour to 12/0 when the AMPM_OF_DAY field exists
    
    ### Why are the changes needed?
    
    When the hour is absent but the am-pm is present, the time is incorrect for pm
    
    ### Does this PR introduce _any_ user-facing change?
    yes, the change is user-facing but to change back to 2.4 to keep backward compatibility
    
    e.g.
    ```sql
    spark-sql> select to_timestamp('33:33 PM', 'mm:ss a');
    1970-01-01 12:33:33
    spark-sql> select to_timestamp('33:33 AM', 'mm:ss a');
    1970-01-01 00:33:33
    
    ```
    
    otherwise, the results are all `1970-01-01 00:33:33`
    
    ### How was this patch tested?
    
    add unit tests
    
    Closes #28713 from yaooqinn/SPARK-31896.
    
    Authored-by: Kent Yao <ya...@hotmail.com>
    Signed-off-by: Wenchen Fan <we...@databricks.com>
    (cherry picked from commit afcc14c6d27f9e0bd113e0d86b64dc6fa4eed551)
    Signed-off-by: Wenchen Fan <we...@databricks.com>
---
 .../catalyst/util/DateTimeFormatterHelper.scala    |  8 ++++++++
 .../expressions/DateExpressionsSuite.scala         | 10 +++++++++
 .../spark/sql/util/TimestampFormatterSuite.scala   | 24 +++++++++++++++++++---
 3 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala
index ffa7cd4..eeb56aa 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala
@@ -62,7 +62,15 @@ trait DateTimeFormatterHelper {
       accessor.get(ChronoField.HOUR_OF_DAY)
     } else if (accessor.isSupported(ChronoField.HOUR_OF_AMPM)) {
       // When we reach here, it means am/pm is not specified. Here we assume it's am.
+      // All of CLOCK_HOUR_OF_AMPM(h)/HOUR_OF_DAY(H)/CLOCK_HOUR_OF_DAY(k)/HOUR_OF_AMPM(K) will
+      // be resolved to HOUR_OF_AMPM here, we do not need to handle them separately
       accessor.get(ChronoField.HOUR_OF_AMPM)
+    } else if (accessor.isSupported(ChronoField.AMPM_OF_DAY) &&
+      accessor.get(ChronoField.AMPM_OF_DAY) == 1) {
+      // When reach here, the `hour` part is missing, and PM is specified.
+      // None of CLOCK_HOUR_OF_AMPM(h)/HOUR_OF_DAY(H)/CLOCK_HOUR_OF_DAY(k)/HOUR_OF_AMPM(K) is
+      // specified
+      12
     } else {
       0
     }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
index f3b9d6e..dcb4759 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
@@ -1176,4 +1176,14 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
       checkNullify(l)
     }
   }
+
+
+  test("SPARK-31896: Handle am-pm timestamp parsing when hour is missing") {
+    checkEvaluation(
+      new ParseToTimestamp(Literal("PM"), Literal("a")).child,
+      Timestamp.valueOf("1970-01-01 12:00:00.0"))
+    checkEvaluation(
+      new ParseToTimestamp(Literal("11:11 PM"), Literal("mm:ss a")).child,
+      Timestamp.valueOf("1970-01-01 12:11:11.0"))
+  }
 }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala
index 9870d5e..efd97a4 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala
@@ -386,9 +386,11 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
   }
 
   test("missing am/pm field") {
-    val formatter = TimestampFormatter("yyyy hh:mm:ss", UTC)
-    val micros = formatter.parse("2009 11:30:01")
-    assert(micros === date(2009, 1, 1, 11, 30, 1))
+    Seq("HH", "hh", "KK", "kk").foreach { hour =>
+      val formatter = TimestampFormatter(s"yyyy $hour:mm:ss", UTC)
+      val micros = formatter.parse("2009 11:30:01")
+      assert(micros === date(2009, 1, 1, 11, 30, 1))
+    }
   }
 
   test("missing time fields") {
@@ -397,6 +399,22 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
     assert(micros === date(2009, 1, 1, 11))
   }
 
+  test("missing hour field") {
+    val f1 = TimestampFormatter("mm:ss a", UTC)
+    val t1 = f1.parse("30:01 PM")
+    assert(t1 === date(1970, 1, 1, 12, 30, 1))
+    val t2 = f1.parse("30:01 AM")
+    assert(t2 === date(1970, 1, 1, 0, 30, 1))
+    val f2 = TimestampFormatter("mm:ss", UTC)
+    val t3 = f2.parse("30:01")
+    assert(t3 === date(1970, 1, 1, 0, 30, 1))
+    val f3 = TimestampFormatter("a", UTC)
+    val t4 = f3.parse("PM")
+    assert(t4 === date(1970, 1, 1, 12))
+    val t5 = f3.parse("AM")
+    assert(t5 === date(1970))
+  }
+
   test("explicitly forbidden datetime patterns") {
     // not support by the legacy one too
     Seq("QQQQQ", "qqqqq", "A", "c", "e", "n", "N", "p").foreach { pattern =>


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org