You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2020/06/03 13:31:27 UTC
[spark] branch branch-3.0 updated: [SPARK-31896][SQL] Handle am-pm
timestamp parsing when hour is missing
This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 47edae9 [SPARK-31896][SQL] Handle am-pm timestamp parsing when hour is missing
47edae9 is described below
commit 47edae9d848d745d0aef99cfd893d055c8fc6457
Author: Kent Yao <ya...@hotmail.com>
AuthorDate: Wed Jun 3 13:30:22 2020 +0000
[SPARK-31896][SQL] Handle am-pm timestamp parsing when hour is missing
### What changes were proposed in this pull request?
This PR set the hour to 12/0 when the AMPM_OF_DAY field exists
### Why are the changes needed?
When the hour is absent but the am-pm is present, the time is incorrect for pm
### Does this PR introduce _any_ user-facing change?
yes, the change is user-facing but to change back to 2.4 to keep backward compatibility
e.g.
```sql
spark-sql> select to_timestamp('33:33 PM', 'mm:ss a');
1970-01-01 12:33:33
spark-sql> select to_timestamp('33:33 AM', 'mm:ss a');
1970-01-01 00:33:33
```
otherwise, the results are all `1970-01-01 00:33:33`
### How was this patch tested?
add unit tests
Closes #28713 from yaooqinn/SPARK-31896.
Authored-by: Kent Yao <ya...@hotmail.com>
Signed-off-by: Wenchen Fan <we...@databricks.com>
(cherry picked from commit afcc14c6d27f9e0bd113e0d86b64dc6fa4eed551)
Signed-off-by: Wenchen Fan <we...@databricks.com>
---
.../catalyst/util/DateTimeFormatterHelper.scala | 8 ++++++++
.../expressions/DateExpressionsSuite.scala | 10 +++++++++
.../spark/sql/util/TimestampFormatterSuite.scala | 24 +++++++++++++++++++---
3 files changed, 39 insertions(+), 3 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala
index ffa7cd4..eeb56aa 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala
@@ -62,7 +62,15 @@ trait DateTimeFormatterHelper {
accessor.get(ChronoField.HOUR_OF_DAY)
} else if (accessor.isSupported(ChronoField.HOUR_OF_AMPM)) {
// When we reach here, it means am/pm is not specified. Here we assume it's am.
+ // All of CLOCK_HOUR_OF_AMPM(h)/HOUR_OF_DAY(H)/CLOCK_HOUR_OF_DAY(k)/HOUR_OF_AMPM(K) will
+ // be resolved to HOUR_OF_AMPM here, we do not need to handle them separately
accessor.get(ChronoField.HOUR_OF_AMPM)
+ } else if (accessor.isSupported(ChronoField.AMPM_OF_DAY) &&
+ accessor.get(ChronoField.AMPM_OF_DAY) == 1) {
+ // When reach here, the `hour` part is missing, and PM is specified.
+ // None of CLOCK_HOUR_OF_AMPM(h)/HOUR_OF_DAY(H)/CLOCK_HOUR_OF_DAY(k)/HOUR_OF_AMPM(K) is
+ // specified
+ 12
} else {
0
}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
index f3b9d6e..dcb4759 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
@@ -1176,4 +1176,14 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
checkNullify(l)
}
}
+
+
+ test("SPARK-31896: Handle am-pm timestamp parsing when hour is missing") {
+ checkEvaluation(
+ new ParseToTimestamp(Literal("PM"), Literal("a")).child,
+ Timestamp.valueOf("1970-01-01 12:00:00.0"))
+ checkEvaluation(
+ new ParseToTimestamp(Literal("11:11 PM"), Literal("mm:ss a")).child,
+ Timestamp.valueOf("1970-01-01 12:11:11.0"))
+ }
}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala
index 9870d5e..efd97a4 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala
@@ -386,9 +386,11 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
}
test("missing am/pm field") {
- val formatter = TimestampFormatter("yyyy hh:mm:ss", UTC)
- val micros = formatter.parse("2009 11:30:01")
- assert(micros === date(2009, 1, 1, 11, 30, 1))
+ Seq("HH", "hh", "KK", "kk").foreach { hour =>
+ val formatter = TimestampFormatter(s"yyyy $hour:mm:ss", UTC)
+ val micros = formatter.parse("2009 11:30:01")
+ assert(micros === date(2009, 1, 1, 11, 30, 1))
+ }
}
test("missing time fields") {
@@ -397,6 +399,22 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
assert(micros === date(2009, 1, 1, 11))
}
+ test("missing hour field") {
+ val f1 = TimestampFormatter("mm:ss a", UTC)
+ val t1 = f1.parse("30:01 PM")
+ assert(t1 === date(1970, 1, 1, 12, 30, 1))
+ val t2 = f1.parse("30:01 AM")
+ assert(t2 === date(1970, 1, 1, 0, 30, 1))
+ val f2 = TimestampFormatter("mm:ss", UTC)
+ val t3 = f2.parse("30:01")
+ assert(t3 === date(1970, 1, 1, 0, 30, 1))
+ val f3 = TimestampFormatter("a", UTC)
+ val t4 = f3.parse("PM")
+ assert(t4 === date(1970, 1, 1, 12))
+ val t5 = f3.parse("AM")
+ assert(t5 === date(1970))
+ }
+
test("explicitly forbidden datetime patterns") {
// not support by the legacy one too
Seq("QQQQQ", "qqqqq", "A", "c", "e", "n", "N", "p").foreach { pattern =>
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org