You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ag...@apache.org on 2020/09/16 00:02:11 UTC

[arrow] branch master updated: ARROW-9986: [Rust] allow to_timestamp to parse local times without fractional seconds

This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 2f621fc  ARROW-9986: [Rust] allow to_timestamp to parse local times without fractional seconds
2f621fc is described below

commit 2f621fc3ecda6634ffe292f99f78122dca567ffd
Author: alamb <an...@nerdnetworks.org>
AuthorDate: Tue Sep 15 18:01:43 2020 -0600

    ARROW-9986: [Rust] allow to_timestamp to parse local times without fractional seconds
    
    As pointed out on @jhorstmann: https://github.com/apache/arrow/pull/8161#issuecomment-691464193
    
    > One (not directly related) issue I noticed while trying this out, is that the local patterns seem to require the millisecond part, while for utc timestamps with "Z" they are optional:
    
    ```
    > select to_timestamp('2020-09-12T10:30:00') from test limit 1;
    ArrowError(ExternalError(General("Error parsing \'2020-09-12T10:30:00\' as timestamp")))
    
    > select to_timestamp('2020-09-12T10:30:00Z') from test limit 1;
    +-------------------------------------------+
    | totimestamp(Utf8("2020-09-12T10:30:00Z")) |
    +-------------------------------------------+
    | 1599906600000000000                       |
    +-------------------------------------------+
    ```
    
    This PR allows parsing local timestamp patterns without the fractional seconds and tests for same
    
    Closes #8179 from alamb/alamb/ARROW-9986-fractional-secs
    
    Authored-by: alamb <an...@nerdnetworks.org>
    Signed-off-by: Andy Grove <an...@nvidia.com>
---
 .../src/physical_plan/datetime_expressions.rs      | 67 +++++++++++++++++-----
 1 file changed, 53 insertions(+), 14 deletions(-)

diff --git a/rust/datafusion/src/physical_plan/datetime_expressions.rs b/rust/datafusion/src/physical_plan/datetime_expressions.rs
index a031f16..86a02db 100644
--- a/rust/datafusion/src/physical_plan/datetime_expressions.rs
+++ b/rust/datafusion/src/physical_plan/datetime_expressions.rs
@@ -44,6 +44,7 @@ use chrono::{prelude::*, LocalResult};
 /// * `1997-01-31 09:26:56.123-05:00`   # close to RCF3339 but with a space rather than T
 /// * `1997-01-31T09:26:56.123`         # close to RCF3339 but no timezone offset specified
 /// * `1997-01-31 09:26:56.123`         # close to RCF3339 but uses a space and no timezone offset
+/// * `1997-01-31 09:26:56`             # close to RCF3339, no fractional seconds
 //
 /// Internally, this function uses the `chrono` library for the
 /// datetime parsing
@@ -110,12 +111,26 @@ fn string_to_timestamp_nanos(s: &str) -> Result<i64> {
         return naive_datetime_to_timestamp(s, ts);
     }
 
+    // without a timezone specifier as a local time, using T as a
+    // separator, no fractional seconds
+    // Example: 2020-09-08T13:42:29
+    if let Ok(ts) = NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S") {
+        return naive_datetime_to_timestamp(s, ts);
+    }
+
     // without a timezone specifier as a local time, using ' ' as a separator
     // Example: 2020-09-08 13:42:29.190855
     if let Ok(ts) = NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S.%f") {
         return naive_datetime_to_timestamp(s, ts);
     }
 
+    // without a timezone specifier as a local time, using ' ' as a
+    // separator, no fractional seconds
+    // Example: 2020-09-08 13:42:29
+    if let Ok(ts) = NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S") {
+        return naive_datetime_to_timestamp(s, ts);
+    }
+
     // Note we don't pass along the error message from the underlying
     // chrono parsing because we tried several different format
     // strings and we don't know which the user was trying to
@@ -242,38 +257,62 @@ mod tests {
         Ok(())
     }
 
-    #[test]
-    fn string_to_timestamp_no_timezone() -> Result<()> {
-        // This test is designed to succeed in regardless of the local
-        // timezone the test machine is running. Thus it is still
-        // somewhat suceptable to bugs in the use of chrono
-        let naive_date_time = NaiveDateTime::new(
-            NaiveDate::from_ymd(2020, 09, 08),
-            NaiveTime::from_hms_nano(13, 42, 29, 190855),
-        );
-
+    /// Interprets a naive_datetime (with no explicit timzone offset)
+    /// using the local timezone and returns the timestamp in UTC (0
+    /// offset)
+    fn naive_datetime_to_timestamp(naive_datetime: &NaiveDateTime) -> i64 {
         // Note: Use chrono APIs that are different than
         // naive_datetime_to_timestamp to compute the utc offset to
         // try and double check the logic
-        let utc_offset_secs = match Local.offset_from_local_datetime(&naive_date_time) {
+        let utc_offset_secs = match Local.offset_from_local_datetime(&naive_datetime) {
             LocalResult::Single(local_offset) => {
                 local_offset.fix().local_minus_utc() as i64
             }
             _ => panic!("Unexpected failure converting to local datetime"),
         };
         let utc_offset_nanos = utc_offset_secs * 1_000_000_000;
-        let expected_date_time = naive_date_time.timestamp_nanos() - utc_offset_nanos;
+        naive_datetime.timestamp_nanos() - utc_offset_nanos
+    }
+
+    #[test]
+    fn string_to_timestamp_no_timezone() -> Result<()> {
+        // This test is designed to succeed in regardless of the local
+        // timezone the test machine is running. Thus it is still
+        // somewhat suceptable to bugs in the use of chrono
+        let naive_datetime = NaiveDateTime::new(
+            NaiveDate::from_ymd(2020, 09, 08),
+            NaiveTime::from_hms_nano(13, 42, 29, 190855),
+        );
 
         // Ensure both T and ' ' variants work
         assert_eq!(
-            expected_date_time,
+            naive_datetime_to_timestamp(&naive_datetime),
             parse_timestamp("2020-09-08T13:42:29.190855")?
         );
 
         assert_eq!(
-            expected_date_time,
+            naive_datetime_to_timestamp(&naive_datetime),
             parse_timestamp("2020-09-08 13:42:29.190855")?
         );
+
+        // Also ensure that parsing timestamps with no fractional
+        // second part works as well
+        let naive_datetime_whole_secs = NaiveDateTime::new(
+            NaiveDate::from_ymd(2020, 09, 08),
+            NaiveTime::from_hms(13, 42, 29),
+        );
+
+        // Ensure both T and ' ' variants work
+        assert_eq!(
+            naive_datetime_to_timestamp(&naive_datetime_whole_secs),
+            parse_timestamp("2020-09-08T13:42:29")?
+        );
+
+        assert_eq!(
+            naive_datetime_to_timestamp(&naive_datetime_whole_secs),
+            parse_timestamp("2020-09-08 13:42:29")?
+        );
+
         Ok(())
     }