You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by jo...@apache.org on 2022/04/11 17:15:18 UTC

[arrow] branch master updated: ARROW-14442: [R] fix behaviour when converting timestamps with "" as tzone

This is an automated email from the ASF dual-hosted git repository.

jonkeane pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 633687c1e6 ARROW-14442: [R] fix behaviour when converting timestamps with "" as tzone
633687c1e6 is described below

commit 633687c1e6f940c78986af206a4bb2a478f25906
Author: Dragoș Moldovan-Grünfeld <dr...@gmail.com>
AuthorDate: Mon Apr 11 12:15:04 2022 -0500

    ARROW-14442: [R] fix behaviour when converting timestamps with "" as tzone
    
    Closes #12240 from dragosmg/timestampts_missing_timezone
    
    Authored-by: Dragoș Moldovan-Grünfeld <dr...@gmail.com>
    Signed-off-by: Jonathan Keane <jk...@gmail.com>
---
 r/src/type_infer.cpp                         |  6 ++++--
 r/tests/testthat/test-Array.R                | 27 ++++++++++++++++++++++++---
 r/tests/testthat/test-dplyr-funcs-datetime.R |  4 ----
 3 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/r/src/type_infer.cpp b/r/src/type_infer.cpp
index 2568757aa2..75b1e85c42 100644
--- a/r/src/type_infer.cpp
+++ b/r/src/type_infer.cpp
@@ -72,7 +72,8 @@ std::shared_ptr<arrow::DataType> InferArrowTypeFromVector<INTSXP>(SEXP x) {
   } else if (Rf_inherits(x, "POSIXct")) {
     auto tzone_sexp = Rf_getAttrib(x, symbols::tzone);
     if (Rf_isNull(tzone_sexp)) {
-      return timestamp(TimeUnit::MICRO);
+      auto systzone_sexp = cpp11::package("base")["Sys.timezone"];
+      return timestamp(TimeUnit::MICRO, CHAR(STRING_ELT(systzone_sexp(), 0)));
     } else {
       return timestamp(TimeUnit::MICRO, CHAR(STRING_ELT(tzone_sexp, 0)));
     }
@@ -88,7 +89,8 @@ std::shared_ptr<arrow::DataType> InferArrowTypeFromVector<REALSXP>(SEXP x) {
   if (Rf_inherits(x, "POSIXct")) {
     auto tzone_sexp = Rf_getAttrib(x, symbols::tzone);
     if (Rf_isNull(tzone_sexp)) {
-      return timestamp(TimeUnit::MICRO);
+      auto systzone_sexp = cpp11::package("base")["Sys.timezone"];
+      return timestamp(TimeUnit::MICRO, CHAR(STRING_ELT(systzone_sexp(), 0)));
     } else {
       return timestamp(TimeUnit::MICRO, CHAR(STRING_ELT(tzone_sexp, 0)));
     }
diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R
index 15d6d79247..2f75efb3d6 100644
--- a/r/tests/testthat/test-Array.R
+++ b/r/tests/testthat/test-Array.R
@@ -260,11 +260,11 @@ test_that("array supports POSIXct (ARROW-3340)", {
   expect_array_roundtrip(times2, timestamp("us", "US/Eastern"))
 })
 
-test_that("array supports POSIXct without timezone", {
-  # Make sure timezone is not set
+test_that("array uses local timezone for POSIXct without timezone", {
   withr::with_envvar(c(TZ = ""), {
     times <- strptime("2019-02-03 12:34:56", format = "%Y-%m-%d %H:%M:%S") + 1:10
-    expect_array_roundtrip(times, timestamp("us", ""))
+    expect_equal(attr(times, "tzone"), NULL)
+    expect_array_roundtrip(times, timestamp("us", Sys.timezone()))
 
     # Also test the INTSXP code path
     skip("Ingest_POSIXct only implemented for REALSXP")
@@ -272,6 +272,27 @@ test_that("array supports POSIXct without timezone", {
     attributes(times_int) <- attributes(times)
     expect_array_roundtrip(times_int, timestamp("us", ""))
   })
+
+  # If there is a timezone set, we record that
+  withr::with_timezone("Pacific/Marquesas", {
+    times <- strptime("2019-02-03 12:34:56", format = "%Y-%m-%d %H:%M:%S") + 1:10
+    expect_equal(attr(times, "tzone"), "Pacific/Marquesas")
+    expect_array_roundtrip(times, timestamp("us", "Pacific/Marquesas"))
+
+    times_with_tz <- strptime(
+      "2019-02-03 12:34:56",
+      format = "%Y-%m-%d %H:%M:%S",
+      tz = "Asia/Katmandu") + 1:10
+    expect_equal(attr(times, "tzone"), "Asia/Katmandu")
+    expect_array_roundtrip(times, timestamp("us", "Asia/Katmandu"))
+  })
+
+  # and although the TZ is NULL in R, we set it to the Sys.timezone()
+  withr::with_timezone(NA, {
+    times <- strptime("2019-02-03 12:34:56", format = "%Y-%m-%d %H:%M:%S") + 1:10
+    expect_equal(attr(times, "tzone"), NULL)
+    expect_array_roundtrip(times, timestamp("us", Sys.timezone()))
+  })
 })
 
 test_that("Timezone handling in Arrow roundtrip (ARROW-3543)", {
diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R
index 16e4958f1c..c901742f65 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -693,7 +693,6 @@ test_that("extract yday from date", {
 })
 
 test_that("leap_year mirror lubridate", {
-
   compare_dplyr_binding(
     .input %>%
       mutate(x = leap_year(date)) %>%
@@ -721,7 +720,6 @@ test_that("leap_year mirror lubridate", {
       ))
     )
   )
-
 })
 
 test_that("am/pm mirror lubridate", {
@@ -741,10 +739,8 @@ test_that("am/pm mirror lubridate", {
         ),
         format = "%Y-%m-%d %H:%M:%S"
       )
-
     )
   )
-
 })
 
 test_that("extract tz", {