You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by jo...@apache.org on 2022/04/11 18:16:22 UTC
[arrow] branch master updated: ARROW-14810 [R] Implement bindings for lubridate's `date_decimal()` and `decimal_date()`
This is an automated email from the ASF dual-hosted git repository.
jonkeane pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 80bba5cbde ARROW-14810 [R] Implement bindings for lubridate's `date_decimal()` and `decimal_date()`
80bba5cbde is described below
commit 80bba5cbdef77e809a7b9bfec36eb5d6a61f0b5d
Author: Dragoș Moldovan-Grünfeld <dr...@gmail.com>
AuthorDate: Mon Apr 11 13:16:13 2022 -0500
ARROW-14810 [R] Implement bindings for lubridate's `date_decimal()` and `decimal_date()`
This would allow the following operations:
``` r
library(dplyr, warn.conflicts = FALSE)
library(lubridate, warn.conflicts = FALSE)
library(arrow, warn.conflicts = FALSE)
test_df <- tibble(
a = c(2007.38998954347, 1970.77732069883, 2020.96061799722,
2009.43465948477, 1975.71251467871, NA),
b = as.POSIXct(
c("2007-05-23 08:18:30", "1970-10-11 17:19:45", "2020-12-17 14:04:06",
"2009-06-08 15:37:01", "1975-09-18 01:37:42", NA)
)
)
test_df %>%
mutate(
decimal_date_from_date = decimal_date(b),
date_from_decimal = date_decimal(a)
)
#> # A tibble: 6 × 4
#> a b decimal_date_from_date date_from_decimal
#> <dbl> <dttm> <dbl> <dttm>
#> 1 2007. 2007-05-23 08:18:30 2007. 2007-05-23 08:18:30
#> 2 1971. 1970-10-11 17:19:45 1971. 1970-10-11 17:19:45
#> 3 2021. 2020-12-17 14:04:06 2021. 2020-12-17 14:04:06
#> 4 2009. 2009-06-08 15:37:01 2009. 2009-06-08 15:37:01
#> 5 1976. 1975-09-18 01:37:42 1976. 1975-09-18 01:37:42
#> 6 NA NA NA NA
test_df %>%
arrow_table() %>%
mutate(
decimal_date_from_date = decimal_date(b),
date_from_decimal = date_decimal(a)
) %>%
collect()
#> # A tibble: 6 × 4
#> a b decimal_date_from_date date_from_decimal
#> <dbl> <dttm> <dbl> <dttm>
#> 1 2007. 2007-05-23 08:18:30 2007. 2007-05-23 08:18:30
#> 2 1971. 1970-10-11 17:19:45 1971. 1970-10-11 17:19:45
#> 3 2021. 2020-12-17 14:04:06 2021. 2020-12-17 14:04:06
#> 4 2009. 2009-06-08 15:37:01 2009. 2009-06-08 15:37:01
#> 5 1976. 1975-09-18 01:37:42 1976. 1975-09-18 01:37:42
#> 6 NA NA NA NA
```
<sup>Created on 2022-03-28 by the [reprex package](https://reprex.tidyverse.org) (v2.0.1)</sup>
Closes #12707 from dragosmg/decimal_dates
Authored-by: Dragoș Moldovan-Grünfeld <dr...@gmail.com>
Signed-off-by: Jonathan Keane <jk...@gmail.com>
---
r/NEWS.md | 7 +++--
r/R/dplyr-funcs-datetime.R | 42 ++++++++++++++++++++++++++++
r/tests/testthat/test-dplyr-funcs-datetime.R | 31 +++++++++++++++++++-
3 files changed, 76 insertions(+), 4 deletions(-)
diff --git a/r/NEWS.md b/r/NEWS.md
index 0a7d30d2a3..1a1f198e0f 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -22,10 +22,11 @@
* `read_csv_arrow()`'s readr-style type `T` is now mapped to `timestamp(unit = "ns")` instead of `timestamp(unit = "s")`.
* `lubridate`:
* component extraction functions: `tz()` (timezone), `semester()` (semester), `dst()` (daylight savings time indicator), `date()` (extract date), `epiyear()` (epiyear), improvements to `month()`, which now works with integer inputs.
- * `make_date()` & `make_datetime()` + `ISOdatetime()` & `ISOdate()` to create date-times from numeric representations.
+ * Added `make_date()` & `make_datetime()` + `ISOdatetime()` & `ISOdate()` to create date-times from numeric representations.
+ * Added `decimal_date()` and `date_decimal()`
* date-time functionality:
- * `difftime` and `as.difftime()`
- * `as.Date()` to convert to date
+ * Added `difftime` and `as.difftime()`
+ * Added `as.Date()` to convert to date
# arrow 7.0.0
diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R
index 754d02a436..1ca485f56e 100644
--- a/r/R/dplyr-funcs-datetime.R
+++ b/r/R/dplyr-funcs-datetime.R
@@ -270,6 +270,20 @@ register_bindings_duration <- function() {
time2 <- build_expr("cast", time2, options = cast_options(to_type = timestamp(timezone = "UTC")))
}
+ # if time1 or time2 are timestamps they cannot be expressed in "s" /seconds
+ # otherwise they cannot be added subtracted with durations
+ # TODO delete the casting to "us" once
+ # https://issues.apache.org/jira/browse/ARROW-16060 is solved
+ if (inherits(time1, "Expression") &&
+ time1$type_id() %in% Type[c("TIMESTAMP")] && time1$type()$unit() != 2L) {
+ time1 <- build_expr("cast", time1, options = cast_options(to_type = timestamp("us")))
+ }
+
+ if (inherits(time2, "Expression") &&
+ time2$type_id() %in% Type[c("TIMESTAMP")] && time2$type()$unit() != 2L) {
+ time2 <- build_expr("cast", time2, options = cast_options(to_type = timestamp("us")))
+ }
+
# we need to go build the subtract expression instead of `time1 - time2` to
# prevent complaints when we try to subtract an R object from an Expression
subtract_output <- build_expr("-", time1, time2)
@@ -309,6 +323,34 @@ register_bindings_duration <- function() {
build_expr("cast", x, options = cast_options(to_type = duration(unit = "s")))
})
+ register_binding("decimal_date", function(date) {
+ y <- build_expr("year", date)
+ start <- call_binding("make_datetime", year = y, tz = "UTC")
+ sofar <- call_binding("difftime", date, start, units = "secs")
+ total <- call_binding(
+ "if_else",
+ build_expr("is_leap_year", date),
+ Expression$scalar(31622400L), # number of seconds in a leap year (366 days)
+ Expression$scalar(31536000L) # number of seconds in a regular year (365 days)
+ )
+ y + sofar$cast(int64()) / total
+ })
+ register_binding("date_decimal", function(decimal, tz = "UTC") {
+ y <- build_expr("floor", decimal)
+
+ start <- call_binding("make_datetime", year = y, tz = tz)
+ seconds <- call_binding(
+ "if_else",
+ build_expr("is_leap_year", start),
+ Expression$scalar(31622400L), # number of seconds in a leap year (366 days)
+ Expression$scalar(31536000L) # number of seconds in a regular year (365 days)
+ )
+
+ fraction <- decimal - y
+ delta <- build_expr("floor", seconds * fraction)
+ delta <- delta$cast(int64())
+ start + delta$cast(duration("s"))
+ })
}
binding_format_datetime <- function(x, format = "", tz = "", usetz = FALSE) {
diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R
index c901742f65..b9277c08c4 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -1145,7 +1145,7 @@ test_that("difftime works correctly", {
.input %>%
mutate(
secs2 = difftime(
- as.POSIXct("2022-03-07", tz = "Europe/Bucharest"),
+ as.POSIXct("2022-03-07", tz = "Pacific/Marquesas"),
time1,
units = "secs"
)
@@ -1224,3 +1224,32 @@ test_that("as.difftime()", {
collect()
)
})
+
+test_that("`decimal_date()` and `date_decimal()`", {
+ test_df <- tibble(
+ a = c(2007.38998954347, 1970.77732069883, 2020.96061799722,
+ 2009.43465948477, 1975.71251467871, NA),
+ b = as.POSIXct(
+ c("2007-05-23 08:18:30", "1970-10-11 17:19:45", "2020-12-17 14:04:06",
+ "2009-06-08 15:37:01", "1975-09-18 01:37:42", NA)
+ ),
+ c = as.Date(
+ c("2007-05-23", "1970-10-11", "2020-12-17", "2009-06-08", "1975-09-18", NA)
+ )
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ decimal_date_from_POSIXct = decimal_date(b),
+ decimal_date_from_r_POSIXct_obj = decimal_date(as.POSIXct("2022-03-25 15:37:01")),
+ decimal_date_from_r_date_obj = decimal_date(ymd("2022-03-25")),
+ decimal_date_from_date = decimal_date(c),
+ date_from_decimal = date_decimal(a),
+ date_from_decimal_r_obj = date_decimal(2022.178)
+ ) %>%
+ collect(),
+ test_df,
+ ignore_attr = "tzone"
+ )
+})