You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by th...@apache.org on 2022/05/04 13:39:48 UTC

[arrow] branch master updated: ARROW-16255: [R] Reorganise the datetime bindings

This is an automated email from the ASF dual-hosted git repository.

thisisnic pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 893faa741f ARROW-16255: [R] Reorganise the datetime bindings
893faa741f is described below

commit 893faa741f34ee450070503566dafb7291e24d9f
Author: Dragoș Moldovan-Grünfeld <dr...@gmail.com>
AuthorDate: Wed May 4 13:39:36 2022 +0000

    ARROW-16255: [R] Reorganise the datetime bindings
    
    The purpose of this PR is to reorganise the datetime bindings.
    
    Why?
    * some are in files where one wouldn't think to look (e.g. in `R/dplyr-funcs-type.R`)
    * the are a bunch of somewhat scattered helper functions
    * some of the `register_bindings_...()` functions are too complex and trigger the cyclocomp lint
    
    What?
    * create a separate file for the datetime helpers, called `R/dplyr-datetime-helpers.R`
    * all bindings are in `dplyr-funcs-datetime.R` (with the exception of `leap_year`, which was moved to `expressions.R`)
    * all tests are in `test-dplyr-funcs-datetime.R`
    
    Results
    * cyclomatic complexity for the `dplyr-funcs-datetime.R` reduced to 21 (from 26)
    
    <details><summary>&#9658; More details</summary>
    
    <p>
    
    | Binding                   | Old registering function               | New registering function |
    |---                            |---                                                   |---                                       |
    | `strptime`               | `register_bindings_datetime()` | `register_bindings_datetime_utility()` |
    | `strftime`                | `register_bindings_datetime()` | `register_bindings_datetime_utility()` |
    |  `format_ISO8601`| `register_bindings_datetime()` | `register_bindings_datetime_utility()` |
    | `second`                 | `register_bindings_datetime()` |  `register_bindings_datetime_components()` |
    | `wday`                    | `register_bindings_datetime()` |  `register_bindings_datetime_components()` |
    | `week`                    | `register_bindings_datetime()` |  `register_bindings_datetime_components()` |
    | `month`                  | `register_bindings_datetime()` | `register_bindings_datetime_components()` |
    |  `is.Date`                | `register_bindings_datetime()` | `register_bindings_datetime_utility()` |
    | `is.instant`             | `register_bindings_datetime()` | `register_bindings_datetime_utility()` |
    | `is.timepoint`         | `register_bindings_datetime()` | `register_bindings_datetime_utility()` |
    |  `is.POSIXct`          | `register_bindings_datetime()` | `register_bindings_datetime_utility()` |
    | `leap_year`            | `register_bindings_datetime()` | |
    |  `am`                      | `register_bindings_datetime()` | `register_bindings_datetime_components()` |
    |  `pm`                      | `register_bindings_datetime()` | `register_bindings_datetime_components()` |
    | `tz`                         | `register_bindings_datetime()` | `register_bindings_datetime_components()` |
    | `semester`            | `register_bindings_datetime()` | `register_bindings_datetime_components()` |
    | `date`                     | `register_bindings_datetime()` | `register_bindings_datetime_utility()` |
    | `make_datetime`  | `register_bindings_duration()` | `register_bindings_datetime_conversion()` |
    | `make_date`         | `register_bindings_duration()` | `register_bindings_datetime_conversion()` |
    | `ISOdatetime`       | `register_bindings_duration()` | `register_bindings_datetime_conversion()` |
    | `ISOdate`              | `register_bindings_duration()` | `register_bindings_datetime_conversion()` |
    | `difftime`               | `register_bindings_duration()` | `register_bindings_duration()` |
    | `as.difftime`          | `register_bindings_duration()` | `register_bindings_duration()` |
    | `decimal_date`     | `register_bindings_duration()` | `register_bindings_datetime_conversion()` |
    | `date_decimal`     | `register_bindings_duration()` | `register_bindings_datetime_conversion()` |
    | `duration_helpers_map_factory` | `register_bindings_duration_helpers()` | `register_bindings_duration_helpers()` |
    | `dpicoseconds`   | `register_bindings_duration_helpers()` | `register_bindings_duration_helpers()` |
    | `make_difftime ` | `register_bindings_difftime_constructors()` | `register_bindings_duration_constructor()` |
    | `as.Date`              | `register_bindings_type_cast()` | `register_bindings_datetime_conversion()` |
    | `as_date `            | `register_bindings_type_cast()` | `register_bindings_datetime_conversion()` |
    | `as_datetime `     | `register_bindings_type_cast()` | `register_bindings_datetime_conversion()` |
    
    </p>
    
    </details>
    
    Closes #13029 from dragosmg/datetime_bindings_reorg
    
    Authored-by: Dragoș Moldovan-Grünfeld <dr...@gmail.com>
    Signed-off-by: Nic Crane <th...@gmail.com>
---
 r/DESCRIPTION                                |   1 +
 r/R/dplyr-datetime-helpers.R                 | 156 ++++++++++++
 r/R/dplyr-funcs-datetime.R                   | 356 ++++++++++++---------------
 r/R/dplyr-funcs-type.R                       |  65 -----
 r/R/dplyr-funcs.R                            |   3 -
 r/R/expression.R                             |   1 +
 r/tests/testthat/test-dplyr-funcs-datetime.R | 155 ++++++++++++
 r/tests/testthat/test-dplyr-funcs-type.R     | 154 ------------
 8 files changed, 464 insertions(+), 427 deletions(-)

diff --git a/r/DESCRIPTION b/r/DESCRIPTION
index 8c751520a7..29f5ee2932 100644
--- a/r/DESCRIPTION
+++ b/r/DESCRIPTION
@@ -91,6 +91,7 @@ Collate:
     'dplyr-arrange.R'
     'dplyr-collect.R'
     'dplyr-count.R'
+    'dplyr-datetime-helpers.R'
     'dplyr-distinct.R'
     'dplyr-eval.R'
     'dplyr-filter.R'
diff --git a/r/R/dplyr-datetime-helpers.R b/r/R/dplyr-datetime-helpers.R
new file mode 100644
index 0000000000..9acf8b1843
--- /dev/null
+++ b/r/R/dplyr-datetime-helpers.R
@@ -0,0 +1,156 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+check_time_locale <- function(locale = Sys.getlocale("LC_TIME")) {
+  if (tolower(Sys.info()[["sysname"]]) == "windows" & locale != "C") {
+    # MingW C++ std::locale only supports "C" and "POSIX"
+    stop(paste0("On Windows, time locales other than 'C' are not supported in Arrow. ",
+                "Consider setting `Sys.setlocale('LC_TIME', 'C')`"))
+  }
+  locale
+}
+
+.helpers_function_map <- list(
+  "dminutes" = list(60, "s"),
+  "dhours" = list(3600, "s"),
+  "ddays" = list(86400, "s"),
+  "dweeks" = list(604800, "s"),
+  "dmonths" = list(2629800, "s"),
+  "dyears" = list(31557600, "s"),
+  "dseconds" = list(1, "s"),
+  "dmilliseconds" = list(1, "ms"),
+  "dmicroseconds" = list(1, "us"),
+  "dnanoseconds" = list(1, "ns")
+)
+make_duration <- function(x, unit) {
+  x <- build_expr("cast", x, options = cast_options(to_type = int64()))
+  x$cast(duration(unit))
+}
+
+binding_format_datetime <- function(x, format = "", tz = "", usetz = FALSE) {
+  if (usetz) {
+    format <- paste(format, "%Z")
+  }
+
+  if (call_binding("is.POSIXct", x)) {
+    # the casting part might not be required once
+    # https://issues.apache.org/jira/browse/ARROW-14442 is solved
+    # TODO revisit the steps below once the PR for that issue is merged
+    if (tz == "" && x$type()$timezone() != "") {
+      tz <- x$type()$timezone()
+    } else if (tz == "") {
+      tz <- Sys.timezone()
+    }
+    x <- build_expr("cast", x, options = cast_options(to_type = timestamp(x$type()$unit(), tz)))
+  }
+
+  build_expr("strftime", x, options = list(format = format, locale = Sys.getlocale("LC_TIME")))
+}
+
+# this is a helper function used for creating a difftime / duration objects from
+# several of the accepted pieces (second, minute, hour, day, week)
+duration_from_chunks <- function(chunks) {
+  accepted_chunks <- c("second", "minute", "hour", "day", "week")
+  matched_chunks <- accepted_chunks[pmatch(names(chunks), accepted_chunks, duplicates.ok = TRUE)]
+
+  if (any(is.na(matched_chunks))) {
+    abort(
+      paste0(
+        "named `difftime` units other than: ",
+        oxford_paste(accepted_chunks, quote_symbol = "`"),
+        " not supported in Arrow. \nInvalid `difftime` parts: ",
+        oxford_paste(names(chunks[is.na(matched_chunks)]), quote_symbol = "`")
+      )
+    )
+  }
+
+  matched_chunks <- matched_chunks[!is.na(matched_chunks)]
+
+  chunks <- chunks[matched_chunks]
+  chunk_duration <- c(
+    "second" = 1L,
+    "minute" = 60L,
+    "hour" = 3600L,
+    "day" = 86400L,
+    "week" = 604800L
+  )
+
+  # transform the duration of each chunk in seconds and add everything together
+  duration <- 0
+  for (chunk in names(chunks)) {
+    duration <- duration + chunks[[chunk]] * chunk_duration[[chunk]]
+  }
+  duration
+}
+
+
+binding_as_date <- function(x,
+                            format = NULL,
+                            tryFormats = "%Y-%m-%d",
+                            origin = "1970-01-01") {
+
+  if (is.null(format) && length(tryFormats) > 1) {
+    abort("`as.Date()` with multiple `tryFormats` is not supported in Arrow")
+  }
+
+  if (call_binding("is.Date", x)) {
+    return(x)
+
+    # cast from character
+  } else if (call_binding("is.character", x)) {
+    x <- binding_as_date_character(x, format, tryFormats)
+
+    # cast from numeric
+  } else if (call_binding("is.numeric", x)) {
+    x <- binding_as_date_numeric(x, origin)
+  }
+
+  build_expr("cast", x, options = cast_options(to_type = date32()))
+}
+
+binding_as_date_character <- function(x,
+                                      format = NULL,
+                                      tryFormats = "%Y-%m-%d") {
+  format <- format %||% tryFormats[[1]]
+  # unit = 0L is the identifier for seconds in valid_time32_units
+  build_expr("strptime", x, options = list(format = format, unit = 0L))
+}
+
+binding_as_date_numeric <- function(x, origin = "1970-01-01") {
+
+  # Arrow does not support direct casting from double to date32(), but for
+  # integer-like values we can go via int32()
+  # https://issues.apache.org/jira/browse/ARROW-15798
+  # TODO revisit if arrow decides to support double -> date casting
+  if (!call_binding("is.integer", x)) {
+    x <- build_expr("cast", x, options = cast_options(to_type = int32()))
+  }
+
+  if (origin != "1970-01-01") {
+    delta_in_sec <- call_binding("difftime", origin, "1970-01-01")
+    # TODO: revisit once either of these issues is addressed:
+    #   https://issues.apache.org/jira/browse/ARROW-16253 (helper function for
+    #   casting from double to duration) or
+    #   https://issues.apache.org/jira/browse/ARROW-15862 (casting from int32
+    #   -> duration or double -> duration)
+    delta_in_sec <- build_expr("cast", delta_in_sec, options = cast_options(to_type = int64()))
+    delta_in_days <- (delta_in_sec / 86400L)$cast(int32())
+    x <- build_expr("+", x, delta_in_days)
+  }
+
+  x
+}
diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R
index 4f2517e8ef..4d7ea050a0 100644
--- a/r/R/dplyr-funcs-datetime.R
+++ b/r/R/dplyr-funcs-datetime.R
@@ -15,16 +15,18 @@
 # specific language governing permissions and limitations
 # under the License.
 
-check_time_locale <- function(locale = Sys.getlocale("LC_TIME")) {
-  if (tolower(Sys.info()[["sysname"]]) == "windows" & locale != "C") {
-    # MingW C++ std::locale only supports "C" and "POSIX"
-    stop(paste0("On Windows, time locales other than 'C' are not supported in Arrow. ",
-                "Consider setting `Sys.setlocale('LC_TIME', 'C')`"))
-  }
-  locale
+# Split up into several register functions by category to reduce cyclomatic
+# complexity (linter)
+register_bindings_datetime <- function() {
+  register_bindings_datetime_utility()
+  register_bindings_datetime_components()
+  register_bindings_datetime_conversion()
+  register_bindings_duration()
+  register_bindings_duration_constructor()
+  register_bindings_duration_helpers()
 }
 
-register_bindings_datetime <- function() {
+register_bindings_datetime_utility <- function() {
   register_binding("strptime", function(x, format = "%Y-%m-%d %H:%M:%S", tz = NULL,
                                         unit = "ms") {
     # Arrow uses unit for time parsing, strptime() does not.
@@ -91,6 +93,29 @@ register_bindings_datetime <- function() {
     Expression$create("strftime", x, options = list(format = format, locale = "C"))
   })
 
+  register_binding("is.Date", function(x) {
+    inherits(x, "Date") ||
+      (inherits(x, "Expression") && x$type_id() %in% Type[c("DATE32", "DATE64")])
+  })
+
+  is_instant_binding <- function(x) {
+    inherits(x, c("POSIXt", "POSIXct", "POSIXlt", "Date")) ||
+      (inherits(x, "Expression") && x$type_id() %in% Type[c("TIMESTAMP", "DATE32", "DATE64")])
+  }
+  register_binding("is.instant", is_instant_binding)
+  register_binding("is.timepoint", is_instant_binding)
+
+  register_binding("is.POSIXct", function(x) {
+    inherits(x, "POSIXct") ||
+      (inherits(x, "Expression") && x$type_id() %in% Type[c("TIMESTAMP")])
+  })
+
+  register_binding("date", function(x) {
+    build_expr("cast", x, options = list(to_type = date32()))
+  })
+}
+
+register_bindings_datetime_components <- function() {
   register_binding("second", function(x) {
     Expression$create("add", Expression$create("second", x), Expression$create("subsecond", x))
   })
@@ -149,27 +174,6 @@ register_bindings_datetime <- function() {
     build_expr("month", x)
   })
 
-  register_binding("is.Date", function(x) {
-    inherits(x, "Date") ||
-      (inherits(x, "Expression") && x$type_id() %in% Type[c("DATE32", "DATE64")])
-  })
-
-  is_instant_binding <- function(x) {
-    inherits(x, c("POSIXt", "POSIXct", "POSIXlt", "Date")) ||
-      (inherits(x, "Expression") && x$type_id() %in% Type[c("TIMESTAMP", "DATE32", "DATE64")])
-  }
-  register_binding("is.instant", is_instant_binding)
-  register_binding("is.timepoint", is_instant_binding)
-
-  register_binding("is.POSIXct", function(x) {
-    inherits(x, "POSIXct") ||
-      (inherits(x, "Expression") && x$type_id() %in% Type[c("TIMESTAMP")])
-  })
-
-  register_binding("leap_year", function(date) {
-    Expression$create("is_leap_year", date)
-  })
-
   register_binding("am", function(x) {
     hour <- Expression$create("hour", x)
     hour < 12
@@ -200,12 +204,9 @@ register_bindings_datetime <- function() {
       return(semester)
     }
   })
-  register_binding("date", function(x) {
-    build_expr("cast", x, options = list(to_type = date32()))
-  })
 }
 
-register_bindings_duration <- function() {
+register_bindings_datetime_conversion <- function() {
   register_binding("make_datetime", function(year = 1970L,
                                              month = 1L,
                                              day = 1L,
@@ -223,10 +224,12 @@ register_bindings_duration <- function() {
     x <- call_binding("str_c", year, month, day, hour, min, sec, sep = "-")
     build_expr("strptime", x, options = list(format = "%Y-%m-%d-%H-%M-%S", unit = 0L))
   })
+
   register_binding("make_date", function(year = 1970L, month = 1L, day = 1L) {
     x <- call_binding("make_datetime", year, month, day)
     build_expr("cast", x, options = cast_options(to_type = date32()))
   })
+
   register_binding("ISOdatetime", function(year,
                                            month,
                                            day,
@@ -245,6 +248,7 @@ register_bindings_duration <- function() {
 
     call_binding("make_datetime", year, month, day, hour, min, sec, tz)
   })
+
   register_binding("ISOdate", function(year,
                                        month,
                                        day,
@@ -254,6 +258,105 @@ register_bindings_duration <- function() {
                                        tz = "UTC") {
     call_binding("make_datetime", year, month, day, hour, min, sec, tz)
   })
+
+  register_binding("as.Date", function(x,
+                                       format = NULL,
+                                       tryFormats = "%Y-%m-%d",
+                                       origin = "1970-01-01",
+                                       tz = "UTC") {
+    # base::as.Date() and lubridate::as_date() differ in the way they use the
+    # `tz` argument. Both cast to the desired timezone, if present. The
+    # difference appears when the `tz` argument is not set: `as.Date()` uses the
+    # default value ("UTC"), while `as_date()` keeps the original attribute
+    # => we only cast when we want the behaviour of the base version or when
+    # `tz` is set (i.e. not NULL)
+    if (call_binding("is.POSIXct", x)) {
+      x <- build_expr("cast", x, options = cast_options(to_type = timestamp(timezone = tz)))
+    }
+
+    binding_as_date(
+      x = x,
+      format = format,
+      tryFormats = tryFormats,
+      origin = origin
+    )
+  })
+
+  register_binding("as_date", function(x,
+                                       format = NULL,
+                                       origin = "1970-01-01",
+                                       tz = NULL) {
+    # base::as.Date() and lubridate::as_date() differ in the way they use the
+    # `tz` argument. Both cast to the desired timezone, if present. The
+    # difference appears when the `tz` argument is not set: `as.Date()` uses the
+    # default value ("UTC"), while `as_date()` keeps the original attribute
+    # => we only cast when we want the behaviour of the base version or when
+    # `tz` is set (i.e. not NULL)
+    if (call_binding("is.POSIXct", x) && !is.null(tz)) {
+      x <- build_expr("cast", x, options = cast_options(to_type = timestamp(timezone = tz)))
+    }
+    binding_as_date(
+      x = x,
+      format = format,
+      origin = origin
+    )
+  })
+
+  register_binding("as_datetime", function(x,
+                                           origin = "1970-01-01",
+                                           tz = "UTC",
+                                           format = NULL) {
+    if (call_binding("is.numeric", x)) {
+      delta <- call_binding("difftime", origin, "1970-01-01")
+      delta <- build_expr("cast", delta, options = cast_options(to_type = int64()))
+      x <- build_expr("cast", x, options = cast_options(to_type = int64()))
+      x <- build_expr("+", x, delta)
+    }
+
+    if (call_binding("is.character", x) && !is.null(format)) {
+      # unit = 0L is the identifier for seconds in valid_time32_units
+      x <- build_expr(
+        "strptime",
+        x,
+        options = list(format = format, unit = 0L, error_is_null = TRUE)
+      )
+    }
+    output <- build_expr("cast", x, options = cast_options(to_type = timestamp()))
+    build_expr("assume_timezone", output, options = list(timezone = tz))
+  })
+
+  register_binding("decimal_date", function(date) {
+    y <- build_expr("year", date)
+    start <- call_binding("make_datetime", year = y, tz = "UTC")
+    sofar <- call_binding("difftime", date, start, units = "secs")
+    total <- call_binding(
+      "if_else",
+      build_expr("is_leap_year", date),
+      Expression$scalar(31622400L), # number of seconds in a leap year (366 days)
+      Expression$scalar(31536000L)  # number of seconds in a regular year (365 days)
+    )
+    y + sofar$cast(int64()) / total
+  })
+
+  register_binding("date_decimal", function(decimal, tz = "UTC") {
+    y <- build_expr("floor", decimal)
+
+    start <- call_binding("make_datetime", year = y, tz = tz)
+    seconds <- call_binding(
+      "if_else",
+      build_expr("is_leap_year", start),
+      Expression$scalar(31622400L), # number of seconds in a leap year (366 days)
+      Expression$scalar(31536000L)  # number of seconds in a regular year (365 days)
+    )
+
+    fraction <- decimal - y
+    delta <- build_expr("floor", seconds * fraction)
+    delta <- delta$cast(int64())
+    start + delta$cast(duration("s"))
+  })
+}
+
+register_bindings_duration <- function() {
   register_binding("difftime", function(time1,
                                         time2,
                                         tz,
@@ -329,75 +432,9 @@ register_bindings_duration <- function() {
 
     build_expr("cast", x, options = cast_options(to_type = duration(unit = "s")))
   })
-  register_binding("decimal_date", function(date) {
-    y <- build_expr("year", date)
-    start <- call_binding("make_datetime", year = y, tz = "UTC")
-    sofar <- call_binding("difftime", date, start, units = "secs")
-    total <- call_binding(
-      "if_else",
-      build_expr("is_leap_year", date),
-      Expression$scalar(31622400L), # number of seconds in a leap year (366 days)
-      Expression$scalar(31536000L)  # number of seconds in a regular year (365 days)
-    )
-    y + sofar$cast(int64()) / total
-  })
-  register_binding("date_decimal", function(decimal, tz = "UTC") {
-    y <- build_expr("floor", decimal)
-
-    start <- call_binding("make_datetime", year = y, tz = tz)
-    seconds <- call_binding(
-      "if_else",
-      build_expr("is_leap_year", start),
-      Expression$scalar(31622400L), # number of seconds in a leap year (366 days)
-      Expression$scalar(31536000L)  # number of seconds in a regular year (365 days)
-    )
-
-    fraction <- decimal - y
-    delta <- build_expr("floor", seconds * fraction)
-    delta <- delta$cast(int64())
-    start + delta$cast(duration("s"))
-  })
-}
-
-.helpers_function_map <- list(
-  "dminutes" = list(60, "s"),
-  "dhours" = list(3600, "s"),
-  "ddays" = list(86400, "s"),
-  "dweeks" = list(604800, "s"),
-  "dmonths" = list(2629800, "s"),
-  "dyears" = list(31557600, "s"),
-  "dseconds" = list(1, "s"),
-  "dmilliseconds" = list(1, "ms"),
-  "dmicroseconds" = list(1, "us"),
-  "dnanoseconds" = list(1, "ns")
-)
-make_duration <- function(x, unit) {
-  x <- build_expr("cast", x, options = cast_options(to_type = int64()))
-  x$cast(duration(unit))
 }
-register_bindings_duration_helpers <- function() {
-  duration_helpers_map_factory <- function(value, unit) {
-    force(value)
-    force(unit)
-    function(x = 1) make_duration(x * value, unit)
-  }
 
-  for (name in names(.helpers_function_map)) {
-    register_binding(
-      name,
-      duration_helpers_map_factory(
-        .helpers_function_map[[name]][[1]],
-        .helpers_function_map[[name]][[2]]
-      )
-    )
-  }
-
-  register_binding("dpicoseconds", function(x = 1) {
-    abort("Duration in picoseconds not supported in Arrow.")
-  })
-}
-
-register_bindings_difftime_constructors <- function() {
+register_bindings_duration_constructor <- function() {
   register_binding("make_difftime", function(num = NULL,
                                              units = "secs",
                                              ...) {
@@ -427,115 +464,24 @@ register_bindings_difftime_constructors <- function() {
   })
 }
 
-binding_format_datetime <- function(x, format = "", tz = "", usetz = FALSE) {
-  if (usetz) {
-    format <- paste(format, "%Z")
-  }
-
-  if (call_binding("is.POSIXct", x)) {
-    # the casting part might not be required once
-    # https://issues.apache.org/jira/browse/ARROW-14442 is solved
-    # TODO revisit the steps below once the PR for that issue is merged
-    if (tz == "" && x$type()$timezone() != "") {
-      tz <- x$type()$timezone()
-    } else if (tz == "") {
-      tz <- Sys.timezone()
-    }
-    x <- build_expr("cast", x, options = cast_options(to_type = timestamp(x$type()$unit(), tz)))
+register_bindings_duration_helpers <- function() {
+  duration_helpers_map_factory <- function(value, unit) {
+    force(value)
+    force(unit)
+    function(x = 1) make_duration(x * value, unit)
   }
 
-  build_expr("strftime", x, options = list(format = format, locale = Sys.getlocale("LC_TIME")))
-}
-
-# this is a helper function used for creating a difftime / duration objects from
-# several of the accepted pieces (second, minute, hour, day, week)
-duration_from_chunks <- function(chunks) {
-  accepted_chunks <- c("second", "minute", "hour", "day", "week")
-  matched_chunks <- accepted_chunks[pmatch(names(chunks), accepted_chunks, duplicates.ok = TRUE)]
-
-  if (any(is.na(matched_chunks))) {
-    abort(
-      paste0(
-        "named `difftime` units other than: ",
-        oxford_paste(accepted_chunks, quote_symbol = "`"),
-        " not supported in Arrow. \nInvalid `difftime` parts: ",
-        oxford_paste(names(chunks[is.na(matched_chunks)]), quote_symbol = "`")
+  for (name in names(.helpers_function_map)) {
+    register_binding(
+      name,
+      duration_helpers_map_factory(
+        .helpers_function_map[[name]][[1]],
+        .helpers_function_map[[name]][[2]]
       )
     )
   }
 
-  matched_chunks <- matched_chunks[!is.na(matched_chunks)]
-
-  chunks <- chunks[matched_chunks]
-  chunk_duration <- c(
-    "second" = 1L,
-    "minute" = 60L,
-    "hour" = 3600L,
-    "day" = 86400L,
-    "week" = 604800L
-  )
-
-  # transform the duration of each chunk in seconds and add everything together
-  duration <- 0
-  for (chunk in names(chunks)) {
-    duration <- duration + chunks[[chunk]] * chunk_duration[[chunk]]
-  }
-  duration
-}
-
-binding_as_date <- function(x,
-                            format = NULL,
-                            tryFormats = "%Y-%m-%d",
-                            origin = "1970-01-01") {
-
-  if (is.null(format) && length(tryFormats) > 1) {
-    abort("`as.Date()` with multiple `tryFormats` is not supported in Arrow")
-  }
-
-  if (call_binding("is.Date", x)) {
-    return(x)
-
-    # cast from character
-  } else if (call_binding("is.character", x)) {
-    x <- binding_as_date_character(x, format, tryFormats)
-
-    # cast from numeric
-  } else if (call_binding("is.numeric", x)) {
-    x <- binding_as_date_numeric(x, origin)
-  }
-
-  build_expr("cast", x, options = cast_options(to_type = date32()))
-}
-
-binding_as_date_character <- function(x,
-                                      format = NULL,
-                                      tryFormats = "%Y-%m-%d") {
-  format <- format %||% tryFormats[[1]]
-  # unit = 0L is the identifier for seconds in valid_time32_units
-  build_expr("strptime", x, options = list(format = format, unit = 0L))
-}
-
-binding_as_date_numeric <- function(x, origin = "1970-01-01") {
-
-  # Arrow does not support direct casting from double to date32(), but for
-  # integer-like values we can go via int32()
-  # https://issues.apache.org/jira/browse/ARROW-15798
-  # TODO revisit if arrow decides to support double -> date casting
-  if (!call_binding("is.integer", x)) {
-    x <- build_expr("cast", x, options = cast_options(to_type = int32()))
-  }
-
-  if (origin != "1970-01-01") {
-    delta_in_sec <- call_binding("difftime", origin, "1970-01-01")
-    # TODO: revisit once either of these issues is addressed:
-    #   https://issues.apache.org/jira/browse/ARROW-16253 (helper function for
-    #   casting from double to duration) or
-    #   https://issues.apache.org/jira/browse/ARROW-15862 (casting from int32
-    #   -> duration or double -> duration)
-    delta_in_sec <- build_expr("cast", delta_in_sec, options = cast_options(to_type = int64()))
-    delta_in_days <- (delta_in_sec / 86400L)$cast(int32())
-    x <- build_expr("+", x, delta_in_days)
-  }
-
-  x
+  register_binding("dpicoseconds", function(x = 1) {
+    abort("Duration in picoseconds not supported in Arrow.")
+  })
 }
diff --git a/r/R/dplyr-funcs-type.R b/r/R/dplyr-funcs-type.R
index e3700cf35b..653719fa2c 100644
--- a/r/R/dplyr-funcs-type.R
+++ b/r/R/dplyr-funcs-type.R
@@ -77,71 +77,6 @@ register_bindings_type_cast <- function() {
   register_binding("as.numeric", function(x) {
     build_expr("cast", x, options = cast_options(to_type = float64()))
   })
-  register_binding("as.Date", function(x,
-                                       format = NULL,
-                                       tryFormats = "%Y-%m-%d",
-                                       origin = "1970-01-01",
-                                       tz = "UTC") {
-    # base::as.Date() and lubridate::as_date() differ in the way they use the
-    # `tz` argument. Both cast to the desired timezone, if present. The
-    # difference appears when the `tz` argument is not set: `as.Date()` uses the
-    # default value ("UTC"), while `as_date()` keeps the original attribute
-    # => we only cast when we want the behaviour of the base version or when
-    # `tz` is set (i.e. not NULL)
-    if (call_binding("is.POSIXct", x)) {
-      x <- build_expr("cast", x, options = cast_options(to_type = timestamp(timezone = tz)))
-    }
-
-    binding_as_date(
-      x = x,
-      format = format,
-      tryFormats = tryFormats,
-      origin = origin
-    )
-  })
-
-  register_binding("as_date", function(x,
-                                       format = NULL,
-                                       origin = "1970-01-01",
-                                       tz = NULL) {
-    # base::as.Date() and lubridate::as_date() differ in the way they use the
-    # `tz` argument. Both cast to the desired timezone, if present. The
-    # difference appears when the `tz` argument is not set: `as.Date()` uses the
-    # default value ("UTC"), while `as_date()` keeps the original attribute
-    # => we only cast when we want the behaviour of the base version or when
-    # `tz` is set (i.e. not NULL)
-    if (call_binding("is.POSIXct", x) && !is.null(tz)) {
-      x <- build_expr("cast", x, options = cast_options(to_type = timestamp(timezone = tz)))
-    }
-    binding_as_date(
-      x = x,
-      format = format,
-      origin = origin
-    )
-  })
-
-  register_binding("as_datetime", function(x,
-                                           origin = "1970-01-01",
-                                           tz = "UTC",
-                                           format = NULL) {
-    if (call_binding("is.numeric", x)) {
-      delta <- call_binding("difftime", origin, "1970-01-01")
-      delta <- build_expr("cast", delta, options = cast_options(to_type = int64()))
-      x <- build_expr("cast", x, options = cast_options(to_type = int64()))
-      x <- build_expr("+", x, delta)
-    }
-
-    if (call_binding("is.character", x) && !is.null(format)) {
-      # unit = 0L is the identifier for seconds in valid_time32_units
-      x <- build_expr(
-        "strptime",
-        x,
-        options = list(format = format, unit = 0L, error_is_null = TRUE)
-      )
-    }
-    output <- build_expr("cast", x, options = cast_options(to_type = timestamp()))
-    build_expr("assume_timezone", output, options = list(timezone = tz))
-  })
 
   register_binding("is", function(object, class2) {
     if (is.string(class2)) {
diff --git a/r/R/dplyr-funcs.R b/r/R/dplyr-funcs.R
index c66ed04893..4d7cb3bc63 100644
--- a/r/R/dplyr-funcs.R
+++ b/r/R/dplyr-funcs.R
@@ -106,9 +106,6 @@ create_binding_cache <- function() {
   register_bindings_aggregate()
   register_bindings_conditional()
   register_bindings_datetime()
-  register_bindings_difftime_constructors()
-  register_bindings_duration()
-  register_bindings_duration_helpers()
   register_bindings_math()
   register_bindings_string()
   register_bindings_type()
diff --git a/r/R/expression.R b/r/R/expression.R
index eb37950c34..be43de01e1 100644
--- a/r/R/expression.R
+++ b/r/R/expression.R
@@ -75,6 +75,7 @@
   "mday" = "day",
   "yday" = "day_of_year",
   "year" = "year",
+  "leap_year" = "is_leap_year",
 
   # type conversion functions
   "as.factor" = "dictionary_encode"
diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R
index 47626a6cb1..a7afe4c5b9 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -1466,3 +1466,158 @@ test_that("make_difftime()", {
     )
   )
 })
+
+test_that("`as.Date()` and `as_date()`", {
+  test_df <- tibble::tibble(
+    posixct_var = as.POSIXct("2022-02-25 00:00:01", tz = "Pacific/Marquesas"),
+    dt_europe = ymd_hms("2010-08-03 00:50:50", tz = "Europe/London"),
+    dt_utc = ymd_hms("2010-08-03 00:50:50"),
+    date_var = as.Date("2022-02-25"),
+    difference_date = ymd_hms("2010-08-03 00:50:50", tz = "Pacific/Marquesas"),
+    character_ymd_var = "2022-02-25 00:00:01",
+    character_ydm_var = "2022/25/02 00:00:01",
+    integer_var = 32L,
+    integerish_var = 32,
+    double_var = 34.56
+  )
+
+  compare_dplyr_binding(
+    .input %>%
+      mutate(
+        date_dv1 = as.Date(date_var),
+        date_pv1 = as.Date(posixct_var),
+        date_pv_tz1 = as.Date(posixct_var, tz = "Pacific/Marquesas"),
+        date_utc1 = as.Date(dt_utc),
+        date_europe1 = as.Date(dt_europe),
+        date_char_ymd1 = as.Date(character_ymd_var, format = "%Y-%m-%d %H:%M:%S"),
+        date_char_ydm1 = as.Date(character_ydm_var, format = "%Y/%d/%m %H:%M:%S"),
+        date_int1 = as.Date(integer_var, origin = "1970-01-01"),
+        date_int_origin1 = as.Date(integer_var, origin = "1970-01-03"),
+        date_integerish1 = as.Date(integerish_var, origin = "1970-01-01"),
+        date_dv2 = as_date(date_var),
+        date_pv2 = as_date(posixct_var),
+        date_pv_tz2 = as_date(posixct_var, tz = "Pacific/Marquesas"),
+        date_utc2 = as_date(dt_utc),
+        date_europe2 = as_date(dt_europe),
+        date_char_ymd2 = as_date(character_ymd_var, format = "%Y-%m-%d %H:%M:%S"),
+        date_char_ydm2 = as_date(character_ydm_var, format = "%Y/%d/%m %H:%M:%S"),
+        date_int2 = as_date(integer_var, origin = "1970-01-01"),
+        date_int_origin2 = as_date(integer_var, origin = "1970-01-03"),
+        date_integerish2 = as_date(integerish_var, origin = "1970-01-01")
+      ) %>%
+      collect(),
+    test_df
+  )
+
+  # we do not support multiple tryFormats
+  compare_dplyr_binding(
+    .input %>%
+      mutate(date_char_ymd = as.Date(character_ymd_var,
+                                     tryFormats = c("%Y-%m-%d", "%Y/%m/%d"))) %>%
+      collect(),
+    test_df,
+    warning = TRUE
+  )
+
+  # strptime does not support a partial format - testing an error surfaced from
+  # C++ (hence not testing the content of the error message)
+  # TODO revisit once - https://issues.apache.org/jira/browse/ARROW-15813
+  expect_error(
+    test_df %>%
+      arrow_table() %>%
+      mutate(date_char_ymd = as_date(character_ymd_var)) %>%
+      collect()
+  )
+
+  expect_error(
+    test_df %>%
+      arrow_table() %>%
+      mutate(date_char_ymd = as.Date(character_ymd_var)) %>%
+      collect(),
+    regexp = "Failed to parse string: '2022-02-25 00:00:01' as a scalar of type timestamp[s]",
+    fixed = TRUE
+  )
+
+
+  # we do not support as.Date() with double/ float (error surfaced from C++)
+  # TODO revisit after https://issues.apache.org/jira/browse/ARROW-15798
+  expect_error(
+    test_df %>%
+      arrow_table() %>%
+      mutate(date_double = as.Date(double_var, origin = "1970-01-01")) %>%
+      collect()
+  )
+
+  # we do not support as_date with double/ float (error surfaced from C++)
+  # TODO: revisit after https://issues.apache.org/jira/browse/ARROW-15798
+  expect_error(
+    test_df %>%
+      arrow_table() %>%
+      mutate(date_double = as_date(double_var, origin = "1970-01-01")) %>%
+      collect()
+  )
+
+  # difference between as.Date() and as_date():
+  #`as.Date()` ignores the `tzone` attribute and uses the value of the `tz` arg
+  # to `as.Date()`
+  # `as_date()` does the opposite: uses the tzone attribute of the POSIXct object
+  # passsed if`tz` is NULL
+  compare_dplyr_binding(
+    .input %>%
+      transmute(
+        date_diff_lubridate = as_date(difference_date),
+        date_diff_base = as.Date(difference_date)
+      ) %>%
+      collect(),
+    test_df
+  )
+})
+
+test_that("`as_datetime()`", {
+  test_df <- tibble(
+    date = as.Date(c("2022-03-22", "2021-07-30", NA)),
+    char_date = c("2022-03-22", "2021-07-30 14:32:47", NA),
+    char_date_non_iso = c("2022-22-03 12:34:56", "2021-30-07 14:32:47", NA),
+    int_date = c(10L, 25L, NA),
+    integerish_date = c(10, 25, NA),
+    double_date = c(10.1, 25.2, NA)
+  )
+
+  test_df %>%
+    arrow_table() %>%
+    mutate(
+      ddate = as_datetime(date),
+      dchar_date_no_tz = as_datetime(char_date),
+      dchar_date_non_iso = as_datetime(char_date_non_iso, format = "%Y-%d-%m %H:%M:%S"),
+      dint_date = as_datetime(int_date, origin = "1970-01-02"),
+      dintegerish_date = as_datetime(integerish_date, origin = "1970-01-02"),
+      dintegerish_date2 = as_datetime(integerish_date, origin = "1970-01-01")
+    ) %>%
+    collect()
+
+  compare_dplyr_binding(
+    .input %>%
+      mutate(
+        ddate = as_datetime(date),
+        dchar_date_no_tz = as_datetime(char_date),
+        dchar_date_with_tz = as_datetime(char_date, tz = "Pacific/Marquesas"),
+        dint_date = as_datetime(int_date, origin = "1970-01-02"),
+        dintegerish_date = as_datetime(integerish_date, origin = "1970-01-02"),
+        dintegerish_date2 = as_datetime(integerish_date, origin = "1970-01-01")
+      ) %>%
+      collect(),
+    test_df
+  )
+
+  # Arrow does not support conversion of double to date
+  # the below should error with an error message originating in the C++ code
+  expect_error(
+    test_df %>%
+      arrow_table() %>%
+      mutate(
+        ddouble_date = as_datetime(double_date)
+      ) %>%
+      collect(),
+    regexp = "Float value 10.1 was truncated converting to int64"
+  )
+})
diff --git a/r/tests/testthat/test-dplyr-funcs-type.R b/r/tests/testthat/test-dplyr-funcs-type.R
index e4283e39b5..c1ea465fef 100644
--- a/r/tests/testthat/test-dplyr-funcs-type.R
+++ b/r/tests/testthat/test-dplyr-funcs-type.R
@@ -802,160 +802,6 @@ test_that("nested structs can be created from scalars and existing data frames",
 
   })
 
-test_that("`as.Date()` and `as_date()`", {
-  test_df <- tibble::tibble(
-    posixct_var = as.POSIXct("2022-02-25 00:00:01", tz = "Pacific/Marquesas"),
-    dt_europe = ymd_hms("2010-08-03 00:50:50", tz = "Europe/London"),
-    dt_utc = ymd_hms("2010-08-03 00:50:50"),
-    date_var = as.Date("2022-02-25"),
-    difference_date = ymd_hms("2010-08-03 00:50:50", tz = "Pacific/Marquesas"),
-    character_ymd_var = "2022-02-25 00:00:01",
-    character_ydm_var = "2022/25/02 00:00:01",
-    integer_var = 32L,
-    integerish_var = 32,
-    double_var = 34.56
-  )
-
-  compare_dplyr_binding(
-    .input %>%
-      mutate(
-        date_dv1 = as.Date(date_var),
-        date_pv1 = as.Date(posixct_var),
-        date_pv_tz1 = as.Date(posixct_var, tz = "Pacific/Marquesas"),
-        date_utc1 = as.Date(dt_utc),
-        date_europe1 = as.Date(dt_europe),
-        date_char_ymd1 = as.Date(character_ymd_var, format = "%Y-%m-%d %H:%M:%S"),
-        date_char_ydm1 = as.Date(character_ydm_var, format = "%Y/%d/%m %H:%M:%S"),
-        date_int1 = as.Date(integer_var, origin = "1970-01-01"),
-        date_int_origin1 = as.Date(integer_var, origin = "1970-01-03"),
-        date_integerish1 = as.Date(integerish_var, origin = "1970-01-01"),
-        date_dv2 = as_date(date_var),
-        date_pv2 = as_date(posixct_var),
-        date_pv_tz2 = as_date(posixct_var, tz = "Pacific/Marquesas"),
-        date_utc2 = as_date(dt_utc),
-        date_europe2 = as_date(dt_europe),
-        date_char_ymd2 = as_date(character_ymd_var, format = "%Y-%m-%d %H:%M:%S"),
-        date_char_ydm2 = as_date(character_ydm_var, format = "%Y/%d/%m %H:%M:%S"),
-        date_int2 = as_date(integer_var, origin = "1970-01-01"),
-        date_int_origin2 = as_date(integer_var, origin = "1970-01-03"),
-        date_integerish2 = as_date(integerish_var, origin = "1970-01-01")
-      ) %>%
-      collect(),
-    test_df
-  )
-
-  # we do not support multiple tryFormats
-  compare_dplyr_binding(
-    .input %>%
-      mutate(date_char_ymd = as.Date(character_ymd_var,
-                                     tryFormats = c("%Y-%m-%d", "%Y/%m/%d"))) %>%
-      collect(),
-    test_df,
-    warning = TRUE
-  )
-
-  # strptime does not support a partial format - testing an error surfaced from
-  # C++ (hence not testing the content of the error message)
-  # TODO revisit once - https://issues.apache.org/jira/browse/ARROW-15813
-  expect_error(
-    test_df %>%
-      arrow_table() %>%
-      mutate(date_char_ymd = as_date(character_ymd_var)) %>%
-      collect()
-  )
-
-  expect_error(
-    test_df %>%
-      arrow_table() %>%
-      mutate(date_char_ymd = as.Date(character_ymd_var)) %>%
-      collect(),
-    regexp = "Failed to parse string: '2022-02-25 00:00:01' as a scalar of type timestamp[s]",
-    fixed = TRUE
-  )
-
-  # we do not support as.Date() with double/ float (error surfaced from C++)
-  # TODO revisit after https://issues.apache.org/jira/browse/ARROW-15798
-  expect_error(
-    test_df %>%
-      arrow_table() %>%
-      mutate(date_double = as.Date(double_var, origin = "1970-01-01")) %>%
-      collect()
-  )
-
-  # we do not support as_date with double/ float (error surfaced from C++)
-  # TODO: revisit after https://issues.apache.org/jira/browse/ARROW-15798
-  expect_error(
-    test_df %>%
-      arrow_table() %>%
-      mutate(date_double = as_date(double_var, origin = "1970-01-01")) %>%
-      collect()
-  )
-
-  # difference between as.Date() and as_date():
-  #`as.Date()` ignores the `tzone` attribute and uses the value of the `tz` arg
-  # to `as.Date()`
-  # `as_date()` does the opposite: uses the tzone attribute of the POSIXct object
-  # passsed if`tz` is NULL
-  compare_dplyr_binding(
-    .input %>%
-      transmute(
-        date_diff_lubridate = as_date(difference_date),
-        date_diff_base = as.Date(difference_date)
-      ) %>%
-      collect(),
-    test_df
-  )
-})
-
-test_that("`as_datetime()`", {
-  test_df <- tibble(
-    date = as.Date(c("2022-03-22", "2021-07-30", NA)),
-    char_date = c("2022-03-22", "2021-07-30 14:32:47", NA),
-    char_date_non_iso = c("2022-22-03 12:34:56", "2021-30-07 14:32:47", NA),
-    int_date = c(10L, 25L, NA),
-    integerish_date = c(10, 25, NA),
-    double_date = c(10.1, 25.2, NA)
-  )
-
-  test_df %>%
-    arrow_table() %>%
-    mutate(
-      ddate = as_datetime(date),
-      dchar_date_no_tz = as_datetime(char_date),
-      dchar_date_non_iso = as_datetime(char_date_non_iso, format = "%Y-%d-%m %H:%M:%S"),
-      dint_date = as_datetime(int_date, origin = "1970-01-02"),
-      dintegerish_date = as_datetime(integerish_date, origin = "1970-01-02"),
-      dintegerish_date2 = as_datetime(integerish_date, origin = "1970-01-01")
-    ) %>%
-    collect()
-
-  compare_dplyr_binding(
-    .input %>%
-      mutate(
-        ddate = as_datetime(date),
-        dchar_date_no_tz = as_datetime(char_date),
-        dchar_date_with_tz = as_datetime(char_date, tz = "Pacific/Marquesas"),
-        dint_date = as_datetime(int_date, origin = "1970-01-02"),
-        dintegerish_date = as_datetime(integerish_date, origin = "1970-01-02"),
-        dintegerish_date2 = as_datetime(integerish_date, origin = "1970-01-01")
-      ) %>%
-      collect(),
-    test_df
-  )
-
-  # Arrow does not support conversion of double to date
-  # the below should error with an error message originating in the C++ code
-  expect_error(
-    test_df %>%
-      arrow_table() %>%
-      mutate(
-        ddouble_date = as_datetime(double_date)
-      ) %>%
-      collect(),
-    regexp = "Float value 10.1 was truncated converting to int64"
-  )
-})
-
 test_that("format date/time", {
   # locale issues
   # TODO revisit after https://issues.apache.org/jira/browse/ARROW-16399 is done