You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2020/12/17 00:11:01 UTC

[GitHub] [arrow] nealrichardson opened a new pull request #8947: ARROW-9187: [R] Add bindings for arithmetic kernels

nealrichardson opened a new pull request #8947:
URL: https://github.com/apache/arrow/pull/8947


   Based on ARROW-10322


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] github-actions[bot] commented on pull request #8947: ARROW-9187: [R] Add bindings for arithmetic kernels

Posted by GitBox <gi...@apache.org>.
github-actions[bot] commented on pull request #8947:
URL: https://github.com/apache/arrow/pull/8947#issuecomment-747128831


   https://issues.apache.org/jira/browse/ARROW-9187


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] nealrichardson commented on a change in pull request #8947: ARROW-9187: [R] Add bindings for arithmetic kernels

Posted by GitBox <gi...@apache.org>.
nealrichardson commented on a change in pull request #8947:
URL: https://github.com/apache/arrow/pull/8947#discussion_r549807276



##########
File path: r/R/expression.R
##########
@@ -91,9 +122,20 @@ build_array_expression <- function(.Generic, e1, e2, ...) {
   "<=" = "less_equal",
   "&" = "and_kleene",
   "|" = "or_kleene",
-  "%in%" = "is_in_meta_binary"
+  "+" = "add_checked",
+  "-" = "subtract_checked",
+  "*" = "multiply_checked",
+  "/" = "divide_checked",
+  "%/%" = "divide_checked",
+  "%in%" = "is_in_meta_binary",
+  "%%" = "divide_checked"

Review comment:
       maybe add a comment that we don't actually use "divide_checked" with `%%`

##########
File path: r/R/expression.R
##########
@@ -173,84 +205,84 @@ Expression <- R6Class("Expression", inherit = ArrowObject,
     ToString = function() dataset___expr__ToString(self)
   )
 )
-
+Expression$create <- function(function_name,
+                              ...,
+                              args = list(...),
+                              options = empty_named_list()) {
+  assert_that(is.string(function_name))
+  dataset___expr__call(function_name, args, options)
+}
 Expression$field_ref <- function(name) {
-  assert_is(name, "character")
-  assert_that(length(name) == 1)
+  assert_that(is.string(name))
   dataset___expr__field_ref(name)
 }
 Expression$scalar <- function(x) {
   dataset___expr__scalar(Scalar$create(x))
 }
-Expression$compare <- function(OP, e1, e2) {
-  comp_func <- comparison_function_map[[OP]]
-  if (is.null(comp_func)) {
-    stop(OP, " is not a supported comparison function", call. = FALSE)
-  }
-  comp_func(e1, e2)
-}
 
-comparison_function_map <- list(
-  "==" = dataset___expr__equal,
-  "!=" = dataset___expr__not_equal,
-  ">" = dataset___expr__greater,
-  ">=" = dataset___expr__greater_equal,
-  "<" = dataset___expr__less,
-  "<=" = dataset___expr__less_equal
-)
-Expression$in_ <- function(x, set) {
-  dataset___expr__in(x, Array$create(set))
-}
-Expression$and <- function(e1, e2) {
-  dataset___expr__and(e1, e2)
-}
-Expression$or <- function(e1, e2) {
-  dataset___expr__or(e1, e2)
-}
-Expression$not <- function(e1) {
-  dataset___expr__not(e1)
-}
-Expression$is_valid <- function(e1) {
-  dataset___expr__is_valid(e1)
+build_dataset_expression <- function(.Generic, e1, e2, ...) {
+  if (.Generic %in% names(.unary_function_map)) {
+    expr <- Expression$create(.unary_function_map[[.Generic]], e1)
+  } else if (.Generic == "%in%") {
+    # Special-case %in%, which is different from the Array function name
+    expr <- Expression$create("is_in", e1,
+      options = list(
+        value_set = Array$create(e2),
+        skip_nulls = TRUE
+      )
+    )
+  } else {
+    if (!inherits(e1, "Expression")) {
+      e1 <- Expression$scalar(e1)
+    }
+    if (!inherits(e2, "Expression")) {
+      e2 <- Expression$scalar(e2)
+    }
+
+    # In Arrow, "divide" is one function, which does integer division on
+    # integer inputs and floating-point division on floats
+    if (.Generic == "/") {
+      # TODO: omg so many ways it's wrong to assume these types
+      e1 <- Expression$create("cast", e1, options = list(to_type = float64()))
+      e2 <- Expression$create("cast", e2, options = list(to_type = float64()))
+    } else if (.Generic == "%/%") {
+      e1 <- Expression$create("cast", e1, options = list(to_type = float64()))
+      e2 <- Expression$create("cast", e2, options = list(to_type = float64()))
+      return(Expression$create("cast", Expression$create(.binary_function_map[[.Generic]], e1, e2, ...), options = list(to_type = int32(), allow_float_truncate = TRUE)))
+    } else if (.Generic == "%%") {
+      # {e1 - e2 * ( e1 %/% e2 )}
+      # TODO: there has to be a way to use the form ^^^ instead of this.

Review comment:
       That should work: `Ops.Expression` should dispatch correctly because e1 and e2 are both Expressions. What happens when you do it?

##########
File path: r/R/expression.R
##########
@@ -91,9 +122,20 @@ build_array_expression <- function(.Generic, e1, e2, ...) {
   "<=" = "less_equal",
   "&" = "and_kleene",
   "|" = "or_kleene",
-  "%in%" = "is_in_meta_binary"
+  "+" = "add_checked",
+  "-" = "subtract_checked",
+  "*" = "multiply_checked",
+  "/" = "divide_checked",
+  "%/%" = "divide_checked",
+  "%in%" = "is_in_meta_binary",
+  "%%" = "divide_checked"
 )
 
+
+# ‘"^"’

Review comment:
       I'd move this up into the map above (commented out of course) and add next to it the JIRA number you create

##########
File path: r/tests/testthat/test-compute-arith.R
##########
@@ -0,0 +1,77 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# TODO:

Review comment:
       Are these TODOs done? Can you delete this?

##########
File path: r/tests/testthat/test-dplyr.R
##########
@@ -133,6 +133,42 @@ test_that("filtering with expression", {
   )
 })
 
+test_that("filtering with arithmetic", {
+  expect_dplyr_equal(
+    input %>%
+      filter(dbl + 1 > 3) %>%
+      select(string = chr, int, dbl) %>%
+      collect(),
+    tbl
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      filter(dbl / 2 > 3) %>%
+      select(string = chr, int, dbl) %>%
+      collect(),
+    tbl
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      filter(dbl / 2L > 3) %>%
+      select(string = chr, int, dbl) %>%
+      collect(),
+    tbl
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      filter(dbl %/% 2 > 3) %>%

Review comment:
       All these tests are with `dbl`; should we add some with `int` or other columns? Or do you think the other types are better tested elsewhere (test-compute-arith.R) and this is really just testing the dplyr NSE?

##########
File path: r/tests/testthat/test-dplyr.R
##########
@@ -133,6 +133,42 @@ test_that("filtering with expression", {
   )
 })
 
+test_that("filtering with arithmetic", {
+  expect_dplyr_equal(
+    input %>%
+      filter(dbl + 1 > 3) %>%
+      select(string = chr, int, dbl) %>%
+      collect(),
+    tbl
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      filter(dbl / 2 > 3) %>%
+      select(string = chr, int, dbl) %>%
+      collect(),
+    tbl
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      filter(dbl / 2L > 3) %>%
+      select(string = chr, int, dbl) %>%
+      collect(),
+    tbl
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      filter(dbl %/% 2 > 3) %>%
+      select(string = chr, int, dbl) %>%
+      collect(),
+    tbl,
+    # TODO: why are record batched versions problematic?

Review comment:
       We should resolve this

##########
File path: r/R/expression.R
##########
@@ -173,84 +205,84 @@ Expression <- R6Class("Expression", inherit = ArrowObject,
     ToString = function() dataset___expr__ToString(self)
   )
 )
-
+Expression$create <- function(function_name,
+                              ...,
+                              args = list(...),
+                              options = empty_named_list()) {
+  assert_that(is.string(function_name))
+  dataset___expr__call(function_name, args, options)
+}
 Expression$field_ref <- function(name) {
-  assert_is(name, "character")
-  assert_that(length(name) == 1)
+  assert_that(is.string(name))
   dataset___expr__field_ref(name)
 }
 Expression$scalar <- function(x) {
   dataset___expr__scalar(Scalar$create(x))
 }
-Expression$compare <- function(OP, e1, e2) {
-  comp_func <- comparison_function_map[[OP]]
-  if (is.null(comp_func)) {
-    stop(OP, " is not a supported comparison function", call. = FALSE)
-  }
-  comp_func(e1, e2)
-}
 
-comparison_function_map <- list(
-  "==" = dataset___expr__equal,
-  "!=" = dataset___expr__not_equal,
-  ">" = dataset___expr__greater,
-  ">=" = dataset___expr__greater_equal,
-  "<" = dataset___expr__less,
-  "<=" = dataset___expr__less_equal
-)
-Expression$in_ <- function(x, set) {
-  dataset___expr__in(x, Array$create(set))
-}
-Expression$and <- function(e1, e2) {
-  dataset___expr__and(e1, e2)
-}
-Expression$or <- function(e1, e2) {
-  dataset___expr__or(e1, e2)
-}
-Expression$not <- function(e1) {
-  dataset___expr__not(e1)
-}
-Expression$is_valid <- function(e1) {
-  dataset___expr__is_valid(e1)
+build_dataset_expression <- function(.Generic, e1, e2, ...) {
+  if (.Generic %in% names(.unary_function_map)) {
+    expr <- Expression$create(.unary_function_map[[.Generic]], e1)
+  } else if (.Generic == "%in%") {
+    # Special-case %in%, which is different from the Array function name
+    expr <- Expression$create("is_in", e1,
+      options = list(
+        value_set = Array$create(e2),
+        skip_nulls = TRUE
+      )
+    )
+  } else {
+    if (!inherits(e1, "Expression")) {
+      e1 <- Expression$scalar(e1)
+    }
+    if (!inherits(e2, "Expression")) {
+      e2 <- Expression$scalar(e2)
+    }
+
+    # In Arrow, "divide" is one function, which does integer division on
+    # integer inputs and floating-point division on floats
+    if (.Generic == "/") {
+      # TODO: omg so many ways it's wrong to assume these types
+      e1 <- Expression$create("cast", e1, options = list(to_type = float64()))

Review comment:
       You could define a `cast` method on Expression, `function(to_type, ...)`, and then you could rewrite these as `e1 <- e1$cast(float64())`, which I think reads nicer 

##########
File path: r/tests/testthat/test-compute-arith.R
##########
@@ -0,0 +1,77 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# TODO:
+# * More tests for edge cases, esp. with division; add test helpers here?
+# * Is there a better "autocasting" solution? See what rules C++ Datasets do
+# * test-dplyr tests (Added one addition, and one summarize, but check to see if
+# we can make summarize route through arrow need more?)
+# * then, dataset tests, special casing for division
+
+test_that("Addition", {
+  a <- Array$create(c(1:4, NA_integer_))
+  expect_type_equal(a, int32())
+  expect_type_equal(a + 4, int32())
+  expect_equal(a + 4, Array$create(c(5:8, NA_integer_)))
+  expect_identical(as.vector(a + 4), c(5:8, NA_integer_))
+  expect_equal(a + 4L, Array$create(c(5:8, NA_integer_)))
+  expect_vector(a + 4L, c(5:8, NA_integer_))
+  expect_equal(a + NA_integer_, Array$create(rep(NA_integer_, 5)))
+
+  # overflow errors — this is slightly different from R's `NA` coercion when
+  # overflowing, but better than the alternative of silently restarting
+  casted <- a$cast(int8())
+  expect_error(casted + 257)
+
+  skip("autocasting should happen in compute kernels; R workaround fails on this")

Review comment:
       From what you showed me in the dataset cpp code, the implicit casting it does (cast scalar to type of the column) also wouldn't promote as expected here. Might be worth making a dataset test that shows that, skipping it, and reporting a JIRA for Ben.

##########
File path: r/R/expression.R
##########
@@ -173,84 +205,84 @@ Expression <- R6Class("Expression", inherit = ArrowObject,
     ToString = function() dataset___expr__ToString(self)
   )
 )
-
+Expression$create <- function(function_name,
+                              ...,
+                              args = list(...),
+                              options = empty_named_list()) {
+  assert_that(is.string(function_name))
+  dataset___expr__call(function_name, args, options)
+}
 Expression$field_ref <- function(name) {
-  assert_is(name, "character")
-  assert_that(length(name) == 1)
+  assert_that(is.string(name))
   dataset___expr__field_ref(name)
 }
 Expression$scalar <- function(x) {
   dataset___expr__scalar(Scalar$create(x))
 }
-Expression$compare <- function(OP, e1, e2) {
-  comp_func <- comparison_function_map[[OP]]
-  if (is.null(comp_func)) {
-    stop(OP, " is not a supported comparison function", call. = FALSE)
-  }
-  comp_func(e1, e2)
-}
 
-comparison_function_map <- list(
-  "==" = dataset___expr__equal,
-  "!=" = dataset___expr__not_equal,
-  ">" = dataset___expr__greater,
-  ">=" = dataset___expr__greater_equal,
-  "<" = dataset___expr__less,
-  "<=" = dataset___expr__less_equal
-)
-Expression$in_ <- function(x, set) {
-  dataset___expr__in(x, Array$create(set))
-}
-Expression$and <- function(e1, e2) {
-  dataset___expr__and(e1, e2)
-}
-Expression$or <- function(e1, e2) {
-  dataset___expr__or(e1, e2)
-}
-Expression$not <- function(e1) {
-  dataset___expr__not(e1)
-}
-Expression$is_valid <- function(e1) {
-  dataset___expr__is_valid(e1)
+build_dataset_expression <- function(.Generic, e1, e2, ...) {
+  if (.Generic %in% names(.unary_function_map)) {
+    expr <- Expression$create(.unary_function_map[[.Generic]], e1)
+  } else if (.Generic == "%in%") {
+    # Special-case %in%, which is different from the Array function name
+    expr <- Expression$create("is_in", e1,
+      options = list(
+        value_set = Array$create(e2),
+        skip_nulls = TRUE
+      )
+    )
+  } else {
+    if (!inherits(e1, "Expression")) {
+      e1 <- Expression$scalar(e1)
+    }
+    if (!inherits(e2, "Expression")) {
+      e2 <- Expression$scalar(e2)
+    }
+
+    # In Arrow, "divide" is one function, which does integer division on
+    # integer inputs and floating-point division on floats
+    if (.Generic == "/") {
+      # TODO: omg so many ways it's wrong to assume these types
+      e1 <- Expression$create("cast", e1, options = list(to_type = float64()))
+      e2 <- Expression$create("cast", e2, options = list(to_type = float64()))
+    } else if (.Generic == "%/%") {
+      e1 <- Expression$create("cast", e1, options = list(to_type = float64()))
+      e2 <- Expression$create("cast", e2, options = list(to_type = float64()))
+      return(Expression$create("cast", Expression$create(.binary_function_map[[.Generic]], e1, e2, ...), options = list(to_type = int32(), allow_float_truncate = TRUE)))

Review comment:
       ```suggestion
         # In R, integer division works like floor(float division)
         out <- build_dataset_expression("/", e1, e2)
         return(out$cast(int32(), allow_float_truncate = TRUE))
   ```




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] jonkeane commented on a change in pull request #8947: ARROW-9187: [R] Add bindings for arithmetic kernels

Posted by GitBox <gi...@apache.org>.
jonkeane commented on a change in pull request #8947:
URL: https://github.com/apache/arrow/pull/8947#discussion_r550266972



##########
File path: r/tests/testthat/test-dplyr.R
##########
@@ -133,6 +133,42 @@ test_that("filtering with expression", {
   )
 })
 
+test_that("filtering with arithmetic", {
+  expect_dplyr_equal(
+    input %>%
+      filter(dbl + 1 > 3) %>%
+      select(string = chr, int, dbl) %>%
+      collect(),
+    tbl
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      filter(dbl / 2 > 3) %>%
+      select(string = chr, int, dbl) %>%
+      collect(),
+    tbl
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      filter(dbl / 2L > 3) %>%
+      select(string = chr, int, dbl) %>%
+      collect(),
+    tbl
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      filter(dbl %/% 2 > 3) %>%
+      select(string = chr, int, dbl) %>%
+      collect(),
+    tbl,
+    # TODO: why are record batched versions problematic?

Review comment:
       _turns out_ this works in neither tables nor record batches (I misunderstood the control flow of `expect_dplyr_equal()`: specifying `skip_record_batch` would actually skip both the record batch and table tests). I've updated `expect_dplyr_equal()` to prevent that mistake/misunderstanding in the future.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] jonkeane commented on a change in pull request #8947: ARROW-9187: [R] Add bindings for arithmetic kernels

Posted by GitBox <gi...@apache.org>.
jonkeane commented on a change in pull request #8947:
URL: https://github.com/apache/arrow/pull/8947#discussion_r549861998



##########
File path: r/R/expression.R
##########
@@ -173,84 +205,84 @@ Expression <- R6Class("Expression", inherit = ArrowObject,
     ToString = function() dataset___expr__ToString(self)
   )
 )
-
+Expression$create <- function(function_name,
+                              ...,
+                              args = list(...),
+                              options = empty_named_list()) {
+  assert_that(is.string(function_name))
+  dataset___expr__call(function_name, args, options)
+}
 Expression$field_ref <- function(name) {
-  assert_is(name, "character")
-  assert_that(length(name) == 1)
+  assert_that(is.string(name))
   dataset___expr__field_ref(name)
 }
 Expression$scalar <- function(x) {
   dataset___expr__scalar(Scalar$create(x))
 }
-Expression$compare <- function(OP, e1, e2) {
-  comp_func <- comparison_function_map[[OP]]
-  if (is.null(comp_func)) {
-    stop(OP, " is not a supported comparison function", call. = FALSE)
-  }
-  comp_func(e1, e2)
-}
 
-comparison_function_map <- list(
-  "==" = dataset___expr__equal,
-  "!=" = dataset___expr__not_equal,
-  ">" = dataset___expr__greater,
-  ">=" = dataset___expr__greater_equal,
-  "<" = dataset___expr__less,
-  "<=" = dataset___expr__less_equal
-)
-Expression$in_ <- function(x, set) {
-  dataset___expr__in(x, Array$create(set))
-}
-Expression$and <- function(e1, e2) {
-  dataset___expr__and(e1, e2)
-}
-Expression$or <- function(e1, e2) {
-  dataset___expr__or(e1, e2)
-}
-Expression$not <- function(e1) {
-  dataset___expr__not(e1)
-}
-Expression$is_valid <- function(e1) {
-  dataset___expr__is_valid(e1)
+build_dataset_expression <- function(.Generic, e1, e2, ...) {
+  if (.Generic %in% names(.unary_function_map)) {
+    expr <- Expression$create(.unary_function_map[[.Generic]], e1)
+  } else if (.Generic == "%in%") {
+    # Special-case %in%, which is different from the Array function name
+    expr <- Expression$create("is_in", e1,
+      options = list(
+        value_set = Array$create(e2),
+        skip_nulls = TRUE
+      )
+    )
+  } else {
+    if (!inherits(e1, "Expression")) {
+      e1 <- Expression$scalar(e1)
+    }
+    if (!inherits(e2, "Expression")) {
+      e2 <- Expression$scalar(e2)
+    }
+
+    # In Arrow, "divide" is one function, which does integer division on
+    # integer inputs and floating-point division on floats
+    if (.Generic == "/") {
+      # TODO: omg so many ways it's wrong to assume these types
+      e1 <- Expression$create("cast", e1, options = list(to_type = float64()))
+      e2 <- Expression$create("cast", e2, options = list(to_type = float64()))
+    } else if (.Generic == "%/%") {
+      e1 <- Expression$create("cast", e1, options = list(to_type = float64()))
+      e2 <- Expression$create("cast", e2, options = list(to_type = float64()))
+      return(Expression$create("cast", Expression$create(.binary_function_map[[.Generic]], e1, e2, ...), options = list(to_type = int32(), allow_float_truncate = TRUE)))
+    } else if (.Generic == "%%") {
+      # {e1 - e2 * ( e1 %/% e2 )}
+      # TODO: there has to be a way to use the form ^^^ instead of this.

Review comment:
       Ah that works fine (though casting all of the operands to be right here doesn't quite work; the previous hack didn't work in this case, but I failed to have a test).
   
   With array expressions, I get "cannot add bindings to a locked environment" (I'm pushing a commit that will have this in a comment in a sec)




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] nealrichardson commented on pull request #8947: ARROW-9187: [R] Add bindings for arithmetic kernels

Posted by GitBox <gi...@apache.org>.
nealrichardson commented on pull request #8947:
URL: https://github.com/apache/arrow/pull/8947#issuecomment-755762668


   Rebase wasn't clean so I opened #9117 with the commits cherry-picked.


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] nealrichardson closed pull request #8947: ARROW-9187: [R] Add bindings for arithmetic kernels

Posted by GitBox <gi...@apache.org>.
nealrichardson closed pull request #8947:
URL: https://github.com/apache/arrow/pull/8947


   


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] jonkeane commented on a change in pull request #8947: ARROW-9187: [R] Add bindings for arithmetic kernels

Posted by GitBox <gi...@apache.org>.
jonkeane commented on a change in pull request #8947:
URL: https://github.com/apache/arrow/pull/8947#discussion_r549834792



##########
File path: r/R/expression.R
##########
@@ -173,84 +205,84 @@ Expression <- R6Class("Expression", inherit = ArrowObject,
     ToString = function() dataset___expr__ToString(self)
   )
 )
-
+Expression$create <- function(function_name,
+                              ...,
+                              args = list(...),
+                              options = empty_named_list()) {
+  assert_that(is.string(function_name))
+  dataset___expr__call(function_name, args, options)
+}
 Expression$field_ref <- function(name) {
-  assert_is(name, "character")
-  assert_that(length(name) == 1)
+  assert_that(is.string(name))
   dataset___expr__field_ref(name)
 }
 Expression$scalar <- function(x) {
   dataset___expr__scalar(Scalar$create(x))
 }
-Expression$compare <- function(OP, e1, e2) {
-  comp_func <- comparison_function_map[[OP]]
-  if (is.null(comp_func)) {
-    stop(OP, " is not a supported comparison function", call. = FALSE)
-  }
-  comp_func(e1, e2)
-}
 
-comparison_function_map <- list(
-  "==" = dataset___expr__equal,
-  "!=" = dataset___expr__not_equal,
-  ">" = dataset___expr__greater,
-  ">=" = dataset___expr__greater_equal,
-  "<" = dataset___expr__less,
-  "<=" = dataset___expr__less_equal
-)
-Expression$in_ <- function(x, set) {
-  dataset___expr__in(x, Array$create(set))
-}
-Expression$and <- function(e1, e2) {
-  dataset___expr__and(e1, e2)
-}
-Expression$or <- function(e1, e2) {
-  dataset___expr__or(e1, e2)
-}
-Expression$not <- function(e1) {
-  dataset___expr__not(e1)
-}
-Expression$is_valid <- function(e1) {
-  dataset___expr__is_valid(e1)
+build_dataset_expression <- function(.Generic, e1, e2, ...) {
+  if (.Generic %in% names(.unary_function_map)) {
+    expr <- Expression$create(.unary_function_map[[.Generic]], e1)
+  } else if (.Generic == "%in%") {
+    # Special-case %in%, which is different from the Array function name
+    expr <- Expression$create("is_in", e1,
+      options = list(
+        value_set = Array$create(e2),
+        skip_nulls = TRUE
+      )
+    )
+  } else {
+    if (!inherits(e1, "Expression")) {
+      e1 <- Expression$scalar(e1)
+    }
+    if (!inherits(e2, "Expression")) {
+      e2 <- Expression$scalar(e2)
+    }
+
+    # In Arrow, "divide" is one function, which does integer division on
+    # integer inputs and floating-point division on floats
+    if (.Generic == "/") {
+      # TODO: omg so many ways it's wrong to assume these types
+      e1 <- Expression$create("cast", e1, options = list(to_type = float64()))

Review comment:
       Oh, right, of course. That's nice




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] jonkeane commented on a change in pull request #8947: ARROW-9187: [R] Add bindings for arithmetic kernels

Posted by GitBox <gi...@apache.org>.
jonkeane commented on a change in pull request #8947:
URL: https://github.com/apache/arrow/pull/8947#discussion_r550505455



##########
File path: r/tests/testthat/test-compute-arith.R
##########
@@ -0,0 +1,85 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+test_that("Addition", {
+  a <- Array$create(c(1:4, NA_integer_))
+  expect_type_equal(a, int32())
+  expect_type_equal(a + 4, int32())
+  expect_equal(a + 4, Array$create(c(5:8, NA_integer_)))
+  expect_identical(as.vector(a + 4), c(5:8, NA_integer_))
+  expect_equal(a + 4L, Array$create(c(5:8, NA_integer_)))
+  expect_vector(a + 4L, c(5:8, NA_integer_))
+  expect_equal(a + NA_integer_, Array$create(rep(NA_integer_, 5)))
+
+  # overflow errors — this is slightly different from R's `NA` coercion when
+  # overflowing, but better than the alternative of silently restarting
+  casted <- a$cast(int8())
+  expect_error(casted + 257)
+
+  skip("autocasting should happen in compute kernels; R workaround fails on this ARROW-11078")
+  expect_type_equal(a + 4.1, float64())
+  expect_equal(a + 4.1, Array$create(c(5.1, 6.1, 7.1, 8.1, NA_real_)))
+})
+
+test_that("Subtraction", {
+  a <- Array$create(c(1:4, NA_integer_))
+  expect_equal(a - 3, Array$create(c(-2:1, NA_integer_)))
+})
+
+test_that("Multiplication", {
+  a <- Array$create(c(1:4, NA_integer_))
+  expect_equal(a * 2, Array$create(c(1:4 * 2L, NA_integer_)))
+})
+
+test_that("Division", {
+  a <- Array$create(c(1:4, NA_integer_))
+  expect_equal(a / 2, Array$create(c(1:4 / 2, NA_real_)))
+  expect_equal(a %/% 2, Array$create(c(0L, 1L, 1L, 2L, NA_integer_)))
+  expect_equal(a / 2 / 2, Array$create(c(1:4 / 2 / 2, NA_real_)))
+  expect_equal(a %/% 2 %/% 2, Array$create(c(0L, 0L, 0L, 1L, NA_integer_)))
+
+  b <- a$cast(float64())
+  expect_equal(b / 2, Array$create(c(1:4 / 2, NA_real_)))
+  expect_equal(b %/% 2, Array$create(c(0L, 1L, 1L, 2L, NA_integer_)))
+
+  # the behavior of %/% matches R's (i.e. the integer of the quotient, not
+  # simply dividing two integers)
+  expect_equal(b / 2.2, Array$create(c(1:4 / 2.2, NA_real_)))
+  # c(1:4) %/% 2.2 != c(1:4) %/% as.integer(2.2)
+  # c(1:4) %/% 2.2             == c(0L, 0L, 1L, 1L)
+  # c(1:4) %/% as.integer(2.2) == c(0L, 1L, 1L, 2L)
+  expect_equal(b %/% 2.2, Array$create(c(0L, 0L, 1L, 1L, NA_integer_)))
+
+  expect_equal(a %% 2, Array$create(c(1L, 0L, 1L, 0L, NA_integer_)))
+
+  expect_equal(b %% 2, Array$create(c(1:4 %% 2, NA_real_)))
+})
+
+test_that("Dates casting", {
+  a <- Array$create(c(Sys.Date() + 1:4, NA_integer_))
+
+  skip("autocasting should happen in compute kernels; R workaround fails on this ARROW-11078")
+  expect_equal(a + 2, Array$create(c((Sys.Date() + 1:4 ) + 2), NA_integer_))
+})
+
+test_that("Datetimes", {
+  a <- Array$create(c(Sys.time() + 1:4, NA_integer_))
+  b <- Scalar$create(Sys.time())
+  result <- a - b
+  expect_is(result$type, "DataType")
+  expect_identical(result$type$ToString(), "duration[us]")

Review comment:
       Ok, I've made ARROW-11090 — I'm honestly not sure these tests are even worth keeping + skipping with that since we will almost certainly make better support for durations (and other time-related types that don't have as much support) arrow > R at that point.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] nealrichardson commented on a change in pull request #8947: ARROW-9187: [R] Add bindings for arithmetic kernels

Posted by GitBox <gi...@apache.org>.
nealrichardson commented on a change in pull request #8947:
URL: https://github.com/apache/arrow/pull/8947#discussion_r550293049



##########
File path: r/R/expression.R
##########
@@ -59,6 +59,44 @@ build_array_expression <- function(.Generic, e1, e2, ...) {
   } else {
     e1 <- .wrap_arrow(e1, .Generic, e2$type)
     e2 <- .wrap_arrow(e2, .Generic, e1$type)
+
+    # In Arrow, "divide" is one function, which does integer division on
+    # integer inputs and floating-point division on floats
+    if (.Generic == "/") {
+      # TODO: omg so many ways it's wrong to assume these types
+      e1 <- e1$cast(float64())

Review comment:
       Don't `$cast()` here because that evaluates. This should still be `array_expression(...)`, though you could write a `cast_array_expression()` helper that is like `Expression$cast()` if you want.

##########
File path: r/R/expression.R
##########
@@ -153,104 +200,82 @@ print.array_expression <- function(x, ...) {
 #' `Expression$field_ref(name)` is used to construct an `Expression` which
 #' evaluates to the named column in the `Dataset` against which it is evaluated.
 #'
-#' `Expression$compare(OP, e1, e2)` takes two `Expression` operands, constructing
-#' an `Expression` which will evaluate these operands then compare them with the
-#' relation specified by OP (e.g. "==", "!=", ">", etc.) For example, to filter
-#' down to rows where the column named "alpha" is less than 5:
-#' `Expression$compare("<", Expression$field_ref("alpha"), Expression$scalar(5))`
-#'
-#' `Expression$and(e1, e2)`, `Expression$or(e1, e2)`, and `Expression$not(e1)`
-#' construct an `Expression` combining their arguments with Boolean operators.
-#'
-#' `Expression$is_valid(x)` is essentially (an inversion of) `is.na()` for `Expression`s.
-#'
-#' `Expression$in_(x, set)` evaluates x and returns whether or not it is a member of the set.
+#' `Expression$create(function_name, ..., options)` builds a function-call
+#' `Expression` containing one or more `Expression`s.
 #' @name Expression
 #' @rdname Expression
 #' @export
 Expression <- R6Class("Expression", inherit = ArrowObject,
   public = list(
-    ToString = function() dataset___expr__ToString(self)
+    ToString = function() dataset___expr__ToString(self),
+    cast = function(to_type, ...) {
+      Expression$create("cast", self, options = list(to_type = to_type, ...))
+    }
   )
 )
-
+Expression$create <- function(function_name,
+                              ...,
+                              args = list(...),
+                              options = empty_named_list()) {
+  assert_that(is.string(function_name))
+  dataset___expr__call(function_name, args, options)
+}
 Expression$field_ref <- function(name) {
-  assert_is(name, "character")
-  assert_that(length(name) == 1)
+  assert_that(is.string(name))
   dataset___expr__field_ref(name)
 }
 Expression$scalar <- function(x) {
   dataset___expr__scalar(Scalar$create(x))
 }
-Expression$compare <- function(OP, e1, e2) {
-  comp_func <- comparison_function_map[[OP]]
-  if (is.null(comp_func)) {
-    stop(OP, " is not a supported comparison function", call. = FALSE)
-  }
-  comp_func(e1, e2)
-}
 
-comparison_function_map <- list(
-  "==" = dataset___expr__equal,
-  "!=" = dataset___expr__not_equal,
-  ">" = dataset___expr__greater,
-  ">=" = dataset___expr__greater_equal,
-  "<" = dataset___expr__less,
-  "<=" = dataset___expr__less_equal
-)
-Expression$in_ <- function(x, set) {
-  dataset___expr__in(x, Array$create(set))
-}
-Expression$and <- function(e1, e2) {
-  dataset___expr__and(e1, e2)
-}
-Expression$or <- function(e1, e2) {
-  dataset___expr__or(e1, e2)
-}
-Expression$not <- function(e1) {
-  dataset___expr__not(e1)
-}
-Expression$is_valid <- function(e1) {
-  dataset___expr__is_valid(e1)
+build_dataset_expression <- function(.Generic, e1, e2, ...) {
+  if (.Generic %in% names(.unary_function_map)) {
+    expr <- Expression$create(.unary_function_map[[.Generic]], e1)
+  } else if (.Generic == "%in%") {
+    # Special-case %in%, which is different from the Array function name
+    expr <- Expression$create("is_in", e1,
+      options = list(
+        value_set = Array$create(e2),
+        skip_nulls = TRUE
+      )
+    )
+  } else {
+    if (!inherits(e1, "Expression")) {
+      e1 <- Expression$scalar(e1)
+    }
+    if (!inherits(e2, "Expression")) {
+      e2 <- Expression$scalar(e2)
+    }
+
+    # In Arrow, "divide" is one function, which does integer division on
+    # integer inputs and floating-point division on floats
+    if (.Generic == "/") {
+      # TODO: omg so many ways it's wrong to assume these types
+      e1 <- e1$cast(float64())
+      e2 <- e2$cast(float64())
+    } else if (.Generic == "%/%") {
+      # In R, integer division works like floor(float division)
+      out <- build_dataset_expression("/", e1, e2)
+      return(out$cast(int32(), allow_float_truncate = TRUE))
+    } else if (.Generic == "%%") {
+      # TODO: need to do something with types to ensure that e2 is compatible

Review comment:
       Is this TODO valid?

##########
File path: r/R/expression.R
##########
@@ -59,6 +59,44 @@ build_array_expression <- function(.Generic, e1, e2, ...) {
   } else {
     e1 <- .wrap_arrow(e1, .Generic, e2$type)
     e2 <- .wrap_arrow(e2, .Generic, e1$type)
+
+    # In Arrow, "divide" is one function, which does integer division on
+    # integer inputs and floating-point division on floats
+    if (.Generic == "/") {
+      # TODO: omg so many ways it's wrong to assume these types
+      e1 <- e1$cast(float64())
+      e2 <- e2$cast(float64())
+    } else if (.Generic == "%/%") {
+      return(array_expression("cast", array_expression(.binary_function_map[[.Generic]], e1, e2, ...), options = list(to_type = int32(), allow_float_truncate = TRUE)))
+    } else if (.Generic == "%%") {
+      # {e1 - e2 * ( e1 %/% e2 )}
+      # TODO: there has to be a way to use the form ^^^ instead of this.
+      # with return(e1 - e2 * (e1 %/% e2)) we get:
+      # "cannot add bindings to a locked environment"
+      out <- array_expression(
+        "subtract_checked", e1, array_expression(
+          "multiply_checked", e2, array_expression(
+            # this outer cast is to ensure that the result of this and the
+            # result of multiply are the same
+            "cast",
+            array_expression(
+              "cast",
+              array_expression(.binary_function_map[[.Generic]], e1, e2, ...),
+              options = list(to_type = int32(), allow_float_truncate = TRUE)
+            ),
+            options = list(to_type = e2$type, allow_float_truncate = TRUE)
+          )
+        )
+      )
+      return(out)
+    }
+
+    # hack to use subtract instead of subtract_checked for timestamps

Review comment:
       Why only subtract?
   
   And technically this could also be `else if` from above

##########
File path: r/R/expression.R
##########
@@ -59,6 +59,44 @@ build_array_expression <- function(.Generic, e1, e2, ...) {
   } else {
     e1 <- .wrap_arrow(e1, .Generic, e2$type)
     e2 <- .wrap_arrow(e2, .Generic, e1$type)
+
+    # In Arrow, "divide" is one function, which does integer division on
+    # integer inputs and floating-point division on floats
+    if (.Generic == "/") {
+      # TODO: omg so many ways it's wrong to assume these types
+      e1 <- e1$cast(float64())
+      e2 <- e2$cast(float64())
+    } else if (.Generic == "%/%") {
+      return(array_expression("cast", array_expression(.binary_function_map[[.Generic]], e1, e2, ...), options = list(to_type = int32(), allow_float_truncate = TRUE)))
+    } else if (.Generic == "%%") {
+      # {e1 - e2 * ( e1 %/% e2 )}
+      # TODO: there has to be a way to use the form ^^^ instead of this.
+      # with return(e1 - e2 * (e1 %/% e2)) we get:
+      # "cannot add bindings to a locked environment"

Review comment:
       I'll pull and see if I can figure out why this is problematic, though maybe it's solved by removing `$cast()` above

##########
File path: r/tests/testthat/test-dplyr.R
##########
@@ -133,6 +139,76 @@ test_that("filtering with expression", {
   )
 })
 
+test_that("filtering with arithmetic", {
+  expect_dplyr_equal(
+    input %>%
+      filter(dbl + 1 > 3) %>%
+      select(string = chr, int, dbl) %>%
+      collect(),
+    tbl
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      filter(dbl / 2 > 3) %>%
+      select(string = chr, int, dbl) %>%
+      collect(),
+    tbl
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      filter(dbl / 2L > 3) %>%
+      select(string = chr, int, dbl) %>%
+      collect(),
+    tbl
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      filter(int / 2 > 3) %>%
+      select(string = chr, int, dbl) %>%
+      collect(),
+    tbl
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      filter(int / 2L > 3) %>%
+      select(string = chr, int, dbl) %>%
+      collect(),
+    tbl
+  )
+
+  skip("autocasting should happen in compute kernels; R workaround fails on this ARROW-11078")

Review comment:
       Does this fail for the same reason that the comparisons below fail? `int / 2L` is an integer but the `array_expression` doesn't know the resulting type, so it can't cast `3` to it?
   
   How does it fail? Gracefully or not?
   
   We could work around this either by tracking/guessing the resulting type and sticking it in the array_expression object or by deferring the autocasting until the expressions are evaluated (so we'll know the type of `int / 2L` when we evaluate `that > 3`), but it's probably not worth it now. Maybe make a JIRA for us to come back to?

##########
File path: r/tests/testthat/test-dataset.R
##########
@@ -494,12 +494,104 @@ test_that("filter() on date32 columns", {
   )
 })
 
+test_that("filter() with expressions", {
+  ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8()))
+  expect_is(ds$format, "ParquetFileFormat")
+  expect_is(ds$filesystem, "LocalFileSystem")
+  expect_is(ds, "Dataset")
+  expect_equivalent(
+    ds %>%
+      select(chr, dbl) %>%
+      filter(dbl * 2 > 14 & dbl - 50 < 3L) %>%
+      collect() %>%
+      arrange(dbl),
+    rbind(
+      df1[8:10, c("chr", "dbl")],
+      df2[1:2, c("chr", "dbl")]
+    )
+  )
+
+  # check division's special casing.
+  expect_equivalent(
+    ds %>%
+      select(chr, dbl) %>%
+      filter(dbl / 2 > 3.5 & dbl < 53) %>%
+      collect() %>%
+      arrange(dbl),
+    rbind(
+      df1[8:10, c("chr", "dbl")],
+      df2[1:2, c("chr", "dbl")]
+    )
+  )
+
+  expect_equivalent(
+    ds %>%
+      select(chr, dbl, int) %>%
+      filter(int %/% 2L > 3 & dbl < 53) %>%
+      collect() %>%
+      arrange(dbl),
+    rbind(
+      df1[8:10, c("chr", "dbl", "int")],
+      df2[1:2, c("chr", "dbl", "int")]
+    )
+  )
+
+  expect_equivalent(
+    ds %>%
+      select(chr, dbl, int) %>%
+      filter(int %/% 2 > 3 & dbl < 53) %>%
+      collect() %>%
+      arrange(dbl),
+    rbind(
+      df1[8:10, c("chr", "dbl", "int")],
+      df2[1:2, c("chr", "dbl", "int")]
+    )
+  )
+
+  expect_equivalent(
+    ds %>%
+      select(chr, dbl, int) %>%
+      filter(int %% 2L > 0 & dbl < 53) %>%
+      collect() %>%
+      arrange(dbl),
+    rbind(
+      df1[c(1, 3, 5, 7, 9), c("chr", "dbl", "int")],
+      df2[1, c("chr", "dbl", "int")]
+    )
+  )
+
+  skip("autocasting should happen in compute kernels; R workaround fails on this ARROW-11078")

Review comment:
       But datasets do have autocasting, so this should work, and if it doesn't, sounds like a different JIRA

##########
File path: r/tests/testthat/test-compute-arith.R
##########
@@ -0,0 +1,85 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+test_that("Addition", {
+  a <- Array$create(c(1:4, NA_integer_))
+  expect_type_equal(a, int32())
+  expect_type_equal(a + 4, int32())
+  expect_equal(a + 4, Array$create(c(5:8, NA_integer_)))
+  expect_identical(as.vector(a + 4), c(5:8, NA_integer_))
+  expect_equal(a + 4L, Array$create(c(5:8, NA_integer_)))
+  expect_vector(a + 4L, c(5:8, NA_integer_))
+  expect_equal(a + NA_integer_, Array$create(rep(NA_integer_, 5)))
+
+  # overflow errors — this is slightly different from R's `NA` coercion when
+  # overflowing, but better than the alternative of silently restarting
+  casted <- a$cast(int8())
+  expect_error(casted + 257)
+
+  skip("autocasting should happen in compute kernels; R workaround fails on this ARROW-11078")
+  expect_type_equal(a + 4.1, float64())
+  expect_equal(a + 4.1, Array$create(c(5.1, 6.1, 7.1, 8.1, NA_real_)))
+})
+
+test_that("Subtraction", {
+  a <- Array$create(c(1:4, NA_integer_))
+  expect_equal(a - 3, Array$create(c(-2:1, NA_integer_)))
+})
+
+test_that("Multiplication", {
+  a <- Array$create(c(1:4, NA_integer_))
+  expect_equal(a * 2, Array$create(c(1:4 * 2L, NA_integer_)))
+})
+
+test_that("Division", {
+  a <- Array$create(c(1:4, NA_integer_))
+  expect_equal(a / 2, Array$create(c(1:4 / 2, NA_real_)))
+  expect_equal(a %/% 2, Array$create(c(0L, 1L, 1L, 2L, NA_integer_)))
+  expect_equal(a / 2 / 2, Array$create(c(1:4 / 2 / 2, NA_real_)))
+  expect_equal(a %/% 2 %/% 2, Array$create(c(0L, 0L, 0L, 1L, NA_integer_)))
+
+  b <- a$cast(float64())
+  expect_equal(b / 2, Array$create(c(1:4 / 2, NA_real_)))
+  expect_equal(b %/% 2, Array$create(c(0L, 1L, 1L, 2L, NA_integer_)))
+
+  # the behavior of %/% matches R's (i.e. the integer of the quotient, not
+  # simply dividing two integers)
+  expect_equal(b / 2.2, Array$create(c(1:4 / 2.2, NA_real_)))
+  # c(1:4) %/% 2.2 != c(1:4) %/% as.integer(2.2)
+  # c(1:4) %/% 2.2             == c(0L, 0L, 1L, 1L)
+  # c(1:4) %/% as.integer(2.2) == c(0L, 1L, 1L, 2L)
+  expect_equal(b %/% 2.2, Array$create(c(0L, 0L, 1L, 1L, NA_integer_)))
+
+  expect_equal(a %% 2, Array$create(c(1L, 0L, 1L, 0L, NA_integer_)))
+
+  expect_equal(b %% 2, Array$create(c(1:4 %% 2, NA_real_)))
+})
+
+test_that("Dates casting", {
+  a <- Array$create(c(Sys.Date() + 1:4, NA_integer_))
+
+  skip("autocasting should happen in compute kernels; R workaround fails on this ARROW-11078")
+  expect_equal(a + 2, Array$create(c((Sys.Date() + 1:4 ) + 2), NA_integer_))
+})
+
+test_that("Datetimes", {
+  a <- Array$create(c(Sys.time() + 1:4, NA_integer_))
+  b <- Scalar$create(Sys.time())
+  result <- a - b
+  expect_is(result$type, "DataType")
+  expect_identical(result$type$ToString(), "duration[us]")

Review comment:
       (1) I don't think this is right. timestamp + integer = timestamp, but because the integer is cast to timestamp, you get a duration.
   (1b) Maybe you could support that by casting the integer to duration (same units as the timestamp), but this is a bunch of stuff that should probably get handled in C++.
   (2) According to https://arrow.apache.org/docs/r/articles/arrow.html#arrow-to-r, we don't support converting duration types to R. 
   
   Given this, I think arithmetic with dates/times/timestamps should be punted to its own JIRA.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org