You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by pa...@apache.org on 2023/06/27 17:40:03 UTC

[arrow] branch main updated: GH-36121: [R] Warn for `set_io_thread_count()` with `num_threads` < 2 (#36304)

This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 4a93a37aea GH-36121: [R] Warn for `set_io_thread_count()` with `num_threads` < 2 (#36304)
4a93a37aea is described below

commit 4a93a37aeaaab2d3fe8ee01f11efbb0c7c756711
Author: Dewey Dunnington <de...@voltrondata.com>
AuthorDate: Tue Jun 27 14:39:57 2023 -0300

    GH-36121: [R] Warn for `set_io_thread_count()` with `num_threads` < 2 (#36304)
    
    ### Rationale for this change
    
    Setting the number of threads in the IO thread pool to 1 causes a hang or crash when using some functions (notably: any Acero exec plan).
    
    ### What changes are included in this PR?
    
    `set_io_thread_count()` now warns for `num_threads == 1`:
    
    ``` r
    library(arrow, warn.conflicts = FALSE)
    #> Some features are not enabled in this build of Arrow. Run `arrow_info()` for more information.
    
    # Already errors from C++
    set_io_thread_count(0)
    #> Error: Invalid: ThreadPool capacity must be > 0
    # New warning!
    set_io_thread_count(1)
    #> Warning: `arrow::set_io_thread_count()` with num_threads < 2 may
    #> cause certain operations to hang or crash.
    #> ℹ Use num_threads >= 2 to support all operations
    # No warning!
    set_io_thread_count(2)
    ```
    
    <sup>Created on 2023-06-26 with [reprex v2.0.2](https://reprex.tidyverse.org)</sup>
    
    ### Are these changes tested?
    
    Yes
    
    ### Are there any user-facing changes?
    
    Yes: some existing code may issue a warning that previously did not. Documentation was added.
    * Closes: #36121
    
    Authored-by: Dewey Dunnington <de...@voltrondata.com>
    Signed-off-by: Dewey Dunnington <de...@voltrondata.com>
---
 r/R/config.R                   | 18 +++++++++++++++++-
 r/man/io_thread_count.Rd       |  4 +++-
 r/tests/testthat/test-config.R | 17 ++++++++++++++---
 3 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/r/R/config.R b/r/R/config.R
index e373b3ee3e..bd00afe1be 100644
--- a/r/R/config.R
+++ b/r/R/config.R
@@ -39,10 +39,26 @@ io_thread_count <- function() {
 }
 
 #' @rdname io_thread_count
-#' @param num_threads integer: New number of threads for thread pool
+#' @param num_threads integer: New number of threads for thread pool. At least
+#'   two threads are reccomended to support all operations in the arrow
+#'   package.
 #' @export
 set_io_thread_count <- function(num_threads) {
   current_io_thread_count <- io_thread_count()
   SetIOThreadPoolCapacity(as.integer(num_threads))
+
+  # Warn for IO thread count < 2: Arrow C++ makes the assumption that there
+  # is at least one thread available and the R package uses one thread from
+  # the IO thread pool to support calling into R from C++.
+  if (num_threads < 2) {
+    rlang::warn(
+      c(
+        "`arrow::set_io_thread_count()` with num_threads < 2 may",
+        "cause certain operations to hang or crash.",
+        "i" = "Use num_threads >= 2 to support all operations"
+      )
+    )
+  }
+
   invisible(current_io_thread_count)
 }
diff --git a/r/man/io_thread_count.Rd b/r/man/io_thread_count.Rd
index b1dfa0ba78..6cd44e1f6e 100644
--- a/r/man/io_thread_count.Rd
+++ b/r/man/io_thread_count.Rd
@@ -10,7 +10,9 @@ io_thread_count()
 set_io_thread_count(num_threads)
 }
 \arguments{
-\item{num_threads}{integer: New number of threads for thread pool}
+\item{num_threads}{integer: New number of threads for thread pool. At least
+two threads are reccomended to support all operations in the arrow
+package.}
 }
 \description{
 Manage the global I/O thread pool in libarrow
diff --git a/r/tests/testthat/test-config.R b/r/tests/testthat/test-config.R
index a093a5fb73..f65626a079 100644
--- a/r/tests/testthat/test-config.R
+++ b/r/tests/testthat/test-config.R
@@ -19,11 +19,22 @@ test_that("set_io_thread_count() sets the number of io threads", {
   current_io_thread_count <- io_thread_count()
   on.exit(set_io_thread_count(current_io_thread_count))
 
-  previous_io_thread_count <- set_io_thread_count(1)
+  previous_io_thread_count <- set_io_thread_count(2)
   expect_identical(previous_io_thread_count, current_io_thread_count)
-  expect_identical(io_thread_count(), 1L)
+  expect_identical(io_thread_count(), 2L)
 
-  expect_identical(set_io_thread_count(current_io_thread_count), 1L)
+  expect_identical(set_io_thread_count(current_io_thread_count), 2L)
+})
+
+test_that("set_io_thread_count() warns for num_threads == 1",  {
+  current_io_thread_count <- io_thread_count()
+  on.exit(set_io_thread_count(current_io_thread_count))
+
+  expect_warning(
+    set_io_thread_count(1),
+    "num_threads < 2",
+    fixed = TRUE
+  )
 })
 
 test_that("set_cpu_count() sets the number of CPU threads", {