You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ks...@apache.org on 2020/05/13 01:15:19 UTC

[arrow] 03/17: ARROW-8549: [R] Assorted post-0.17 release cleanups

This is an automated email from the ASF dual-hosted git repository.

kszucs pushed a commit to branch maint-0.17.x
in repository https://gitbox.apache.org/repos/asf/arrow.git

commit 558dae47a6408fe41d241eafbdd60b8e4bcc495c
Author: Neal Richardson <ne...@gmail.com>
AuthorDate: Wed Apr 22 12:42:59 2020 -0700

    ARROW-8549: [R] Assorted post-0.17 release cleanups
    
    Functional changes in the R package installation:
    
    * Downloading dependencies is made completely silent by default
    * When downloading source, it now tries three Apache mirrors
    * Limit parallelization in the C++ build to 2 CPUs (unless MAKEFLAGS is set), per CRAN repository policy
    
    CI/test changes:
    
    * Adds a `try()` around a non-essential dependency download to fix a source of occasional build flakiness
    * Makes the R install logs always print out, not just on failure. It was useful to be able to see what exactly the successful builds were doing to make sure they were doing the expected right things.
    * Makes the `--as-cran` checks build and check the docs/vignettes. This adds a lot of time to the build but is only done nightly, and something needs to check this, at least before release
    
    Closes #6995 from nealrichardson/0.17.0-rpkg
    
    Authored-by: Neal Richardson <ne...@gmail.com>
    Signed-off-by: Neal Richardson <ne...@gmail.com>
---
 ci/scripts/r_deps.sh              |  3 +-
 ci/scripts/r_test.sh              | 11 ++---
 dev/tasks/r/azure.linux.yml       |  3 +-
 dev/tasks/r/github.linux.cran.yml |  3 +-
 docker-compose.yml                |  3 ++
 r/cran-comments.md                |  5 ++-
 r/tools/linuxlibs.R               | 93 +++++++++++++++++++++++++--------------
 r/tools/winlibs.R                 |  9 +++-
 8 files changed, 83 insertions(+), 47 deletions(-)

diff --git a/ci/scripts/r_deps.sh b/ci/scripts/r_deps.sh
index edb8cdf..7cf6dc5 100755
--- a/ci/scripts/r_deps.sh
+++ b/ci/scripts/r_deps.sh
@@ -27,7 +27,8 @@ pushd ${source_dir}
 # Install R package dependencies
 ${R_BIN} -e "install.packages('remotes'); remotes::install_cran(c('glue', 'rcmdcheck'))"
 ${R_BIN} -e "remotes::install_deps(dependencies = TRUE)"
-${R_BIN} -e "remotes::install_github('nealrichardson/decor')"
+# This isn't required for testing, only for if you're using this to build your dev environment
+${R_BIN} -e "try(remotes::install_github('nealrichardson/decor'))"
 
 popd
 
diff --git a/ci/scripts/r_test.sh b/ci/scripts/r_test.sh
index e79fac5..edfa32b 100755
--- a/ci/scripts/r_test.sh
+++ b/ci/scripts/r_test.sh
@@ -41,15 +41,16 @@ export TEST_R_WITH_ARROW=TRUE
 export _R_CHECK_TESTS_NLINES_=0
 export _R_CHECK_CRAN_INCOMING_REMOTE_=FALSE
 export _R_CHECK_LIMIT_CORES_=FALSE
-export VERSION=$(grep ^Version DESCRIPTION | sed s/Version:\ //)
 
 # Make sure we aren't writing to the home dir (CRAN _hates_ this but there is no official check)
 BEFORE=$(ls -alh ~/)
 
-# Conditionally run --as-cran because crossbow jobs aren't using _R_CHECK_COMPILATION_FLAGS_KNOWN_
-# (maybe an R version thing, needs 3.6.2?)
-# Also only --run-donttest if NOT_CRAN because Parquet example requires snappy (optional dependency)
-${R_BIN} -e "cran <- !identical(tolower(Sys.getenv('NOT_CRAN')), 'true'); rcmdcheck::rcmdcheck(build_args = '--no-build-vignettes', args = c('--no-manual', '--ignore-vignettes', ifelse(cran, '--as-cran', '--run-donttest')), error_on = 'warning', check_dir = 'check')"
+${R_BIN} -e "as_cran <- !identical(tolower(Sys.getenv('NOT_CRAN')), 'true')
+  if (as_cran) {
+    rcmdcheck::rcmdcheck(args = c('--as-cran', '--run-donttest'), error_on = 'warning', check_dir = 'check')
+  } else {
+    rcmdcheck::rcmdcheck(build_args = '--no-build-vignettes', args = c('--no-manual', '--ignore-vignettes', '--run-donttest'), error_on = 'warning', check_dir = 'check')
+  }"
 
 AFTER=$(ls -alh ~/)
 if [ "$BEFORE" != "$AFTER" ]; then
diff --git a/dev/tasks/r/azure.linux.yml b/dev/tasks/r/azure.linux.yml
index 59d5b02..4b4211c 100644
--- a/dev/tasks/r/azure.linux.yml
+++ b/dev/tasks/r/azure.linux.yml
@@ -55,5 +55,4 @@ jobs:
     - script: |
         set -ex
         cat arrow/r/check/arrow.Rcheck/00install.out
-      displayName: Dump install logs on failure
-      condition: failed()
+      displayName: Dump install logs
diff --git a/dev/tasks/r/github.linux.cran.yml b/dev/tasks/r/github.linux.cran.yml
index 0a5362b..2b2d884 100644
--- a/dev/tasks/r/github.linux.cran.yml
+++ b/dev/tasks/r/github.linux.cran.yml
@@ -62,6 +62,5 @@ jobs:
       - name: Docker Run
         shell: bash
         run: cd arrow && docker-compose run r
-      - name: Dump install logs on failure
-        if: failure()
+      - name: Dump install logs
         run: cat arrow/r/check/arrow.Rcheck/00install.out
diff --git a/docker-compose.yml b/docker-compose.yml
index 9c0bfdd..ec8d483 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -784,6 +784,7 @@ services:
     shm_size: *shm-size
     environment:
       <<: *ccache
+      NOT_CRAN: 'true'
     volumes: *conda-volumes
     command:
       ["/arrow/ci/scripts/cpp_build.sh /arrow /build &&
@@ -808,6 +809,7 @@ services:
       <<: *ccache
       ARROW_R_CXXFLAGS: '-Werror'
       LIBARROW_BUILD: 'false'
+      NOT_CRAN: 'true'
     volumes: *ubuntu-volumes
     command: >
       /bin/bash -c "
@@ -834,6 +836,7 @@ services:
     environment:
       LIBARROW_DOWNLOAD: "false"
       ARROW_HOME: "/arrow"
+      # To test for CRAN release, delete ^^ these two env vars so we download the Apache release
       ARROW_USE_PKG_CONFIG: "false"
     volumes:
       - .:/arrow:delegated
diff --git a/r/cran-comments.md b/r/cran-comments.md
index 59f80b6..07a184b 100644
--- a/r/cran-comments.md
+++ b/r/cran-comments.md
@@ -1,8 +1,9 @@
 ## Test environments
-* Debian Linux, R-devel, GCC ASAN/UBSAN
+* Debian Linux, GCC, R-devel/R-patched/R-release
+* Fedora Linux, GCC/clang, R-devel
 * Ubuntu Linux 16.04 LTS, R-release, GCC
 * win-builder (R-devel and R-release)
-* macOS (10.11, 10.14), R-release
+* macOS 10.14, R-release
 
 ## R CMD check results
 
diff --git a/r/tools/linuxlibs.R b/r/tools/linuxlibs.R
index b87465b..96fd8e1 100644
--- a/r/tools/linuxlibs.R
+++ b/r/tools/linuxlibs.R
@@ -20,10 +20,6 @@ VERSION <- args[1]
 dst_dir <- paste0("libarrow/arrow-", VERSION)
 
 arrow_repo <- "https://dl.bintray.com/ursalabs/arrow-r/libarrow/"
-apache_src_url <- paste0(
-  "https://archive.apache.org/dist/arrow/arrow-", VERSION,
-  "/apache-arrow-", VERSION, ".tar.gz"
-)
 
 options(.arrow.cleanup = character()) # To collect dirs to rm on exit
 on.exit(unlink(getOption(".arrow.cleanup")))
@@ -40,17 +36,23 @@ binary_ok <- !identical(tolower(Sys.getenv("LIBARROW_BINARY", "false")), "false"
 # For local debugging, set ARROW_R_DEV=TRUE to make this script print more
 quietly <- !env_is("ARROW_R_DEV", "true")
 
+try_download <- function(from_url, to_file) {
+  try(
+    suppressWarnings(
+      download.file(from_url, to_file, quiet = quietly)
+    ),
+    silent = quietly
+  )
+  file.exists(to_file)
+}
+
 download_binary <- function(os = identify_os()) {
   libfile <- tempfile()
   if (!is.null(os)) {
     # See if we can map this os-version to one we have binaries for
     os <- find_available_binary(os)
     binary_url <- paste0(arrow_repo, "bin/", os, "/arrow-", VERSION, ".zip")
-    try(
-      download.file(binary_url, libfile, quiet = quietly),
-      silent = quietly
-    )
-    if (file.exists(libfile)) {
+    if (try_download(binary_url, libfile)) {
       cat(sprintf("*** Successfully retrieved C++ binaries for %s\n", os))
     } else {
       cat(sprintf("*** No C++ binaries found for %s\n", os))
@@ -133,31 +135,54 @@ find_available_binary <- function(os) {
 
 download_source <- function() {
   tf1 <- tempfile()
-  src_dir <- NULL
-  source_url <- paste0(arrow_repo, "src/arrow-", VERSION, ".zip")
-  try(
-    download.file(source_url, tf1, quiet = quietly),
-    silent = quietly
-  )
-  if (!file.exists(tf1)) {
-    # Try for an official release
-    try(
-      download.file(apache_src_url, tf1, quiet = quietly),
-      silent = quietly
-    )
-  }
-  if (file.exists(tf1)) {
+  src_dir <- tempfile()
+  if (bintray_download(tf1)) {
+    # First try from bintray
     cat("*** Successfully retrieved C++ source\n")
-    src_dir <- tempfile()
     unzip(tf1, exdir = src_dir)
     unlink(tf1)
-    # These scripts need to be executable
-    system(sprintf("chmod 755 %s/cpp/build-support/*.sh", src_dir))
-    options(.arrow.cleanup = c(getOption(".arrow.cleanup"), src_dir))
-    # The actual src is in cpp
     src_dir <- paste0(src_dir, "/cpp")
+  } else if (apache_download(tf1)) {
+    # If that fails, try for an official release
+    cat("*** Successfully retrieved C++ source\n")
+    untar(tf1, exdir = src_dir)
+    unlink(tf1)
+    src_dir <- paste0(src_dir, "/apache-arrow-", VERSION, "/cpp")
   }
-  src_dir
+
+  if (dir.exists(src_dir)) {
+    options(.arrow.cleanup = c(getOption(".arrow.cleanup"), src_dir))
+    # These scripts need to be executable
+    system(
+      sprintf("chmod 755 %s/build-support/*.sh", src_dir),
+      ignore.stdout = quietly, ignore.stderr = quietly
+    )
+    return(src_dir)
+  } else {
+    return(NULL)
+  }
+}
+
+bintray_download <- function(destfile) {
+  source_url <- paste0(arrow_repo, "src/arrow-", VERSION, ".zip")
+  try_download(source_url, destfile)
+}
+
+apache_download <- function(destfile, n_mirrors = 3) {
+  apache_path <- paste0("arrow/arrow-", VERSION, "/apache-arrow-", VERSION, ".tar.gz")
+  apache_urls <- c(
+    # This returns a different mirror each time
+    rep("https://www.apache.org/dyn/closer.lua?action=download&filename=", n_mirrors),
+    "https://downloads.apache.org/" # The backup
+  )
+  downloaded <- FALSE
+  for (u in apache_urls) {
+    downloaded <- try_download(paste0(u, apache_path), destfile)
+    if (downloaded) {
+      break
+    }
+  }
+  downloaded
 }
 
 find_local_source <- function(arrow_home = Sys.getenv("ARROW_HOME", "..")) {
@@ -176,7 +201,10 @@ build_libarrow <- function(src_dir, dst_dir) {
   # Set up make for parallel building
   makeflags <- Sys.getenv("MAKEFLAGS")
   if (makeflags == "") {
-    makeflags <- sprintf("-j%s", parallel::detectCores())
+    # CRAN policy says not to use more than 2 cores during checks
+    # If you have more and want to use more, set MAKEFLAGS
+    ncores <- min(parallel::detectCores(), 2)
+    makeflags <- sprintf("-j%s", ncores)
     Sys.setenv(MAKEFLAGS = makeflags)
   }
   if (!quietly) {
@@ -212,10 +240,7 @@ ensure_cmake <- function() {
     )
     cmake_tar <- tempfile()
     cmake_dir <- tempfile()
-    try(
-      download.file(cmake_binary_url, cmake_tar, quiet = quietly),
-      silent = quietly
-    )
+    try_download(cmake_binary_url, cmake_tar)
     untar(cmake_tar, exdir = cmake_dir)
     unlink(cmake_tar)
     options(.arrow.cleanup = c(getOption(".arrow.cleanup"), cmake_dir))
diff --git a/r/tools/winlibs.R b/r/tools/winlibs.R
index 390e1f9..ea31d4f 100644
--- a/r/tools/winlibs.R
+++ b/r/tools/winlibs.R
@@ -29,11 +29,18 @@ if(!file.exists(sprintf("windows/arrow-%s/include/arrow/api.h", VERSION))){
   } else {
     # Download static arrow from rwinlib
     if (getRversion() < "3.3.0") setInternet2()
+    quietly <- !identical(tolower(Sys.getenv("ARROW_R_DEV")), "true")
     get_file <- function(template, version) {
-      try(download.file(sprintf(template, version), "lib.zip", quiet = TRUE), silent = TRUE)
+      try(
+        suppressWarnings(
+          download.file(sprintf(template, version), "lib.zip", quiet = quietly)
+        ),
+        silent = quietly
+      )
     }
     # URL templates
     # TODO: don't hard-code RTools 3.5? Can we detect which toolchain we have?
+    # ifelse(nzchar(Sys.getenv("RTOOLS40_HOME")), "40", "35")
     nightly <- "https://dl.bintray.com/ursalabs/arrow-r/libarrow/bin/windows-35/arrow-%s.zip"
     rwinlib <- "https://github.com/rwinlib/arrow/archive/v%s.zip"
     # First look for a nightly