You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ks...@apache.org on 2020/05/13 01:15:19 UTC
[arrow] 03/17: ARROW-8549: [R] Assorted post-0.17 release cleanups
This is an automated email from the ASF dual-hosted git repository.
kszucs pushed a commit to branch maint-0.17.x
in repository https://gitbox.apache.org/repos/asf/arrow.git
commit 558dae47a6408fe41d241eafbdd60b8e4bcc495c
Author: Neal Richardson <ne...@gmail.com>
AuthorDate: Wed Apr 22 12:42:59 2020 -0700
ARROW-8549: [R] Assorted post-0.17 release cleanups
Functional changes in the R package installation:
* Downloading dependencies is made completely silent by default
* When downloading source, it now tries three Apache mirrors
* Limit parallelization in the C++ build to 2 CPUs (unless MAKEFLAGS is set), per CRAN repository policy
CI/test changes:
* Adds a `try()` around a non-essential dependency download to fix a source of occasional build flakiness
* Makes the R install logs always print out, not just on failure. It was useful to be able to see what exactly the successful builds were doing to make sure they were doing the expected right things.
* Makes the `--as-cran` checks build and check the docs/vignettes. This adds a lot of time to the build but is only done nightly, and something needs to check this, at least before release
Closes #6995 from nealrichardson/0.17.0-rpkg
Authored-by: Neal Richardson <ne...@gmail.com>
Signed-off-by: Neal Richardson <ne...@gmail.com>
---
ci/scripts/r_deps.sh | 3 +-
ci/scripts/r_test.sh | 11 ++---
dev/tasks/r/azure.linux.yml | 3 +-
dev/tasks/r/github.linux.cran.yml | 3 +-
docker-compose.yml | 3 ++
r/cran-comments.md | 5 ++-
r/tools/linuxlibs.R | 93 +++++++++++++++++++++++++--------------
r/tools/winlibs.R | 9 +++-
8 files changed, 83 insertions(+), 47 deletions(-)
diff --git a/ci/scripts/r_deps.sh b/ci/scripts/r_deps.sh
index edb8cdf..7cf6dc5 100755
--- a/ci/scripts/r_deps.sh
+++ b/ci/scripts/r_deps.sh
@@ -27,7 +27,8 @@ pushd ${source_dir}
# Install R package dependencies
${R_BIN} -e "install.packages('remotes'); remotes::install_cran(c('glue', 'rcmdcheck'))"
${R_BIN} -e "remotes::install_deps(dependencies = TRUE)"
-${R_BIN} -e "remotes::install_github('nealrichardson/decor')"
+# This isn't required for testing, only for if you're using this to build your dev environment
+${R_BIN} -e "try(remotes::install_github('nealrichardson/decor'))"
popd
diff --git a/ci/scripts/r_test.sh b/ci/scripts/r_test.sh
index e79fac5..edfa32b 100755
--- a/ci/scripts/r_test.sh
+++ b/ci/scripts/r_test.sh
@@ -41,15 +41,16 @@ export TEST_R_WITH_ARROW=TRUE
export _R_CHECK_TESTS_NLINES_=0
export _R_CHECK_CRAN_INCOMING_REMOTE_=FALSE
export _R_CHECK_LIMIT_CORES_=FALSE
-export VERSION=$(grep ^Version DESCRIPTION | sed s/Version:\ //)
# Make sure we aren't writing to the home dir (CRAN _hates_ this but there is no official check)
BEFORE=$(ls -alh ~/)
-# Conditionally run --as-cran because crossbow jobs aren't using _R_CHECK_COMPILATION_FLAGS_KNOWN_
-# (maybe an R version thing, needs 3.6.2?)
-# Also only --run-donttest if NOT_CRAN because Parquet example requires snappy (optional dependency)
-${R_BIN} -e "cran <- !identical(tolower(Sys.getenv('NOT_CRAN')), 'true'); rcmdcheck::rcmdcheck(build_args = '--no-build-vignettes', args = c('--no-manual', '--ignore-vignettes', ifelse(cran, '--as-cran', '--run-donttest')), error_on = 'warning', check_dir = 'check')"
+${R_BIN} -e "as_cran <- !identical(tolower(Sys.getenv('NOT_CRAN')), 'true')
+ if (as_cran) {
+ rcmdcheck::rcmdcheck(args = c('--as-cran', '--run-donttest'), error_on = 'warning', check_dir = 'check')
+ } else {
+ rcmdcheck::rcmdcheck(build_args = '--no-build-vignettes', args = c('--no-manual', '--ignore-vignettes', '--run-donttest'), error_on = 'warning', check_dir = 'check')
+ }"
AFTER=$(ls -alh ~/)
if [ "$BEFORE" != "$AFTER" ]; then
diff --git a/dev/tasks/r/azure.linux.yml b/dev/tasks/r/azure.linux.yml
index 59d5b02..4b4211c 100644
--- a/dev/tasks/r/azure.linux.yml
+++ b/dev/tasks/r/azure.linux.yml
@@ -55,5 +55,4 @@ jobs:
- script: |
set -ex
cat arrow/r/check/arrow.Rcheck/00install.out
- displayName: Dump install logs on failure
- condition: failed()
+ displayName: Dump install logs
diff --git a/dev/tasks/r/github.linux.cran.yml b/dev/tasks/r/github.linux.cran.yml
index 0a5362b..2b2d884 100644
--- a/dev/tasks/r/github.linux.cran.yml
+++ b/dev/tasks/r/github.linux.cran.yml
@@ -62,6 +62,5 @@ jobs:
- name: Docker Run
shell: bash
run: cd arrow && docker-compose run r
- - name: Dump install logs on failure
- if: failure()
+ - name: Dump install logs
run: cat arrow/r/check/arrow.Rcheck/00install.out
diff --git a/docker-compose.yml b/docker-compose.yml
index 9c0bfdd..ec8d483 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -784,6 +784,7 @@ services:
shm_size: *shm-size
environment:
<<: *ccache
+ NOT_CRAN: 'true'
volumes: *conda-volumes
command:
["/arrow/ci/scripts/cpp_build.sh /arrow /build &&
@@ -808,6 +809,7 @@ services:
<<: *ccache
ARROW_R_CXXFLAGS: '-Werror'
LIBARROW_BUILD: 'false'
+ NOT_CRAN: 'true'
volumes: *ubuntu-volumes
command: >
/bin/bash -c "
@@ -834,6 +836,7 @@ services:
environment:
LIBARROW_DOWNLOAD: "false"
ARROW_HOME: "/arrow"
+ # To test for CRAN release, delete ^^ these two env vars so we download the Apache release
ARROW_USE_PKG_CONFIG: "false"
volumes:
- .:/arrow:delegated
diff --git a/r/cran-comments.md b/r/cran-comments.md
index 59f80b6..07a184b 100644
--- a/r/cran-comments.md
+++ b/r/cran-comments.md
@@ -1,8 +1,9 @@
## Test environments
-* Debian Linux, R-devel, GCC ASAN/UBSAN
+* Debian Linux, GCC, R-devel/R-patched/R-release
+* Fedora Linux, GCC/clang, R-devel
* Ubuntu Linux 16.04 LTS, R-release, GCC
* win-builder (R-devel and R-release)
-* macOS (10.11, 10.14), R-release
+* macOS 10.14, R-release
## R CMD check results
diff --git a/r/tools/linuxlibs.R b/r/tools/linuxlibs.R
index b87465b..96fd8e1 100644
--- a/r/tools/linuxlibs.R
+++ b/r/tools/linuxlibs.R
@@ -20,10 +20,6 @@ VERSION <- args[1]
dst_dir <- paste0("libarrow/arrow-", VERSION)
arrow_repo <- "https://dl.bintray.com/ursalabs/arrow-r/libarrow/"
-apache_src_url <- paste0(
- "https://archive.apache.org/dist/arrow/arrow-", VERSION,
- "/apache-arrow-", VERSION, ".tar.gz"
-)
options(.arrow.cleanup = character()) # To collect dirs to rm on exit
on.exit(unlink(getOption(".arrow.cleanup")))
@@ -40,17 +36,23 @@ binary_ok <- !identical(tolower(Sys.getenv("LIBARROW_BINARY", "false")), "false"
# For local debugging, set ARROW_R_DEV=TRUE to make this script print more
quietly <- !env_is("ARROW_R_DEV", "true")
+try_download <- function(from_url, to_file) {
+ try(
+ suppressWarnings(
+ download.file(from_url, to_file, quiet = quietly)
+ ),
+ silent = quietly
+ )
+ file.exists(to_file)
+}
+
download_binary <- function(os = identify_os()) {
libfile <- tempfile()
if (!is.null(os)) {
# See if we can map this os-version to one we have binaries for
os <- find_available_binary(os)
binary_url <- paste0(arrow_repo, "bin/", os, "/arrow-", VERSION, ".zip")
- try(
- download.file(binary_url, libfile, quiet = quietly),
- silent = quietly
- )
- if (file.exists(libfile)) {
+ if (try_download(binary_url, libfile)) {
cat(sprintf("*** Successfully retrieved C++ binaries for %s\n", os))
} else {
cat(sprintf("*** No C++ binaries found for %s\n", os))
@@ -133,31 +135,54 @@ find_available_binary <- function(os) {
download_source <- function() {
tf1 <- tempfile()
- src_dir <- NULL
- source_url <- paste0(arrow_repo, "src/arrow-", VERSION, ".zip")
- try(
- download.file(source_url, tf1, quiet = quietly),
- silent = quietly
- )
- if (!file.exists(tf1)) {
- # Try for an official release
- try(
- download.file(apache_src_url, tf1, quiet = quietly),
- silent = quietly
- )
- }
- if (file.exists(tf1)) {
+ src_dir <- tempfile()
+ if (bintray_download(tf1)) {
+ # First try from bintray
cat("*** Successfully retrieved C++ source\n")
- src_dir <- tempfile()
unzip(tf1, exdir = src_dir)
unlink(tf1)
- # These scripts need to be executable
- system(sprintf("chmod 755 %s/cpp/build-support/*.sh", src_dir))
- options(.arrow.cleanup = c(getOption(".arrow.cleanup"), src_dir))
- # The actual src is in cpp
src_dir <- paste0(src_dir, "/cpp")
+ } else if (apache_download(tf1)) {
+ # If that fails, try for an official release
+ cat("*** Successfully retrieved C++ source\n")
+ untar(tf1, exdir = src_dir)
+ unlink(tf1)
+ src_dir <- paste0(src_dir, "/apache-arrow-", VERSION, "/cpp")
}
- src_dir
+
+ if (dir.exists(src_dir)) {
+ options(.arrow.cleanup = c(getOption(".arrow.cleanup"), src_dir))
+ # These scripts need to be executable
+ system(
+ sprintf("chmod 755 %s/build-support/*.sh", src_dir),
+ ignore.stdout = quietly, ignore.stderr = quietly
+ )
+ return(src_dir)
+ } else {
+ return(NULL)
+ }
+}
+
+bintray_download <- function(destfile) {
+ source_url <- paste0(arrow_repo, "src/arrow-", VERSION, ".zip")
+ try_download(source_url, destfile)
+}
+
+apache_download <- function(destfile, n_mirrors = 3) {
+ apache_path <- paste0("arrow/arrow-", VERSION, "/apache-arrow-", VERSION, ".tar.gz")
+ apache_urls <- c(
+ # This returns a different mirror each time
+ rep("https://www.apache.org/dyn/closer.lua?action=download&filename=", n_mirrors),
+ "https://downloads.apache.org/" # The backup
+ )
+ downloaded <- FALSE
+ for (u in apache_urls) {
+ downloaded <- try_download(paste0(u, apache_path), destfile)
+ if (downloaded) {
+ break
+ }
+ }
+ downloaded
}
find_local_source <- function(arrow_home = Sys.getenv("ARROW_HOME", "..")) {
@@ -176,7 +201,10 @@ build_libarrow <- function(src_dir, dst_dir) {
# Set up make for parallel building
makeflags <- Sys.getenv("MAKEFLAGS")
if (makeflags == "") {
- makeflags <- sprintf("-j%s", parallel::detectCores())
+ # CRAN policy says not to use more than 2 cores during checks
+ # If you have more and want to use more, set MAKEFLAGS
+ ncores <- min(parallel::detectCores(), 2)
+ makeflags <- sprintf("-j%s", ncores)
Sys.setenv(MAKEFLAGS = makeflags)
}
if (!quietly) {
@@ -212,10 +240,7 @@ ensure_cmake <- function() {
)
cmake_tar <- tempfile()
cmake_dir <- tempfile()
- try(
- download.file(cmake_binary_url, cmake_tar, quiet = quietly),
- silent = quietly
- )
+ try_download(cmake_binary_url, cmake_tar)
untar(cmake_tar, exdir = cmake_dir)
unlink(cmake_tar)
options(.arrow.cleanup = c(getOption(".arrow.cleanup"), cmake_dir))
diff --git a/r/tools/winlibs.R b/r/tools/winlibs.R
index 390e1f9..ea31d4f 100644
--- a/r/tools/winlibs.R
+++ b/r/tools/winlibs.R
@@ -29,11 +29,18 @@ if(!file.exists(sprintf("windows/arrow-%s/include/arrow/api.h", VERSION))){
} else {
# Download static arrow from rwinlib
if (getRversion() < "3.3.0") setInternet2()
+ quietly <- !identical(tolower(Sys.getenv("ARROW_R_DEV")), "true")
get_file <- function(template, version) {
- try(download.file(sprintf(template, version), "lib.zip", quiet = TRUE), silent = TRUE)
+ try(
+ suppressWarnings(
+ download.file(sprintf(template, version), "lib.zip", quiet = quietly)
+ ),
+ silent = quietly
+ )
}
# URL templates
# TODO: don't hard-code RTools 3.5? Can we detect which toolchain we have?
+ # ifelse(nzchar(Sys.getenv("RTOOLS40_HOME")), "40", "35")
nightly <- "https://dl.bintray.com/ursalabs/arrow-r/libarrow/bin/windows-35/arrow-%s.zip"
rwinlib <- "https://github.com/rwinlib/arrow/archive/v%s.zip"
# First look for a nightly