You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by am...@apache.org on 2022/05/26 13:44:14 UTC
[arrow] branch master updated: ARROW-16403:[R][CI] Create Crossbow task for R nightly builds
This is an automated email from the ASF dual-hosted git repository.
amolina pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 156dc72c32 ARROW-16403:[R][CI] Create Crossbow task for R nightly builds
156dc72c32 is described below
commit 156dc72c320dbbdec5424f24857e3335fc8c7dee
Author: Jacob Wujciak-Jens <ja...@wujciak.de>
AuthorDate: Thu May 26 15:44:05 2022 +0200
ARROW-16403:[R][CI] Create Crossbow task for R nightly builds
Closes #13149 from assignUser/ARROW-16403-nightly-crossbow
Lead-authored-by: Jacob Wujciak-Jens <ja...@wujciak.de>
Co-authored-by: Neal Richardson <ne...@gmail.com>
Signed-off-by: Alessandro Molina <am...@turbogears.org>
---
.github/workflows/r_nightly.yml | 124 ++++++++++++++++
dev/tasks/macros.jinja | 76 +++++++++-
dev/tasks/r/github.nightly.yml | 304 ++++++++++++++++++++++++++++++++++++++++
dev/tasks/tasks.yml | 10 ++
4 files changed, 513 insertions(+), 1 deletion(-)
diff --git a/.github/workflows/r_nightly.yml b/.github/workflows/r_nightly.yml
new file mode 100644
index 0000000000..8fb96a2796
--- /dev/null
+++ b/.github/workflows/r_nightly.yml
@@ -0,0 +1,124 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: Upload R Nightly builds
+
+on:
+ workflow_dispatch:
+ inputs:
+ prefix:
+ description: Job prefix to use.
+ required: false
+ default: ''
+ schedule:
+ # Crossbow packaging runs at 0 8 * * *
+ - cron: '0 14 * * *'
+
+jobs:
+ upload:
+ env:
+ PREFIX: ${{ github.event.inputs.prefix || ''}}
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout Arrow
+ uses: actions/checkout@v3
+ with:
+ fetch-depth: 1
+ path: arrow
+ repository: apache/arrow
+ ref: master
+ submodules: recursive
+ - name: Checkout Crossbow
+ uses: actions/checkout@v3
+ with:
+ fetch-depth: 0
+ path: crossbow
+ repository: ursacomputing/crossbow
+ ref: master
+ - name: Set up Python
+ uses: actions/setup-python@v3
+ with:
+ cache: 'pip'
+ python-version: 3.8
+ - name: Install Archery
+ shell: bash
+ run: pip install -e arrow/dev/archery[all]
+ - run: mkdir -p binaries
+ - name: Download Artifacts
+ run: |
+ if [ -z "$PREFIX" ]; then  # no prefix input given: default to today's nightly packaging job
+ PREFIX="nightly-packaging-$(date +%Y-%m-%d)-0"
+ fi
+ echo "$PREFIX"
+
+ archery crossbow download-artifacts -f r-nightly-packages -t binaries --skip-pattern-validation "$PREFIX"
+ - name: Build Repository
+ shell: Rscript {0}
+ run: |
+ art_path <- list.files("binaries",
+ recursive = TRUE,
+ include.dirs = TRUE,
+ pattern = "r-nightly-packages$",
+ full.names = TRUE
+ )
+
+ pkgs <- list.files(art_path, pattern = "^r-pkg_")  # `pattern` is a regex, not a glob
+ src_i <- grep("r-pkg_src", pkgs)
+ src_pkg <- pkgs[src_i]
+ pkgs <- setdiff(pkgs, src_pkg)  # unlike pkgs[-src_i], keeps the binaries when src_i is empty
+ libs <- list.files(art_path, pattern = "^r-libarrow")
+
+ new_names <- sub("r-pkg_", "", pkgs, fixed = TRUE)
+ matches <- regmatches(new_names, regexec("(([a-z]+)-[\\.a-zA-Z0-9]+)_(\\d\\.\\d)-(arrow.+)$", new_names))
+
+ dir.create("repo/src/contrib", recursive = TRUE)
+ file.copy(paste0(art_path, "/", src_pkg), paste0("repo/src/contrib/", sub("r-pkg_src-", "", src_pkg)))
+ tools::write_PACKAGES("repo/src/contrib", type = "source", verbose = TRUE)
+
+ for (match in matches) {  # match: [[1]] full name, [[3]] os, [[4]] R version, [[5]] package file
+ path <- paste0("repo/bin/", match[[3]], "/contrib/", match[[4]])
+ path <- sub("macos", "macosx", path)
+ dir.create(path, recursive = TRUE)
+ file <- list.files(art_path, pattern = match[[1]], full.names = TRUE)
+
+ file.copy(file, paste0(path, "/", match[[5]]))
+ tools::write_PACKAGES(path, type = paste0(substring(match[[3]], 1, 3), ".binary"), verbose = TRUE)
+ }
+
+ lib_names <- sub("r-libarrow-", "", libs)
+ lib_match <- regmatches(lib_names, regexec("([[:alpha:]]+)-(.+)", lib_names))
+
+ dir.create("repo/libarrow/bin/windows", recursive = TRUE)
+ dir.create("repo/libarrow/bin/centos-7", recursive = TRUE)
+ dir.create("repo/libarrow/bin/ubuntu-18.04", recursive = TRUE)
+
+ for (match in lib_match) {  # match: [[1]] full name, [[2]] os, [[3]] zip file name
+ path <- list.files("repo//libarrow//bin", pattern = match[[2]], full.names = TRUE)
+ file <- paste0(art_path, "/r-libarrow-", match[[1]])
+ file.copy(file, paste0(path, "//", match[[3]]))
+ }
+ - run: ls -R repo
+ - name: Upload Files
+ uses: burnett01/rsync-deployments@5.2
+ with:
+ switches: -avzr
+ path: repo/*
+ remote_path: ${{ secrets.NIGHTLIES_RSYNC_PATH }}/arrow/r
+ remote_host: ${{ secrets.NIGHTLIES_RSYNC_HOST }}
+ remote_port: ${{ secrets.NIGHTLIES_RSYNC_PORT }}
+ remote_user: ${{ secrets.NIGHTLIES_RSYNC_USER }}
+ remote_key: ${{ secrets.NIGHTLIES_RSYNC_KEY }}
diff --git a/dev/tasks/macros.jinja b/dev/tasks/macros.jinja
index bd65f6488c..fa3676ed30 100644
--- a/dev/tasks/macros.jinja
+++ b/dev/tasks/macros.jinja
@@ -52,8 +52,9 @@ on:
{%- macro github_install_archery() -%}
- name: Set up Python
- uses: actions/setup-python@v2
+ uses: actions/setup-python@v3
with:
+ cache: 'pip'
python-version: 3.8
- name: Install Archery
shell: bash
@@ -221,3 +222,76 @@ on:
cp ${formula} $(brew --repository homebrew/core)/Formula/
done
{% endmacro %}
+
+{%- macro github_change_r_pkg_version(is_fork, version = '\\2.\'\"$(date +%Y%m%d)\"\'' ) -%}
+ - name: Modify version  # rewrites Version: in arrow/r/DESCRIPTION, then pins the autobrew formula to arrow.head
+ shell: bash
+ run: |
+ cd arrow/r
+ sed -i.bak -E -e \
+ 's/(^Version: )([0-9]+\.[0-9]+\.[0-9]+).*$/\1{{ version }}/' \
+ DESCRIPTION
+ head DESCRIPTION
+ rm -f DESCRIPTION.bak
+ cp ../dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb tools/apache-arrow.rb
+
+ # Pin the git commit in the formula to match
+ cd tools
+ if [ "{{ is_fork }}" == "true" ]; then
+ sed -i.bak -E -e 's/apache\/arrow.git"$/{{ arrow.github_repo.split("/") | join("\/") }}.git", :revision => "'"{{ arrow.head }}"'"/' apache-arrow.rb
+ else
+ sed -i.bak -E -e 's/arrow.git"$/arrow.git", :revision => "'"{{ arrow.head }}"'"/' apache-arrow.rb
+ fi
+ rm -f apache-arrow.rb.bak
+{% endmacro %}
+
+{%- macro github_test_r_src_pkg() -%}
+ source("https://raw.githubusercontent.com/apache/arrow/master/ci/etc/rprofile")
+
+ install.packages(
+ "arrow",
+ repos = c(getOption("arrow.dev_repo"), "https://cloud.r-project.org"),
+ verbose = TRUE
+ )
+
+ library(arrow)
+ read_parquet(system.file("v0.7.1.parquet", package = "arrow"))
+
+ # Our Version should always be > CRAN so we would detect a CRAN version here.
+ stopifnot(packageVersion("arrow") == {{ '"${{needs.source.outputs.version}}"' }})
+{% endmacro %}
+
+{%- macro github_setup_local_r_repo(os, get_bin = 'true') -%}
+ - name: Setup local repo
+ shell: bash
+ run: mkdir repo
+ - name: Get windows binary
+ if: {{ '${{' }} ({{ os }} == 'windows-latest') && {{ get_bin }} {{ '}}' }}
+ uses: actions/download-artifact@v3
+ with:
+ name: r-libarrow-windows
+ path: repo/libarrow/bin/windows
+ - name: Get ubuntu binary
+ if: {{ '${{' }} ({{ os }} != 'windows-latest') && {{ get_bin }} {{ '}}' }}
+ uses: actions/download-artifact@v3
+ with:
+ name: r-libarrow-ubuntu
+ path: repo/libarrow/bin/ubuntu-18.04
+ - name: Get centos binary
+ if: {{ '${{' }} ({{ os }} != 'windows-latest') && {{ get_bin }} {{ '}}' }}
+ uses: actions/download-artifact@v3
+ with:
+ name: r-libarrow-centos
+ path: repo/libarrow/bin/centos-7
+ - name: Get src pkg
+ uses: actions/download-artifact@v3
+ with:
+ name: r-pkg_src
+ path: repo/src/contrib
+ - name: Update repo index
+ shell: Rscript {0}
+ run: |
+ # getwd() is necessary as this macro is used within jobs using a docker container
+ tools::write_PACKAGES(file.path(getwd(), "repo", "src", "contrib"), type = "source", verbose = TRUE)
+ - run: ls -R repo
+{% endmacro %}
\ No newline at end of file
diff --git a/dev/tasks/r/github.nightly.yml b/dev/tasks/r/github.nightly.yml
new file mode 100644
index 0000000000..e60baae511
--- /dev/null
+++ b/dev/tasks/r/github.nightly.yml
@@ -0,0 +1,304 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+{% import 'macros.jinja' as macros with context %}
+
+# This allows us to set a custom version via param:
+# crossbow submit --param custom_version=8.5.3 r-nightly-packages
+# if the param is unset defaults to the usual Ymd naming scheme
+{% set version = custom_version|default("\\2.\'\"$(date +%Y%m%d)\"\'") %}
+# We need this as boolean and string
+{% set is_upstream_b = arrow.github_repo == 'apache/arrow' %}
+# use filter to cast to string and convert to lowercase to match yaml boolean
+{% set is_fork = (not is_upstream_b)|lower %}
+{% set is_upstream = is_upstream_b|lower %}
+
+
+{{ macros.github_header() }}
+
+jobs:
+ source:
+ # This job will change the version to either the custom_version param or YMD format.
+ # The output allows other steps to use the exact version to prevent issues (e.g. date changes during run)
+ name: Source Package
+ runs-on: ubuntu-latest
+ outputs:
+ version: {{ '${{ steps.save-version.outputs.version }}' }}
+ steps:
+ {{ macros.github_checkout_arrow()|indent }}
+ {{ macros.github_change_r_pkg_version(is_fork, version)|indent }}
+ - name: Save Version
+ id: save-version  # referenced by this job's outputs.version
+ shell: bash
+ run: |
+ echo "version=$(grep '^Version' arrow/r/DESCRIPTION | sed 's/Version: //')" >> "$GITHUB_OUTPUT"
+
+ - uses: r-lib/actions/setup-r@v2
+ with:
+ install-r: false
+
+ - name: Build R source package
+ shell: bash
+ run: |
+ cd arrow/r
+ # Copy in the Arrow C++ source
+ make sync-cpp
+ R CMD build --no-build-vignettes .
+
+ - name: Upload package artifact
+ uses: actions/upload-artifact@v3
+ with:
+ name: r-pkg_src
+ path: arrow/r/arrow_*.tar.gz
+
+ linux-cpp:
+ name: C++ Binary {{ '${{ matrix.config.os }}-${{ matrix.config.version }}' }}
+ runs-on: ubuntu-latest
+ needs: source
+ strategy:
+ fail-fast: false
+ matrix:
+ config:
+ - { os: ubuntu, version: "18.04" }
+ - { os: centos, version: "7" }
+ env:
+ UBUNTU: {{ '${{ matrix.config.version }}' }}
+ steps:
+ {{ macros.github_checkout_arrow()|indent }}
+ {{ macros.github_change_r_pkg_version(is_fork, '${{ needs.source.outputs.version }}')|indent }}
+ {{ macros.github_install_archery()|indent }}
+ - name: Build libarrow
+ shell: bash
+ run: |
+ sudo sysctl -w kernel.core_pattern="core.%e.%p"
+ ulimit -c unlimited
+ archery docker run {{ '${{ matrix.config.os}}' }}-cpp-static
+ - name: Bundle libarrow
+ shell: bash
+ env:
+ PKG_FILE: arrow-{{ '${{ needs.source.outputs.version }}' }}.zip
+ VERSION: {{ '${{ needs.source.outputs.version }}' }}
+ run: |
+ cd arrow/r/libarrow/dist
+ # These files were created by the docker user so we have to sudo to get them
+ sudo -E zip -r $PKG_FILE lib/ include/
+
+ - name: Upload binary artifact
+ uses: actions/upload-artifact@v3
+ with:
+ name: r-libarrow-{{ '${{ matrix.config.os}}' }}
+ path: arrow/r/libarrow/dist/arrow-*.zip
+
+ windows-cpp:
+ name: C++ Binary Windows RTools (40 only)
+ needs: source
+ runs-on: windows-latest
+ steps:
+ - run: git config --global core.autocrlf false
+ {{ macros.github_checkout_arrow()|indent }}
+ {{ macros.github_change_r_pkg_version(is_fork, '${{ needs.source.outputs.version }}')|indent }}
+
+ - uses: r-lib/actions/setup-r@v2
+ with:
+ rtools-version: 40
+ r-version: "4.0"
+ Ncpus: 2
+
+ - name: Build Arrow C++ with rtools40
+ shell: bash
+ env:
+ ARROW_HOME: "arrow"
+ run: arrow/ci/scripts/r_windows_build.sh
+
+ - name: Upload binary artifact
+ uses: actions/upload-artifact@v3
+ with:
+ name: r-libarrow-windows
+ path: build/arrow-*.zip
+
+ r-packages:
+ if: true && !cancelled()
+ needs: [source, windows-cpp]
+ name: {{ '${{ matrix.platform }} ${{ matrix.r_version.r }}' }}
+ runs-on: {{ '${{ matrix.platform }}' }}
+ strategy:
+ fail-fast: false
+ matrix:
+ platform:
+ - windows-latest
+ # This is newer than what CRAN builds on, but Travis is no longer an option for us, so...
+ - macos-10.15
+ # - devops-managed # No M1 until the runner application runs native
+ r_version:
+ - { rtools: 40, r: "4.1" }
+ - { rtools: 42, r: "4.2" }
+ steps:
+ - uses: r-lib/actions/setup-r@v2
+ with:
+ r-version: {{ '${{ matrix.r_version.r }}' }}
+ rtools-version: {{ '${{ matrix.r_version.rtools }}' }}
+ Ncpus: 2
+ {{ macros.github_setup_local_r_repo('matrix.platform')|indent }}
+ - name: Build Binary
+ shell: Rscript {0}
+ run: |
+ on_windows <- tolower(Sys.info()[["sysname"]]) == "windows"
+
+ # Install dependencies by installing (yesterday's) binary, then removing it
+ install.packages(c("arrow", "cpp11"),
+ type = "binary",
+ repos = c("https://nightlies.apache.org/arrow/r", "https://cloud.r-project.org")
+ )
+ remove.packages("arrow")
+
+ # Setup local repo
+ dev_repo <- paste0(
+ ifelse(on_windows, "file:", "file://"),
+ getwd(),
+ "/repo")
+
+ options(arrow.dev_repo = dev_repo)
+
+ # Build
+ Sys.setenv(MAKEFLAGS = paste0("-j", parallel::detectCores()))
+ INSTALL_opts <- "--build"
+ if (!on_windows) {
+ # Windows doesn't support the --strip arg
+ INSTALL_opts <- c(INSTALL_opts, "--strip")
+ }
+
+
+ install.packages(
+ "arrow",
+ type = "source",
+ repos = dev_repo,
+ INSTALL_opts = INSTALL_opts
+ )
+
+ # Test
+ library(arrow)
+ read_parquet(system.file("v0.7.1.parquet", package = "arrow"))
+
+ - name: Upload binary artifact
+ uses: actions/upload-artifact@v3
+ with:
+ name: r-pkg_{{ '${{ matrix.platform }}_${{ matrix.r_version.r }}' }}
+ path: arrow_*
+
+ test-linux-binary:
+ if: true && !cancelled()
+ needs: [source, linux-cpp]
+ name: Test binary {{ '${{ matrix.image }}' }}
+ runs-on: ubuntu-latest
+ container: {{ '${{ matrix.image }}' }}
+ strategy:
+ fail-fast: false
+ matrix:
+ image:
+ - "rhub/ubuntu-gcc-release" # ubuntu-20.04 (focal)
+ - "rstudio/r-base:4.1-bionic"
+ - "rstudio/r-base:4.2-centos7"
+ - "rocker/r-ver:3.6.3" # for debian:buster (10)
+ - "rocker/r-ver" # ubuntu-20.04
+ - "rhub/fedora-clang-devel" # tests the distro-map.csv mapping
+ steps:
+ - name: Install system requirements
+ shell: bash
+ run: |
+ if [ "`which dnf`" ]; then
+ dnf install -y libcurl-devel openssl-devel
+ elif [ "`which yum`" ]; then
+ yum install -y libcurl-devel openssl-devel
+ elif [ "`which zypper`" ]; then
+ zypper install -y libcurl-devel libopenssl-devel
+ else
+ apt-get update
+ apt-get install -y libcurl4-openssl-dev libssl-dev
+ fi
+
+ # Add R-devel to PATH
+ echo "/opt/R-devel/bin" >> $GITHUB_PATH
+ {{ macros.github_setup_local_r_repo("'ubuntu-latest'")|indent }}
+ - name: Set dev repo
+ shell: bash
+ run: |
+ # It is important to use pwd here as this happens inside a container so the
+ # normal github.workspace path is wrong.
+ echo "options(arrow.dev_repo = 'file://$(pwd)/repo')" >> ~/.Rprofile
+ - name: Install arrow from our repo
+ env:
+ LIBARROW_BUILD: "FALSE"
+ LIBARROW_BINARY: "TRUE"
+ shell: Rscript {0}
+ run: |
+ {{ macros.github_test_r_src_pkg()|indent(8) }}
+
+ test-source:
+ if: true && !cancelled()
+ needs: source
+ name: Test linux source build
+ runs-on: ubuntu-latest
+ steps:
+ - name: Install R
+ uses: r-lib/actions/setup-r@v2
+ with:
+ install-r: false
+ {{ macros.github_setup_local_r_repo("'ubuntu-latest'", 'false')|indent }}
+ - name: Set dev repo
+ shell: bash
+ run: |
+ echo "options(arrow.dev_repo = 'file://$(pwd)/repo')" >> ~/.Rprofile
+ - name: Install arrow from nightly repo
+ env:
+ # Test source build so be sure not to download a binary
+ LIBARROW_BINARY: "FALSE"
+ shell: Rscript {0}
+ run: |
+ {{ macros.github_test_r_src_pkg()|indent(8) }}
+
+ - name: Retry with verbosity if that failed
+ if: failure()
+ env:
+ LIBARROW_BINARY: "FALSE"
+ ARROW_R_DEV: "TRUE"
+ shell: Rscript {0}  # the macro below expands to R code, which the default shell cannot run
+ run: |
+ {{ macros.github_test_r_src_pkg()|indent(8) }}
+ upload-binaries:
+ needs: r-packages
+ if: true && !cancelled()
+ name: Upload artifacts
+ runs-on: ubuntu-latest
+ steps:
+ {{ macros.github_checkout_arrow()|indent }}
+ - name: Download Artifacts
+ uses: actions/download-artifact@v3
+ with:
+ path: artifacts
+ - name: Install R
+ uses: r-lib/actions/setup-r@v2
+ with:
+ install-r: false
+ - name: Rename artifacts  # copies artifacts/<dir>/<file> to binaries/<dir>-<file>
+ shell: Rscript {0}
+ run: |
+ file_paths <- list.files("artifacts", include.dirs = FALSE, recursive = TRUE)
+ new_names <- paste0("binaries/", sub("/","-",file_paths))
+ dir.create("binaries", showWarnings = FALSE)
+ file.copy(paste0("artifacts/", file_paths), new_names)
+
+ {{ macros.github_upload_releases("binaries/r-*")|indent }}
\ No newline at end of file
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index 93dd522984..42250896d4 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -174,6 +174,7 @@ groups:
- nuget
- wheel-*
- python-sdist
+ - r-nightly-packages
nightly-release:
- verify-rc-source-*
@@ -911,6 +912,15 @@ tasks:
- Apache.Arrow.{no_rc_version}.nupkg
- Apache.Arrow.{no_rc_version}.snupkg
+ ######################## R packages & binaries ##############################
+ r-nightly-packages:
+ ci: github
+ template: r/github.nightly.yml
+ artifacts:
+ - r-libarrow-.+
+ - r-pkg_.+
+
+
########################### Release verification ############################
######################## Linux verification #################################