You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by am...@apache.org on 2022/05/26 13:44:14 UTC

[arrow] branch master updated: ARROW-16403:[R][CI] Create Crossbow task for R nightly builds

This is an automated email from the ASF dual-hosted git repository.

amolina pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 156dc72c32 ARROW-16403:[R][CI] Create Crossbow task for R nightly builds
156dc72c32 is described below

commit 156dc72c320dbbdec5424f24857e3335fc8c7dee
Author: Jacob Wujciak-Jens <ja...@wujciak.de>
AuthorDate: Thu May 26 15:44:05 2022 +0200

    ARROW-16403:[R][CI] Create Crossbow task for R nightly builds
    
    Closes #13149 from assignUser/ARROW-16403-nightly-crossbow
    
    Lead-authored-by: Jacob Wujciak-Jens <ja...@wujciak.de>
    Co-authored-by: Neal Richardson <ne...@gmail.com>
    Signed-off-by: Alessandro Molina <am...@turbogears.org>
---
 .github/workflows/r_nightly.yml | 124 ++++++++++++++++
 dev/tasks/macros.jinja          |  76 +++++++++-
 dev/tasks/r/github.nightly.yml  | 304 ++++++++++++++++++++++++++++++++++++++++
 dev/tasks/tasks.yml             |  10 ++
 4 files changed, 513 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/r_nightly.yml b/.github/workflows/r_nightly.yml
new file mode 100644
index 0000000000..8fb96a2796
--- /dev/null
+++ b/.github/workflows/r_nightly.yml
@@ -0,0 +1,124 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: Upload R Nightly builds
+
+# Triggers: manual dispatch (optionally with an explicit crossbow job prefix)
+# and a daily schedule.
+on:
+  workflow_dispatch:
+    inputs:
+      prefix:
+        description: Job prefix to use.
+        required: false
+        default: ''
+  schedule:
+    # Crossbow packaging runs at 0 8 * * *; this workflow is scheduled six hours later.
+    - cron: '0 14 * * *'
+
+jobs:
+  # Single job: fetch the crossbow artifacts, lay them out as an R package
+  # repository and rsync the result to the nightlies host.
+  upload:
+    env:
+      # Empty unless the workflow was dispatched manually with a prefix.
+      PREFIX: ${{ github.event.inputs.prefix || ''}}
+    runs-on: ubuntu-latest
+    steps:
+      # Arrow is checked out into ./arrow; archery is installed from it below.
+      - name: Checkout Arrow
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 1
+          path: arrow
+          repository: apache/arrow
+          ref: master
+          submodules: recursive
+      # Full history (fetch-depth: 0) of the crossbow repository; presumably
+      # archery resolves the job branches from this clone - TODO confirm.
+      - name: Checkout Crossbow
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+          path: crossbow
+          repository: ursacomputing/crossbow
+          ref: master 
+      - name: Set up Python
+        uses: actions/setup-python@v3
+        with:
+          cache: 'pip'
+          python-version: 3.8
+      - name: Install Archery
+        shell: bash
+        run: pip install -e arrow/dev/archery[all]
+      # Target directory for `archery crossbow download-artifacts -t binaries`.
+      - run: mkdir -p binaries
+      - name: Download Artifacts
+        run: |
+          # Fall back to today's nightly packaging job when no prefix was given.
+          # The expansions are quoted so that an empty or whitespace-containing
+          # value is always handled as a single word.
+          if [ -z "$PREFIX" ]; then
+            PREFIX=nightly-packaging-$(date +%Y-%m-%d)-0
+          fi
+          echo "$PREFIX"
+
+          archery crossbow download-artifacts -f r-nightly-packages -t binaries --skip-pattern-validation "$PREFIX"
+      - name: Build Repository
+        shell: Rscript {0}
+        run: |
+          # Lay the downloaded artifacts out as a CRAN-style repository under
+          # repo/ (src/contrib, bin/<os>/contrib/<R x.y>, libarrow/bin/<distro>).
+          art_path <- list.files("binaries",
+              recursive = TRUE,
+              include.dirs = TRUE,
+              pattern = "r-nightly-packages$",
+              full.names = TRUE
+          )
+
+          # list.files() takes a regular expression, not a glob: anchor on the
+          # artifact prefix instead of appending "*".
+          pkgs <- list.files(art_path, pattern = "^r-pkg_")
+          # Split with a logical mask. pkgs[-grep(...)] returns an empty vector
+          # when grep() finds nothing, which would drop every binary package.
+          is_src <- grepl("r-pkg_src", pkgs, fixed = TRUE)
+          src_pkg <- pkgs[is_src]
+          pkgs <- pkgs[!is_src]
+          libs <- list.files(art_path, pattern = "^r-libarrow")
+
+          new_names <- sub("r-pkg_", "", pkgs, fixed = TRUE)
+          # Elements of each match: [[1]] whole name, [[2]] <os>-<runner>,
+          # [[3]] <os>, [[4]] R version (x.y), [[5]] package file name.
+          matches <- regmatches(new_names, regexec("(([a-z]+)-[\\.a-zA-Z0-9]+)_(\\d\\.\\d)-(arrow.+)$", new_names))
+
+          dir.create("repo/src/contrib", recursive = TRUE)
+          file.copy(paste0(art_path, "/", src_pkg), paste0("repo/src/contrib/", sub("r-pkg_src-", "", src_pkg)))
+          tools::write_PACKAGES("repo/src/contrib", type = "source", verbose = TRUE)
+
+          for (match in matches) {
+              path <- paste0("repo/bin/", match[[3]], "/contrib/", match[[4]])
+              # The runner label says "macos"; the repository directory is "macosx".
+              path <- sub("macos", "macosx", path)
+              dir.create(path, recursive = TRUE)
+              file <- list.files(art_path, pattern = match[[1]], full.names = TRUE)
+
+              file.copy(file, paste0(path, "/", match[[5]]))
+              # Type is "win.binary" / "mac.binary", built from the first three letters of the OS.
+              tools::write_PACKAGES(path, type = paste0(substring(match[[3]], 1, 3), ".binary"), verbose = TRUE)
+          }
+
+          # libarrow zips: [[2]] is the OS name, [[3]] the zip file name.
+          lib_names <- sub("r-libarrow-", "", libs)
+          lib_match <- regmatches(lib_names, regexec("([[:alpha:]]+)-(.+)", lib_names))
+
+          dir.create("repo/libarrow/bin/windows", recursive = TRUE)
+          dir.create("repo/libarrow/bin/centos-7", recursive = TRUE)
+          dir.create("repo/libarrow/bin/ubuntu-18.04", recursive = TRUE)
+
+          for (match in lib_match) {
+              # Find the directory created above whose name contains the OS name.
+              path <- list.files("repo//libarrow//bin", pattern = match[[2]], full.names = TRUE)
+              file <- paste0(art_path, "/r-libarrow-", match[[1]])
+              file.copy(file, paste0(path, "//", match[[3]]))
+          }
+      # Print the assembled repository tree to the job log before uploading.
+      - run: ls -R repo
+      # Sync the contents of repo/ to <NIGHTLIES_RSYNC_PATH>/arrow/r on the
+      # remote host; all connection details come from repository secrets.
+      - name: Upload Files
+        uses: burnett01/rsync-deployments@5.2
+        with:
+          switches: -avzr
+          path: repo/*
+          remote_path: ${{ secrets.NIGHTLIES_RSYNC_PATH }}/arrow/r
+          remote_host: ${{ secrets.NIGHTLIES_RSYNC_HOST }}
+          remote_port: ${{ secrets.NIGHTLIES_RSYNC_PORT }}
+          remote_user: ${{ secrets.NIGHTLIES_RSYNC_USER }}
+          remote_key: ${{ secrets.NIGHTLIES_RSYNC_KEY }}
diff --git a/dev/tasks/macros.jinja b/dev/tasks/macros.jinja
index bd65f6488c..fa3676ed30 100644
--- a/dev/tasks/macros.jinja
+++ b/dev/tasks/macros.jinja
@@ -52,8 +52,9 @@ on:
 
 {%- macro github_install_archery() -%}
   - name: Set up Python
-    uses: actions/setup-python@v2
+    uses: actions/setup-python@v3
     with:
+      cache: 'pip'
       python-version: 3.8
   - name: Install Archery
     shell: bash
@@ -221,3 +222,76 @@ on:
         cp ${formula} $(brew --repository homebrew/core)/Formula/
       done
 {% endmacro %}
+
+{# Emits a bash step that rewrites the Version field of arrow/r/DESCRIPTION and
+   pins the autobrew formula to the commit being built.
+   is_fork: the string "true" when building from a fork; the formula's
+            repository URL is then rewritten to arrow.github_repo as well.
+   version: replacement text for the sed expression. The default keeps the
+            matched x.y.z version and appends the current date as YYYYMMDD. #}
+{%- macro github_change_r_pkg_version(is_fork, version = '\\2.\'\"$(date +%Y%m%d)\"\'' ) -%}
+  - name: Modify version
+    shell: bash
+    run: |
+      cd arrow/r
+      sed -i.bak -E -e \
+        's/(^Version: )([0-9]+\.[0-9]+\.[0-9]+).*$/\1{{ version }}/' \
+        DESCRIPTION
+      head DESCRIPTION
+      rm -f DESCRIPTION.bak
+      cp ../dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb tools/apache-arrow.rb
+      
+      # Pin the git commit in the formula to match
+      cd tools
+      if [ "{{ is_fork }}" == "true" ]; then 
+        sed -i.bak -E -e 's/apache\/arrow.git"$/{{ arrow.github_repo.split("/") | join("\/") }}.git", :revision => "'"{{ arrow.head }}"'"/' apache-arrow.rb
+      else
+        sed -i.bak -E -e 's/arrow.git"$/arrow.git", :revision => "'"{{ arrow.head }}"'"/' apache-arrow.rb
+      fi
+      rm -f apache-arrow.rb.bak
+{% endmacro %}
+
+{# Emits R code (not a workflow step) that installs arrow from the repository
+   stored in the "arrow.dev_repo" option, with CRAN as the second repository,
+   reads a bundled parquet file and checks the installed version. The calling
+   job must declare "needs: source", because the version check reads
+   needs.source.outputs.version. #}
+{%- macro github_test_r_src_pkg() -%}
+  source("https://raw.githubusercontent.com/apache/arrow/master/ci/etc/rprofile")
+  
+  install.packages(
+    "arrow",
+    repos = c(getOption("arrow.dev_repo"), "https://cloud.r-project.org"),
+    verbose = TRUE
+  )
+  
+  # Smoke test: load the package and read a parquet file shipped with it.
+  library(arrow)
+  read_parquet(system.file("v0.7.1.parquet", package = "arrow"))
+
+  # Our Version should always be > CRAN so we would detect a CRAN version here.
+  stopifnot(packageVersion("arrow") == {{ '"${{needs.source.outputs.version}}"' }})
+{% endmacro %}
+
+{# Emits steps that build a local R package repository in ./repo from the
+   artifacts uploaded by earlier jobs of the same workflow run.
+   os:      text inserted verbatim into the GitHub "if" expressions and compared
+            with 'windows-latest' (an expression such as matrix.platform or a
+            quoted literal).
+   get_bin: 'true' or 'false' as text; when false none of the libarrow binary
+            artifacts are downloaded, only the source package. #}
+{%- macro github_setup_local_r_repo(os, get_bin = 'true') -%}
+  - name: Setup local repo
+    shell: bash
+    run: mkdir repo 
+  - name: Get windows binary
+    if: {{ '${{' }} ({{ os }} == 'windows-latest') && {{ get_bin }} {{ '}}' }}
+    uses: actions/download-artifact@v3
+    with:
+      name: r-libarrow-windows
+      path: repo/libarrow/bin/windows
+  # Both Linux binaries are fetched for every non-Windows value of os.
+  - name: Get ubuntu binary
+    if: {{ '${{' }} ({{ os }} != 'windows-latest') && {{ get_bin }} {{ '}}' }}
+    uses: actions/download-artifact@v3
+    with:
+      name: r-libarrow-ubuntu
+      path: repo/libarrow/bin/ubuntu-18.04
+  - name: Get centos binary
+    if: {{ '${{' }} ({{ os }} != 'windows-latest') && {{ get_bin }} {{ '}}' }}
+    uses: actions/download-artifact@v3
+    with:
+      name: r-libarrow-centos
+      path: repo/libarrow/bin/centos-7
+  # The source package is always downloaded, regardless of os and get_bin.
+  - name: Get src pkg
+    uses: actions/download-artifact@v3
+    with:
+      name: r-pkg_src
+      path: repo/src/contrib
+  - name: Update repo index
+    shell: Rscript {0}
+    run: |
+      # getwd() is necessary as this macro is used within jobs using a docker container
+      tools::write_PACKAGES(file.path(getwd(), "/repo/src/contrib", fsep = "/"), type = "source", verbose = TRUE)
+  - run: ls -R repo
+{% endmacro %}
\ No newline at end of file
diff --git a/dev/tasks/r/github.nightly.yml b/dev/tasks/r/github.nightly.yml
new file mode 100644
index 0000000000..e60baae511
--- /dev/null
+++ b/dev/tasks/r/github.nightly.yml
@@ -0,0 +1,304 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+{% import 'macros.jinja' as macros with context %}
+
+# This allows us to set a custom version via param:
+# crossbow submit --param custom_version=8.5.3 r-nightly-packages
+# If the param is unset, the version defaults to the usual Ymd naming scheme:
+# the default value is a sed replacement that keeps the existing x.y.z version
+# and appends the current date.
+{% set version = custom_version|default("\\2.\'\"$(date +%Y%m%d)\"\'") %}
+# We need this as boolean and string
+{% set is_upstream_b = arrow.github_repo == 'apache/arrow' %}
+# use filter to cast to string and convert to lowercase to match yaml boolean
+{% set is_fork = (not is_upstream_b)|lower %}
+{% set is_upstream = is_upstream_b|lower %}
+
+
+{{ macros.github_header() }}
+
+jobs:
+  source:
+    # This job will change the version to either the custom_version param or YMD format.
+    # The output allows other steps to use the exact version to prevent issues (e.g. date changes during run)
+    name: Source Package
+    runs-on: ubuntu-latest
+    outputs:
+      version: {{ '${{ steps.save-version.outputs.version }}' }}
+    steps:
+      {{ macros.github_checkout_arrow()|indent }}
+      {{ macros.github_change_r_pkg_version(is_fork, version)|indent }}
+      - name: Save Version
+        id: save-version
+        shell: bash
+        run: |
+          # Publish the rewritten DESCRIPTION version as a step output through
+          # the GITHUB_OUTPUT environment file; the `::set-output` workflow
+          # command is deprecated.
+          echo "version=$(grep ^Version arrow/r/DESCRIPTION | sed 's/Version: //')" >> "$GITHUB_OUTPUT"
+
+      # install-r: false uses the R that is already present on the runner.
+      - uses: r-lib/actions/setup-r@v2
+        with:
+          install-r: false
+
+      - name: Build R source package
+        shell: bash
+        run: |
+          cd arrow/r
+          # Copy in the Arrow C++ source
+          make sync-cpp
+          R CMD build --no-build-vignettes .
+
+      # Downstream jobs download this artifact by the name r-pkg_src.
+      - name: Upload package artifact
+        uses: actions/upload-artifact@v3
+        with:
+          name: r-pkg_src
+          path: arrow/r/arrow_*.tar.gz
+
+  # Builds the static Arrow C++ library in a docker image for each Linux target
+  # and uploads it as r-libarrow-<os>.
+  linux-cpp:
+    name: C++ Binary {{ '${{ matrix.config.os }}-${{ matrix.config.version }}' }}
+    runs-on: ubuntu-latest
+    needs: source
+    strategy:
+      fail-fast: false
+      matrix:
+        config:
+          - { os: ubuntu, version: "18.04" }
+          - { os: centos, version: "7" }
+    env:
+      # NOTE(review): set from the matrix version for both entries, so it is "7"
+      # on the centos build; presumably only read by the ubuntu image - confirm.
+      UBUNTU: {{ '${{ matrix.config.version }}' }}
+    steps:
+      {{ macros.github_checkout_arrow()|indent }}
+      # Reuse the exact version produced by the source job instead of recomputing the date.
+      {{ macros.github_change_r_pkg_version(is_fork, '${{ needs.source.outputs.version }}')|indent }}
+      {{ macros.github_install_archery()|indent }}
+      - name: Build libarrow
+        shell: bash
+        run: |
+          sudo sysctl -w kernel.core_pattern="core.%e.%p"
+          ulimit -c unlimited
+          archery docker run  {{ '${{ matrix.config.os}}' }}-cpp-static
+      - name: Bundle libarrow
+        shell: bash
+        env:
+          PKG_FILE: arrow-{{ '${{ needs.source.outputs.version }}' }}.zip
+          VERSION: {{ '${{ needs.source.outputs.version }}' }}
+        run: |
+          cd arrow/r/libarrow/dist
+          # These files were created by the docker user so we have to sudo to get them
+          sudo -E zip -r $PKG_FILE lib/ include/
+          
+      - name: Upload binary artifact
+        uses: actions/upload-artifact@v3
+        with:
+          name: r-libarrow-{{ '${{ matrix.config.os}}' }}
+          path: arrow/r/libarrow/dist/arrow-*.zip
+
+  # Builds Arrow C++ for Windows with Rtools 40 and uploads the resulting zip
+  # as the r-libarrow-windows artifact.
+  windows-cpp:
+    name: C++ Binary Windows RTools (40 only)
+    needs: source
+    runs-on: windows-latest
+    steps:
+      # Disable CRLF conversion before the checkout below.
+      - run: git config --global core.autocrlf false
+      {{ macros.github_checkout_arrow()|indent }}
+      # Reuse the exact version produced by the source job.
+      {{ macros.github_change_r_pkg_version(is_fork, '${{ needs.source.outputs.version }}')|indent }}
+
+      - uses: r-lib/actions/setup-r@v2
+        with:
+          rtools-version: 40
+          r-version: "4.0"
+          Ncpus: 2
+
+      - name: Build Arrow C++ with rtools40
+        shell: bash
+        env:
+          ARROW_HOME: "arrow"
+        run: arrow/ci/scripts/r_windows_build.sh
+
+      - name: Upload binary artifact
+        uses: actions/upload-artifact@v3
+        with:
+          name: r-libarrow-windows
+          path: build/arrow-*.zip 
+
+  # Builds the binary R packages for Windows and macOS from the source package
+  # in a local repository and uploads them as r-pkg_<platform>_<R version>.
+  r-packages:
+    # Run even when a needed job failed, unless the workflow was cancelled.
+    if: true && !cancelled()
+    # linux-cpp is required as well: for every non-Windows platform the
+    # github_setup_local_r_repo macro downloads the r-libarrow-ubuntu and
+    # r-libarrow-centos artifacts, so the macOS jobs must not start before
+    # those artifacts have been uploaded.
+    needs: [source, windows-cpp, linux-cpp]
+    name: {{ '${{ matrix.platform }} ${{ matrix.r_version.r }}' }}
+    runs-on: {{ '${{ matrix.platform }}' }}
+    strategy:
+      fail-fast: false
+      matrix:
+        platform:
+          - windows-latest
+          # This is newer than what CRAN builds on, but Travis is no longer an option for us, so...
+          - macos-10.15
+          # - devops-managed # No M1 until the runner application runs native
+        r_version:
+          - { rtools: 40, r: "4.1" }
+          - { rtools: 42, r: "4.2" }
+    steps:
+      - uses: r-lib/actions/setup-r@v2
+        with:
+          r-version: {{ '${{ matrix.r_version.r }}' }}
+          rtools-version: {{ '${{ matrix.r_version.rtools }}' }}
+          Ncpus: 2
+      {{ macros.github_setup_local_r_repo('matrix.platform')|indent }}
+      - name: Build Binary
+        shell: Rscript {0}
+        run: |
+          on_windows <- tolower(Sys.info()[["sysname"]]) == "windows"
+
+          # Install dependencies by installing (yesterday's) binary, then removing it
+          install.packages(c("arrow", "cpp11"),
+            type = "binary",
+            repos = c("https://nightlies.apache.org/arrow/r", "https://cloud.r-project.org")
+          )
+          remove.packages("arrow")
+
+          # Setup local repo
+          dev_repo <- paste0(
+            ifelse(on_windows, "file:", "file://"),
+            getwd(),
+            "/repo")
+
+          options(arrow.dev_repo = dev_repo)
+
+          # Build
+          Sys.setenv(MAKEFLAGS = paste0("-j", parallel::detectCores()))
+          # --build makes R CMD INSTALL also write a binary package archive.
+          INSTALL_opts <- "--build"
+          if (!on_windows) {
+            # Windows doesn't support the --strip arg
+            INSTALL_opts <- c(INSTALL_opts, "--strip")
+          }
+
+
+          install.packages(
+            "arrow",
+            type = "source",
+            repos = dev_repo,
+            INSTALL_opts = INSTALL_opts
+          )
+
+          # Test
+          library(arrow)
+          read_parquet(system.file("v0.7.1.parquet", package = "arrow"))
+
+      - name: Upload binary artifact
+        uses: actions/upload-artifact@v3
+        with:
+          name: r-pkg_{{ '${{ matrix.platform }}_${{ matrix.r_version.r }}' }}
+          path: arrow_*
+
+  # Installs the source package in several Linux containers with
+  # LIBARROW_BUILD=FALSE and LIBARROW_BINARY=TRUE, using the libarrow zips
+  # placed in the local repository by the setup macro.
+  test-linux-binary:
+    # Run even when a needed job failed, unless the workflow was cancelled.
+    if: true && !cancelled()
+    needs: [source, linux-cpp]
+    name: Test binary {{ '${{ matrix.image }}' }}
+    runs-on: ubuntu-latest
+    container: {{ '${{ matrix.image }}' }}
+    strategy:
+      fail-fast: false
+      matrix:
+        image:
+          - "rhub/ubuntu-gcc-release" # ubuntu-20.04 (focal)
+          - "rstudio/r-base:4.1-bionic"
+          - "rstudio/r-base:4.2-centos7"
+          - "rocker/r-ver:3.6.3" # for debian:buster (10)
+          - "rocker/r-ver" # ubuntu-20.04
+          - "rhub/fedora-clang-devel" # tests distro-map.csv; presumably mapped to centos (comment was truncated) - TODO confirm
+    steps:
+      - name: Install system requirements
+        shell: bash
+        run: |
+          # Use whichever package manager the image provides.
+          if [ "`which dnf`" ]; then
+            dnf install -y libcurl-devel openssl-devel
+          elif [ "`which yum`" ]; then
+            yum install -y libcurl-devel openssl-devel
+          elif [ "`which zypper`" ]; then
+            zypper install -y libcurl-devel libopenssl-devel
+          else
+            apt-get update
+            apt-get install -y libcurl4-openssl-dev libssl-dev
+          fi
+
+          # Add R-devel to PATH
+          echo "/opt/R-devel/bin" >> $GITHUB_PATH
+      {{ macros.github_setup_local_r_repo("'ubuntu-latest'")|indent }}
+      - name: Set dev repo
+        shell: bash
+        run: |
+          # It is important to use pwd here as this happens inside a container so the 
+          # normal github.workspace path is wrong.
+          echo "options(arrow.dev_repo = 'file://$(pwd)/repo')" >> ~/.Rprofile
+      - name: Install arrow from our repo
+        env:
+          LIBARROW_BUILD: "FALSE"
+          LIBARROW_BINARY: "TRUE"
+        shell: Rscript {0}
+        run: |
+          {{  macros.github_test_r_src_pkg()|indent(8) }}
+
+  # Installs the source package on Linux with LIBARROW_BINARY=FALSE so that
+  # libarrow is built from source rather than downloaded.
+  test-source:
+    # Run even when a needed job failed, unless the workflow was cancelled.
+    if: true && !cancelled()
+    needs: source
+    name: Test linux source build
+    runs-on: ubuntu-latest
+    steps:
+      - name: Install R
+        uses: r-lib/actions/setup-r@v2
+        with:
+          install-r: false
+      # get_bin = 'false': only the source package is placed in the local repo.
+      {{ macros.github_setup_local_r_repo("'ubuntu-latest'", 'false')|indent }}
+      - name: Set dev repo
+        shell: bash
+        run: |
+          echo "options(arrow.dev_repo = 'file://$(pwd)/repo')" >> ~/.Rprofile
+      - name: Install arrow from nightly repo
+        env:
+          # Test source build so be sure not to download a binary
+          LIBARROW_BINARY: "FALSE"
+        shell: Rscript {0}
+        run: |
+          {{ macros.github_test_r_src_pkg()|indent(8) }}
+
+      - name: Retry with verbosity if that failed
+        if: failure()
+        env:
+          LIBARROW_BINARY: "FALSE"
+          ARROW_R_DEV: "TRUE"
+        # The macro expands to R code, so this step needs the Rscript shell too;
+        # without it the code would be handed to the runner's default shell.
+        shell: Rscript {0}
+        run: |
+          {{ macros.github_test_r_src_pkg()|indent(8) }}
+
+  # Collects every artifact of this run, flattens "<artifact>/<file>" into
+  # "binaries/<artifact>-<file>" and uploads the result to the release.
+  upload-binaries:
+    # linux-cpp is listed explicitly: its r-libarrow-ubuntu and
+    # r-libarrow-centos artifacts are uploaded here too, and without the
+    # dependency this job could start before they exist.
+    needs: [r-packages, linux-cpp]
+    # Run even when a needed job failed, unless the workflow was cancelled.
+    if: true && !cancelled()
+    name: Upload artifacts
+    runs-on: ubuntu-latest
+    steps:
+      {{ macros.github_checkout_arrow()|indent }}
+      # No artifact name is given, so all artifacts are downloaded, each into
+      # its own sub-directory of artifacts/.
+      - name: Download Artifacts
+        uses: actions/download-artifact@v3
+        with:
+          path: artifacts
+      - name: Install R
+        uses: r-lib/actions/setup-r@v2
+        with:
+          install-r: false
+      - name: Rename artifacts
+        shell: Rscript {0}
+        run: |
+          file_paths <- list.files("artifacts", include.dirs = FALSE, recursive = TRUE)
+          # sub() replaces only the first "/", i.e. the artifact directory separator.
+          new_names <- paste0("binaries/", sub("/","-",file_paths))
+          dir.create("binaries", showWarnings = FALSE)
+          file.copy(paste0("artifacts/", file_paths), new_names)
+
+      {{ macros.github_upload_releases("binaries/r-*")|indent }}
\ No newline at end of file
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index 93dd522984..42250896d4 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -174,6 +174,7 @@ groups:
     - nuget
     - wheel-*
     - python-sdist
+    - r-nightly-packages
 
   nightly-release:
     - verify-rc-source-*
@@ -911,6 +912,15 @@ tasks:
       - Apache.Arrow.{no_rc_version}.nupkg
       - Apache.Arrow.{no_rc_version}.snupkg
 
+  ######################## R packages & binaries ##############################
+  # Nightly R source package, binary packages and libarrow zips, rendered
+  # from the r/github.nightly.yml template and run on GitHub Actions.
+  r-nightly-packages:
+    ci: github
+    template: r/github.nightly.yml
+    # NOTE(review): these look like regular expressions rather than exact file
+    # names (the upload workflow passes --skip-pattern-validation) - confirm.
+    artifacts:
+      - r-libarrow-.+
+      - r-pkg_.+
+
+
   ########################### Release verification ############################
 
   ######################## Linux verification #################################