You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2020/12/03 04:19:17 UTC

[GitHub] [arrow] nevi-me commented on a change in pull request #8821: ARROW-10792: [Rust] [CI] Modularize builds for faster build and smaller caches

nevi-me commented on a change in pull request #8821:
URL: https://github.com/apache/arrow/pull/8821#discussion_r534656496



##########
File path: .github/workflows/rust.yml
##########
@@ -22,144 +22,300 @@ on:
     paths:
       - '.github/workflows/rust.yml'
       - 'ci/docker/*rust*'
-      - 'ci/scripts/rust_*.sh'
-      - 'ci/scripts/util_*.sh'
       - 'rust/**'
       - 'format/Flight.proto'
   pull_request:
     paths:
       - '.github/workflows/rust.yml'
       - 'ci/docker/*rust*'
-      - 'ci/scripts/rust_*.sh'
-      - 'ci/scripts/util_*.sh'
       - 'rust/**'
       - 'format/Flight.proto'
 
-env:
-  DOCKER_BUILDKIT: 0
-  COMPOSE_DOCKER_CLI_BUILD: 1
-  ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }}
-  ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }}
-
 jobs:
 
-  debian:
-    name: AMD64 Debian 10 Rust ${{ matrix.rust }}
+  # build the docker image with rust and necessary dependencies
+  linux-build-image:
+    name: Linux ${{ matrix.arch }} Rust ${{ matrix.rust }} build image
     runs-on: ubuntu-latest
-    if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
     strategy:
-      fail-fast: false
       matrix:
-        rust: [nightly-2020-11-24]
+        arch: [amd64]
+        rust: [stable, nightly-2020-11-24]
     env:
-      RUST: ${{ matrix.rust }}
+      REGISTRY: docker.pkg.github.com
+      IMAGE_NAME: docker.pkg.github.com/${{ github.repository }}/${{ matrix.arch }}-linux-rust-${{ matrix.rust }}
     steps:
-      - name: Checkout Arrow
-        uses: actions/checkout@v2
-        with:
-          fetch-depth: 0
-      - name: Fetch Submodules and Tags
+      # based on https://github.com/dtinth/github-actions-docker-layer-caching-poc/blob/master/.github/workflows/dockerimage.yml
+      - uses: actions/checkout@v2
+      - name: Build build image
         shell: bash
-        run: ci/scripts/util_checkout.sh
-      - name: Free Up Disk Space
-        shell: bash
-        run: ci/scripts/util_cleanup.sh
-      - name: Cache Docker Volumes
-        uses: actions/cache@v1
-        with:
-          path: .docker
-          key: debian-10-rust-${{ hashFiles('rust/**/**.rs', 'rust/**/Cargo.toml') }}
-          restore-keys: debian-10-rust-
-      - name: Setup Python
-        uses: actions/setup-python@v1
-        with:
-          python-version: 3.8
-      - name: Setup Archery
-        run: pip install -e dev/archery[docker]
-      - name: Execute Docker Build
         run: |
-          sudo sysctl -w kernel.core_pattern="core.%e.%p"
-          ulimit -c unlimited
-          archery docker run debian-rust
-      - name: Fix Cache Permissions
-        run: sudo chmod -R o+r .docker
-      - name: Docker Push
-        if: success() && github.event_name == 'push' && github.repository == 'apache/arrow'
-        continue-on-error: true
-        shell: bash
-        run: archery docker push debian-rust
+          echo ${{ secrets.GITHUB_TOKEN }} | docker login $REGISTRY -u $GITHUB_ACTOR --password-stdin
+          docker pull $IMAGE_NAME || true
+          docker build -f ci/docker/debian-10-rust.dockerfile . -t $IMAGE_NAME --cache-from $IMAGE_NAME
+          docker push $IMAGE_NAME || true
 
-  windows:
-    name: AMD64 Windows 2019 Rust ${{ matrix.rust }}
-    runs-on: windows-latest
-    if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
+  # build the library, a compilation step used by multiple steps below
+  linux-build-lib:
+    name: Linux ${{ matrix.arch }} Rust ${{ matrix.rust }} build libraries
+    needs: [linux-build-image]
+    runs-on: ubuntu-latest
     strategy:
-      fail-fast: false
       matrix:
+        arch: [amd64]
         rust: [nightly-2020-11-24]
+    container:
+      image: docker.pkg.github.com/${{ github.repository }}/${{ matrix.arch }}-linux-rust-${{ matrix.rust }}
+      credentials:
+        username: ${{ github.actor }}
+        password: ${{ github.token }}
+      env:
+        # so that cache takes it
+        CARGO_HOME: /github/home/.cargo
+        CARGO_TARGET_DIR: /github/home/target/
     steps:
-      - name: Install Rust
-        uses: actions-rs/toolchain@v1
-        with:
-            toolchain: ${{ matrix.rust }}
-            override: true
-            components: rustfmt
-      - name: Install Flatbuffers
-        shell: bash
-        run: choco install flatc
-      - name: Checkout Arrow
-        uses: actions/checkout@v2
+      - uses: actions/checkout@v2
+      - name: Cache Cargo
+        uses: actions/cache@v2
         with:
-          fetch-depth: 0
-      - name: Fetch Submodules and Tags
-        shell: bash
-        run: ci/scripts/util_checkout.sh
-      - name: Cache Build Artifacts
-        uses: actions/cache@v1
+          # these represent dependencies downloaded by cargo
+          # and thus do not depend on the OS, arch nor rust version.
+          path: /github/home/.cargo
+          key: cargo-cache-${{ hashFiles('**/Cargo.lock') }}
+          restore-keys: cargo-cache-
+      - name: Cache Rust dependencies
+        uses: actions/cache@v2
         with:
-          path: rust/target
-          key: windows-rust-v1-${{ hashFiles('rust/**/**.rs', 'rust/**/Cargo.toml') }}
-          restore-keys: windows-rust-v1-
+          # these represent compiled steps of both dependencies and arrow
+          # and thus are specific for a particular OS, arch and rust version.
+          path: /github/home/target
+          key: ${{ runner.os }}-${{ matrix.arch }}-target-cache-${{ matrix.rust }}-${{ hashFiles('**/Cargo.lock') }}
+          restore-keys: ${{ runner.os }}-${{ matrix.arch }}-target-cache-${{ matrix.rust }}-
       - name: Build
-        shell: bash
-        run: ci/scripts/rust_build.sh $(pwd) $(pwd)/build
-      - name: Test
-        shell: bash
-        run: ci/scripts/rust_test.sh $(pwd) $(pwd)/build
+        run: cd rust && cargo build
 
-  macos:
-    name: AMD64 MacOS 10.15 Rust ${{ matrix.rust }}
-    runs-on: macos-latest
-    if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
+  # test the crate
+  linux-test:
+    name: Linux ${{ matrix.arch }} Rust ${{ matrix.rust }} test workspace
+    needs: [linux-build-lib]
+    runs-on: ubuntu-latest
     strategy:
-      fail-fast: false
       matrix:
+        arch: [amd64]
         rust: [nightly-2020-11-24]
+    container:
+      image: docker.pkg.github.com/${{ github.repository }}/${{ matrix.arch }}-linux-rust-${{ matrix.rust }}
+      credentials:
+        username: ${{ github.actor }}
+        password: ${{ github.token }}
+      env:
+        CARGO_HOME: /github/home/.cargo
+        CARGO_TARGET_DIR: /github/home/target
+
+        ARROW_TEST_DATA: /__w/arrow/arrow/testing/data
+        PARQUET_TEST_DATA: /__w/arrow/arrow/cpp/submodules/parquet-testing/data
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          submodules: true
+      - name: Cache Cargo
+        uses: actions/cache@v2
+        with:
+          path: /github/home/.cargo
+          # these keys equal the ones on `linux-build-lib` for re-use
+          key: cargo-cache-${{ hashFiles('**/Cargo.lock') }}
+          restore-keys: cargo-cache-
+      - name: Cache Rust dependencies
+        uses: actions/cache@v2
+        with:
+          path: /github/home/target
+          # these keys equal the ones on `linux-build-lib` for re-use
+          key: ${{ runner.os }}-${{ matrix.arch }}-target-cache-${{ matrix.rust }}-${{ hashFiles('**/Cargo.lock') }}
+          restore-keys: ${{ runner.os }}-${{ matrix.arch }}-target-cache-${{ matrix.rust }}-
+      - name: Run tests
+        run: |
+          cd rust
+          cargo test
+          # test datafusion examples
+          cd datafusion
+          cargo run --example csv_sql
+          cargo run --example parquet_sql
+
+  # test the --features "simd" of the arrow crate
+  linux-test-simd:
+    name: Linux ${{ matrix.arch }} Rust ${{ matrix.rust }} test arrow simd
+    needs: [linux-build-lib]
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        arch: [amd64]
+        rust: [nightly-2020-11-24]
+    container:
+      image: docker.pkg.github.com/${{ github.repository }}/${{ matrix.arch }}-linux-rust-${{ matrix.rust }}
+      credentials:
+        username: ${{ github.actor }}
+        password: ${{ github.token }}
+      env:
+        CARGO_HOME: /github/home/.cargo
+        CARGO_TARGET_DIR: /github/home/target
+
+        ARROW_TEST_DATA: /__w/arrow/arrow/testing/data
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          submodules: true
+      - name: Cache Cargo
+        uses: actions/cache@v2
+        with:
+          path: /github/home/.cargo
+          # these keys equal the ones on `linux-build-lib` for re-use
+          key: cargo-cache-${{ hashFiles('**/Cargo.lock') }}
+          restore-keys: cargo-cache-
+      - name: Cache Rust dependencies
+        uses: actions/cache@v2
+        with:
+          path: /github/home/target
+          # these keys equal the ones on `linux-build-lib` for re-use
+          key: ${{ runner.os }}-${{ matrix.arch }}-target-cache-${{ matrix.rust }}-${{ hashFiles('**/Cargo.lock') }}
+          restore-keys: ${{ runner.os }}-${{ matrix.arch }}-target-cache-${{ matrix.rust }}-
+      - name: Run tests
+        run: |
+          cd rust/arrow
+          cargo test --features "simd"
+
+  # test the arrow crate with stable rust
+  linux-test-stable:
+    name: Linux ${{ matrix.arch }} Rust ${{ matrix.rust }} test arrow
+    needs: [linux-build-image]
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        arch: [amd64]
+        rust: [stable]
+    container:
+      image: docker.pkg.github.com/${{ github.repository }}/${{ matrix.arch }}-linux-rust-${{ matrix.rust }}
+      credentials:
+        username: ${{ github.actor }}
+        password: ${{ github.token }}
+      env:
+        CARGO_HOME: /github/home/.cargo
+        CARGO_TARGET_DIR: /github/home/target
+
+        ARROW_TEST_DATA: /__w/arrow/arrow/testing/data
     steps:
-      - name: Install Rust
-        uses: actions-rs/toolchain@v1
+      - uses: actions/checkout@v2
+        with:
+          submodules: true
+      - name: Cache Cargo
+        uses: actions/cache@v2
+        with:
+          path: /github/home/.cargo
+          # these keys equal the ones on `linux-build-lib` for re-use
+          key: cargo-cache-${{ hashFiles('**/Cargo.lock') }}
+          restore-keys: cargo-cache-
+      - name: Cache Rust dependencies
+        uses: actions/cache@v2
+        with:
+          path: /github/home/target
+          # these keys equal the ones on `linux-build-lib` for re-use
+          key: ${{ runner.os }}-${{ matrix.arch }}-target-cache-${{ matrix.rust }}-${{ hashFiles('**/Cargo.lock') }}
+          restore-keys: ${{ runner.os }}-${{ matrix.arch }}-target-cache-${{ matrix.rust }}-
+      - name: Run tests
+        run: |
+          cd rust/arrow

Review comment:
       @GregBowyer we'd be able to add parquet to the stable tests here, so there's no need to add it to CI in your PR




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org