Posted to commits@spark.apache.org by gu...@apache.org on 2022/06/20 23:46:44 UTC

[spark] branch master updated: [SPARK-39521][INFRA] Separate scheduled jobs to each workflow

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 9e468cf010f [SPARK-39521][INFRA] Separate scheduled jobs to each workflow
9e468cf010f is described below

commit 9e468cf010f7381c1e85b02c0b3b043db7ffc07d
Author: Hyukjin Kwon <gu...@apache.org>
AuthorDate: Tue Jun 21 08:46:18 2022 +0900

    [SPARK-39521][INFRA] Separate scheduled jobs to each workflow
    
    ### What changes were proposed in this pull request?
    
    This PR proposes to split the scheduled jobs into separate workflows, one per job. After this PR, each scheduled build can be found directly at https://github.com/apache/spark/actions, for example:
    
    ![Screen Shot 2022-06-20 at 6 01 04 PM](https://user-images.githubusercontent.com/6477701/174565779-ab54eb69-49f9-4746-b714-902741e1d554.png)
    
    In addition, this PR turns the ANSI build into a scheduled build as well, because it is too expensive to run on every commit.
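    
    For reference, each scheduled build is now a thin caller workflow with its own cron schedule that reuses the shared `build_and_test.yml` via `workflow_call`. A minimal sketch of the ANSI one added in this PR (slightly simplified from the actual file) looks like:
    
    ```yaml
    # e.g. .github/workflows/build_ansi.yml (illustrative sketch)
    name: "Build / ANSI (master, Hadoop 3, JDK 8, Scala 2.12)"
    
    on:
      schedule:
        - cron: '0 1 * * *'   # each scheduled build gets its own daily cron slot
    
    jobs:
      run-build:
        name: Run
        # Reuse the shared build as a reusable workflow instead of duplicating it
        uses: ./.github/workflows/build_and_test.yml
        if: github.repository == 'apache/spark'   # scheduled builds run only in apache/spark
        with:
          java: 8
          branch: master
          hadoop: hadoop3
          type: scheduled
          envs: >-
            {
              "SPARK_ANSI_SQL_MODE": "true"
            }
    ```
    
    The shared `build_and_test.yml` declares the matching `java`, `branch`, `hadoop`, `type`, and `envs` inputs under `on.workflow_call`, replacing the old `configure-jobs` step that mapped each cron expression to a build configuration.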
    
    ### Why are the changes needed?
    
    Currently it is very inconvenient to navigate the scheduled jobs: you have to open https://github.com/apache/spark/actions/workflows/build_and_test.yml?query=event%3Aschedule and manually search through the runs one by one.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No, dev-only.
    
    ### How was this patch tested?
    
    Tested in my fork (https://github.com/HyukjinKwon/spark/actions).
    
    Closes #36922 from HyukjinKwon/SPARK-39521.
    
    Authored-by: Hyukjin Kwon <gu...@apache.org>
    Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
 .github/workflows/build_and_test.yml               | 230 +++++++--------------
 .../{build_and_test_ansi.yml => build_ansi.yml}    |  21 +-
 ...{build_and_test_ansi.yml => build_branch32.yml} |  21 +-
 ...{build_and_test_ansi.yml => build_branch33.yml} |  21 +-
 ...{build_and_test_ansi.yml => build_coverage.yml} |  21 +-
 .../{build_and_test_ansi.yml => build_hadoop2.yml} |  17 +-
 .../{build_and_test_ansi.yml => build_java11.yml}  |  22 +-
 .../{build_and_test_ansi.yml => build_java17.yml}  |  22 +-
 .../{build_and_test_ansi.yml => build_main.yml}    |  10 +-
 ...{build_and_test_ansi.yml => build_scala213.yml} |  21 +-
 .../workflows/cancel_duplicate_workflow_runs.yml   |   2 +-
 .github/workflows/notify_test_workflow.yml         |   2 +-
 .github/workflows/test_report.yml                  |   2 +-
 .github/workflows/update_build_status.yml          |   2 +-
 14 files changed, 187 insertions(+), 227 deletions(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 81381eb16d4..084cbb95b07 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -20,105 +20,32 @@
 name: Build and test
 
 on:
-  push:
-    branches:
-    - '**'
-  schedule:
-    # Note that the scheduled jobs are only for master branch.
-    # master, Hadoop 2
-    - cron: '0 1 * * *'
-    # master
-    - cron: '0 4 * * *'
-    # branch-3.2
-    - cron: '0 7 * * *'
-    # PySpark coverage for master branch
-    - cron: '0 10 * * *'
-    # Java 11
-    - cron: '0 13 * * *'
-    # Java 17
-    - cron: '0 16 * * *'
-    # branch-3.3
-    - cron: '0 19 * * *'
   workflow_call:
     inputs:
-      ansi_enabled:
+      java:
         required: false
-        type: boolean
-        default: false
-
+        type: string
+        default: 8
+      branch:
+        required: false
+        type: string
+        default: master
+      hadoop:
+        required: false
+        type: string
+        default: hadoop3
+      type:
+        required: false
+        type: string
+        default: regular
+      envs:
+        required: false
+        type: string
+        default: "{}"
 jobs:
-  configure-jobs:
-    name: Configure jobs
-    runs-on: ubuntu-20.04
-    # All other jobs in this workflow depend on this job,
-    # so the entire workflow is skipped when these conditions evaluate to false:
-    # Run all jobs for Apache Spark repository
-    # Run only non-scheduled jobs for forked repositories
-    if: github.repository == 'apache/spark' || github.event_name != 'schedule'
-    outputs:
-      java: ${{ steps.set-outputs.outputs.java }}
-      branch: ${{ steps.set-outputs.outputs.branch }}
-      hadoop: ${{ steps.set-outputs.outputs.hadoop }}
-      type: ${{ steps.set-outputs.outputs.type }}
-      envs: ${{ steps.set-outputs.outputs.envs }}
-    steps:
-    - name: Configure branch and additional environment variables
-      id: set-outputs
-      run: |
-        if [ "${{ github.event.schedule }}" = "0 1 * * *" ]; then
-          echo '::set-output name=java::8'
-          echo '::set-output name=branch::master'
-          echo '::set-output name=type::scheduled'
-          echo '::set-output name=envs::{}'
-          echo '::set-output name=hadoop::hadoop2'
-        elif [ "${{ github.event.schedule }}" = "0 4 * * *" ]; then
-          echo '::set-output name=java::8'
-          echo '::set-output name=branch::master'
-          echo '::set-output name=type::scheduled'
-          echo '::set-output name=envs::{"SCALA_PROFILE": "scala2.13"}'
-          echo '::set-output name=hadoop::hadoop3'
-        elif [ "${{ github.event.schedule }}" = "0 7 * * *" ]; then
-          echo '::set-output name=java::8'
-          echo '::set-output name=branch::branch-3.2'
-          echo '::set-output name=type::scheduled'
-          echo '::set-output name=envs::{"SCALA_PROFILE": "scala2.13"}'
-          echo '::set-output name=hadoop::hadoop3.2'
-        elif [ "${{ github.event.schedule }}" = "0 10 * * *" ]; then
-          echo '::set-output name=java::8'
-          echo '::set-output name=branch::master'
-          echo '::set-output name=type::pyspark-coverage-scheduled'
-          echo '::set-output name=envs::{"PYSPARK_CODECOV": "true"}'
-          echo '::set-output name=hadoop::hadoop3'
-        elif [ "${{ github.event.schedule }}" = "0 13 * * *" ]; then
-          echo '::set-output name=java::11'
-          echo '::set-output name=branch::master'
-          echo '::set-output name=type::scheduled'
-          echo '::set-output name=envs::{"SKIP_MIMA": "true", "SKIP_UNIDOC": "true"}'
-          echo '::set-output name=hadoop::hadoop3'
-        elif [ "${{ github.event.schedule }}" = "0 16 * * *" ]; then
-          echo '::set-output name=java::17'
-          echo '::set-output name=branch::master'
-          echo '::set-output name=type::scheduled'
-          echo '::set-output name=envs::{"SKIP_MIMA": "true", "SKIP_UNIDOC": "true"}'
-          echo '::set-output name=hadoop::hadoop3'
-        elif [ "${{ github.event.schedule }}" = "0 19 * * *" ]; then
-          echo '::set-output name=java::8'
-          echo '::set-output name=branch::branch-3.3'
-          echo '::set-output name=type::scheduled'
-          echo '::set-output name=envs::{"SCALA_PROFILE": "scala2.13"}'
-          echo '::set-output name=hadoop::hadoop3'
-        else
-          echo '::set-output name=java::8'
-          echo '::set-output name=branch::master'  # NOTE: UPDATE THIS WHEN CUTTING BRANCH
-          echo '::set-output name=type::regular'
-          echo '::set-output name=envs::{"SPARK_ANSI_SQL_MODE": "${{ inputs.ansi_enabled }}"}'
-          echo '::set-output name=hadoop::hadoop3'
-        fi
-
   precondition:
     name: Check changes
     runs-on: ubuntu-20.04
-    needs: configure-jobs
     env:
       GITHUB_PREV_SHA: ${{ github.event.before }}
     outputs:
@@ -129,7 +56,7 @@ jobs:
       with:
         fetch-depth: 0
         repository: apache/spark
-        ref: ${{ needs.configure-jobs.outputs.branch }}
+        ref: ${{ inputs.branch }}
     - name: Sync the current branch with the latest in Apache Spark
       if: github.repository != 'apache/spark'
       run: |
@@ -155,22 +82,21 @@ jobs:
 
   # Build: build Spark and run the tests for specified modules.
   build:
-    name: "Build modules (${{ format('{0}, {1} job', needs.configure-jobs.outputs.branch, needs.configure-jobs.outputs.type) }}): ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{ matrix.hadoop }}, ${{ matrix.hive }})"
-    needs: [configure-jobs, precondition]
+    name: "Build modules (${{ format('{0}, {1} job', inputs.branch, inputs.type) }}): ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{ matrix.hadoop }}, ${{ matrix.hive }})"
+    needs: precondition
     # Run scheduled jobs for Apache Spark only
     # Run regular jobs for commit in both Apache Spark and forked repository, but only if changes exist
     if: >-
-      needs.configure-jobs.outputs.type == 'scheduled'
-      || (needs.configure-jobs.outputs.type == 'regular' && fromJson(needs.precondition.outputs.required).build == 'true')
+      inputs.type == 'scheduled' || (inputs.type == 'regular' && fromJson(needs.precondition.outputs.required).build == 'true')
     # Ubuntu 20.04 is the latest LTS. The next LTS is 22.04.
     runs-on: ubuntu-20.04
     strategy:
       fail-fast: false
       matrix:
         java:
-          - ${{ needs.configure-jobs.outputs.java }}
+          - ${{ inputs.java }}
         hadoop:
-          - ${{ needs.configure-jobs.outputs.hadoop }}
+          - ${{ inputs.hadoop }}
         hive:
           - hive2.3
         # TODO(SPARK-32246): We don't test 'streaming-kinesis-asl' for now.
@@ -194,27 +120,27 @@ jobs:
         include:
           # Hive tests
           - modules: hive
-            java: ${{ needs.configure-jobs.outputs.java }}
-            hadoop: ${{ needs.configure-jobs.outputs.hadoop }}
+            java: ${{ inputs.java }}
+            hadoop: ${{ inputs.hadoop }}
             hive: hive2.3
             included-tags: org.apache.spark.tags.SlowHiveTest
             comment: "- slow tests"
           - modules: hive
-            java: ${{ needs.configure-jobs.outputs.java }}
-            hadoop: ${{ needs.configure-jobs.outputs.hadoop }}
+            java: ${{ inputs.java }}
+            hadoop: ${{ inputs.hadoop }}
             hive: hive2.3
             excluded-tags: org.apache.spark.tags.SlowHiveTest
             comment: "- other tests"
           # SQL tests
           - modules: sql
-            java: ${{ needs.configure-jobs.outputs.java }}
-            hadoop: ${{ needs.configure-jobs.outputs.hadoop }}
+            java: ${{ inputs.java }}
+            hadoop: ${{ inputs.hadoop }}
             hive: hive2.3
             included-tags: org.apache.spark.tags.ExtendedSQLTest
             comment: "- slow tests"
           - modules: sql
-            java: ${{ needs.configure-jobs.outputs.java }}
-            hadoop: ${{ needs.configure-jobs.outputs.hadoop }}
+            java: ${{ inputs.java }}
+            hadoop: ${{ inputs.hadoop }}
             hive: hive2.3
             excluded-tags: org.apache.spark.tags.ExtendedSQLTest
             comment: "- other tests"
@@ -233,7 +159,7 @@ jobs:
       with:
         fetch-depth: 0
         repository: apache/spark
-        ref: ${{ needs.configure-jobs.outputs.branch }}
+        ref: ${{ inputs.branch }}
     - name: Sync the current branch with the latest in Apache Spark
       if: github.repository != 'apache/spark'
       run: |
@@ -280,7 +206,7 @@ jobs:
         python3.8 -m pip list
     # Run the tests.
     - name: Run tests
-      env: ${{ fromJSON(needs.configure-jobs.outputs.envs) }}
+      env: ${{ fromJSON(inputs.envs) }}
       run: |
         # Hive "other tests" test needs larger metaspace size based on experiment.
         if [[ "$MODULES_TO_TEST" == "hive" ]] && [[ "$EXCLUDED_TAGS" == "org.apache.spark.tags.SlowHiveTest" ]]; then export METASPACE_SIZE=2g; fi
@@ -300,15 +226,15 @@ jobs:
         path: "**/target/unit-tests.log"
 
   pyspark:
-    needs: [configure-jobs, precondition]
+    needs: precondition
     # Run PySpark coverage scheduled jobs for Apache Spark only
     # Run scheduled jobs with JDK 17 in Apache Spark
     # Run regular jobs for commit in both Apache Spark and forked repository, but only if pyspark changes exist
     if: >-
-      needs.configure-jobs.outputs.type == 'pyspark-coverage-scheduled'
-      || (needs.configure-jobs.outputs.type == 'scheduled' && needs.configure-jobs.outputs.java == '17')
-      || (needs.configure-jobs.outputs.type == 'regular' && fromJson(needs.precondition.outputs.required).pyspark == 'true')
-    name: "Build modules (${{ format('{0}, {1} job', needs.configure-jobs.outputs.branch, needs.configure-jobs.outputs.type) }}): ${{ matrix.modules }}"
+      inputs.type == 'pyspark-coverage-scheduled'
+      || (inputs.type == 'scheduled' && inputs.java == '17')
+      || (inputs.type == 'regular' && fromJson(needs.precondition.outputs.required).pyspark == 'true')
+    name: "Build modules (${{ format('{0}, {1} job', inputs.branch, inputs.type) }}): ${{ matrix.modules }}"
     runs-on: ubuntu-20.04
     container:
       image: dongjoon/apache-spark-github-action-image:20220207
@@ -316,7 +242,7 @@ jobs:
       fail-fast: false
       matrix:
         java:
-          - ${{ needs.configure-jobs.outputs.java }}
+          - ${{ inputs.java }}
         modules:
           - >-
             pyspark-sql, pyspark-mllib, pyspark-resource
@@ -328,14 +254,13 @@ jobs:
             pyspark-pandas-slow
     env:
       MODULES_TO_TEST: ${{ matrix.modules }}
-      HADOOP_PROFILE: ${{ needs.configure-jobs.outputs.hadoop }}
+      HADOOP_PROFILE: ${{ inputs.hadoop }}
       HIVE_PROFILE: hive2.3
       GITHUB_PREV_SHA: ${{ github.event.before }}
       SPARK_LOCAL_IP: localhost
       SKIP_UNIDOC: true
       SKIP_MIMA: true
       METASPACE_SIZE: 1g
-      SPARK_ANSI_SQL_MODE: ${{ inputs.ansi_enabled }}
     steps:
     - name: Checkout Spark repository
       uses: actions/checkout@v2
@@ -343,7 +268,7 @@ jobs:
       with:
         fetch-depth: 0
         repository: apache/spark
-        ref: ${{ needs.configure-jobs.outputs.branch }}
+        ref: ${{ inputs.branch }}
     - name: Sync the current branch with the latest in Apache Spark
       if: github.repository != 'apache/spark'
       run: |
@@ -384,12 +309,12 @@ jobs:
         bash miniconda.sh -b -p $HOME/miniconda
     # Run the tests.
     - name: Run tests
-      env: ${{ fromJSON(needs.configure-jobs.outputs.envs) }}
+      env: ${{ fromJSON(inputs.envs) }}
       run: |
         export PATH=$PATH:$HOME/miniconda/bin
         ./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST"
     - name: Upload coverage to Codecov
-      if: needs.configure-jobs.outputs.type == 'pyspark-coverage-scheduled'
+      if: inputs.type == 'pyspark-coverage-scheduled'
       uses: codecov/codecov-action@v2
       with:
         files: ./python/coverage.xml
@@ -399,33 +324,32 @@ jobs:
       if: always()
       uses: actions/upload-artifact@v2
       with:
-        name: test-results-${{ matrix.modules }}--8-${{ needs.configure-jobs.outputs.hadoop }}-hive2.3
+        name: test-results-${{ matrix.modules }}--8-${{ inputs.hadoop }}-hive2.3
         path: "**/target/test-reports/*.xml"
     - name: Upload unit tests log files
       if: failure()
       uses: actions/upload-artifact@v2
       with:
-        name: unit-tests-log-${{ matrix.modules }}--8-${{ needs.configure-jobs.outputs.hadoop }}-hive2.3
+        name: unit-tests-log-${{ matrix.modules }}--8-${{ inputs.hadoop }}-hive2.3
         path: "**/target/unit-tests.log"
 
   sparkr:
-    needs: [configure-jobs, precondition]
+    needs: precondition
     # Run scheduled jobs with JDK 17 in Apache Spark
     # Run regular jobs for commit in both Apache Spark and forked repository, but only if sparkr changes exist
     if: >-
-      (needs.configure-jobs.outputs.type == 'scheduled' && needs.configure-jobs.outputs.java == '17')
-      || (needs.configure-jobs.outputs.type == 'regular' && fromJson(needs.precondition.outputs.required).sparkr == 'true')
+      (inputs.type == 'scheduled' && inputs.java == '17')
+      || (inputs.type == 'regular' && fromJson(needs.precondition.outputs.required).sparkr == 'true')
     name: "Build modules: sparkr"
     runs-on: ubuntu-20.04
     container:
       image: dongjoon/apache-spark-github-action-image:20220207
     env:
-      HADOOP_PROFILE: ${{ needs.configure-jobs.outputs.hadoop }}
+      HADOOP_PROFILE: ${{ inputs.hadoop }}
       HIVE_PROFILE: hive2.3
       GITHUB_PREV_SHA: ${{ github.event.before }}
       SPARK_LOCAL_IP: localhost
       SKIP_MIMA: true
-      SPARK_ANSI_SQL_MODE: ${{ inputs.ansi_enabled }}
     steps:
     - name: Checkout Spark repository
       uses: actions/checkout@v2
@@ -433,7 +357,7 @@ jobs:
       with:
         fetch-depth: 0
         repository: apache/spark
-        ref: ${{ needs.configure-jobs.outputs.branch }}
+        ref: ${{ inputs.branch }}
     - name: Sync the current branch with the latest in Apache Spark
       if: github.repository != 'apache/spark'
       run: |
@@ -460,11 +384,12 @@ jobs:
         key: sparkr-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
         restore-keys: |
           sparkr-coursier-
-    - name: Install Java ${{ needs.configure-jobs.outputs.java }}
+    - name: Install Java ${{ inputs.java }}
       uses: actions/setup-java@v1
       with:
-        java-version: ${{ needs.configure-jobs.outputs.java }}
+        java-version: ${{ inputs.java }}
     - name: Run tests
+      env: ${{ fromJSON(inputs.envs) }}
       run: |
         # The followings are also used by `r-lib/actions/setup-r` to avoid
         # R issues at docker environment
@@ -475,13 +400,12 @@ jobs:
       if: always()
       uses: actions/upload-artifact@v2
       with:
-        name: test-results-sparkr--8-${{ needs.configure-jobs.outputs.hadoop }}-hive2.3
+        name: test-results-sparkr--8-${{ inputs.hadoop }}-hive2.3
         path: "**/target/test-reports/*.xml"
 
   # Static analysis, and documentation build
   lint:
-    needs: configure-jobs
-    if: needs.configure-jobs.outputs.type == 'regular'
+    if: inputs.type == 'regular'
     name: Linters, licenses, dependencies and documentation generation
     runs-on: ubuntu-20.04
     env:
@@ -497,7 +421,7 @@ jobs:
       with:
         fetch-depth: 0
         repository: apache/spark
-        ref: ${{ needs.configure-jobs.outputs.branch }}
+        ref: ${{ inputs.branch }}
     - name: Sync the current branch with the latest in Apache Spark
       if: github.repository != 'apache/spark'
       run: |
@@ -595,9 +519,9 @@ jobs:
         bundle exec jekyll build
 
   java-11-17:
-    needs: [configure-jobs, precondition]
+    needs: precondition
     # Run regular jobs for commit in both Apache Spark and forked repository, but only if changes exist
-    if: needs.configure-jobs.outputs.type == 'regular' && fromJson(needs.precondition.outputs.required).build == 'true'
+    if: inputs.type == 'regular' && fromJson(needs.precondition.outputs.required).build == 'true'
     name: Java ${{ matrix.java }} build with Maven
     strategy:
       fail-fast: false
@@ -612,7 +536,7 @@ jobs:
       with:
         fetch-depth: 0
         repository: apache/spark
-        ref: ${{ needs.configure-jobs.outputs.branch }}
+        ref: ${{ inputs.branch }}
     - name: Sync the current branch with the latest in Apache Spark
       if: github.repository != 'apache/spark'
       run: |
@@ -651,9 +575,9 @@ jobs:
         rm -rf ~/.m2/repository/org/apache/spark
 
   scala-213:
-    needs: [configure-jobs, precondition]
+    needs: precondition
     # Run regular jobs for commit in both Apache Spark and forked repository, but only if changes exist
-    if: needs.configure-jobs.outputs.type == 'regular' && fromJson(needs.precondition.outputs.required).build == 'true'
+    if: inputs.type == 'regular' && fromJson(needs.precondition.outputs.required).build == 'true'
     name: Scala 2.13 build with SBT
     runs-on: ubuntu-20.04
     steps:
@@ -662,7 +586,7 @@ jobs:
       with:
         fetch-depth: 0
         repository: apache/spark
-        ref: ${{ needs.configure-jobs.outputs.branch }}
+        ref: ${{ inputs.branch }}
     - name: Sync the current branch with the latest in Apache Spark
       if: github.repository != 'apache/spark'
       run: |
@@ -697,21 +621,20 @@ jobs:
         ./build/sbt -Pyarn -Pmesos -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Pdocker-integration-tests -Pkubernetes-integration-tests -Pspark-ganglia-lgpl -Pscala-2.13 compile test:compile
 
   tpcds-1g:
-    needs: [configure-jobs, precondition]
+    needs: precondition
     # Run regular jobs for commit in both Apache Spark and forked repository, but only if tpcds changes exist
-    if: needs.configure-jobs.outputs.type == 'regular' && fromJson(needs.precondition.outputs.required).tpcds == 'true'
+    if: inputs.type == 'regular' && fromJson(needs.precondition.outputs.required).tpcds == 'true'
     name: Run TPC-DS queries with SF=1
     runs-on: ubuntu-20.04
     env:
       SPARK_LOCAL_IP: localhost
-      SPARK_ANSI_SQL_MODE: ${{ inputs.ansi_enabled }}
     steps:
     - name: Checkout Spark repository
       uses: actions/checkout@v2
       with:
         fetch-depth: 0
         repository: apache/spark
-        ref: ${{ needs.configure-jobs.outputs.branch }}
+        ref: ${{ inputs.branch }}
     - name: Sync the current branch with the latest in Apache Spark
       if: github.repository != 'apache/spark'
       run: |
@@ -763,6 +686,7 @@ jobs:
       run: |
         SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 build/sbt "sql/testOnly org.apache.spark.sql.TPCDSQueryTestSuite"
       env:
+        SPARK_ANSI_SQL_MODE: ${{ fromJSON(inputs.envs).SPARK_ANSI_SQL_MODE }}
         SPARK_TPCDS_JOIN_CONF: |
           spark.sql.autoBroadcastJoinThreshold=-1
           spark.sql.join.preferSortMergeJoin=true
@@ -770,12 +694,14 @@ jobs:
       run: |
         SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 build/sbt "sql/testOnly org.apache.spark.sql.TPCDSQueryTestSuite"
       env:
+        SPARK_ANSI_SQL_MODE: ${{ fromJSON(inputs.envs).SPARK_ANSI_SQL_MODE }}
         SPARK_TPCDS_JOIN_CONF: |
           spark.sql.autoBroadcastJoinThreshold=10485760
     - name: Run TPC-DS queries (Shuffled hash join)
       run: |
         SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 build/sbt "sql/testOnly org.apache.spark.sql.TPCDSQueryTestSuite"
       env:
+        SPARK_ANSI_SQL_MODE: ${{ fromJSON(inputs.envs).SPARK_ANSI_SQL_MODE }}
         SPARK_TPCDS_JOIN_CONF: |
           spark.sql.autoBroadcastJoinThreshold=-1
           spark.sql.join.forceApplyShuffledHashJoin=true
@@ -783,23 +709,23 @@ jobs:
       if: always()
       uses: actions/upload-artifact@v2
       with:
-        name: test-results-tpcds--8-${{ needs.configure-jobs.outputs.hadoop }}-hive2.3
+        name: test-results-tpcds--8-${{ inputs.hadoop }}-hive2.3
         path: "**/target/test-reports/*.xml"
     - name: Upload unit tests log files
       if: failure()
       uses: actions/upload-artifact@v2
       with:
-        name: unit-tests-log-tpcds--8-${{ needs.configure-jobs.outputs.hadoop }}-hive2.3
+        name: unit-tests-log-tpcds--8-${{ inputs.hadoop }}-hive2.3
         path: "**/target/unit-tests.log"
 
   docker-integration-tests:
-    needs: [configure-jobs, precondition]
+    needs: precondition
     # Run regular jobs for commit in both Apache Spark and forked repository, but only if docker changes exist
-    if: needs.configure-jobs.outputs.type == 'regular' && fromJson(needs.precondition.outputs.required).docker == 'true'
+    if: inputs.type == 'regular' && fromJson(needs.precondition.outputs.required).docker == 'true'
     name: Run Docker integration tests
     runs-on: ubuntu-20.04
     env:
-      HADOOP_PROFILE: ${{ needs.configure-jobs.outputs.hadoop }}
+      HADOOP_PROFILE: ${{ inputs.hadoop }}
       HIVE_PROFILE: hive2.3
       GITHUB_PREV_SHA: ${{ github.event.before }}
       SPARK_LOCAL_IP: localhost
@@ -811,7 +737,7 @@ jobs:
       with:
         fetch-depth: 0
         repository: apache/spark
-        ref: ${{ needs.configure-jobs.outputs.branch }}
+        ref: ${{ inputs.branch }}
     - name: Sync the current branch with the latest in Apache Spark
       if: github.repository != 'apache/spark'
       run: |
@@ -848,11 +774,11 @@ jobs:
       if: always()
       uses: actions/upload-artifact@v2
       with:
-        name: test-results-docker-integration--8-${{ needs.configure-jobs.outputs.hadoop }}-hive2.3
+        name: test-results-docker-integration--8-${{ inputs.hadoop }}-hive2.3
         path: "**/target/test-reports/*.xml"
     - name: Upload unit tests log files
       if: failure()
       uses: actions/upload-artifact@v2
       with:
-        name: unit-tests-log-docker-integration--8-${{ needs.configure-jobs.outputs.hadoop }}-hive2.3
+        name: unit-tests-log-docker-integration--8-${{ inputs.hadoop }}-hive2.3
         path: "**/target/unit-tests.log"
diff --git a/.github/workflows/build_and_test_ansi.yml b/.github/workflows/build_ansi.yml
similarity index 76%
copy from .github/workflows/build_and_test_ansi.yml
copy to .github/workflows/build_ansi.yml
index 0ca335445b4..f5c75d3fb7b 100644
--- a/.github/workflows/build_and_test_ansi.yml
+++ b/.github/workflows/build_ansi.yml
@@ -17,18 +17,23 @@
 # under the License.
 #
 
-name: "Build and test (ANSI)"
+name: "Build / ANSI (master, Hadoop 3, JDK 8, Scala 2.12)"
 
 on:
-  push:
-    branches:
-      - '**'
+  schedule:
+    - cron: '0 1 * * *'
 
 jobs:
-  call-build-and-test:
-    name: Call main build
+  run-build:
+    name: Run
     uses: ./.github/workflows/build_and_test.yml
     if: github.repository == 'apache/spark'
     with:
-      ansi_enabled: true
-
+      java: 8
+      branch: master
+      hadoop: hadoop3
+      type: scheduled
+      envs: >-
+        {
+          "SPARK_ANSI_SQL_MODE": "true",
+        }
diff --git a/.github/workflows/build_and_test_ansi.yml b/.github/workflows/build_branch32.yml
similarity index 76%
copy from .github/workflows/build_and_test_ansi.yml
copy to .github/workflows/build_branch32.yml
index 0ca335445b4..12e84068d72 100644
--- a/.github/workflows/build_and_test_ansi.yml
+++ b/.github/workflows/build_branch32.yml
@@ -17,18 +17,23 @@
 # under the License.
 #
 
-name: "Build and test (ANSI)"
+name: "Build (branch-3.2, Scala 2.13, Hadoop 3, JDK 8)"
 
 on:
-  push:
-    branches:
-      - '**'
+  schedule:
+    - cron: '0 4 * * *'
 
 jobs:
-  call-build-and-test:
-    name: Call main build
+  run-build:
+    name: Run
     uses: ./.github/workflows/build_and_test.yml
     if: github.repository == 'apache/spark'
     with:
-      ansi_enabled: true
-
+      java: 8
+      branch: branch-3.2
+      hadoop: hadoop3.2
+      type: scheduled
+      envs: >-
+        {
+          "SCALA_PROFILE": "scala2.13"
+        }
diff --git a/.github/workflows/build_and_test_ansi.yml b/.github/workflows/build_branch33.yml
similarity index 76%
copy from .github/workflows/build_and_test_ansi.yml
copy to .github/workflows/build_branch33.yml
index 0ca335445b4..1d6c5f56287 100644
--- a/.github/workflows/build_and_test_ansi.yml
+++ b/.github/workflows/build_branch33.yml
@@ -17,18 +17,23 @@
 # under the License.
 #
 
-name: "Build and test (ANSI)"
+name: "Build (branch-3.3, Scala 2.13, Hadoop 3, JDK 8)"
 
 on:
-  push:
-    branches:
-      - '**'
+  schedule:
+    - cron: '0 7 * * *'
 
 jobs:
-  call-build-and-test:
-    name: Call main build
+  run-build:
+    name: Run
     uses: ./.github/workflows/build_and_test.yml
     if: github.repository == 'apache/spark'
     with:
-      ansi_enabled: true
-
+      java: 8
+      branch: branch-3.3
+      hadoop: hadoop3
+      type: scheduled
+      envs: >-
+        {
+          "SCALA_PROFILE": "scala2.13"
+        }
diff --git a/.github/workflows/build_and_test_ansi.yml b/.github/workflows/build_coverage.yml
similarity index 75%
copy from .github/workflows/build_and_test_ansi.yml
copy to .github/workflows/build_coverage.yml
index 0ca335445b4..8a9a7f45c14 100644
--- a/.github/workflows/build_and_test_ansi.yml
+++ b/.github/workflows/build_coverage.yml
@@ -17,18 +17,23 @@
 # under the License.
 #
 
-name: "Build and test (ANSI)"
+name: "Build / Coverage (master, Scala 2.12, Hadoop 3, JDK 8)"
 
 on:
-  push:
-    branches:
-      - '**'
+  schedule:
+    - cron: '0 10 * * *'
 
 jobs:
-  call-build-and-test:
-    name: Call main build
+  run-build:
+    name: Run
     uses: ./.github/workflows/build_and_test.yml
     if: github.repository == 'apache/spark'
     with:
-      ansi_enabled: true
-
+      java: 8
+      branch: master
+      hadoop: hadoop3
+      type: pyspark-coverage-scheduled
+      envs: >-
+        {
+          "PYSPARK_CODECOV": "true"
+        }
diff --git a/.github/workflows/build_and_test_ansi.yml b/.github/workflows/build_hadoop2.yml
similarity index 82%
copy from .github/workflows/build_and_test_ansi.yml
copy to .github/workflows/build_hadoop2.yml
index 0ca335445b4..c15c43e17bc 100644
--- a/.github/workflows/build_and_test_ansi.yml
+++ b/.github/workflows/build_hadoop2.yml
@@ -17,18 +17,19 @@
 # under the License.
 #
 
-name: "Build and test (ANSI)"
+name: "Build (master, Scala 2.12, Hadoop 2, JDK 8)"
 
 on:
-  push:
-    branches:
-      - '**'
+  schedule:
+    - cron: '0 13 * * *'
 
 jobs:
-  call-build-and-test:
-    name: Call main build
+  run-build:
+    name: Run
     uses: ./.github/workflows/build_and_test.yml
     if: github.repository == 'apache/spark'
     with:
-      ansi_enabled: true
-
+      java: 8
+      branch: master
+      hadoop: hadoop2
+      type: scheduled
diff --git a/.github/workflows/build_and_test_ansi.yml b/.github/workflows/build_java11.yml
similarity index 75%
copy from .github/workflows/build_and_test_ansi.yml
copy to .github/workflows/build_java11.yml
index 0ca335445b4..dfe5884f968 100644
--- a/.github/workflows/build_and_test_ansi.yml
+++ b/.github/workflows/build_java11.yml
@@ -17,18 +17,24 @@
 # under the License.
 #
 
-name: "Build and test (ANSI)"
+name: "Build (master, Scala 2.12, Hadoop 3, JDK 11)"
 
 on:
-  push:
-    branches:
-      - '**'
+  schedule:
+    - cron: '0 16 * * *'
 
 jobs:
-  call-build-and-test:
-    name: Call main build
+  run-build:
+    name: Run
     uses: ./.github/workflows/build_and_test.yml
     if: github.repository == 'apache/spark'
     with:
-      ansi_enabled: true
-
+      java: 11
+      branch: master
+      hadoop: hadoop3
+      type: scheduled
+      envs: >-
+        {
+          "SKIP_MIMA": "true",
+          "SKIP_UNIDOC": "true"
+        }
diff --git a/.github/workflows/build_and_test_ansi.yml b/.github/workflows/build_java17.yml
similarity index 75%
copy from .github/workflows/build_and_test_ansi.yml
copy to .github/workflows/build_java17.yml
index 0ca335445b4..4a973ca3991 100644
--- a/.github/workflows/build_and_test_ansi.yml
+++ b/.github/workflows/build_java17.yml
@@ -17,18 +17,24 @@
 # under the License.
 #
 
-name: "Build and test (ANSI)"
+name: "Build (master, Scala 2.12, Hadoop 3, JDK 17)"
 
 on:
-  push:
-    branches:
-      - '**'
+  schedule:
+    - cron: '0 22 * * *'
 
 jobs:
-  call-build-and-test:
-    name: Call main build
+  run-build:
+    name: Run
     uses: ./.github/workflows/build_and_test.yml
     if: github.repository == 'apache/spark'
     with:
-      ansi_enabled: true
-
+      java: 17
+      branch: master
+      hadoop: hadoop3
+      type: scheduled
+      envs: >-
+        {
+          "SKIP_MIMA": "true",
+          "SKIP_UNIDOC": "true"
+        }
diff --git a/.github/workflows/build_and_test_ansi.yml b/.github/workflows/build_main.yml
similarity index 85%
copy from .github/workflows/build_and_test_ansi.yml
copy to .github/workflows/build_main.yml
index 0ca335445b4..2071d65b44d 100644
--- a/.github/workflows/build_and_test_ansi.yml
+++ b/.github/workflows/build_main.yml
@@ -17,18 +17,14 @@
 # under the License.
 #
 
-name: "Build and test (ANSI)"
+name: "Build"
 
 on:
   push:
     branches:
-      - '**'
+    - '**'
 
 jobs:
   call-build-and-test:
-    name: Call main build
+    name: Run
     uses: ./.github/workflows/build_and_test.yml
-    if: github.repository == 'apache/spark'
-    with:
-      ansi_enabled: true
-
diff --git a/.github/workflows/build_and_test_ansi.yml b/.github/workflows/build_scala213.yml
similarity index 77%
rename from .github/workflows/build_and_test_ansi.yml
rename to .github/workflows/build_scala213.yml
index 0ca335445b4..0bc0eabd534 100644
--- a/.github/workflows/build_and_test_ansi.yml
+++ b/.github/workflows/build_scala213.yml
@@ -17,18 +17,23 @@
 # under the License.
 #
 
-name: "Build and test (ANSI)"
+name: "Build (master, Scala 2.13, Hadoop 3, JDK 8)"
 
 on:
-  push:
-    branches:
-      - '**'
+  schedule:
+    - cron: '0 19 * * *'
 
 jobs:
-  call-build-and-test:
-    name: Call main build
+  run-build:
+    name: Run
     uses: ./.github/workflows/build_and_test.yml
     if: github.repository == 'apache/spark'
     with:
-      ansi_enabled: true
-
+      java: 8
+      branch: master
+      hadoop: hadoop3
+      type: scheduled
+      envs: >-
+        {
+          "SCALA_PROFILE": "scala2.13"
+        }
diff --git a/.github/workflows/cancel_duplicate_workflow_runs.yml b/.github/workflows/cancel_duplicate_workflow_runs.yml
index 525c7e7972c..d41ca31190d 100644
--- a/.github/workflows/cancel_duplicate_workflow_runs.yml
+++ b/.github/workflows/cancel_duplicate_workflow_runs.yml
@@ -21,7 +21,7 @@ name: Cancelling Duplicates
 on:
   workflow_run:
     workflows: 
-      - 'Build and test'
+      - 'Build'
     types: ['requested']
 
 jobs:
diff --git a/.github/workflows/notify_test_workflow.yml b/.github/workflows/notify_test_workflow.yml
index eb0da84a797..4c84f5f25e6 100644
--- a/.github/workflows/notify_test_workflow.yml
+++ b/.github/workflows/notify_test_workflow.yml
@@ -69,7 +69,7 @@ jobs:
               // Assume that runs were not found.
             }
 
-            const name = 'Build and test'
+            const name = 'Build'
             const head_sha = context.payload.pull_request.head.sha
             let status = 'queued'
 
diff --git a/.github/workflows/test_report.yml b/.github/workflows/test_report.yml
index 5f46985a975..c6225e6a1ab 100644
--- a/.github/workflows/test_report.yml
+++ b/.github/workflows/test_report.yml
@@ -20,7 +20,7 @@
 name: Report test results
 on:
   workflow_run:
-    workflows: ["Build and test", "Build and test (ANSI)"]
+    workflows: ["Build"]
     types:
       - completed
 
diff --git a/.github/workflows/update_build_status.yml b/.github/workflows/update_build_status.yml
index 671487adbfe..aa8e0cfbbf9 100644
--- a/.github/workflows/update_build_status.yml
+++ b/.github/workflows/update_build_status.yml
@@ -58,7 +58,7 @@ jobs:
 
                   // Iterator GitHub Checks in the PR
                   for await (const cr of checkRuns.data.check_runs) {
-                    if (cr.name == 'Build and test' && cr.conclusion != "action_required") {
+                    if (cr.name == 'Build' && cr.conclusion != "action_required") {
                       // text contains parameters to make request in JSON.
                       const params = JSON.parse(cr.output.text)
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org