Posted to commits@spark.apache.org by gu...@apache.org on 2022/06/20 23:46:44 UTC
[spark] branch master updated: [SPARK-39521][INFRA] Separate scheduled jobs to each workflow
This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 9e468cf010f [SPARK-39521][INFRA] Separate scheduled jobs to each workflow
9e468cf010f is described below
commit 9e468cf010f7381c1e85b02c0b3b043db7ffc07d
Author: Hyukjin Kwon <gu...@apache.org>
AuthorDate: Tue Jun 21 08:46:18 2022 +0900
[SPARK-39521][INFRA] Separate scheduled jobs to each workflow
### What changes were proposed in this pull request?
This PR proposes to split each scheduled job into its own workflow. After this PR, each scheduled build can be easily found at https://github.com/apache/spark/actions, for example:
![Screen Shot 2022-06-20 at 6 01 04 PM](https://user-images.githubusercontent.com/6477701/174565779-ab54eb69-49f9-4746-b714-902741e1d554.png)
In addition, this PR switches the ANSI build to a scheduled build as well, because it is too expensive to run for every commit.
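Concretely, each new workflow is a thin scheduled caller of the reusable `build_and_test.yml`. Below is a condensed sketch of the pattern, taken from the `build_scala213.yml` added in the diff (indentation restored):

```yaml
# Condensed sketch of one of the new per-job caller workflows
# (build_scala213.yml from the diff below, indentation restored).
name: "Build (master, Scala 2.13, Hadoop 3, JDK 8)"

on:
  schedule:
    # Each scheduled workflow gets its own staggered cron slot.
    - cron: '0 19 * * *'

jobs:
  run-build:
    name: Run
    # Call the reusable main workflow with job-specific inputs.
    uses: ./.github/workflows/build_and_test.yml
    # Scheduled jobs run only in the apache/spark repository.
    if: github.repository == 'apache/spark'
    with:
      java: 8
      branch: master
      hadoop: hadoop3
      type: scheduled
      envs: >-
        {
          "SCALA_PROFILE": "scala2.13"
        }
```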
### Why are the changes needed?
Currently it is very inconvenient to navigate the scheduled jobs: we have to open https://github.com/apache/spark/actions/workflows/build_and_test.yml?query=event%3Aschedule and manually search through the runs one by one.
### Does this PR introduce _any_ user-facing change?
No, dev-only.
### How was this patch tested?
Tested in my fork (https://github.com/HyukjinKwon/spark/actions).
Closes #36922 from HyukjinKwon/SPARK-39521.
Authored-by: Hyukjin Kwon <gu...@apache.org>
Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
.github/workflows/build_and_test.yml | 230 +++++++--------------
.../{build_and_test_ansi.yml => build_ansi.yml} | 21 +-
...{build_and_test_ansi.yml => build_branch32.yml} | 21 +-
...{build_and_test_ansi.yml => build_branch33.yml} | 21 +-
...{build_and_test_ansi.yml => build_coverage.yml} | 21 +-
.../{build_and_test_ansi.yml => build_hadoop2.yml} | 17 +-
.../{build_and_test_ansi.yml => build_java11.yml} | 22 +-
.../{build_and_test_ansi.yml => build_java17.yml} | 22 +-
.../{build_and_test_ansi.yml => build_main.yml} | 10 +-
...{build_and_test_ansi.yml => build_scala213.yml} | 21 +-
.../workflows/cancel_duplicate_workflow_runs.yml | 2 +-
.github/workflows/notify_test_workflow.yml | 2 +-
.github/workflows/test_report.yml | 2 +-
.github/workflows/update_build_status.yml | 2 +-
14 files changed, 187 insertions(+), 227 deletions(-)
diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 81381eb16d4..084cbb95b07 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -20,105 +20,32 @@
name: Build and test
on:
- push:
- branches:
- - '**'
- schedule:
- # Note that the scheduled jobs are only for master branch.
- # master, Hadoop 2
- - cron: '0 1 * * *'
- # master
- - cron: '0 4 * * *'
- # branch-3.2
- - cron: '0 7 * * *'
- # PySpark coverage for master branch
- - cron: '0 10 * * *'
- # Java 11
- - cron: '0 13 * * *'
- # Java 17
- - cron: '0 16 * * *'
- # branch-3.3
- - cron: '0 19 * * *'
workflow_call:
inputs:
- ansi_enabled:
+ java:
required: false
- type: boolean
- default: false
-
+ type: string
+ default: 8
+ branch:
+ required: false
+ type: string
+ default: master
+ hadoop:
+ required: false
+ type: string
+ default: hadoop3
+ type:
+ required: false
+ type: string
+ default: regular
+ envs:
+ required: false
+ type: string
+ default: "{}"
jobs:
- configure-jobs:
- name: Configure jobs
- runs-on: ubuntu-20.04
- # All other jobs in this workflow depend on this job,
- # so the entire workflow is skipped when these conditions evaluate to false:
- # Run all jobs for Apache Spark repository
- # Run only non-scheduled jobs for forked repositories
- if: github.repository == 'apache/spark' || github.event_name != 'schedule'
- outputs:
- java: ${{ steps.set-outputs.outputs.java }}
- branch: ${{ steps.set-outputs.outputs.branch }}
- hadoop: ${{ steps.set-outputs.outputs.hadoop }}
- type: ${{ steps.set-outputs.outputs.type }}
- envs: ${{ steps.set-outputs.outputs.envs }}
- steps:
- - name: Configure branch and additional environment variables
- id: set-outputs
- run: |
- if [ "${{ github.event.schedule }}" = "0 1 * * *" ]; then
- echo '::set-output name=java::8'
- echo '::set-output name=branch::master'
- echo '::set-output name=type::scheduled'
- echo '::set-output name=envs::{}'
- echo '::set-output name=hadoop::hadoop2'
- elif [ "${{ github.event.schedule }}" = "0 4 * * *" ]; then
- echo '::set-output name=java::8'
- echo '::set-output name=branch::master'
- echo '::set-output name=type::scheduled'
- echo '::set-output name=envs::{"SCALA_PROFILE": "scala2.13"}'
- echo '::set-output name=hadoop::hadoop3'
- elif [ "${{ github.event.schedule }}" = "0 7 * * *" ]; then
- echo '::set-output name=java::8'
- echo '::set-output name=branch::branch-3.2'
- echo '::set-output name=type::scheduled'
- echo '::set-output name=envs::{"SCALA_PROFILE": "scala2.13"}'
- echo '::set-output name=hadoop::hadoop3.2'
- elif [ "${{ github.event.schedule }}" = "0 10 * * *" ]; then
- echo '::set-output name=java::8'
- echo '::set-output name=branch::master'
- echo '::set-output name=type::pyspark-coverage-scheduled'
- echo '::set-output name=envs::{"PYSPARK_CODECOV": "true"}'
- echo '::set-output name=hadoop::hadoop3'
- elif [ "${{ github.event.schedule }}" = "0 13 * * *" ]; then
- echo '::set-output name=java::11'
- echo '::set-output name=branch::master'
- echo '::set-output name=type::scheduled'
- echo '::set-output name=envs::{"SKIP_MIMA": "true", "SKIP_UNIDOC": "true"}'
- echo '::set-output name=hadoop::hadoop3'
- elif [ "${{ github.event.schedule }}" = "0 16 * * *" ]; then
- echo '::set-output name=java::17'
- echo '::set-output name=branch::master'
- echo '::set-output name=type::scheduled'
- echo '::set-output name=envs::{"SKIP_MIMA": "true", "SKIP_UNIDOC": "true"}'
- echo '::set-output name=hadoop::hadoop3'
- elif [ "${{ github.event.schedule }}" = "0 19 * * *" ]; then
- echo '::set-output name=java::8'
- echo '::set-output name=branch::branch-3.3'
- echo '::set-output name=type::scheduled'
- echo '::set-output name=envs::{"SCALA_PROFILE": "scala2.13"}'
- echo '::set-output name=hadoop::hadoop3'
- else
- echo '::set-output name=java::8'
- echo '::set-output name=branch::master' # NOTE: UPDATE THIS WHEN CUTTING BRANCH
- echo '::set-output name=type::regular'
- echo '::set-output name=envs::{"SPARK_ANSI_SQL_MODE": "${{ inputs.ansi_enabled }}"}'
- echo '::set-output name=hadoop::hadoop3'
- fi
-
precondition:
name: Check changes
runs-on: ubuntu-20.04
- needs: configure-jobs
env:
GITHUB_PREV_SHA: ${{ github.event.before }}
outputs:
@@ -129,7 +56,7 @@ jobs:
with:
fetch-depth: 0
repository: apache/spark
- ref: ${{ needs.configure-jobs.outputs.branch }}
+ ref: ${{ inputs.branch }}
- name: Sync the current branch with the latest in Apache Spark
if: github.repository != 'apache/spark'
run: |
@@ -155,22 +82,21 @@ jobs:
# Build: build Spark and run the tests for specified modules.
build:
- name: "Build modules (${{ format('{0}, {1} job', needs.configure-jobs.outputs.branch, needs.configure-jobs.outputs.type) }}): ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{ matrix.hadoop }}, ${{ matrix.hive }})"
- needs: [configure-jobs, precondition]
+ name: "Build modules (${{ format('{0}, {1} job', inputs.branch, inputs.type) }}): ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{ matrix.hadoop }}, ${{ matrix.hive }})"
+ needs: precondition
# Run scheduled jobs for Apache Spark only
# Run regular jobs for commit in both Apache Spark and forked repository, but only if changes exist
if: >-
- needs.configure-jobs.outputs.type == 'scheduled'
- || (needs.configure-jobs.outputs.type == 'regular' && fromJson(needs.precondition.outputs.required).build == 'true')
+ inputs.type == 'scheduled' || (inputs.type == 'regular' && fromJson(needs.precondition.outputs.required).build == 'true')
# Ubuntu 20.04 is the latest LTS. The next LTS is 22.04.
runs-on: ubuntu-20.04
strategy:
fail-fast: false
matrix:
java:
- - ${{ needs.configure-jobs.outputs.java }}
+ - ${{ inputs.java }}
hadoop:
- - ${{ needs.configure-jobs.outputs.hadoop }}
+ - ${{ inputs.hadoop }}
hive:
- hive2.3
# TODO(SPARK-32246): We don't test 'streaming-kinesis-asl' for now.
@@ -194,27 +120,27 @@ jobs:
include:
# Hive tests
- modules: hive
- java: ${{ needs.configure-jobs.outputs.java }}
- hadoop: ${{ needs.configure-jobs.outputs.hadoop }}
+ java: ${{ inputs.java }}
+ hadoop: ${{ inputs.hadoop }}
hive: hive2.3
included-tags: org.apache.spark.tags.SlowHiveTest
comment: "- slow tests"
- modules: hive
- java: ${{ needs.configure-jobs.outputs.java }}
- hadoop: ${{ needs.configure-jobs.outputs.hadoop }}
+ java: ${{ inputs.java }}
+ hadoop: ${{ inputs.hadoop }}
hive: hive2.3
excluded-tags: org.apache.spark.tags.SlowHiveTest
comment: "- other tests"
# SQL tests
- modules: sql
- java: ${{ needs.configure-jobs.outputs.java }}
- hadoop: ${{ needs.configure-jobs.outputs.hadoop }}
+ java: ${{ inputs.java }}
+ hadoop: ${{ inputs.hadoop }}
hive: hive2.3
included-tags: org.apache.spark.tags.ExtendedSQLTest
comment: "- slow tests"
- modules: sql
- java: ${{ needs.configure-jobs.outputs.java }}
- hadoop: ${{ needs.configure-jobs.outputs.hadoop }}
+ java: ${{ inputs.java }}
+ hadoop: ${{ inputs.hadoop }}
hive: hive2.3
excluded-tags: org.apache.spark.tags.ExtendedSQLTest
comment: "- other tests"
@@ -233,7 +159,7 @@ jobs:
with:
fetch-depth: 0
repository: apache/spark
- ref: ${{ needs.configure-jobs.outputs.branch }}
+ ref: ${{ inputs.branch }}
- name: Sync the current branch with the latest in Apache Spark
if: github.repository != 'apache/spark'
run: |
@@ -280,7 +206,7 @@ jobs:
python3.8 -m pip list
# Run the tests.
- name: Run tests
- env: ${{ fromJSON(needs.configure-jobs.outputs.envs) }}
+ env: ${{ fromJSON(inputs.envs) }}
run: |
# Hive "other tests" test needs larger metaspace size based on experiment.
if [[ "$MODULES_TO_TEST" == "hive" ]] && [[ "$EXCLUDED_TAGS" == "org.apache.spark.tags.SlowHiveTest" ]]; then export METASPACE_SIZE=2g; fi
@@ -300,15 +226,15 @@ jobs:
path: "**/target/unit-tests.log"
pyspark:
- needs: [configure-jobs, precondition]
+ needs: precondition
# Run PySpark coverage scheduled jobs for Apache Spark only
# Run scheduled jobs with JDK 17 in Apache Spark
# Run regular jobs for commit in both Apache Spark and forked repository, but only if pyspark changes exist
if: >-
- needs.configure-jobs.outputs.type == 'pyspark-coverage-scheduled'
- || (needs.configure-jobs.outputs.type == 'scheduled' && needs.configure-jobs.outputs.java == '17')
- || (needs.configure-jobs.outputs.type == 'regular' && fromJson(needs.precondition.outputs.required).pyspark == 'true')
- name: "Build modules (${{ format('{0}, {1} job', needs.configure-jobs.outputs.branch, needs.configure-jobs.outputs.type) }}): ${{ matrix.modules }}"
+ inputs.type == 'pyspark-coverage-scheduled'
+ || (inputs.type == 'scheduled' && inputs.java == '17')
+ || (inputs.type == 'regular' && fromJson(needs.precondition.outputs.required).pyspark == 'true')
+ name: "Build modules (${{ format('{0}, {1} job', inputs.branch, inputs.type) }}): ${{ matrix.modules }}"
runs-on: ubuntu-20.04
container:
image: dongjoon/apache-spark-github-action-image:20220207
@@ -316,7 +242,7 @@ jobs:
fail-fast: false
matrix:
java:
- - ${{ needs.configure-jobs.outputs.java }}
+ - ${{ inputs.java }}
modules:
- >-
pyspark-sql, pyspark-mllib, pyspark-resource
@@ -328,14 +254,13 @@ jobs:
pyspark-pandas-slow
env:
MODULES_TO_TEST: ${{ matrix.modules }}
- HADOOP_PROFILE: ${{ needs.configure-jobs.outputs.hadoop }}
+ HADOOP_PROFILE: ${{ inputs.hadoop }}
HIVE_PROFILE: hive2.3
GITHUB_PREV_SHA: ${{ github.event.before }}
SPARK_LOCAL_IP: localhost
SKIP_UNIDOC: true
SKIP_MIMA: true
METASPACE_SIZE: 1g
- SPARK_ANSI_SQL_MODE: ${{ inputs.ansi_enabled }}
steps:
- name: Checkout Spark repository
uses: actions/checkout@v2
@@ -343,7 +268,7 @@ jobs:
with:
fetch-depth: 0
repository: apache/spark
- ref: ${{ needs.configure-jobs.outputs.branch }}
+ ref: ${{ inputs.branch }}
- name: Sync the current branch with the latest in Apache Spark
if: github.repository != 'apache/spark'
run: |
@@ -384,12 +309,12 @@ jobs:
bash miniconda.sh -b -p $HOME/miniconda
# Run the tests.
- name: Run tests
- env: ${{ fromJSON(needs.configure-jobs.outputs.envs) }}
+ env: ${{ fromJSON(inputs.envs) }}
run: |
export PATH=$PATH:$HOME/miniconda/bin
./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST"
- name: Upload coverage to Codecov
- if: needs.configure-jobs.outputs.type == 'pyspark-coverage-scheduled'
+ if: inputs.type == 'pyspark-coverage-scheduled'
uses: codecov/codecov-action@v2
with:
files: ./python/coverage.xml
@@ -399,33 +324,32 @@ jobs:
if: always()
uses: actions/upload-artifact@v2
with:
- name: test-results-${{ matrix.modules }}--8-${{ needs.configure-jobs.outputs.hadoop }}-hive2.3
+ name: test-results-${{ matrix.modules }}--8-${{ inputs.hadoop }}-hive2.3
path: "**/target/test-reports/*.xml"
- name: Upload unit tests log files
if: failure()
uses: actions/upload-artifact@v2
with:
- name: unit-tests-log-${{ matrix.modules }}--8-${{ needs.configure-jobs.outputs.hadoop }}-hive2.3
+ name: unit-tests-log-${{ matrix.modules }}--8-${{ inputs.hadoop }}-hive2.3
path: "**/target/unit-tests.log"
sparkr:
- needs: [configure-jobs, precondition]
+ needs: precondition
# Run scheduled jobs with JDK 17 in Apache Spark
# Run regular jobs for commit in both Apache Spark and forked repository, but only if sparkr changes exist
if: >-
- (needs.configure-jobs.outputs.type == 'scheduled' && needs.configure-jobs.outputs.java == '17')
- || (needs.configure-jobs.outputs.type == 'regular' && fromJson(needs.precondition.outputs.required).sparkr == 'true')
+ (inputs.type == 'scheduled' && inputs.java == '17')
+ || (inputs.type == 'regular' && fromJson(needs.precondition.outputs.required).sparkr == 'true')
name: "Build modules: sparkr"
runs-on: ubuntu-20.04
container:
image: dongjoon/apache-spark-github-action-image:20220207
env:
- HADOOP_PROFILE: ${{ needs.configure-jobs.outputs.hadoop }}
+ HADOOP_PROFILE: ${{ inputs.hadoop }}
HIVE_PROFILE: hive2.3
GITHUB_PREV_SHA: ${{ github.event.before }}
SPARK_LOCAL_IP: localhost
SKIP_MIMA: true
- SPARK_ANSI_SQL_MODE: ${{ inputs.ansi_enabled }}
steps:
- name: Checkout Spark repository
uses: actions/checkout@v2
@@ -433,7 +357,7 @@ jobs:
with:
fetch-depth: 0
repository: apache/spark
- ref: ${{ needs.configure-jobs.outputs.branch }}
+ ref: ${{ inputs.branch }}
- name: Sync the current branch with the latest in Apache Spark
if: github.repository != 'apache/spark'
run: |
@@ -460,11 +384,12 @@ jobs:
key: sparkr-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
restore-keys: |
sparkr-coursier-
- - name: Install Java ${{ needs.configure-jobs.outputs.java }}
+ - name: Install Java ${{ inputs.java }}
uses: actions/setup-java@v1
with:
- java-version: ${{ needs.configure-jobs.outputs.java }}
+ java-version: ${{ inputs.java }}
- name: Run tests
+ env: ${{ fromJSON(inputs.envs) }}
run: |
# The followings are also used by `r-lib/actions/setup-r` to avoid
# R issues at docker environment
@@ -475,13 +400,12 @@ jobs:
if: always()
uses: actions/upload-artifact@v2
with:
- name: test-results-sparkr--8-${{ needs.configure-jobs.outputs.hadoop }}-hive2.3
+ name: test-results-sparkr--8-${{ inputs.hadoop }}-hive2.3
path: "**/target/test-reports/*.xml"
# Static analysis, and documentation build
lint:
- needs: configure-jobs
- if: needs.configure-jobs.outputs.type == 'regular'
+ if: inputs.type == 'regular'
name: Linters, licenses, dependencies and documentation generation
runs-on: ubuntu-20.04
env:
@@ -497,7 +421,7 @@ jobs:
with:
fetch-depth: 0
repository: apache/spark
- ref: ${{ needs.configure-jobs.outputs.branch }}
+ ref: ${{ inputs.branch }}
- name: Sync the current branch with the latest in Apache Spark
if: github.repository != 'apache/spark'
run: |
@@ -595,9 +519,9 @@ jobs:
bundle exec jekyll build
java-11-17:
- needs: [configure-jobs, precondition]
+ needs: precondition
# Run regular jobs for commit in both Apache Spark and forked repository, but only if changes exist
- if: needs.configure-jobs.outputs.type == 'regular' && fromJson(needs.precondition.outputs.required).build == 'true'
+ if: inputs.type == 'regular' && fromJson(needs.precondition.outputs.required).build == 'true'
name: Java ${{ matrix.java }} build with Maven
strategy:
fail-fast: false
@@ -612,7 +536,7 @@ jobs:
with:
fetch-depth: 0
repository: apache/spark
- ref: ${{ needs.configure-jobs.outputs.branch }}
+ ref: ${{ inputs.branch }}
- name: Sync the current branch with the latest in Apache Spark
if: github.repository != 'apache/spark'
run: |
@@ -651,9 +575,9 @@ jobs:
rm -rf ~/.m2/repository/org/apache/spark
scala-213:
- needs: [configure-jobs, precondition]
+ needs: precondition
# Run regular jobs for commit in both Apache Spark and forked repository, but only if changes exist
- if: needs.configure-jobs.outputs.type == 'regular' && fromJson(needs.precondition.outputs.required).build == 'true'
+ if: inputs.type == 'regular' && fromJson(needs.precondition.outputs.required).build == 'true'
name: Scala 2.13 build with SBT
runs-on: ubuntu-20.04
steps:
@@ -662,7 +586,7 @@ jobs:
with:
fetch-depth: 0
repository: apache/spark
- ref: ${{ needs.configure-jobs.outputs.branch }}
+ ref: ${{ inputs.branch }}
- name: Sync the current branch with the latest in Apache Spark
if: github.repository != 'apache/spark'
run: |
@@ -697,21 +621,20 @@ jobs:
./build/sbt -Pyarn -Pmesos -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Pdocker-integration-tests -Pkubernetes-integration-tests -Pspark-ganglia-lgpl -Pscala-2.13 compile test:compile
tpcds-1g:
- needs: [configure-jobs, precondition]
+ needs: precondition
# Run regular jobs for commit in both Apache Spark and forked repository, but only if tpcds changes exist
- if: needs.configure-jobs.outputs.type == 'regular' && fromJson(needs.precondition.outputs.required).tpcds == 'true'
+ if: inputs.type == 'regular' && fromJson(needs.precondition.outputs.required).tpcds == 'true'
name: Run TPC-DS queries with SF=1
runs-on: ubuntu-20.04
env:
SPARK_LOCAL_IP: localhost
- SPARK_ANSI_SQL_MODE: ${{ inputs.ansi_enabled }}
steps:
- name: Checkout Spark repository
uses: actions/checkout@v2
with:
fetch-depth: 0
repository: apache/spark
- ref: ${{ needs.configure-jobs.outputs.branch }}
+ ref: ${{ inputs.branch }}
- name: Sync the current branch with the latest in Apache Spark
if: github.repository != 'apache/spark'
run: |
@@ -763,6 +686,7 @@ jobs:
run: |
SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 build/sbt "sql/testOnly org.apache.spark.sql.TPCDSQueryTestSuite"
env:
+ SPARK_ANSI_SQL_MODE: ${{ fromJSON(inputs.envs).SPARK_ANSI_SQL_MODE }}
SPARK_TPCDS_JOIN_CONF: |
spark.sql.autoBroadcastJoinThreshold=-1
spark.sql.join.preferSortMergeJoin=true
@@ -770,12 +694,14 @@ jobs:
run: |
SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 build/sbt "sql/testOnly org.apache.spark.sql.TPCDSQueryTestSuite"
env:
+ SPARK_ANSI_SQL_MODE: ${{ fromJSON(inputs.envs).SPARK_ANSI_SQL_MODE }}
SPARK_TPCDS_JOIN_CONF: |
spark.sql.autoBroadcastJoinThreshold=10485760
- name: Run TPC-DS queries (Shuffled hash join)
run: |
SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 build/sbt "sql/testOnly org.apache.spark.sql.TPCDSQueryTestSuite"
env:
+ SPARK_ANSI_SQL_MODE: ${{ fromJSON(inputs.envs).SPARK_ANSI_SQL_MODE }}
SPARK_TPCDS_JOIN_CONF: |
spark.sql.autoBroadcastJoinThreshold=-1
spark.sql.join.forceApplyShuffledHashJoin=true
@@ -783,23 +709,23 @@ jobs:
if: always()
uses: actions/upload-artifact@v2
with:
- name: test-results-tpcds--8-${{ needs.configure-jobs.outputs.hadoop }}-hive2.3
+ name: test-results-tpcds--8-${{ inputs.hadoop }}-hive2.3
path: "**/target/test-reports/*.xml"
- name: Upload unit tests log files
if: failure()
uses: actions/upload-artifact@v2
with:
- name: unit-tests-log-tpcds--8-${{ needs.configure-jobs.outputs.hadoop }}-hive2.3
+ name: unit-tests-log-tpcds--8-${{ inputs.hadoop }}-hive2.3
path: "**/target/unit-tests.log"
docker-integration-tests:
- needs: [configure-jobs, precondition]
+ needs: precondition
# Run regular jobs for commit in both Apache Spark and forked repository, but only if docker changes exist
- if: needs.configure-jobs.outputs.type == 'regular' && fromJson(needs.precondition.outputs.required).docker == 'true'
+ if: inputs.type == 'regular' && fromJson(needs.precondition.outputs.required).docker == 'true'
name: Run Docker integration tests
runs-on: ubuntu-20.04
env:
- HADOOP_PROFILE: ${{ needs.configure-jobs.outputs.hadoop }}
+ HADOOP_PROFILE: ${{ inputs.hadoop }}
HIVE_PROFILE: hive2.3
GITHUB_PREV_SHA: ${{ github.event.before }}
SPARK_LOCAL_IP: localhost
@@ -811,7 +737,7 @@ jobs:
with:
fetch-depth: 0
repository: apache/spark
- ref: ${{ needs.configure-jobs.outputs.branch }}
+ ref: ${{ inputs.branch }}
- name: Sync the current branch with the latest in Apache Spark
if: github.repository != 'apache/spark'
run: |
@@ -848,11 +774,11 @@ jobs:
if: always()
uses: actions/upload-artifact@v2
with:
- name: test-results-docker-integration--8-${{ needs.configure-jobs.outputs.hadoop }}-hive2.3
+ name: test-results-docker-integration--8-${{ inputs.hadoop }}-hive2.3
path: "**/target/test-reports/*.xml"
- name: Upload unit tests log files
if: failure()
uses: actions/upload-artifact@v2
with:
- name: unit-tests-log-docker-integration--8-${{ needs.configure-jobs.outputs.hadoop }}-hive2.3
+ name: unit-tests-log-docker-integration--8-${{ inputs.hadoop }}-hive2.3
path: "**/target/unit-tests.log"
diff --git a/.github/workflows/build_and_test_ansi.yml b/.github/workflows/build_ansi.yml
similarity index 76%
copy from .github/workflows/build_and_test_ansi.yml
copy to .github/workflows/build_ansi.yml
index 0ca335445b4..f5c75d3fb7b 100644
--- a/.github/workflows/build_and_test_ansi.yml
+++ b/.github/workflows/build_ansi.yml
@@ -17,18 +17,23 @@
# under the License.
#
-name: "Build and test (ANSI)"
+name: "Build / ANSI (master, Hadoop 3, JDK 8, Scala 2.12)"
on:
- push:
- branches:
- - '**'
+ schedule:
+ - cron: '0 1 * * *'
jobs:
- call-build-and-test:
- name: Call main build
+ run-build:
+ name: Run
uses: ./.github/workflows/build_and_test.yml
if: github.repository == 'apache/spark'
with:
- ansi_enabled: true
-
+ java: 8
+ branch: master
+ hadoop: hadoop3
+ type: scheduled
+ envs: >-
+ {
+ "SPARK_ANSI_SQL_MODE": "true",
+ }
diff --git a/.github/workflows/build_and_test_ansi.yml b/.github/workflows/build_branch32.yml
similarity index 76%
copy from .github/workflows/build_and_test_ansi.yml
copy to .github/workflows/build_branch32.yml
index 0ca335445b4..12e84068d72 100644
--- a/.github/workflows/build_and_test_ansi.yml
+++ b/.github/workflows/build_branch32.yml
@@ -17,18 +17,23 @@
# under the License.
#
-name: "Build and test (ANSI)"
+name: "Build (branch-3.2, Scala 2.13, Hadoop 3, JDK 8)"
on:
- push:
- branches:
- - '**'
+ schedule:
+ - cron: '0 4 * * *'
jobs:
- call-build-and-test:
- name: Call main build
+ run-build:
+ name: Run
uses: ./.github/workflows/build_and_test.yml
if: github.repository == 'apache/spark'
with:
- ansi_enabled: true
-
+ java: 8
+ branch: branch-3.2
+ hadoop: hadoop3.2
+ type: scheduled
+ envs: >-
+ {
+ "SCALA_PROFILE": "scala2.13"
+ }
diff --git a/.github/workflows/build_and_test_ansi.yml b/.github/workflows/build_branch33.yml
similarity index 76%
copy from .github/workflows/build_and_test_ansi.yml
copy to .github/workflows/build_branch33.yml
index 0ca335445b4..1d6c5f56287 100644
--- a/.github/workflows/build_and_test_ansi.yml
+++ b/.github/workflows/build_branch33.yml
@@ -17,18 +17,23 @@
# under the License.
#
-name: "Build and test (ANSI)"
+name: "Build (branch-3.3, Scala 2.13, Hadoop 3, JDK 8)"
on:
- push:
- branches:
- - '**'
+ schedule:
+ - cron: '0 7 * * *'
jobs:
- call-build-and-test:
- name: Call main build
+ run-build:
+ name: Run
uses: ./.github/workflows/build_and_test.yml
if: github.repository == 'apache/spark'
with:
- ansi_enabled: true
-
+ java: 8
+ branch: branch-3.3
+ hadoop: hadoop3
+ type: scheduled
+ envs: >-
+ {
+ "SCALA_PROFILE": "scala2.13"
+ }
diff --git a/.github/workflows/build_and_test_ansi.yml b/.github/workflows/build_coverage.yml
similarity index 75%
copy from .github/workflows/build_and_test_ansi.yml
copy to .github/workflows/build_coverage.yml
index 0ca335445b4..8a9a7f45c14 100644
--- a/.github/workflows/build_and_test_ansi.yml
+++ b/.github/workflows/build_coverage.yml
@@ -17,18 +17,23 @@
# under the License.
#
-name: "Build and test (ANSI)"
+name: "Build / Coverage (master, Scala 2.12, Hadoop 3, JDK 8)"
on:
- push:
- branches:
- - '**'
+ schedule:
+ - cron: '0 10 * * *'
jobs:
- call-build-and-test:
- name: Call main build
+ run-build:
+ name: Run
uses: ./.github/workflows/build_and_test.yml
if: github.repository == 'apache/spark'
with:
- ansi_enabled: true
-
+ java: 8
+ branch: master
+ hadoop: hadoop3
+ type: pyspark-coverage-scheduled
+ envs: >-
+ {
+ "PYSPARK_CODECOV": "true"
+ }
diff --git a/.github/workflows/build_and_test_ansi.yml b/.github/workflows/build_hadoop2.yml
similarity index 82%
copy from .github/workflows/build_and_test_ansi.yml
copy to .github/workflows/build_hadoop2.yml
index 0ca335445b4..c15c43e17bc 100644
--- a/.github/workflows/build_and_test_ansi.yml
+++ b/.github/workflows/build_hadoop2.yml
@@ -17,18 +17,19 @@
# under the License.
#
-name: "Build and test (ANSI)"
+name: "Build (master, Scala 2.12, Hadoop 2, JDK 8)"
on:
- push:
- branches:
- - '**'
+ schedule:
+ - cron: '0 13 * * *'
jobs:
- call-build-and-test:
- name: Call main build
+ run-build:
+ name: Run
uses: ./.github/workflows/build_and_test.yml
if: github.repository == 'apache/spark'
with:
- ansi_enabled: true
-
+ java: 8
+ branch: master
+ hadoop: hadoop2
+ type: scheduled
diff --git a/.github/workflows/build_and_test_ansi.yml b/.github/workflows/build_java11.yml
similarity index 75%
copy from .github/workflows/build_and_test_ansi.yml
copy to .github/workflows/build_java11.yml
index 0ca335445b4..dfe5884f968 100644
--- a/.github/workflows/build_and_test_ansi.yml
+++ b/.github/workflows/build_java11.yml
@@ -17,18 +17,24 @@
# under the License.
#
-name: "Build and test (ANSI)"
+name: "Build (master, Scala 2.12, Hadoop 3, JDK 11)"
on:
- push:
- branches:
- - '**'
+ schedule:
+ - cron: '0 16 * * *'
jobs:
- call-build-and-test:
- name: Call main build
+ run-build:
+ name: Run
uses: ./.github/workflows/build_and_test.yml
if: github.repository == 'apache/spark'
with:
- ansi_enabled: true
-
+ java: 11
+ branch: master
+ hadoop: hadoop3
+ type: scheduled
+ envs: >-
+ {
+ "SKIP_MIMA": "true",
+ "SKIP_UNIDOC": "true"
+ }
diff --git a/.github/workflows/build_and_test_ansi.yml b/.github/workflows/build_java17.yml
similarity index 75%
copy from .github/workflows/build_and_test_ansi.yml
copy to .github/workflows/build_java17.yml
index 0ca335445b4..4a973ca3991 100644
--- a/.github/workflows/build_and_test_ansi.yml
+++ b/.github/workflows/build_java17.yml
@@ -17,18 +17,24 @@
# under the License.
#
-name: "Build and test (ANSI)"
+name: "Build (master, Scala 2.12, Hadoop 3, JDK 17)"
on:
- push:
- branches:
- - '**'
+ schedule:
+ - cron: '0 22 * * *'
jobs:
- call-build-and-test:
- name: Call main build
+ run-build:
+ name: Run
uses: ./.github/workflows/build_and_test.yml
if: github.repository == 'apache/spark'
with:
- ansi_enabled: true
-
+ java: 17
+ branch: master
+ hadoop: hadoop3
+ type: scheduled
+ envs: >-
+ {
+ "SKIP_MIMA": "true",
+ "SKIP_UNIDOC": "true"
+ }
diff --git a/.github/workflows/build_and_test_ansi.yml b/.github/workflows/build_main.yml
similarity index 85%
copy from .github/workflows/build_and_test_ansi.yml
copy to .github/workflows/build_main.yml
index 0ca335445b4..2071d65b44d 100644
--- a/.github/workflows/build_and_test_ansi.yml
+++ b/.github/workflows/build_main.yml
@@ -17,18 +17,14 @@
# under the License.
#
-name: "Build and test (ANSI)"
+name: "Build"
on:
push:
branches:
- - '**'
+ - '**'
jobs:
call-build-and-test:
- name: Call main build
+ name: Run
uses: ./.github/workflows/build_and_test.yml
- if: github.repository == 'apache/spark'
- with:
- ansi_enabled: true
-
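Assembled from the hunk above, the push-triggered `build_main.yml` reduces to roughly the following: a bare caller with no `if:` guard and no `with:` block, relying entirely on the input defaults declared in `build_and_test.yml` (JDK 8, master, hadoop3, regular, empty envs). A reconstructed sketch:

```yaml
# Approximate final state of build_main.yml after this change,
# reconstructed from the hunk above. All inputs fall back to the
# defaults in build_and_test.yml.
name: "Build"

on:
  push:
    branches:
      - '**'

jobs:
  call-build-and-test:
    name: Run
    uses: ./.github/workflows/build_and_test.yml
```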
diff --git a/.github/workflows/build_and_test_ansi.yml b/.github/workflows/build_scala213.yml
similarity index 77%
rename from .github/workflows/build_and_test_ansi.yml
rename to .github/workflows/build_scala213.yml
index 0ca335445b4..0bc0eabd534 100644
--- a/.github/workflows/build_and_test_ansi.yml
+++ b/.github/workflows/build_scala213.yml
@@ -17,18 +17,23 @@
# under the License.
#
-name: "Build and test (ANSI)"
+name: "Build (master, Scala 2.13, Hadoop 3, JDK 8)"
on:
- push:
- branches:
- - '**'
+ schedule:
+ - cron: '0 19 * * *'
jobs:
- call-build-and-test:
- name: Call main build
+ run-build:
+ name: Run
uses: ./.github/workflows/build_and_test.yml
if: github.repository == 'apache/spark'
with:
- ansi_enabled: true
-
+ java: 8
+ branch: master
+ hadoop: hadoop3
+ type: scheduled
+ envs: >-
+ {
+ "SCALA_PROFILE": "scala2.13"
+ }
diff --git a/.github/workflows/cancel_duplicate_workflow_runs.yml b/.github/workflows/cancel_duplicate_workflow_runs.yml
index 525c7e7972c..d41ca31190d 100644
--- a/.github/workflows/cancel_duplicate_workflow_runs.yml
+++ b/.github/workflows/cancel_duplicate_workflow_runs.yml
@@ -21,7 +21,7 @@ name: Cancelling Duplicates
on:
workflow_run:
workflows:
- - 'Build and test'
+ - 'Build'
types: ['requested']
jobs:
diff --git a/.github/workflows/notify_test_workflow.yml b/.github/workflows/notify_test_workflow.yml
index eb0da84a797..4c84f5f25e6 100644
--- a/.github/workflows/notify_test_workflow.yml
+++ b/.github/workflows/notify_test_workflow.yml
@@ -69,7 +69,7 @@ jobs:
// Assume that runs were not found.
}
- const name = 'Build and test'
+ const name = 'Build'
const head_sha = context.payload.pull_request.head.sha
let status = 'queued'
diff --git a/.github/workflows/test_report.yml b/.github/workflows/test_report.yml
index 5f46985a975..c6225e6a1ab 100644
--- a/.github/workflows/test_report.yml
+++ b/.github/workflows/test_report.yml
@@ -20,7 +20,7 @@
name: Report test results
on:
workflow_run:
- workflows: ["Build and test", "Build and test (ANSI)"]
+ workflows: ["Build"]
types:
- completed
diff --git a/.github/workflows/update_build_status.yml b/.github/workflows/update_build_status.yml
index 671487adbfe..aa8e0cfbbf9 100644
--- a/.github/workflows/update_build_status.yml
+++ b/.github/workflows/update_build_status.yml
@@ -58,7 +58,7 @@ jobs:
// Iterator GitHub Checks in the PR
for await (const cr of checkRuns.data.check_runs) {
- if (cr.name == 'Build and test' && cr.conclusion != "action_required") {
+ if (cr.name == 'Build' && cr.conclusion != "action_required") {
// text contains parameters to make request in JSON.
const params = JSON.parse(cr.output.text)