You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by ma...@apache.org on 2021/03/08 23:19:23 UTC

[incubator-mxnet] branch v1.8.x updated: [v1.8.x] Cherry-pick CD pipelines refactor with fix for docker images pipeline (#19989)

This is an automated email from the ASF dual-hosted git repository.

manuseth pushed a commit to branch v1.8.x
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/v1.8.x by this push:
     new dd4661a  [v1.8.x] Cherry-pick CD pipelines refactor with fix for docker images pipeline (#19989)
dd4661a is described below

commit dd4661a12230584dc206b4bf233ede261fa64786
Author: Manu Seth <22...@users.noreply.github.com>
AuthorDate: Mon Mar 8 15:17:37 2021 -0800

    [v1.8.x] Cherry-pick CD pipelines refactor with fix for docker images pipeline (#19989)
    
    * [v1.x] Fix nightly CD for python docker image releases (#19774)
    
    * [CD] switch CD_RELEASE_JOB_NAME from global env var to job argument (#17775)
    
    * Fix nightly CD for python docker image releases (#19772)
    
    * install wget
    
    * test cd docker in ci
    
    * install docker
    
    * install python3-dev and gcc
    
    * remove docker testing from ci
    
    * remove python3-dev
    
    * ecr target
    
    * skip build test
    
    * adding back python3-dev for make
    
    * remove dynamic and pypi stages for testing
    
    * install build-essential
    
    * install zlib
    
    * update python version
    
    * update ld library path
    
    * install openssl
    
    * update test packages for python3.7
    
    * remove call to deleted safe_docker_run.py
    
    * hardcode region for public ecr repo
    
    * use deadsnakes to install python
    
    * revert dependency change
    
    * refactor ecr login
    
    * update ecr repo jenkins global var
    
    * cleanup
    
    * update docker authentication
    
    * add ecr repo
    
    * add back pypi and tests
    
    * remove unused libmxnet pipeline
    
    * update cu112 base docker
    
    * update base docker images to ub18
    
    * differentiate tag prefix for v1.x
    
    Co-authored-by: Sheng Zha <sz...@users.noreply.github.com>
    
    * pass version param (#19982)
    
    * remove cu112 changes
    
    Co-authored-by: Sheng Zha <sz...@users.noreply.github.com>
---
 cd/Jenkinsfile_cd_pipeline                        | 42 ++++++----------
 cd/Jenkinsfile_release_job                        | 11 +++--
 cd/Jenkinsfile_utils.groovy                       | 21 ++++----
 cd/README.md                                      |  4 +-
 cd/mxnet_lib/{static => }/Jenkins_pipeline.groovy |  0
 cd/mxnet_lib/dynamic/Jenkins_pipeline.groovy      | 58 -----------------------
 cd/python/docker/Dockerfile                       |  1 -
 cd/python/docker/Dockerfile.test                  |  3 --
 cd/python/docker/python_images.sh                 | 28 +++--------
 cd/utils/docker_tag.sh                            | 22 +++------
 cd/utils/mxnet_base_image.sh                      | 21 +++-----
 11 files changed, 54 insertions(+), 157 deletions(-)

diff --git a/cd/Jenkinsfile_cd_pipeline b/cd/Jenkinsfile_cd_pipeline
index 717ad52..6ef1104 100644
--- a/cd/Jenkinsfile_cd_pipeline
+++ b/cd/Jenkinsfile_cd_pipeline
@@ -47,7 +47,7 @@ pipeline {
           cd_utils = load('cd/Jenkinsfile_utils.groovy')
           
           // Update release job state in Jenkins
-          cd_utils.update_release_job_state()
+          cd_utils.update_release_job_state(params.CD_RELEASE_JOB_NAME)
         }
       }
     }
@@ -55,33 +55,21 @@ pipeline {
     stage("MXNet Release") {
       steps {
         script {
-          cd_utils.error_checked_parallel([
-
-            "Static libmxnet based release": {
-              stage("Build") {
-                cd_utils.trigger_release_job("Build static libmxnet", "mxnet_lib/static", params.MXNET_VARIANTS)    
-              }
-              stage("Releases") {
-                cd_utils.error_checked_parallel([
-                  "PyPI Release": {
-                    echo "Building PyPI Release"
-                    cd_utils.trigger_release_job("Release PyPI Packages", "python/pypi", params.MXNET_VARIANTS)
-                  },
-                  "Python Docker Release": {
-                    echo "Building Python Docker Release"
-                    cd_utils.trigger_release_job("Release Python Docker Images", "python/docker", params.MXNET_VARIANTS)
-                  }
-                ])
-              }
-            },
-
-            "Dynamic libmxnet based release": {
-              stage("Build") {
-                cd_utils.trigger_release_job("Build dynamic libmxnet", "mxnet_lib/dynamic", params.MXNET_VARIANTS)    
+          stage("Build libmxnet") {
+            cd_utils.trigger_release_job(params.CD_RELEASE_JOB_NAME, "Build libmxnet", "mxnet_lib", params.MXNET_VARIANTS)
+          }
+          stage("Releases") {
+            cd_utils.error_checked_parallel([
+              "PyPI Release": {
+                echo "Building PyPI Release"
+                cd_utils.trigger_release_job(params.CD_RELEASE_JOB_NAME, "Release PyPI Packages", "python/pypi", params.MXNET_VARIANTS)
+              },
+              "Python Docker Release": {
+                echo "Building Python Docker Release"
+                cd_utils.trigger_release_job(params.CD_RELEASE_JOB_NAME, "Release Python Docker Images", "python/docker", params.MXNET_VARIANTS)
               }
-            }
-            
-          ])
+            ])
+          }
         }
       }
     }
diff --git a/cd/Jenkinsfile_release_job b/cd/Jenkinsfile_release_job
index 0c2a0e7..a2dd674 100644
--- a/cd/Jenkinsfile_release_job
+++ b/cd/Jenkinsfile_release_job
@@ -42,9 +42,10 @@ pipeline {
     // Using string instead of choice parameter to keep the changes to the parameters minimal to avoid
     // any disruption caused by different COMMIT_ID values chaning the job parameter configuration on
     // Jenkins.
-    string(defaultValue: "mxnet_lib/static", description: "Pipeline to build", name: "RELEASE_JOB_TYPE")
-    string(defaultValue: "cpu,native,cu92,cu100,cu101,cu102,cu110", description: "Comma separated list of variants", name: "MXNET_VARIANTS")
+    string(defaultValue: "mxnet_lib", description: "Pipeline to build", name: "RELEASE_JOB_TYPE")
+    string(defaultValue: "cpu,native,cu100,cu101,cu102,cu110", description: "Comma separated list of variants", name: "MXNET_VARIANTS")
     booleanParam(defaultValue: false, description: 'Whether this is a release build or not', name: "RELEASE_BUILD")
+    string(defaultValue: "nightly_v1.x", description: "String used for naming docker images", name: "VERSION")
   }
 
   stages {
@@ -74,7 +75,8 @@ pipeline {
             |Release Build: ${params.RELEASE_BUILD}
             |Commit Id: ${env.GIT_COMMIT}
             |Branch: ${env.GIT_BRANCH}
-            |Variants: ${env.MXNET_VARIANTS}""".stripMargin()
+            |Version: ${VERSION}
+            |Variants: ${params.MXNET_VARIANTS}""".stripMargin()
           }
         }
       }
@@ -90,8 +92,7 @@ pipeline {
 
           // Add new job types here
           def valid_job_types = [
-            "mxnet_lib/static",
-            "mxnet_lib/dynamic",
+            "mxnet_lib",
             "python/pypi",
             "python/docker"
           ]
diff --git a/cd/Jenkinsfile_utils.groovy b/cd/Jenkinsfile_utils.groovy
index cfda3ad..9387821 100644
--- a/cd/Jenkinsfile_utils.groovy
+++ b/cd/Jenkinsfile_utils.groovy
@@ -19,21 +19,22 @@
 
 // Triggers a downstream jenkins job responsible for building, testing
 // and publishing all the variants for a particular 'job_type'.
-// The 'job_type' should be the name of the directory that contains the 
-// 'Jenkins_pipeline.groovy' file and has the pipeline definition for the 
+// The 'job_type' should be the name of the directory that contains the
+// 'Jenkins_pipeline.groovy' file and has the pipeline definition for the
 // artifact (docker image, binary, pypi or maven package, etc.) that should
 // be published.
 
 STATE_UPDATE="State Update"
 
-def trigger_release_job(job_name, job_type, mxnet_variants) {
+def trigger_release_job(cd_release_job, job_name, job_type, mxnet_variants) {
   def run = build(
-    job: env.CD_RELEASE_JOB_NAME, 
+    job: cd_release_job,
     parameters: [
       string(name: "RELEASE_JOB_NAME", value: "${job_name}"),
       string(name: "RELEASE_JOB_TYPE", value: "${job_type}"),
       string(name: "MXNET_VARIANTS", value: "${mxnet_variants}"),
       booleanParam(name: "RELEASE_BUILD", value: "${env.RELEASE_BUILD}"),
+      string(name: "VERSION", value: "${env.VERSION}"),
       string(name: "COMMIT_ID", value: "${env.GIT_COMMIT}")
     ],
     // If propagate is true, any result other than successful will
@@ -49,7 +50,7 @@ def trigger_release_job(job_name, job_type, mxnet_variants) {
   // continue with the pipeline and try to post as many releases as possible
   // but mark it as unstable
   if (result == "UNSTABLE" || result == "ABORTED") {
-    currentBuild.result = "UNSTABLE" 
+    currentBuild.result = "UNSTABLE"
   }
 
   // Throw an exception on failure, because this would mean the whole
@@ -65,12 +66,12 @@ def trigger_release_job(job_name, job_type, mxnet_variants) {
 // the configuration of the release job in jenkins
 // to the configuration of release job as defined in the
 // Jenkinsfile _release_job for env.GIT_COMMIT revision
-def update_release_job_state() {
+def update_release_job_state(cd_release_job) {
   build(
-    job: env.CD_RELEASE_JOB_NAME, 
+    job: cd_release_job,
     parameters: [
       string(name: "RELEASE_JOB_TYPE", value: STATE_UPDATE),
-  
+
       // Should be set to the current git commit
       string(name: "COMMIT_ID", value: "${env.GIT_COMMIT}")
     ])
@@ -103,7 +104,7 @@ def wrap_variant_pipeline_fn(variant_pipeline, total_num_pipelines) {
 // The outcome of the execution of each parallel step will affect
 // the result (SUCCESS, FAILURE, ABORTED, UNSTABLE) of the overall job.
 // If all steps fail or are aborted, the job will be set to failed.
-// If some steps fail or are aborted, the job will be set to unstable. 
+// If some steps fail or are aborted, the job will be set to unstable.
 def error_checked_parallel(variant_pipelines) {
   pipelines = variant_pipelines.inject([:]) { mp, key, value ->
     mp << ["${key}": wrap_variant_pipeline_fn(value, variant_pipelines.size())]
@@ -179,7 +180,7 @@ def restore_dynamic_libmxnet(variant) {
 // NOTE: Be mindful of the expected time that a step should take. If it will take a long time,
 // and it can be done in a CPU node, do it in a CPU node. We should avoid using GPU instances unless
 // we *have* to.
-// However, if it is only packaging libmxnet and that doesn't take long. Then, the pipeline can 
+// However, if it is only packaging libmxnet and that doesn't take long. Then, the pipeline can
 // just run on a single node. As is done bellow.
 // For examples of multi-node CD pipelines, see the the binary_release/static and binary_release/dynamic
 // pipeline.
diff --git a/cd/README.md b/cd/README.md
index 5672717..2c86c6e 100644
--- a/cd/README.md
+++ b/cd/README.md
@@ -60,7 +60,7 @@ The [release job](Jenkinsfile_release_job) takes five parameters:
  * **RELEASE\_JOB\_TYPE**: Defines the release pipeline you want to execute.
  * **COMMIT_ID**: The commit id to build
 
-The release job executes, in parallel, the release pipeline for each of the variants (**MXNET_VARIANTS**) for the job type (**RELEASE\_JOB\_TYPE**). The job type the path to a directory (relative to the `cd` directory) that includes a `Jenkins_pipeline.groovy` file ([e.g.](mxnet_lib/static/Jenkins_pipeline.groovy)).
+The release job executes, in parallel, the release pipeline for each of the variants (**MXNET_VARIANTS**) for the job type (**RELEASE\_JOB\_TYPE**). The job type is the path to a directory (relative to the `cd` directory) that includes a `Jenkins_pipeline.groovy` file ([e.g.](mxnet_lib/Jenkins_pipeline.groovy)).
 
 NOTE: The **COMMIT_ID** is a little tricky and we must be very careful with it. It is necessary to ensure that the same commit is built through out the pipeline, but at the same time, it has the potential to change the current state of the release job configuration - specifically the parameter configuration. Any changes to this configuration will require a "dry-run" of the release job to ensure Jenkins has the current (master) version. This is acceptable as there will be few changes to t [...]
 
@@ -192,4 +192,4 @@ def test(mxnet_variant) {
 
 Examples:
 
-Both the [statically linked libmxnet](mxnet_lib/static/Jenkins_pipeline.groovy) and [dynamically linked libmxnet](mxnet_lib/dynamic/Jenkins_pipeline.groovy) pipelines have long running compilation and testing stages that **do not** require specialized/expensive hardware (e.g. GPUs). Therefore, as much as possible, it is important to run each stage in on its own node, and design the pipeline to spend the least amount of time possible on expensive hardware. E.g. for GPU builds, only run GP [...]
+The [libmxnet](mxnet_lib/Jenkins_pipeline.groovy) pipeline has long-running compilation and testing stages that **do not** require specialized/expensive hardware (e.g. GPUs). Therefore, as much as possible, it is important to run each stage on its own node, and design the pipeline to spend the least amount of time possible on expensive hardware. E.g. for GPU builds, only run GPU tests on GPU instances; all other stages can be executed on CPU nodes.
diff --git a/cd/mxnet_lib/static/Jenkins_pipeline.groovy b/cd/mxnet_lib/Jenkins_pipeline.groovy
similarity index 100%
rename from cd/mxnet_lib/static/Jenkins_pipeline.groovy
rename to cd/mxnet_lib/Jenkins_pipeline.groovy
diff --git a/cd/mxnet_lib/dynamic/Jenkins_pipeline.groovy b/cd/mxnet_lib/dynamic/Jenkins_pipeline.groovy
deleted file mode 100644
index 3cef811..0000000
--- a/cd/mxnet_lib/dynamic/Jenkins_pipeline.groovy
+++ /dev/null
@@ -1,58 +0,0 @@
-// -*- mode: groovy -*-
-
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-//
-// Jenkins pipeline
-// See documents at https://jenkins.io/doc/book/pipeline/jenkinsfile/
-
-// NOTE: ci_utils is loaded by the originating Jenkins job, e.g. jenkins/Jenkinsfile_release_job
-
-// NOTE: the following variables are referenced in the mxnet_lib_pipeline jenkins file imported bellow
-// libmxnet location
-libmxnet = 'lib/libmxnet.so'
-
-// licenses
-licenses = 'licenses/*'
-
-// libmxnet dependencies
-mx_deps = ''
-mx_native_deps = ''
-
-// library type
-// either static or dynamic - depending on how it links to its dependencies
-libtype = 'dynamic'
-
-libmxnet_pipeline = load('cd/mxnet_lib/mxnet_lib_pipeline.groovy')
-
-// Builds the dynamic binary for the specified mxnet variant
-def build(mxnet_variant) {
-  node(NODE_LINUX_CPU) {
-    ws("workspace/mxnet_${libtype}/${mxnet_variant}/${env.BUILD_NUMBER}") {
-      def image = libmxnet_pipeline.get_environment(mxnet_variant)
-      ci_utils.init_git()
-      ci_utils.docker_run(image, "build_dynamic_libmxnet ${mxnet_variant}", false)
-      ci_utils.pack_lib("mxnet_${mxnet_variant}", libmxnet_pipeline.get_stash(mxnet_variant))
-    }
-  }
-}
-
-def get_pipeline(mxnet_variant) {
-  return libmxnet_pipeline.get_pipeline(mxnet_variant, this.&build)
-}
-
-return this
diff --git a/cd/python/docker/Dockerfile b/cd/python/docker/Dockerfile
index accbe9b..194fda1 100644
--- a/cd/python/docker/Dockerfile
+++ b/cd/python/docker/Dockerfile
@@ -32,7 +32,6 @@ RUN apt-get update && \
     wget -nv https://bootstrap.pypa.io/get-pip.py && \
     python3 get-pip.py
 
-
 ARG MXNET_COMMIT_ID
 ENV MXNET_COMMIT_ID=${MXNET_COMMIT_ID}
 
diff --git a/cd/python/docker/Dockerfile.test b/cd/python/docker/Dockerfile.test
index bed059d..3349e93 100644
--- a/cd/python/docker/Dockerfile.test
+++ b/cd/python/docker/Dockerfile.test
@@ -23,9 +23,6 @@
 ARG BASE_IMAGE
 FROM ${BASE_IMAGE}
 
-# Install test dependencies
-RUN pip install nose
-
 ARG USER_ID=1001
 ARG GROUP_ID=1001
 
diff --git a/cd/python/docker/python_images.sh b/cd/python/docker/python_images.sh
index a93d578..05ec518 100755
--- a/cd/python/docker/python_images.sh
+++ b/cd/python/docker/python_images.sh
@@ -23,7 +23,7 @@
 
 set -xe
 
-usage="Usage: python_images.sh <build|test|publish> MXNET-VARIANT"
+usage="Usage: python_images.sh <build|test|push> MXNET-VARIANT"
 
 command=${1:?$usage}
 mxnet_variant=${2:?$usage}
@@ -39,8 +39,8 @@ image_name="${repository}:${main_tag}"
 
 resources_path='cd/python/docker'
 
-if [ ! -z "${RELEASE_DOCKERHUB_REPOSITORY}" ]; then
-    image_name="${RELEASE_DOCKERHUB_REPOSITORY}/${image_name}"
+if [ ! -z "${RELEASE_PUBLIC_ECR_REPOSITORY}" ]; then
+    image_name="${RELEASE_PUBLIC_ECR_REPOSITORY}/${image_name}"
 fi
 
 build() {
@@ -57,33 +57,19 @@ test() {
 
     # Ensure the correct context root is passed in when building - Dockerfile.test expects ci directory
     docker build -t "${test_image_name}" --build-arg USER_ID=`id -u` --build-arg GROUP_ID=`id -g` --build-arg BASE_IMAGE="${image_name}" -f ${resources_path}/Dockerfile.test ./ci
-    ./ci/safe_docker_run.py ${runtime_param} --cap-add "SYS_PTRACE" -u `id -u`:`id -g` -v `pwd`:/work/mxnet "${test_image_name}" ${resources_path}/test_python_image.sh "${mxnet_variant}"
 }
 
 push() {
-    if [ -z "${RELEASE_DOCKERHUB_REPOSITORY}" ]; then
-        echo "Cannot publish image without RELEASE_DOCKERHUB_REPOSITORY environment variable being set."
+    if [ -z "${RELEASE_PUBLIC_ECR_REPOSITORY}" ]; then
+        echo "Cannot publish image without RELEASE_PUBLIC_ECR_REPOSITORY environment variable being set."
         exit 1
     fi
 
-    # The secret name env var is set in the Jenkins configuration
-    # Manage Jenkins -> Configure System
-    ./${ci_utils}/docker_login.py --secret-name "${RELEASE_DOCKERHUB_SECRET_NAME}"
+    # Retrieve an authentication token and authenticate Docker client to registry
+    aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/w6z5f7h2
 
     # Push image
     docker push "${image_name}"
-
-    # Iterate over remaining tags, if any
-    for ((i=1;i<${#docker_tags[@]};i++)); do
-        local docker_tag="${docker_tags[${i}]}"
-        local latest_image_name="${RELEASE_DOCKERHUB_REPOSITORY}/${repository}:${docker_tag}_py3"
-
-        docker tag "${image_name}" "${latest_image_name}"
-        docker push "${latest_image_name}"
-        echo "Successfully pushed ${latest_image_name}. Pull it with:"
-        echo "docker pull ${latest_image_name}"
-        echo "For a complete list of tags see https://hub.docker.com/u/${RELEASE_DOCKERHUB_REPOSITORY}/${repository}"
-    done
 }
 
 case ${command} in
diff --git a/cd/utils/docker_tag.sh b/cd/utils/docker_tag.sh
index e77cbe7..48629ad 100755
--- a/cd/utils/docker_tag.sh
+++ b/cd/utils/docker_tag.sh
@@ -21,10 +21,14 @@
 
 mxnet_variant=${1:?"Please specify the mxnet variant as the first parameter"}
 is_release=${RELEASE_BUILD:-false}
-version=${VERSION:-nightly}
+version=${VERSION:-nightly_v1.x}
 
-# The docker tags will be in the form <version>_<hardware>(_mkl)
-# Eg. nightly_cpu, 1.4.0_cpu_mkl, nightly_gpu_cu80_mkl, etc.
+if [[ ${version} == "null" ]]; then
+    version="nightly_v1.x"
+fi
+
+# The docker tags will be in the form <version>_<hardware>
+# Eg. nightly_v1.x_cpu, 1.8.0_cpu, nightly_v1.x_gpu_cu110, etc.
 
 if [[ ${mxnet_variant} == "cpu" ]]; then
     tag_suffix="cpu"
@@ -39,15 +43,3 @@ else
 fi
 
 echo "${version}_${tag_suffix}"
-
-# Print out latest tags as well
-if [[ ${is_release} == "true" ]]; then
-    if [[ ${mxnet_variant} == "cpu" ]]; then
-        echo "latest"
-        echo "latest_cpu"
-    elif [[ ${mxnet_variant} == "native" ]]; then
-        echo "latest_cpu_native"
-    elif [[ ${mxnet_variant} == "cu90" ]]; then
-        echo "latest_gpu"
-    fi
-fi
diff --git a/cd/utils/mxnet_base_image.sh b/cd/utils/mxnet_base_image.sh
index 0e1ecc8..5073a37 100755
--- a/cd/utils/mxnet_base_image.sh
+++ b/cd/utils/mxnet_base_image.sh
@@ -21,32 +21,23 @@
 mxnet_variant=${1:?"Please specify the mxnet variant as the first parameter"}
 
 case ${mxnet_variant} in
-    cu80*)
-    echo "nvidia/cuda:8.0-cudnn7-runtime-ubuntu16.04"
-    ;;
-    cu90*)
-    echo "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04"
-    ;;
-    cu92*)
-    echo "nvidia/cuda:9.2-cudnn7-runtime-ubuntu16.04"
-    ;;
     cu100*)
-    echo "nvidia/cuda:10.0-cudnn7-runtime-ubuntu16.04"
+    echo "nvidia/cuda:10.0-cudnn7-runtime-ubuntu18.04"
     ;;
     cu101*)
-    echo "nvidia/cuda:10.1-cudnn7-runtime-ubuntu16.04"
+    echo "nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04"
     ;;
     cu102*)
-    echo "nvidia/cuda:10.2-cudnn7-runtime-ubuntu16.04"
+    echo "nvidia/cuda:10.2-cudnn7-runtime-ubuntu18.04"
     ;;
     cu110*)
-    echo "nvidia/cuda:11.0-cudnn8-runtime-ubuntu16.04"
+    echo "nvidia/cuda:11.0-cudnn8-runtime-ubuntu18.04"
     ;;
     cpu)
-    echo "ubuntu:16.04"
+    echo "ubuntu:18.04"
     ;;
     native)
-    echo "ubuntu:16.04"
+    echo "ubuntu:18.04"
     ;;
     *)
     echo "Error: Unrecognized mxnet-variant: '${mxnet_variant}'"