You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by ma...@apache.org on 2024/02/02 02:04:30 UTC

(superset) branch master updated: feat: docker image tags documentation + tweaks (#26923)

This is an automated email from the ASF dual-hosted git repository.

maximebeauchemin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git


The following commit(s) were added to refs/heads/master by this push:
     new 6b0d8ed265 feat: docker image tags documentation + tweaks (#26923)
6b0d8ed265 is described below

commit 6b0d8ed265bf7dddc04487df6dd1d788184cb7fa
Author: Maxime Beauchemin <ma...@gmail.com>
AuthorDate: Thu Feb 1 18:04:23 2024 -0800

    feat: docker image tags documentation + tweaks (#26923)
    
    Co-authored-by: Sam Firke <sf...@users.noreply.github.com>
---
 .github/workflows/docker-release.yml     | 40 +++++++++++++++++--
 .github/workflows/ephemeral-env.yml      | 10 +----
 .github/workflows/no-op.yml              | 14 -------
 docker-compose-non-dev.yml               |  2 +-
 docs/README.md                           |  5 ++-
 docs/docs/installation/docker.mdx        | 66 ++++++++++++++++++++++++++++++++
 helm/superset/Chart.yaml                 |  2 +-
 helm/superset/README.md                  |  4 +-
 helm/superset/values.yaml                |  2 +-
 scripts/build_docker.py                  | 25 ++++++++++--
 tests/unit_tests/scripts/docker_build.py | 14 ++++---
 11 files changed, 145 insertions(+), 39 deletions(-)

diff --git a/.github/workflows/docker-release.yml b/.github/workflows/docker-release.yml
index fe355c54dc..0d0770db28 100644
--- a/.github/workflows/docker-release.yml
+++ b/.github/workflows/docker-release.yml
@@ -1,8 +1,26 @@
-name: Docker
+name: Docker Publish Release
 
 on:
   release:
     types: [published]
+
+  # Can be triggered manually; the run step below reads these inputs as strings
+  workflow_dispatch:
+    inputs:
+      release:
+        required: true
+        description: The version to generate
+      git-ref:
+        required: true
+        description: The git reference to checkout prior to running the docker build
+      force-latest:
+        required: true
+        type: choice
+        default: 'false'
+        description: Whether to force a latest tag on the release
+        options:
+          - 'true'
+          - 'false'
 jobs:
   config:
     runs-on: "ubuntu-latest"
@@ -53,6 +71,22 @@ jobs:
           DOCKERHUB_USER: ${{ secrets.DOCKERHUB_USER }}
           DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
         run: |
-          GITHUB_RELEASE_TAG_NAME="${{ github.event.release.tag_name }}"
+          RELEASE="${{ github.event.release.tag_name }}"
+          FORCE_LATEST=""
+          if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
+            # in the case of a manually-triggered run, read release from input
+            RELEASE="${{ github.event.inputs.release }}"
+            if [ "${{ github.event.inputs.force-latest }}" = "true" ]; then
+              FORCE_LATEST="--force-latest"
+            fi
+            # build_docker.py may not exist on that ref: stash the current copy in /tmp, restore it after checkout
+            cp ./scripts/build_docker.py /tmp
+            git checkout "${{ github.event.inputs.git-ref }}"
+            cp /tmp/build_docker.py scripts/
+          fi
           pip install click
-          ./scripts/build_docker.py ${{ matrix.build_preset }} release --platform ${{ matrix.platform }}
+          ./scripts/build_docker.py \
+            ${{ matrix.build_preset }} \
+            "release" \
+            --build_context_ref "$RELEASE" \
+            --platform ${{ matrix.platform }} $FORCE_LATEST
diff --git a/.github/workflows/ephemeral-env.yml b/.github/workflows/ephemeral-env.yml
index 5c94455440..0ba23e48ac 100644
--- a/.github/workflows/ephemeral-env.yml
+++ b/.github/workflows/ephemeral-env.yml
@@ -122,13 +122,7 @@ jobs:
 
       - name: Build ephemeral env image
         run: |
-          docker buildx build --target ci \
-            --load \
-            -t ${{ steps.get-sha.outputs.sha }} \
-            -t "pr-${{ github.event.issue.number }}" \
-            --platform linux/amd64 \
-            --label "build_actor=${GITHUB_ACTOR}" \
-            .
+          ./scripts/build_docker.py "ci" "pull_request"
 
       - name: Configure AWS credentials
         uses: aws-actions/configure-aws-credentials@v4
@@ -147,7 +141,7 @@ jobs:
           ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
           ECR_REPOSITORY: superset-ci
           SHA: ${{ steps.get-sha.outputs.sha }}
-          IMAGE_TAG: pr-${{ github.event.issue.number }}
+          IMAGE_TAG: pr-${{ github.event.issue.number }}-ci
         run: |
           docker tag $SHA $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG
           docker tag $SHA $ECR_REGISTRY/$ECR_REPOSITORY:$SHA
diff --git a/.github/workflows/no-op.yml b/.github/workflows/no-op.yml
index 4fecc2ba94..835063640a 100644
--- a/.github/workflows/no-op.yml
+++ b/.github/workflows/no-op.yml
@@ -52,17 +52,3 @@ jobs:
         run: |
           echo "This is a no-op step for python-lint to ensure a successful status."
           exit 0
-  check:
-    runs-on: ubuntu-latest
-    steps:
-      - name: No-op for frontend-build
-        run: |
-          echo "This is a no-op step for frontend-build to ensure a successful status."
-          exit 0
-  docker-build:
-    runs-on: ubuntu-latest
-    steps:
-      - name: No-op for frontend-build
-        run: |
-          echo "This is a no-op step for frontend-build to ensure a successful status."
-          exit 0
diff --git a/docker-compose-non-dev.yml b/docker-compose-non-dev.yml
index cf36ae833d..34aec9bbb7 100644
--- a/docker-compose-non-dev.yml
+++ b/docker-compose-non-dev.yml
@@ -14,7 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-x-superset-image: &superset-image apachesuperset.docker.scarf.sh/apache/superset:${TAG:-latest-dev}
+x-superset-image: &superset-image apachesuperset.docker.scarf.sh/apache/superset:${TAG:-latest}
 x-superset-depends-on: &superset-depends-on
   - db
   - redis
diff --git a/docs/README.md b/docs/README.md
index 1e6107564a..cccd379862 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -16,4 +16,7 @@ KIND, either express or implied.  See the License for the
 specific language governing permissions and limitations
 under the License.
 -->
-This is the public documentation site for Superset, built using [Docusaurus 2](https://docusaurus.io/). See [CONTRIBUTING.md](../CONTRIBUTING.md#documentation) for documentation on contributing to documentation.
+This is the public documentation site for Superset, built using
+[Docusaurus 2](https://docusaurus.io/). See
+[CONTRIBUTING.md](../CONTRIBUTING.md#documentation) for documentation on
+contributing to documentation.
diff --git a/docs/docs/installation/docker.mdx b/docs/docs/installation/docker.mdx
new file mode 100644
index 0000000000..fc6ca04123
--- /dev/null
+++ b/docs/docs/installation/docker.mdx
@@ -0,0 +1,66 @@
+# Docker Images and Tags
+
+The Apache Superset community extensively uses Docker for development, release,
+and productionizing Superset. This page details our Docker builds and tag naming
+schemes to help users navigate our offerings.
+
+Images are built and pushed to the [Superset Docker Hub repository](
+https://hub.docker.com/r/apache/superset). Different sets of images are created for:
+- Published releases, with tags like `3.0.0` and the `latest` tag.
+- Pull request iterations, each identified by tags starting with a SHA like
+  `8a2f7d378ab13c156fa183d9284b607ed69f5ecc`, and `pr-3454`, referencing the pull
+  request ID.
+- Merges to the main branch (`master`), resulting in new SHAs, with tags
+  prefixed with `master` for the latest `master` version.
+
+Each code version has multiple builds for different purposes, identified by suffixes:
+- **Build preset:** We offer various images for different needs:
+  - `lean`: The default Docker image, including both frontend and backend. Tags
+    without a build_preset are lean builds, e.g., `latest`.
+  - `dev`: For development, with a headless browser and root access.
+  - `py310`: Similar to lean but with a different Python version (in this example, 3.10).
+  - `ci`: For certain CI workloads.
+  - `websocket`: For Superset clusters supporting advanced features.
+  - `dockerize`: Used by Helm.
+- **Platform:** We build for `linux/arm64` and `linux/amd64`. The `-arm` suffix
+  indicates ARM builds (e.g., `latest-arm`), while tags without a suffix are for
+  AMD (e.g., `latest`).
+
+## Key Image Tags and Examples
+
+- `latest`: The latest official release build, implicitly the lean build on
+  `linux/amd64`.
+- `latest-dev`: The `-dev` image of the latest official release build, with a headless browser and root access.
+- `master`: The latest build from the `master` branch, implicitly lean on
+  `linux/amd64`.
+- `master-dev`: Similar to `master` but includes a headless browser and root access.
+- `pr-5252`: The latest commit in PR 5252.
+- `30948dc401b40982cb7c0dbf6ebbe443b2748c1b-dev-arm`: A `linux/arm64` build for
+  this specific SHA, which could be from a pull request, master merge, or release.
+- `30948dc-dev-arm`: Same as above, but SHA truncated to 7 characters for a
+  shorter handle on the same image.
+- `websocket-latest`: The WebSocket image for use in a Superset cluster.
+
+For insights or modifications to the build matrix and tagging conventions,
+check the [build_docker.py](https://github.com/apache/superset/blob/master/scripts/build_docker.py)
+script and the [docker.yml](https://github.com/apache/superset/blob/master/.github/workflows/docker.yml)
+GitHub action.
+
+## Caching
+
+To accelerate builds, we follow Docker best practices and use `apache/superset-cache`.
+
+## On ARM builds and working with Apple silicon
+
+Apple's current generation of computers uses ARM-based CPUs, and Docker
+running on Macs seems to require `linux/arm64/v8` (at least one user's M2 was
+configured in that way). Setting the environment
+variable `DOCKER_DEFAULT_PLATFORM` to `linux/amd64` seems to work in
+terms of leveraging and building upon the Superset builds provided here.
+
+```
+export DOCKER_DEFAULT_PLATFORM=linux/amd64
+```
+
+Presumably, `linux/arm64/v8` would be more optimized for this generation
+of chips, but less compatible across the ARM ecosystem.
diff --git a/helm/superset/Chart.yaml b/helm/superset/Chart.yaml
index 25ff660e38..6926f3fcbb 100644
--- a/helm/superset/Chart.yaml
+++ b/helm/superset/Chart.yaml
@@ -29,7 +29,7 @@ maintainers:
   - name: craig-rueda
     email: craig@craigrueda.com
     url: https://github.com/craig-rueda
-version: 0.12.1
+version: 0.12.2
 dependencies:
   - name: postgresql
     version: 12.1.6
diff --git a/helm/superset/README.md b/helm/superset/README.md
index 27c1232440..ed93586bc9 100644
--- a/helm/superset/README.md
+++ b/helm/superset/README.md
@@ -23,7 +23,7 @@ NOTE: This file is generated by helm-docs: https://github.com/norwoodj/helm-docs
 
 # superset
 
-![Version: 0.12.1](https://img.shields.io/badge/Version-0.12.1-informational?style=flat-square)
+![Version: 0.12.2](https://img.shields.io/badge/Version-0.12.2-informational?style=flat-square)
 
 Apache Superset is a modern, enterprise-ready business intelligence web application
 
@@ -77,7 +77,7 @@ On helm this can be set on `extraSecretEnv.SUPERSET_SECRET_KEY` or `configOverri
 | hostAliases | list | `[]` | Custom hostAliases for all superset pods # https://kubernetes.io/docs/tasks/network/customize-hosts-file-for-pods/ |
 | image.pullPolicy | string | `"IfNotPresent"` |  |
 | image.repository | string | `"apachesuperset.docker.scarf.sh/apache/superset"` |  |
-| image.tag | string | `""` |  |
+| image.tag | string | `"latest"` |  |
 | imagePullSecrets | list | `[]` |  |
 | ingress.annotations | object | `{}` |  |
 | ingress.enabled | bool | `false` |  |
diff --git a/helm/superset/values.yaml b/helm/superset/values.yaml
index 15c5f7e214..3d96009d80 100644
--- a/helm/superset/values.yaml
+++ b/helm/superset/values.yaml
@@ -179,7 +179,7 @@ extraConfigMountPath: "/app/configs"
 
 image:
   repository: apachesuperset.docker.scarf.sh/apache/superset
-  tag: ""
+  tag: "latest"
   pullPolicy: IfNotPresent
 
 imagePullSecrets: []
diff --git a/scripts/build_docker.py b/scripts/build_docker.py
index f2323eb7ed..de6b8444f8 100755
--- a/scripts/build_docker.py
+++ b/scripts/build_docker.py
@@ -83,6 +83,7 @@ def get_docker_tags(
     sha: str,
     build_context: str,
     build_context_ref: str,
+    force_latest: bool = False,
 ) -> set[str]:
     """
     Return a set of tags given a given build context
@@ -110,11 +111,13 @@ def get_docker_tags(
     if build_context == "release":
         # add a release tag
         tags.add(make_docker_tag([build_context_ref] + tag_chunks))
-        if is_latest:
+        if is_latest or force_latest:
             # add a latest tag
             tags.add(make_docker_tag(["latest"] + tag_chunks))
     elif build_context == "push" and build_context_ref == "master":
         tags.add(make_docker_tag(["master"] + tag_chunks))
+    elif build_context == "pull_request":
+        tags.add(make_docker_tag([f"pr-{build_context_ref}"] + tag_chunks))
     return tags
 
 
@@ -125,6 +128,7 @@ def get_docker_command(
     sha: str,
     build_context: str,
     build_context_ref: str,
+    force_latest: bool = False,
 ) -> str:
     tag = ""
     build_target = ""
@@ -160,6 +164,7 @@ def get_docker_command(
         sha,
         build_context,
         build_context_ref,
+        force_latest,
     )
     docker_tags = ("\\\n" + 8 * " ").join([f"-t {s} " for s in tags])
 
@@ -205,12 +210,16 @@ def get_docker_command(
 )
 @click.option("--build_context_ref", help="a reference to the pr, release or branch")
 @click.option("--dry-run", is_flag=True, help="Run the command in dry-run mode.")
+@click.option(
+    "--force-latest", is_flag=True, help="Force the 'latest' tag on the release"
+)
 def main(
     build_preset: str,
     build_context: str,
     build_context_ref: str,
     platform: str,
     dry_run: bool,
+    force_latest: bool,
 ) -> None:
     """
     This script executes docker build and push commands based on given arguments.
@@ -219,7 +228,16 @@ def main(
     is_authenticated = (
         True if os.getenv("DOCKERHUB_TOKEN") and os.getenv("DOCKERHUB_USER") else False
     )
-    build_context_ref = get_build_context_ref(build_context)
+
+    if force_latest and build_context != "release":
+        print(
+            "--force-latest can only be applied if the build context is set to 'release'"
+        )
+        exit(1)
+
+    if build_context == "release" and not build_context_ref.strip():
+        print("Release number has to be provided")
+        exit(1)
 
     docker_build_command = get_docker_command(
         build_preset,
@@ -227,7 +245,8 @@ def main(
         is_authenticated,
         get_git_sha(),
         build_context,
-        get_build_context_ref(build_context),
+        build_context_ref,
+        force_latest,
     )
 
     if not dry_run:
diff --git a/tests/unit_tests/scripts/docker_build.py b/tests/unit_tests/scripts/docker_build.py
index 002a51f27d..ee9ad66ead 100644
--- a/tests/unit_tests/scripts/docker_build.py
+++ b/tests/unit_tests/scripts/docker_build.py
@@ -65,7 +65,7 @@ def test_is_latest_release(release, expected_bool):
             SHA,
             "pull_request",
             PR_ID,
-            [f"{REPO}:22e7c60-arm", f"{REPO}:{SHA}-arm"],
+            [f"{REPO}:22e7c60-arm", f"{REPO}:{SHA}-arm", f"{REPO}:pr-{PR_ID}-arm"],
         ),
         (
             "ci",
@@ -73,7 +73,7 @@ def test_is_latest_release(release, expected_bool):
             SHA,
             "pull_request",
             PR_ID,
-            [f"{REPO}:22e7c60-ci", f"{REPO}:{SHA}-ci"],
+            [f"{REPO}:22e7c60-ci", f"{REPO}:{SHA}-ci", f"{REPO}:pr-{PR_ID}-ci"],
         ),
         (
             "lean",
@@ -81,7 +81,7 @@ def test_is_latest_release(release, expected_bool):
             SHA,
             "pull_request",
             PR_ID,
-            [f"{REPO}:22e7c60", f"{REPO}:{SHA}"],
+            [f"{REPO}:22e7c60", f"{REPO}:{SHA}", f"{REPO}:pr-{PR_ID}"],
         ),
         (
             "dev",
@@ -89,7 +89,11 @@ def test_is_latest_release(release, expected_bool):
             SHA,
             "pull_request",
             PR_ID,
-            [f"{REPO}:22e7c60-dev-arm", f"{REPO}:{SHA}-dev-arm"],
+            [
+                f"{REPO}:22e7c60-dev-arm",
+                f"{REPO}:{SHA}-dev-arm",
+                f"{REPO}:pr-{PR_ID}-dev-arm",
+            ],
         ),
         (
             "dev",
@@ -97,7 +101,7 @@ def test_is_latest_release(release, expected_bool):
             SHA,
             "pull_request",
             PR_ID,
-            [f"{REPO}:22e7c60-dev", f"{REPO}:{SHA}-dev"],
+            [f"{REPO}:22e7c60-dev", f"{REPO}:{SHA}-dev", f"{REPO}:pr-{PR_ID}-dev"],
         ),
         # old releases
         (