Posted to commits@spark.apache.org by yi...@apache.org on 2023/09/14 13:22:46 UTC

[spark-docker] branch master updated: [SPARK-45169] Add official image Dockerfile for Apache Spark 3.5.0

This is an automated email from the ASF dual-hosted git repository.

yikun pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark-docker.git


The following commit(s) were added to refs/heads/master by this push:
     new 028efd4  [SPARK-45169] Add official image Dockerfile for Apache Spark 3.5.0
028efd4 is described below

commit 028efd4637fb2cf791d5bd9ea70b2fca472de4b7
Author: Yikun Jiang <yi...@gmail.com>
AuthorDate: Thu Sep 14 21:22:32 2023 +0800

    [SPARK-45169] Add official image Dockerfile for Apache Spark 3.5.0
    
    ### What changes were proposed in this pull request?
    Add Apache Spark 3.5.0 Dockerfiles.
    
    - Add 3.5.0 GPG key
    - Add .github/workflows/build_3.5.0.yaml
    - Run `./add-dockerfiles.sh 3.5.0` to generate the Dockerfiles (see the sketch after this list)
    - Add version and tag info
    - Backport the entrypoint changes from https://github.com/apache/spark/commit/1d2c338c867c69987d8ed1f3666358af54a040e3 and https://github.com/apache/spark/commit/0c7b4306c7c5fbdd6c577774f8172f82e1d23e3b
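    
    A minimal local sketch of the generation and build steps above (the image tag passed to `docker build` is only illustrative, not the published Docker Hub name):
    
        # Regenerate the 3.5.0 Dockerfiles from the repository templates
        ./add-dockerfiles.sh 3.5.0
    
        # Build the base Scala/Java image from the generated Dockerfile
        docker build -t spark:3.5.0-scala2.12-java11-ubuntu \
          3.5.0/scala2.12-java11-ubuntu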
    
    ### Why are the changes needed?
    Apache Spark 3.5.0 has been released.
    
    ### Does this PR introduce _any_ user-facing change?
    Yes, Docker images for Apache Spark 3.5.0 will be published.
    
    ### How was this patch tested?
    Added the `build_3.5.0.yaml` workflow; CI passed.
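    
    As an additional manual smoke test once the images are published (assuming the `spark:3.5.0` tag from versions.json in the diff below; adjust the repository name as needed):
    
        # Run the published image and print the bundled Spark version
        docker run --rm spark:3.5.0 /opt/spark/bin/spark-submit --version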
    
    Closes #55 from Yikun/3.5.0.
    
    Authored-by: Yikun Jiang <yi...@gmail.com>
    Signed-off-by: Yikun Jiang <yi...@gmail.com>
---
 .github/workflows/build_3.5.0.yaml                 | 41 +++++++++++
 .github/workflows/publish.yml                      |  3 +-
 .github/workflows/test.yml                         |  3 +-
 3.5.0/scala2.12-java11-python3-r-ubuntu/Dockerfile | 29 ++++++++
 3.5.0/scala2.12-java11-python3-ubuntu/Dockerfile   | 26 +++++++
 3.5.0/scala2.12-java11-r-ubuntu/Dockerfile         | 28 ++++++++
 3.5.0/scala2.12-java11-ubuntu/Dockerfile           | 79 ++++++++++++++++++++++
 .../scala2.12-java11-ubuntu/entrypoint.sh          |  4 ++
 entrypoint.sh.template                             |  4 ++
 tools/template.py                                  |  4 +-
 versions.json                                      | 42 ++++++++++--
 11 files changed, 253 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/build_3.5.0.yaml b/.github/workflows/build_3.5.0.yaml
new file mode 100644
index 0000000..6eb3ad6
--- /dev/null
+++ b/.github/workflows/build_3.5.0.yaml
@@ -0,0 +1,41 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+name: "Build and Test (3.5.0)"
+
+on:
+  pull_request:
+    branches:
+      - 'master'
+    paths:
+      - '3.5.0/**'
+
+jobs:
+  run-build:
+    strategy:
+      matrix:
+        image-type: ["all", "python", "scala", "r"]
+    name: Run
+    secrets: inherit
+    uses: ./.github/workflows/main.yml
+    with:
+      spark: 3.5.0
+      scala: 2.12
+      java: 11
+      image-type: ${{ matrix.image-type }}
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index d213ada..8cfa95d 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -25,9 +25,10 @@ on:
       spark:
         description: 'The Spark version of Spark image.'
         required: true
-        default: '3.4.1'
+        default: '3.5.0'
         type: choice
         options:
+        - 3.5.0
         - 3.4.1
         - 3.4.0
         - 3.3.3
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 4f0f741..47dac20 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -25,9 +25,10 @@ on:
       spark:
         description: 'The Spark version of Spark image.'
         required: true
-        default: '3.4.1'
+        default: '3.5.0'
         type: choice
         options:
+        - 3.5.0
         - 3.4.1
         - 3.4.0
         - 3.3.3
diff --git a/3.5.0/scala2.12-java11-python3-r-ubuntu/Dockerfile b/3.5.0/scala2.12-java11-python3-r-ubuntu/Dockerfile
new file mode 100644
index 0000000..d6faaa7
--- /dev/null
+++ b/3.5.0/scala2.12-java11-python3-r-ubuntu/Dockerfile
@@ -0,0 +1,29 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+FROM spark:3.5.0-scala2.12-java11-ubuntu
+
+USER root
+
+RUN set -ex; \
+    apt-get update; \
+    apt-get install -y python3 python3-pip; \
+    apt-get install -y r-base r-base-dev; \
+    rm -rf /var/lib/apt/lists/*
+
+ENV R_HOME /usr/lib/R
+
+USER spark
diff --git a/3.5.0/scala2.12-java11-python3-ubuntu/Dockerfile b/3.5.0/scala2.12-java11-python3-ubuntu/Dockerfile
new file mode 100644
index 0000000..78db218
--- /dev/null
+++ b/3.5.0/scala2.12-java11-python3-ubuntu/Dockerfile
@@ -0,0 +1,26 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+FROM spark:3.5.0-scala2.12-java11-ubuntu
+
+USER root
+
+RUN set -ex; \
+    apt-get update; \
+    apt-get install -y python3 python3-pip; \
+    rm -rf /var/lib/apt/lists/*
+
+USER spark
diff --git a/3.5.0/scala2.12-java11-r-ubuntu/Dockerfile b/3.5.0/scala2.12-java11-r-ubuntu/Dockerfile
new file mode 100644
index 0000000..b5fa221
--- /dev/null
+++ b/3.5.0/scala2.12-java11-r-ubuntu/Dockerfile
@@ -0,0 +1,28 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+FROM spark:3.5.0-scala2.12-java11-ubuntu
+
+USER root
+
+RUN set -ex; \
+    apt-get update; \
+    apt-get install -y r-base r-base-dev; \
+    rm -rf /var/lib/apt/lists/*
+
+ENV R_HOME /usr/lib/R
+
+USER spark
diff --git a/3.5.0/scala2.12-java11-ubuntu/Dockerfile b/3.5.0/scala2.12-java11-ubuntu/Dockerfile
new file mode 100644
index 0000000..15f4b31
--- /dev/null
+++ b/3.5.0/scala2.12-java11-ubuntu/Dockerfile
@@ -0,0 +1,79 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+FROM eclipse-temurin:11-jre-focal
+
+ARG spark_uid=185
+
+RUN groupadd --system --gid=${spark_uid} spark && \
+    useradd --system --uid=${spark_uid} --gid=spark spark
+
+RUN set -ex; \
+    apt-get update; \
+    apt-get install -y gnupg2 wget bash tini libc6 libpam-modules krb5-user libnss3 procps net-tools gosu libnss-wrapper; \
+    mkdir -p /opt/spark; \
+    mkdir /opt/spark/python; \
+    mkdir -p /opt/spark/examples; \
+    mkdir -p /opt/spark/work-dir; \
+    chmod g+w /opt/spark/work-dir; \
+    touch /opt/spark/RELEASE; \
+    chown -R spark:spark /opt/spark; \
+    echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su; \
+    rm -rf /var/lib/apt/lists/*
+
+# Install Apache Spark
+# https://downloads.apache.org/spark/KEYS
+ENV SPARK_TGZ_URL=https://archive.apache.org/dist/spark/spark-3.5.0/spark-3.5.0-bin-hadoop3.tgz \
+    SPARK_TGZ_ASC_URL=https://archive.apache.org/dist/spark/spark-3.5.0/spark-3.5.0-bin-hadoop3.tgz.asc \
+    GPG_KEY=FC3AE3A7EAA1BAC98770840E7E1ABCC53AAA2216
+
+RUN set -ex; \
+    export SPARK_TMP="$(mktemp -d)"; \
+    cd $SPARK_TMP; \
+    wget -nv -O spark.tgz "$SPARK_TGZ_URL"; \
+    wget -nv -O spark.tgz.asc "$SPARK_TGZ_ASC_URL"; \
+    export GNUPGHOME="$(mktemp -d)"; \
+    gpg --batch --keyserver hkps://keys.openpgp.org --recv-key "$GPG_KEY" || \
+    gpg --batch --keyserver hkps://keyserver.ubuntu.com --recv-keys "$GPG_KEY"; \
+    gpg --batch --verify spark.tgz.asc spark.tgz; \
+    gpgconf --kill all; \
+    rm -rf "$GNUPGHOME" spark.tgz.asc; \
+    \
+    tar -xf spark.tgz --strip-components=1; \
+    chown -R spark:spark .; \
+    mv jars /opt/spark/; \
+    mv bin /opt/spark/; \
+    mv sbin /opt/spark/; \
+    mv kubernetes/dockerfiles/spark/decom.sh /opt/; \
+    mv examples /opt/spark/; \
+    mv kubernetes/tests /opt/spark/; \
+    mv data /opt/spark/; \
+    mv python/pyspark /opt/spark/python/pyspark/; \
+    mv python/lib /opt/spark/python/lib/; \
+    mv R /opt/spark/; \
+    chmod a+x /opt/decom.sh; \
+    cd ..; \
+    rm -rf "$SPARK_TMP";
+
+COPY entrypoint.sh /opt/
+
+ENV SPARK_HOME /opt/spark
+
+WORKDIR /opt/spark/work-dir
+
+USER spark
+
+ENTRYPOINT [ "/opt/entrypoint.sh" ]
diff --git a/entrypoint.sh.template b/3.5.0/scala2.12-java11-ubuntu/entrypoint.sh
old mode 100644
new mode 100755
similarity index 95%
copy from entrypoint.sh.template
copy to 3.5.0/scala2.12-java11-ubuntu/entrypoint.sh
index 2e3d2a8..c576d8f
--- a/entrypoint.sh.template
+++ b/3.5.0/scala2.12-java11-ubuntu/entrypoint.sh
@@ -77,6 +77,9 @@ elif ! [ -z "${SPARK_HOME+x}" ]; then
   SPARK_CLASSPATH="$SPARK_HOME/conf:$SPARK_CLASSPATH";
 fi
 
+# SPARK-43540: add current working directory into executor classpath
+SPARK_CLASSPATH="$SPARK_CLASSPATH:$PWD"
+
 # Switch to spark if no USER specified (root by default) otherwise use USER directly
 switch_spark_if_root() {
   if [ $(id -u) -eq 0 ]; then
@@ -90,6 +93,7 @@ case "$1" in
     CMD=(
       "$SPARK_HOME/bin/spark-submit"
       --conf "spark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS"
+      --conf "spark.executorEnv.SPARK_DRIVER_POD_IP=$SPARK_DRIVER_BIND_ADDRESS"
       --deploy-mode client
       "$@"
     )
diff --git a/entrypoint.sh.template b/entrypoint.sh.template
index 2e3d2a8..c576d8f 100644
--- a/entrypoint.sh.template
+++ b/entrypoint.sh.template
@@ -77,6 +77,9 @@ elif ! [ -z "${SPARK_HOME+x}" ]; then
   SPARK_CLASSPATH="$SPARK_HOME/conf:$SPARK_CLASSPATH";
 fi
 
+# SPARK-43540: add current working directory into executor classpath
+SPARK_CLASSPATH="$SPARK_CLASSPATH:$PWD"
+
 # Switch to spark if no USER specified (root by default) otherwise use USER directly
 switch_spark_if_root() {
   if [ $(id -u) -eq 0 ]; then
@@ -90,6 +93,7 @@ case "$1" in
     CMD=(
       "$SPARK_HOME/bin/spark-submit"
       --conf "spark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS"
+      --conf "spark.executorEnv.SPARK_DRIVER_POD_IP=$SPARK_DRIVER_BIND_ADDRESS"
       --deploy-mode client
       "$@"
     )
diff --git a/tools/template.py b/tools/template.py
index d305e62..7bd56d2 100755
--- a/tools/template.py
+++ b/tools/template.py
@@ -33,7 +33,9 @@ GPG_KEY_DICT = {
     # issuer "xinrong@apache.org"
     "3.4.0": "CC68B3D16FE33A766705160BA7E57908C7A4E1B1",
     # issuer "dongjoon@apache.org"
-    "3.4.1": "F28C9C925C188C35E345614DEDA00CE834F0FC5C"
+    "3.4.1": "F28C9C925C188C35E345614DEDA00CE834F0FC5C",
+    # issuer "liyuanjian@apache.org"
+    "3.5.0": "FC3AE3A7EAA1BAC98770840E7E1ABCC53AAA2216"
 }
 
 
diff --git a/versions.json b/versions.json
index 1442bfe..80d35b9 100644
--- a/versions.json
+++ b/versions.json
@@ -1,29 +1,57 @@
 {
   "versions": [
+    {
+      "path": "3.5.0/scala2.12-java11-python3-ubuntu",
+      "tags": [
+        "3.5.0-scala2.12-java11-python3-ubuntu",
+        "3.5.0-python3",
+        "3.5.0",
+        "python3",
+        "latest"
+      ]
+    },
+    {
+      "path": "3.5.0/scala2.12-java11-r-ubuntu",
+      "tags": [
+        "3.5.0-scala2.12-java11-r-ubuntu",
+        "3.5.0-r",
+        "r"
+      ]
+    },
+    {
+      "path": "3.5.0/scala2.12-java11-ubuntu",
+      "tags": [
+        "3.5.0-scala2.12-java11-ubuntu",
+        "3.5.0-scala",
+        "scala"
+      ]
+    },
+    {
+      "path": "3.5.0/scala2.12-java11-python3-r-ubuntu",
+      "tags": [
+        "3.5.0-scala2.12-java11-python3-r-ubuntu"
+      ]
+    },
     {
       "path": "3.4.1/scala2.12-java11-python3-ubuntu",
       "tags": [
         "3.4.1-scala2.12-java11-python3-ubuntu",
         "3.4.1-python3",
-        "3.4.1",
-        "python3",
-        "latest"
+        "3.4.1"
       ]
     },
     {
       "path": "3.4.1/scala2.12-java11-r-ubuntu",
       "tags": [
         "3.4.1-scala2.12-java11-r-ubuntu",
-        "3.4.1-r",
-        "r"
+        "3.4.1-r"
       ]
     },
     {
       "path": "3.4.1/scala2.12-java11-ubuntu",
       "tags": [
         "3.4.1-scala2.12-java11-ubuntu",
-        "3.4.1-scala",
-        "scala"
+        "3.4.1-scala"
       ]
     },
     {

