Posted to commits@flink.apache.org by tr...@apache.org on 2019/05/24 17:01:36 UTC

[flink] branch release-1.8 updated: [FLINK-12416] Fix docker build scripts on Flink-1.8

This is an automated email from the ASF dual-hosted git repository.

trohrmann pushed a commit to branch release-1.8
in repository https://gitbox.apache.org/repos/asf/flink.git


The following commit(s) were added to refs/heads/release-1.8 by this push:
     new 0094df9  [FLINK-12416] Fix docker build scripts on Flink-1.8
0094df9 is described below

commit 0094df9dc284d8748c80db7b5c7993f995dc59b0
Author: Yun Tang <my...@live.com>
AuthorDate: Fri May 10 02:44:20 2019 +0800

    [FLINK-12416] Fix docker build scripts on Flink-1.8
    
    This closes #8391.
---
 flink-container/docker/Dockerfile     | 10 ++++--
 flink-container/docker/README.md      |  2 ++
 flink-container/docker/build.sh       | 63 ++++++++++++++++++++++++++++++-----
 flink-contrib/docker-flink/Dockerfile |  5 ++-
 flink-contrib/docker-flink/README.md  |  2 +-
 flink-contrib/docker-flink/build.sh   | 60 +++++++++++++++++++++++++++++----
 6 files changed, 122 insertions(+), 20 deletions(-)
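
The net effect of this change is that, from Flink 1.8 on, both build scripts accept an optional --hadoop-version and produce a Hadoop-free image when it is omitted. A minimal sketch of the two flink-container/docker/build.sh invocations (job jar path and versions are placeholders):

    # Hadoop-free image (Flink >= 1.8, no shaded Hadoop jar is added)
    ./build.sh --job-jar /path/to/job.jar --from-release \
      --flink-version 1.8.0 --scala-version 2.11

    # same build, but the matching pre-bundled shaded Hadoop jar is downloaded and linked into lib/
    ./build.sh --job-jar /path/to/job.jar --from-release \
      --flink-version 1.8.0 --scala-version 2.11 --hadoop-version 2.8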

diff --git a/flink-container/docker/Dockerfile b/flink-container/docker/Dockerfile
index 5745479..5839d1a 100644
--- a/flink-container/docker/Dockerfile
+++ b/flink-container/docker/Dockerfile
@@ -30,16 +30,20 @@ ENV PATH $PATH:$FLINK_HOME/bin
 # flink-dist can point to a directory or a tarball on the local system
 ARG flink_dist=NOT_SET
 ARG job_jar=NOT_SET
+# hadoop jar is optional
+ARG hadoop_jar=NOT_SET*
 
 # Install build dependencies and flink
-ADD $flink_dist $FLINK_INSTALL_PATH
+ADD $flink_dist $hadoop_jar $FLINK_INSTALL_PATH/
 ADD $job_jar $FLINK_INSTALL_PATH/job.jar
 
 RUN set -x && \
-  ln -s $FLINK_INSTALL_PATH/flink-* $FLINK_HOME && \
+  ln -s $FLINK_INSTALL_PATH/flink-[0-9]* $FLINK_HOME && \
   ln -s $FLINK_INSTALL_PATH/job.jar $FLINK_LIB_DIR && \
+  if [ -f ${FLINK_INSTALL_PATH}/flink-shaded-hadoop* ]; then ln -s ${FLINK_INSTALL_PATH}/flink-shaded-hadoop* $FLINK_LIB_DIR; fi && \
   addgroup -S flink && adduser -D -S -H -G flink -h $FLINK_HOME flink && \
-  chown -R flink:flink $FLINK_INSTALL_PATH/flink-* && \
+  chown -R flink:flink ${FLINK_INSTALL_PATH}/flink-* && \
+  chown -R flink:flink ${FLINK_INSTALL_PATH}/job.jar && \
   chown -h flink:flink $FLINK_HOME
 
 COPY docker-entrypoint.sh /
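
The ARG default hadoop_jar=NOT_SET* is a wildcard that matches no file in the build context, so the ADD line does not fail when no Hadoop jar is supplied. A sketch of the two corresponding direct docker build invocations (file names are illustrative):

    # with a shaded Hadoop jar: it is linked into $FLINK_LIB_DIR by the RUN step above
    docker build --build-arg flink_dist=flink-1.8.0-bin-scala_2.11.tgz \
      --build-arg job_jar=job.jar \
      --build-arg hadoop_jar=flink-shaded-hadoop2-uber-2.8.3-7.0.jar \
      -t flink-job .

    # without one: hadoop_jar keeps its NOT_SET* default, which matches nothing
    docker build --build-arg flink_dist=flink-1.8.0-bin-scala_2.11.tgz \
      --build-arg job_jar=job.jar -t flink-job .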
diff --git a/flink-container/docker/README.md b/flink-container/docker/README.md
index 3ff70c6..b1569a3 100644
--- a/flink-container/docker/README.md
+++ b/flink-container/docker/README.md
@@ -28,6 +28,8 @@ If you want to build the Flink image from an archive stored under `<PATH_TO_ARCH
 If you want to build the Flink image for a specific version of Flink/Hadoop/Scala run:
 
     build.sh --from-release --flink-version 1.6.0 --hadoop-version 2.8 --scala-version 2.11 --image-name <IMAGE_NAME>
+
+Please note that from Flink 1.8 on, the Hadoop version is optional and you can build the Flink image without providing any Hadoop version.
     
 The script will try to download the released version from the Apache archive.
 
diff --git a/flink-container/docker/build.sh b/flink-container/docker/build.sh
index 614a9c3..1792603 100755
--- a/flink-container/docker/build.sh
+++ b/flink-container/docker/build.sh
@@ -22,10 +22,11 @@ usage() {
 Usage:
   build.sh --job-jar <path-to-job-jar> --from-local-dist [--image-name <image>]
   build.sh --job-jar <path-to-job-jar> --from-archive <path-to-dist-archive> [--image-name <image>]
-  build.sh --job-jar <path-to-job-jar> --from-release --flink-version <x.x.x> --hadoop-version <x.x> --scala-version <x.xx> [--image-name <image>]
+  build.sh --job-jar <path-to-job-jar> --from-release --flink-version <x.x.x> --scala-version <x.xx> [--hadoop-version <x.x>] [--image-name <image>]
   build.sh --help
 
-  If the --image-name flag is not used the built image name will be 'flink'.
+  If the --image-name flag is not used the built image name will be 'flink-job'.
+  Before Flink 1.8 the hadoop-version is required. From Flink 1.8 on it is optional; if provided, the matching pre-bundled shaded Hadoop jar is downloaded.
 HERE
   exit 1
 }
@@ -57,7 +58,8 @@ key="$1"
     shift
     ;;
     --hadoop-version)
-    HADOOP_VERSION="$(echo "$2" | sed 's/\.//')"
+    HADOOP_VERSION="$2"
+    HADOOP_MAJOR_VERSION="$(echo ${HADOOP_VERSION} | sed 's/\.//')"
     shift
     ;;
     --scala-version)
@@ -94,15 +96,60 @@ mkdir -p "${TMPDIR}"
 JOB_JAR_TARGET="${TMPDIR}/job.jar"
 cp ${JOB_JAR_PATH} ${JOB_JAR_TARGET}
 
+checkUrlAvailable() {
+    curl --output /dev/null --silent --head --fail $1
+    ret=$?
+    if [[ ${ret} -ne 0 ]]; then
+        echo "The url $1 is not available, please check your parameters, exiting..."
+        usage
+        exit 2
+    fi
+}
+
 if [ -n "${FROM_RELEASE}" ]; then
 
-  [[ -n "${FLINK_VERSION}" ]] && [[ -n "${HADOOP_VERSION}" ]] && [[ -n "${SCALA_VERSION}" ]] || usage
+  [[ -n "${FLINK_VERSION}" ]] && [[ -n "${SCALA_VERSION}" ]] || usage
+
+  FLINK_BASE_URL="$(curl -s https://www.apache.org/dyn/closer.cgi\?preferred\=true)flink/flink-${FLINK_VERSION}/"
+
+  FLINK_MAJOR_VERSION=$(echo "$FLINK_VERSION" | sed -e 's/\.//;s/\(..\).*/\1/')
+
+  if [[ $FLINK_MAJOR_VERSION -ge 18 ]]; then
+
+  # From Flink 1.8 on, the release binaries no longer bundle Hadoop; download a shaded Hadoop jar only if requested
+    if [[ -n "${HADOOP_VERSION}" ]]; then
+        echo "From Flink 1.8 on, the pre-bundled shaded Hadoop jar is downloaded separately."
+        # list the Maven directory to find the matching pre-bundled package
+        SHADED_HADOOP_BASE_URL="https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop2-uber/"
+        SHADED_HADOOP_VERSION="$(curl -s ${SHADED_HADOOP_BASE_URL} | grep -o "title=\"[0-9.-]*/\"" | sed 's/title=\"//g; s/\/"//g' | grep ${HADOOP_VERSION} | head -1)"
+        SHADED_HADOOP_FILE_NAME="flink-shaded-hadoop2-uber-${SHADED_HADOOP_VERSION}.jar"
+
+        CURL_OUTPUT_SHADED_HADOOP="${TMPDIR}/${SHADED_HADOOP_FILE_NAME}"
+
+        DOWNLOAD_SHADED_HADOOP_URL=${SHADED_HADOOP_BASE_URL}${SHADED_HADOOP_VERSION}/${SHADED_HADOOP_FILE_NAME}
+        checkUrlAvailable ${DOWNLOAD_SHADED_HADOOP_URL}
+
+        echo "Downloading ${SHADED_HADOOP_FILE_NAME} from ${DOWNLOAD_SHADED_HADOOP_URL}"
+
+        curl -# ${DOWNLOAD_SHADED_HADOOP_URL} --output ${CURL_OUTPUT_SHADED_HADOOP}
+        SHADED_HADOOP="${CURL_OUTPUT_SHADED_HADOOP}"
+    fi
+    FLINK_DIST_FILE_NAME="flink-${FLINK_VERSION}-bin-scala_${SCALA_VERSION}.tgz"
+  elif [[ -z "${HADOOP_VERSION}" ]]; then
+    usage
+  else
+    FLINK_DIST_FILE_NAME="flink-${FLINK_VERSION}-bin-hadoop${HADOOP_MAJOR_VERSION}-scala_${SCALA_VERSION}.tgz"
+  fi
+
 
-  FLINK_DIST_FILE_NAME="flink-${FLINK_VERSION}-bin-hadoop${HADOOP_VERSION}-scala_${SCALA_VERSION}.tgz"
   CURL_OUTPUT="${TMPDIR}/${FLINK_DIST_FILE_NAME}"
 
-  echo "Downloading ${FLINK_DIST_FILE_NAME} from ${FLINK_BASE_URL}"
-  curl -# "https://archive.apache.org/dist/flink/flink-${FLINK_VERSION}/${FLINK_DIST_FILE_NAME}" --output ${CURL_OUTPUT}
+  DOWNLOAD_FLINK_URL=${FLINK_BASE_URL}${FLINK_DIST_FILE_NAME}
+  checkUrlAvailable ${DOWNLOAD_FLINK_URL}
+
+  echo "Downloading ${FLINK_DIST_FILE_NAME} from ${DOWNLOAD_FLINK_URL}"
+
+  curl -# ${DOWNLOAD_FLINK_URL} --output ${CURL_OUTPUT}
 
   FLINK_DIST="${CURL_OUTPUT}"
 
@@ -123,4 +170,4 @@ else
 
 fi
 
-docker build --build-arg flink_dist="${FLINK_DIST}" --build-arg job_jar="${JOB_JAR_TARGET}" -t "${IMAGE_NAME}" .
+docker build --build-arg flink_dist="${FLINK_DIST}" --build-arg job_jar="${JOB_JAR_TARGET}" --build-arg hadoop_jar="${SHADED_HADOOP}" -t "${IMAGE_NAME}" .
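
The version gate above reduces the Flink version to a two-digit number before comparing it against 18; a standalone sketch of that parsing:

    FLINK_VERSION=1.8.0
    # strip the first dot, then keep the first two characters: 1.8.0 -> 18
    FLINK_MAJOR_VERSION=$(echo "$FLINK_VERSION" | sed -e 's/\.//;s/\(..\).*/\1/')
    if [[ $FLINK_MAJOR_VERSION -ge 18 ]]; then
      echo "Hadoop-free distribution; the shaded Hadoop jar is optional"
    fi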
diff --git a/flink-contrib/docker-flink/Dockerfile b/flink-contrib/docker-flink/Dockerfile
index d24a68c..78ece73 100644
--- a/flink-contrib/docker-flink/Dockerfile
+++ b/flink-contrib/docker-flink/Dockerfile
@@ -28,11 +28,14 @@ ENV PATH $PATH:$FLINK_HOME/bin
 
 # flink-dist can point to a directory or a tarball on the local system
 ARG flink_dist=NOT_SET
+# hadoop jar is optional
+ARG hadoop_jar=NOT_SET*
 
 # Install build dependencies and flink
-ADD $flink_dist $FLINK_INSTALL_PATH
+ADD $flink_dist $hadoop_jar $FLINK_INSTALL_PATH/
 RUN set -x && \
   ln -s $FLINK_INSTALL_PATH/flink-* $FLINK_HOME && \
+  if [ -f ${FLINK_INSTALL_PATH}/flink-shaded-hadoop* ]; then ln -s ${FLINK_INSTALL_PATH}/flink-shaded-hadoop* $FLINK_LIB_DIR; fi && \
   addgroup -S flink && adduser -D -S -H -G flink -h $FLINK_HOME flink && \
   chown -R flink:flink $FLINK_INSTALL_PATH/flink-* && \
   chown -h flink:flink $FLINK_HOME
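
This Dockerfile has no job_jar ARG, so a direct docker build only needs the distribution and, optionally, the shaded Hadoop jar (file names are illustrative):

    docker build --build-arg flink_dist=flink-1.8.0-bin-scala_2.11.tgz \
      --build-arg hadoop_jar=flink-shaded-hadoop2-uber-2.8.3-7.0.jar \
      -t flink:1.8.0 .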
diff --git a/flink-contrib/docker-flink/README.md b/flink-contrib/docker-flink/README.md
index c7d94bb..990244a 100644
--- a/flink-contrib/docker-flink/README.md
+++ b/flink-contrib/docker-flink/README.md
@@ -35,7 +35,7 @@ or
 If you want to build the container for a specific version of flink/hadoop/scala
 you can configure it in the respective args:
 
-    docker build --build-arg FLINK_VERSION=1.0.3 --build-arg HADOOP_VERSION=26 --build-arg SCALA_VERSION=2.10 -t "flink:1.0.3-hadoop2.6-scala_2.10" flink
+    build.sh --from-release --flink-version 1.8.0 --hadoop-version 2.8 --scala-version 2.11 --image-name <IMAGE_NAME>
 
 # Deploy
 
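As with flink-container, dropping --hadoop-version on Flink 1.8+ yields a Hadoop-free image (illustrative, not part of the committed README):

    build.sh --from-release --flink-version 1.8.0 --scala-version 2.11 --image-name <IMAGE_NAME>
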
diff --git a/flink-contrib/docker-flink/build.sh b/flink-contrib/docker-flink/build.sh
index 26557a2..4bbc65a 100755
--- a/flink-contrib/docker-flink/build.sh
+++ b/flink-contrib/docker-flink/build.sh
@@ -22,10 +22,11 @@ usage() {
   cat <<HERE
 Usage:
   build.sh --from-local-dist [--image-name <image>]
-  build.sh --from-release --flink-version <x.x.x> --hadoop-version <x.x> --scala-version <x.xx> [--image-name <image>]
+  build.sh --from-release --flink-version <x.x.x> --scala-version <x.xx> [--hadoop-version <x.x>] [--image-name <image>]
   build.sh --help
 
   If the --image-name flag is not used the built image name will be 'flink'.
+  Before Flink 1.8 the hadoop-version is required. From Flink 1.8 on it is optional; if provided, the matching pre-bundled shaded Hadoop jar is downloaded.
 HERE
   exit 1
 }
@@ -49,7 +50,8 @@ key="$1"
     shift
     ;;
     --hadoop-version)
-    HADOOP_VERSION="$(echo "$2" | sed 's/\.//')"
+    HADOOP_VERSION="$2"
+    HADOOP_MAJOR_VERSION="$(echo ${HADOOP_VERSION} | sed 's/\.//')"
     shift
     ;;
     --scala-version)
@@ -79,16 +81,60 @@ trap cleanup EXIT
 
 mkdir -p "${TMPDIR}"
 
+checkUrlAvailable() {
+    curl --output /dev/null --silent --head --fail $1
+    ret=$?
+    if [[ ${ret} -ne 0 ]]; then
+        echo "The url $1 is not available, please check your parameters, exiting..."
+        usage
+        exit 2
+    fi
+}
+
 if [ -n "${FROM_RELEASE}" ]; then
 
-  [[ -n "${FLINK_VERSION}" ]] && [[ -n "${HADOOP_VERSION}" ]] && [[ -n "${SCALA_VERSION}" ]] || usage
+  [[ -n "${FLINK_VERSION}" ]] && [[ -n "${SCALA_VERSION}" ]] || usage
 
   FLINK_BASE_URL="$(curl -s https://www.apache.org/dyn/closer.cgi\?preferred\=true)flink/flink-${FLINK_VERSION}/"
-  FLINK_DIST_FILE_NAME="flink-${FLINK_VERSION}-bin-hadoop${HADOOP_VERSION}-scala_${SCALA_VERSION}.tgz"
+
+  FLINK_MAJOR_VERSION=$(echo "$FLINK_VERSION" | sed -e 's/\.//;s/\(..\).*/\1/')
+
+  if [[ $FLINK_MAJOR_VERSION -ge 18 ]]; then
+
+  # From Flink 1.8 on, the release binaries no longer bundle Hadoop; download a shaded Hadoop jar only if requested
+    if [[ -n "${HADOOP_VERSION}" ]]; then
+        echo "From Flink 1.8 on, the pre-bundled shaded Hadoop jar is downloaded separately."
+        # list the Maven directory to find the matching pre-bundled package
+        SHADED_HADOOP_BASE_URL="https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop2-uber/"
+        SHADED_HADOOP_VERSION="$(curl -s ${SHADED_HADOOP_BASE_URL} | grep -o "title=\"[0-9.-]*/\"" | sed 's/title=\"//g; s/\/"//g' | grep ${HADOOP_VERSION} | head -1)"
+        SHADED_HADOOP_FILE_NAME="flink-shaded-hadoop2-uber-${SHADED_HADOOP_VERSION}.jar"
+
+        CURL_OUTPUT_SHADED_HADOOP="${TMPDIR}/${SHADED_HADOOP_FILE_NAME}"
+
+        DOWNLOAD_SHADED_HADOOP_URL=${SHADED_HADOOP_BASE_URL}${SHADED_HADOOP_VERSION}/${SHADED_HADOOP_FILE_NAME}
+        checkUrlAvailable ${DOWNLOAD_SHADED_HADOOP_URL}
+
+        echo "Downloading ${SHADED_HADOOP_FILE_NAME} from ${DOWNLOAD_SHADED_HADOOP_URL}"
+
+        curl -# ${DOWNLOAD_SHADED_HADOOP_URL} --output ${CURL_OUTPUT_SHADED_HADOOP}
+        SHADED_HADOOP="${CURL_OUTPUT_SHADED_HADOOP}"
+    fi
+    FLINK_DIST_FILE_NAME="flink-${FLINK_VERSION}-bin-scala_${SCALA_VERSION}.tgz"
+  elif [[ -z "${HADOOP_VERSION}" ]]; then
+    usage
+  else
+    FLINK_DIST_FILE_NAME="flink-${FLINK_VERSION}-bin-hadoop${HADOOP_MAJOR_VERSION}-scala_${SCALA_VERSION}.tgz"
+  fi
+
+
   CURL_OUTPUT="${TMPDIR}/${FLINK_DIST_FILE_NAME}"
 
-  echo "Downloading ${FLINK_DIST_FILE_NAME} from ${FLINK_BASE_URL}"
-  curl -s ${FLINK_BASE_URL}${FLINK_DIST_FILE_NAME} --output ${CURL_OUTPUT}
+  DOWNLOAD_FLINK_URL=${FLINK_BASE_URL}${FLINK_DIST_FILE_NAME}
+  checkUrlAvailable ${DOWNLOAD_FLINK_URL}
+
+  echo "Downloading ${FLINK_DIST_FILE_NAME} from ${DOWNLOAD_FLINK_URL}"
+
+  curl -# ${DOWNLOAD_FLINK_URL} --output ${CURL_OUTPUT}
 
   FLINK_DIST="${CURL_OUTPUT}"
 
@@ -105,4 +151,4 @@ else
 
 fi
 
-docker build --build-arg flink_dist="${FLINK_DIST}" -t "${IMAGE_NAME}" .
+docker build --build-arg flink_dist="${FLINK_DIST}" --build-arg hadoop_jar="${SHADED_HADOOP}" -t "${IMAGE_NAME}" .
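
For reference, a standalone sketch of how both scripts resolve and validate the shaded Hadoop jar (it assumes the Maven directory listing keeps its current HTML format):

    HADOOP_VERSION=2.8
    SHADED_HADOOP_BASE_URL="https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop2-uber/"
    # pick the first listed flink-shaded-hadoop2-uber version that contains the requested Hadoop version
    SHADED_HADOOP_VERSION="$(curl -s ${SHADED_HADOOP_BASE_URL} \
      | grep -o "title=\"[0-9.-]*/\"" | sed 's/title=\"//g; s/\/"//g' \
      | grep ${HADOOP_VERSION} | head -1)"
    SHADED_HADOOP_FILE_NAME="flink-shaded-hadoop2-uber-${SHADED_HADOOP_VERSION}.jar"
    # HEAD request; a non-zero exit code means the URL does not exist
    curl --output /dev/null --silent --head --fail \
      "${SHADED_HADOOP_BASE_URL}${SHADED_HADOOP_VERSION}/${SHADED_HADOOP_FILE_NAME}" \
      && echo "will download ${SHADED_HADOOP_FILE_NAME}"   # e.g. flink-shaded-hadoop2-uber-2.8.3-7.0.jar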