You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hop.apache.org by ha...@apache.org on 2022/09/21 14:38:34 UTC

[hop] branch master updated: HOP-4170: upgrade flink and spark IT environment, remove some jars

This is an automated email from the ASF dual-hosted git repository.

hansva pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hop.git


The following commit(s) were added to refs/heads/master by this push:
     new 725315262f HOP-4170: upgrade flink and spark IT environment, remove some jars
     new 23411438af Merge pull request #1700 from hansva/master
725315262f is described below

commit 725315262f03e08ad6ec16e3e14cbc9a8bd4ab26
Author: Hans Van Akelyen <ha...@gmail.com>
AuthorDate: Wed Sep 21 15:40:14 2022 +0200

    HOP-4170: upgrade flink and spark IT environment, remove some jars
---
 .../plugins/engines/beam/src/assembly/assembly.xml |  17 ++--
 .../integration-tests/Dockerfile.unit-tests-spark  | 113 ---------------------
 .../integration-tests/integration-tests-flink.yaml |   4 +-
 .../integration-tests/integration-tests-spark.yaml |   2 +-
 docker/integration-tests/spark/Dockerfile.master   |   4 +-
 docker/integration-tests/spark/Dockerfile.worker   |   4 +-
 pom.xml                                            |   2 +-
 7 files changed, 14 insertions(+), 132 deletions(-)

diff --git a/assemblies/plugins/engines/beam/src/assembly/assembly.xml b/assemblies/plugins/engines/beam/src/assembly/assembly.xml
index 9c3967ef4b..e6eb44ae96 100644
--- a/assemblies/plugins/engines/beam/src/assembly/assembly.xml
+++ b/assemblies/plugins/engines/beam/src/assembly/assembly.xml
@@ -247,21 +247,16 @@
                 <include>org.apache.flink:flink-shaded-netty</include>
                 <include>org.apache.flink:flink-shaded-zookeeper-3</include>
                 <include>org.apache.flink:flink-streaming-java</include>
-                <include>org.apache.hadoop:hadoop-annotations</include>
-                <include>org.apache.hadoop:hadoop-auth</include>
-                <include>org.apache.hadoop:hadoop-client</include>
-                <include>org.apache.hadoop:hadoop-common</include>
-                <include>org.apache.hadoop:hadoop-hdfs</include>
-                <include>org.apache.hadoop:hadoop-mapreduce-client-common</include>
-                <include>org.apache.hadoop:hadoop-mapreduce-client-core</include>
-                <include>org.apache.hadoop:hadoop-mapreduce-client-jobclient</include>
-                <include>org.apache.hadoop:hadoop-yarn-api</include>
-                <include>org.apache.hadoop:hadoop-yarn-client</include>
-                <include>org.apache.hadoop:hadoop-yarn-common</include>
+                <include>org.apache.hadoop:hadoop-client-api</include>
+                <include>org.apache.hadoop:hadoop-client-runtime</include>
+                <include>org.apache.hadoop:hadoop-hdfs-client</include>
                 <include>org.apache.htrace:htrace-core</include>
                 <include>org.apache.ivy:ivy</include>
                 <include>org.apache.kafka:kafka-clients</include>
+                <include>org.apache.logging.log4j:log4j-slf4j-impl</include>
+                <include>org.apache.logging.log4j:log4j-api</include>
                 <include>org.apache.logging.log4j:log4j-core</include>
+                <include>org.apache.logging.log4j:log4j-1.2-api</include>
                 <include>org.apache.spark:spark-core_2.12</include>
                 <include>org.apache.spark:spark-kvstore_2.12</include>
                 <include>org.apache.spark:spark-launcher_2.12</include>
diff --git a/docker/integration-tests/Dockerfile.unit-tests-spark b/docker/integration-tests/Dockerfile.unit-tests-spark
deleted file mode 100644
index a97d5034cb..0000000000
--- a/docker/integration-tests/Dockerfile.unit-tests-spark
+++ /dev/null
@@ -1,113 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-FROM ubuntu
-MAINTAINER Apache Hop
-
-# Argument Branch name, used to download correct version
-ARG BRANCH_NAME
-ENV BRANCH_NAME=$BRANCH_NAME
-# path to where the artefacts should be deployed to
-ENV DEPLOYMENT_PATH=/opt
-# volume mount point
-ENV VOLUME_MOUNT_POINT=/files
-#Jenkins user an group
-ARG JENKINS_USER=hop
-ARG JENKINS_GROUP=hop
-ARG JENKINS_UID=1000
-ARG JENKINS_GID=1000
-ARG GCP_KEY_FILE=
-# Set system properties
-ENV DEBIAN_FRONTEND=noninteractive
-
-# any JRE settings you want to pass on
-# The “-XX:+AggressiveHeap” tells the container to use all memory assigned to the container. 
-# this removed the need to calculate the necessary heap Xmx
-ENV HOP_OPTIONS=-XX:+AggressiveHeap
-
-# INSTALL REQUIRED PACKAGES AND ADJUST LOCALE
-# procps: The package includes the programs ps, top, vmstat, w, kill, free, slabtop, and skill
-
-RUN apt-get update \
-  && apt-get install --assume-yes \
-  bash \
-  curl \
-  procps \
-  git \
-  python3-pip \
-  openjdk-11-jre-headless \
-  unzip \
-  ttf-mscorefonts-installer \
-  locales \
-  && mkdir ${VOLUME_MOUNT_POINT} \
-  && addgroup -gid ${JENKINS_GID} ${JENKINS_GROUP} \
-  && useradd -m  -d /home/${JENKINS_USER} -u ${JENKINS_UID} -g ${JENKINS_GROUP} ${JENKINS_USER} \
-  && chown ${JENKINS_USER}:${JENKINS_GROUP} ${DEPLOYMENT_PATH} \
-  && chown ${JENKINS_USER}:${JENKINS_GROUP} ${VOLUME_MOUNT_POINT}
-
-# Set Locale correctly
-RUN sed -i '/en_US.UTF-8/s/^# //g' /etc/locale.gen && \
-    locale-gen
-ENV LANG en_US.UTF-8
-ENV LANGUAGE en_US:en
-ENV LC_ALL en_US.UTF-8
-
-# Install parquet-tools from Python
-
-RUN pip3 install parquet-tools
-
-# Copy the hop package from the local resources folder to the container image directory
-
-COPY --chown=${JENKINS_USER}:${JENKINS_GROUP} ./assemblies/client/target/hop-* ${DEPLOYMENT_PATH}/hop.zip
-
-# Unzip and install in correct location
-
-RUN unzip ${DEPLOYMENT_PATH}/hop.zip -d ${DEPLOYMENT_PATH} \
-  && rm ${DEPLOYMENT_PATH}/hop.zip \
-  #Remove Jars for Spark
-  && rm ${DEPLOYMENT_PATH}/hop/plugins/engines/beam/lib/flink-shaded-jackson* \
-  && rm ${DEPLOYMENT_PATH}/hop/plugins/engines/beam/lib/jackson-module-scala* \
-  && rm ${DEPLOYMENT_PATH}/hop/plugins/engines/beam/lib/scala-java8-compat* \
-  && rm ${DEPLOYMENT_PATH}/hop/plugins/engines/beam/lib/scala-library* \
-  && rm ${DEPLOYMENT_PATH}/hop/plugins/engines/beam/lib/scala-parser-combinators* \
-  #Add extra jars
-  && wget -P ${DEPLOYMENT_PATH}/hop/plugins/engines/beam/lib/ https://repo1.maven.org/maven2/com/fasterxml/jackson/module/jackson-module-scala_2.12/2.13.3/jackson-module-scala_2.12-2.13.3.jar \
-  && wget -P ${DEPLOYMENT_PATH}/hop/plugins/engines/beam/lib/ https://repo1.maven.org/maven2/org/json4s/json4s-ast_2.12/3.7.0-M5/json4s-ast_2.12-3.7.0-M5.jar \
-  && wget -P ${DEPLOYMENT_PATH}/hop/plugins/engines/beam/lib/ https://repo1.maven.org/maven2/org/json4s/json4s-core_2.12/3.7.0-M5/json4s-core_2.12-3.7.0-M5.jar \
-  && wget -P ${DEPLOYMENT_PATH}/hop/plugins/engines/beam/lib/ https://repo1.maven.org/maven2/org/json4s/json4s-jackson_2.12/3.7.0-M5/json4s-jackson_2.12-3.7.0-M5.jar \
-  && wget -P ${DEPLOYMENT_PATH}/hop/plugins/engines/beam/lib/ https://repo1.maven.org/maven2/org/json4s/json4s-scalap_2.12/3.7.0-M5/json4s-scalap_2.12-3.7.0-M5.jar \
-  && wget -P ${DEPLOYMENT_PATH}/hop/plugins/engines/beam/lib/ https://repo1.maven.org/maven2/log4j/log4j/1.2.17/log4j-1.2.17.jar \
-  && wget -P ${DEPLOYMENT_PATH}/hop/plugins/engines/beam/lib/ https://repo1.maven.org/maven2/org/scala-lang/scala-compiler/2.12.10/scala-compiler-2.12.10.jar \
-  && wget -P ${DEPLOYMENT_PATH}/hop/plugins/engines/beam/lib/ https://repo1.maven.org/maven2/org/scala-lang/scala-library/2.12.10/scala-library-2.12.10.jar \
-  && wget -P ${DEPLOYMENT_PATH}/hop/plugins/engines/beam/lib/ https://repo1.maven.org/maven2/org/scala-lang/modules/scala-parser-combinators_2.12/1.1.2/scala-parser-combinators_2.12-1.1.2.jar \
-  && wget -P ${DEPLOYMENT_PATH}/hop/plugins/engines/beam/lib/ https://repo1.maven.org/maven2/org/scala-lang/scala-reflect/2.12.10/scala-reflect-2.12.10.jar \
-  && wget -P ${DEPLOYMENT_PATH}/hop/plugins/engines/beam/lib/ https://repo1.maven.org/maven2/org/scala-lang/modules/scala-xml_2.12/1.2.0/scala-xml_2.12-1.2.0.jar \
-  && wget -P ${DEPLOYMENT_PATH}/hop/plugins/engines/beam/lib/ https://repo1.maven.org/maven2/org/apache/spark/spark-unsafe_2.12/3.1.3/spark-unsafe_2.12-3.1.3.jar \
-  && wget -P ${DEPLOYMENT_PATH}/hop/plugins/engines/beam/lib/ https://repo1.maven.org/maven2/org/apache/xbean/xbean-asm7-shaded/4.15/xbean-asm7-shaded-4.15.jar \
-  && chown -R ${JENKINS_USER}:${JENKINS_GROUP} ${DEPLOYMENT_PATH}/hop \
-  && chmod 700 ${DEPLOYMENT_PATH}/hop/*.sh \
-  && cd ${DEPLOYMENT_PATH}/hop \
-  && ./hop-conf.sh --generate-fat-jar=/tmp/hop-fatjar.jar
-
-# make volume available so that hop pipeline and workflow files can be provided easily
-VOLUME ["/files"]
-USER ${JENKINS_USER}
-ENV PATH=$PATH:${DEPLOYMENT_PATH}/hop
-ENV GOOGLE_APPLICATION_CREDENTIALS="/tmp/google-key-apache-hop-it.json"
-WORKDIR /home/${JENKINS_USER}
-# CMD ["/bin/bash"]
-ENTRYPOINT []
\ No newline at end of file
diff --git a/docker/integration-tests/integration-tests-flink.yaml b/docker/integration-tests/integration-tests-flink.yaml
index d2ac137695..41173416b7 100644
--- a/docker/integration-tests/integration-tests-flink.yaml
+++ b/docker/integration-tests/integration-tests-flink.yaml
@@ -28,7 +28,7 @@ services:
       - taskmanager
 
   jobmanager:
-    image: flink:1.14.5-scala_2.11-java11
+    image: flink:1.15.2-java11
     ports:
       - "8081"
     command: jobmanager
@@ -38,7 +38,7 @@ services:
         jobmanager.rpc.address: jobmanager        
 
   taskmanager:
-    image: flink:1.14.5-scala_2.11-java11
+    image: flink:1.15.2-java11
     depends_on:
       - jobmanager
     command: taskmanager
diff --git a/docker/integration-tests/integration-tests-spark.yaml b/docker/integration-tests/integration-tests-spark.yaml
index bae7899dfe..2b67a478a2 100644
--- a/docker/integration-tests/integration-tests-spark.yaml
+++ b/docker/integration-tests/integration-tests-spark.yaml
@@ -19,7 +19,7 @@ version: '2.4'
 services:
   integration_test_spark:
     extends:
-      file: integration-tests-base-spark.yaml
+      file: integration-tests-base.yaml
       service: integration_test
     depends_on:
       - spark
diff --git a/docker/integration-tests/spark/Dockerfile.master b/docker/integration-tests/spark/Dockerfile.master
index d4bb5db970..37f78abdf8 100644
--- a/docker/integration-tests/spark/Dockerfile.master
+++ b/docker/integration-tests/spark/Dockerfile.master
@@ -21,8 +21,8 @@ ENV INIT_DAEMON_BASE_URI http://identifier/init-daemon
 ENV INIT_DAEMON_STEP spark_master_init
 
 ENV BASE_URL=https://archive.apache.org/dist/spark/
-ENV SPARK_VERSION=3.1.3
-ENV HADOOP_VERSION=3.2
+ENV SPARK_VERSION=3.3.0
+ENV HADOOP_VERSION=3
 ENV SPARK_MASTER_PORT 7077
 ENV SPARK_MASTER_WEBUI_PORT 8080
 ENV SPARK_MASTER_LOG /spark/logs
diff --git a/docker/integration-tests/spark/Dockerfile.worker b/docker/integration-tests/spark/Dockerfile.worker
index 958625f1b8..641f5abbb5 100644
--- a/docker/integration-tests/spark/Dockerfile.worker
+++ b/docker/integration-tests/spark/Dockerfile.worker
@@ -22,8 +22,8 @@ ENV INIT_DAEMON_BASE_URI http://identifier/init-daemon
 ENV INIT_DAEMON_STEP spark_master_init
 
 ENV BASE_URL=https://archive.apache.org/dist/spark/
-ENV SPARK_VERSION=3.1.3
-ENV HADOOP_VERSION=3.2
+ENV SPARK_VERSION=3.3.0
+ENV HADOOP_VERSION=3
 ENV SPARK_WORKER_WEBUI_PORT 8081
 ENV SPARK_WORKER_LOG /spark/logs
 ENV SPARK_MASTER "spark://spark:7077"
diff --git a/pom.xml b/pom.xml
index 3de320bca2..5b367b3200 100644
--- a/pom.xml
+++ b/pom.xml
@@ -168,7 +168,7 @@
         <jandex-maven-plugin.version>1.0.8</jandex-maven-plugin.version>
         <wagon-plugin.version>2.0.0</wagon-plugin.version>
 
-        <jackson.version>2.12.7</jackson.version>
+        <jackson.version>2.13.0</jackson.version>
         <commons-compress.version>1.21</commons-compress.version>
         <commons-fileupload.version>1.4</commons-fileupload.version>
         <aws-java-sdk.version>1.12.279</aws-java-sdk.version>