Posted to commits@flink.apache.org by rm...@apache.org on 2020/05/13 14:28:59 UTC

[flink] 05/08: [FLINK-11086][e2e] Use HADOOP_CLASSPATH in end to end tests

This is an automated email from the ASF dual-hosted git repository.

rmetzger pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git

commit c3648da4f4fdcd9c134ea05fd4316bc2146dd189
Author: Robert Metzger <rm...@apache.org>
AuthorDate: Mon May 4 10:16:44 2020 +0200

    [FLINK-11086][e2e] Use HADOOP_CLASSPATH in end to end tests
---
 flink-end-to-end-tests/pom.xml                     | 43 ++++++++++++++++++++++
 flink-end-to-end-tests/test-scripts/common.sh      | 10 +++++
 .../test-scripts/common_mesos_docker.sh            |  8 +++-
 .../docker-mesos-cluster/docker-compose.yml        |  1 +
 .../test_mesos_multiple_submissions.sh             |  4 +-
 .../test-scripts/test_mesos_wordcount.sh           |  4 +-
 .../test-scripts/test_streaming_bucketing.sh       |  2 +
 tools/verify_scala_suffixes.sh                     |  2 +-
 8 files changed, 70 insertions(+), 4 deletions(-)

diff --git a/flink-end-to-end-tests/pom.xml b/flink-end-to-end-tests/pom.xml
index 3b97256..dbdcf49 100644
--- a/flink-end-to-end-tests/pom.xml
+++ b/flink-end-to-end-tests/pom.xml
@@ -91,6 +91,23 @@ under the License.
 		<module>flink-netty-shuffle-memory-control-test</module>
 	</modules>
 
+	<dependencies>
+		<dependency>
+			<!-- flink-yarn is required for getting the yarn classpath in the hadoop
+			bash end to end tests -->
+			<groupId>org.apache.flink</groupId>
+			<artifactId>flink-yarn-tests</artifactId>
+			<version>${project.version}</version>
+			<scope>provided</scope>
+			<exclusions>
+				<exclusion>
+					<groupId>*</groupId>
+					<artifactId>*</artifactId>
+				</exclusion>
+			</exclusions>
+		</dependency>
+	</dependencies>
+
 	<profiles>
 		<profile>
 			<id>e2e-travis1</id>
@@ -154,6 +171,32 @@ under the License.
 	<build>
 		<plugins>
 			<plugin>
+				<artifactId>maven-resources-plugin</artifactId>
+				<!-- <version>3.1.0</version> -->
+				<executions>
+					<execution>
+						<id>copy-resources</id>
+						<!-- here the phase you need -->
+						<phase>package</phase>
+						<goals>
+							<goal>copy-resources</goal>
+						</goals>
+						<configuration>
+							<outputDirectory>${basedir}/test-scripts/hadoop</outputDirectory>
+							<resources>
+								<resource>
+									<directory>../flink-yarn-tests/target/</directory>
+									<filtering>true</filtering>
+									<includes>
+										<include>yarn.classpath</include>
+									</includes>
+								</resource>
+							</resources>
+						</configuration>
+					</execution>
+				</executions>
+			</plugin>
+			<plugin>
 				<groupId>org.apache.maven.plugins</groupId>
 				<artifactId>maven-deploy-plugin</artifactId>
 				<configuration>
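The dependency block above is a Maven ordering trick rather than a real
dependency: declaring flink-yarn-tests with <scope>provided</scope> and a
wildcard exclusion forces that module to be built first (so that its
target/yarn.classpath file exists) without leaking any of its jars onto the
end-to-end test classpath. The maven-resources-plugin execution then copies
the generated classpath file, with property filtering applied, next to the
test scripts. A minimal sketch of producing and inspecting the copied file
(file locations are taken from the snippet above; the exact build invocation
is an assumption):

    # Build the e2e module; -am (also-make) first builds the declared
    # dependency flink-yarn-tests, which generates target/yarn.classpath.
    mvn package -DskipTests -pl flink-end-to-end-tests -am
    # The resources plugin copies the file next to the shell scripts:
    cat flink-end-to-end-tests/test-scripts/hadoop/yarn.classpath
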
diff --git a/flink-end-to-end-tests/test-scripts/common.sh b/flink-end-to-end-tests/test-scripts/common.sh
index 8329c12..c51fa18 100644
--- a/flink-end-to-end-tests/test-scripts/common.sh
+++ b/flink-end-to-end-tests/test-scripts/common.sh
@@ -55,6 +55,15 @@ REST_PROTOCOL="http"
 CURL_SSL_ARGS=""
 source "${TEST_INFRA_DIR}/common_ssl.sh"
 
+function set_hadoop_classpath {
+  YARN_CLASSPATH_LOCATION="${TEST_INFRA_DIR}/hadoop/yarn.classpath";
+  if [ ! -f $YARN_CLASSPATH_LOCATION ]; then
+    echo "File '$YARN_CLASSPATH_LOCATION' does not exist."
+    exit 1
+  fi
+  export HADOOP_CLASSPATH=`cat $YARN_CLASSPATH_LOCATION`
+}
+
 function print_mem_use_osx {
     declare -a mem_types=("active" "inactive" "wired down")
     used=""
@@ -353,6 +362,7 @@ function check_logs_for_errors {
       | grep -v "Failed Elasticsearch item request" \
       | grep -v "[Terror] modules" \
       | grep -v "HeapDumpOnOutOfMemoryError" \
+      | grep -v "error_prone_annotations" \
       | grep -ic "error" || true)
   if [[ ${error_count} -gt 0 ]]; then
     echo "Found error in log files:"
diff --git a/flink-end-to-end-tests/test-scripts/common_mesos_docker.sh b/flink-end-to-end-tests/test-scripts/common_mesos_docker.sh
index 5d90e5a..83aca21 100644
--- a/flink-end-to-end-tests/test-scripts/common_mesos_docker.sh
+++ b/flink-end-to-end-tests/test-scripts/common_mesos_docker.sh
@@ -44,8 +44,13 @@ function start_flink_cluster_with_mesos() {
         echo "ERROR: Could not build mesos image. Aborting..."
         exit 1
     fi
+    # build docker image with java and mesos
     build_image
 
+    # we need to export the MVN_REPO location so that mesos can access the files referenced in HADOOP_CLASSPATH
+    export MVN_REPO=`mvn help:evaluate -Dexpression=settings.localRepository -q -DforceStdout`
+
+    # start mesos cluster
     docker-compose -f $END_TO_END_DIR/test-scripts/docker-mesos-cluster/docker-compose.yml up -d
 
     # wait for the Mesos master and slave set up
@@ -58,7 +63,8 @@ function start_flink_cluster_with_mesos() {
     set_config_key "jobmanager.rpc.address" "mesos-master"
     set_config_key "rest.address" "mesos-master"
 
-    docker exec -itd mesos-master bash -c "${FLINK_DIR}/bin/mesos-appmaster.sh -Dmesos.master=mesos-master:5050"
+    docker exec --env HADOOP_CLASSPATH=$HADOOP_CLASSPATH -itd mesos-master bash -c "${FLINK_DIR}/bin/mesos-appmaster.sh -Dmesos.master=mesos-master:5050"
+
     wait_rest_endpoint_up "http://${NODENAME}:8081/taskmanagers" "Dispatcher" "\{\"taskmanagers\":\[.*\]\}"
     return 0
 }
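The MVN_REPO export matters because the generated HADOOP_CLASSPATH points at
jars inside the local Maven repository; the Mesos containers bind-mount that
path (see the docker-compose change below) so the same absolute paths resolve
inside the container, and HADOOP_CLASSPATH itself is passed into the
container via `docker exec --env`. A standalone sketch of the repository
lookup (note that -DforceStdout requires maven-help-plugin 3.1.0 or newer):

    # Print only the evaluated expression; -q suppresses the usual build log.
    MVN_REPO=$(mvn help:evaluate -Dexpression=settings.localRepository -q -DforceStdout)
    echo "MVN_REPO=${MVN_REPO}"    # e.g. /home/user/.m2/repository
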
diff --git a/flink-end-to-end-tests/test-scripts/docker-mesos-cluster/docker-compose.yml b/flink-end-to-end-tests/test-scripts/docker-mesos-cluster/docker-compose.yml
index 6445a3a..366d858 100644
--- a/flink-end-to-end-tests/test-scripts/docker-mesos-cluster/docker-compose.yml
+++ b/flink-end-to-end-tests/test-scripts/docker-mesos-cluster/docker-compose.yml
@@ -36,6 +36,7 @@ services:
     volumes:
       - ${END_TO_END_DIR}:${END_TO_END_DIR}
       - ${FLINK_DIR}:${FLINK_DIR}
+      - ${MVN_REPO}:${MVN_REPO}
     environment:
       MESOS_PORT: 5050
       MESOS_QUORUM: 1
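The added volume bind-mounts the host's Maven repository at the identical
path inside the container, so every absolute jar path in HADOOP_CLASSPATH
stays valid there. Outside of docker-compose, the equivalent mount would look
like this (the image name is a placeholder):

    # Same host and container path; read-only is enough since the tests only
    # read jars from the repository.
    docker run --rm -v "${MVN_REPO}:${MVN_REPO}:ro" some-mesos-image:latest
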
diff --git a/flink-end-to-end-tests/test-scripts/test_mesos_multiple_submissions.sh b/flink-end-to-end-tests/test-scripts/test_mesos_multiple_submissions.sh
index fbfcd31..1cb0746 100755
--- a/flink-end-to-end-tests/test-scripts/test_mesos_multiple_submissions.sh
+++ b/flink-end-to-end-tests/test-scripts/test_mesos_multiple_submissions.sh
@@ -29,7 +29,7 @@ TEST_PROGRAM_JAR=$END_TO_END_DIR/flink-cli-test/target/PeriodicStreamingJob.jar
 
 function submit_job {
     local output_path=$1
-    docker exec mesos-master bash -c "${FLINK_DIR}/bin/flink run -d -p 1 ${TEST_PROGRAM_JAR} --durationInSecond ${DURATION} --outputPath ${output_path}" \
+    docker exec --env HADOOP_CLASSPATH=$HADOOP_CLASSPATH mesos-master bash -c "${FLINK_DIR}/bin/flink run -d -p 1 ${TEST_PROGRAM_JAR} --durationInSecond ${DURATION} --outputPath ${output_path}" \
         | grep "Job has been submitted with JobID" | sed 's/.* //g' | tr -d '\r'
 }
 
@@ -40,6 +40,8 @@ mkdir -p "${TEST_DATA_DIR}"
 # To ensure the old slots are being reused.
 set_config_key "mesos.resourcemanager.tasks.cpus" "${MESOS_AGENT_CPU}"
 
+set_hadoop_classpath
+
 start_flink_cluster_with_mesos
 
 JOB1_ID=$(submit_job ${FIRST_OUTPUT_LOCATION})
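For reference, submit_job extracts the JobID by keeping the last
whitespace-separated token of the CLI's submission line and stripping the
trailing carriage return that docker exec can append. A sketch with a
made-up JobID:

    # The hex id below is invented for illustration.
    echo "Job has been submitted with JobID 5f6e8cf4a1b2c3d4e5f60718293a4b5c" \
        | grep "Job has been submitted with JobID" | sed 's/.* //g' | tr -d '\r'
    # -> 5f6e8cf4a1b2c3d4e5f60718293a4b5c
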
diff --git a/flink-end-to-end-tests/test-scripts/test_mesos_wordcount.sh b/flink-end-to-end-tests/test-scripts/test_mesos_wordcount.sh
index 94db94e..1e1c7c2 100755
--- a/flink-end-to-end-tests/test-scripts/test_mesos_wordcount.sh
+++ b/flink-end-to-end-tests/test-scripts/test_mesos_wordcount.sh
@@ -29,8 +29,10 @@ TEST_PROGRAM_JAR=${FLINK_DIR}/examples/batch/WordCount.jar
 
 mkdir -p "${TEST_DATA_DIR}"
 
+set_hadoop_classpath
+
 start_flink_cluster_with_mesos
 
-docker exec mesos-master nohup bash -c "${FLINK_DIR}/bin/flink run -p 1 ${TEST_PROGRAM_JAR} ${INPUT_ARGS} --output ${OUTPUT_LOCATION}"
+docker exec --env HADOOP_CLASSPATH=$HADOOP_CLASSPATH mesos-master nohup bash -c "${FLINK_DIR}/bin/flink run -p 1 ${TEST_PROGRAM_JAR} ${INPUT_ARGS} --output ${OUTPUT_LOCATION}"
 
 check_result_hash "Mesos WordCount test" "${OUTPUT_LOCATION}" "${RESULT_HASH}"
diff --git a/flink-end-to-end-tests/test-scripts/test_streaming_bucketing.sh b/flink-end-to-end-tests/test-scripts/test_streaming_bucketing.sh
index fbd4749..fef4dd2 100755
--- a/flink-end-to-end-tests/test-scripts/test_streaming_bucketing.sh
+++ b/flink-end-to-end-tests/test-scripts/test_streaming_bucketing.sh
@@ -19,6 +19,8 @@
 
 source "$(dirname "$0")"/common.sh
 
+set_hadoop_classpath
+
 TEST_PROGRAM_JAR=${END_TO_END_DIR}/flink-bucketing-sink-test/target/BucketingSinkTestProgram.jar
 JOB_OUTPUT_DIR=${TEST_DATA_DIR}/out/result
 LOG_DIR=${FLINK_DIR}/log
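test_streaming_bucketing.sh exercises the BucketingSink, which writes through
Hadoop's FileSystem classes, so it now needs HADOOP_CLASSPATH set before the
cluster starts. The calling pattern is the same in every converted script:

    source "$(dirname "$0")"/common.sh
    set_hadoop_classpath    # exports HADOOP_CLASSPATH from the copied file
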
diff --git a/tools/verify_scala_suffixes.sh b/tools/verify_scala_suffixes.sh
index 714f6db..8fd6589 100755
--- a/tools/verify_scala_suffixes.sh
+++ b/tools/verify_scala_suffixes.sh
@@ -85,7 +85,7 @@ block_infected=0
 # b) exist only for dev purposes
 # c) no-one should depend on them
 e2e_modules=$(find flink-end-to-end-tests -mindepth 2 -maxdepth 5 -name 'pom.xml' -printf '%h\n' | sort -u | tr '\n' ',')
-excluded_modules=\!${e2e_modules//,/,\!},!flink-docs
+excluded_modules=\!${e2e_modules//,/,\!},!flink-docs,!flink-end-to-end-tests
 
 echo "Analyzing modules for Scala dependencies using 'mvn dependency:tree'."
 echo "If you haven't built the project, please do so first by running \"mvn clean install -DskipTests\""