Posted to commits@flink.apache.org by rm...@apache.org on 2020/05/26 17:49:25 UTC

[flink] 05/07: [FLINK-17375] Refactor travis_watchdog.sh into separate ci/ and azure-pipelines/ scripts.

This is an automated email from the ASF dual-hosted git repository.

rmetzger pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git

commit a22b130941365813bd055bb8b7a77f6c1d499c33
Author: Robert Metzger <rm...@apache.org>
AuthorDate: Mon May 18 21:34:40 2020 +0200

    [FLINK-17375] Refactor travis_watchdog.sh into separate ci/ and azure-pipelines/ scripts.
    
    The guiding principle in this refactoring was to put everything generic (independent
    of the concrete CI system, such as Travis or Azure) into tools/ci/*
    and the scripts specific to a CI system (currently Azure) into tools/azure-pipelines/*.
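    
    A rough sketch of the resulting call structure (script names as introduced by
    this commit; environment variables and Azure template parameters are omitted):
    
        # Azure-specific job steps delegate to the generic CI scripts:
        ./tools/ci/compile.sh                             # generic: build Flink, check scala suffixes and shaded artifacts
        ./tools/azure-pipelines/create_build_artifact.sh  # Azure-specific: package the build output for later jobs
        ./tools/azure-pipelines/unpack_build_artifact.sh  # Azure-specific: restore the build output in the test jobs
        ./tools/ci/test_controller.sh <stage>             # generic: rebuild modules and run tests under the watchdog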
---
 .github/PULL_REQUEST_TEMPLATE.md                   |   2 +-
 azure-pipelines.yml                                |   3 +-
 flink-end-to-end-tests/run-nightly-tests.sh        |   6 +-
 flink-end-to-end-tests/run-pre-commit-tests.sh     |  64 ----
 .../java/org/apache/flink/yarn/YarnTestBase.java   |  10 +-
 tools/azure-pipelines/azure_controller.sh          | 198 -------------
 tools/azure-pipelines/build-apache-repo.yml        |   3 +-
 tools/azure-pipelines/build-python-wheels.yml      |  12 +-
 tools/azure-pipelines/create_build_artifact.sh     |  40 +++
 tools/azure-pipelines/debug_files_utils.sh         |  35 +++
 tools/azure-pipelines/jobs-template.yml            |  54 ++--
 ...epare_precommit.sh => unpack_build_artifact.sh} |  26 +-
 tools/ci/ci_controller.sh                          | 327 ---------------------
 tools/ci/compile.sh                                |  81 +++++
 tools/ci/controller_utils.sh                       |  62 ++++
 tools/ci/{log4j-ci.properties => log4j.properties} |   0
 tools/ci/maven-utils.sh                            |   2 +-
 tools/ci/shade.sh                                  |   0
 tools/ci/stage.sh                                  |   4 +
 tools/ci/test_controller.sh                        | 124 ++++++++
 tools/ci/watchdog.sh                               | 111 +++++++
 21 files changed, 523 insertions(+), 641 deletions(-)

diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index 6227dc2..6297bcb 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -12,7 +12,7 @@
 
   - Fill out the template below to describe the changes contributed by the pull request. That will give reviewers the context they need to do the review.
   
-  - Make sure that the change passes the automated tests, i.e., `mvn clean verify` passes. You can set up Travis CI to do that following [this guide](https://flink.apache.org/contributing/contribute-code.html#open-a-pull-request).
+  - Make sure that the change passes the automated tests, i.e., `mvn clean verify` passes. You can set up Azure Pipelines CI to do that following [this guide](https://cwiki.apache.org/confluence/display/FLINK/Azure+Pipelines#AzurePipelines-Tutorial:SettingupAzurePipelinesforaforkoftheFlinkrepository).
 
   - Each pull request should address only one issue, not mix up code from multiple issues.
   
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 82c0831..fba75ed 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -48,10 +48,11 @@ resources:
 #   to understand why the secrets are handled like this
 variables:
   MAVEN_CACHE_FOLDER: $(Pipeline.Workspace)/.m2/repository
+  E2E_CACHE_FOLDER: $(Pipeline.Workspace)/e2e_cache
   MAVEN_OPTS: '-Dmaven.repo.local=$(MAVEN_CACHE_FOLDER)'
   CACHE_KEY: maven | $(Agent.OS) | **/pom.xml, !**/target/**
   CACHE_FALLBACK_KEY: maven | $(Agent.OS)
-  CACHE_FLINK_DIR: $(Pipeline.Workspace)/flink_cache
+  FLINK_ARTIFACT_DIR: $(Pipeline.Workspace)/flink_artifact
   SECRET_S3_BUCKET: $[variables.IT_CASE_S3_BUCKET]
   SECRET_S3_ACCESS_KEY: $[variables.IT_CASE_S3_ACCESS_KEY]
   SECRET_S3_SECRET_KEY: $[variables.IT_CASE_S3_SECRET_KEY]
diff --git a/flink-end-to-end-tests/run-nightly-tests.sh b/flink-end-to-end-tests/run-nightly-tests.sh
index f7af3b3..69a2ec3 100755
--- a/flink-end-to-end-tests/run-nightly-tests.sh
+++ b/flink-end-to-end-tests/run-nightly-tests.sh
@@ -45,7 +45,7 @@ if [ ! -z "$TF_BUILD" ] ; then
 		echo "COMPRESSING build artifacts."
 		COMPRESSED_ARCHIVE=${BUILD_BUILDNUMBER}.tgz
 		mkdir compressed-archive-dir
-		tar -zcvf compressed-archive-dir/${COMPRESSED_ARCHIVE} $ARTIFACTS_DIR
+		tar -zcvf compressed-archive-dir/${COMPRESSED_ARCHIVE} -C $ARTIFACTS_DIR .
 		echo "##vso[task.setvariable variable=ARTIFACT_DIR]$(pwd)/compressed-archive-dir"
 	}
 	on_exit compress_logs
@@ -235,7 +235,7 @@ printf "Running Java end-to-end tests\n"
 printf "==============================================================================\n"
 
 
-LOG4J_PROPERTIES=${END_TO_END_DIR}/../tools/ci/log4j-ci.properties
+LOG4J_PROPERTIES=${END_TO_END_DIR}/../tools/ci/log4j.properties
 
 MVN_LOGGING_OPTIONS="-Dlog.dir=${ARTIFACTS_DIR} -DlogBackupDir=${ARTIFACTS_DIR} -Dlog4j.configurationFile=file://$LOG4J_PROPERTIES"
 MVN_COMMON_OPTIONS="-Dflink.forkCount=2 -Dflink.forkCountTestPackage=2 -Dfast -Pskip-webui-build"
@@ -243,7 +243,7 @@ e2e_modules=$(find flink-end-to-end-tests -mindepth 2 -maxdepth 5 -name 'pom.xml
 e2e_modules="${e2e_modules},$(find flink-walkthroughs -mindepth 2 -maxdepth 2 -name 'pom.xml' -printf '%h\n' | sort -u | tr '\n' ',')"
 
 PROFILE="$PROFILE -Pe2e-travis1 -Pe2e-travis2 -Pe2e-travis3 -Pe2e-travis4 -Pe2e-travis5 -Pe2e-travis6"
-run_mvn ${MVN_COMMON_OPTIONS} ${MVN_LOGGING_OPTIONS} ${PROFILE} verify -pl ${e2e_modules} -DdistDir=$(readlink -e build-target)
+run_mvn ${MVN_COMMON_OPTIONS} ${MVN_LOGGING_OPTIONS} ${PROFILE} verify -pl ${e2e_modules} -DdistDir=$(readlink -e build-target) -Dcache-dir=$E2E_CACHE_FOLDER -Dcache-ttl=P1M
 
 EXIT_CODE=$?
 
diff --git a/flink-end-to-end-tests/run-pre-commit-tests.sh b/flink-end-to-end-tests/run-pre-commit-tests.sh
deleted file mode 100755
index 94c9b9e..0000000
--- a/flink-end-to-end-tests/run-pre-commit-tests.sh
+++ /dev/null
@@ -1,64 +0,0 @@
-#!/usr/bin/env bash
-################################################################################
-#  Licensed to the Apache Software Foundation (ASF) under one
-#  or more contributor license agreements.  See the NOTICE file
-#  distributed with this work for additional information
-#  regarding copyright ownership.  The ASF licenses this file
-#  to you under the Apache License, Version 2.0 (the
-#  "License"); you may not use this file except in compliance
-#  with the License.  You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-# limitations under the License.
-################################################################################
-
-END_TO_END_DIR="`dirname \"$0\"`" # relative
-END_TO_END_DIR="`( cd \"$END_TO_END_DIR\" && pwd -P )`" # absolutized and normalized
-if [ -z "$END_TO_END_DIR" ] ; then
-    # error; for some reason, the path is not accessible
-    # to the script (e.g. permissions re-evaled after suid)
-    exit 1  # fail
-fi
-
-export END_TO_END_DIR
-
-if [ -z "$FLINK_DIR" ] ; then
-    echo "You have to export the Flink distribution directory as FLINK_DIR"
-    exit 1
-fi
-
-source ${END_TO_END_DIR}/test-scripts/test-runner-common.sh
-
-FLINK_DIR="`( cd \"$FLINK_DIR\" && pwd -P)`" # absolutized and normalized
-
-echo "flink-end-to-end-test directory: $END_TO_END_DIR"
-echo "Flink distribution directory: $FLINK_DIR"
-
-# Template for adding a test:
-# run_test "<description>" "$END_TO_END_DIR/test-scripts/<script_name>" ["skip_check_exceptions"]
-
-# IMPORTANT:
-# With the "skip_check_exceptions" flag one can disable default exceptions and errors checking in log files. This should be done
-# carefully though. A valid reasons for doing so could be e.g killing TMs randomly as we cannot predict what exception could be thrown. Whenever
-# those checks are disabled, one should take care that a proper checks are performed in the tests itself that ensure that the test finished
-# in an expected state.
-
-run_test "State Migration end-to-end test from 1.6" "$END_TO_END_DIR/test-scripts/test_state_migration.sh"
-run_test "State Evolution end-to-end test" "$END_TO_END_DIR/test-scripts/test_state_evolution.sh"
-run_test "Wordcount end-to-end test" "$END_TO_END_DIR/test-scripts/test_batch_wordcount.sh file"
-run_test "Shaded Hadoop S3A end-to-end test" "$END_TO_END_DIR/test-scripts/test_batch_wordcount.sh hadoop"
-run_test "Shaded Hadoop S3A end-to-end test (minio)" "$END_TO_END_DIR/test-scripts/test_batch_wordcount.sh hadoop_minio"
-run_test "Shaded Presto S3 end-to-end test" "$END_TO_END_DIR/test-scripts/test_batch_wordcount.sh presto"
-run_test "Shaded Presto S3 end-to-end test (minio)" "$END_TO_END_DIR/test-scripts/test_batch_wordcount.sh presto_minio"
-run_test "Custom FS plugin end-to-end test" "$END_TO_END_DIR/test-scripts/test_batch_wordcount.sh dummy-fs"
-
-run_test "Kinesis end-to-end test" "$END_TO_END_DIR/test-scripts/test_streaming_kinesis.sh"
-run_test "class loading end-to-end test" "$END_TO_END_DIR/test-scripts/test_streaming_classloader.sh"
-run_test "Distributed cache end-to-end test" "$END_TO_END_DIR/test-scripts/test_streaming_distributed_cache_via_blob.sh"
-printf "\n[PASS] All tests passed\n"
-exit 0
diff --git a/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YarnTestBase.java b/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YarnTestBase.java
index 4fbf89a..fca47e0 100644
--- a/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YarnTestBase.java
+++ b/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YarnTestBase.java
@@ -449,7 +449,7 @@ public abstract class YarnTestBase extends TestLogger {
 							}
 
 							if (!whitelistedFound) {
-								// logging in FATAL to see the actual message in TRAVIS tests.
+								// logging in FATAL to see the actual message in CI tests.
 								Marker fatal = MarkerFactory.getMarker("FATAL");
 								LOG.error(fatal, "Prohibited String '{}' in '{}:{}'", aProhibited, f.getAbsolutePath(), lineFromFile);
 
@@ -1048,10 +1048,10 @@ public abstract class YarnTestBase extends TestLogger {
 			hdfsSiteXML.delete();
 		}
 
-		// When we are on travis, we copy the temp files of JUnit (containing the MiniYARNCluster log files)
+		// When we are on CI, we copy the temp files of JUnit (containing the MiniYARNCluster log files)
 		// to <flinkRoot>/target/flink-yarn-tests-*.
 		// The files from there are picked up by the tools/ci/* scripts to upload them.
-		if (isOnTravis()) {
+		if (isOnCI()) {
 			File target = new File("../target" + YARN_CONFIGURATION.get(TEST_CLUSTER_NAME_KEY));
 			if (!target.mkdirs()) {
 				LOG.warn("Error creating dirs to {}", target);
@@ -1067,8 +1067,8 @@ public abstract class YarnTestBase extends TestLogger {
 
 	}
 
-	public static boolean isOnTravis() {
-		return System.getenv("TRAVIS") != null && System.getenv("TRAVIS").equals("true");
+	public static boolean isOnCI() {
+		return System.getenv("IS_CI") != null && System.getenv("IS_CI").equals("true");
 	}
 
 	protected void waitApplicationFinishedElseKillIt(
diff --git a/tools/azure-pipelines/azure_controller.sh b/tools/azure-pipelines/azure_controller.sh
deleted file mode 100755
index bbe2faa..0000000
--- a/tools/azure-pipelines/azure_controller.sh
+++ /dev/null
@@ -1,198 +0,0 @@
-#!/usr/bin/env bash
-################################################################################
-#  Licensed to the Apache Software Foundation (ASF) under one
-#  or more contributor license agreements.  See the NOTICE file
-#  distributed with this work for additional information
-#  regarding copyright ownership.  The ASF licenses this file
-#  to you under the Apache License, Version 2.0 (the
-#  "License"); you may not use this file except in compliance
-#  with the License.  You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-# limitations under the License.
-################################################################################
-
-HERE="`dirname \"$0\"`"             # relative
-HERE="`( cd \"$HERE\" && pwd )`"    # absolutized and normalized
-if [ -z "$HERE" ] ; then
-    exit 1  # fail
-fi
-CI_DIR="$HERE/../ci"
-
-# source required ci scripts
-source "${CI_DIR}/stage.sh"
-source "${CI_DIR}/shade.sh"
-source "${CI_DIR}/maven-utils.sh"
-
-echo $M2_HOME
-echo $PATH
-echo $MAVEN_OPTS
-
-run_mvn -version
-echo "Commit: $(git rev-parse HEAD)"
-
-print_system_info() {
-    echo "CPU information"
-    lscpu
-
-    echo "Memory information"
-    cat /proc/meminfo
-
-    echo "Disk information"
-    df -hH
-
-    echo "Running build as"
-    whoami
-}
-
-print_system_info
-
-
-STAGE=$1
-echo "Current stage: \"$STAGE\""
-
-EXIT_CODE=0
-
-# Set up a custom Maven settings file, configuring an Google-hosted maven central
-# mirror. We use a different mirror because the official maven central mirrors
-# often lead to connection timeouts (probably due to rate-limiting)
-
-MVN="run_mvn clean install $MAVEN_OPTS -Dflink.convergence.phase=install -Pcheck-convergence -Dflink.forkCount=2 -Dflink.forkCountTestPackage=2 -Dmaven.javadoc.skip=true -U -DskipTests"
-
-# Run actual compile&test steps
-if [ $STAGE == "$STAGE_COMPILE" ]; then
-    # run mvn clean install:
-    $MVN
-    EXIT_CODE=$?
-
-    if [ $EXIT_CODE == 0 ]; then
-        echo "\n\n==============================================================================\n"
-        echo "Checking scala suffixes\n"
-        echo "==============================================================================\n"
-
-        ./tools/ci/verify_scala_suffixes.sh "${PROFILE}"
-        EXIT_CODE=$?
-    else
-        echo "\n==============================================================================\n"
-        echo "Previous build failure detected, skipping scala-suffixes check.\n"
-        echo "==============================================================================\n"
-    fi
-    
-    if [ $EXIT_CODE == 0 ]; then
-        check_shaded_artifacts
-        EXIT_CODE=$(($EXIT_CODE+$?))
-        check_shaded_artifacts_s3_fs hadoop
-        EXIT_CODE=$(($EXIT_CODE+$?))
-        check_shaded_artifacts_s3_fs presto
-        EXIT_CODE=$(($EXIT_CODE+$?))
-        check_shaded_artifacts_connector_elasticsearch 2
-        EXIT_CODE=$(($EXIT_CODE+$?))
-        check_shaded_artifacts_connector_elasticsearch 5
-        EXIT_CODE=$(($EXIT_CODE+$?))
-        check_shaded_artifacts_connector_elasticsearch 6
-        EXIT_CODE=$(($EXIT_CODE+$?))
-    else
-        echo "=============================================================================="
-        echo "Previous build failure detected, skipping shaded dependency check."
-        echo "=============================================================================="
-    fi
-
-    if [ $EXIT_CODE == 0 ]; then
-        echo "Creating cache build directory $CACHE_FLINK_DIR"
-    
-        cp -r . "$CACHE_FLINK_DIR"
-
-        function minimizeCachedFiles() {
-            # reduces the size of the cached directory to speed up
-            # the packing&upload / download&unpacking process
-            # by removing files not required for subsequent stages
-    
-            # jars are re-built in subsequent stages, so no need to cache them (cannot be avoided)
-            find "$CACHE_FLINK_DIR" -maxdepth 8 -type f -name '*.jar' \
-            ! -path "$CACHE_FLINK_DIR/flink-formats/flink-csv/target/flink-csv*.jar" \
-            ! -path "$CACHE_FLINK_DIR/flink-formats/flink-json/target/flink-json*.jar" \
-            ! -path "$CACHE_FLINK_DIR/flink-formats/flink-avro/target/flink-avro*.jar" \
-            ! -path "$CACHE_FLINK_DIR/flink-runtime/target/flink-runtime*tests.jar" \
-            ! -path "$CACHE_FLINK_DIR/flink-streaming-java/target/flink-streaming-java*tests.jar" \
-            ! -path "$CACHE_FLINK_DIR/flink-dist/target/flink-*-bin/flink-*/lib/flink-dist*.jar" \
-            ! -path "$CACHE_FLINK_DIR/flink-dist/target/flink-*-bin/flink-*/lib/flink-table_*.jar" \
-            ! -path "$CACHE_FLINK_DIR/flink-dist/target/flink-*-bin/flink-*/lib/flink-table-blink*.jar" \
-            ! -path "$CACHE_FLINK_DIR/flink-dist/target/flink-*-bin/flink-*/opt/flink-python*.jar" \
-            ! -path "$CACHE_FLINK_DIR/flink-dist/target/flink-*-bin/flink-*/opt/flink-sql-client_*.jar" \
-            ! -path "$CACHE_FLINK_DIR/flink-connectors/flink-connector-elasticsearch-base/target/flink-*.jar" \
-            ! -path "$CACHE_FLINK_DIR/flink-connectors/flink-connector-kafka-base/target/flink-*.jar" \
-            ! -path "$CACHE_FLINK_DIR/flink-table/flink-table-planner/target/flink-table-planner*tests.jar" | xargs rm -rf
-    
-            # .git directory
-            # not deleting this can cause build stability issues
-            # merging the cached version sometimes fails
-            rm -rf "$CACHE_FLINK_DIR/.git"
-
-            # AZ Pipelines has a problem with links.
-            rm "$CACHE_FLINK_DIR/build-target"
-        }
-    
-        echo "Minimizing cache"
-        minimizeCachedFiles
-    else
-        echo "=============================================================================="
-        echo "Previous build failure detected, skipping cache setup."
-        echo "=============================================================================="
-    fi
-elif [ $STAGE != "$STAGE_CLEANUP" ]; then
-    if ! [ -e $CACHE_FLINK_DIR ]; then
-        echo "Cached flink dir $CACHE_FLINK_DIR does not exist. Exiting build."
-        exit 1
-    fi
-    # merged compiled flink into local clone
-    # this prevents the cache from being re-uploaded
-    echo "Merging cache"
-    cp -RT "$CACHE_FLINK_DIR" "."
-
-    echo "Adjusting timestamps"
-    # adjust timestamps to prevent recompilation
-    find . -type f -name '*.java' | xargs touch
-    find . -type f -name '*.scala' | xargs touch
-    # wait a bit for better odds of different timestamps
-    sleep 5
-    find . -type f -name '*.class' | xargs touch
-    find . -type f -name '*.timestamp' | xargs touch
-
-    if [ $STAGE == $STAGE_PYTHON ]; then
-        echo "=============================================================================="
-        echo "Python stage found. Re-compiling (this is required on Azure for the python tests to pass)"
-        echo "=============================================================================="
-        # run mvn install (w/o "clean"):
-        PY_MVN="${MVN// clean/}"
-        PY_MVN="$PY_MVN -Drat.skip=true"
-        ${PY_MVN}
-        EXIT_CODE=$?
-
-        if [ $EXIT_CODE != 0 ]; then
-            echo "=============================================================================="
-            echo "Compile error for python stage preparation. Exit code: $EXIT_CODE. Failing build"
-            echo "=============================================================================="
-            exit $EXIT_CODE
-        fi
-        
-        echo "Done compiling ... "
-    fi
-
-
-    TEST="$STAGE" "./tools/ci/ci_controller.sh" 900
-    EXIT_CODE=$?
-elif [ $STAGE == "$STAGE_CLEANUP" ]; then
-    echo "Cleaning up $CACHE_BUILD_DIR"
-    rm -rf "$CACHE_BUILD_DIR"
-else
-    echo "Invalid Stage specified: $STAGE"
-    exit 1
-fi
-
-# Exit code for Azure build success/failure
-exit $EXIT_CODE
diff --git a/tools/azure-pipelines/build-apache-repo.yml b/tools/azure-pipelines/build-apache-repo.yml
index dab6d95..fc0f80d 100644
--- a/tools/azure-pipelines/build-apache-repo.yml
+++ b/tools/azure-pipelines/build-apache-repo.yml
@@ -42,10 +42,11 @@ resources:
 
 variables:
   MAVEN_CACHE_FOLDER: $(Pipeline.Workspace)/.m2/repository
+  E2E_CACHE_FOLDER: $(Pipeline.Workspace)/e2e_cache
   MAVEN_OPTS: '-Dmaven.repo.local=$(MAVEN_CACHE_FOLDER)'
   CACHE_KEY: maven | $(Agent.OS) | **/pom.xml, !**/target/**
   CACHE_FALLBACK_KEY: maven | $(Agent.OS)
-  CACHE_FLINK_DIR: $(Pipeline.Workspace)/flink_cache
+  FLINK_ARTIFACT_DIR: $(Pipeline.Workspace)/flink_artifact
   SECRET_S3_BUCKET: $[variables.IT_CASE_S3_BUCKET]
   SECRET_S3_ACCESS_KEY: $[variables.IT_CASE_S3_ACCESS_KEY]
   SECRET_S3_SECRET_KEY: $[variables.IT_CASE_S3_SECRET_KEY]
diff --git a/tools/azure-pipelines/build-python-wheels.yml b/tools/azure-pipelines/build-python-wheels.yml
index f4bc620..b2bdfa8 100644
--- a/tools/azure-pipelines/build-python-wheels.yml
+++ b/tools/azure-pipelines/build-python-wheels.yml
@@ -22,8 +22,10 @@ jobs:
       clean: all
     steps:
       # Compile
-      - script: STAGE=compile ${{parameters.environment}} ./tools/azure-pipelines/azure_controller.sh compile
-        displayName: Build
+      - script: |
+          ${{parameters.environment}} ./tools/ci/compile.sh
+          ./tools/azure-pipelines/create_build_artifact.sh
+        displayName: Compile
 
       - script: |
           VERSION=$(mvn --file pom.xml org.apache.maven.plugins:maven-help-plugin:3.1.0:evaluate -Dexpression=project.version -q -DforceStdout)
@@ -38,8 +40,8 @@ jobs:
       # upload artifacts for building wheels
       - task: PublishPipelineArtifact@1
         inputs:
-          targetPath: $(Pipeline.Workspace)/flink.tar.gz
-          artifactName: FlinkCompileCacheDir-${{parameters.stage_name}}
+          path: $(FLINK_ARTIFACT_DIR)
+          artifact: FlinkCompileArtifact-${{parameters.stage_name}}
 
   - job: build_wheels
     dependsOn: compile_${{parameters.stage_name}}
@@ -58,7 +60,7 @@ jobs:
       - task: DownloadPipelineArtifact@2
         inputs:
           path: $(Pipeline.Workspace)
-          artifact: FlinkCompileCacheDir-${{parameters.stage_name}}
+          artifact: FlinkCompileArtifact-${{parameters.stage_name}}
       - script: |
           tar zxf $(Pipeline.Workspace)/flink.tar.gz -C $(Pipeline.Workspace)
           mkdir -p flink-dist/target/flink-$(VERSION)-bin
diff --git a/tools/azure-pipelines/create_build_artifact.sh b/tools/azure-pipelines/create_build_artifact.sh
new file mode 100755
index 0000000..6de32e9
--- /dev/null
+++ b/tools/azure-pipelines/create_build_artifact.sh
@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+################################################################################
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+echo "Creating build artifact dir $FLINK_ARTIFACT_DIR"
+
+cp -r . "$FLINK_ARTIFACT_DIR"
+
+echo "Minimizing artifact files"
+
+# reduces the size of the artifact directory to speed up
+# the packing&upload / download&unpacking process
+# by removing files not required for subsequent stages
+
+# jars are re-built in subsequent stages, so no need to cache them (cannot be avoided)
+find "$FLINK_ARTIFACT_DIR" -maxdepth 8 -type f -name '*.jar' | xargs rm -rf
+
+# .git directory
+# not deleting this can cause build stability issues
+# merging the cached version sometimes fails
+rm -rf "$FLINK_ARTIFACT_DIR/.git"
+
+# AZ Pipelines has a problem with links.
+rm "$FLINK_ARTIFACT_DIR/build-target"
+
diff --git a/tools/azure-pipelines/debug_files_utils.sh b/tools/azure-pipelines/debug_files_utils.sh
new file mode 100755
index 0000000..6c87192
--- /dev/null
+++ b/tools/azure-pipelines/debug_files_utils.sh
@@ -0,0 +1,35 @@
+#!/usr/bin/env bash
+################################################################################
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+function prepare_debug_files {
+	MODULE=$1
+	export DEBUG_FILES_OUTPUT_DIR="$AGENT_TEMPDIRECTORY/debug_files/"
+	export DEBUG_FILES_NAME="$(echo $MODULE | tr -dc '[:alnum:]\n\r')-$(date +%s)"
+	echo "##vso[task.setvariable variable=DEBUG_FILES_OUTPUT_DIR]$DEBUG_FILES_OUTPUT_DIR"
+	echo "##vso[task.setvariable variable=DEBUG_FILES_NAME]$DEBUG_FILES_NAME"
+	mkdir -p $DEBUG_FILES_OUTPUT_DIR || { echo "FAILURE: cannot create log directory '${DEBUG_FILES_OUTPUT_DIR}'." ; exit 1; }
+}
+
+function compress_debug_files {
+	echo "Compressing debug files"
+	tar -zcvf /tmp/$DEBUG_FILES_NAME.tgz -C $DEBUG_FILES_OUTPUT_DIR .
+	# clean directory
+	rm -rf $DEBUG_FILES_OUTPUT_DIR ; mkdir -p $DEBUG_FILES_OUTPUT_DIR
+	mv /tmp/$DEBUG_FILES_NAME.tgz $DEBUG_FILES_OUTPUT_DIR
+}
diff --git a/tools/azure-pipelines/jobs-template.yml b/tools/azure-pipelines/jobs-template.yml
index fbe5bd8..d8dc2d8 100644
--- a/tools/azure-pipelines/jobs-template.yml
+++ b/tools/azure-pipelines/jobs-template.yml
@@ -24,7 +24,8 @@ parameters:
 
 jobs:
 - job: compile_${{parameters.stage_name}}
-  condition: not(eq(variables['MODE'], 'e2e'))
+  # succeeded() is needed to allow job cancellation
+  condition: and(succeeded(), not(eq(variables['MODE'], 'e2e')))
   pool: ${{parameters.test_pool_definition}}
   container: ${{parameters.container}}
   timeoutInMinutes: 240
@@ -64,14 +65,16 @@ jobs:
     displayName: "Set to jdk11"
     condition: eq('${{parameters.jdk}}', 'jdk11')
   # Compile
-  - script: STAGE=compile ${{parameters.environment}} ./tools/azure-pipelines/azure_controller.sh compile
-    displayName: Build
+  - script: |
+      ${{parameters.environment}} ./tools/ci/compile.sh || exit $?
+      ./tools/azure-pipelines/create_build_artifact.sh
+    displayName: Compile
 
   # upload artifacts for next stage
   - task: PublishPipelineArtifact@1
     inputs:
-      path: $(CACHE_FLINK_DIR)
-      artifact: FlinkCompileCacheDir-${{parameters.stage_name}}
+      targetPath: $(FLINK_ARTIFACT_DIR)
+      artifact: FlinkCompileArtifact-${{parameters.stage_name}}
 
 - job: test_${{parameters.stage_name}}
   dependsOn: compile_${{parameters.stage_name}}
@@ -107,11 +110,14 @@ jobs:
     condition: not(eq('${{parameters.test_pool_definition.name}}', 'Default'))
     displayName: Free up disk space
 
-  # download artifacts
+  # download artifact from compile stage
   - task: DownloadPipelineArtifact@2
     inputs:
-      path: $(CACHE_FLINK_DIR)
-      artifact: FlinkCompileCacheDir-${{parameters.stage_name}}
+      path: $(FLINK_ARTIFACT_DIR)
+      artifact: FlinkCompileArtifact-${{parameters.stage_name}}
+
+  - script: ./tools/azure-pipelines/unpack_build_artifact.sh
+    displayName: "Unpack Build artifact"
 
   - task: Cache@2
     inputs:
@@ -121,15 +127,25 @@ jobs:
     continueOnError: true # continue the build even if the cache fails.
     condition: not(eq('${{parameters.test_pool_definition.name}}', 'Default'))
     displayName: Cache Maven local repo
+
   - script: |
       echo "##vso[task.setvariable variable=JAVA_HOME]$JAVA_HOME_11_X64"
       echo "##vso[task.setvariable variable=PATH]$JAVA_HOME_11_X64/bin:$PATH"
     displayName: "Set to jdk11"
     condition: eq('${{parameters.jdk}}', 'jdk11')  
+
   - script: sudo sysctl -w kernel.core_pattern=core.%p
     displayName: Set coredump pattern
+
   # Test
-  - script: STAGE=test ${{parameters.environment}} ./tools/azure-pipelines/azure_controller.sh $(module)
+  - script: |
+      source ./tools/azure-pipelines/debug_files_utils.sh
+      prepare_debug_files $(module)
+      
+      ${{parameters.environment}} ./tools/ci/test_controller.sh $(module) ; TEST_EXIT_CODE=$?
+    
+      compress_debug_files
+      exit $TEST_EXIT_CODE
     displayName: Test - $(module)
     env:
       IT_CASE_S3_BUCKET: $(SECRET_S3_BUCKET)
@@ -139,13 +155,14 @@ jobs:
   - task: PublishTestResults@2
     inputs:
       testResultsFormat: 'JUnit'
+
   # upload debug artifacts
   - task: PublishPipelineArtifact@1
-    condition: and(succeededOrFailed(), not(eq('$(ARTIFACT_DIR)', '')))
+    condition: and(succeededOrFailed(), not(eq('$(DEBUG_FILES_OUTPUT_DIR)', '')))
     displayName: Upload Logs
     inputs:
-      path: $(ARTIFACT_DIR)
-      artifact: logs-${{parameters.stage_name}}-$(ARTIFACT_NAME)
+      targetPath: $(DEBUG_FILES_OUTPUT_DIR)
+      artifact: logs-${{parameters.stage_name}}-$(DEBUG_FILES_NAME)
 
 - job: e2e_${{parameters.stage_name}}
   # uncomment below condition to run the e2e tests only on request.
@@ -164,6 +181,12 @@ jobs:
         path: $(MAVEN_CACHE_FOLDER)
       displayName: Cache Maven local repo
       continueOnError: true
+    - task: Cache@2
+      inputs:
+        key: e2e-cache | flink-end-to-end-tests/**/*.java, !**/avro/**
+        path: $(E2E_CACHE_FOLDER)
+      displayName: Cache E2E files
+      continueOnError: true
     - script: |
         echo "##vso[task.setvariable variable=JAVA_HOME]$JAVA_HOME_11_X64"
         echo "##vso[task.setvariable variable=PATH]$JAVA_HOME_11_X64/bin:$PATH"
@@ -178,11 +201,8 @@ jobs:
     - script: ./tools/azure-pipelines/free_disk_space.sh
       displayName: Free up disk space
     - script: sudo apt-get install -y bc
-    - script: ${{parameters.environment}} STAGE=compile ./tools/azure-pipelines/azure_controller.sh compile
+    - script: ${{parameters.environment}} ./tools/ci/compile.sh
       displayName: Build Flink
-    # TODO remove pre-commit tests script by adding the tests to the nightly script
-#    - script: FLINK_DIR=build-target ./flink-end-to-end-tests/run-pre-commit-tests.sh
-#      displayName: Test - precommit 
     - script: ${{parameters.environment}} FLINK_DIR=`pwd`/build-target flink-end-to-end-tests/run-nightly-tests.sh
       displayName: Run e2e tests
       env:
@@ -194,7 +214,7 @@ jobs:
       condition: and(succeededOrFailed(), not(eq(variables['ARTIFACT_DIR'], '')))
       displayName: Upload Logs
       inputs:
-        path: $(ARTIFACT_DIR)
+        targetPath: $(ARTIFACT_DIR)
         artifact: logs-${{parameters.stage_name}}-e2e
         
 
diff --git a/tools/azure-pipelines/prepare_precommit.sh b/tools/azure-pipelines/unpack_build_artifact.sh
similarity index 68%
rename from tools/azure-pipelines/prepare_precommit.sh
rename to tools/azure-pipelines/unpack_build_artifact.sh
index cac545b..1f2b7f0 100755
--- a/tools/azure-pipelines/prepare_precommit.sh
+++ b/tools/azure-pipelines/unpack_build_artifact.sh
@@ -15,10 +15,17 @@
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
 # limitations under the License.
+################################################################################
+
 
+if ! [ -e $FLINK_ARTIFACT_DIR ]; then
+    echo "Cached flink dir $FLINK_ARTIFACT_DIR does not exist. Exiting build."
+    exit 1
+fi
 
 echo "Merging cache"
-cp -RT "$CACHE_FLINK_DIR" "."
+cp -RT "$FLINK_ARTIFACT_DIR" "."
+
 echo "Adjusting timestamps"
 # adjust timestamps to prevent recompilation
 find . -type f -name '*.java' | xargs touch
@@ -28,20 +35,3 @@ sleep 5
 find . -type f -name '*.class' | xargs touch
 find . -type f -name '*.timestamp' | xargs touch
 
-
-export M2_HOME=/home/vsts/maven_cache/apache-maven-3.2.5/ 
-export PATH=/home/vsts/maven_cache/apache-maven-3.2.5/bin:$PATH
-run_mvn -version
-MVN_CALL="run_mvn install -DskipTests -Drat.skip"
-$MVN_CALL
-EXIT_CODE=$?
-
-if [ $EXIT_CODE != 0 ]; then
-	echo "=============================================================================="
-	echo "Build error. Exit code: $EXIT_CODE. Failing build"
-	echo "=============================================================================="
-	exit $EXIT_CODE
-fi
-
-chmod -R +x build-target
-chmod -R +x flink-end-to-end-tests
diff --git a/tools/ci/ci_controller.sh b/tools/ci/ci_controller.sh
deleted file mode 100755
index 90ee551..0000000
--- a/tools/ci/ci_controller.sh
+++ /dev/null
@@ -1,327 +0,0 @@
-#!/usr/bin/env bash
-################################################################################
-#  Licensed to the Apache Software Foundation (ASF) under one
-#  or more contributor license agreements.  See the NOTICE file
-#  distributed with this work for additional information
-#  regarding copyright ownership.  The ASF licenses this file
-#  to you under the Apache License, Version 2.0 (the
-#  "License"); you may not use this file except in compliance
-#  with the License.  You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-# limitations under the License.
-################################################################################
-
-HERE="`dirname \"$0\"`"				# relative
-HERE="`( cd \"$HERE\" && pwd )`" 	# absolutized and normalized
-if [ -z "$HERE" ] ; then
-	# error; for some reason, the path is not accessible
-	# to the script (e.g. permissions re-evaled after suid)
-	exit 1  # fail
-fi
-
-source "${HERE}/stage.sh"
-source "${HERE}/maven-utils.sh"
-
-ARTIFACTS_DIR="${HERE}/artifacts"
-
-mkdir -p $ARTIFACTS_DIR || { echo "FAILURE: cannot create log directory '${ARTIFACTS_DIR}'." ; exit 1; }
-
-echo "Build for commit ${TRAVIS_COMMIT} of ${TRAVIS_REPO_SLUG} [build ID: ${TRAVIS_BUILD_ID}, job number: $TRAVIS_JOB_NUMBER]." | tee "${ARTIFACTS_DIR}/build_info"
-
-# =============================================================================
-# CONFIG
-# =============================================================================
-
-# Number of seconds w/o output before printing a stack trace and killing $MVN
-MAX_NO_OUTPUT=${1:-900}
-
-# Number of seconds to sleep before checking the output again
-SLEEP_TIME=20
-
-# Maximum times to retry uploading artifacts file to transfer.sh
-TRANSFER_UPLOAD_MAX_RETRIES=2
-
-# The delay between two retries to upload artifacts file to transfer.sh. The default exponential
-# backoff algorithm should be too long for the last several retries.
-TRANSFER_UPLOAD_RETRY_DELAY=5
-
-LOG4J_PROPERTIES=${HERE}/log4j-ci.properties
-
-PYTHON_TEST="./flink-python/dev/lint-python.sh"
-PYTHON_PID="${ARTIFACTS_DIR}/watchdog.python.pid"
-PYTHON_EXIT="${ARTIFACTS_DIR}/watchdog.python.exit"
-PYTHON_OUT="${ARTIFACTS_DIR}/python.out"
-
-MVN_COMPILE_MODULES=$(get_compile_modules_for_stage ${TEST})
-MVN_TEST_MODULES=$(get_test_modules_for_stage ${TEST})
-
-# Maven command to run. We set the forkCount manually, because otherwise Maven sees too many cores
-# on the Travis VMs. Set forkCountTestPackage to 1 for container-based environment (4 GiB memory)
-# and 2 for sudo-enabled environment (7.5 GiB memory).
-MVN_LOGGING_OPTIONS="-Dlog.dir=${ARTIFACTS_DIR} -Dlog4j.configurationFile=file://$LOG4J_PROPERTIES"
-MVN_COMMON_OPTIONS="-Dflink.forkCount=2 -Dflink.forkCountTestPackage=2 -Dfast -Pskip-webui-build $MVN_LOGGING_OPTIONS"
-MVN_COMPILE_OPTIONS="-DskipTests"
-MVN_TEST_OPTIONS="-Dflink.tests.with-openssl"
-
-e2e_modules=$(find flink-end-to-end-tests -mindepth 2 -maxdepth 5 -name 'pom.xml' -printf '%h\n' | sort -u | tr '\n' ',')
-
-MVN_COMPILE="run_mvn $MVN_COMMON_OPTIONS $MVN_COMPILE_OPTIONS $PROFILE $MVN_COMPILE_MODULES install"
-MVN_TEST="run_mvn $MVN_COMMON_OPTIONS $MVN_TEST_OPTIONS $PROFILE $MVN_TEST_MODULES verify"
-# don't move the e2e-pre-commit profile activation into the misc entry in .travis.yml, since it breaks caching
-MVN_E2E="run_mvn $MVN_COMMON_OPTIONS $MVN_TEST_OPTIONS -Pe2e-pre-commit -pl ${e2e_modules},flink-dist verify"
-
-MVN_PID="${ARTIFACTS_DIR}/watchdog.mvn.pid"
-MVN_EXIT="${ARTIFACTS_DIR}/watchdog.mvn.exit"
-MVN_OUT="${ARTIFACTS_DIR}/mvn.out"
-
-TRACE_OUT="${ARTIFACTS_DIR}/jps-traces.out"
-
-# E.g. travis-artifacts/apache/flink/1595/1595.1
-UPLOAD_TARGET_PATH="travis-artifacts/${TRAVIS_REPO_SLUG}/${TRAVIS_BUILD_NUMBER}/"
-# These variables are stored as secure variables in '.travis.yml', which are generated per repo via
-# the travis command line tool.
-UPLOAD_BUCKET=$ARTIFACTS_AWS_BUCKET
-UPLOAD_ACCESS_KEY=$ARTIFACTS_AWS_ACCESS_KEY
-UPLOAD_SECRET_KEY=$ARTIFACTS_AWS_SECRET_KEY
-
-ARTIFACTS_FILE=${TRAVIS_JOB_NUMBER}.tar.gz
-
-if [ ! -z "$TF_BUILD" ] ; then
-	# set proper artifacts file name on Azure Pipelines
-	ARTIFACTS_FILE=${BUILD_BUILDNUMBER}.tar.gz
-fi
-
-# enable coredumps
-ulimit -c unlimited
-export JAVA_TOOL_OPTIONS="-XX:+HeapDumpOnOutOfMemoryError"
-
-if [ $TEST == $STAGE_PYTHON ]; then
-	CMD=$PYTHON_TEST
-	CMD_PID=$PYTHON_PID
-	CMD_OUT=$PYTHON_OUT
-	CMD_EXIT=$PYTHON_EXIT
-	CMD_TYPE="PYTHON"
-else
-	CMD=$MVN_COMPILE
-	CMD_PID=$MVN_PID
-	CMD_OUT=$MVN_OUT
-	CMD_EXIT=$MVN_EXIT
-	CMD_TYPE="MVN"
-fi
-
-# =============================================================================
-# FUNCTIONS
-# =============================================================================
-
-upload_artifacts_s3() {
-	echo "PRODUCED build artifacts."
-
-	ls $ARTIFACTS_DIR
-
-	echo "COMPRESSING build artifacts."
-
-	cd $ARTIFACTS_DIR
-	dmesg > container.log
-	tar -zcvf $ARTIFACTS_FILE *
-
-	# Upload to secured S3
-	if [ -n "$UPLOAD_BUCKET" ] && [ -n "$UPLOAD_ACCESS_KEY" ] && [ -n "$UPLOAD_SECRET_KEY" ]; then
-
-		# Install artifacts tool
-		curl -sL https://raw.githubusercontent.com/travis-ci/artifacts/master/install | bash
-
-		PATH=$HOME/bin/artifacts:$HOME/bin:$PATH
-
-		echo "UPLOADING build artifacts."
-
-		# Upload everything in $ARTIFACTS_DIR. Use relative path, otherwise the upload tool
-		# re-creates the whole directory structure from root.
-		artifacts upload --bucket $UPLOAD_BUCKET --key $UPLOAD_ACCESS_KEY --secret $UPLOAD_SECRET_KEY --target-paths $UPLOAD_TARGET_PATH $ARTIFACTS_FILE
-	fi
-
-	# On Azure, publish ARTIFACTS_FILE as a build artifact
-	if [ ! -z "$TF_BUILD" ] ; then
-		TIMESTAMP=`date +%s` # append timestamp to name to allow multiple uploads for the same module
-		ARTIFACT_DIR="$(pwd)/artifact-dir"
-		mkdir $ARTIFACT_DIR
-		cp $ARTIFACTS_FILE $ARTIFACT_DIR/
-		
-		echo "##vso[task.setvariable variable=ARTIFACT_DIR]$ARTIFACT_DIR"
-		echo "##vso[task.setvariable variable=ARTIFACT_NAME]$(echo $MODULE | tr -dc '[:alnum:]\n\r')-$TIMESTAMP"
-	fi
-
-	# upload to https://transfer.sh
-	echo "Uploading to transfer.sh"
-	curl --retry ${TRANSFER_UPLOAD_MAX_RETRIES} --retry-delay ${TRANSFER_UPLOAD_RETRY_DELAY} --upload-file $ARTIFACTS_FILE --max-time 60 https://transfer.sh
-}
-
-print_stacktraces () {
-	echo "=============================================================================="
-	echo "The following Java processes are running (JPS)"
-	echo "=============================================================================="
-
-	jps
-
-	local pids=( $(jps | awk '{print $1}') )
-
-	for pid in "${pids[@]}"; do
-		echo "=============================================================================="
-		echo "Printing stack trace of Java process ${pid}"
-		echo "=============================================================================="
-
-		jstack $pid
-	done
-}
-
-# locate YARN logs and put them into artifacts directory
-put_yarn_logs_to_artifacts() {
-	# Make sure to be in project root
-	cd $HERE/../
-	for file in `find ./flink-yarn-tests/target -type f -name '*.log'`; do
-		TARGET_FILE=`echo "$file" | grep -Eo "container_[0-9_]+/(.*).log"`
-		TARGET_DIR=`dirname	 "$TARGET_FILE"`
-		mkdir -p "$ARTIFACTS_DIR/yarn-tests/$TARGET_DIR"
-		cp $file "$ARTIFACTS_DIR/yarn-tests/$TARGET_FILE"
-	done
-}
-
-mod_time () {
-	if [[ `uname` == 'Darwin' ]]; then
-		eval $(stat -s $CMD_OUT)
-		echo $st_mtime
-	else
-		echo `stat -c "%Y" $CMD_OUT`
-	fi
-}
-
-the_time() {
-	echo `date +%s`
-}
-
-# =============================================================================
-# WATCHDOG
-# =============================================================================
-
-watchdog () {
-	touch $CMD_OUT
-
-	while true; do
-		sleep $SLEEP_TIME
-
-		time_diff=$((`the_time` - `mod_time`))
-
-		if [ $time_diff -ge $MAX_NO_OUTPUT ]; then
-			echo "=============================================================================="
-			echo "Maven produced no output for ${MAX_NO_OUTPUT} seconds."
-			echo "=============================================================================="
-
-			print_stacktraces | tee $TRACE_OUT
-
-			# Kill $CMD and all descendants
-			pkill -P $(<$CMD_PID)
-
-			exit 1
-		fi
-	done
-}
-
-run_with_watchdog() {
-	local cmd="$1"
-
-	watchdog &
-	WD_PID=$!
-	echo "STARTED watchdog (${WD_PID})."
-
-	# Make sure to be in project root
-	cd "$HERE/../"
-
-	echo "RUNNING '${cmd}'."
-
-	# Run $CMD and pipe output to $CMD_OUT for the watchdog. The PID is written to $CMD_PID to
-	# allow the watchdog to kill $CMD if it is not producing any output anymore. $CMD_EXIT contains
-	# the exit code. This is important for Travis' build life-cycle (success/failure).
-	( $cmd & PID=$! ; echo $PID >&3 ; wait $PID ; echo $? >&4 ) 3>$CMD_PID 4>$CMD_EXIT | tee $CMD_OUT
-
-	EXIT_CODE=$(<$CMD_EXIT)
-
-	echo "${CMD_TYPE} exited with EXIT CODE: ${EXIT_CODE}."
-
-	# Make sure to kill the watchdog in any case after $CMD has completed
-	echo "Trying to KILL watchdog (${WD_PID})."
-	( kill $WD_PID 2>&1 ) > /dev/null
-
-	rm $CMD_PID
-	rm $CMD_EXIT
-}
-
-run_with_watchdog "$CMD"
-
-# Run tests if compilation was successful
-if [ $CMD_TYPE == "MVN" ]; then
-	if [ $EXIT_CODE == 0 ]; then
-		run_with_watchdog "$MVN_TEST"
-	else
-		echo "=============================================================================="
-		echo "Compilation failure detected, skipping test execution."
-		echo "=============================================================================="
-	fi
-fi
-
-# Post
-
-# only misc builds flink-dist and flink-yarn-tests
-case $TEST in
-	(misc)
-		put_yarn_logs_to_artifacts
-	;;
-esac
-
-collect_coredumps `pwd` $ARTIFACTS_DIR
-
-upload_artifacts_s3
-
-# since we are in flink/tools/artifacts
-# we are going back to
-cd ../../
-
-# only run end-to-end tests in misc because we only have flink-dist here
-case $TEST in
-    (misc)
-        # If we are not on Azure (we are on Travis) run precommit tests in misc stage.
-        # On Azure, we run them in a separate job
-        if [ -z "$TF_BUILD" ] ; then
-            if [ $EXIT_CODE == 0 ]; then
-                echo "\n\n==============================================================================\n"
-                echo "Running bash end-to-end tests\n"
-                echo "==============================================================================\n"
-
-                FLINK_DIR=build-target flink-end-to-end-tests/run-pre-commit-tests.sh
-
-                EXIT_CODE=$?
-            else
-                echo "\n==============================================================================\n"
-                echo "Previous build failure detected, skipping bash end-to-end tests.\n"
-                echo "==============================================================================\n"
-            fi
-	        if [ $EXIT_CODE == 0 ]; then
-	            echo "\n\n==============================================================================\n"
-	            echo "Running java end-to-end tests\n"
-	            echo "==============================================================================\n"
-
-	            run_with_watchdog "$MVN_E2E -DdistDir=$(readlink -e build-target)"
-	        else
-	            echo "\n==============================================================================\n"
-	            echo "Previous build failure detected, skipping java end-to-end tests.\n"
-	        fi
-	    fi
-    ;;
-esac
-
-# Exit code for Travis build success/failure
-exit $EXIT_CODE
diff --git a/tools/ci/compile.sh b/tools/ci/compile.sh
new file mode 100755
index 0000000..b0b4803
--- /dev/null
+++ b/tools/ci/compile.sh
@@ -0,0 +1,81 @@
+#!/usr/bin/env bash
+################################################################################
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+#
+# This file contains tooling for compiling Flink
+#
+
+HERE="`dirname \"$0\"`"             # relative
+HERE="`( cd \"$HERE\" && pwd )`"    # absolutized and normalized
+if [ -z "$HERE" ] ; then
+    exit 1  # fail
+fi
+CI_DIR="$HERE/../ci"
+
+# source required ci scripts
+source "${CI_DIR}/stage.sh"
+source "${CI_DIR}/shade.sh"
+source "${CI_DIR}/maven-utils.sh"
+
+echo "Maven version:"
+run_mvn -version
+
+echo "=============================================================================="
+echo "Compiling Flink"
+echo "=============================================================================="
+
+EXIT_CODE=0
+
+run_mvn clean install $MAVEN_OPTS -Dflink.convergence.phase=install -Pcheck-convergence -Dflink.forkCount=2 \
+    -Dflink.forkCountTestPackage=2 -Dmaven.javadoc.skip=true -U -DskipTests
+
+EXIT_CODE=$?
+
+if [ $EXIT_CODE == 0 ]; then
+    echo "=============================================================================="
+    echo "Checking scala suffixes"
+    echo "=============================================================================="
+
+    ${CI_DIR}/verify_scala_suffixes.sh "${PROFILE}"
+    EXIT_CODE=$?
+else
+    echo "=============================================================================="
+    echo "Previous build failure detected, skipping scala-suffixes check."
+    echo "=============================================================================="
+fi
+
+if [ $EXIT_CODE == 0 ]; then
+    check_shaded_artifacts
+    EXIT_CODE=$(($EXIT_CODE+$?))
+    check_shaded_artifacts_s3_fs hadoop
+    EXIT_CODE=$(($EXIT_CODE+$?))
+    check_shaded_artifacts_s3_fs presto
+    EXIT_CODE=$(($EXIT_CODE+$?))
+    check_shaded_artifacts_connector_elasticsearch 5
+    EXIT_CODE=$(($EXIT_CODE+$?))
+    check_shaded_artifacts_connector_elasticsearch 6
+    EXIT_CODE=$(($EXIT_CODE+$?))
+else
+    echo "=============================================================================="
+    echo "Previous build failure detected, skipping shaded dependency check."
+    echo "=============================================================================="
+fi
+
+exit $EXIT_CODE
+
diff --git a/tools/ci/controller_utils.sh b/tools/ci/controller_utils.sh
new file mode 100644
index 0000000..892d5fa
--- /dev/null
+++ b/tools/ci/controller_utils.sh
@@ -0,0 +1,62 @@
+#!/usr/bin/env bash
+################################################################################
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+print_system_info() {
+    echo "CPU information"
+    lscpu
+
+    echo "Memory information"
+    cat /proc/meminfo
+
+    echo "Disk information"
+    df -hH
+
+    echo "Running build as"
+    whoami
+}
+
+# locate YARN logs and put them into artifacts directory
+put_yarn_logs_to_artifacts() {
+	for file in `find ./flink-yarn-tests/target -type f -name '*.log'`; do
+		TARGET_FILE=`echo "$file" | grep -Eo "container_[0-9_]+/(.*).log"`
+		TARGET_DIR=`dirname	 "$TARGET_FILE"`
+		mkdir -p "$DEBUG_FILES_OUTPUT_DIR/yarn-tests/$TARGET_DIR"
+		cp $file "$DEBUG_FILES_OUTPUT_DIR/yarn-tests/$TARGET_FILE"
+	done
+}
+
+print_stacktraces () {
+	echo "=============================================================================="
+	echo "The following Java processes are running (JPS)"
+	echo "=============================================================================="
+
+	JAVA_PROCESSES=`jps`
+	echo $JAVA_PROCESSES
+
+	local pids=( $(echo $JAVA_PROCESSES | awk '{print $1}') )
+
+	for pid in "${pids[@]}"; do
+		echo "=============================================================================="
+		echo "Printing stack trace of Java process ${pid}"
+		echo "=============================================================================="
+
+		jstack $pid
+	done
+}
+
diff --git a/tools/ci/log4j-ci.properties b/tools/ci/log4j.properties
similarity index 100%
rename from tools/ci/log4j-ci.properties
rename to tools/ci/log4j.properties
diff --git a/tools/ci/maven-utils.sh b/tools/ci/maven-utils.sh
index c850572..f3be22a 100755
--- a/tools/ci/maven-utils.sh
+++ b/tools/ci/maven-utils.sh
@@ -73,7 +73,7 @@ function collect_coredumps {
 	echo "Searching for .dump, .dumpstream and related files in '$SEARCHDIR'"
 	for file in `find $SEARCHDIR -type f -regextype posix-extended -iregex '.*\.hprof|.*\.dump|.*\.dumpstream|.*hs.*\.log|.*/core(.[0-9]+)?$'`; do
 		echo "Moving '$file' to target directory ('$TARGET_DIR')"
-		mv $file $TARGET_DIR/
+		mv $file $TARGET_DIR/$(echo $file | tr "/" "-")
 	done
 }
 
diff --git a/tools/ci/shade.sh b/tools/ci/shade.sh
old mode 100644
new mode 100755
diff --git a/tools/ci/stage.sh b/tools/ci/stage.sh
old mode 100644
new mode 100755
index e431ebe..401f39f
--- a/tools/ci/stage.sh
+++ b/tools/ci/stage.sh
@@ -158,6 +158,10 @@ function get_compile_modules_for_stage() {
             # the negation takes precedence, thus not all required modules would be built
             echo ""
         ;;
+        (${STAGE_PYTHON})
+            # compile everything for PyFlink.
+            echo ""
+        ;;
     esac
 }
 
diff --git a/tools/ci/test_controller.sh b/tools/ci/test_controller.sh
new file mode 100755
index 0000000..8025e75
--- /dev/null
+++ b/tools/ci/test_controller.sh
@@ -0,0 +1,124 @@
+#!/usr/bin/env bash
+################################################################################
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+#
+# This file contains generic control over the test execution.
+#
+
+HERE="`dirname \"$0\"`"             # relative
+HERE="`( cd \"$HERE\" && pwd )`"    # absolutized and normalized
+if [ -z "$HERE" ] ; then
+	exit 1
+fi
+
+source "${HERE}/stage.sh"
+source "${HERE}/maven-utils.sh"
+source "${HERE}/controller_utils.sh"
+source "${HERE}/watchdog.sh"
+STAGE=$1
+
+# =============================================================================
+# Step 0: Check & print environment information & configure env
+# =============================================================================
+
+# check preconditions
+if [ -z "$DEBUG_FILES_OUTPUT_DIR" ] ; then
+	echo "ERROR: Environment variable 'DEBUG_FILES_OUTPUT_DIR' is not set but expected by test_controller.sh. Tests may use this location to store debugging files."
+	exit 1
+fi
+
+if [ ! -d "$DEBUG_FILES_OUTPUT_DIR" ] ; then
+	echo "ERROR: Environment variable DEBUG_FILES_OUTPUT_DIR=$DEBUG_FILES_OUTPUT_DIR points to a directory that does not exist"
+	exit 1
+fi
+
+if [ -z "$STAGE" ] ; then
+	echo "ERROR: Environment variable 'STAGE' is not set but expected by test_controller.sh. The variable refers to the stage being executed."
+	exit 1
+fi
+
+echo "Printing environment information"
+
+echo "PATH=$PATH"
+run_mvn -version
+echo "Commit: $(git rev-parse HEAD)"
+print_system_info
+
+# enable coredumps for this process
+ulimit -c unlimited
+
+# configure JVMs to produce heap dumps
+export JAVA_TOOL_OPTIONS="-XX:+HeapDumpOnOutOfMemoryError"
+
+# some tests provide additional logs if they find this variable
+export IS_CI=true
+
+# =============================================================================
+# Step 1: Rebuild jars and install Flink to local maven repository
+# =============================================================================
+
+LOG4J_PROPERTIES=${HERE}/log4j.properties
+MVN_LOGGING_OPTIONS="-Dlog.dir=${DEBUG_FILES_OUTPUT_DIR} -Dlog4j.configurationFile=file://$LOG4J_PROPERTIES"
+
+MVN_COMMON_OPTIONS="-Dflink.forkCount=2 -Dflink.forkCountTestPackage=2 -Dfast -Pskip-webui-build $MVN_LOGGING_OPTIONS"
+MVN_COMPILE_OPTIONS="-DskipTests"
+MVN_COMPILE_MODULES=$(get_compile_modules_for_stage ${STAGE})
+
+CALLBACK_ON_TIMEOUT="print_stacktraces | tee ${DEBUG_FILES_OUTPUT_DIR}/jps-traces.out"
+run_with_watchdog "run_mvn $MVN_COMMON_OPTIONS $MVN_COMPILE_OPTIONS $PROFILE $MVN_COMPILE_MODULES install" $CALLBACK_ON_TIMEOUT
+EXIT_CODE=$?
+
+if [ $EXIT_CODE != 0 ]; then
+	echo "=============================================================================="
+	echo "Compilation failure detected, skipping test execution."
+	echo "=============================================================================="
+	exit $EXIT_CODE
+fi
+
+
+# =============================================================================
+# Step 2: Run tests
+# =============================================================================
+
+if [ $STAGE == $STAGE_PYTHON ]; then
+	run_with_watchdog "./flink-python/dev/lint-python.sh" $CALLBACK_ON_TIMEOUT
+	EXIT_CODE=$?
+else
+	MVN_TEST_OPTIONS="-Dflink.tests.with-openssl"
+	MVN_TEST_MODULES=$(get_test_modules_for_stage ${STAGE})
+
+	run_with_watchdog "run_mvn $MVN_COMMON_OPTIONS $MVN_TEST_OPTIONS $PROFILE $MVN_TEST_MODULES verify" $CALLBACK_ON_TIMEOUT
+	EXIT_CODE=$?
+fi
+
+# =============================================================================
+# Step 3: Put extra logs into $DEBUG_FILES_OUTPUT_DIR
+# =============================================================================
+
+# only misc builds flink-yarn-tests
+case $STAGE in
+	(misc)
+		put_yarn_logs_to_artifacts
+	;;
+esac
+
+collect_coredumps $(pwd) $DEBUG_FILES_OUTPUT_DIR
+
+# Exit code for CI build success/failure
+exit $EXIT_CODE
diff --git a/tools/ci/watchdog.sh b/tools/ci/watchdog.sh
new file mode 100755
index 0000000..1e03430
--- /dev/null
+++ b/tools/ci/watchdog.sh
@@ -0,0 +1,111 @@
+#!/usr/bin/env bash
+################################################################################
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+#
+# This file contains a watchdog tool to monitor a task and potentially kill it after
+# not producing any output for $MAX_NO_OUTPUT seconds.
+#
+
+# Number of seconds w/o output before printing a stack trace and killing the watched process
+MAX_NO_OUTPUT=${MAX_NO_OUTPUT:-900}
+
+# Number of seconds to sleep before checking the output again
+SLEEP_TIME=${SLEEP_TIME:-20}
+
+# Internal fields
+CMD_OUT="/tmp/watchdog.out"
+CMD_PID="/tmp/watchdog.pid"
+CMD_EXIT="/tmp/watchdog.exit"
+
+
+# =============================================
+# Utility functions
+# ============================================= 
+
+mod_time () {
+	echo `stat -c "%Y" $CMD_OUT`
+}
+
+the_time() {
+	echo `date +%s`
+}
+
+# watchdog process
+
+watchdog () {
+	touch $CMD_OUT
+
+	while true; do
+		sleep $SLEEP_TIME
+
+		time_diff=$((`the_time` - `mod_time`))
+
+		if [ $time_diff -ge $MAX_NO_OUTPUT ]; then
+			echo "=============================================================================="
+			echo "Process produced no output for ${MAX_NO_OUTPUT} seconds."
+			echo "=============================================================================="
+
+			# run timeout callback
+			$CALLBACK_ON_TIMEOUT
+
+			echo "Killing process with pid=$(<$CMD_PID) and all descendants"
+			pkill -P $(<$CMD_PID) # kill descendants
+			kill $(<$CMD_PID) # kill process itself
+
+			exit 1
+		fi
+	done
+}
+
+
+# =============================================
+# main function
+# =============================================
+
+# entrypoint
+function run_with_watchdog() {
+	local cmd="$1"
+	local CALLBACK_ON_TIMEOUT="$2"
+
+	watchdog &
+	WD_PID=$!
+	echo "STARTED watchdog (${WD_PID})."
+
+	echo "RUNNING '${cmd}'."
+
+	# Run $CMD and pipe output to $CMD_OUT for the watchdog. The PID is written to $CMD_PID to
+	# allow the watchdog to kill $CMD if it is not producing any output anymore. $CMD_EXIT contains
+	# the exit code. This is important for CI build life-cycle (success/failure).
+	( $cmd & PID=$! ; echo $PID >&3 ; wait $PID ; echo $? >&4 ) 3>$CMD_PID 4>$CMD_EXIT | tee $CMD_OUT
+
+	EXIT_CODE=$(<$CMD_EXIT)
+
+	echo "Process exited with EXIT CODE: ${EXIT_CODE}."
+
+	# Make sure to kill the watchdog in any case after $CMD has completed
+	echo "Trying to KILL watchdog (${WD_PID})."
+	( kill $WD_PID 2>&1 ) > /dev/null
+
+	rm $CMD_PID
+	rm $CMD_EXIT
+
+	return $EXIT_CODE
+}
+
+