You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@flink.apache.org by tzulitai <gi...@git.apache.org> on 2018/05/14 06:32:42 UTC
[GitHub] flink pull request #5676: [FLINK-8910][tests] Automated end-to-end test for ...
Github user tzulitai commented on a diff in the pull request:
https://github.com/apache/flink/pull/5676#discussion_r187843886
--- Diff: flink-end-to-end-tests/test-scripts/test_local_recovery_and_scheduling.sh ---
@@ -0,0 +1,115 @@
+#!/usr/bin/env bash
+
+################################################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+# Load common.sh from the directory that contains this script.
+source "$(dirname "$0")/common.sh"
+
+# Checks the Flink logs for entries that indicate problems with local recovery.
+#
+# Arguments:
+#   $1 - parallelism
+#   $2 - number of attempts
+# Globals:
+#   FLINK_DIR      (read)    - every file matching "$FLINK_DIR"/log/* is searched
+#   expected_count (written) - parallelism * (attempts + 1); not evaluated in this function
+#   PASS           (written) - reset to "" whenever one of the checks fails
+# Outputs:
+#   Prints one "FAILURE: ..." line to stdout per failed check.
+function check_logs {
+  local parallelism=$1
+  local attempts=$2
+  # A bare (( ... )) command returns a non-zero status when its result is 0, which
+  # would abort the script under `set -e`; `|| true` neutralises that status.
+  # The variable is deliberately left global, as before, in case other code reads it.
+  (( expected_count = parallelism * (attempts + 1) )) || true
+
+  # Search for the log message that indicates restore problem from existing local state for the keyed backend.
+  # "$FLINK_DIR" is quoted so that a path containing spaces is not word-split before globbing;
+  # `tr -d ' '` removes any space padding that `wc -l` adds around the count.
+  local failed_local_recovery=$(grep '^.*Creating keyed state backend.* from alternative (2/2)\.$' "$FLINK_DIR"/log/* | wc -l | tr -d ' ')
+
+  # Search for attempts to recover locally.
+  local attempt_local_recovery=$(grep '^.*Creating keyed state backend.* from alternative (1/2)\.$' "$FLINK_DIR"/log/* | wc -l | tr -d ' ')
+
+  # Any restore from the second alternative counts as a failed local recovery.
+  if [[ "${failed_local_recovery}" -ne 0 ]]; then
+    PASS=""
+    echo "FAILURE: Found ${failed_local_recovery} failed attempt(s) for local recovery of correctly scheduled task(s)."
+  fi
+
+  # Not a single local recovery attempt in the logs is reported as a failure as well.
+  if [[ "${attempt_local_recovery}" -eq 0 ]]; then
+    PASS=""
+    echo "FAILURE: Found no attempt for local recovery. Configuration problem?"
+  fi
+}
+
+# This function does a cleanup after the test. The configuration is restored, the watchdog is terminated and temporary
+# files and folders are deleted.
+function cleanup_after_test {
+ # Reset the configurations
+ sed -i -e 's/state.backend.local-recovery: .*//' "$FLINK_DIR/conf/flink-conf.yaml"
+ sed -i -e 's/log4j.rootLogger=.*/log4j.rootLogger=INFO, file/' "$FLINK_DIR/conf/log4j.properties"
+ #
+ kill ${watchdog_pid} 2> /dev/null
+ wait ${watchdog_pid} 2> /dev/null
+ #
+ cleanup
+}
+
+# Calls the cleanup step for this tests and exits with an error.
+function cleanup_after_test_and_exit_fail {
+ cleanup_after_test
+ exit 1
+}
+
+## This function executes one run for a certain configuration
+function run_local_recovery_test {
+ local parallelism=$1
+ local max_attempts=$2
+ local backend=$3
+ local incremental=$4
+ local kill_jvm=$5
+
+ echo "Running local recovery test on ${backend} backend: incremental checkpoints = ${incremental}, kill JVM = ${kill_jvm}."
+ TEST_PROGRAM_JAR=$TEST_INFRA_DIR/../../flink-end-to-end-tests/flink-local-recovery-and-allocation-test/target/StickyAllocationAndLocalRecoveryTestJob.jar
+
+ # Enable debug logging
+ sed -i -e 's/log4j.rootLogger=.*/log4j.rootLogger=DEBUG, file/' "$FLINK_DIR/conf/log4j.properties"
+
+ # Enable local recovery
+ sed -i -e 's/state.backend.local-recovery: .*//' "$FLINK_DIR/conf/flink-conf.yaml"
+ echo "state.backend.local-recovery: ENABLE_FILE_BASED" >> "$FLINK_DIR/conf/flink-conf.yaml"
--- End diff --
FYI: we now have a `change_conf` function in `common.sh` for modifying the Flink configuration in e2e tests.
---