You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@flink.apache.org by ch...@apache.org on 2021/05/11 08:23:55 UTC

[flink] 02/02: [FLINK-22566][test] Adds log extraction for the worker nodes

This is an automated email from the ASF dual-hosted git repository.

chesnay pushed a commit to branch release-1.13
in repository https://gitbox.apache.org/repos/asf/flink.git

commit b5a9336b9b93920708a6ca4126d4035bb165bab5
Author: Matthias Pohl <ma...@ververica.com>
AuthorDate: Fri May 7 14:53:07 2021 +0200

    [FLINK-22566][test] Adds log extraction for the worker nodes
    
    We struggled to get the logs of the node manager, which made it hard to
    investigate FLINK-22566, where there was a lag between setting up the YARN
    containers and starting the TaskExecutor. Hopefully, the nodemanager logs
    located on the worker nodes will help with investigating similar issues in
    the future.
---
 .../test-scripts/common_yarn_docker.sh             | 40 ++++++++++++++++------
 1 file changed, 29 insertions(+), 11 deletions(-)

diff --git a/flink-end-to-end-tests/test-scripts/common_yarn_docker.sh b/flink-end-to-end-tests/test-scripts/common_yarn_docker.sh
index 95d3882..f640976 100755
--- a/flink-end-to-end-tests/test-scripts/common_yarn_docker.sh
+++ b/flink-end-to-end-tests/test-scripts/common_yarn_docker.sh
@@ -144,19 +144,13 @@ function debug_copy_and_show_logs {
     echo -e "\n\nCurrently running JVMs"
     jps -v
 
-    echo -e "\n\nHadoop logs:"
-    mkdir -p $TEST_DATA_DIR/logs
-    docker cp master:/var/log/hadoop/ $TEST_DATA_DIR/logs/
-    ls -lisah $TEST_DATA_DIR/logs/hadoop
-    for f in $TEST_DATA_DIR/logs/hadoop/*; do
-        echo "$f:"
-        cat $f
-    done
+    local log_directory="$TEST_DATA_DIR/logs"
+    local yarn_docker_containers="master $(docker ps --format '{{.Names}}' | grep worker)"
 
-    echo -e "\n\nDocker logs:"
-    docker logs master
+    extract_hadoop_logs ${log_directory} ${yarn_docker_containers}
+    print_logs ${log_directory}
 
-    echo -e "\n\nFlink logs:"
+    echo -e "\n\n ==== Flink logs ===="
     docker exec master bash -c "kinit -kt /home/hadoop-user/hadoop-user.keytab hadoop-user"
     docker exec master bash -c "yarn application -list -appStates ALL"
     application_id=`docker exec master bash -c "yarn application -list -appStates ALL" | grep -i "Flink" | grep -i "cluster" | awk '{print \$1}'`
@@ -167,6 +161,30 @@ function debug_copy_and_show_logs {
     docker exec master bash -c "kdestroy"
 }
 
+function extract_hadoop_logs() {
+    local parent_folder="$1"
+    shift
+    docker_container_aliases="$@"
+
+    for docker_container_alias in $(echo ${docker_container_aliases}); do
+        local target_container_log_folder="${parent_folder}/${docker_container_alias}"
+        echo "Extracting ${docker_container_alias} Hadoop logs into ${target_container_log_folder}"
+        mkdir -p "${target_container_log_folder}"
+        docker cp "${docker_container_alias}:/var/log/hadoop/" "${target_container_log_folder}"
+
+        local target_container_docker_log_file="${target_container_log_folder}/docker-${docker_container_alias}.log"
+        echo "Extracting ${docker_container_alias} Docker logs into ${target_container_docker_log_file}"
+        docker logs "${docker_container_alias}" > "${target_container_docker_log_file}"
+    done
+}
+
+function print_logs() {
+    local parent_folder="$1"
+
+    ls -lisahR "${parent_folder}"
+    find "${parent_folder}" -type f -exec echo -e "\n\nContent of {}:" \; -exec cat {} \;
+}
+
 # expects only one application to be running and waits until this one is in
 # final state SUCCEEDED
 function wait_for_single_yarn_application {