You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@flink.apache.org by al...@apache.org on 2018/08/01 15:30:42 UTC

[flink] 04/04: [FLINK-10016] Make YARN/Kerberos end-to-end test stricter

This is an automated email from the ASF dual-hosted git repository.

aljoscha pushed a commit to branch release-1.5
in repository https://gitbox.apache.org/repos/asf/flink.git

commit b075ad899a7f797f8583556cdb28eea870176c96
Author: Aljoscha Krettek <al...@gmail.com>
AuthorDate: Wed Jul 18 13:46:29 2018 +0200

    [FLINK-10016] Make YARN/Kerberos end-to-end test stricter
    
    This change ensures that Flink containers are spread across the two
    available NMs. Before, it could happen that all containers are scheduled
    on one NM, which wouldn't trigger FLINK-8286.
    
    This also extends logging output and reduces the slot wait time.
---
 .../docker-hadoop-secure-cluster/config/yarn-site.xml     | 15 +++++++++++++++
 .../test-scripts/test_yarn_kerberos_docker.sh             | 13 ++++++++++++-
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/flink-end-to-end-tests/test-scripts/docker-hadoop-secure-cluster/config/yarn-site.xml b/flink-end-to-end-tests/test-scripts/docker-hadoop-secure-cluster/config/yarn-site.xml
index 62bea95..9b17acc 100644
--- a/flink-end-to-end-tests/test-scripts/docker-hadoop-secure-cluster/config/yarn-site.xml
+++ b/flink-end-to-end-tests/test-scripts/docker-hadoop-secure-cluster/config/yarn-site.xml
@@ -21,11 +21,26 @@ under the License.
         <value>mapreduce_shuffle</value>
     </property>
 
+    <!-- this is ignored by the default scheduler but we have to set it because Flink would
+    complain if we didn't have it -->
     <property>
         <name>yarn.nodemanager.resource.cpu-vcores</name>
         <value>1</value>
     </property>
 
+    <!-- the combination of this and the next setting ensures that the YARN/Kerberos test starts
+    containers on all available NMs. If the memory is too big it could happen that all containers
+    are scheduled on one NM, which wouldn't provoke a previously fixed Kerberos keytab bug. -->
+    <property>
+        <name>yarn.nodemanager.resource.memory-mb</name>
+        <value>4100</value>
+    </property>
+
+    <property>
+        <name>yarn.scheduler.minimum-allocation-mb</name>
+        <value>2000</value>
+    </property>
+
     <property>
       <name>yarn.application.classpath</name>
       <value>/usr/local/hadoop/etc/hadoop, /usr/local/hadoop/share/hadoop/common/*, /usr/local/hadoop/share/hadoop/common/lib/*, /usr/local/hadoop/share/hadoop/hdfs/*, /usr/local/hadoop/share/hadoop/hdfs/lib/*, /usr/local/hadoop/share/hadoop/mapreduce/*, /usr/local/hadoop/share/hadoop/mapreduce/lib/*, /usr/local/hadoop/share/hadoop/yarn/*, /usr/local/hadoop/share/hadoop/yarn/lib/*</value>
diff --git a/flink-end-to-end-tests/test-scripts/test_yarn_kerberos_docker.sh b/flink-end-to-end-tests/test-scripts/test_yarn_kerberos_docker.sh
index e7726c9..c9ef15d 100755
--- a/flink-end-to-end-tests/test-scripts/test_yarn_kerberos_docker.sh
+++ b/flink-end-to-end-tests/test-scripts/test_yarn_kerberos_docker.sh
@@ -72,6 +72,7 @@ docker exec -it master bash -c "tar xzf /home/hadoop-user/$FLINK_TARBALL --direc
 # minimal Flink config, bebe
 docker exec -it master bash -c "echo \"security.kerberos.login.keytab: /home/hadoop-user/hadoop-user.keytab\" > /home/hadoop-user/$FLINK_DIRNAME/conf/flink-conf.yaml"
 docker exec -it master bash -c "echo \"security.kerberos.login.principal: hadoop-user\" >> /home/hadoop-user/$FLINK_DIRNAME/conf/flink-conf.yaml"
+docker exec -it master bash -c "echo \"slot.request.timeout: 60000\" >> /home/hadoop-user/$FLINK_DIRNAME/conf/flink-conf.yaml"
 
 echo "Flink config:"
 docker exec -it master bash -c "cat /home/hadoop-user/$FLINK_DIRNAME/conf/flink-conf.yaml"
@@ -83,7 +84,7 @@ OUTPUT_PATH=hdfs:///user/hadoop-user/wc-out-$RANDOM
 start_time=$(date +%s)
 # it's important to run this with higher parallelism, otherwise we might risk that
 # JM and TM are on the same YARN node and that we therefore don't test the keytab shipping
-until docker exec -it master bash -c "export HADOOP_CLASSPATH=\`hadoop classpath\` && /home/hadoop-user/$FLINK_DIRNAME/bin/flink run -m yarn-cluster -yn 3 -ys 1 -ytm 1200 -yjm 800 -p 3 /home/hadoop-user/$FLINK_DIRNAME/examples/streaming/WordCount.jar --output $OUTPUT_PATH"; do
+until docker exec -it master bash -c "export HADOOP_CLASSPATH=\`hadoop classpath\` && /home/hadoop-user/$FLINK_DIRNAME/bin/flink run -m yarn-cluster -yn 3 -ys 1 -ytm 2000 -yjm 2000 -p 3 /home/hadoop-user/$FLINK_DIRNAME/examples/streaming/WordCount.jar --output $OUTPUT_PATH"; do
     current_time=$(date +%s)
 	time_diff=$((current_time - start_time))
 
@@ -106,12 +107,22 @@ until docker exec -it master bash -c "export HADOOP_CLASSPATH=\`hadoop classpath
 done
 
 docker exec -it master bash -c "kinit -kt /home/hadoop-user/hadoop-user.keytab hadoop-user"
+docker exec -it master bash -c "hdfs dfs -ls $OUTPUT_PATH"
 OUTPUT=$(docker exec -it master bash -c "hdfs dfs -cat $OUTPUT_PATH/*")
 docker exec -it master bash -c "kdestroy"
 echo "$OUTPUT"
 
 if [[ ! "$OUTPUT" =~ "consummation,1" ]]; then
     echo "Output does not contain (consummation, 1) as required"
+    mkdir -p $TEST_DATA_DIR/logs
+    echo "Hadoop logs:"
+    docker cp master:/var/log/hadoop/* $TEST_DATA_DIR/logs/
+    for f in $TEST_DATA_DIR/logs/*; do
+        echo "$f:"
+        cat $f
+    done
+    echo "Docker logs:"
+    docker logs master
     exit 1
 fi