You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2020/12/06 21:18:28 UTC

[spark] branch branch-3.1 updated: [SPARK-33668][K8S][TEST] Fix flaky test "Verify logging configuration is picked from the provided

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new 1545df6  [SPARK-33668][K8S][TEST] Fix flaky test "Verify logging configuration is picked from the provided
1545df6 is described below

commit 1545df657bb5dc93197e8014fb7fa656724d4174
Author: Prashant Sharma <pr...@in.ibm.com>
AuthorDate: Sat Dec 5 23:04:55 2020 -0800

    [SPARK-33668][K8S][TEST] Fix flaky test "Verify logging configuration is picked from the provided
    
    ### What changes were proposed in this pull request?
    Fix flaky test "Verify logging configuration is picked from the provided SPARK_CONF_DIR/log4j.properties."
    The test is flaky, with multiple failed instances — the reason for the failure has been similar to:
    
    ```
    
    The code passed to eventually never returned normally. Attempted 109 times over 3.0079882413999997 minutes. Last failure message: Failure executing: GET at:
    https://192.168.39.167:8443/api/v1/namespaces/b37fc72a991b49baa68a2eaaa1516463/pods/spark-pi-97a9bc76308e7fe3-exec-1/log?pretty=false. Message: pods "spark-pi-97a9bc76308e7fe3-exec-1" not found. Received status: Status(apiVersion=v1, code=404, details=StatusDetails(causes=[], group=null, kind=pods, name=spark-pi-97a9bc76308e7fe3-exec-1, retryAfterSeconds=null, uid=null, additionalProperties={}), kind=Status, message=pods "spark-pi-97a9bc76308e7fe3-exec-1" not found, metadata=ListMeta( [...]
    
    ```
    https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder-K8s/36854/console
    https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder-K8s/36852/console
    https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder-K8s/36850/console
    https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder-K8s/36848/console
    From the above failures, it seems that the executor finishes too quickly and is removed by Spark before the test can complete.
    So, in order to mitigate this situation, one way is to set the flag
       "spark.kubernetes.executor.deleteOnTermination" to false, so the executor pod is retained long enough for its logs to be inspected.
    
    ### Why are the changes needed?
    
    Fixes a flaky test.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No
    
    ### How was this patch tested?
    
    Existing tests.
    A few runs of the Jenkins integration tests may reveal whether the problem is resolved.
    
    Closes #30616 from ScrapCodes/SPARK-33668/fix-flaky-k8s-integration-test.
    
    Authored-by: Prashant Sharma <pr...@in.ibm.com>
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
    (cherry picked from commit 6317ba29a1bb1b7198fe8df71ddefcf47a55bd51)
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
 .../deploy/k8s/integrationtest/KubernetesSuite.scala   | 18 ++++++++++++++++++
 .../k8s/integrationtest/SparkConfPropagateSuite.scala  |  1 +
 2 files changed, 19 insertions(+)

diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala
index 193a02a..7b2a2d0 100644
--- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala
+++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala
@@ -158,6 +158,7 @@ class KubernetesSuite extends SparkFunSuite
       kubernetesTestComponents.deleteNamespace()
     }
     deleteDriverPod()
+    deleteExecutorPod(appLocator)
   }
 
   protected def runSparkPiAndVerifyCompletion(
@@ -508,6 +509,23 @@ class KubernetesSuite extends SparkFunSuite
         .get() == null)
     }
   }
+
+  private def deleteExecutorPod(appLocator: String): Unit = {
+    kubernetesTestComponents
+      .kubernetesClient
+      .pods()
+      .withLabel("spark-app-locator", appLocator)
+      .withLabel("spark-role", "executor")
+      .delete()
+    Eventually.eventually(TIMEOUT, INTERVAL) {
+      assert(kubernetesTestComponents.kubernetesClient
+        .pods()
+        .withLabel("spark-app-locator", appLocator)
+        .withLabel("spark-role", "executor")
+        .list()
+        .getItems.isEmpty)
+    }
+  }
 }
 
 private[spark] object KubernetesSuite {
diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkConfPropagateSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkConfPropagateSuite.scala
index 5d3b426..0bc6327 100644
--- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkConfPropagateSuite.scala
+++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkConfPropagateSuite.scala
@@ -39,6 +39,7 @@ private[spark] trait SparkConfPropagateSuite { k8sSuite: KubernetesSuite =>
 
       sparkAppConf.set("spark.driver.extraJavaOptions", "-Dlog4j.debug")
       sparkAppConf.set("spark.executor.extraJavaOptions", "-Dlog4j.debug")
+      sparkAppConf.set("spark.kubernetes.executor.deleteOnTermination", "false")
 
       val log4jExpectedLog =
         s"log4j: Reading configuration from URL file:/opt/spark/conf/log4j.properties"


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org