You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2020/12/29 07:36:30 UTC

[spark] branch branch-3.1 updated: [SPARK-33928][SPARK-23365][TEST][CORE] Fix flaky o.a.s.ExecutorAllocationManagerSuite - " Don't update target num executors when killing idle executors"

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new 122a83c  [SPARK-33928][SPARK-23365][TEST][CORE] Fix flaky o.a.s.ExecutorAllocationManagerSuite - " Don't update target num executors when killing idle executors"
122a83c is described below

commit 122a83cab7fe109b4c535514bb59e27b5a8d4565
Author: yi.wu <yi...@databricks.com>
AuthorDate: Tue Dec 29 07:35:45 2020 +0000

    [SPARK-33928][SPARK-23365][TEST][CORE] Fix flaky o.a.s.ExecutorAllocationManagerSuite - " Don't update target num executors when killing idle executors"
    
    ### What changes were proposed in this pull request?
    
    Use the testing mode for the test to fix the flakiness.
    
    ### Why are the changes needed?
    
    The test is flaky:
    
    ```scala
    [info] - SPARK-23365 Don't update target num executors when killing idle executors *** FAILED *** (126 milliseconds)
    [info] 1 did not equal 2 (ExecutorAllocationManagerSuite.scala:1615)
    [info] org.scalatest.exceptions.TestFailedException:
    [info] at org.scalatest.Assertions.newAssertionFailedException(Assertions.scala:530)
    [info] at org.scalatest.Assertions.newAssertionFailedException$(Assertions.scala:529)
    [info] at org.scalatest.FunSuite.newAssertionFailedException(FunSuite.scala:1560)
    [info] at org.scalatest.Assertions$AssertionsHelper.macroAssert(Assertions.scala:503)
    [info] at org.apache.spark.ExecutorAllocationManagerSuite.$anonfun$new$84(ExecutorAllocationManagerSuite.scala:1617)
    ...
    ```
    The root cause should be the same as https://github.com/apache/spark/pull/29773 since the test runs under non-testing mode.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Manually checked. The flakiness is gone after running the test hundreds of times with this fix applied.
    
    Closes #30956 from Ngone51/fix-flaky-SPARK-23365.
    
    Authored-by: yi.wu <yi...@databricks.com>
    Signed-off-by: Wenchen Fan <we...@databricks.com>
    (cherry picked from commit 1ef7ddd38aa28dcd8166a60a485c722c5a8ded7a)
    Signed-off-by: Wenchen Fan <we...@databricks.com>
---
 .../scala/org/apache/spark/ExecutorAllocationManagerSuite.scala   | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala b/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala
index c1269a9..5ae596b 100644
--- a/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala
@@ -1588,7 +1588,7 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite {
   test("SPARK-23365 Don't update target num executors when killing idle executors") {
     val clock = new ManualClock()
     val manager = createManager(
-      createConf(1, 2, 1).set(config.DYN_ALLOCATION_TESTING, false),
+      createConf(1, 2, 1),
       clock = clock)
 
     when(client.requestTotalExecutors(any(), any(), any())).thenReturn(true)
@@ -1616,19 +1616,17 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite {
     clock.advance(1000)
     manager invokePrivate _updateAndSyncNumExecutorsTarget(clock.nanoTime())
     assert(numExecutorsTargetForDefaultProfileId(manager) === 1)
-    verify(client, never).killExecutors(any(), any(), any(), any())
+    assert(manager.executorMonitor.executorsPendingToRemove().isEmpty)
 
     // now we cross the idle timeout for executor-1, so we kill it.  the really important
     // thing here is that we do *not* ask the executor allocation client to adjust the target
     // number of executors down
-    when(client.killExecutors(Seq("executor-1"), false, false, false))
-      .thenReturn(Seq("executor-1"))
     clock.advance(3000)
     schedule(manager)
     assert(maxNumExecutorsNeededPerResourceProfile(manager, defaultProfile) === 1)
     assert(numExecutorsTargetForDefaultProfileId(manager) === 1)
     // here's the important verify -- we did kill the executors, but did not adjust the target count
-    verify(client).killExecutors(Seq("executor-1"), false, false, false)
+    assert(manager.executorMonitor.executorsPendingToRemove() === Set("executor-1"))
   }
 
   test("SPARK-26758 check executor target number after idle time out ") {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org