You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by erenavsarogullari <gi...@git.apache.org> on 2017/03/12 20:16:34 UTC

[GitHub] spark pull request #15604: [SPARK-18066] [CORE] [TESTS] Add Pool usage polic...

Github user erenavsarogullari commented on a diff in the pull request:

    https://github.com/apache/spark/pull/15604#discussion_r105567716
  
    --- Diff: core/src/test/scala/org/apache/spark/scheduler/PoolSuite.scala ---
    @@ -201,12 +202,96 @@ class PoolSuite extends SparkFunSuite with LocalSparkContext {
         verifyPool(rootPool, "pool_with_surrounded_whitespace", 3, 2, FAIR)
       }
     
    +  /**
    +   * spark.scheduler.pool property should be ignored for the FIFO scheduler,
    +   * because pools are only needed for fair scheduling.
    +   */
    +  test("FIFO scheduler uses root pool and not spark.scheduler.pool property") {
    +    sc = new SparkContext("local", "PoolSuite")
    +    val taskScheduler = new TaskSchedulerImpl(sc)
    +
    +    val rootPool = new Pool("", SchedulingMode.FIFO, initMinShare = 0, initWeight = 0)
    +    val schedulableBuilder = new FIFOSchedulableBuilder(rootPool)
    +
    +    val taskSetManager0 = createTaskSetManager(stageId = 0, numTasks = 1, taskScheduler)
    +    val taskSetManager1 = createTaskSetManager(stageId = 1, numTasks = 1, taskScheduler)
    +
    +    val properties = new Properties()
    +    properties.setProperty("spark.scheduler.pool", TEST_POOL)
    +
    +    // When FIFO Scheduler is used and task sets are submitted, they should be added to
    +    // the root pool, and no additional pools should be created
    +    // (even though there's a configured default pool).
    +    schedulableBuilder.addTaskSetManager(taskSetManager0, properties)
    +    schedulableBuilder.addTaskSetManager(taskSetManager1, properties)
    +
    +    assert(rootPool.getSchedulableByName(TEST_POOL) === null)
    +    assert(rootPool.schedulableQueue.size === 2)
    +    assert(rootPool.getSchedulableByName(taskSetManager0.name) === taskSetManager0)
    +    assert(rootPool.getSchedulableByName(taskSetManager1.name) === taskSetManager1)
    +  }
    +
    +  test("FAIR Scheduler uses default pool when spark.scheduler.pool property is not set") {
    +    sc = new SparkContext("local", "PoolSuite")
    +    val taskScheduler = new TaskSchedulerImpl(sc)
    +
    +    val rootPool = new Pool("", SchedulingMode.FAIR, initMinShare = 0, initWeight = 0)
    +    val schedulableBuilder = new FairSchedulableBuilder(rootPool, sc.conf)
    +    schedulableBuilder.buildPools()
    +
    +    // Submit a new task set manager with pool properties set to null. This should result
    +    // in the task set manager getting added to the default pool.
    +    val taskSetManager0 = createTaskSetManager(stageId = 0, numTasks = 1, taskScheduler)
    +    schedulableBuilder.addTaskSetManager(taskSetManager0, null)
    +
    +    val defaultPool = rootPool.getSchedulableByName(schedulableBuilder.DEFAULT_POOL_NAME)
    +    assert(defaultPool !== null)
    +    assert(defaultPool.schedulableQueue.size === 1)
    +    assert(defaultPool.getSchedulableByName(taskSetManager0.name) === taskSetManager0)
    +
    +    // When a task set manager is submitted with spark.scheduler.pool unset, it should be added to
    +    // the default pool (as above).
    +    val taskSetManager1 = createTaskSetManager(stageId = 1, numTasks = 1, taskScheduler)
    +    schedulableBuilder.addTaskSetManager(taskSetManager1, new Properties())
    +
    +    assert(defaultPool.schedulableQueue.size === 2)
    +    assert(defaultPool.getSchedulableByName(taskSetManager1.name) === taskSetManager1)
    +  }
    +
    +  test("FAIR Scheduler creates a new pool when spark.scheduler.pool property points to " +
    +      "a non-existent pool") {
    +    sc = new SparkContext("local", "PoolSuite")
    +    val taskScheduler = new TaskSchedulerImpl(sc)
    +
    +    val rootPool = new Pool("", SchedulingMode.FAIR, initMinShare = 0, initWeight = 0)
    +    val schedulableBuilder = new FairSchedulableBuilder(rootPool, sc.conf)
    +    schedulableBuilder.buildPools()
    +
    +    assert(rootPool.getSchedulableByName(TEST_POOL) === null)
    +
    +    val taskSetManager = createTaskSetManager(stageId = 0, numTasks = 1, taskScheduler)
    +
    +    val properties = new Properties()
    +    properties.setProperty(schedulableBuilder.FAIR_SCHEDULER_PROPERTIES, TEST_POOL)
    +
    +    // The fair scheduler should create a new pool with default values when spark.scheduler.pool
    +    // points to a pool that doesn't exist yet (this can happen when the file that pools are read
    --- End diff --
    
    Currently, this behavior is **not** highlighted via logs; we just log the pool creation as 
    `Created pool: ..., schedulingMode: ..., minShare: ..., weight: ...`, so I agree it can be useful for the user, and it is addressed with the latest commits.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org