You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2019/03/07 11:23:20 UTC

[GitHub] [spark] LantaoJin commented on a change in pull request #23951: [SPARK-27038][CORE][YARN] Re-implement RackResolver to reduce resolving time

LantaoJin commented on a change in pull request #23951: [SPARK-27038][CORE][YARN] Re-implement RackResolver to reduce resolving time
URL: https://github.com/apache/spark/pull/23951#discussion_r263340561
 
 

 ##########
 File path: core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala
 ##########
 @@ -1602,4 +1617,28 @@ class TaskSetManagerSuite extends SparkFunSuite with LocalSparkContext with Logg
     verify(sched.dagScheduler).taskEnded(manager.tasks(3), Success, result.value(),
       result.accumUpdates, info3)
   }
+
+  test("SPARK-27038: Verify the rack resolving time and result when initialing TaskSetManager") {
+    sc = new SparkContext("local", "test")
+    for (i <- 1 to 100) {
+      FakeRackUtil.assignHostToRack("host" + i, "rack" + i)
+    }
+    sched = new FakeTaskScheduler(sc,
+      ("execA", "host1"), ("execB", "host2"), ("execC", "host3"))
+    sched.slowRackResolve = true
+    val locations = new ArrayBuffer[Seq[TaskLocation]]()
+    for (i <- 1 to 100) {
+      locations += Seq(TaskLocation("host" + i))
+    }
+    val taskSet = FakeTask.createTaskSet(100, locations: _*)
+    val clock = new ManualClock
+    val manager = new TaskSetManager(sched, taskSet, MAX_TASK_FAILURES, clock = clock)
+    var total = 0
+    for (i <- 1 to 100) {
+      total += manager.getPendingTasksForRack("rack" + i).length
+    }
+    assert(total === 100) // verify the total number always equals 100 with/without SPARK-27038
+    // verify elapsed time should be less than 1s, without SPARK-27038, it should be larger 10s
+    assert(manager.addTaskElapsedTime < 1)
 
 Review comment:
   Thanks @squito, I changed it to use a function that simulates `runResolveCommand()` in `org.apache.hadoop.net.ScriptBasedMapping`. I think that should be equivalent for demonstrating the reduction in resolving time.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org