You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ka...@apache.org on 2017/03/02 05:15:56 UTC

spark git commit: [SPARK-19777] Scan runningTasksSet when checking speculatable tasks in TaskSetManager.

Repository: spark
Updated Branches:
  refs/heads/master db0ddce52 -> 51be63365


[SPARK-19777] Scan runningTasksSet when checking speculatable tasks in TaskSetManager.

## What changes were proposed in this pull request?

When checking for speculatable tasks in `TaskSetManager`, scan only `runningTasksSet` instead of scanning all of `taskInfos`.

## How was this patch tested?
Existing tests.

Author: jinxing <ji...@126.com>

Closes #17111 from jinxing64/SPARK-19777.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/51be6336
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/51be6336
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/51be6336

Branch: refs/heads/master
Commit: 51be633657800d470de5dcebbed09e6bf08f6e2a
Parents: db0ddce
Author: jinxing <ji...@126.com>
Authored: Wed Mar 1 21:15:22 2017 -0800
Committer: Kay Ousterhout <ka...@gmail.com>
Committed: Wed Mar 1 21:15:22 2017 -0800

----------------------------------------------------------------------
 .../main/scala/org/apache/spark/scheduler/TaskSetManager.scala  | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/51be6336/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala
index 3b25513..e63feb8 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala
@@ -906,8 +906,6 @@ private[spark] class TaskSetManager(
    * Check for tasks to be speculated and return true if there are any. This is called periodically
    * by the TaskScheduler.
    *
-   * TODO: To make this scale to large jobs, we need to maintain a list of running tasks, so that
-   * we don't scan the whole task set. It might also help to make this sorted by launch time.
    */
   override def checkSpeculatableTasks(minTimeToSpeculation: Int): Boolean = {
     // Can't speculate if we only have one task, and no need to speculate if the task set is a
@@ -927,7 +925,8 @@ private[spark] class TaskSetManager(
       // TODO: Threshold should also look at standard deviation of task durations and have a lower
       // bound based on that.
       logDebug("Task length threshold for speculation: " + threshold)
-      for ((tid, info) <- taskInfos) {
+      for (tid <- runningTasksSet) {
+        val info = taskInfos(tid)
         val index = info.index
         if (!successful(index) && copiesRunning(index) == 1 && info.timeRunning(time) > threshold &&
           !speculatableTasks.contains(index)) {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org