You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ka...@apache.org on 2016/02/11 22:29:23 UTC
spark git commit: [SPARK-13279] Remove O(n^2) operation from
scheduler.
Repository: spark
Updated Branches:
refs/heads/master 0d50a2208 -> 50fa6fd1b
[SPARK-13279] Remove O(n^2) operation from scheduler.
This commit removes an unnecessary duplicate check in addPendingTask that caused
scheduling of a task set to take time proportional to (# tasks)^2.
Author: Sital Kedia <sk...@fb.com>
Closes #11167 from sitalkedia/fix_stuck_driver and squashes the following commits:
3fe1af8 [Sital Kedia] [SPARK-13279] Remove unnecessary duplicate check in addPendingTask function
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/50fa6fd1
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/50fa6fd1
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/50fa6fd1
Branch: refs/heads/master
Commit: 50fa6fd1b365d5db7e2b2c59624a365cef0d1696
Parents: 0d50a22
Author: Sital Kedia <sk...@fb.com>
Authored: Thu Feb 11 13:28:03 2016 -0800
Committer: Kay Ousterhout <ka...@gmail.com>
Committed: Thu Feb 11 13:28:14 2016 -0800
----------------------------------------------------------------------
.../org/apache/spark/scheduler/TaskSetManager.scala | 15 +++++++++------
1 file changed, 9 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/50fa6fd1/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala
index cf97877..4b19beb 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala
@@ -114,9 +114,14 @@ private[spark] class TaskSetManager(
// treated as stacks, in which new tasks are added to the end of the
// ArrayBuffer and removed from the end. This makes it faster to detect
// tasks that repeatedly fail because whenever a task failed, it is put
- // back at the head of the stack. They are also only cleaned up lazily;
- // when a task is launched, it remains in all the pending lists except
- // the one that it was launched from, but gets removed from them later.
+ // back at the head of the stack. These collections may contain duplicates
+ // for two reasons:
+ // (1): Tasks are only removed lazily; when a task is launched, it remains
+ // in all the pending lists except the one that it was launched from.
+ // (2): Tasks may be re-added to these lists multiple times as a result
+ // of failures.
+ // Duplicates are handled in dequeueTaskFromList, which ensures that a
+ // task hasn't already started running before launching it.
private val pendingTasksForExecutor = new HashMap[String, ArrayBuffer[Int]]
// Set of pending tasks for each host. Similar to pendingTasksForExecutor,
@@ -181,9 +186,7 @@ private[spark] class TaskSetManager(
private def addPendingTask(index: Int) {
// Utility method that adds `index` to a list only if it's not already there
def addTo(list: ArrayBuffer[Int]) {
- if (!list.contains(index)) {
- list += index
- }
+ list += index
}
for (loc <- tasks(index).preferredLocations) {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org