Posted to commits@spark.apache.org by zs...@apache.org on 2016/09/02 19:54:05 UTC

spark git commit: [SPARK-17316][CORE] Make CoarseGrainedSchedulerBackend.removeExecutor non-blocking

Repository: spark
Updated Branches:
  refs/heads/branch-1.6 412b0e896 -> b84a92c24


[SPARK-17316][CORE] Make CoarseGrainedSchedulerBackend.removeExecutor non-blocking

## What changes were proposed in this pull request?

StandaloneSchedulerBackend.executorRemoved is currently a blocking call. It may cause a deadlock since it is called inside StandaloneAppClient.ClientEndpoint.

This PR changes CoarseGrainedSchedulerBackend.removeExecutor to be non-blocking. This is safe since neither of its only two callers (StandaloneSchedulerBackend and YarnSchedulerEndpoint) needs the return value.
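
For context, the fix replaces a blocking `askWithRetry` with a fire-and-forget `ask` whose failure is only logged. Below is a minimal, self-contained Scala sketch of that pattern; it is not Spark code, and `askRemoveExecutor` is a hypothetical stand-in for `driverEndpoint.ask(RemoveExecutor(...))`:

```scala
import scala.concurrent.{ExecutionContext, Future, Promise}
import scala.util.{Failure, Success}

object FireAndForgetSketch {
  // Hypothetical stand-in for driverEndpoint.ask(RemoveExecutor(...)): returns the reply as a Future.
  def askRemoveExecutor(executorId: String, reason: String): Future[Boolean] = {
    val reply = Promise[Boolean]()
    reply.success(true) // pretend the driver endpoint acknowledged
    reply.future
  }

  // Non-blocking: register a callback instead of waiting for the reply, so the calling
  // thread (e.g. an RPC endpoint's single message loop) is never blocked on the round trip.
  def removeExecutorNonBlocking(executorId: String, reason: String): Unit = {
    askRemoveExecutor(executorId, reason).onComplete {
      case Failure(t) => Console.err.println(s"Error removing executor $executorId: ${t.getMessage}")
      case Success(_) => // result intentionally ignored
    }(ExecutionContext.global)
  }

  def main(args: Array[String]): Unit = {
    removeExecutorNonBlocking("exec-1", "worker lost")
  }
}
```

Because the caller no longer waits for the driver endpoint's reply, a message handler that invokes it cannot deadlock on that round trip; the trade-off is that failures are reported asynchronously in the log rather than as a thrown SparkException.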

## How was this patch tested?

Jenkins unit tests.

Author: Shixiong Zhu <sh...@databricks.com>

Closes #14882 from zsxwing/SPARK-17316.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b84a92c2
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b84a92c2
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b84a92c2

Branch: refs/heads/branch-1.6
Commit: b84a92c248e571af1d81586948d5c84b41e18d07
Parents: 412b0e8
Author: Shixiong Zhu <sh...@databricks.com>
Authored: Wed Aug 31 10:56:02 2016 -0700
Committer: Shixiong Zhu <sh...@databricks.com>
Committed: Fri Sep 2 12:45:06 2016 -0700

----------------------------------------------------------------------
 .../cluster/CoarseGrainedSchedulerBackend.scala    | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/b84a92c2/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala
index 75b1d29..5fc3c84 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala
@@ -364,14 +364,15 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp
     conf.getInt("spark.default.parallelism", math.max(totalCoreCount.get(), 2))
   }
 
-  // Called by subclasses when notified of a lost worker
-  def removeExecutor(executorId: String, reason: ExecutorLossReason) {
-    try {
-      driverEndpoint.askWithRetry[Boolean](RemoveExecutor(executorId, reason))
-    } catch {
-      case e: Exception =>
-        throw new SparkException("Error notifying standalone scheduler's driver endpoint", e)
-    }
+  /**
+   * Called by subclasses when notified of a lost worker. It just fires the message and returns
+   * at once.
+   */
+  protected def removeExecutor(executorId: String, reason: ExecutorLossReason): Unit = {
+    // Only log the failure since we don't care about the result.
+    driverEndpoint.ask(RemoveExecutor(executorId, reason)).onFailure { case t =>
+      logError(t.getMessage, t)
+    }(ThreadUtils.sameThread)
   }
 
   def sufficientResourcesRegistered(): Boolean = true

