You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by jo...@apache.org on 2014/12/04 00:08:17 UTC

spark git commit: [SPARK-4498][core] Don't transition ExecutorInfo to RUNNING until Driver adds Executor

Repository: spark
Updated Branches:
  refs/heads/master 513ef82e8 -> 96b27855c


[SPARK-4498][core] Don't transition ExecutorInfo to RUNNING until Driver adds Executor

The ExecutorInfo only reaches the RUNNING state if the Driver is alive to send the ExecutorStateChanged message to master.  Else, appInfo.resetRetryCount() is never called and failing Executors will eventually exceed ApplicationState.MAX_NUM_RETRY, resulting in the application being removed from the master's accounting.

JoshRosen

Author: Mark Hamstra <ma...@gmail.com>

Closes #3550 from markhamstra/SPARK-4498 and squashes the following commits:

8f543b1 [Mark Hamstra] Don't transition ExecutorInfo to RUNNING until Executor is added by Driver


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/96b27855
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/96b27855
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/96b27855

Branch: refs/heads/master
Commit: 96b27855c5f9789d1f15316564a8e0fa2cd5a51b
Parents: 513ef82
Author: Mark Hamstra <ma...@gmail.com>
Authored: Wed Dec 3 15:08:01 2014 -0800
Committer: Josh Rosen <jo...@databricks.com>
Committed: Wed Dec 3 15:08:01 2014 -0800

----------------------------------------------------------------------
 core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala | 1 +
 .../main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala | 2 --
 2 files changed, 1 insertion(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/96b27855/core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala b/core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala
index 98a93d1..4efebca 100644
--- a/core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala
@@ -134,6 +134,7 @@ private[spark] class AppClient(
         val fullId = appId + "/" + id
         logInfo("Executor added: %s on %s (%s) with %d cores".format(fullId, workerId, hostPort,
           cores))
+        master ! ExecutorStateChanged(appId, id, ExecutorState.RUNNING, None, None)
         listener.executorAdded(fullId, workerId, hostPort, cores, memory)
 
       case ExecutorUpdated(id, state, message, exitStatus) =>

http://git-wip-us.apache.org/repos/asf/spark/blob/96b27855/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala
index 8ba6a01..f4fedc6 100644
--- a/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala
@@ -144,8 +144,6 @@ private[spark] class ExecutorRunner(
       Files.write(header, stderr, UTF_8)
       stderrAppender = FileAppender(process.getErrorStream, stderr, conf)
 
-      state = ExecutorState.RUNNING
-      worker ! ExecutorStateChanged(appId, execId, state, None, None)
       // Wait for it to exit; executor may exit with code 0 (when driver instructs it to shutdown)
       // or with nonzero exit code
       val exitCode = process.waitFor()


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org