You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by tg...@apache.org on 2015/01/07 15:11:00 UTC

spark git commit: [YARN][SPARK-4929] Bug fix: fix the yarn-client code to support HA

Repository: spark
Updated Branches:
  refs/heads/master e21acc197 -> 5fde66163


[YARN][SPARK-4929] Bug fix: fix the yarn-client code to support HA

Currently, yarn-client exits immediately when an HA change happens, regardless of how many times the AM is supposed to retry.
The reason may be that the default final status only considered the sys.exit case, so yarn-client HA cannot benefit from it.
So we should distinguish the default final status between client and cluster mode, because a SUCCEEDED status may cause HA to fail in client mode, while UNDEFINED may cause an error to be reported in cluster mode when sys.exit is used.

Author: huangzhaowei <ca...@gmail.com>

Closes #3771 from SaintBacchus/YarnHA and squashes the following commits:

c02bfcc [huangzhaowei] Improve the comment of the function 'getDefaultFinalStatus'
0e69924 [huangzhaowei] Bug fix: fix the yarn-client code to support HA


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5fde6616
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5fde6616
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5fde6616

Branch: refs/heads/master
Commit: 5fde66163fe460d6f64b145047f76cc4ee33601a
Parents: e21acc1
Author: huangzhaowei <ca...@gmail.com>
Authored: Wed Jan 7 08:10:42 2015 -0600
Committer: Thomas Graves <tg...@apache.org>
Committed: Wed Jan 7 08:10:42 2015 -0600

----------------------------------------------------------------------
 .../spark/deploy/yarn/ApplicationMaster.scala       | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/5fde6616/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
----------------------------------------------------------------------
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
index 9c77dff..618db7f 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
@@ -60,7 +60,7 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments,
   @volatile private var exitCode = 0
   @volatile private var unregistered = false
   @volatile private var finished = false
-  @volatile private var finalStatus = FinalApplicationStatus.SUCCEEDED
+  @volatile private var finalStatus = getDefaultFinalStatus
   @volatile private var finalMsg: String = ""
   @volatile private var userClassThread: Thread = _
 
@@ -153,6 +153,20 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments,
   }
 
   /**
+   * Set the default final application status for client mode to UNDEFINED to handle
+   * if YARN HA restarts the application so that it properly retries. Set the final
+   * status to SUCCEEDED in cluster mode to handle if the user calls System.exit
+   * from the application code.
+   */
+  final def getDefaultFinalStatus() = {
+    if (isDriver) {
+      FinalApplicationStatus.SUCCEEDED
+    } else {
+      FinalApplicationStatus.UNDEFINED
+    }
+  }
+
+  /**
    * unregister is used to completely unregister the application from the ResourceManager.
    * This means the ResourceManager will not retry the application attempt on your behalf if
    * a failure occurred.


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org