You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by rx...@apache.org on 2013/12/01 21:47:04 UTC

[1/3] git commit: Scheduler quits when createStage fails.

Updated Branches:
  refs/heads/master 60e23a58b -> 740922f25


Scheduler quits when createStage fails.

The current scheduler thread does not handle exceptions thrown by createStage while launching new jobs. The thread dies on any exception raised at that level, leaving the cluster hanging with no scheduler.


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/4d53830e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/4d53830e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/4d53830e

Branch: refs/heads/master
Commit: 4d53830eb79174cfd9641f6342727bc980d5c3e0
Parents: 743a31a
Author: Sundeep Narravula <su...@yahoo-inc.com>
Authored: Sat Nov 30 16:18:12 2013 -0800
Committer: Sundeep Narravula <su...@yahoo-inc.com>
Committed: Sat Nov 30 16:18:12 2013 -0800

----------------------------------------------------------------------
 .../scala/org/apache/spark/scheduler/DAGScheduler.scala   | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/4d53830e/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
index 4457525..f6a4482 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
@@ -384,7 +384,15 @@ class DAGScheduler(
   private[scheduler] def processEvent(event: DAGSchedulerEvent): Boolean = {
     event match {
       case JobSubmitted(jobId, rdd, func, partitions, allowLocal, callSite, listener, properties) =>
-        val finalStage = newStage(rdd, partitions.size, None, jobId, Some(callSite))
+        var finalStage:Stage  = null
+        try {
+          finalStage = newStage(rdd, partitions.size, None, jobId, Some(callSite))
+        } catch {
+          case e: Exception =>
+            logWarning("Creating new stage failed due to exception - job: " + jobId )
+            listener.jobFailed(e)
+            return false
+        }
         val job = new ActiveJob(jobId, finalStage, func, partitions, callSite, listener, properties)
         clearCacheLocs()
         logInfo("Got job " + job.jobId + " (" + callSite + ") with " + partitions.length +


[2/3] git commit: Log exception in scheduler in addition to passing it to the caller. Code Styling changes.

Posted by rx...@apache.org.
Log exception in scheduler in addition to passing it to the caller.
Code Styling changes.


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/be3ea239
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/be3ea239
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/be3ea239

Branch: refs/heads/master
Commit: be3ea2394fa2e626fb6b5f2cd46e7156016c9b3f
Parents: 4d53830
Author: Sundeep Narravula <su...@yahoo-inc.com>
Authored: Sun Dec 1 00:50:34 2013 -0800
Committer: Sundeep Narravula <su...@yahoo-inc.com>
Committed: Sun Dec 1 00:50:34 2013 -0800

----------------------------------------------------------------------
 .../main/scala/org/apache/spark/scheduler/DAGScheduler.scala   | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/be3ea239/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
index f6a4482..9159186 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
@@ -384,12 +384,14 @@ class DAGScheduler(
   private[scheduler] def processEvent(event: DAGSchedulerEvent): Boolean = {
     event match {
       case JobSubmitted(jobId, rdd, func, partitions, allowLocal, callSite, listener, properties) =>
-        var finalStage:Stage  = null
+        var finalStage: Stage = null
         try {
+          // New stage creation at times and if its not protected, the scheduler thread is killed. 
+          // e.g. it can fail when jobs are run on HadoopRDD whose underlying hdfs files have been deleted
           finalStage = newStage(rdd, partitions.size, None, jobId, Some(callSite))
         } catch {
           case e: Exception =>
-            logWarning("Creating new stage failed due to exception - job: " + jobId )
+            logWarning("Creating new stage failed due to exception - job: " + jobId, e)
             listener.jobFailed(e)
             return false
         }


[3/3] git commit: Merge pull request #219 from sundeepn/schedulerexception

Posted by rx...@apache.org.
Merge pull request #219 from sundeepn/schedulerexception

Scheduler quits when newStage fails

The current scheduler thread does not handle exceptions thrown by newStage while launching new jobs. The thread dies on any exception raised at that level, leaving the cluster hanging with no scheduler.


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/740922f2
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/740922f2
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/740922f2

Branch: refs/heads/master
Commit: 740922f25d5f81617fbe02c7bcd1610d6426bbef
Parents: 60e23a5 be3ea23
Author: Reynold Xin <rx...@apache.org>
Authored: Sun Dec 1 12:46:58 2013 -0800
Committer: Reynold Xin <rx...@apache.org>
Committed: Sun Dec 1 12:46:58 2013 -0800

----------------------------------------------------------------------
 .../scala/org/apache/spark/scheduler/DAGScheduler.scala | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/740922f2/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
----------------------------------------------------------------------