You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by jo...@apache.org on 2014/11/02 19:45:01 UTC

git commit: [SPARK-4166][Core][WebUI] Display the executor ID in the Web UI when ExecutorLostFailure happens

Repository: spark
Updated Branches:
  refs/heads/master 6181577e9 -> 4e6a7a0b3


[SPARK-4166][Core][WebUI] Display the executor ID in the Web UI when ExecutorLostFailure happens

Now when ExecutorLostFailure happens, it only displays `ExecutorLostFailure (executor lost)`. Adding the executor id will help locate the faulted executor.

Author: zsxwing <zs...@gmail.com>

Closes #3033 from zsxwing/SPARK-4166 and squashes the following commits:

ff4664c [zsxwing] Backward-compatible support
c5c4cf2 [zsxwing] Display the executor ID in the Web UI when ExecutorLostFailure happens


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4e6a7a0b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4e6a7a0b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4e6a7a0b

Branch: refs/heads/master
Commit: 4e6a7a0b3e55098374a22f3ae9500404f7e4e91a
Parents: 6181577
Author: zsxwing <zs...@gmail.com>
Authored: Sun Nov 2 10:44:52 2014 -0800
Committer: Josh Rosen <jo...@databricks.com>
Committed: Sun Nov 2 10:44:52 2014 -0800

----------------------------------------------------------------------
 core/src/main/scala/org/apache/spark/TaskEndReason.scala     | 4 ++--
 .../scala/org/apache/spark/scheduler/TaskSetManager.scala    | 2 +-
 core/src/main/scala/org/apache/spark/util/JsonProtocol.scala | 8 ++++++--
 .../org/apache/spark/ui/jobs/JobProgressListenerSuite.scala  | 2 +-
 .../test/scala/org/apache/spark/util/JsonProtocolSuite.scala | 5 +++--
 5 files changed, 13 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/4e6a7a0b/core/src/main/scala/org/apache/spark/TaskEndReason.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/TaskEndReason.scala b/core/src/main/scala/org/apache/spark/TaskEndReason.scala
index 8f0c5e7..202fba6 100644
--- a/core/src/main/scala/org/apache/spark/TaskEndReason.scala
+++ b/core/src/main/scala/org/apache/spark/TaskEndReason.scala
@@ -117,8 +117,8 @@ case object TaskKilled extends TaskFailedReason {
  * the task crashed the JVM.
  */
 @DeveloperApi
-case object ExecutorLostFailure extends TaskFailedReason {
-  override def toErrorString: String = "ExecutorLostFailure (executor lost)"
+case class ExecutorLostFailure(execId: String) extends TaskFailedReason {
+  override def toErrorString: String = s"ExecutorLostFailure (executor ${execId} lost)"
 }
 
 /**

http://git-wip-us.apache.org/repos/asf/spark/blob/4e6a7a0b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala
index a976734..d8fb640 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala
@@ -732,7 +732,7 @@ private[spark] class TaskSetManager(
     }
     // Also re-enqueue any tasks that were running on the node
     for ((tid, info) <- taskInfos if info.running && info.executorId == execId) {
-      handleFailedTask(tid, TaskState.FAILED, ExecutorLostFailure)
+      handleFailedTask(tid, TaskState.FAILED, ExecutorLostFailure(execId))
     }
     // recalculate valid locality levels and waits when executor is lost
     recomputeLocality()

http://git-wip-us.apache.org/repos/asf/spark/blob/4e6a7a0b/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala b/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala
index 5b2e7d3..43c7fba 100644
--- a/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala
+++ b/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala
@@ -272,7 +272,7 @@ private[spark] object JsonProtocol {
 
   def taskEndReasonToJson(taskEndReason: TaskEndReason): JValue = {
     val reason = Utils.getFormattedClassName(taskEndReason)
-    val json = taskEndReason match {
+    val json: JObject = taskEndReason match {
       case fetchFailed: FetchFailed =>
         val blockManagerAddress = Option(fetchFailed.bmAddress).
           map(blockManagerIdToJson).getOrElse(JNothing)
@@ -287,6 +287,8 @@ private[spark] object JsonProtocol {
         ("Description" -> exceptionFailure.description) ~
         ("Stack Trace" -> stackTrace) ~
         ("Metrics" -> metrics)
+      case ExecutorLostFailure(executorId) =>
+        ("Executor ID" -> executorId)
       case _ => Utils.emptyJson
     }
     ("Reason" -> reason) ~ json
@@ -636,7 +638,9 @@ private[spark] object JsonProtocol {
         new ExceptionFailure(className, description, stackTrace, metrics)
       case `taskResultLost` => TaskResultLost
       case `taskKilled` => TaskKilled
-      case `executorLostFailure` => ExecutorLostFailure
+      case `executorLostFailure` =>
+        val executorId = Utils.jsonOption(json \ "Executor ID").map(_.extract[String])
+        ExecutorLostFailure(executorId.getOrElse("Unknown"))
       case `unknownReason` => UnknownReason
     }
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/4e6a7a0b/core/src/test/scala/org/apache/spark/ui/jobs/JobProgressListenerSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/ui/jobs/JobProgressListenerSuite.scala b/core/src/test/scala/org/apache/spark/ui/jobs/JobProgressListenerSuite.scala
index 3370dd4..6567c5a 100644
--- a/core/src/test/scala/org/apache/spark/ui/jobs/JobProgressListenerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/ui/jobs/JobProgressListenerSuite.scala
@@ -119,7 +119,7 @@ class JobProgressListenerSuite extends FunSuite with LocalSparkContext with Matc
       new ExceptionFailure("Exception", "description", null, None),
       TaskResultLost,
       TaskKilled,
-      ExecutorLostFailure,
+      ExecutorLostFailure("0"),
       UnknownReason)
     var failCount = 0
     for (reason <- taskFailedReasons) {

http://git-wip-us.apache.org/repos/asf/spark/blob/4e6a7a0b/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala b/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala
index f1f88c5..d235d7a 100644
--- a/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala
@@ -115,7 +115,7 @@ class JsonProtocolSuite extends FunSuite {
     testTaskEndReason(exceptionFailure)
     testTaskEndReason(TaskResultLost)
     testTaskEndReason(TaskKilled)
-    testTaskEndReason(ExecutorLostFailure)
+    testTaskEndReason(ExecutorLostFailure("100"))
     testTaskEndReason(UnknownReason)
 
     // BlockId
@@ -403,7 +403,8 @@ class JsonProtocolSuite extends FunSuite {
         assertOptionEquals(r1.metrics, r2.metrics, assertTaskMetricsEquals)
       case (TaskResultLost, TaskResultLost) =>
       case (TaskKilled, TaskKilled) =>
-      case (ExecutorLostFailure, ExecutorLostFailure) =>
+      case (ExecutorLostFailure(execId1), ExecutorLostFailure(execId2)) =>
+        assert(execId1 === execId2)
       case (UnknownReason, UnknownReason) =>
       case _ => fail("Task end reasons don't match in types!")
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org