Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2022/03/04 07:11:02 UTC

[GitHub] [spark] martin-g commented on a change in pull request #34622: [SPARK-37340][UI] Display StageIds in Operators for SQL UI

martin-g commented on a change in pull request #34622:
URL: https://github.com/apache/spark/pull/34622#discussion_r819314166



##########
File path: sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListener.scala
##########
@@ -138,6 +139,30 @@ class SQLAppStatusListener(
     }
   }
 
+  override def onStageCompleted(stageCompleted: SparkListenerStageCompleted): Unit = {
+    if (!isSQLStage(stageCompleted.stageInfo.stageId)) {
+      return
+    }
+    val stageNum = stageCompleted.stageInfo.stageId
+    val attemptID = stageCompleted.stageInfo.attemptNumber()
+
+    // gets the executionID that finished the stage
+    val liveExecution = liveExecutions.values().asScala
+    val execID = liveExecution.filter(_.stages.contains(stageNum)).head.executionId

Review comment:
       Is this filter guaranteed to always return a non-empty collection? If it can be empty, `.head` will throw a `NoSuchElementException`.
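
For illustration only, a minimal sketch (not part of the PR) of how the lookup could avoid calling `.head` on a possibly empty collection; it reuses only the names visible in the hunk above (`liveExecutions`, `stages`, `stageNum`, `executionId`):

```scala
// Hypothetical defensive variant: skip the update when no live execution
// is tracking this stage (e.g. it already finished and was evicted).
liveExecutions.values().asScala
  .find(_.stages.contains(stageNum))   // first match, or None when absent
  .map(_.executionId)
  .foreach { execID =>
    // ... proceed with the per-execution stage/attempt bookkeeping ...
  }
```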

##########
File path: sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusStore.scala
##########
@@ -79,8 +79,26 @@ class SQLAppStatusStore(
   def planGraph(executionId: Long): SparkPlanGraph = {
     store.read(classOf[SparkPlanGraphWrapper], executionId).toSparkPlanGraph()
   }
+
+  def getStageAttempt(executionId: Long): List[(Int, Int)] = {

Review comment:
```suggestion
  def getStageAttempts(executionId: Long): List[(Int, Int)] = {
```

##########
File path: sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SparkPlanGraph.scala
##########
@@ -179,6 +185,9 @@ class SparkPlanGraphNode(
       // Note: whitespace between two "\n"s is to create an empty line between the name of
       // SparkPlan and metrics. If removing it, it won't display the empty line in UI.
       builder ++= "<br><br>"
+      if (!stagesGraph.getOrElse(id, List()).isEmpty) {

Review comment:
```suggestion
      if (!stagesGraph.getOrElse(id, Nil).isEmpty) {
```
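
Side note on the suggestion: `Nil` is the idiomatic name for the empty list, and `List()` evaluates to the very same singleton object, so the change is purely stylistic. A quick REPL check (not from the PR) illustrates this:

```scala
// List() goes through List.apply with no arguments, which returns the Nil singleton.
assert(List() eq Nil)            // reference equality holds
assert(List.empty[Int] eq Nil)   // the same object regardless of element type
```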

##########
File path: sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SparkPlanGraph.scala
##########
@@ -179,6 +185,9 @@ class SparkPlanGraphNode(
       // Note: whitespace between two "\n"s is to create an empty line between the name of
       // SparkPlan and metrics. If removing it, it won't display the empty line in UI.
       builder ++= "<br><br>"
+      if (!stagesGraph.getOrElse(id, List()).isEmpty) {
+        builder ++= "Stages: " + stagesGraph.getOrElse(id, List()).mkString(",") + "\n"

Review comment:
```suggestion
        builder ++= "Stages: " + stagesGraph.getOrElse(id, Nil).mkString(",") + "\n"
```

##########
File path: sql/core/src/main/scala/org/apache/spark/sql/execution/ui/ExecutionPage.scala
##########
@@ -128,6 +136,10 @@ class ExecutionPage(parent: SQLTab) extends WebUIPage("execution") with Logging
   private def jobURL(request: HttpServletRequest, jobId: Long): String =
     "%s/jobs/job/?id=%s".format(UIUtils.prependBaseUri(request, parent.basePath), jobId)
 
+  private def stageURL(request: HttpServletRequest, stageId: Int, attemptId: Int): String =
+    "%s/stages/stage/?id=%s&attempt=%s".format(

Review comment:
       nit: since you use `String.format()`, you can use `%d` for the `Int` parameters, but it doesn't really matter
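
For illustration, a sketch of what the nit would look like; the argument list is assumed to mirror `jobURL` just above, since the quoted hunk cuts off before the arguments:

```scala
// Hypothetical variant using %d for the Int parameters: %d documents the
// intent and fails fast at runtime if a non-integer argument slips in.
private def stageURL(request: HttpServletRequest, stageId: Int, attemptId: Int): String =
  "%s/stages/stage/?id=%d&attempt=%d".format(
    UIUtils.prependBaseUri(request, parent.basePath), stageId, attemptId)
```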

##########
File path: sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala
##########
@@ -550,6 +550,10 @@ class SQLAppStatusListenerSuite extends SharedSparkSession with JsonTestUtils
 
     assertJobs(statusStore.execution(0), completed = 0 to 1)
     assert(statusStore.execution(0).get.stages === (0 to 3).toSet)
+
+    // Check stage and attemptID are gathered correctly.
+    val stageAttempt = statusStore.getStageAttempt(executionId)

Review comment:
```suggestion
    val stageAttempts = statusStore.getStageAttempt(executionId)
```

##########
File path: sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SparkPlanGraph.scala
##########
@@ -53,6 +53,12 @@ case class SparkPlanGraph(
       case node => Seq(node)
     }
   }
+
+  def getAllIds: Seq[Long] = {
+    allNodes.map {

Review comment:
```suggestion
    allNodes.map(_.id)
```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org