Posted to commits@spark.apache.org by zs...@apache.org on 2020/02/25 23:31:08 UTC

[spark] branch branch-3.0 updated: [SPARK-30943][SS] Show "batch ID" in tool tip string for Structured Streaming UI graphs

This is an automated email from the ASF dual-hosted git repository.

zsxwing pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 5343059  [SPARK-30943][SS] Show "batch ID" in tool tip string for Structured Streaming UI graphs
5343059 is described below

commit 53430594587ad0134eff5cd2b5e06a7a3eec1b99
Author: Jungtaek Lim (HeartSaVioR) <ka...@gmail.com>
AuthorDate: Tue Feb 25 15:29:36 2020 -0800

    [SPARK-30943][SS] Show "batch ID" in tool tip string for Structured Streaming UI graphs
    
    ### What changes were proposed in this pull request?
    
    This patch changes the tool tip string in the Structured Streaming UI graphs to show the batch ID (along with the timestamp) instead of only the timestamp, which was the key for DStream but is no longer the key for Structured Streaming.
    
    This patch also does some refactoring, as there are a few spots where the JS files for Streaming and Structured Streaming were easy to confuse.
    
    Note that this patch doesn't change the x axis itself; doing so would require decoupling the graph logic between Streaming and Structured Streaming. This doesn't meaningfully change the UX, because the x axis only shows the min and max values, for which knowing the "time" is still as useful as knowing the batch ID.
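
    To illustrate the change (not part of the committed code), the inline JS emitted by the new `generateTimeTipStrings` / `generateFormattedTimeTipStrings` helpers would look roughly like the sketch below; the batch ID and timestamps are made-up example values:

    ```javascript
    // Hypothetical output of the new Scala helpers (values are illustrative only):
    var timeTipStrings = {};
    timeTipStrings[1582644576000] = 'batch 42 (15:29:36)';

    var formattedTimeTipStrings = {};
    formattedTimeTipStrings["15:29:36"] = 'batch 42 (15:29:36)';

    // structured-streaming-page.js looks these up in its mouseover handlers, e.g.
    //   tip += " at " + formattedTimeTipStrings[d._x];
    // so the tooltip ends with "at batch 42 (15:29:36)" instead of only the time.
    ```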
    
    ### Why are the changes needed?
    
    In Structured Streaming, everything is keyed by "batch ID", but the UI only shows timestamps - end users have to manually find and correlate the batch ID with the timestamp, which is a huge pain.
    
    ### Does this PR introduce any user-facing change?
    
    No
    
    ### How was this patch tested?
    
    Manually tested. Screenshots:
    
    ![Screen Shot 2020-02-25 at 7 22 38 AM](https://user-images.githubusercontent.com/1317309/75197701-40b2ce80-57a2-11ea-9578-c2eb2d1091de.png)
    ![Screen Shot 2020-02-25 at 7 22 44 AM](https://user-images.githubusercontent.com/1317309/75197704-427c9200-57a2-11ea-9439-e0a8303d0860.png)
    ![Screen Shot 2020-02-25 at 7 22 58 AM](https://user-images.githubusercontent.com/1317309/75197706-43152880-57a2-11ea-9617-1276c3ba181e.png)
    ![Screen Shot 2020-02-25 at 7 23 04 AM](https://user-images.githubusercontent.com/1317309/75197708-43152880-57a2-11ea-9de2-7d37eaf88102.png)
    ![Screen Shot 2020-02-25 at 7 23 31 AM](https://user-images.githubusercontent.com/1317309/75197710-43adbf00-57a2-11ea-9ae4-4e292de39c36.png)
    
    Closes #27687 from HeartSaVioR/SPARK-30943.
    
    Authored-by: Jungtaek Lim (HeartSaVioR) <ka...@gmail.com>
    Signed-off-by: Shixiong Zhu <zs...@gmail.com>
    (cherry picked from commit 9ea6c0a8975a1277abba799b51aca4e2818c23e7)
    Signed-off-by: Shixiong Zhu <zs...@gmail.com>
---
 .../org/apache/spark/ui/static/streaming-page.js   |  2 +-
 .../spark/ui/static/structured-streaming-page.js   |  4 +--
 .../ui/StreamingQueryStatisticsPage.scala          | 36 ++++++++++++++++++----
 .../apache/spark/streaming/ui/StreamingPage.scala  | 13 +++++++-
 4 files changed, 45 insertions(+), 10 deletions(-)

diff --git a/core/src/main/resources/org/apache/spark/ui/static/streaming-page.js b/core/src/main/resources/org/apache/spark/ui/static/streaming-page.js
index 5b75bc3..ed3e65c3 100644
--- a/core/src/main/resources/org/apache/spark/ui/static/streaming-page.js
+++ b/core/src/main/resources/org/apache/spark/ui/static/streaming-page.js
@@ -171,7 +171,7 @@ function drawTimeline(id, data, minX, maxX, minY, maxY, unitY, batchInterval) {
             .attr("cy", function(d) { return y(d.y); })
             .attr("r", function(d) { return isFailedBatch(d.x) ? "2" : "3";})
             .on('mouseover', function(d) {
-                var tip = formatYValue(d.y) + " " + unitY + " at " + timeFormat[d.x];
+                var tip = formatYValue(d.y) + " " + unitY + " at " + timeTipStrings[d.x];
                 showBootstrapTooltip(d3.select(this).node(), tip);
                 // show the point
                 d3.select(this)
diff --git a/core/src/main/resources/org/apache/spark/ui/static/structured-streaming-page.js b/core/src/main/resources/org/apache/spark/ui/static/structured-streaming-page.js
index 70250fd..c92226b 100644
--- a/core/src/main/resources/org/apache/spark/ui/static/structured-streaming-page.js
+++ b/core/src/main/resources/org/apache/spark/ui/static/structured-streaming-page.js
@@ -106,12 +106,12 @@ function drawAreaStack(id, labels, values, minX, maxX, minY, maxY) {
         .on('mouseover', function(d) {
             var tip = '';
             var idx = 0;
-            var _values = timeToValues[d._x]
+            var _values = formattedTimeToValues[d._x];
             _values.forEach(function (k) {
                 tip += labels[idx] + ': ' + k + '   ';
                 idx += 1;
             });
-            tip += " at " + d._x
+            tip += " at " + formattedTimeTipStrings[d._x];
             showBootstrapTooltip(d3.select(this).node(), tip);
         })
         .on('mouseout',  function() {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala
index 56672ce..fa9896e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala
@@ -70,11 +70,30 @@ private[ui] class StreamingQueryStatisticsPage(parent: StreamingQueryTab)
     <script>{Unparsed(js)}</script>
   }
 
-  def generateVar(values: Array[(Long, ju.Map[String, JLong])]): Seq[Node] = {
+  def generateTimeTipStrings(values: Array[(Long, Long)]): Seq[Node] = {
+    val js = "var timeTipStrings = {};\n" + values.map { case (batchId, time) =>
+      val formattedTime = SparkUIUtils.formatBatchTime(time, 1, showYYYYMMSS = false)
+      s"timeTipStrings[$time] = 'batch $batchId ($formattedTime)';"
+    }.mkString("\n")
+
+    <script>{Unparsed(js)}</script>
+  }
+
+  def generateFormattedTimeTipStrings(values: Array[(Long, Long)]): Seq[Node] = {
+    val js = "var formattedTimeTipStrings = {};\n" + values.map { case (batchId, time) =>
+      val formattedTime = SparkUIUtils.formatBatchTime(time, 1, showYYYYMMSS = false)
+      s"""formattedTimeTipStrings["$formattedTime"] = 'batch $batchId ($formattedTime)';"""
+    }.mkString("\n")
+
+    <script>{Unparsed(js)}</script>
+  }
+
+  def generateTimeToValues(values: Array[(Long, ju.Map[String, JLong])]): Seq[Node] = {
     val durationDataPadding = SparkUIUtils.durationDataPadding(values)
-    val js = "var timeToValues = {};\n" + durationDataPadding.map { case (x, y) =>
+    val js = "var formattedTimeToValues = {};\n" + durationDataPadding.map { case (x, y) =>
       val s = y.toSeq.sortBy(_._1).map(e => s""""${e._2}"""").mkString("[", ",", "]")
-      s"""timeToValues["${SparkUIUtils.formatBatchTime(x, 1, showYYYYMMSS = false)}"] = $s;"""
+      val formattedTime = SparkUIUtils.formatBatchTime(x, 1, showYYYYMMSS = false)
+      s"""formattedTimeToValues["$formattedTime"] = $s;"""
     }.mkString("\n")
 
     <script>{Unparsed(js)}</script>
@@ -112,8 +131,10 @@ private[ui] class StreamingQueryStatisticsPage(parent: StreamingQueryTab)
   }
 
   def generateStatTable(query: StreamingQueryUIData): Seq[Node] = {
-    val batchTimes = withNoProgress(query,
-      query.recentProgress.map(p => df.parse(p.timestamp).getTime), Array.empty[Long])
+    val batchToTimestamps = withNoProgress(query,
+      query.recentProgress.map(p => (p.batchId, df.parse(p.timestamp).getTime)),
+      Array.empty[(Long, Long)])
+    val batchTimes = batchToTimestamps.map(_._2)
     val minBatchTime =
       withNoProgress(query, df.parse(query.recentProgress.head.timestamp).getTime, 0L)
     val maxBatchTime =
@@ -266,6 +287,9 @@ private[ui] class StreamingQueryStatisticsPage(parent: StreamingQueryTab)
       </table>
     // scalastyle:on
 
-    generateVar(operationDurationData) ++ generateTimeMap(batchTimes) ++ table ++ jsCollector.toHtml
+    generateTimeToValues(operationDurationData) ++
+      generateFormattedTimeTipStrings(batchToTimestamps) ++
+      generateTimeMap(batchTimes) ++ generateTimeTipStrings(batchToTimestamps) ++
+      table ++ jsCollector.toHtml
   }
 }
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala
index d47287b..8596aa0 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala
@@ -140,6 +140,16 @@ private[ui] class StreamingPage(parent: StreamingTab)
     <script>{Unparsed(js)}</script>
   }
 
+  private def generateTimeTipStrings(times: Seq[Long]): Seq[Node] = {
+    // We leverage timeFormat as the value would be same as timeFormat. This means it is
+    // sensitive to the order - generateTimeMap should be called earlier than this.
+    val js = "var timeTipStrings = {};\n" + times.map { time =>
+      s"timeTipStrings[$time] = timeFormat[$time];"
+    }.mkString("\n")
+
+    <script>{Unparsed(js)}</script>
+  }
+
   private def generateStatTable(): Seq[Node] = {
     val batches = listener.retainedBatches
 
@@ -313,7 +323,8 @@ private[ui] class StreamingPage(parent: StreamingTab)
     </table>
     // scalastyle:on
 
-    generateTimeMap(batchTimes) ++ table ++ jsCollector.toHtml
+    generateTimeMap(batchTimes) ++ generateTimeTipStrings(batchTimes) ++ table ++
+      jsCollector.toHtml
   }
 
   private def generateInputDStreamsTable(

