You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by sr...@apache.org on 2019/05/14 14:05:36 UTC
[spark] branch master updated: [SPARK-25719][UI] : Search
functionality in datatables in stages page should search over formatted
data rather than the raw data
This is an automated email from the ASF dual-hosted git repository.
srowen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 695dbe2 [SPARK-25719][UI] : Search functionality in datatables in stages page should search over formatted data rather than the raw data
695dbe2 is described below
commit 695dbe27ced4776e9517b87e870a4d69a5501a73
Author: pgandhi <pg...@verizonmedia.com>
AuthorDate: Tue May 14 09:05:13 2019 -0500
[SPARK-25719][UI] : Search functionality in datatables in stages page should search over formatted data rather than the raw data
The pull request to add datatables to the stage page (SPARK-21809) has been merged. While the search functionality in those datatables is a great improvement for searching through a large number of tasks, it currently searches over the raw data rather than the formatted data displayed in the tables. It would be great if the search could operate on the formatted data as well.
## What changes were proposed in this pull request?
Added code to enable searching over the displayed data in the tables, e.g. searching on "165.7 MiB" or "0.3 ms" will now return matching results. Also, search was previously missing for two columns in the task table, "Shuffle Read Bytes" and "Shuffle Remote Reads", which I have added here.
## How was this patch tested?
Manual Tests
Closes #24419 from pgandhi999/SPARK-25719.
Authored-by: pgandhi <pg...@verizonmedia.com>
Signed-off-by: Sean Owen <se...@databricks.com>
---
.../spark/status/api/v1/StagesResource.scala | 41 ++++++++++++++--------
1 file changed, 26 insertions(+), 15 deletions(-)
diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/StagesResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/StagesResource.scala
index 9d1d66a..db53a40 100644
--- a/core/src/main/scala/org/apache/spark/status/api/v1/StagesResource.scala
+++ b/core/src/main/scala/org/apache/spark/status/api/v1/StagesResource.scala
@@ -24,8 +24,9 @@ import org.apache.spark.SparkException
import org.apache.spark.scheduler.StageInfo
import org.apache.spark.status.api.v1.StageStatus._
import org.apache.spark.status.api.v1.TaskSorting._
-import org.apache.spark.ui.SparkUI
+import org.apache.spark.ui.{SparkUI, UIUtils}
import org.apache.spark.ui.jobs.ApiHelper._
+import org.apache.spark.util.Utils
@Produces(Array(MediaType.APPLICATION_JSON))
private[v1] class StagesResource extends BaseAppResource {
@@ -189,32 +190,42 @@ private[v1] class StagesResource extends BaseAppResource {
val taskMetricsContainsValue = (task: TaskData) => task.taskMetrics match {
case None => false
case Some(metrics) =>
- (containsValue(task.taskMetrics.get.executorDeserializeTime)
- || containsValue(task.taskMetrics.get.executorRunTime)
- || containsValue(task.taskMetrics.get.jvmGcTime)
- || containsValue(task.taskMetrics.get.resultSerializationTime)
- || containsValue(task.taskMetrics.get.memoryBytesSpilled)
- || containsValue(task.taskMetrics.get.diskBytesSpilled)
- || containsValue(task.taskMetrics.get.peakExecutionMemory)
- || containsValue(task.taskMetrics.get.inputMetrics.bytesRead)
+ (containsValue(UIUtils.formatDuration(task.taskMetrics.get.executorDeserializeTime))
+ || containsValue(UIUtils.formatDuration(task.taskMetrics.get.executorRunTime))
+ || containsValue(UIUtils.formatDuration(task.taskMetrics.get.jvmGcTime))
+ || containsValue(UIUtils.formatDuration(task.taskMetrics.get.resultSerializationTime))
+ || containsValue(Utils.bytesToString(task.taskMetrics.get.memoryBytesSpilled))
+ || containsValue(Utils.bytesToString(task.taskMetrics.get.diskBytesSpilled))
+ || containsValue(Utils.bytesToString(task.taskMetrics.get.peakExecutionMemory))
+ || containsValue(Utils.bytesToString(task.taskMetrics.get.inputMetrics.bytesRead))
|| containsValue(task.taskMetrics.get.inputMetrics.recordsRead)
- || containsValue(task.taskMetrics.get.outputMetrics.bytesWritten)
+ || containsValue(Utils.bytesToString(
+ task.taskMetrics.get.outputMetrics.bytesWritten))
|| containsValue(task.taskMetrics.get.outputMetrics.recordsWritten)
- || containsValue(task.taskMetrics.get.shuffleReadMetrics.fetchWaitTime)
+ || containsValue(UIUtils.formatDuration(
+ task.taskMetrics.get.shuffleReadMetrics.fetchWaitTime))
+ || containsValue(Utils.bytesToString(
+ task.taskMetrics.get.shuffleReadMetrics.remoteBytesRead))
+ || containsValue(Utils.bytesToString(
+ task.taskMetrics.get.shuffleReadMetrics.localBytesRead +
+ task.taskMetrics.get.shuffleReadMetrics.remoteBytesRead))
|| containsValue(task.taskMetrics.get.shuffleReadMetrics.recordsRead)
- || containsValue(task.taskMetrics.get.shuffleWriteMetrics.bytesWritten)
+ || containsValue(Utils.bytesToString(
+ task.taskMetrics.get.shuffleWriteMetrics.bytesWritten))
|| containsValue(task.taskMetrics.get.shuffleWriteMetrics.recordsWritten)
- || containsValue(task.taskMetrics.get.shuffleWriteMetrics.writeTime))
+ || containsValue(UIUtils.formatDuration(
+ task.taskMetrics.get.shuffleWriteMetrics.writeTime / 1000000)))
}
val filteredTaskDataSequence: Seq[TaskData] = taskDataList.filter(f =>
(containsValue(f.taskId) || containsValue(f.index) || containsValue(f.attempt)
- || containsValue(f.launchTime)
+ || containsValue(UIUtils.formatDate(f.launchTime))
|| containsValue(f.resultFetchStart.getOrElse(defaultOptionString))
|| containsValue(f.executorId) || containsValue(f.host) || containsValue(f.status)
|| containsValue(f.taskLocality) || containsValue(f.speculative)
|| containsValue(f.errorMessage.getOrElse(defaultOptionString))
|| taskMetricsContainsValue(f)
- || containsValue(f.schedulerDelay) || containsValue(f.gettingResultTime)))
+ || containsValue(UIUtils.formatDuration(f.schedulerDelay))
+ || containsValue(UIUtils.formatDuration(f.gettingResultTime))))
filteredTaskDataSequence
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org