You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ge...@apache.org on 2021/01/20 11:52:53 UTC
[spark] branch branch-3.1 updated: [SPARK-34005][CORE][3.1] Update
peak memory metrics for each Executor on task end
This is an automated email from the ASF dual-hosted git repository.
gengliang pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new 7b870e3 [SPARK-34005][CORE][3.1] Update peak memory metrics for each Executor on task end
7b870e3 is described below
commit 7b870e38d7c6ff46e16785e31a471120fe5b8428
Author: Kousuke Saruta <sa...@oss.nttdata.com>
AuthorDate: Wed Jan 20 19:50:05 2021 +0800
[SPARK-34005][CORE][3.1] Update peak memory metrics for each Executor on task end
### What changes were proposed in this pull request?
This PR backports SPARK-34005 (#31029).
This PR makes `AppStatusListener` update the peak memory metrics for each Executor on task end, like the other peak memory metrics (e.g., for stages and for executors in a stage).
### Why are the changes needed?
When `AppStatusListener#onExecutorMetricsUpdate` is called, peak memory metrics for Executors, stages and executors in a stage are all updated. On task end, however, the metrics for Executors are currently the only ones that are not updated.
### Does this PR introduce _any_ user-facing change?
Yes. Executor peak memory metrics are updated more accurately.
### How was this patch tested?
After I ran a job with `local-cluster[1,1,1024]` and visited `/api/v1/<appid>/executors`, I confirmed that the `peakExecutorMemory` metrics are shown for an Executor even though the lifetime of each job is very short.
I also modified the JSON files for `HistoryServerSuite`.
Closes #31261 from sarutak/SPARK-34005-branch-3.1.
Authored-by: Kousuke Saruta <sa...@oss.nttdata.com>
Signed-off-by: Gengliang Wang <ge...@databricks.com>
---
.../apache/spark/status/AppStatusListener.scala | 1 +
.../executor_list_json_expectation.json | 22 ++++++
.../executor_memory_usage_expectation.json | 88 ++++++++++++++++++++++
...executor_node_excludeOnFailure_expectation.json | 88 ++++++++++++++++++++++
...e_excludeOnFailure_unexcluding_expectation.json | 88 ++++++++++++++++++++++
5 files changed, 287 insertions(+)
diff --git a/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala b/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala
index 6cb013b..52d41cd 100644
--- a/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala
+++ b/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala
@@ -759,6 +759,7 @@ private[spark] class AppStatusListener(
exec.completedTasks += completedDelta
exec.failedTasks += failedDelta
exec.totalDuration += event.taskInfo.duration
+ exec.peakExecutorMetrics.compareAndUpdatePeakValues(event.taskExecutorMetrics)
// Note: For resubmitted tasks, we continue to use the metrics that belong to the
// first attempt of this task. This may not be 100% accurate because the first attempt
diff --git a/core/src/test/resources/HistoryServerExpectations/executor_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/executor_list_json_expectation.json
index c18a2e3..be12507 100644
--- a/core/src/test/resources/HistoryServerExpectations/executor_list_json_expectation.json
+++ b/core/src/test/resources/HistoryServerExpectations/executor_list_json_expectation.json
@@ -21,6 +21,28 @@
"addTime" : "2015-02-03T16:43:00.906GMT",
"executorLogs" : { },
"blacklistedInStages" : [ ],
+ "peakMemoryMetrics" : {
+ "JVMHeapMemory" : 0,
+ "JVMOffHeapMemory" : 0,
+ "OnHeapExecutionMemory" : 0,
+ "OffHeapExecutionMemory" : 0,
+ "OnHeapStorageMemory" : 0,
+ "OffHeapStorageMemory" : 0,
+ "OnHeapUnifiedMemory" : 0,
+ "OffHeapUnifiedMemory" : 0,
+ "DirectPoolMemory" : 0,
+ "MappedPoolMemory" : 0,
+ "ProcessTreeJVMVMemory" : 0,
+ "ProcessTreeJVMRSSMemory" : 0,
+ "ProcessTreePythonVMemory" : 0,
+ "ProcessTreePythonRSSMemory" : 0,
+ "ProcessTreeOtherVMemory" : 0,
+ "ProcessTreeOtherRSSMemory" : 0,
+ "MinorGCCount" : 0,
+ "MinorGCTime" : 0,
+ "MajorGCCount" : 0,
+ "MajorGCTime" : 0
+ },
"attributes" : { },
"resources" : { },
"resourceProfileId" : 0,
diff --git a/core/src/test/resources/HistoryServerExpectations/executor_memory_usage_expectation.json b/core/src/test/resources/HistoryServerExpectations/executor_memory_usage_expectation.json
index 5144934..0a3eb81 100644
--- a/core/src/test/resources/HistoryServerExpectations/executor_memory_usage_expectation.json
+++ b/core/src/test/resources/HistoryServerExpectations/executor_memory_usage_expectation.json
@@ -64,6 +64,28 @@
"totalOffHeapStorageMemory" : 524288000
},
"blacklistedInStages" : [ ],
+ "peakMemoryMetrics" : {
+ "JVMHeapMemory" : 0,
+ "JVMOffHeapMemory" : 0,
+ "OnHeapExecutionMemory" : 0,
+ "OffHeapExecutionMemory" : 0,
+ "OnHeapStorageMemory" : 0,
+ "OffHeapStorageMemory" : 0,
+ "OnHeapUnifiedMemory" : 0,
+ "OffHeapUnifiedMemory" : 0,
+ "DirectPoolMemory" : 0,
+ "MappedPoolMemory" : 0,
+ "ProcessTreeJVMVMemory" : 0,
+ "ProcessTreeJVMRSSMemory" : 0,
+ "ProcessTreePythonVMemory" : 0,
+ "ProcessTreePythonRSSMemory" : 0,
+ "ProcessTreeOtherVMemory" : 0,
+ "ProcessTreeOtherRSSMemory" : 0,
+ "MinorGCCount" : 0,
+ "MinorGCTime" : 0,
+ "MajorGCCount" : 0,
+ "MajorGCTime" : 0
+ },
"attributes" : { },
"resources" : { },
"resourceProfileId" : 0,
@@ -101,6 +123,28 @@
"totalOffHeapStorageMemory" : 524288000
},
"blacklistedInStages" : [ ],
+ "peakMemoryMetrics" : {
+ "JVMHeapMemory" : 0,
+ "JVMOffHeapMemory" : 0,
+ "OnHeapExecutionMemory" : 0,
+ "OffHeapExecutionMemory" : 0,
+ "OnHeapStorageMemory" : 0,
+ "OffHeapStorageMemory" : 0,
+ "OnHeapUnifiedMemory" : 0,
+ "OffHeapUnifiedMemory" : 0,
+ "DirectPoolMemory" : 0,
+ "MappedPoolMemory" : 0,
+ "ProcessTreeJVMVMemory" : 0,
+ "ProcessTreeJVMRSSMemory" : 0,
+ "ProcessTreePythonVMemory" : 0,
+ "ProcessTreePythonRSSMemory" : 0,
+ "ProcessTreeOtherVMemory" : 0,
+ "ProcessTreeOtherRSSMemory" : 0,
+ "MinorGCCount" : 0,
+ "MinorGCTime" : 0,
+ "MajorGCCount" : 0,
+ "MajorGCTime" : 0
+ },
"attributes" : { },
"resources" : { },
"resourceProfileId" : 0,
@@ -138,6 +182,28 @@
"totalOffHeapStorageMemory": 524288000
},
"blacklistedInStages" : [ ],
+ "peakMemoryMetrics" : {
+ "JVMHeapMemory" : 0,
+ "JVMOffHeapMemory" : 0,
+ "OnHeapExecutionMemory" : 0,
+ "OffHeapExecutionMemory" : 0,
+ "OnHeapStorageMemory" : 0,
+ "OffHeapStorageMemory" : 0,
+ "OnHeapUnifiedMemory" : 0,
+ "OffHeapUnifiedMemory" : 0,
+ "DirectPoolMemory" : 0,
+ "MappedPoolMemory" : 0,
+ "ProcessTreeJVMVMemory" : 0,
+ "ProcessTreeJVMRSSMemory" : 0,
+ "ProcessTreePythonVMemory" : 0,
+ "ProcessTreePythonRSSMemory" : 0,
+ "ProcessTreeOtherVMemory" : 0,
+ "ProcessTreeOtherRSSMemory" : 0,
+ "MinorGCCount" : 0,
+ "MinorGCTime" : 0,
+ "MajorGCCount" : 0,
+ "MajorGCTime" : 0
+ },
"attributes" : { },
"resources" : { },
"resourceProfileId" : 0,
@@ -175,6 +241,28 @@
"totalOffHeapStorageMemory" : 524288000
},
"blacklistedInStages" : [ ],
+ "peakMemoryMetrics" : {
+ "JVMHeapMemory" : 0,
+ "JVMOffHeapMemory" : 0,
+ "OnHeapExecutionMemory" : 0,
+ "OffHeapExecutionMemory" : 0,
+ "OnHeapStorageMemory" : 0,
+ "OffHeapStorageMemory" : 0,
+ "OnHeapUnifiedMemory" : 0,
+ "OffHeapUnifiedMemory" : 0,
+ "DirectPoolMemory" : 0,
+ "MappedPoolMemory" : 0,
+ "ProcessTreeJVMVMemory" : 0,
+ "ProcessTreeJVMRSSMemory" : 0,
+ "ProcessTreePythonVMemory" : 0,
+ "ProcessTreePythonRSSMemory" : 0,
+ "ProcessTreeOtherVMemory" : 0,
+ "ProcessTreeOtherRSSMemory" : 0,
+ "MinorGCCount" : 0,
+ "MinorGCTime" : 0,
+ "MajorGCCount" : 0,
+ "MajorGCTime" : 0
+ },
"attributes" : { },
"resources" : { },
"resourceProfileId" : 0,
diff --git a/core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_expectation.json b/core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_expectation.json
index 47a01b2..8869fb4 100644
--- a/core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_expectation.json
+++ b/core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_expectation.json
@@ -64,6 +64,28 @@
"totalOffHeapStorageMemory" : 524288000
},
"blacklistedInStages" : [ ],
+ "peakMemoryMetrics" : {
+ "JVMHeapMemory" : 0,
+ "JVMOffHeapMemory" : 0,
+ "OnHeapExecutionMemory" : 0,
+ "OffHeapExecutionMemory" : 0,
+ "OnHeapStorageMemory" : 0,
+ "OffHeapStorageMemory" : 0,
+ "OnHeapUnifiedMemory" : 0,
+ "OffHeapUnifiedMemory" : 0,
+ "DirectPoolMemory" : 0,
+ "MappedPoolMemory" : 0,
+ "ProcessTreeJVMVMemory" : 0,
+ "ProcessTreeJVMRSSMemory" : 0,
+ "ProcessTreePythonVMemory" : 0,
+ "ProcessTreePythonRSSMemory" : 0,
+ "ProcessTreeOtherVMemory" : 0,
+ "ProcessTreeOtherRSSMemory" : 0,
+ "MinorGCCount" : 0,
+ "MinorGCTime" : 0,
+ "MajorGCCount" : 0,
+ "MajorGCTime" : 0
+ },
"attributes" : { },
"resources" : { },
"resourceProfileId" : 0,
@@ -101,6 +123,28 @@
"totalOffHeapStorageMemory" : 524288000
},
"blacklistedInStages" : [ ],
+ "peakMemoryMetrics" : {
+ "JVMHeapMemory" : 0,
+ "JVMOffHeapMemory" : 0,
+ "OnHeapExecutionMemory" : 0,
+ "OffHeapExecutionMemory" : 0,
+ "OnHeapStorageMemory" : 0,
+ "OffHeapStorageMemory" : 0,
+ "OnHeapUnifiedMemory" : 0,
+ "OffHeapUnifiedMemory" : 0,
+ "DirectPoolMemory" : 0,
+ "MappedPoolMemory" : 0,
+ "ProcessTreeJVMVMemory" : 0,
+ "ProcessTreeJVMRSSMemory" : 0,
+ "ProcessTreePythonVMemory" : 0,
+ "ProcessTreePythonRSSMemory" : 0,
+ "ProcessTreeOtherVMemory" : 0,
+ "ProcessTreeOtherRSSMemory" : 0,
+ "MinorGCCount" : 0,
+ "MinorGCTime" : 0,
+ "MajorGCCount" : 0,
+ "MajorGCTime" : 0
+ },
"attributes" : { },
"resources" : { },
"resourceProfileId" : 0,
@@ -138,6 +182,28 @@
"totalOffHeapStorageMemory": 524288000
},
"blacklistedInStages" : [ ],
+ "peakMemoryMetrics" : {
+ "JVMHeapMemory" : 0,
+ "JVMOffHeapMemory" : 0,
+ "OnHeapExecutionMemory" : 0,
+ "OffHeapExecutionMemory" : 0,
+ "OnHeapStorageMemory" : 0,
+ "OffHeapStorageMemory" : 0,
+ "OnHeapUnifiedMemory" : 0,
+ "OffHeapUnifiedMemory" : 0,
+ "DirectPoolMemory" : 0,
+ "MappedPoolMemory" : 0,
+ "ProcessTreeJVMVMemory" : 0,
+ "ProcessTreeJVMRSSMemory" : 0,
+ "ProcessTreePythonVMemory" : 0,
+ "ProcessTreePythonRSSMemory" : 0,
+ "ProcessTreeOtherVMemory" : 0,
+ "ProcessTreeOtherRSSMemory" : 0,
+ "MinorGCCount" : 0,
+ "MinorGCTime" : 0,
+ "MajorGCCount" : 0,
+ "MajorGCTime" : 0
+ },
"attributes" : { },
"resources" : { },
"resourceProfileId" : 0,
@@ -175,6 +241,28 @@
"totalOffHeapStorageMemory": 524288000
},
"blacklistedInStages" : [ ],
+ "peakMemoryMetrics" : {
+ "JVMHeapMemory" : 0,
+ "JVMOffHeapMemory" : 0,
+ "OnHeapExecutionMemory" : 0,
+ "OffHeapExecutionMemory" : 0,
+ "OnHeapStorageMemory" : 0,
+ "OffHeapStorageMemory" : 0,
+ "OnHeapUnifiedMemory" : 0,
+ "OffHeapUnifiedMemory" : 0,
+ "DirectPoolMemory" : 0,
+ "MappedPoolMemory" : 0,
+ "ProcessTreeJVMVMemory" : 0,
+ "ProcessTreeJVMRSSMemory" : 0,
+ "ProcessTreePythonVMemory" : 0,
+ "ProcessTreePythonRSSMemory" : 0,
+ "ProcessTreeOtherVMemory" : 0,
+ "ProcessTreeOtherRSSMemory" : 0,
+ "MinorGCCount" : 0,
+ "MinorGCTime" : 0,
+ "MajorGCCount" : 0,
+ "MajorGCTime" : 0
+ },
"attributes" : { },
"resources" : { },
"resourceProfileId" : 0,
diff --git a/core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_unexcluding_expectation.json b/core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_unexcluding_expectation.json
index 46e8f81..21cc9d0 100644
--- a/core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_unexcluding_expectation.json
+++ b/core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_unexcluding_expectation.json
@@ -52,6 +52,28 @@
"stderr" : "http://172.22.0.111:64521/logPage/?appId=app-20161115172038-0000&executorId=3&logType=stderr"
},
"blacklistedInStages" : [ ],
+ "peakMemoryMetrics" : {
+ "JVMHeapMemory" : 0,
+ "JVMOffHeapMemory" : 0,
+ "OnHeapExecutionMemory" : 0,
+ "OffHeapExecutionMemory" : 0,
+ "OnHeapStorageMemory" : 0,
+ "OffHeapStorageMemory" : 0,
+ "OnHeapUnifiedMemory" : 0,
+ "OffHeapUnifiedMemory" : 0,
+ "DirectPoolMemory" : 0,
+ "MappedPoolMemory" : 0,
+ "ProcessTreeJVMVMemory" : 0,
+ "ProcessTreeJVMRSSMemory" : 0,
+ "ProcessTreePythonVMemory" : 0,
+ "ProcessTreePythonRSSMemory" : 0,
+ "ProcessTreeOtherVMemory" : 0,
+ "ProcessTreeOtherRSSMemory" : 0,
+ "MinorGCCount" : 0,
+ "MinorGCTime" : 0,
+ "MajorGCCount" : 0,
+ "MajorGCTime" : 0
+ },
"attributes" : { },
"resources" : { },
"resourceProfileId" : 0,
@@ -83,6 +105,28 @@
"stderr" : "http://172.22.0.111:64519/logPage/?appId=app-20161115172038-0000&executorId=2&logType=stderr"
},
"blacklistedInStages" : [ ],
+ "peakMemoryMetrics" : {
+ "JVMHeapMemory" : 0,
+ "JVMOffHeapMemory" : 0,
+ "OnHeapExecutionMemory" : 0,
+ "OffHeapExecutionMemory" : 0,
+ "OnHeapStorageMemory" : 0,
+ "OffHeapStorageMemory" : 0,
+ "OnHeapUnifiedMemory" : 0,
+ "OffHeapUnifiedMemory" : 0,
+ "DirectPoolMemory" : 0,
+ "MappedPoolMemory" : 0,
+ "ProcessTreeJVMVMemory" : 0,
+ "ProcessTreeJVMRSSMemory" : 0,
+ "ProcessTreePythonVMemory" : 0,
+ "ProcessTreePythonRSSMemory" : 0,
+ "ProcessTreeOtherVMemory" : 0,
+ "ProcessTreeOtherRSSMemory" : 0,
+ "MinorGCCount" : 0,
+ "MinorGCTime" : 0,
+ "MajorGCCount" : 0,
+ "MajorGCTime" : 0
+ },
"attributes" : { },
"resources" : { },
"resourceProfileId" : 0,
@@ -114,6 +158,28 @@
"stderr" : "http://172.22.0.111:64518/logPage/?appId=app-20161115172038-0000&executorId=1&logType=stderr"
},
"blacklistedInStages" : [ ],
+ "peakMemoryMetrics" : {
+ "JVMHeapMemory" : 0,
+ "JVMOffHeapMemory" : 0,
+ "OnHeapExecutionMemory" : 0,
+ "OffHeapExecutionMemory" : 0,
+ "OnHeapStorageMemory" : 0,
+ "OffHeapStorageMemory" : 0,
+ "OnHeapUnifiedMemory" : 0,
+ "OffHeapUnifiedMemory" : 0,
+ "DirectPoolMemory" : 0,
+ "MappedPoolMemory" : 0,
+ "ProcessTreeJVMVMemory" : 0,
+ "ProcessTreeJVMRSSMemory" : 0,
+ "ProcessTreePythonVMemory" : 0,
+ "ProcessTreePythonRSSMemory" : 0,
+ "ProcessTreeOtherVMemory" : 0,
+ "ProcessTreeOtherRSSMemory" : 0,
+ "MinorGCCount" : 0,
+ "MinorGCTime" : 0,
+ "MajorGCCount" : 0,
+ "MajorGCTime" : 0
+ },
"attributes" : { },
"resources" : { },
"resourceProfileId" : 0,
@@ -145,6 +211,28 @@
"stderr" : "http://172.22.0.111:64517/logPage/?appId=app-20161115172038-0000&executorId=0&logType=stderr"
},
"blacklistedInStages" : [ ],
+ "peakMemoryMetrics" : {
+ "JVMHeapMemory" : 0,
+ "JVMOffHeapMemory" : 0,
+ "OnHeapExecutionMemory" : 0,
+ "OffHeapExecutionMemory" : 0,
+ "OnHeapStorageMemory" : 0,
+ "OffHeapStorageMemory" : 0,
+ "OnHeapUnifiedMemory" : 0,
+ "OffHeapUnifiedMemory" : 0,
+ "DirectPoolMemory" : 0,
+ "MappedPoolMemory" : 0,
+ "ProcessTreeJVMVMemory" : 0,
+ "ProcessTreeJVMRSSMemory" : 0,
+ "ProcessTreePythonVMemory" : 0,
+ "ProcessTreePythonRSSMemory" : 0,
+ "ProcessTreeOtherVMemory" : 0,
+ "ProcessTreeOtherRSSMemory" : 0,
+ "MinorGCCount" : 0,
+ "MinorGCTime" : 0,
+ "MajorGCCount" : 0,
+ "MajorGCTime" : 0
+ },
"attributes" : { },
"resources" : { },
"resourceProfileId" : 0,
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org