You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ir...@apache.org on 2020/01/31 20:28:30 UTC
[spark] branch master updated: [SPARK-27324][DOC][CORE] Document
configurations related to executor metrics and modify a configuration
This is an automated email from the ASF dual-hosted git repository.
irashid pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 387ce89 [SPARK-27324][DOC][CORE] Document configurations related to executor metrics and modify a configuration
387ce89 is described below
commit 387ce89a0631f1a4c6668b90ff2a7bbcf11919cd
Author: Wing Yew Poon <wy...@cloudera.com>
AuthorDate: Fri Jan 31 14:28:02 2020 -0600
[SPARK-27324][DOC][CORE] Document configurations related to executor metrics and modify a configuration
### What changes were proposed in this pull request?
Add a section to the Configuration page to document configurations for executor metrics.
At the same time, rename spark.eventLog.logStageExecutorProcessTreeMetrics.enabled to spark.executor.processTreeMetrics.enabled and make it independent of spark.eventLog.logStageExecutorMetrics.enabled.
### Why are the changes needed?
Executor metrics are new in Spark 3.0. They lack documentation.
Memory metrics as a whole are always collected, but the ones obtained from the process tree have to be optionally enabled. Making this depend on a single configuration makes for more intuitive behavior. Given this, the configuration property is renamed to better reflect its meaning.
### Does this PR introduce any user-facing change?
Yes, only in that the configurations are all new to 3.0.
### How was this patch tested?
Not necessary.
Closes #27329 from wypoon/SPARK-27324.
Authored-by: Wing Yew Poon <wy...@cloudera.com>
Signed-off-by: Imran Rashid <ir...@cloudera.com>
---
.../spark/executor/ExecutorMetricsSource.scala | 3 +-
.../spark/executor/ProcfsMetricsGetter.scala | 8 ++---
.../org/apache/spark/internal/config/package.scala | 17 +++++++---
.../spark/deploy/history/HistoryServerSuite.scala | 2 +-
docs/configuration.md | 37 ++++++++++++++++++++++
docs/monitoring.md | 20 ++++++------
6 files changed, 65 insertions(+), 22 deletions(-)
diff --git a/core/src/main/scala/org/apache/spark/executor/ExecutorMetricsSource.scala b/core/src/main/scala/org/apache/spark/executor/ExecutorMetricsSource.scala
index b052e43..14645f7 100644
--- a/core/src/main/scala/org/apache/spark/executor/ExecutorMetricsSource.scala
+++ b/core/src/main/scala/org/apache/spark/executor/ExecutorMetricsSource.scala
@@ -32,8 +32,7 @@ import org.apache.spark.metrics.source.Source
* spark.executor.metrics.pollingInterval=<interval in ms>.
* (2) Procfs metrics are gathered all in one-go and only conditionally:
* if the /proc filesystem exists
- * and spark.eventLog.logStageExecutorProcessTreeMetrics.enabled=true
- * and spark.eventLog.logStageExecutorMetrics.enabled=true.
+ * and spark.executor.processTreeMetrics.enabled=true.
*/
private[spark] class ExecutorMetricsSource extends Source {
diff --git a/core/src/main/scala/org/apache/spark/executor/ProcfsMetricsGetter.scala b/core/src/main/scala/org/apache/spark/executor/ProcfsMetricsGetter.scala
index 0d5dcfb4..80ef757 100644
--- a/core/src/main/scala/org/apache/spark/executor/ProcfsMetricsGetter.scala
+++ b/core/src/main/scala/org/apache/spark/executor/ProcfsMetricsGetter.scala
@@ -58,11 +58,9 @@ private[spark] class ProcfsMetricsGetter(procfsDir: String = "/proc/") extends Logging
logWarning("Exception checking for procfs dir", ioe)
false
}
- val shouldLogStageExecutorMetrics =
- SparkEnv.get.conf.get(config.EVENT_LOG_STAGE_EXECUTOR_METRICS)
- val shouldLogStageExecutorProcessTreeMetrics =
- SparkEnv.get.conf.get(config.EVENT_LOG_PROCESS_TREE_METRICS)
- procDirExists.get && shouldLogStageExecutorProcessTreeMetrics && shouldLogStageExecutorMetrics
+ val shouldPollProcessTreeMetrics =
+ SparkEnv.get.conf.get(config.EXECUTOR_PROCESS_TREE_METRICS_ENABLED)
+ procDirExists.get && shouldPollProcessTreeMetrics
}
}
diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala
index 40b05cf..e68368f 100644
--- a/core/src/main/scala/org/apache/spark/internal/config/package.scala
+++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala
@@ -148,11 +148,8 @@ package object config {
private[spark] val EVENT_LOG_STAGE_EXECUTOR_METRICS =
ConfigBuilder("spark.eventLog.logStageExecutorMetrics.enabled")
- .booleanConf
- .createWithDefault(false)
-
- private[spark] val EVENT_LOG_PROCESS_TREE_METRICS =
- ConfigBuilder("spark.eventLog.logStageExecutorProcessTreeMetrics.enabled")
+ .doc("Whether to write per-stage peaks of executor metrics (for each executor) " +
+ "to the event log.")
.booleanConf
.createWithDefault(false)
@@ -215,8 +212,18 @@ package object config {
private[spark] val EXECUTOR_HEARTBEAT_MAX_FAILURES =
ConfigBuilder("spark.executor.heartbeat.maxFailures").internal().intConf.createWithDefault(60)
+ private[spark] val EXECUTOR_PROCESS_TREE_METRICS_ENABLED =
+ ConfigBuilder("spark.executor.processTreeMetrics.enabled")
+ .doc("Whether to collect process tree metrics (from the /proc filesystem) when collecting " +
+ "executor metrics.")
+ .booleanConf
+ .createWithDefault(false)
+
private[spark] val EXECUTOR_METRICS_POLLING_INTERVAL =
ConfigBuilder("spark.executor.metrics.pollingInterval")
+ .doc("How often to collect executor metrics (in milliseconds). " +
+ "If 0, the polling is done on executor heartbeats. " +
+ "If positive, the polling is done at this interval.")
.timeConf(TimeUnit.MILLISECONDS)
.createWithDefaultString("0")
diff --git a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala
index 06554e5..206db0f 100644
--- a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala
@@ -84,7 +84,7 @@ class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with Matchers
.set(IS_TESTING, true)
.set(LOCAL_STORE_DIR, storeDir.getAbsolutePath())
.set(EVENT_LOG_STAGE_EXECUTOR_METRICS, true)
- .set(EVENT_LOG_PROCESS_TREE_METRICS, true)
+ .set(EXECUTOR_PROCESS_TREE_METRICS_ENABLED, true)
conf.setAll(extraConf)
provider = new FsHistoryProvider(conf)
provider.checkForLogs()
diff --git a/docs/configuration.md b/docs/configuration.md
index 559c5cd..8164ed4 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -1624,6 +1624,43 @@ Apart from these, the following properties are also available, and may be useful
</tr>
</table>
+### Executor Metrics
+
+<table class="table">
+<tr><th>Property Name</th><th>Default</th><th>Meaning</th></tr>
+<tr>
+ <td><code>spark.eventLog.logStageExecutorMetrics.enabled</code></td>
+ <td>false</td>
+ <td>
+ Whether to write per-stage peaks of executor metrics (for each executor) to the event log.
+ <br />
+ <em>Note:</em> The metrics are polled (collected) and sent in the executor heartbeat,
+ and this is always done; this configuration is only to determine if aggregated metric peaks
+ are written to the event log.
+ </td>
</tr>
+<tr>
+  <td><code>spark.executor.processTreeMetrics.enabled</code></td>
+  <td>false</td>
+  <td>
+    Whether to collect process tree metrics (from the /proc filesystem) when collecting
+    executor metrics.
+    <br />
+    <em>Note:</em> The process tree metrics are collected only if the /proc filesystem
+    exists.
+  </td>
+</tr>
+<tr>
+ <td><code>spark.executor.metrics.pollingInterval</code></td>
+ <td>0</td>
+ <td>
+ How often to collect executor metrics (in milliseconds).
+ <br />
+ If 0, the polling is done on executor heartbeats (thus at the heartbeat interval,
+ specified by <code>spark.executor.heartbeatInterval</code>).
+ If positive, the polling is done at this interval.
+ </td>
+</tr>
+</table>
+
### Networking
<table class="table">
diff --git a/docs/monitoring.md b/docs/monitoring.md
index 31bf1eb..fc95f6a 100644
--- a/docs/monitoring.md
+++ b/docs/monitoring.md
@@ -659,7 +659,10 @@ A list of the available metrics, with a short description:
### Executor Metrics
-Executor-level metrics are sent from each executor to the driver as part of the Heartbeat to describe the performance metrics of Executor itself like JVM heap memory, GC information. Metrics `peakExecutorMetrics.*` are only enabled if `spark.eventLog.logStageExecutorMetrics.enabled` is true.
+Executor-level metrics are sent from each executor to the driver as part of the Heartbeat to describe the performance metrics of Executor itself like JVM heap memory, GC information.
+Executor metric values and their measured peak values per executor are exposed via the REST API at the end point `/applications/[app-id]/executors`.
+In addition, aggregated per-stage peak values of the executor metrics are written to the event log if `spark.eventLog.logStageExecutorMetrics.enabled` is true.
+Executor metrics are also exposed via the Spark metrics system based on the Dropwizard metrics library.
A list of the available metrics, with a short description:
<table class="table">
@@ -755,7 +758,7 @@ A list of the available metrics, with a short description:
</tr>
<tr>
<td> .ProcessTreeJVMVMemory</td>
- <td>Virtual memory size in bytes. Enabled if spark.eventLog.logStageExecutorProcessTreeMetrics.enabled is true.</td>
+ <td>Virtual memory size in bytes. Enabled if spark.executor.processTreeMetrics.enabled is true.</td>
</tr>
<tr>
<td> .ProcessTreeJVMRSSMemory</td>
@@ -763,23 +766,23 @@ A list of the available metrics, with a short description:
in real memory. This is just the pages which count
toward text, data, or stack space. This does not
include pages which have not been demand-loaded in,
- or which are swapped out. Enabled if spark.eventLog.logStageExecutorProcessTreeMetrics.enabled is true.</td>
+ or which are swapped out. Enabled if spark.executor.processTreeMetrics.enabled is true.</td>
</tr>
<tr>
<td> .ProcessTreePythonVMemory</td>
- <td>Virtual memory size for Python in bytes. Enabled if spark.eventLog.logStageExecutorProcessTreeMetrics.enabled is true.</td>
+ <td>Virtual memory size for Python in bytes. Enabled if spark.executor.processTreeMetrics.enabled is true.</td>
</tr>
<tr>
<td> .ProcessTreePythonRSSMemory</td>
- <td>Resident Set Size for Python. Enabled if spark.eventLog.logStageExecutorProcessTreeMetrics.enabled is true.</td>
+ <td>Resident Set Size for Python. Enabled if spark.executor.processTreeMetrics.enabled is true.</td>
</tr>
<tr>
<td> .ProcessTreeOtherVMemory</td>
- <td>Virtual memory size for other kind of process in bytes. Enabled if spark.eventLog.logStageExecutorProcessTreeMetrics.enabled is true.</td>
+ <td>Virtual memory size for other kind of process in bytes. Enabled if spark.executor.processTreeMetrics.enabled is true.</td>
</tr>
<tr>
<td> .ProcessTreeOtherRSSMemory</td>
- <td>Resident Set Size for other kind of process. Enabled if spark.eventLog.logStageExecutorProcessTreeMetrics.enabled is true.</td>
+ <td>Resident Set Size for other kind of process. Enabled if spark.executor.processTreeMetrics.enabled is true.</td>
</tr>
<tr>
<td> .MinorGCCount</td>
@@ -1102,8 +1105,7 @@ when running in local mode.
- ProcessTreeOtherRSSMemory
- **note:** "ProcessTree*" metrics are collected only under certain conditions.
The conditions are the logical AND of the following: `/proc` filesystem exists,
- `spark.eventLog.logStageExecutorProcessTreeMetrics.enabled=true`,
- `spark.eventLog.logStageExecutorMetrics.enabled=true`.
+ `spark.executor.processTreeMetrics.enabled=true`.
"ProcessTree*" metrics report 0 when those conditions are not met.
- namespace=JVMCPU
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org