You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2020/10/14 16:19:55 UTC
[spark] branch master updated: [SPARK-33026][SQL][FOLLOWUP] metrics name should be numOutputRows
This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new f3ad32f [SPARK-33026][SQL][FOLLOWUP] metrics name should be numOutputRows
f3ad32f is described below
commit f3ad32f4b6fc55e89e7fb222ed565ad3e32d47c6
Author: Wenchen Fan <we...@databricks.com>
AuthorDate: Wed Oct 14 16:17:28 2020 +0000
[SPARK-33026][SQL][FOLLOWUP] metrics name should be numOutputRows
### What changes were proposed in this pull request?
Follow the convention and rename the metric `numRows` to `numOutputRows`.
### Why are the changes needed?
`FilterExec`, `HashAggregateExec`, etc. all use `numOutputRows`, so `BroadcastExchangeExec` should use the same metric name for consistency.
### Does this PR introduce _any_ user-facing change?
no
### How was this patch tested?
existing tests
Closes #30039 from cloud-fan/minor.
Authored-by: Wenchen Fan <we...@databricks.com>
Signed-off-by: Wenchen Fan <we...@databricks.com>
---
.../spark/sql/execution/exchange/BroadcastExchangeExec.scala | 8 ++++----
.../org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala | 2 +-
2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/BroadcastExchangeExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/BroadcastExchangeExec.scala
index 4b884df..0c5fee2 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/BroadcastExchangeExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/BroadcastExchangeExec.scala
@@ -78,7 +78,7 @@ case class BroadcastExchangeExec(
override lazy val metrics = Map(
"dataSize" -> SQLMetrics.createSizeMetric(sparkContext, "data size"),
- "numRows" -> SQLMetrics.createMetric(sparkContext, "number of rows"),
+ "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"),
"collectTime" -> SQLMetrics.createTimingMetric(sparkContext, "time to collect"),
"buildTime" -> SQLMetrics.createTimingMetric(sparkContext, "time to build"),
"broadcastTime" -> SQLMetrics.createTimingMetric(sparkContext, "time to broadcast"))
@@ -91,8 +91,8 @@ case class BroadcastExchangeExec(
override def runtimeStatistics: Statistics = {
val dataSize = metrics("dataSize").value
- val numRows = metrics("numRows").value
- Statistics(dataSize, Some(numRows))
+ val rowCount = metrics("numOutputRows").value
+ Statistics(dataSize, Some(rowCount))
}
@transient
@@ -116,11 +116,11 @@ case class BroadcastExchangeExec(
val beforeCollect = System.nanoTime()
// Use executeCollect/executeCollectIterator to avoid conversion to Scala types
val (numRows, input) = child.executeCollectIterator()
+ longMetric("numOutputRows") += numRows
if (numRows >= MAX_BROADCAST_TABLE_ROWS) {
throw new SparkException(
s"Cannot broadcast the table over $MAX_BROADCAST_TABLE_ROWS rows: $numRows rows")
}
- longMetric("numRows") += numRows
val beforeBuild = System.nanoTime()
longMetric("collectTime") += NANOSECONDS.toMillis(beforeBuild - beforeCollect)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala
index e404e46..4872906 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala
@@ -751,7 +751,7 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils
}
assert(exchanges.size === 1)
- testMetricsInSparkPlanOperator(exchanges.head, Map("numRows" -> 2))
+ testMetricsInSparkPlanOperator(exchanges.head, Map("numOutputRows" -> 2))
}
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org