You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2020/10/14 16:19:55 UTC

[spark] branch master updated: [SPARK-33026][SQL][FOLLOWUP] metrics name should be numOutputRows

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new f3ad32f  [SPARK-33026][SQL][FOLLOWUP] metrics name should be numOutputRows
f3ad32f is described below

commit f3ad32f4b6fc55e89e7fb222ed565ad3e32d47c6
Author: Wenchen Fan <we...@databricks.com>
AuthorDate: Wed Oct 14 16:17:28 2020 +0000

    [SPARK-33026][SQL][FOLLOWUP] metrics name should be numOutputRows
    
    ### What changes were proposed in this pull request?
    
    Follow the convention and rename the metric `numRows` to `numOutputRows`.
    
    ### Why are the changes needed?
    
    `FilterExec`, `HashAggregateExec`, etc. all use `numOutputRows`
    
    ### Does this PR introduce _any_ user-facing change?
    
    no
    
    ### How was this patch tested?
    
    existing tests
    
    Closes #30039 from cloud-fan/minor.
    
    Authored-by: Wenchen Fan <we...@databricks.com>
    Signed-off-by: Wenchen Fan <we...@databricks.com>
---
 .../spark/sql/execution/exchange/BroadcastExchangeExec.scala      | 8 ++++----
 .../org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala   | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/BroadcastExchangeExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/BroadcastExchangeExec.scala
index 4b884df..0c5fee2 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/BroadcastExchangeExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/BroadcastExchangeExec.scala
@@ -78,7 +78,7 @@ case class BroadcastExchangeExec(
 
   override lazy val metrics = Map(
     "dataSize" -> SQLMetrics.createSizeMetric(sparkContext, "data size"),
-    "numRows" -> SQLMetrics.createMetric(sparkContext, "number of rows"),
+    "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"),
     "collectTime" -> SQLMetrics.createTimingMetric(sparkContext, "time to collect"),
     "buildTime" -> SQLMetrics.createTimingMetric(sparkContext, "time to build"),
     "broadcastTime" -> SQLMetrics.createTimingMetric(sparkContext, "time to broadcast"))
@@ -91,8 +91,8 @@ case class BroadcastExchangeExec(
 
   override def runtimeStatistics: Statistics = {
     val dataSize = metrics("dataSize").value
-    val numRows = metrics("numRows").value
-    Statistics(dataSize, Some(numRows))
+    val rowCount = metrics("numOutputRows").value
+    Statistics(dataSize, Some(rowCount))
   }
 
   @transient
@@ -116,11 +116,11 @@ case class BroadcastExchangeExec(
             val beforeCollect = System.nanoTime()
             // Use executeCollect/executeCollectIterator to avoid conversion to Scala types
             val (numRows, input) = child.executeCollectIterator()
+            longMetric("numOutputRows") += numRows
             if (numRows >= MAX_BROADCAST_TABLE_ROWS) {
               throw new SparkException(
                 s"Cannot broadcast the table over $MAX_BROADCAST_TABLE_ROWS rows: $numRows rows")
             }
-            longMetric("numRows") += numRows
 
             val beforeBuild = System.nanoTime()
             longMetric("collectTime") += NANOSECONDS.toMillis(beforeBuild - beforeCollect)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala
index e404e46..4872906 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala
@@ -751,7 +751,7 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils
         }
 
         assert(exchanges.size === 1)
-        testMetricsInSparkPlanOperator(exchanges.head, Map("numRows" -> 2))
+        testMetricsInSparkPlanOperator(exchanges.head, Map("numOutputRows" -> 2))
       }
     }
   }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org