Posted to commits@spark.apache.org by ya...@apache.org on 2020/04/18 12:01:21 UTC

[spark] branch master updated: [SPARK-31477][SQL] Dump codegen and compile time in BenchmarkQueryTest

This is an automated email from the ASF dual-hosted git repository.

yamamuro pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 6bf5f01  [SPARK-31477][SQL] Dump codegen and compile time in BenchmarkQueryTest
6bf5f01 is described below

commit 6bf5f01a4a8b7708ce563e0a0e9a49e8ff89c71e
Author: gatorsmile <ga...@gmail.com>
AuthorDate: Sat Apr 18 20:59:45 2020 +0900

    [SPARK-31477][SQL] Dump codegen and compile time in BenchmarkQueryTest
    
    ### What changes were proposed in this pull request?
    This PR dumps the codegen and compilation times for the benchmark query tests.
    
    ### Why are the changes needed?
    To measure the codegen and compilation time costs in the TPC-DS queries.
    
    ### Does this PR introduce any user-facing change?
    No
    
    ### How was this patch tested?
    Manual test on my local laptop:
    ```
    23:13:12.845 WARN org.apache.spark.sql.TPCDSQuerySuite:
    === Metrics of Whole-stage Codegen ===
    Total code generation time: 21.275102261 seconds
    Total compilation time: 12.223771828 seconds
    ```
    
    Closes #28252 from gatorsmile/testMastercode.
    
    Authored-by: gatorsmile <ga...@gmail.com>
    Signed-off-by: Takeshi Yamamuro <ya...@apache.org>
---
 .../sql/catalyst/expressions/codegen/CodeGenerator.scala    |  2 +-
 .../apache/spark/sql/execution/WholeStageCodegenExec.scala  |  2 +-
 .../scala/org/apache/spark/sql/BenchmarkQueryTest.scala     | 13 +++++++++++++
 .../test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala | 13 +++++++------
 4 files changed, 22 insertions(+), 8 deletions(-)
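
For reference, the figures in the sample output above are plain nanosecond counters divided by NANOS_PER_SECOND from DateTimeConstants, which is exactly what the hunks below do. A minimal sketch of that conversion, using a hand-picked nanosecond value that matches the 21.275102261-second figure in the commit message (the object name is hypothetical):

```scala
// Minimal sketch of the nanoseconds-to-seconds conversion used in this patch.
// The accumulated value below is hand-picked for illustration; in the patch it
// comes from WholeStageCodegenExec.codeGenTime and CodeGenerator.compileTime.
import org.apache.spark.sql.catalyst.util.DateTimeConstants.NANOS_PER_SECOND

object ConversionSketch {
  def main(args: Array[String]): Unit = {
    val codeGenNanos = 21275102261L  // hypothetical accumulated nanoseconds
    val codeGenSeconds = codeGenNanos.toDouble / NANOS_PER_SECOND
    println(s"Total code generation time: $codeGenSeconds seconds")
  }
}
```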

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index 3042a27..1cc7836 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -1324,7 +1324,7 @@ object CodeGenerator extends Logging {
 
   // Reset compile time.
   // Visible for testing
-  def resetCompileTime: Unit = _compileTime.reset()
+  def resetCompileTime(): Unit = _compileTime.reset()
 
   /**
    * Compile the Java source code into a Java class, using Janino.
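
The only change in this file adds empty parentheses to the declaration. By Scala convention, a method that performs a side effect (here, resetting a counter) is declared, and called, with (), while a pure accessor stays parameterless. A tiny illustration of that convention (the object and field names below are made up, not Spark internals):

```scala
// Illustration of the empty-parentheses convention behind this one-line change.
// `Counters` and `_elapsedNanos` are hypothetical names, not Spark internals.
import java.util.concurrent.atomic.LongAdder

object Counters {
  private val _elapsedNanos = new LongAdder

  // Pure accessor: no side effects, so no parentheses.
  def elapsedNanos: Long = _elapsedNanos.sum()

  // Side-effecting reset: declared (and called) with empty parentheses.
  def resetElapsedNanos(): Unit = _elapsedNanos.reset()
}
```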
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
index 9f6e4fc..0244542 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
@@ -586,7 +586,7 @@ object WholeStageCodegenExec {
 
   // Reset generation time of Java source code.
   // Visible for testing
-  def resetCodeGenTime: Unit = _codeGenTime.set(0L)
+  def resetCodeGenTime(): Unit = _codeGenTime.set(0L)
 }
 
 /**
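
The same parenthesization fix is applied to resetCodeGenTime, which zeroes the code generation timer. The general accumulate/read/reset pattern behind such a counter looks roughly like the sketch below; the names are hypothetical, and Spark's internal field type may differ:

```scala
// Hedged sketch of an accumulate/read/reset timer in the style of these counters.
// CodeGenTimer and its members are hypothetical; Spark's internals may differ.
import java.util.concurrent.atomic.AtomicLong

object CodeGenTimer {
  private val _codeGenTime = new AtomicLong(0L)

  // Accumulated time, in nanoseconds.
  def codeGenTime: Long = _codeGenTime.get()

  // Reset the accumulated time; side-effecting, so declared with ().
  def resetCodeGenTime(): Unit = _codeGenTime.set(0L)

  // Measure a block of work and add its duration to the counter.
  def timed[T](body: => T): T = {
    val start = System.nanoTime()
    try body finally _codeGenTime.addAndGet(System.nanoTime() - start)
  }
}
```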
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala
index 07afd41..2c3b37a 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql
 import org.apache.spark.internal.config.Tests.IS_TESTING
 import org.apache.spark.sql.catalyst.expressions.codegen.{ByteCodeStats, CodeFormatter, CodeGenerator}
 import org.apache.spark.sql.catalyst.rules.RuleExecutor
+import org.apache.spark.sql.catalyst.util.DateTimeConstants.NANOS_PER_SECOND
 import org.apache.spark.sql.execution.{SparkPlan, WholeStageCodegenExec}
 import org.apache.spark.sql.test.SharedSparkSession
 import org.apache.spark.util.Utils
@@ -36,7 +37,17 @@ abstract class BenchmarkQueryTest extends QueryTest with SharedSparkSession {
   protected override def afterAll(): Unit = {
     try {
       // For debugging dump some statistics about how much time was spent in various optimizer rules
+      // code generation, and compilation.
       logWarning(RuleExecutor.dumpTimeSpent())
+      val codeGenTime = WholeStageCodegenExec.codeGenTime.toDouble / NANOS_PER_SECOND
+      val compileTime = CodeGenerator.compileTime.toDouble / NANOS_PER_SECOND
+      val codegenInfo =
+        s"""
+           |=== Metrics of Whole-stage Codegen ===
+           |Total code generation time: $codeGenTime seconds
+           |Total compile time: $compileTime seconds
+         """.stripMargin
+      logWarning(codegenInfo)
       spark.sessionState.catalog.reset()
     } finally {
       super.afterAll()
@@ -46,6 +57,8 @@ abstract class BenchmarkQueryTest extends QueryTest with SharedSparkSession {
   override def beforeAll(): Unit = {
     super.beforeAll()
     RuleExecutor.resetMetrics()
+    CodeGenerator.resetCompileTime()
+    WholeStageCodegenExec.resetCodeGenTime()
   }
 
   protected def checkGeneratedCode(plan: SparkPlan, checkMethodCodeSize: Boolean = true): Unit = {
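
Putting this hunk together: a suite that wants the same dump resets the counters in beforeAll and logs the converted totals in afterAll. A self-contained sketch, assuming this patch is applied and the suite lives in Spark's sql/core test sources (the suite name and its single test are hypothetical):

```scala
// Hedged sketch of the reset-then-dump pattern this hunk adds to BenchmarkQueryTest.
// Only the suite name and its test are made up; the referenced objects are the
// ones touched by this patch.
package org.apache.spark.sql

import org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator
import org.apache.spark.sql.catalyst.rules.RuleExecutor
import org.apache.spark.sql.catalyst.util.DateTimeConstants.NANOS_PER_SECOND
import org.apache.spark.sql.execution.WholeStageCodegenExec
import org.apache.spark.sql.test.SharedSparkSession

class CodegenMetricsDumpSuite extends QueryTest with SharedSparkSession {

  override def beforeAll(): Unit = {
    super.beforeAll()
    // Start from zero so the totals cover only this suite's queries.
    RuleExecutor.resetMetrics()
    CodeGenerator.resetCompileTime()
    WholeStageCodegenExec.resetCodeGenTime()
  }

  override def afterAll(): Unit = {
    try {
      // Convert the accumulated nanosecond counters to seconds and log them.
      val codeGenTime = WholeStageCodegenExec.codeGenTime.toDouble / NANOS_PER_SECOND
      val compileTime = CodeGenerator.compileTime.toDouble / NANOS_PER_SECOND
      logWarning(
        s"""
           |=== Metrics of Whole-stage Codegen ===
           |Total code generation time: $codeGenTime seconds
           |Total compile time: $compileTime seconds
         """.stripMargin)
    } finally {
      super.afterAll()
    }
  }

  test("run a simple query so there is something to measure") {
    checkAnswer(sql("SELECT 1 + 1"), Row(2))
  }
}
```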
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
index bc9b4d8..2b977e7 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
@@ -683,8 +683,8 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession {
     // Add Locale setting
     Locale.setDefault(Locale.US)
     RuleExecutor.resetMetrics()
-    CodeGenerator.resetCompileTime
-    WholeStageCodegenExec.resetCodeGenTime
+    CodeGenerator.resetCompileTime()
+    WholeStageCodegenExec.resetCodeGenTime()
   }
 
   override def afterAll(): Unit = {
@@ -696,12 +696,13 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession {
       // For debugging dump some statistics about how much time was spent in various optimizer rules
       logWarning(RuleExecutor.dumpTimeSpent())
 
-      val generateJavaTime = WholeStageCodegenExec.codeGenTime
+      val codeGenTime = WholeStageCodegenExec.codeGenTime.toDouble / NANOS_PER_SECOND
+      val compileTime = CodeGenerator.compileTime.toDouble / NANOS_PER_SECOND
       val codegenInfo =
         s"""
-           |=== Metrics of Whole-Stage Codegen ===
-           |Total code generation time: ${generateJavaTime.toDouble / NANOS_PER_SECOND} seconds
-           |Total compile time: ${CodeGenerator.compileTime.toDouble / NANOS_PER_SECOND} seconds
+           |=== Metrics of Whole-stage Codegen ===
+           |Total code generation time: $codeGenTime seconds
+           |Total compile time: $compileTime seconds
          """.stripMargin
       logWarning(codegenInfo)
     } finally {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org