You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ya...@apache.org on 2020/04/18 12:01:21 UTC
[spark] branch master updated: [SPARK-31477][SQL] Dump codegen and
compile time in BenchmarkQueryTest
This is an automated email from the ASF dual-hosted git repository.
yamamuro pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 6bf5f01 [SPARK-31477][SQL] Dump codegen and compile time in BenchmarkQueryTest
6bf5f01 is described below
commit 6bf5f01a4a8b7708ce563e0a0e9a49e8ff89c71e
Author: gatorsmile <ga...@gmail.com>
AuthorDate: Sat Apr 18 20:59:45 2020 +0900
[SPARK-31477][SQL] Dump codegen and compile time in BenchmarkQueryTest
### What changes were proposed in this pull request?
This PR logs the total code generation time and compilation time when a benchmark query test suite finishes.
### Why are the changes needed?
To measure the code generation and compilation time costs of TPC-DS queries.
### Does this PR introduce any user-facing change?
No
### How was this patch tested?
Manually tested on my local laptop:
```
23:13:12.845 WARN org.apache.spark.sql.TPCDSQuerySuite:
=== Metrics of Whole-stage Codegen ===
Total code generation time: 21.275102261 seconds
Total compile time: 12.223771828 seconds
```
Closes #28252 from gatorsmile/testMastercode.
Authored-by: gatorsmile <ga...@gmail.com>
Signed-off-by: Takeshi Yamamuro <ya...@apache.org>
---
.../sql/catalyst/expressions/codegen/CodeGenerator.scala | 2 +-
.../apache/spark/sql/execution/WholeStageCodegenExec.scala | 2 +-
.../scala/org/apache/spark/sql/BenchmarkQueryTest.scala | 13 +++++++++++++
.../test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala | 13 +++++++------
4 files changed, 22 insertions(+), 8 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index 3042a27..1cc7836 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -1324,7 +1324,7 @@ object CodeGenerator extends Logging {
// Reset compile time.
// Visible for testing
- def resetCompileTime: Unit = _compileTime.reset()
+ def resetCompileTime(): Unit = _compileTime.reset()
/**
* Compile the Java source code into a Java class, using Janino.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
index 9f6e4fc..0244542 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
@@ -586,7 +586,7 @@ object WholeStageCodegenExec {
// Reset generation time of Java source code.
// Visible for testing
- def resetCodeGenTime: Unit = _codeGenTime.set(0L)
+ def resetCodeGenTime(): Unit = _codeGenTime.set(0L)
}
/**
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala
index 07afd41..2c3b37a 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql
import org.apache.spark.internal.config.Tests.IS_TESTING
import org.apache.spark.sql.catalyst.expressions.codegen.{ByteCodeStats, CodeFormatter, CodeGenerator}
import org.apache.spark.sql.catalyst.rules.RuleExecutor
+import org.apache.spark.sql.catalyst.util.DateTimeConstants.NANOS_PER_SECOND
import org.apache.spark.sql.execution.{SparkPlan, WholeStageCodegenExec}
import org.apache.spark.sql.test.SharedSparkSession
import org.apache.spark.util.Utils
@@ -36,7 +37,17 @@ abstract class BenchmarkQueryTest extends QueryTest with SharedSparkSession {
protected override def afterAll(): Unit = {
try {
// For debugging dump some statistics about how much time was spent in various optimizer rules
+ // as well as in code generation and compilation.
logWarning(RuleExecutor.dumpTimeSpent())
+ val codeGenTime = WholeStageCodegenExec.codeGenTime.toDouble / NANOS_PER_SECOND
+ val compileTime = CodeGenerator.compileTime.toDouble / NANOS_PER_SECOND
+ val codegenInfo =
+ s"""
+ |=== Metrics of Whole-stage Codegen ===
+ |Total code generation time: $codeGenTime seconds
+ |Total compile time: $compileTime seconds
+ """.stripMargin
+ logWarning(codegenInfo)
spark.sessionState.catalog.reset()
} finally {
super.afterAll()
@@ -46,6 +57,8 @@ abstract class BenchmarkQueryTest extends QueryTest with SharedSparkSession {
override def beforeAll(): Unit = {
super.beforeAll()
RuleExecutor.resetMetrics()
+ CodeGenerator.resetCompileTime()
+ WholeStageCodegenExec.resetCodeGenTime()
}
protected def checkGeneratedCode(plan: SparkPlan, checkMethodCodeSize: Boolean = true): Unit = {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
index bc9b4d8..2b977e7 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
@@ -683,8 +683,8 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession {
// Add Locale setting
Locale.setDefault(Locale.US)
RuleExecutor.resetMetrics()
- CodeGenerator.resetCompileTime
- WholeStageCodegenExec.resetCodeGenTime
+ CodeGenerator.resetCompileTime()
+ WholeStageCodegenExec.resetCodeGenTime()
}
override def afterAll(): Unit = {
@@ -696,12 +696,13 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession {
// For debugging dump some statistics about how much time was spent in various optimizer rules
logWarning(RuleExecutor.dumpTimeSpent())
- val generateJavaTime = WholeStageCodegenExec.codeGenTime
+ val codeGenTime = WholeStageCodegenExec.codeGenTime.toDouble / NANOS_PER_SECOND
+ val compileTime = CodeGenerator.compileTime.toDouble / NANOS_PER_SECOND
val codegenInfo =
s"""
- |=== Metrics of Whole-Stage Codegen ===
- |Total code generation time: ${generateJavaTime.toDouble / NANOS_PER_SECOND} seconds
- |Total compile time: ${CodeGenerator.compileTime.toDouble / NANOS_PER_SECOND} seconds
+ |=== Metrics of Whole-stage Codegen ===
+ |Total code generation time: $codeGenTime seconds
+ |Total compile time: $compileTime seconds
""".stripMargin
logWarning(codegenInfo)
} finally {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org