You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2018/02/21 03:26:12 UTC
spark git commit: [SPARK-23424][SQL] Add codegenStageId in comment
Repository: spark
Updated Branches:
refs/heads/master 601d653bf -> 95e25ed1a
[SPARK-23424][SQL] Add codegenStageId in comment
## What changes were proposed in this pull request?
This PR always adds `codegenStageId` as a comment in the generated class, even when `spark.sql.codegen.comments` is disabled. It is a replication of #20419 for post-Spark 2.3.
Closes #20419
```
/* 001 */ public Object generate(Object[] references) {
/* 002 */ return new GeneratedIteratorForCodegenStage1(references);
/* 003 */ }
/* 004 */
/* 005 */ // codegenStageId=1
/* 006 */ final class GeneratedIteratorForCodegenStage1 extends org.apache.spark.sql.execution.BufferedRowIterator {
/* 007 */ private Object[] references;
...
```
## How was this patch tested?
Existing tests
Author: Kazuaki Ishizaki <is...@jp.ibm.com>
Closes #20612 from kiszk/SPARK-23424.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/95e25ed1
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/95e25ed1
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/95e25ed1
Branch: refs/heads/master
Commit: 95e25ed1a8b56937345eff637c0032aea85a503d
Parents: 601d653
Author: Kazuaki Ishizaki <is...@jp.ibm.com>
Authored: Wed Feb 21 11:26:06 2018 +0800
Committer: Wenchen Fan <we...@databricks.com>
Committed: Wed Feb 21 11:26:06 2018 +0800
----------------------------------------------------------------------
.../expressions/codegen/CodeGenerator.scala | 21 +++++++++++++++++---
.../sql/execution/WholeStageCodegenExec.scala | 4 +++-
2 files changed, 21 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/95e25ed1/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index 31ba29a..60a6f50 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -1232,14 +1232,29 @@ class CodegenContext {
/**
* Register a comment and return the corresponding place holder
+ *
+ * @param placeholderId an optionally specified identifier for the comment's placeholder.
+ * The caller should make sure this identifier is unique within the
+ * compilation unit. If this argument is not specified, a fresh identifier
+ * will be automatically created and used as the placeholder.
+ * @param force whether to force registering the comments
*/
- def registerComment(text: => String): String = {
+ def registerComment(
+ text: => String,
+ placeholderId: String = "",
+ force: Boolean = false): String = {
// By default, disable comments in generated code because computing the comments themselves can
// be extremely expensive in certain cases, such as deeply-nested expressions which operate over
// inputs with wide schemas. For more details on the performance issues that motivated this
// flat, see SPARK-15680.
- if (SparkEnv.get != null && SparkEnv.get.conf.getBoolean("spark.sql.codegen.comments", false)) {
- val name = freshName("c")
+ if (force ||
+ SparkEnv.get != null && SparkEnv.get.conf.getBoolean("spark.sql.codegen.comments", false)) {
+ val name = if (placeholderId != "") {
+ assert(!placeHolderToComments.contains(placeholderId))
+ placeholderId
+ } else {
+ freshName("c")
+ }
val comment = if (text.contains("\n") || text.contains("\r")) {
text.split("(\r\n)|\r|\n").mkString("/**\n * ", "\n * ", "\n */")
} else {
http://git-wip-us.apache.org/repos/asf/spark/blob/95e25ed1/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
index 0e525b1..deb0a04 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
@@ -540,7 +540,9 @@ case class WholeStageCodegenExec(child: SparkPlan)(val codegenStageId: Int)
${ctx.registerComment(
s"""Codegend pipeline for stage (id=$codegenStageId)
- |${this.treeString.trim}""".stripMargin)}
+ |${this.treeString.trim}""".stripMargin,
+ "wsc_codegenPipeline")}
+ ${ctx.registerComment(s"codegenStageId=$codegenStageId", "wsc_codegenStageId", true)}
final class $className extends ${classOf[BufferedRowIterator].getName} {
private Object[] references;
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org