You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "Kousuke Saruta (Jira)" <ji...@apache.org> on 2020/12/19 21:36:00 UTC

[jira] [Created] (SPARK-33853) EXPLAIN CODEGEN doesn't show subquery code

Kousuke Saruta created SPARK-33853:
--------------------------------------

             Summary: EXPLAIN CODEGEN doesn't show subquery code
                 Key: SPARK-33853
                 URL: https://issues.apache.org/jira/browse/SPARK-33853
             Project: Spark
          Issue Type: Bug
          Components: SQL
    Affects Versions: 3.0.1, 3.0.0, 3.1.0, 3.2.0
            Reporter: Kousuke Saruta
            Assignee: Kousuke Saruta


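EXPLAIN CODEGEN doesn't show the generated code for subqueries. In the example below, the output reports only one WholeStageCodegen subtree (the outer Project) and prints only its code, even though the plan contains additional codegen stages inside the scalar subquery (the HashAggregate and Range nodes marked *(1) and *(2)).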

{code}
spark.conf.set("spark.sql.adaptive.enabled", "false")
val df = spark.range(1, 100)
df.createTempView("df")
spark.sql("SELECT (SELECT min(id) AS v FROM df)").explain("CODEGEN")

scala> spark.sql("SELECT (SELECT min(id) AS v FROM df)").explain("CODEGEN")
Found 1 WholeStageCodegen subtrees.
== Subtree 1 / 1 (maxMethodCodeSize:55; maxConstantPoolSize:97(0.15% used); numInnerClasses:0) ==
*(1) Project [Subquery scalar-subquery#3, [id=#24] AS scalarsubquery()#5L]
:  +- Subquery scalar-subquery#3, [id=#24]
:     +- *(2) HashAggregate(keys=[], functions=[min(id#0L)], output=[v#2L])
:        +- Exchange SinglePartition, ENSURE_REQUIREMENTS, [id=#20]
:           +- *(1) HashAggregate(keys=[], functions=[partial_min(id#0L)], output=[min#8L])
:              +- *(1) Range (1, 100, step=1, splits=12)
+- *(1) Scan OneRowRelation[]

Generated code:
/* 001 */ public Object generate(Object[] references) {
/* 002 */   return new GeneratedIteratorForCodegenStage1(references);
/* 003 */ }
/* 004 */
/* 005 */ // codegenStageId=1
/* 006 */ final class GeneratedIteratorForCodegenStage1 extends org.apache.spark.sql.execution.BufferedRowIterator {
/* 007 */   private Object[] references;
/* 008 */   private scala.collection.Iterator[] inputs;
/* 009 */   private scala.collection.Iterator rdd_input_0;
/* 010 */   private org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[] project_mutableStateArray_0 = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[1];
/* 011 */
/* 012 */   public GeneratedIteratorForCodegenStage1(Object[] references) {
/* 013 */     this.references = references;
/* 014 */   }
/* 015 */
/* 016 */   public void init(int index, scala.collection.Iterator[] inputs) {
/* 017 */     partitionIndex = index;
/* 018 */     this.inputs = inputs;
/* 019 */     rdd_input_0 = inputs[0];
/* 020 */     project_mutableStateArray_0[0] = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter(1, 0);
/* 021 */
/* 022 */   }
/* 023 */
/* 024 */   private void project_doConsume_0() throws java.io.IOException {
/* 025 */     // common sub-expressions
/* 026 */
/* 027 */     project_mutableStateArray_0[0].reset();
/* 028 */
/* 029 */     if (false) {
/* 030 */       project_mutableStateArray_0[0].setNullAt(0);
/* 031 */     } else {
/* 032 */       project_mutableStateArray_0[0].write(0, 1L);
/* 033 */     }
/* 034 */     append((project_mutableStateArray_0[0].getRow()));
/* 035 */
/* 036 */   }
/* 037 */
/* 038 */   protected void processNext() throws java.io.IOException {
/* 039 */     while ( rdd_input_0.hasNext()) {
/* 040 */       InternalRow rdd_row_0 = (InternalRow) rdd_input_0.next();
/* 041 */       ((org.apache.spark.sql.execution.metric.SQLMetric) references[0] /* numOutputRows */).add(1);
/* 042 */       project_doConsume_0();
/* 043 */       if (shouldStop()) return;
/* 044 */     }
/* 045 */   }
/* 046 */
/* 047 */ }
{code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org