Posted to commits@spark.apache.org by we...@apache.org on 2018/03/09 21:48:16 UTC

spark git commit: [SPARK-23628][SQL][BACKPORT-2.3] calculateParamLength should not return 1 + num of expressions

Repository: spark
Updated Branches:
  refs/heads/branch-2.3 bc5ce0476 -> 3ec25d5a8


[SPARK-23628][SQL][BACKPORT-2.3] calculateParamLength should not return 1 + num of expressions

## What changes were proposed in this pull request?

Backport of ea480990e726aed59750f1cea8d40adba56d991a to branch 2.3.

`calculateParamLength` is meant to count JVM parameter slots: 1 for `this`, 2 for a long/double value, 1 for any other value, plus 1 for the extra `isNull` boolean of a nullable expression. In the old code, Scala's `match` binds more loosely than `+`, so the selector was the string concatenation `(if (input.nullable) 1 else 0) + javaType(input.dataType)` (e.g. `"1long"`) rather than `javaType(input.dataType)` alone; the `JAVA_LONG | JAVA_DOUBLE` arm could therefore never match, every expression counted as a single slot, and the method always returned 1 + the number of expressions. The fix computes the type's slot count first and then adds the nullability slot.
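
For illustration only (not part of the patch), a minimal standalone sketch of the precedence pitfall, using a simplified stand-in for the `javaType` constants:

```scala
// Minimal sketch of the precedence bug (simplified stand-ins, not Spark code).
object ParamLengthSketch {
  val JAVA_LONG = "long"

  // Old shape: `match` applies to the whole `Int + String` concatenation,
  // so the selector is e.g. "1long" and the long/double arm never fires.
  def buggy(nullable: Boolean, javaType: String): Int =
    (if (nullable) 1 else 0) + javaType match {
      case JAVA_LONG => 2
      case _ => 1 // always taken
    }

  // Fixed shape: match on the Java type first, then add the isNull slot.
  def fixed(nullable: Boolean, javaType: String): Int = {
    val javaParamLength = javaType match {
      case JAVA_LONG => 2
      case _ => 1
    }
    (if (nullable) 1 else 0) + javaParamLength
  }

  def main(args: Array[String]): Unit = {
    println(buggy(nullable = true, JAVA_LONG)) // 1 (wrong)
    println(fixed(nullable = true, JAVA_LONG)) // 3 (2 slots for the long + 1 for isNull)
  }
}
```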

## How was this patch tested?

Added UT.

cc cloud-fan hvanhovell

Author: Marco Gaido <ma...@gmail.com>

Closes #20783 from mgaido91/SPARK-23628_2.3.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3ec25d5a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3ec25d5a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3ec25d5a

Branch: refs/heads/branch-2.3
Commit: 3ec25d5a803888e5e24a47a511e9d88c423c5310
Parents: bc5ce04
Author: Marco Gaido <ma...@gmail.com>
Authored: Fri Mar 9 13:48:12 2018 -0800
Committer: Wenchen Fan <we...@databricks.com>
Committed: Fri Mar 9 13:48:12 2018 -0800

----------------------------------------------------------------------
 .../expressions/codegen/CodeGenerator.scala         |  7 ++++---
 .../catalyst/expressions/CodeGenerationSuite.scala  |  6 ++++++
 .../sql/execution/WholeStageCodegenSuite.scala      | 16 ++++++++--------
 3 files changed, 18 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/3ec25d5a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index 4dcbb70..a54af03 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -1253,14 +1253,15 @@ class CodegenContext {
    */
   def calculateParamLength(params: Seq[Expression]): Int = {
     def paramLengthForExpr(input: Expression): Int = {
-      // For a nullable expression, we need to pass in an extra boolean parameter.
-      (if (input.nullable) 1 else 0) + javaType(input.dataType) match {
+      val javaParamLength = javaType(input.dataType) match {
         case JAVA_LONG | JAVA_DOUBLE => 2
         case _ => 1
       }
+      // For a nullable expression, we need to pass in an extra boolean parameter.
+      (if (input.nullable) 1 else 0) + javaParamLength
     }
     // Initial value is 1 for `this`.
-    1 + params.map(paramLengthForExpr(_)).sum
+    1 + params.map(paramLengthForExpr).sum
   }
 
   /**
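
As a quick sanity check of the corrected counting (a sketch, assuming it runs against branch-2.3 with this commit applied; `Literal`s are never nullable, long values take two JVM slots, and a nullable expression adds one boolean `isNull` slot):

```scala
import org.apache.spark.sql.catalyst.expressions.{BoundReference, Literal}
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext
import org.apache.spark.sql.types.LongType

val ctx = new CodegenContext
// 1 slot for `this` + 1 slot for a non-nullable int literal
assert(ctx.calculateParamLength(Seq(Literal(1))) == 2)
// 1 slot for `this` + 2 slots for the long value + 1 slot for its isNull flag
assert(ctx.calculateParamLength(Seq(BoundReference(0, LongType, nullable = true))) == 4)
```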

http://git-wip-us.apache.org/repos/asf/spark/blob/3ec25d5a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala
index 676ba39..3958bff 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala
@@ -436,4 +436,10 @@ class CodeGenerationSuite extends SparkFunSuite with ExpressionEvalHelper {
     ctx.addImmutableStateIfNotExists("String", mutableState2)
     assert(ctx.inlinedMutableStates.length == 2)
   }
+
+  test("SPARK-23628: calculateParamLength should compute properly the param length") {
+    val ctx = new CodegenContext
+    assert(ctx.calculateParamLength(Seq.range(0, 100).map(Literal(_))) == 101)
+    assert(ctx.calculateParamLength(Seq.range(0, 100).map(x => Literal(x.toLong))) == 201)
+  }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/3ec25d5a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala
index ef16292..0fb9dd2 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala
@@ -25,7 +25,7 @@ import org.apache.spark.sql.execution.columnar.InMemoryTableScanExec
 import org.apache.spark.sql.execution.joins.BroadcastHashJoinExec
 import org.apache.spark.sql.execution.joins.SortMergeJoinExec
 import org.apache.spark.sql.expressions.scalalang.typed
-import org.apache.spark.sql.functions.{avg, broadcast, col, max}
+import org.apache.spark.sql.functions.{avg, broadcast, col, lit, max}
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SharedSQLContext
 import org.apache.spark.sql.types.{IntegerType, StringType, StructType}
@@ -249,12 +249,12 @@ class WholeStageCodegenSuite extends QueryTest with SharedSQLContext {
   }
 
   test("Skip splitting consume function when parameter number exceeds JVM limit") {
-    import testImplicits._
-
-    Seq((255, false), (254, true)).foreach { case (columnNum, hasSplit) =>
+    // since every field is nullable we have 2 params for each input column (one for the value
+    // and one for the isNull variable)
+    Seq((128, false), (127, true)).foreach { case (columnNum, hasSplit) =>
       withTempPath { dir =>
         val path = dir.getCanonicalPath
-        spark.range(10).select(Seq.tabulate(columnNum) {i => ('id + i).as(s"c$i")} : _*)
+        spark.range(10).select(Seq.tabulate(columnNum) {i => lit(i).as(s"c$i")} : _*)
           .write.mode(SaveMode.Overwrite).parquet(path)
 
         withSQLConf(SQLConf.WHOLESTAGE_MAX_NUM_FIELDS.key -> "255",
@@ -263,10 +263,10 @@ class WholeStageCodegenSuite extends QueryTest with SharedSQLContext {
           val df = spark.read.parquet(path).selectExpr(projection: _*)
 
           val plan = df.queryExecution.executedPlan
-          val wholeStageCodeGenExec = plan.find(p => p match {
-            case wp: WholeStageCodegenExec => true
+          val wholeStageCodeGenExec = plan.find {
+            case _: WholeStageCodegenExec => true
             case _ => false
-          })
+          }
           assert(wholeStageCodeGenExec.isDefined)
           val code = wholeStageCodeGenExec.get.asInstanceOf[WholeStageCodegenExec].doCodeGen()._2
           assert(code.body.contains("project_doConsume") == hasSplit)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org