You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2019/07/12 23:22:42 UTC
[spark] branch branch-2.4 updated: [SPARK-28361][SQL][TEST] Test equality of generated code with id in class name
This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-2.4 by this push:
new 1a6a67f [SPARK-28361][SQL][TEST] Test equality of generated code with id in class name
1a6a67f is described below
commit 1a6a67f526e3dd018e47dcd7a6b74e6e3a57fe42
Author: gatorsmile <ga...@gmail.com>
AuthorDate: Fri Jul 12 16:06:44 2019 -0700
[SPARK-28361][SQL][TEST] Test equality of generated code with id in class name
A code gen test in WholeStageCodegenSuite was flaky because it used the codegen metrics class to test if the generated code for equivalent plans was identical under a particular flag. This patch switches the test to compare the generated code directly.
N/A
Closes #25131 from gatorsmile/WholeStageCodegenSuite.
Authored-by: gatorsmile <ga...@gmail.com>
Signed-off-by: Dongjoon Hyun <dh...@apple.com>
(cherry picked from commit 60b89cf8097ff583a29a6a19f1db4afa780f3109)
Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
.../sql/execution/WholeStageCodegenSuite.scala | 37 +++++++++++-----------
1 file changed, 19 insertions(+), 18 deletions(-)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala
index 3c9a090..1cd5f5a 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala
@@ -17,8 +17,7 @@
package org.apache.spark.sql.execution
-import org.apache.spark.metrics.source.CodegenMetrics
-import org.apache.spark.sql.{QueryTest, Row, SaveMode}
+import org.apache.spark.sql.{Dataset, QueryTest, Row, SaveMode}
import org.apache.spark.sql.catalyst.expressions.codegen.{CodeAndComment, CodeGenerator}
import org.apache.spark.sql.execution.aggregate.HashAggregateExec
import org.apache.spark.sql.execution.columnar.InMemoryTableScanExec
@@ -168,10 +167,10 @@ class WholeStageCodegenSuite extends QueryTest with SharedSQLContext {
.select("int")
val plan = df.queryExecution.executedPlan
- assert(!plan.find(p =>
+ assert(plan.find(p =>
p.isInstanceOf[WholeStageCodegenExec] &&
p.asInstanceOf[WholeStageCodegenExec].child.children(0)
- .isInstanceOf[SortMergeJoinExec]).isDefined)
+ .isInstanceOf[SortMergeJoinExec]).isEmpty)
assert(df.collect() === Array(Row(1), Row(2)))
}
}
@@ -204,6 +203,13 @@ class WholeStageCodegenSuite extends QueryTest with SharedSQLContext {
wholeStageCodeGenExec.get.asInstanceOf[WholeStageCodegenExec].doCodeGen()._2
}
+ def genCode(ds: Dataset[_]): Seq[CodeAndComment] = {
+ val plan = ds.queryExecution.executedPlan
+ val wholeStageCodeGenExecs = plan.collect { case p: WholeStageCodegenExec => p }
+ assert(wholeStageCodeGenExecs.nonEmpty, "WholeStageCodegenExec is expected")
+ wholeStageCodeGenExecs.map(_.doCodeGen()._2)
+ }
+
ignore("SPARK-21871 check if we can get large code size when compiling too long functions") {
val codeWithShortFunctions = genGroupByCode(3)
val (_, maxCodeSize1) = CodeGenerator.compile(codeWithShortFunctions)
@@ -283,9 +289,9 @@ class WholeStageCodegenSuite extends QueryTest with SharedSQLContext {
val df = spark.range(100)
val join = df.join(df, "id")
val plan = join.queryExecution.executedPlan
- assert(!plan.find(p =>
+ assert(plan.find(p =>
p.isInstanceOf[WholeStageCodegenExec] &&
- p.asInstanceOf[WholeStageCodegenExec].codegenStageId == 0).isDefined,
+ p.asInstanceOf[WholeStageCodegenExec].codegenStageId == 0).isEmpty,
"codegen stage IDs should be preserved through ReuseExchange")
checkAnswer(join, df.toDF)
}
@@ -295,18 +301,13 @@ class WholeStageCodegenSuite extends QueryTest with SharedSQLContext {
import testImplicits._
withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_USE_ID_IN_CLASS_NAME.key -> "true") {
- val bytecodeSizeHisto = CodegenMetrics.METRIC_GENERATED_METHOD_BYTECODE_SIZE
-
- // the same query run twice should hit the codegen cache
- spark.range(3).select('id + 2).collect
- val after1 = bytecodeSizeHisto.getCount
- spark.range(3).select('id + 2).collect
- val after2 = bytecodeSizeHisto.getCount // same query shape as above, deliberately
- // bytecodeSizeHisto's count is always monotonically increasing if new compilation to
- // bytecode had occurred. If the count stayed the same that means we've got a cache hit.
- assert(after1 == after2, "Should hit codegen cache. No new compilation to bytecode expected")
-
- // a different query can result in codegen cache miss, that's by design
+ // the same query run twice should produce identical code, which would imply a hit in
+ // the generated code cache.
+ val ds1 = spark.range(3).select('id + 2)
+ val code1 = genCode(ds1)
+ val ds2 = spark.range(3).select('id + 2)
+ val code2 = genCode(ds2) // same query shape as above, deliberately
+ assert(code1 == code2, "Should produce same code")
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org