You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by rx...@apache.org on 2017/02/10 09:50:11 UTC
spark git commit: [SPARK-19512][BACKPORT-2.1][SQL] codegen for
compare structs fails #16852
Repository: spark
Updated Branches:
refs/heads/branch-2.1 a3d5300a0 -> ff5818b8c
[SPARK-19512][BACKPORT-2.1][SQL] codegen for compare structs fails #16852
## What changes were proposed in this pull request?
Set currentVars to null in GenerateOrdering.genComparisons before genCode is called. genCode ignores INPUT_ROW if currentVars is not null and in genComparisons we want it to use INPUT_ROW.
## How was this patch tested?
Added test with 2 queries in WholeStageCodegenSuite
Author: Bogdan Raducanu <bo...@gmail.com>
Closes #16875 from bogdanrdc/SPARK-19512-2.1.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ff5818b8
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ff5818b8
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ff5818b8
Branch: refs/heads/branch-2.1
Commit: ff5818b8cee7c718ef5bdef125c8d6971d64acde
Parents: a3d5300
Author: Bogdan Raducanu <bo...@databricks.com>
Authored: Fri Feb 10 10:50:07 2017 +0100
Committer: Reynold Xin <rx...@databricks.com>
Committed: Fri Feb 10 10:50:07 2017 +0100
----------------------------------------------------------------------
.../catalyst/expressions/codegen/CodeGenerator.scala | 2 --
.../expressions/codegen/GenerateOrdering.scala | 14 ++++++++++++--
.../spark/sql/execution/WholeStageCodegenSuite.scala | 12 ++++++++++++
3 files changed, 24 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/ff5818b8/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index 891c1aa..683b9cb 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -555,7 +555,6 @@ class CodegenContext {
addNewFunction(compareFunc, funcCode)
s"this.$compareFunc($c1, $c2)"
case schema: StructType =>
- INPUT_ROW = "i"
val comparisons = GenerateOrdering.genComparisons(this, schema)
val compareFunc = freshName("compareStruct")
val funcCode: String =
@@ -566,7 +565,6 @@ class CodegenContext {
if (a instanceof UnsafeRow && b instanceof UnsafeRow && a.equals(b)) {
return 0;
}
- InternalRow i = null;
$comparisons
return 0;
}
http://git-wip-us.apache.org/repos/asf/spark/blob/ff5818b8/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
index b7335f1..f7fc2d5 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
@@ -73,7 +73,12 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR
*/
def genComparisons(ctx: CodegenContext, ordering: Seq[SortOrder]): String = {
val comparisons = ordering.map { order =>
+ val oldCurrentVars = ctx.currentVars
+ ctx.INPUT_ROW = "i"
+ // to use INPUT_ROW we must make sure currentVars is null
+ ctx.currentVars = null
val eval = order.child.genCode(ctx)
+ ctx.currentVars = oldCurrentVars
val asc = order.isAscending
val isNullA = ctx.freshName("isNullA")
val primitiveA = ctx.freshName("primitiveA")
@@ -119,7 +124,7 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR
"""
}
- ctx.splitExpressions(
+ val code = ctx.splitExpressions(
expressions = comparisons,
funcName = "compare",
arguments = Seq(("InternalRow", "a"), ("InternalRow", "b")),
@@ -142,6 +147,12 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR
"""
}.mkString
})
+ // make sure INPUT_ROW is declared even if splitExpressions
+ // returns an inlined block
+ s"""
+ |InternalRow ${ctx.INPUT_ROW} = null;
+ |$code
+ """.stripMargin
}
protected def create(ordering: Seq[SortOrder]): BaseOrdering = {
@@ -165,7 +176,6 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR
${ctx.declareAddedFunctions()}
public int compare(InternalRow a, InternalRow b) {
- InternalRow ${ctx.INPUT_ROW} = null; // Holds current row being evaluated.
$comparisons
return 0;
}
http://git-wip-us.apache.org/repos/asf/spark/blob/ff5818b8/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala
index f26e5e7..9f6ef03 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala
@@ -113,4 +113,16 @@ class WholeStageCodegenSuite extends SparkPlanTest with SharedSQLContext {
p.asInstanceOf[WholeStageCodegenExec].child.isInstanceOf[HashAggregateExec]).isDefined)
assert(ds.collect() === Array(("a", 10.0), ("b", 3.0), ("c", 1.0)))
}
+
+ test("SPARK-19512 codegen for comparing structs is incorrect") {
+ // this would raise CompileException before the fix
+ spark.range(10)
+ .selectExpr("named_struct('a', id) as col1", "named_struct('a', id+2) as col2")
+ .filter("col1 = col2").count()
+ // this would raise java.lang.IndexOutOfBoundsException before the fix
+ spark.range(10)
+ .selectExpr("named_struct('a', id, 'b', id) as col1",
+ "named_struct('a',id+2, 'b',id+2) as col2")
+ .filter("col1 = col2").count()
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org