You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2017/01/10 11:35:58 UTC
spark git commit: [SPARK-16845][SQL]
`GeneratedClass$SpecificOrdering` grows beyond 64 KB
Repository: spark
Updated Branches:
refs/heads/master b0319c2ec -> acfc5f354
[SPARK-16845][SQL] `GeneratedClass$SpecificOrdering` grows beyond 64 KB
## What changes were proposed in this pull request?
Prior to this patch, we'll generate `compare(...)` for `GeneratedClass$SpecificOrdering` like below, leading to Janino exceptions saying the code grows beyond 64 KB.
``` scala
/* 005 */ class SpecificOrdering extends o.a.s.sql.catalyst.expressions.codegen.BaseOrdering {
/* ..... */ ...
/* 10969 */ private int compare(InternalRow a, InternalRow b) {
/* 10970 */ InternalRow i = null; // Holds current row being evaluated.
/* 10971 */
/* 1.... */ code for comparing field0
/* 1.... */ code for comparing field1
/* 1.... */ ...
/* 1.... */ code for comparing field449
/* 15012 */
/* 15013 */ return 0;
/* 15014 */ }
/* 15015 */ }
```
This patch would break `compare(...)` into smaller `compare_xxx(...)` methods when necessary; then we'll get generated `compare(...)` like:
``` scala
/* 001 */ public SpecificOrdering generate(Object[] references) {
/* 002 */ return new SpecificOrdering(references);
/* 003 */ }
/* 004 */
/* 005 */ class SpecificOrdering extends o.a.s.sql.catalyst.expressions.codegen.BaseOrdering {
/* 006 */
/* 007 */ ...
/* 1.... */
/* 11290 */ private int compare_0(InternalRow a, InternalRow b) {
/* 11291 */ InternalRow i = null; // Holds current row being evaluated.
/* 11292 */
/* 11293 */ i = a;
/* 11294 */ boolean isNullA;
/* 11295 */ UTF8String primitiveA;
/* 11296 */ {
/* 11297 */
/* 11298 */ Object obj = ((Expression) references[0]).eval(null);
/* 11299 */ UTF8String value = (UTF8String) obj;
/* 11300 */ isNullA = false;
/* 11301 */ primitiveA = value;
/* 11302 */ }
/* 11303 */ i = b;
/* 11304 */ boolean isNullB;
/* 11305 */ UTF8String primitiveB;
/* 11306 */ {
/* 11307 */
/* 11308 */ Object obj = ((Expression) references[0]).eval(null);
/* 11309 */ UTF8String value = (UTF8String) obj;
/* 11310 */ isNullB = false;
/* 11311 */ primitiveB = value;
/* 11312 */ }
/* 11313 */ if (isNullA && isNullB) {
/* 11314 */ // Nothing
/* 11315 */ } else if (isNullA) {
/* 11316 */ return -1;
/* 11317 */ } else if (isNullB) {
/* 11318 */ return 1;
/* 11319 */ } else {
/* 11320 */ int comp = primitiveA.compare(primitiveB);
/* 11321 */ if (comp != 0) {
/* 11322 */ return comp;
/* 11323 */ }
/* 11324 */ }
/* 11325 */
/* 11326 */
/* 11327 */ i = a;
/* 11328 */ boolean isNullA1;
/* 11329 */ UTF8String primitiveA1;
/* 11330 */ {
/* 11331 */
/* 11332 */ Object obj1 = ((Expression) references[1]).eval(null);
/* 11333 */ UTF8String value1 = (UTF8String) obj1;
/* 11334 */ isNullA1 = false;
/* 11335 */ primitiveA1 = value1;
/* 11336 */ }
/* 11337 */ i = b;
/* 11338 */ boolean isNullB1;
/* 11339 */ UTF8String primitiveB1;
/* 11340 */ {
/* 11341 */
/* 11342 */ Object obj1 = ((Expression) references[1]).eval(null);
/* 11343 */ UTF8String value1 = (UTF8String) obj1;
/* 11344 */ isNullB1 = false;
/* 11345 */ primitiveB1 = value1;
/* 11346 */ }
/* 11347 */ if (isNullA1 && isNullB1) {
/* 11348 */ // Nothing
/* 11349 */ } else if (isNullA1) {
/* 11350 */ return -1;
/* 11351 */ } else if (isNullB1) {
/* 11352 */ return 1;
/* 11353 */ } else {
/* 11354 */ int comp = primitiveA1.compare(primitiveB1);
/* 11355 */ if (comp != 0) {
/* 11356 */ return comp;
/* 11357 */ }
/* 11358 */ }
/* 1.... */
/* 1.... */ ...
/* 1.... */
/* 12652 */ return 0;
/* 12653 */ }
/* 1.... */
/* 1.... */ ...
/* 15387 */
/* 15388 */ public int compare(InternalRow a, InternalRow b) {
/* 15389 */
/* 15390 */ int comp_0 = compare_0(a, b);
/* 15391 */ if (comp_0 != 0) {
/* 15392 */ return comp_0;
/* 15393 */ }
/* 15394 */
/* 15395 */ int comp_1 = compare_1(a, b);
/* 15396 */ if (comp_1 != 0) {
/* 15397 */ return comp_1;
/* 15398 */ }
/* 1.... */
/* 1.... */ ...
/* 1.... */
/* 15450 */ return 0;
/* 15451 */ }
/* 15452 */ }
```
## How was this patch tested?
- a new added test case which
- would fail prior to this patch
- would pass with this patch
- ordering correctness should already be covered by existing tests like those in `OrderingSuite`
## Acknowledgement
A major part of this PR - the refactoring work of `splitExpression()` - has been done by ueshin.
Author: Liwei Lin <lw...@gmail.com>
Author: Takuya UESHIN <ue...@happy-camper.st>
Author: Takuya Ueshin <ue...@happy-camper.st>
Closes #15480 from lw-lin/spec-ordering-64k-.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/acfc5f35
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/acfc5f35
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/acfc5f35
Branch: refs/heads/master
Commit: acfc5f354332107cc744fb636e3730f6fc48b2fe
Parents: b0319c2
Author: Liwei Lin <lw...@gmail.com>
Authored: Tue Jan 10 19:35:46 2017 +0800
Committer: Wenchen Fan <we...@databricks.com>
Committed: Tue Jan 10 19:35:46 2017 +0800
----------------------------------------------------------------------
.../expressions/codegen/CodeGenerator.scala | 27 ++++++++++++++++----
.../expressions/codegen/GenerateOrdering.scala | 27 ++++++++++++++++++--
.../catalyst/expressions/OrderingSuite.scala | 10 ++++++++
3 files changed, 57 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/acfc5f35/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index d7746ca..f8f868b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -640,8 +640,24 @@ class CodegenContext {
splitExpressions(expressions, "apply", ("InternalRow", row) :: Nil)
}
- private def splitExpressions(
- expressions: Seq[String], funcName: String, arguments: Seq[(String, String)]): String = {
+ /**
+ * Splits the generated code of expressions into multiple functions, because function has
+ * 64kb code size limit in JVM
+ *
+ * @param expressions the codes to evaluate expressions.
+ * @param funcName the split function name base.
+ * @param arguments the list of (type, name) of the arguments of the split function.
+ * @param returnType the return type of the split function.
+ * @param makeSplitFunction makes split function body, e.g. add preparation or cleanup.
+ * @param foldFunctions folds the split function calls.
+ */
+ def splitExpressions(
+ expressions: Seq[String],
+ funcName: String,
+ arguments: Seq[(String, String)],
+ returnType: String = "void",
+ makeSplitFunction: String => String = identity,
+ foldFunctions: Seq[String] => String = _.mkString("", ";\n", ";")): String = {
val blocks = new ArrayBuffer[String]()
val blockBuilder = new StringBuilder()
for (code <- expressions) {
@@ -662,18 +678,19 @@ class CodegenContext {
blocks.head
} else {
val func = freshName(funcName)
+ val argString = arguments.map { case (t, name) => s"$t $name" }.mkString(", ")
val functions = blocks.zipWithIndex.map { case (body, i) =>
val name = s"${func}_$i"
val code = s"""
- |private void $name(${arguments.map { case (t, name) => s"$t $name" }.mkString(", ")}) {
- | $body
+ |private $returnType $name($argString) {
+ | ${makeSplitFunction(body)}
|}
""".stripMargin
addNewFunction(name, code)
name
}
- functions.map(name => s"$name(${arguments.map(_._2).mkString(", ")});").mkString("\n")
+ foldFunctions(functions.map(name => s"$name(${arguments.map(_._2).mkString(", ")})"))
}
}
http://git-wip-us.apache.org/repos/asf/spark/blob/acfc5f35/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
index 1cef956..b7335f1 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
@@ -117,8 +117,31 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR
}
}
"""
- }.mkString("\n")
- comparisons
+ }
+
+ ctx.splitExpressions(
+ expressions = comparisons,
+ funcName = "compare",
+ arguments = Seq(("InternalRow", "a"), ("InternalRow", "b")),
+ returnType = "int",
+ makeSplitFunction = { body =>
+ s"""
+ InternalRow ${ctx.INPUT_ROW} = null; // Holds current row being evaluated.
+ $body
+ return 0;
+ """
+ },
+ foldFunctions = { funCalls =>
+ funCalls.zipWithIndex.map { case (funCall, i) =>
+ val comp = ctx.freshName("comp")
+ s"""
+ int $comp = $funCall;
+ if ($comp != 0) {
+ return $comp;
+ }
+ """
+ }.mkString
+ })
}
protected def create(ordering: Seq[SortOrder]): BaseOrdering = {
http://git-wip-us.apache.org/repos/asf/spark/blob/acfc5f35/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala
index 8cc2ab4..190fab5 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala
@@ -127,4 +127,14 @@ class OrderingSuite extends SparkFunSuite with ExpressionEvalHelper {
}
}
}
+
+ test("SPARK-16845: GeneratedClass$SpecificOrdering grows beyond 64 KB") {
+ val sortOrder = Literal("abc").asc
+
+ // this is passing prior to SPARK-16845, and it should also be passing after SPARK-16845
+ GenerateOrdering.generate(Array.fill(40)(sortOrder))
+
+ // verify that we can support up to 5000 ordering comparisons, which should be sufficient
+ GenerateOrdering.generate(Array.fill(5000)(sortOrder))
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org