You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by rx...@apache.org on 2016/10/01 00:32:04 UTC
spark git commit: [SPARK-17703][SQL] Add unnamed version of
addReferenceObj for minor objects.
Repository: spark
Updated Branches:
refs/heads/master f327e1686 -> 81455a9cd
[SPARK-17703][SQL] Add unnamed version of addReferenceObj for minor objects.
## What changes were proposed in this pull request?
There are many minor objects in references that are extracted to fields of the generated class, e.g. `errMsg` in `GetExternalRowField` or `ValidateExternalType`, but the number of fields in a class is limited, so we should reduce their number.
This PR adds an unnamed version of `addReferenceObj` for these minor objects, which does not store the object in a field but instead refers to it through the `references` field at the time of use.
## How was this patch tested?
Existing tests.
Author: Takuya UESHIN <ue...@happy-camper.st>
Closes #15276 from ueshin/issues/SPARK-17703.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/81455a9c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/81455a9c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/81455a9c
Branch: refs/heads/master
Commit: 81455a9cd963098613bad10182e3fafc83a6e352
Parents: f327e16
Author: Takuya UESHIN <ue...@happy-camper.st>
Authored: Fri Sep 30 17:31:59 2016 -0700
Committer: Reynold Xin <rx...@databricks.com>
Committed: Fri Sep 30 17:31:59 2016 -0700
----------------------------------------------------------------------
.../catalyst/expressions/codegen/CodeGenerator.scala | 15 +++++++++++++++
.../apache/spark/sql/catalyst/expressions/misc.scala | 5 ++++-
.../sql/catalyst/expressions/objects/objects.scala | 12 +++++++++---
3 files changed, 28 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/81455a9c/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index 33b9b80..cb808e3 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -85,6 +85,21 @@ class CodegenContext {
val references: mutable.ArrayBuffer[Any] = new mutable.ArrayBuffer[Any]()
/**
+ * Add an object to `references`.
+ *
+ * Returns the code to access it.
+ *
+ * This variant is for minor objects: it does not store the object in a class field but reads it
+ * from `references` at the time of use, because the number of fields in a class is limited.
+ */
+ def addReferenceObj(obj: Any): String = {
+ val idx = references.length // index the object occupies once appended below
+ references += obj
+ val clsName = obj.getClass.getName // cast target; NOTE(review): arrays yield "[I"-style names
+ s"(($clsName) references[$idx])" // code string: `references[idx]` cast to the runtime class
+ }
+
+ /**
* Add an object to `references`, create a class member to access it.
*
* Returns the name of class member.
http://git-wip-us.apache.org/repos/asf/spark/blob/81455a9c/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
index 92f8fb8..dbb52a4 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
@@ -517,7 +517,10 @@ case class AssertTrue(child: Expression) extends UnaryExpression with ImplicitCa
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
val eval = child.genCode(ctx)
- val errMsgField = ctx.addReferenceObj("errMsg", errMsg)
+
+ // Use unnamed reference that doesn't create a local field here to reduce the number of fields
+ // because errMsgField is used only when the value is null or false.
+ val errMsgField = ctx.addReferenceObj(errMsg)
ExprCode(code = s"""${eval.code}
|if (${eval.isNull} || !${eval.value}) {
| throw new RuntimeException($errMsgField);
http://git-wip-us.apache.org/repos/asf/spark/blob/81455a9c/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
index faf8fec..50e2ac3 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
@@ -906,7 +906,9 @@ case class AssertNotNull(child: Expression, walkedTypePath: Seq[String])
override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
val childGen = child.genCode(ctx)
- val errMsgField = ctx.addReferenceObj("errMsg", errMsg)
+ // Use unnamed reference that doesn't create a local field here to reduce the number of fields
+ // because errMsgField is used only when the value is null.
+ val errMsgField = ctx.addReferenceObj(errMsg)
val code = s"""
${childGen.code}
@@ -941,7 +943,9 @@ case class GetExternalRowField(
private val errMsg = s"The ${index}th field '$fieldName' of input row cannot be null."
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
- val errMsgField = ctx.addReferenceObj("errMsg", errMsg)
+ // Use unnamed reference that doesn't create a local field here to reduce the number of fields
+ // because errMsgField is used only when the field is null.
+ val errMsgField = ctx.addReferenceObj(errMsg)
val row = child.genCode(ctx)
val code = s"""
${row.code}
@@ -979,7 +983,9 @@ case class ValidateExternalType(child: Expression, expected: DataType)
private val errMsg = s" is not a valid external type for schema of ${expected.simpleString}"
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
- val errMsgField = ctx.addReferenceObj("errMsg", errMsg)
+ // Use unnamed reference that doesn't create a local field here to reduce the number of fields
+ // because errMsgField is used only when the type doesn't match.
+ val errMsgField = ctx.addReferenceObj(errMsg)
val input = child.genCode(ctx)
val obj = input.value
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org