You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2015/12/01 05:56:47 UTC
spark git commit: [SPARK-12018][SQL] Refactor common subexpression
elimination code
Repository: spark
Updated Branches:
refs/heads/master f73379be2 -> 9693b0d5a
[SPARK-12018][SQL] Refactor common subexpression elimination code
JIRA: https://issues.apache.org/jira/browse/SPARK-12018
The code of common subexpression elimination can be factored and simplified. Some unnecessary variables can be removed.
Author: Liang-Chi Hsieh <vi...@appier.com>
Closes #10009 from viirya/refactor-subexpr-eliminate.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9693b0d5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9693b0d5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9693b0d5
Branch: refs/heads/master
Commit: 9693b0d5a55bc1d2da96f04fe2c6de59a8dfcc1b
Parents: f73379b
Author: Liang-Chi Hsieh <vi...@appier.com>
Authored: Mon Nov 30 20:56:42 2015 -0800
Committer: Michael Armbrust <mi...@databricks.com>
Committed: Mon Nov 30 20:56:42 2015 -0800
----------------------------------------------------------------------
.../sql/catalyst/expressions/Expression.scala | 10 ++----
.../expressions/codegen/CodeGenerator.scala | 34 ++++++--------------
.../codegen/GenerateUnsafeProjection.scala | 4 +--
3 files changed, 14 insertions(+), 34 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/9693b0d5/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
index 169435a..b55d365 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
@@ -94,13 +94,9 @@ abstract class Expression extends TreeNode[Expression] {
def gen(ctx: CodeGenContext): GeneratedExpressionCode = {
ctx.subExprEliminationExprs.get(this).map { subExprState =>
// This expression is repeated meaning the code to evaluated has already been added
- // as a function, `subExprState.fnName`. Just call that.
- val code =
- s"""
- |/* $this */
- |${subExprState.fnName}(${ctx.INPUT_ROW});
- """.stripMargin.trim
- GeneratedExpressionCode(code, subExprState.code.isNull, subExprState.code.value)
+ // as a function and called in advance. Just use it.
+ val code = s"/* $this */"
+ GeneratedExpressionCode(code, subExprState.isNull, subExprState.value)
}.getOrElse {
val isNull = ctx.freshName("isNull")
val primitive = ctx.freshName("primitive")
http://git-wip-us.apache.org/repos/asf/spark/blob/9693b0d5/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index 2f3d6ae..440c7d2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -104,16 +104,13 @@ class CodeGenContext {
val equivalentExpressions: EquivalentExpressions = new EquivalentExpressions
// State used for subexpression elimination.
- case class SubExprEliminationState(
- isLoaded: String,
- code: GeneratedExpressionCode,
- fnName: String)
+ case class SubExprEliminationState(isNull: String, value: String)
// Foreach expression that is participating in subexpression elimination, the state to use.
val subExprEliminationExprs = mutable.HashMap.empty[Expression, SubExprEliminationState]
- // The collection of isLoaded variables that need to be reset on each row.
- val subExprIsLoadedVariables = mutable.ArrayBuffer.empty[String]
+ // The collection of sub-exression result resetting methods that need to be called on each row.
+ val subExprResetVariables = mutable.ArrayBuffer.empty[String]
final val JAVA_BOOLEAN = "boolean"
final val JAVA_BYTE = "byte"
@@ -408,7 +405,6 @@ class CodeGenContext {
val commonExprs = equivalentExpressions.getAllEquivalentExprs.filter(_.size > 1)
commonExprs.foreach(e => {
val expr = e.head
- val isLoaded = freshName("isLoaded")
val isNull = freshName("isNull")
val value = freshName("value")
val fnName = freshName("evalExpr")
@@ -417,18 +413,12 @@ class CodeGenContext {
val code = expr.gen(this)
val fn =
s"""
- |private void $fnName(InternalRow ${INPUT_ROW}) {
- | if (!$isLoaded) {
- | ${code.code.trim}
- | $isLoaded = true;
- | $isNull = ${code.isNull};
- | $value = ${code.value};
- | }
+ |private void $fnName(InternalRow $INPUT_ROW) {
+ | ${code.code.trim}
+ | $isNull = ${code.isNull};
+ | $value = ${code.value};
|}
""".stripMargin
- code.code = fn
- code.isNull = isNull
- code.value = value
addNewFunction(fnName, fn)
@@ -448,18 +438,12 @@ class CodeGenContext {
// 2. Less code.
// Currently, we will do this for all non-leaf only expression trees (i.e. expr trees with
// at least two nodes) as the cost of doing it is expected to be low.
-
- // Maintain the loaded value and isNull as member variables. This is necessary if the codegen
- // function is split across multiple functions.
- // TODO: maintaining this as a local variable probably allows the compiler to do better
- // optimizations.
- addMutableState("boolean", isLoaded, s"$isLoaded = false;")
addMutableState("boolean", isNull, s"$isNull = false;")
addMutableState(javaType(expr.dataType), value,
s"$value = ${defaultValue(expr.dataType)};")
- subExprIsLoadedVariables += isLoaded
- val state = SubExprEliminationState(isLoaded, code, fnName)
+ subExprResetVariables += s"$fnName($INPUT_ROW);"
+ val state = SubExprEliminationState(isNull, value)
e.foreach(subExprEliminationExprs.put(_, state))
})
}
http://git-wip-us.apache.org/repos/asf/spark/blob/9693b0d5/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala
index 7b6c937..68005af 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala
@@ -287,8 +287,8 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro
val holderClass = classOf[BufferHolder].getName
ctx.addMutableState(holderClass, bufferHolder, s"this.$bufferHolder = new $holderClass();")
- // Reset the isLoaded flag for each row.
- val subexprReset = ctx.subExprIsLoadedVariables.map { v => s"${v} = false;" }.mkString("\n")
+ // Reset the subexpression values for each row.
+ val subexprReset = ctx.subExprResetVariables.mkString("\n")
val code =
s"""
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org