You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2015/12/01 05:56:47 UTC

spark git commit: [SPARK-12018][SQL] Refactor common subexpression elimination code

Repository: spark
Updated Branches:
  refs/heads/master f73379be2 -> 9693b0d5a


[SPARK-12018][SQL] Refactor common subexpression elimination code

JIRA: https://issues.apache.org/jira/browse/SPARK-12018

The code of common subexpression elimination can be factored and simplified. Some unnecessary variables can be removed.

Author: Liang-Chi Hsieh <vi...@appier.com>

Closes #10009 from viirya/refactor-subexpr-eliminate.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9693b0d5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9693b0d5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9693b0d5

Branch: refs/heads/master
Commit: 9693b0d5a55bc1d2da96f04fe2c6de59a8dfcc1b
Parents: f73379b
Author: Liang-Chi Hsieh <vi...@appier.com>
Authored: Mon Nov 30 20:56:42 2015 -0800
Committer: Michael Armbrust <mi...@databricks.com>
Committed: Mon Nov 30 20:56:42 2015 -0800

----------------------------------------------------------------------
 .../sql/catalyst/expressions/Expression.scala   | 10 ++----
 .../expressions/codegen/CodeGenerator.scala     | 34 ++++++--------------
 .../codegen/GenerateUnsafeProjection.scala      |  4 +--
 3 files changed, 14 insertions(+), 34 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/9693b0d5/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
index 169435a..b55d365 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
@@ -94,13 +94,9 @@ abstract class Expression extends TreeNode[Expression] {
   def gen(ctx: CodeGenContext): GeneratedExpressionCode = {
     ctx.subExprEliminationExprs.get(this).map { subExprState =>
       // This expression is repeated meaning the code to evaluated has already been added
-      // as a function, `subExprState.fnName`. Just call that.
-      val code =
-        s"""
-           |/* $this */
-           |${subExprState.fnName}(${ctx.INPUT_ROW});
-         """.stripMargin.trim
-      GeneratedExpressionCode(code, subExprState.code.isNull, subExprState.code.value)
+      // as a function and called in advance. Just use it.
+      val code = s"/* $this */"
+      GeneratedExpressionCode(code, subExprState.isNull, subExprState.value)
     }.getOrElse {
       val isNull = ctx.freshName("isNull")
       val primitive = ctx.freshName("primitive")

http://git-wip-us.apache.org/repos/asf/spark/blob/9693b0d5/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index 2f3d6ae..440c7d2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -104,16 +104,13 @@ class CodeGenContext {
   val equivalentExpressions: EquivalentExpressions = new EquivalentExpressions
 
   // State used for subexpression elimination.
-  case class SubExprEliminationState(
-      isLoaded: String,
-      code: GeneratedExpressionCode,
-      fnName: String)
+  case class SubExprEliminationState(isNull: String, value: String)
 
   // Foreach expression that is participating in subexpression elimination, the state to use.
   val subExprEliminationExprs = mutable.HashMap.empty[Expression, SubExprEliminationState]
 
-  // The collection of isLoaded variables that need to be reset on each row.
-  val subExprIsLoadedVariables = mutable.ArrayBuffer.empty[String]
+  // The collection of sub-exression result resetting methods that need to be called on each row.
+  val subExprResetVariables = mutable.ArrayBuffer.empty[String]
 
   final val JAVA_BOOLEAN = "boolean"
   final val JAVA_BYTE = "byte"
@@ -408,7 +405,6 @@ class CodeGenContext {
     val commonExprs = equivalentExpressions.getAllEquivalentExprs.filter(_.size > 1)
     commonExprs.foreach(e => {
       val expr = e.head
-      val isLoaded = freshName("isLoaded")
       val isNull = freshName("isNull")
       val value = freshName("value")
       val fnName = freshName("evalExpr")
@@ -417,18 +413,12 @@ class CodeGenContext {
       val code = expr.gen(this)
       val fn =
         s"""
-           |private void $fnName(InternalRow ${INPUT_ROW}) {
-           |  if (!$isLoaded) {
-           |    ${code.code.trim}
-           |    $isLoaded = true;
-           |    $isNull = ${code.isNull};
-           |    $value = ${code.value};
-           |  }
+           |private void $fnName(InternalRow $INPUT_ROW) {
+           |  ${code.code.trim}
+           |  $isNull = ${code.isNull};
+           |  $value = ${code.value};
            |}
            """.stripMargin
-      code.code = fn
-      code.isNull = isNull
-      code.value = value
 
       addNewFunction(fnName, fn)
 
@@ -448,18 +438,12 @@ class CodeGenContext {
       //   2. Less code.
       // Currently, we will do this for all non-leaf only expression trees (i.e. expr trees with
       // at least two nodes) as the cost of doing it is expected to be low.
-
-      // Maintain the loaded value and isNull as member variables. This is necessary if the codegen
-      // function is split across multiple functions.
-      // TODO: maintaining this as a local variable probably allows the compiler to do better
-      // optimizations.
-      addMutableState("boolean", isLoaded, s"$isLoaded = false;")
       addMutableState("boolean", isNull, s"$isNull = false;")
       addMutableState(javaType(expr.dataType), value,
         s"$value = ${defaultValue(expr.dataType)};")
-      subExprIsLoadedVariables += isLoaded
 
-      val state = SubExprEliminationState(isLoaded, code, fnName)
+      subExprResetVariables += s"$fnName($INPUT_ROW);"
+      val state = SubExprEliminationState(isNull, value)
       e.foreach(subExprEliminationExprs.put(_, state))
     })
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/9693b0d5/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala
index 7b6c937..68005af 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala
@@ -287,8 +287,8 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro
     val holderClass = classOf[BufferHolder].getName
     ctx.addMutableState(holderClass, bufferHolder, s"this.$bufferHolder = new $holderClass();")
 
-    // Reset the isLoaded flag for each row.
-    val subexprReset = ctx.subExprIsLoadedVariables.map { v => s"${v} = false;" }.mkString("\n")
+    // Reset the subexpression values for each row.
+    val subexprReset = ctx.subExprResetVariables.mkString("\n")
 
     val code =
       s"""


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org