You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2023/03/22 14:19:03 UTC
[spark] branch master updated: [SPARK-42815][SQL] Subexpression elimination support shortcut expression
This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 6f7403b9ef1 [SPARK-42815][SQL] Subexpression elimination support shortcut expression
6f7403b9ef1 is described below
commit 6f7403b9ef17352d88ac552e307c68031767aae9
Author: ulysses-you <ul...@gmail.com>
AuthorDate: Wed Mar 22 22:18:35 2023 +0800
[SPARK-42815][SQL] Subexpression elimination support shortcut expression
### What changes were proposed in this pull request?
Add a new config to shortcut subexpression elimination for expression `and`, `or`.
The subexpression may not need to eval even if it appears more than once.
e.g., `if(or(a, and(b, b)))`, the expression `b` would be skipped if `a` is true.
### Why are the changes needed?
avoid eval unnecessary expression.
### Does this PR introduce _any_ user-facing change?
no
### How was this patch tested?
add test
Closes #40446 from ulysses-you/shortcut.
Authored-by: ulysses-you <ul...@gmail.com>
Signed-off-by: Wenchen Fan <we...@databricks.com>
---
.../expressions/EquivalentExpressions.scala | 23 +++++++++++++---
.../org/apache/spark/sql/internal/SQLConf.scala | 13 +++++++++
.../SubexpressionEliminationSuite.scala | 31 +++++++++++++++++++++-
3 files changed, 63 insertions(+), 4 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/EquivalentExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/EquivalentExpressions.scala
index f47391c0492..1a84859cc3a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/EquivalentExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/EquivalentExpressions.scala
@@ -23,6 +23,7 @@ import scala.collection.mutable
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
import org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable
+import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.util.Utils
/**
@@ -30,7 +31,9 @@ import org.apache.spark.util.Utils
* to this class and they subsequently query for expression equality. Expression trees are
* considered equal if for the same input(s), the same result is produced.
*/
-class EquivalentExpressions {
+class EquivalentExpressions(
+ skipForShortcutEnable: Boolean = SQLConf.get.subexpressionEliminationSkipForShotcutExpr) {
+
// For each expression, the set of equivalent expressions.
private val equivalenceMap = mutable.HashMap.empty[ExpressionEquals, ExpressionStats]
@@ -129,13 +132,27 @@ class EquivalentExpressions {
}
}
+ private def skipForShortcut(expr: Expression): Expression = {
+ if (skipForShortcutEnable) {
+ // The subexpression may not need to eval even if it appears more than once.
+ // e.g., `if(or(a, and(b, b)))`, the expression `b` would be skipped if `a` is true.
+ expr match {
+ case and: And => and.left
+ case or: Or => or.left
+ case other => other
+ }
+ } else {
+ expr
+ }
+ }
+
// There are some special expressions that we should not recurse into all of its children.
// 1. CodegenFallback: it's children will not be used to generate code (call eval() instead)
// 2. ConditionalExpression: use its children that will always be evaluated.
private def childrenToRecurse(expr: Expression): Seq[Expression] = expr match {
case _: CodegenFallback => Nil
- case c: ConditionalExpression => c.alwaysEvaluatedInputs
- case other => other.children
+ case c: ConditionalExpression => c.alwaysEvaluatedInputs.map(skipForShortcut)
+ case other => skipForShortcut(other).children
}
// For some special expressions we cannot just recurse into all of its children, but we can
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index d369eaf4507..ae69e4cf698 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -864,6 +864,16 @@ object SQLConf {
.checkValue(_ >= 0, "The maximum must not be negative")
.createWithDefault(100)
+ val SUBEXPRESSION_ELIMINATION_SKIP_FOR_SHORTCUT_EXPR =
+ buildConf("spark.sql.subexpressionElimination.skipForShortcutExpr")
+ .internal()
+ .doc("When true, shortcut eliminate subexpression with `AND`, `OR`. " +
+ "The subexpression may not need to eval even if it appears more than once. " +
+ "e.g., `if(or(a, and(b, b)))`, the expression `b` would be skipped if `a` is true.")
+ .version("3.5.0")
+ .booleanConf
+ .createWithDefault(false)
+
val CASE_SENSITIVE = buildConf("spark.sql.caseSensitive")
.internal()
.doc("Whether the query analyzer should be case sensitive or not. " +
@@ -4610,6 +4620,9 @@ class SQLConf extends Serializable with Logging {
def subexpressionEliminationCacheMaxEntries: Int =
getConf(SUBEXPRESSION_ELIMINATION_CACHE_MAX_ENTRIES)
+ def subexpressionEliminationSkipForShotcutExpr: Boolean =
+ getConf(SUBEXPRESSION_ELIMINATION_SKIP_FOR_SHORTCUT_EXPR)
+
def autoBroadcastJoinThreshold: Long = getConf(AUTO_BROADCASTJOIN_THRESHOLD)
def limitInitialNumPartitions: Int = getConf(LIMIT_INITIAL_NUM_PARTITIONS)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubexpressionEliminationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubexpressionEliminationSuite.scala
index 44d8ea3a112..f369635a326 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubexpressionEliminationSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubexpressionEliminationSuite.scala
@@ -16,6 +16,8 @@
*/
package org.apache.spark.sql.catalyst.expressions
+import java.util.Properties
+
import org.apache.spark.{SparkFunSuite, TaskContext, TaskContextImpl}
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.codegen._
@@ -424,7 +426,7 @@ class SubexpressionEliminationSuite extends SparkFunSuite with ExpressionEvalHel
test("SPARK-38333: PlanExpression expression should skip addExprTree function in Executor") {
try {
// suppose we are in executor
- val context1 = new TaskContextImpl(0, 0, 0, 0, 0, 1, null, null, null, cpus = 0)
+ val context1 = new TaskContextImpl(0, 0, 0, 0, 0, 1, null, new Properties, null, cpus = 0)
TaskContext.setTaskContext(context1)
val equivalence = new EquivalentExpressions
@@ -465,6 +467,33 @@ class SubexpressionEliminationSuite extends SparkFunSuite with ExpressionEvalHel
val cseState = equivalence.getExprState(expr)
assert(hasMatching == cseState.isDefined)
}
+
+ test("SPARK-42815: Subexpression elimination support shortcut conditional expression") {
+ val add = Add(Literal(1), Literal(0))
+ val equal = EqualTo(add, add)
+
+ def checkShortcut(expr: Expression, numCommonExpr: Int): Unit = {
+ val e1 = If(expr, Literal(1), Literal(2))
+ val ee1 = new EquivalentExpressions(true)
+ ee1.addExprTree(e1)
+ assert(ee1.getCommonSubexpressions.size == numCommonExpr)
+
+ val e2 = expr
+ val ee2 = new EquivalentExpressions(true)
+ ee2.addExprTree(e2)
+ assert(ee2.getCommonSubexpressions.size == numCommonExpr)
+ }
+
+ // shortcut right child
+ checkShortcut(And(Literal(false), equal), 0)
+ checkShortcut(Or(Literal(true), equal), 0)
+ checkShortcut(Not(And(Literal(true), equal)), 0)
+
+ // always eliminate subexpression for left child
+ checkShortcut((And(equal, Literal(false))), 1)
+ checkShortcut(Or(equal, Literal(true)), 1)
+ checkShortcut(Not(And(equal, Literal(false))), 1)
+ }
}
case class CodegenFallbackExpression(child: Expression)
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org