You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2019/05/09 08:49:49 UTC

[GitHub] [spark] mgaido91 commented on a change in pull request #24495: [SPARK-27604][SQL] Add filter reduction

mgaido91 commented on a change in pull request #24495: [SPARK-27604][SQL] Add filter reduction
URL: https://github.com/apache/spark/pull/24495#discussion_r282391728
 
 

 ##########
 File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala
 ##########
 @@ -151,6 +151,238 @@ object ConstantPropagation extends Rule[LogicalPlan] with PredicateHelper {
   }
 }
 
+/**
+ * Substitutes expressions which can be statically reduced by constraints.
+ * eg.
+ * {{{
+ *   SELECT * FROM table WHERE i <= 5 AND i = 5        => ... WHERE i = 5
+ *   SELECT * FROM table WHERE i < j AND ... AND i > j => ... WHERE false
+ * }}}
+ */
+object FilterReduction extends Rule[LogicalPlan] with ConstraintHelper {
+  def apply(plan: LogicalPlan): LogicalPlan = plan transform {
+    case f: Filter =>
+      val newCondition = normalizeAndReduceWithConstraints(f.condition)
+      if (newCondition fastEquals f.condition) {
+        f
+      } else {
+        f.copy(condition = newCondition)
+      }
+  }
+
+  private def normalizeAndReduceWithConstraints(expression: Expression): Expression =
+    reduceWithConstraints(normalize(expression))._1
+
+  private def normalize(expression: Expression) = expression transform {
+    case GreaterThan(x, y) => LessThan(y, x)
+    case GreaterThanOrEqual(x, y) => LessThanOrEqual(y, x)
+  }
+
+  /**
+   * Traverse a condition as a tree and simplify expressions with constraints.
+   * - This functions assumes that the plan has been normalized using [[normalize()]]
+   * - On matching [[And]], recursively traverse both children, simplify child expressions with
+   *   propagated constraints from sibling and propagate up union of constraints.
+   * - If a child of [[And]] is [[LessThan]], [[LessThanOrEqual]], [[EqualTo]], [[EqualNullSafe]],
+   *   propagate the constraint.
+   * - On matching [[Or]] or [[Not]], recursively traverse each children, propagate no constraints.
+   * - Otherwise, stop traversal and propagate no constraints.
+   * @param expression expression to be traversed
+   * @return A tuple including:
+   *         1. Expression: optionally changed expression after traversal
+   *         2. Seq[Expression]: propagated constraints
+   */
+  private def reduceWithConstraints(expression: Expression): (Expression, Seq[Expression]) =
+    expression match {
+      case e @ (_: LessThan | _: LessThanOrEqual | _: EqualTo | _: EqualNullSafe)
+        if e.deterministic => (e, Seq(e))
+      case a @ And(left, right) =>
+        val (newLeft, leftConstraints) = reduceWithConstraints(left)
+        val reducedRight = reduceWithConstraints(right, leftConstraints)
+        val (reducedNewRight, rightConstraints) = reduceWithConstraints(reducedRight)
+        val reducedNewLeft = reduceWithConstraints(newLeft, rightConstraints)
+        val newAnd = if ((reducedNewLeft fastEquals left) &&
+          (reducedNewRight fastEquals right)) {
+          a
+        } else {
+          And(reducedNewLeft, reducedNewRight)
+        }
+        (newAnd, leftConstraints ++ rightConstraints)
+      case o @ Or(left, right) =>
+        // Ignore the EqualityPredicates from children since they are only propagated through And.
+        val (newLeft, _) = reduceWithConstraints(left)
+        val (newRight, _) = reduceWithConstraints(right)
+        val newOr = if ((newLeft fastEquals left) && (newRight fastEquals right)) {
+          o
+        } else {
+          Or(newLeft, newRight)
+        }
+
+        (newOr, Seq.empty)
+      case n @ Not(child) =>
+        // Ignore the EqualityPredicates from children since they are only propagated through And.
+        val (newChild, _) = reduceWithConstraints(child)
+        val newNot = if (newChild fastEquals child) {
+          n
+        } else {
+          Not(newChild)
+        }
+        (newNot, Seq.empty)
+      case _ => (expression, Seq.empty)
+    }
+
+  private def reduceWithConstraints(expression: Expression, constraints: Seq[Expression]) =
+    constraints.foldLeft(expression)((e, constraint) => reduceWithConstraint(e, constraint))
+
+  private def planEqual(x: Expression, y: Expression) =
+    !x.foldable && !y.foldable && x.canonicalized == y.canonicalized
+
+  private def valueEqual(x: Expression, y: Expression) =
+    x.foldable && y.foldable && EqualTo(x, y).eval(EmptyRow).asInstanceOf[Boolean]
+
+  private def valueLessThan(x: Expression, y: Expression) =
+    x.foldable && y.foldable && LessThan(x, y).eval(EmptyRow).asInstanceOf[Boolean]
+
+  private def valueLessThanOrEqual(x: Expression, y: Expression) =
+    x.foldable && y.foldable && LessThanOrEqual(x, y).eval(EmptyRow).asInstanceOf[Boolean]
+
+  private def reduceWithConstraint(expression: Expression, constraint: Expression): Expression =
+    constraint match {
+      case a LessThan b => expression transformUp {
+        case c LessThan d if planEqual(b, d) && (planEqual(a, c) || valueLessThanOrEqual(c, a)) =>
+          Literal.TrueLiteral
+        case c LessThan d if planEqual(b, c) && (planEqual(a, d) || valueLessThanOrEqual(d, a)) =>
+          Literal.FalseLiteral
+        case c LessThan d if planEqual(a, c) && (planEqual(b, d) || valueLessThanOrEqual(b, d)) =>
+          Literal.TrueLiteral
+        case c LessThan d if planEqual(a, d) && (planEqual(b, c) || valueLessThanOrEqual(b, c)) =>
+          Literal.FalseLiteral
+
+        case c LessThanOrEqual d
+          if planEqual(b, d) && (planEqual(a, c) || valueLessThanOrEqual(c, a)) =>
+          Literal.TrueLiteral
+        case c LessThanOrEqual d
+          if planEqual(b, c) && (planEqual(a, d) || valueLessThanOrEqual(d, a)) =>
+          Literal.FalseLiteral
+        case c LessThanOrEqual d
+          if planEqual(a, c) && (planEqual(b, d) || valueLessThanOrEqual(b, d)) =>
+          Literal.TrueLiteral
+        case c LessThanOrEqual d
+          if planEqual(a, d) && (planEqual(b, c) || valueLessThanOrEqual(b, c)) =>
+          Literal.FalseLiteral
+
+       case c EqualTo d if planEqual(b, d) && (planEqual(a, c) || valueLessThanOrEqual(c, a)) =>
+          Literal.FalseLiteral
+        case c EqualTo d if planEqual(b, c) && (planEqual(a, d) || valueLessThanOrEqual(d, a)) =>
+          Literal.FalseLiteral
+        case c EqualTo d if planEqual(a, c) && (planEqual(b, d) || valueLessThanOrEqual(b, d)) =>
+          Literal.FalseLiteral
+        case c EqualTo d if planEqual(a, d) && (planEqual(b, c) || valueLessThanOrEqual(b, c)) =>
+          Literal.FalseLiteral
+
+        case c EqualNullSafe d
+          if planEqual(b, d) && (planEqual(a, c) || valueLessThanOrEqual(c, a)) =>
+          Literal.FalseLiteral
+        case c EqualNullSafe d
+          if planEqual(b, c) && (planEqual(a, d) || valueLessThanOrEqual(d, a)) =>
+          Literal.FalseLiteral
+        case c EqualNullSafe d
+          if planEqual(a, c) && (planEqual(b, d) || valueLessThanOrEqual(b, d)) =>
+          Literal.FalseLiteral
+        case c EqualNullSafe d
+          if planEqual(a, d) && (planEqual(b, c) || valueLessThanOrEqual(b, c)) =>
+          Literal.FalseLiteral
+
+        case c EqualNullSafe d if planEqual(b, d) => EqualTo(c, d)
+        case c EqualNullSafe d if planEqual(b, c) => EqualTo(c, d)
+        case c EqualNullSafe d if planEqual(a, c) => EqualTo(c, d)
+        case c EqualNullSafe d if planEqual(a, d) => EqualTo(c, d)
+      }
+      case a LessThanOrEqual b => expression transformUp {
+        case c LessThan d if planEqual(b, d) && valueLessThan(c, a) =>
+          Literal.TrueLiteral
+        case c LessThan d if planEqual(b, c) && (planEqual(a, d) || valueLessThanOrEqual(d, a)) =>
+          Literal.FalseLiteral
+        case c LessThan d if planEqual(a, c) && valueLessThan(b, d) =>
+          Literal.TrueLiteral
+        case c LessThan d if planEqual(a, d) && (planEqual(b, c) || valueLessThanOrEqual(b, c)) =>
+          Literal.FalseLiteral
+
+        case c LessThanOrEqual d
+          if planEqual(b, d) && (planEqual(a, c) || valueLessThanOrEqual(c, a)) =>
+          Literal.TrueLiteral
+        case c LessThanOrEqual d if planEqual(b, c) && valueLessThan(d, a) =>
+          Literal.FalseLiteral
+        case c LessThanOrEqual d if planEqual(b, c) && (planEqual(a, d) || valueEqual(a, d)) =>
+          EqualTo(c, d)
+        case c LessThanOrEqual d
+          if planEqual(a, c) && (planEqual(b, d) || valueLessThanOrEqual(b, d)) =>
+          Literal.TrueLiteral
+        case c LessThanOrEqual d if planEqual(a, d) && valueLessThan(b, c) =>
+          Literal.FalseLiteral
+        case c LessThanOrEqual d if planEqual(a, d) && (planEqual(b, c) || valueEqual(b, c)) =>
+          EqualTo(c, d)
+
+        case c EqualTo d if planEqual(b, d) && valueLessThan(c, a) => Literal.FalseLiteral
+        case c EqualTo d if planEqual(b, c) && valueLessThan(d, a) => Literal.FalseLiteral
+        case c EqualTo d if planEqual(a, c) && valueLessThan(b, d) => Literal.FalseLiteral
+        case c EqualTo d if planEqual(a, d) && valueLessThan(b, c) => Literal.FalseLiteral
+
+        case c EqualNullSafe d if planEqual(b, d) && valueLessThan(c, a) => Literal.FalseLiteral
+        case c EqualNullSafe d if planEqual(b, c) && valueLessThan(d, a) => Literal.FalseLiteral
+        case c EqualNullSafe d if planEqual(a, c) && valueLessThan(b, d) => Literal.FalseLiteral
+        case c EqualNullSafe d if planEqual(a, d) && valueLessThan(b, c) => Literal.FalseLiteral
+
+        case c EqualNullSafe d if planEqual(b, d) => EqualTo(c, d)
+        case c EqualNullSafe d if planEqual(b, c) => EqualTo(c, d)
+        case c EqualNullSafe d if planEqual(a, c) => EqualTo(c, d)
+        case c EqualNullSafe d if planEqual(a, d) => EqualTo(c, d)
+      }
+      case a EqualTo b => expression transformUp {
+        case c LessThan d if planEqual(b, d) && planEqual(a, c) => Literal.FalseLiteral
+        case c LessThan d if planEqual(b, c) && planEqual(a, d) => Literal.FalseLiteral
+        case c LessThan d if planEqual(a, d) && planEqual(b, c) => Literal.FalseLiteral
+        case c LessThan d if planEqual(a, c) && planEqual(b, d) => Literal.FalseLiteral
+
+        case c LessThanOrEqual d if planEqual(b, d) && planEqual(a, c) => Literal.TrueLiteral
 
 Review comment:
   these are wrong if the expression return `null`

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org