You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2020/09/06 14:44:54 UTC

[GitHub] [spark] tanelk commented on a change in pull request #27518: [SPARK-30768][SQL] Constraints inferred from inequality attributes

tanelk commented on a change in pull request #27518:
URL: https://github.com/apache/spark/pull/27518#discussion_r484077005



##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/QueryPlanConstraints.scala
##########
@@ -78,6 +91,72 @@ trait ConstraintHelper {
     inferredConstraints -- constraints
   }
 
+  /**
+   * Infers an additional set of constraints from a given set of inequality constraints.
+   * For e.g., if an operator has constraints of the form (`a > b`, `b > 5`), this returns an
+   * additional constraint of the form `a > 5`.
+   */
+  def inferInequalityConstraints(constraints: Set[Expression]): Set[Expression] = {
+    val binaryComparisons = constraints.filter {
+      case _: GreaterThan => true
+      case _: GreaterThanOrEqual => true
+      case _: LessThan => true
+      case _: LessThanOrEqual => true
+      case _: EqualTo => true

Review comment:
       `EqualTo` should not be needed here, as `inferEqualityConstraints` should already cover all cases that involve it.

##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/QueryPlanConstraints.scala
##########
@@ -78,6 +91,72 @@ trait ConstraintHelper {
     inferredConstraints -- constraints
   }
 
+  /**
+   * Infers an additional set of constraints from a given set of inequality constraints.
+   * For e.g., if an operator has constraints of the form (`a > b`, `b > 5`), this returns an
+   * additional constraint of the form `a > 5`.
+   */
+  def inferInequalityConstraints(constraints: Set[Expression]): Set[Expression] = {
+    val binaryComparisons = constraints.filter {
+      case _: GreaterThan => true
+      case _: GreaterThanOrEqual => true
+      case _: LessThan => true
+      case _: LessThanOrEqual => true
+      case _: EqualTo => true
+      case _ => false
+    }
+
+    val greaterThans = binaryComparisons.map {
+      case EqualTo(l, r) if l.foldable => EqualTo(r, l)
+      case LessThan(l, r) => GreaterThan(r, l)
+      case LessThanOrEqual(l, r) => GreaterThanOrEqual(r, l)
+      case other => other
+    }
+
+    val lessThans = binaryComparisons.map {
+      case EqualTo(l, r) if l.foldable => EqualTo(r, l)
+      case GreaterThan(l, r) => LessThan(r, l)
+      case GreaterThanOrEqual(l, r) => LessThanOrEqual(r, l)
+      case other => other
+    }
+
+    var inferredConstraints = Set.empty[Expression]
+    greaterThans.foreach {
+      case op @ BinaryComparison(source: Attribute, destination: Expression)
+        if destination.foldable =>

Review comment:
       I think the foldability check is not needed here. The new constraints do not have to involve only constants; they can also involve any attribute.

##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/QueryPlanConstraints.scala
##########
@@ -78,6 +91,72 @@ trait ConstraintHelper {
     inferredConstraints -- constraints
   }
 
+  /**
+   * Infers an additional set of constraints from a given set of inequality constraints.
+   * For e.g., if an operator has constraints of the form (`a > b`, `b > 5`), this returns an
+   * additional constraint of the form `a > 5`.
+   */
+  def inferInequalityConstraints(constraints: Set[Expression]): Set[Expression] = {
+    val binaryComparisons = constraints.filter {
+      case _: GreaterThan => true
+      case _: GreaterThanOrEqual => true
+      case _: LessThan => true
+      case _: LessThanOrEqual => true
+      case _: EqualTo => true
+      case _ => false
+    }
+
+    val greaterThans = binaryComparisons.map {
+      case EqualTo(l, r) if l.foldable => EqualTo(r, l)
+      case LessThan(l, r) => GreaterThan(r, l)
+      case LessThanOrEqual(l, r) => GreaterThanOrEqual(r, l)
+      case other => other
+    }
+
+    val lessThans = binaryComparisons.map {
+      case EqualTo(l, r) if l.foldable => EqualTo(r, l)
+      case GreaterThan(l, r) => LessThan(r, l)
+      case GreaterThanOrEqual(l, r) => LessThanOrEqual(r, l)
+      case other => other
+    }

Review comment:
       Doesn't this duplicate the `greaterThans` block?
   Here you have `a < b < c`, and in the other block you have `c > b > a`, which expresses the same relation.

##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/QueryPlanConstraints.scala
##########
@@ -55,12 +55,25 @@ trait QueryPlanConstraints extends ConstraintHelper { self: LogicalPlan =>
 
 trait ConstraintHelper {
 
+  /**
+   * Infers an additional set of constraints from a given set of constraints.
+   */
+  def inferAdditionalConstraints(constraints: Set[Expression]): Set[Expression] = {
+    var inferred = inferEqualityConstraints(constraints)
+    var lastInequalityInferred = Set.empty[Expression]
+    do {
+      lastInequalityInferred = inferInequalityConstraints(constraints ++ inferred)
+      inferred ++= lastInequalityInferred
+    } while (lastInequalityInferred.nonEmpty)

Review comment:
       Could you hit an infinite loop with non-deterministic filters? They are never semantically equal to any other expression (including themselves). I hit that problem in #29650, where I was also working on constraint inference, but from `EqualNullSafe`.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org