You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2021/12/31 13:07:16 UTC

[GitHub] [arrow-datafusion] alamb commented on a change in pull request #1401: Combine `simplify` and `Simplifier`, check nullability during rewrites

alamb commented on a change in pull request #1401:
URL: https://github.com/apache/arrow-datafusion/pull/1401#discussion_r777001074



##########
File path: datafusion/src/optimizer/simplify_expressions.rs
##########
@@ -900,28 +809,99 @@ mod tests {
         );
         let expected = expr.clone();
 
-        assert_eq!(simplify(&expr), expected);
+        assert_eq!(simplify(expr), expected);
     }
 
     #[test]
     fn test_simplify_or_and() {
-        // (c > 5) OR ((d < 6) AND (c > 5) -- can remove
-        let expr = binary_expr(
-            col("c2").gt(lit(5)),
+        let l = col("c2").gt(lit(5));

Review comment:
       additional coverage showing that this rewrite is not possible unless you know `c1 < 6` can not be null

##########
File path: datafusion/src/optimizer/simplify_expressions.rs
##########
@@ -554,212 +416,250 @@ impl<'a> Simplifier<'a> {
         false
     }
 
-    fn boolean_folding_for_or(
-        const_bool: &Option<bool>,
-        bool_expr: Box<Expr>,
-        left_right_order: bool,
-    ) -> Expr {
-        // See if we can fold 'const_bool OR bool_expr' to a constant boolean
-        match const_bool {
-            // TRUE or expr (including NULL) = TRUE
-            Some(true) => Expr::Literal(ScalarValue::Boolean(Some(true))),
-            // FALSE or expr (including NULL) = expr
-            Some(false) => *bool_expr,
-            None => match *bool_expr {
-                // NULL or TRUE = TRUE
-                Expr::Literal(ScalarValue::Boolean(Some(true))) => {
-                    Expr::Literal(ScalarValue::Boolean(Some(true)))
-                }
-                // NULL or FALSE = NULL
-                Expr::Literal(ScalarValue::Boolean(Some(false))) => {
-                    Expr::Literal(ScalarValue::Boolean(None))
-                }
-                // NULL or NULL = NULL
-                Expr::Literal(ScalarValue::Boolean(None)) => {
-                    Expr::Literal(ScalarValue::Boolean(None))
-                }
-                // NULL or expr can be either NULL or TRUE
-                // So let us not rewrite it
-                _ => {
-                    let mut left =
-                        Box::new(Expr::Literal(ScalarValue::Boolean(*const_bool)));
-                    let mut right = bool_expr;
-                    if !left_right_order {
-                        std::mem::swap(&mut left, &mut right);
-                    }
-
-                    Expr::BinaryExpr {
-                        left,
-                        op: Operator::Or,
-                        right,
-                    }
-                }
-            },
+    /// Returns true if expr is nullable
+    fn nullable(&self, expr: &Expr) -> Result<bool> {
+        for schema in &self.schemas {
+            if let Ok(res) = expr.nullable(schema.as_ref()) {
+                return Ok(res);
+            }
+            // expr may be from another input, so keep trying
         }
-    }
 
-    fn boolean_folding_for_and(
-        const_bool: &Option<bool>,
-        bool_expr: Box<Expr>,
-        left_right_order: bool,
-    ) -> Expr {
-        // See if we can fold 'const_bool AND bool_expr' to a constant boolean
-        match const_bool {
-            // TRUE and expr (including NULL) = expr
-            Some(true) => *bool_expr,
-            // FALSE and expr (including NULL) = FALSE
-            Some(false) => Expr::Literal(ScalarValue::Boolean(Some(false))),
-            None => match *bool_expr {
-                // NULL and TRUE = NULL
-                Expr::Literal(ScalarValue::Boolean(Some(true))) => {
-                    Expr::Literal(ScalarValue::Boolean(None))
-                }
-                // NULL and FALSE = FALSE
-                Expr::Literal(ScalarValue::Boolean(Some(false))) => {
-                    Expr::Literal(ScalarValue::Boolean(Some(false)))
-                }
-                // NULL and NULL = NULL
-                Expr::Literal(ScalarValue::Boolean(None)) => {
-                    Expr::Literal(ScalarValue::Boolean(None))
-                }
-                // NULL and expr can either be NULL or FALSE
-                // So let us not rewrite it
-                _ => {
-                    let mut left =
-                        Box::new(Expr::Literal(ScalarValue::Boolean(*const_bool)));
-                    let mut right = bool_expr;
-                    if !left_right_order {
-                        std::mem::swap(&mut left, &mut right);
-                    }
-
-                    Expr::BinaryExpr {
-                        left,
-                        op: Operator::And,
-                        right,
-                    }
-                }
-            },
-        }
+        // This means we weren't able to compule `Expr::nullable` with
+        // any input schemas, signalling a problem
+        Err(DataFusionError::Internal(format!(
+            "Could not find find columns in '{}' during simplify",
+            expr
+        )))
     }
 }
 
 impl<'a> ExprRewriter for Simplifier<'a> {
     /// rewrite the expression simplifying any constant expressions
     fn mutate(&mut self, expr: Expr) -> Result<Expr> {
+        use Expr::*;
+        use Operator::{And, Divide, Eq, Multiply, NotEq, Or};
+
         let new_expr = match expr {
-            Expr::BinaryExpr { left, op, right } => match op {
-                Operator::Eq => match (left.as_ref(), right.as_ref()) {
-                    (
-                        Expr::Literal(ScalarValue::Boolean(l)),
-                        Expr::Literal(ScalarValue::Boolean(r)),
-                    ) => match (l, r) {
-                        (Some(l), Some(r)) => {
-                            Expr::Literal(ScalarValue::Boolean(Some(l == r)))
-                        }
-                        _ => Expr::Literal(ScalarValue::Boolean(None)),
-                    },
-                    (Expr::Literal(ScalarValue::Boolean(b)), _)
-                        if self.is_boolean_type(&right) =>
-                    {
-                        match b {
-                            Some(true) => *right,
-                            Some(false) => Expr::Not(right),
-                            None => Expr::Literal(ScalarValue::Boolean(None)),
-                        }
-                    }
-                    (_, Expr::Literal(ScalarValue::Boolean(b)))
-                        if self.is_boolean_type(&left) =>
-                    {
-                        match b {
-                            Some(true) => *left,
-                            Some(false) => Expr::Not(left),
-                            None => Expr::Literal(ScalarValue::Boolean(None)),
-                        }
-                    }
-                    _ => Expr::BinaryExpr {
-                        left,
-                        op: Operator::Eq,
-                        right,
-                    },
-                },
-                Operator::NotEq => match (left.as_ref(), right.as_ref()) {
-                    (
-                        Expr::Literal(ScalarValue::Boolean(l)),
-                        Expr::Literal(ScalarValue::Boolean(r)),
-                    ) => match (l, r) {
-                        (Some(l), Some(r)) => {
-                            Expr::Literal(ScalarValue::Boolean(Some(l != r)))
-                        }
-                        _ => Expr::Literal(ScalarValue::Boolean(None)),
-                    },
-                    (Expr::Literal(ScalarValue::Boolean(b)), _)
-                        if self.is_boolean_type(&right) =>
-                    {
-                        match b {
-                            Some(true) => Expr::Not(right),
-                            Some(false) => *right,
-                            None => Expr::Literal(ScalarValue::Boolean(None)),
-                        }
-                    }
-                    (_, Expr::Literal(ScalarValue::Boolean(b)))
-                        if self.is_boolean_type(&left) =>
-                    {
-                        match b {
-                            Some(true) => Expr::Not(left),
-                            Some(false) => *left,
-                            None => Expr::Literal(ScalarValue::Boolean(None)),
-                        }
-                    }
-                    _ => Expr::BinaryExpr {
-                        left,
-                        op: Operator::NotEq,
-                        right,
-                    },
-                },
-                Operator::Or => match (left.as_ref(), right.as_ref()) {
-                    (Expr::Literal(ScalarValue::Boolean(b)), _)
-                        if self.is_boolean_type(&right) =>
-                    {
-                        Self::boolean_folding_for_or(b, right, true)
-                    }
-                    (_, Expr::Literal(ScalarValue::Boolean(b)))
-                        if self.is_boolean_type(&left) =>
-                    {
-                        Self::boolean_folding_for_or(b, left, false)
-                    }
-                    _ => Expr::BinaryExpr {
-                        left,
-                        op: Operator::Or,
-                        right,
-                    },
-                },
-                Operator::And => match (left.as_ref(), right.as_ref()) {
-                    (Expr::Literal(ScalarValue::Boolean(b)), _)
-                        if self.is_boolean_type(&right) =>
-                    {
-                        Self::boolean_folding_for_and(b, right, true)
-                    }
-                    (_, Expr::Literal(ScalarValue::Boolean(b)))
-                        if self.is_boolean_type(&left) =>
-                    {
-                        Self::boolean_folding_for_and(b, left, false)
-                    }
-                    _ => Expr::BinaryExpr {
-                        left,
-                        op: Operator::And,
-                        right,
-                    },
-                },
-                _ => Expr::BinaryExpr { left, op, right },
-            },
-            // Not(Not(expr)) --> expr
-            Expr::Not(inner) => {
-                if let Expr::Not(negated_inner) = *inner {
-                    *negated_inner
-                } else {
-                    Expr::Not(inner)
+            //
+            // Rules for Eq
+            //
+
+            // true = A  --> A
+            // false = A --> !A
+            // null = A --> null
+            BinaryExpr {
+                left,
+                op: Eq,
+                right,
+            } if is_bool_lit(&left) && self.is_boolean_type(&right) => {
+                match as_bool_lit(*left) {
+                    Some(true) => *right,
+                    Some(false) => Not(right),
+                    None => lit_null(),
+                }
+            }
+            // A = true  --> A
+            // A = false --> !A
+            // A = null --> null
+            BinaryExpr {
+                left,
+                op: Eq,
+                right,
+            } if is_bool_lit(&right) && self.is_boolean_type(&left) => {
+                match as_bool_lit(*right) {
+                    Some(true) => *left,
+                    Some(false) => Not(left),
+                    None => lit_null(),
                 }
             }
+
+            //
+            // Rules for NotEq
+            //
+
+            // true != A  --> !A
+            // false != A --> A
+            // null != A --> null
+            BinaryExpr {
+                left,
+                op: NotEq,
+                right,
+            } if is_bool_lit(&left) && self.is_boolean_type(&right) => {
+                match as_bool_lit(*left) {
+                    Some(true) => Not(right),
+                    Some(false) => *right,
+                    None => lit_null(),
+                }
+            }
+            // A != true  --> !A
+            // A != false --> A
+            // A != null --> null,
+            BinaryExpr {
+                left,
+                op: NotEq,
+                right,
+            } if is_bool_lit(&right) && self.is_boolean_type(&left) => {
+                match as_bool_lit(*right) {
+                    Some(true) => Not(left),
+                    Some(false) => *left,
+                    None => lit_null(),
+                }
+            }
+
+            //
+            // Rules for OR
+            //
+
+            // true OR A --> true (even if A is null)
+            BinaryExpr {
+                left,
+                op: Or,
+                right: _,
+            } if is_true(&left) => *left,
+            // false OR A --> A
+            BinaryExpr {
+                left,
+                op: Or,
+                right,
+            } if is_false(&left) => *right,
+            // A OR true --> true (even if A is null)
+            BinaryExpr {
+                left: _,
+                op: Or,
+                right,
+            } if is_true(&right) => *right,
+            // A OR false --> A
+            BinaryExpr {
+                left,
+                op: Or,
+                right,
+            } if is_false(&right) => *left,
+            // (..A..) OR A --> (..A..)
+            BinaryExpr {
+                left,
+                op: Or,
+                right,
+            } if expr_contains(&left, &right, Or) => *left,
+            // A OR (..A..) --> (..A..)
+            BinaryExpr {
+                left,
+                op: Or,
+                right,
+            } if expr_contains(&right, &left, Or) => *right,
+            // A OR (A AND B) --> A (if B not null)
+            BinaryExpr {
+                left,
+                op: Or,
+                right,
+            } if !self.nullable(&right)? && is_op_with(And, &right, &left) => *left,

Review comment:
       on master the `nullable()` check is not performed, leading to incorrect results in some cases

##########
File path: datafusion/src/optimizer/simplify_expressions.rs
##########
@@ -109,160 +110,27 @@ fn is_false(expr: &Expr) -> bool {
     }
 }
 
-fn simplify(expr: &Expr) -> Expr {
-    match expr {

Review comment:
       the non redundant rules are now found in `rewrite` below

##########
File path: datafusion/src/optimizer/simplify_expressions.rs
##########
@@ -847,24 +747,33 @@ mod tests {
         let expr_b = binary_expr(lit(1), Operator::Multiply, col("c2"));
         let expected = col("c2");
 
-        assert_eq!(simplify(&expr_a), expected);
-        assert_eq!(simplify(&expr_b), expected);
+        assert_eq!(simplify(expr_a), expected);
+        assert_eq!(simplify(expr_b), expected);
     }
 
     #[test]
     fn test_simplify_divide_by_one() {
         let expr = binary_expr(col("c2"), Operator::Divide, lit(1));
         let expected = col("c2");
 
-        assert_eq!(simplify(&expr), expected);
+        assert_eq!(simplify(expr), expected);
     }
 
     #[test]
     fn test_simplify_divide_by_same() {
         let expr = binary_expr(col("c2"), Operator::Divide, col("c2"));
+        // if c2 is null, c2 / c2 = null, so can't simplify

Review comment:
       new test coverage showing one can't simplify `A / A` unless you know `A` isn't null

##########
File path: datafusion/src/optimizer/simplify_expressions.rs
##########
@@ -554,212 +416,250 @@ impl<'a> Simplifier<'a> {
         false
     }
 
-    fn boolean_folding_for_or(
-        const_bool: &Option<bool>,
-        bool_expr: Box<Expr>,
-        left_right_order: bool,
-    ) -> Expr {
-        // See if we can fold 'const_bool OR bool_expr' to a constant boolean
-        match const_bool {
-            // TRUE or expr (including NULL) = TRUE
-            Some(true) => Expr::Literal(ScalarValue::Boolean(Some(true))),
-            // FALSE or expr (including NULL) = expr
-            Some(false) => *bool_expr,
-            None => match *bool_expr {
-                // NULL or TRUE = TRUE
-                Expr::Literal(ScalarValue::Boolean(Some(true))) => {
-                    Expr::Literal(ScalarValue::Boolean(Some(true)))
-                }
-                // NULL or FALSE = NULL
-                Expr::Literal(ScalarValue::Boolean(Some(false))) => {
-                    Expr::Literal(ScalarValue::Boolean(None))
-                }
-                // NULL or NULL = NULL
-                Expr::Literal(ScalarValue::Boolean(None)) => {
-                    Expr::Literal(ScalarValue::Boolean(None))
-                }
-                // NULL or expr can be either NULL or TRUE
-                // So let us not rewrite it
-                _ => {
-                    let mut left =
-                        Box::new(Expr::Literal(ScalarValue::Boolean(*const_bool)));
-                    let mut right = bool_expr;
-                    if !left_right_order {
-                        std::mem::swap(&mut left, &mut right);
-                    }
-
-                    Expr::BinaryExpr {
-                        left,
-                        op: Operator::Or,
-                        right,
-                    }
-                }
-            },
+    /// Returns true if expr is nullable
+    fn nullable(&self, expr: &Expr) -> Result<bool> {
+        for schema in &self.schemas {
+            if let Ok(res) = expr.nullable(schema.as_ref()) {
+                return Ok(res);
+            }
+            // expr may be from another input, so keep trying
         }
-    }
 
-    fn boolean_folding_for_and(
-        const_bool: &Option<bool>,
-        bool_expr: Box<Expr>,
-        left_right_order: bool,
-    ) -> Expr {
-        // See if we can fold 'const_bool AND bool_expr' to a constant boolean
-        match const_bool {
-            // TRUE and expr (including NULL) = expr
-            Some(true) => *bool_expr,
-            // FALSE and expr (including NULL) = FALSE
-            Some(false) => Expr::Literal(ScalarValue::Boolean(Some(false))),
-            None => match *bool_expr {
-                // NULL and TRUE = NULL
-                Expr::Literal(ScalarValue::Boolean(Some(true))) => {
-                    Expr::Literal(ScalarValue::Boolean(None))
-                }
-                // NULL and FALSE = FALSE
-                Expr::Literal(ScalarValue::Boolean(Some(false))) => {
-                    Expr::Literal(ScalarValue::Boolean(Some(false)))
-                }
-                // NULL and NULL = NULL
-                Expr::Literal(ScalarValue::Boolean(None)) => {
-                    Expr::Literal(ScalarValue::Boolean(None))
-                }
-                // NULL and expr can either be NULL or FALSE
-                // So let us not rewrite it
-                _ => {
-                    let mut left =
-                        Box::new(Expr::Literal(ScalarValue::Boolean(*const_bool)));
-                    let mut right = bool_expr;
-                    if !left_right_order {
-                        std::mem::swap(&mut left, &mut right);
-                    }
-
-                    Expr::BinaryExpr {
-                        left,
-                        op: Operator::And,
-                        right,
-                    }
-                }
-            },
-        }
+        // This means we weren't able to compule `Expr::nullable` with
+        // any input schemas, signalling a problem
+        Err(DataFusionError::Internal(format!(
+            "Could not find find columns in '{}' during simplify",
+            expr
+        )))
     }
 }
 
 impl<'a> ExprRewriter for Simplifier<'a> {
     /// rewrite the expression simplifying any constant expressions
     fn mutate(&mut self, expr: Expr) -> Result<Expr> {
+        use Expr::*;
+        use Operator::{And, Divide, Eq, Multiply, NotEq, Or};
+
         let new_expr = match expr {
-            Expr::BinaryExpr { left, op, right } => match op {
-                Operator::Eq => match (left.as_ref(), right.as_ref()) {
-                    (
-                        Expr::Literal(ScalarValue::Boolean(l)),
-                        Expr::Literal(ScalarValue::Boolean(r)),
-                    ) => match (l, r) {
-                        (Some(l), Some(r)) => {
-                            Expr::Literal(ScalarValue::Boolean(Some(l == r)))
-                        }
-                        _ => Expr::Literal(ScalarValue::Boolean(None)),
-                    },
-                    (Expr::Literal(ScalarValue::Boolean(b)), _)
-                        if self.is_boolean_type(&right) =>
-                    {
-                        match b {
-                            Some(true) => *right,
-                            Some(false) => Expr::Not(right),
-                            None => Expr::Literal(ScalarValue::Boolean(None)),
-                        }
-                    }
-                    (_, Expr::Literal(ScalarValue::Boolean(b)))
-                        if self.is_boolean_type(&left) =>
-                    {
-                        match b {
-                            Some(true) => *left,
-                            Some(false) => Expr::Not(left),
-                            None => Expr::Literal(ScalarValue::Boolean(None)),
-                        }
-                    }
-                    _ => Expr::BinaryExpr {
-                        left,
-                        op: Operator::Eq,
-                        right,
-                    },
-                },
-                Operator::NotEq => match (left.as_ref(), right.as_ref()) {
-                    (
-                        Expr::Literal(ScalarValue::Boolean(l)),
-                        Expr::Literal(ScalarValue::Boolean(r)),
-                    ) => match (l, r) {
-                        (Some(l), Some(r)) => {
-                            Expr::Literal(ScalarValue::Boolean(Some(l != r)))
-                        }
-                        _ => Expr::Literal(ScalarValue::Boolean(None)),
-                    },
-                    (Expr::Literal(ScalarValue::Boolean(b)), _)
-                        if self.is_boolean_type(&right) =>
-                    {
-                        match b {
-                            Some(true) => Expr::Not(right),
-                            Some(false) => *right,
-                            None => Expr::Literal(ScalarValue::Boolean(None)),
-                        }
-                    }
-                    (_, Expr::Literal(ScalarValue::Boolean(b)))
-                        if self.is_boolean_type(&left) =>
-                    {
-                        match b {
-                            Some(true) => Expr::Not(left),
-                            Some(false) => *left,
-                            None => Expr::Literal(ScalarValue::Boolean(None)),
-                        }
-                    }
-                    _ => Expr::BinaryExpr {
-                        left,
-                        op: Operator::NotEq,
-                        right,
-                    },
-                },
-                Operator::Or => match (left.as_ref(), right.as_ref()) {
-                    (Expr::Literal(ScalarValue::Boolean(b)), _)
-                        if self.is_boolean_type(&right) =>
-                    {
-                        Self::boolean_folding_for_or(b, right, true)
-                    }
-                    (_, Expr::Literal(ScalarValue::Boolean(b)))
-                        if self.is_boolean_type(&left) =>
-                    {
-                        Self::boolean_folding_for_or(b, left, false)
-                    }
-                    _ => Expr::BinaryExpr {
-                        left,
-                        op: Operator::Or,
-                        right,
-                    },
-                },
-                Operator::And => match (left.as_ref(), right.as_ref()) {
-                    (Expr::Literal(ScalarValue::Boolean(b)), _)
-                        if self.is_boolean_type(&right) =>
-                    {
-                        Self::boolean_folding_for_and(b, right, true)
-                    }
-                    (_, Expr::Literal(ScalarValue::Boolean(b)))
-                        if self.is_boolean_type(&left) =>
-                    {
-                        Self::boolean_folding_for_and(b, left, false)
-                    }
-                    _ => Expr::BinaryExpr {
-                        left,
-                        op: Operator::And,
-                        right,
-                    },
-                },
-                _ => Expr::BinaryExpr { left, op, right },
-            },
-            // Not(Not(expr)) --> expr
-            Expr::Not(inner) => {
-                if let Expr::Not(negated_inner) = *inner {
-                    *negated_inner
-                } else {
-                    Expr::Not(inner)
+            //
+            // Rules for Eq
+            //
+
+            // true = A  --> A
+            // false = A --> !A
+            // null = A --> null
+            BinaryExpr {
+                left,
+                op: Eq,
+                right,
+            } if is_bool_lit(&left) && self.is_boolean_type(&right) => {
+                match as_bool_lit(*left) {
+                    Some(true) => *right,
+                    Some(false) => Not(right),
+                    None => lit_null(),
+                }
+            }
+            // A = true  --> A
+            // A = false --> !A
+            // A = null --> null
+            BinaryExpr {
+                left,
+                op: Eq,
+                right,
+            } if is_bool_lit(&right) && self.is_boolean_type(&left) => {
+                match as_bool_lit(*right) {
+                    Some(true) => *left,
+                    Some(false) => Not(left),
+                    None => lit_null(),
                 }
             }
+
+            //
+            // Rules for NotEq
+            //
+
+            // true != A  --> !A
+            // false != A --> A
+            // null != A --> null
+            BinaryExpr {
+                left,
+                op: NotEq,
+                right,
+            } if is_bool_lit(&left) && self.is_boolean_type(&right) => {
+                match as_bool_lit(*left) {
+                    Some(true) => Not(right),
+                    Some(false) => *right,
+                    None => lit_null(),
+                }
+            }
+            // A != true  --> !A
+            // A != false --> A
+            // A != null --> null,
+            BinaryExpr {
+                left,
+                op: NotEq,
+                right,
+            } if is_bool_lit(&right) && self.is_boolean_type(&left) => {
+                match as_bool_lit(*right) {
+                    Some(true) => Not(left),
+                    Some(false) => *left,
+                    None => lit_null(),
+                }
+            }
+
+            //
+            // Rules for OR
+            //
+
+            // true OR A --> true (even if A is null)

Review comment:
       These first 4 cases are now what was in `boolean_folding_for_or` 

##########
File path: datafusion/src/optimizer/simplify_expressions.rs
##########
@@ -554,212 +416,250 @@ impl<'a> Simplifier<'a> {
         false
     }
 
-    fn boolean_folding_for_or(
-        const_bool: &Option<bool>,
-        bool_expr: Box<Expr>,
-        left_right_order: bool,
-    ) -> Expr {
-        // See if we can fold 'const_bool OR bool_expr' to a constant boolean
-        match const_bool {
-            // TRUE or expr (including NULL) = TRUE
-            Some(true) => Expr::Literal(ScalarValue::Boolean(Some(true))),
-            // FALSE or expr (including NULL) = expr
-            Some(false) => *bool_expr,
-            None => match *bool_expr {
-                // NULL or TRUE = TRUE
-                Expr::Literal(ScalarValue::Boolean(Some(true))) => {
-                    Expr::Literal(ScalarValue::Boolean(Some(true)))
-                }
-                // NULL or FALSE = NULL
-                Expr::Literal(ScalarValue::Boolean(Some(false))) => {
-                    Expr::Literal(ScalarValue::Boolean(None))
-                }
-                // NULL or NULL = NULL
-                Expr::Literal(ScalarValue::Boolean(None)) => {
-                    Expr::Literal(ScalarValue::Boolean(None))
-                }
-                // NULL or expr can be either NULL or TRUE
-                // So let us not rewrite it
-                _ => {
-                    let mut left =
-                        Box::new(Expr::Literal(ScalarValue::Boolean(*const_bool)));
-                    let mut right = bool_expr;
-                    if !left_right_order {
-                        std::mem::swap(&mut left, &mut right);
-                    }
-
-                    Expr::BinaryExpr {
-                        left,
-                        op: Operator::Or,
-                        right,
-                    }
-                }
-            },
+    /// Returns true if expr is nullable
+    fn nullable(&self, expr: &Expr) -> Result<bool> {
+        for schema in &self.schemas {
+            if let Ok(res) = expr.nullable(schema.as_ref()) {
+                return Ok(res);
+            }
+            // expr may be from another input, so keep trying
         }
-    }
 
-    fn boolean_folding_for_and(
-        const_bool: &Option<bool>,
-        bool_expr: Box<Expr>,
-        left_right_order: bool,
-    ) -> Expr {
-        // See if we can fold 'const_bool AND bool_expr' to a constant boolean
-        match const_bool {
-            // TRUE and expr (including NULL) = expr
-            Some(true) => *bool_expr,
-            // FALSE and expr (including NULL) = FALSE
-            Some(false) => Expr::Literal(ScalarValue::Boolean(Some(false))),
-            None => match *bool_expr {
-                // NULL and TRUE = NULL
-                Expr::Literal(ScalarValue::Boolean(Some(true))) => {
-                    Expr::Literal(ScalarValue::Boolean(None))
-                }
-                // NULL and FALSE = FALSE
-                Expr::Literal(ScalarValue::Boolean(Some(false))) => {
-                    Expr::Literal(ScalarValue::Boolean(Some(false)))
-                }
-                // NULL and NULL = NULL
-                Expr::Literal(ScalarValue::Boolean(None)) => {
-                    Expr::Literal(ScalarValue::Boolean(None))
-                }
-                // NULL and expr can either be NULL or FALSE
-                // So let us not rewrite it
-                _ => {
-                    let mut left =
-                        Box::new(Expr::Literal(ScalarValue::Boolean(*const_bool)));
-                    let mut right = bool_expr;
-                    if !left_right_order {
-                        std::mem::swap(&mut left, &mut right);
-                    }
-
-                    Expr::BinaryExpr {
-                        left,
-                        op: Operator::And,
-                        right,
-                    }
-                }
-            },
-        }
+        // This means we weren't able to compule `Expr::nullable` with
+        // any input schemas, signalling a problem
+        Err(DataFusionError::Internal(format!(
+            "Could not find find columns in '{}' during simplify",
+            expr
+        )))
     }
 }
 
 impl<'a> ExprRewriter for Simplifier<'a> {
     /// rewrite the expression simplifying any constant expressions
     fn mutate(&mut self, expr: Expr) -> Result<Expr> {
+        use Expr::*;
+        use Operator::{And, Divide, Eq, Multiply, NotEq, Or};
+
         let new_expr = match expr {
-            Expr::BinaryExpr { left, op, right } => match op {
-                Operator::Eq => match (left.as_ref(), right.as_ref()) {
-                    (
-                        Expr::Literal(ScalarValue::Boolean(l)),
-                        Expr::Literal(ScalarValue::Boolean(r)),
-                    ) => match (l, r) {
-                        (Some(l), Some(r)) => {
-                            Expr::Literal(ScalarValue::Boolean(Some(l == r)))
-                        }
-                        _ => Expr::Literal(ScalarValue::Boolean(None)),
-                    },
-                    (Expr::Literal(ScalarValue::Boolean(b)), _)
-                        if self.is_boolean_type(&right) =>
-                    {
-                        match b {
-                            Some(true) => *right,
-                            Some(false) => Expr::Not(right),
-                            None => Expr::Literal(ScalarValue::Boolean(None)),
-                        }
-                    }
-                    (_, Expr::Literal(ScalarValue::Boolean(b)))
-                        if self.is_boolean_type(&left) =>
-                    {
-                        match b {
-                            Some(true) => *left,
-                            Some(false) => Expr::Not(left),
-                            None => Expr::Literal(ScalarValue::Boolean(None)),
-                        }
-                    }
-                    _ => Expr::BinaryExpr {
-                        left,
-                        op: Operator::Eq,
-                        right,
-                    },
-                },
-                Operator::NotEq => match (left.as_ref(), right.as_ref()) {
-                    (
-                        Expr::Literal(ScalarValue::Boolean(l)),
-                        Expr::Literal(ScalarValue::Boolean(r)),
-                    ) => match (l, r) {
-                        (Some(l), Some(r)) => {
-                            Expr::Literal(ScalarValue::Boolean(Some(l != r)))
-                        }
-                        _ => Expr::Literal(ScalarValue::Boolean(None)),
-                    },
-                    (Expr::Literal(ScalarValue::Boolean(b)), _)
-                        if self.is_boolean_type(&right) =>
-                    {
-                        match b {
-                            Some(true) => Expr::Not(right),
-                            Some(false) => *right,
-                            None => Expr::Literal(ScalarValue::Boolean(None)),
-                        }
-                    }
-                    (_, Expr::Literal(ScalarValue::Boolean(b)))
-                        if self.is_boolean_type(&left) =>
-                    {
-                        match b {
-                            Some(true) => Expr::Not(left),
-                            Some(false) => *left,
-                            None => Expr::Literal(ScalarValue::Boolean(None)),
-                        }
-                    }
-                    _ => Expr::BinaryExpr {
-                        left,
-                        op: Operator::NotEq,
-                        right,
-                    },
-                },
-                Operator::Or => match (left.as_ref(), right.as_ref()) {
-                    (Expr::Literal(ScalarValue::Boolean(b)), _)
-                        if self.is_boolean_type(&right) =>
-                    {
-                        Self::boolean_folding_for_or(b, right, true)
-                    }
-                    (_, Expr::Literal(ScalarValue::Boolean(b)))
-                        if self.is_boolean_type(&left) =>
-                    {
-                        Self::boolean_folding_for_or(b, left, false)
-                    }
-                    _ => Expr::BinaryExpr {
-                        left,
-                        op: Operator::Or,
-                        right,
-                    },
-                },
-                Operator::And => match (left.as_ref(), right.as_ref()) {
-                    (Expr::Literal(ScalarValue::Boolean(b)), _)
-                        if self.is_boolean_type(&right) =>
-                    {
-                        Self::boolean_folding_for_and(b, right, true)
-                    }
-                    (_, Expr::Literal(ScalarValue::Boolean(b)))
-                        if self.is_boolean_type(&left) =>
-                    {
-                        Self::boolean_folding_for_and(b, left, false)
-                    }
-                    _ => Expr::BinaryExpr {
-                        left,
-                        op: Operator::And,
-                        right,
-                    },
-                },
-                _ => Expr::BinaryExpr { left, op, right },
-            },
-            // Not(Not(expr)) --> expr
-            Expr::Not(inner) => {
-                if let Expr::Not(negated_inner) = *inner {
-                    *negated_inner
-                } else {
-                    Expr::Not(inner)
+            //
+            // Rules for Eq
+            //
+
+            // true = A  --> A
+            // false = A --> !A
+            // null = A --> null
+            BinaryExpr {
+                left,
+                op: Eq,
+                right,
+            } if is_bool_lit(&left) && self.is_boolean_type(&right) => {
+                match as_bool_lit(*left) {
+                    Some(true) => *right,
+                    Some(false) => Not(right),
+                    None => lit_null(),
+                }
+            }
+            // A = true  --> A
+            // A = false --> !A
+            // A = null --> null
+            BinaryExpr {
+                left,
+                op: Eq,
+                right,
+            } if is_bool_lit(&right) && self.is_boolean_type(&left) => {
+                match as_bool_lit(*right) {
+                    Some(true) => *left,
+                    Some(false) => Not(left),
+                    None => lit_null(),
                 }
             }
+
+            //
+            // Rules for NotEq
+            //
+
+            // true != A  --> !A
+            // false != A --> A
+            // null != A --> null
+            BinaryExpr {
+                left,
+                op: NotEq,
+                right,
+            } if is_bool_lit(&left) && self.is_boolean_type(&right) => {
+                match as_bool_lit(*left) {
+                    Some(true) => Not(right),
+                    Some(false) => *right,
+                    None => lit_null(),
+                }
+            }
+            // A != true  --> !A
+            // A != false --> A
+            // A != null --> null,
+            BinaryExpr {
+                left,
+                op: NotEq,
+                right,
+            } if is_bool_lit(&right) && self.is_boolean_type(&left) => {
+                match as_bool_lit(*right) {
+                    Some(true) => Not(left),
+                    Some(false) => *left,
+                    None => lit_null(),
+                }
+            }
+
+            //
+            // Rules for OR
+            //
+
+            // true OR A --> true (even if A is null)
+            BinaryExpr {
+                left,
+                op: Or,
+                right: _,
+            } if is_true(&left) => *left,
+            // false OR A --> A
+            BinaryExpr {
+                left,
+                op: Or,
+                right,
+            } if is_false(&left) => *right,
+            // A OR true --> true (even if A is null)
+            BinaryExpr {
+                left: _,
+                op: Or,
+                right,
+            } if is_true(&right) => *right,
+            // A OR false --> A
+            BinaryExpr {
+                left,
+                op: Or,
+                right,
+            } if is_false(&right) => *left,
+            // (..A..) OR A --> (..A..)
+            BinaryExpr {
+                left,
+                op: Or,
+                right,
+            } if expr_contains(&left, &right, Or) => *left,
+            // A OR (..A..) --> (..A..)
+            BinaryExpr {
+                left,
+                op: Or,
+                right,
+            } if expr_contains(&right, &left, Or) => *right,
+            // A OR (A AND B) --> A (if B not null)
+            BinaryExpr {
+                left,
+                op: Or,
+                right,
+            } if !self.nullable(&right)? && is_op_with(And, &right, &left) => *left,
+            // (A AND B) OR A --> A (if B not null)
+            BinaryExpr {
+                left,
+                op: Or,
+                right,
+            } if !self.nullable(&left)? && is_op_with(And, &left, &right) => *right,
+
+            //
+            // Rules for AND
+            //
+
+            // true AND A --> A
+            BinaryExpr {

Review comment:
       These first 4 cases are now what was in `boolean_folding_for_and` 




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org