You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2022/02/01 20:29:00 UTC

[arrow-datafusion] branch master updated: Convert boolean case expressions to boolean logic (#1719)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new b9a8f15  Convert boolean case expressions to boolean logic (#1719)
b9a8f15 is described below

commit b9a8f151893b2487ee4d5e9018af71713565e26d
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Tue Feb 1 20:28:56 2022 +0000

    Convert boolean case expressions to boolean logic (#1719)
    
    * Convert boolean case expressions to boolean logic
    
    * Review feedback
---
 datafusion/src/optimizer/simplify_expressions.rs | 123 ++++++++++++++++++++++-
 1 file changed, 120 insertions(+), 3 deletions(-)

diff --git a/datafusion/src/optimizer/simplify_expressions.rs b/datafusion/src/optimizer/simplify_expressions.rs
index 6f5235e..00739cc 100644
--- a/datafusion/src/optimizer/simplify_expressions.rs
+++ b/datafusion/src/optimizer/simplify_expressions.rs
@@ -655,6 +655,54 @@ impl<'a> ExprRewriter for Simplifier<'a> {
                 _ => unreachable!(),
             },
 
+            //
+            // Rules for Case
+            //
+
+            // CASE
+            //   WHEN X THEN A
+            //   WHEN Y THEN B
+            //   ...
+            //   ELSE Q
+            // END
+            //
+            // ---> (X AND A) OR (Y AND B AND NOT X) OR ... (NOT (X OR Y) AND Q)
+            //
+            // Note: the rationale for this rewrite is that the expr can then be further
+            // simplified using the existing rules for AND/OR
+            Case {
+                expr: None,
+                when_then_expr,
+                else_expr,
+            } if !when_then_expr.is_empty()
+                && when_then_expr.len() < 3 // The rewrite is O(n!) so limit to small number
+                && self.is_boolean_type(&when_then_expr[0].1) =>
+            {
+                // The disjunction of all the when predicates encountered so far
+                let mut filter_expr = lit(false);
+                // The disjunction of all the cases
+                let mut out_expr = lit(false);
+
+                for (when, then) in when_then_expr {
+                    let case_expr = when
+                        .as_ref()
+                        .clone()
+                        .and(filter_expr.clone().not())
+                        .and(*then);
+
+                    out_expr = out_expr.or(case_expr);
+                    filter_expr = filter_expr.or(*when);
+                }
+
+                if let Some(else_expr) = else_expr {
+                    let case_expr = filter_expr.not().and(*else_expr);
+                    out_expr = out_expr.or(case_expr);
+                }
+
+                // Do a first pass at simplification
+                out_expr.rewrite(self)?
+            }
+
             expr => {
                 // no additional rewrites possible
                 expr
@@ -1169,6 +1217,8 @@ mod tests {
             .expect("expected to simplify")
             .rewrite(&mut const_evaluator)
             .expect("expected to const evaluate")
+            .rewrite(&mut rewriter)
+            .expect("expected to simplify")
     }
 
     fn expr_test_schema() -> DFSchemaRef {
@@ -1285,6 +1335,11 @@ mod tests {
 
     #[test]
     fn simplify_expr_case_when_then_else() {
+        // CASE WHERE c2 != false THEN "ok" == "not_ok" ELSE c2 == true
+        // -->
+        // CASE WHERE c2 THEN false ELSE c2
+        // -->
+        // false
         assert_eq!(
             simplify(Expr::Case {
                 expr: None,
@@ -1294,11 +1349,73 @@ mod tests {
                 )],
                 else_expr: Some(Box::new(col("c2").eq(lit(true)))),
             }),
-            Expr::Case {
+            col("c2").not().and(col("c2")) // #1716
+        );
+
+        // CASE WHERE c2 != false THEN "ok" == "ok" ELSE c2
+        // -->
+        // CASE WHERE c2 THEN true ELSE c2
+        // -->
+        // c2
+        assert_eq!(
+            simplify(Expr::Case {
                 expr: None,
-                when_then_expr: vec![(Box::new(col("c2")), Box::new(lit(false)))],
+                when_then_expr: vec![(
+                    Box::new(col("c2").not_eq(lit(false))),
+                    Box::new(lit("ok").eq(lit("ok"))),
+                )],
+                else_expr: Some(Box::new(col("c2").eq(lit(true)))),
+            }),
+            col("c2").or(col("c2").not().and(col("c2"))) // #1716
+        );
+
+        // CASE WHERE ISNULL(c2) THEN true ELSE c2
+        // -->
+        // ISNULL(c2) OR c2
+        assert_eq!(
+            simplify(Expr::Case {
+                expr: None,
+                when_then_expr: vec![(
+                    Box::new(col("c2").is_null()),
+                    Box::new(lit(true)),
+                )],
                 else_expr: Some(Box::new(col("c2"))),
-            }
+            }),
+            col("c2")
+                .is_null()
+                .or(col("c2").is_null().not().and(col("c2")))
+        );
+
+        // CASE WHERE c1 then true WHERE c2 then false ELSE true
+        // --> c1 OR (NOT(c1) AND c2 AND FALSE) OR (NOT(c1 OR c2) AND TRUE)
+        // --> c1 OR (NOT(c1 OR c2))
+        // --> NOT(c1) AND c2
+        assert_eq!(
+            simplify(Expr::Case {
+                expr: None,
+                when_then_expr: vec![
+                    (Box::new(col("c1")), Box::new(lit(true)),),
+                    (Box::new(col("c2")), Box::new(lit(false)),)
+                ],
+                else_expr: Some(Box::new(lit(true))),
+            }),
+            col("c1").or(col("c1").or(col("c2")).not())
+        );
+
+        // CASE WHERE c1 then true WHERE c2 then true ELSE false
+        // --> c1 OR (NOT(c1) AND c2 AND TRUE) OR (NOT(c1 OR c2) AND FALSE)
+        // --> c1 OR (NOT(c1) AND c2)
+        // --> c1 OR c2
+        assert_eq!(
+            simplify(Expr::Case {
+                expr: None,
+                when_then_expr: vec![
+                    (Box::new(col("c1")), Box::new(lit(true)),),
+                    (Box::new(col("c2")), Box::new(lit(false)),)
+                ],
+                else_expr: Some(Box::new(lit(true))),
+            }),
+            col("c1").or(col("c1").or(col("c2")).not())
         );
     }