You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2022/02/01 20:29:00 UTC
[arrow-datafusion] branch master updated: Convert boolean case expressions to boolean logic (#1719)
This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new b9a8f15 Convert boolean case expressions to boolean logic (#1719)
b9a8f15 is described below
commit b9a8f151893b2487ee4d5e9018af71713565e26d
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Tue Feb 1 20:28:56 2022 +0000
Convert boolean case expressions to boolean logic (#1719)
* Convert boolean case expressions to boolean logic
* Review feedback
---
datafusion/src/optimizer/simplify_expressions.rs | 123 ++++++++++++++++++++++-
1 file changed, 120 insertions(+), 3 deletions(-)
diff --git a/datafusion/src/optimizer/simplify_expressions.rs b/datafusion/src/optimizer/simplify_expressions.rs
index 6f5235e..00739cc 100644
--- a/datafusion/src/optimizer/simplify_expressions.rs
+++ b/datafusion/src/optimizer/simplify_expressions.rs
@@ -655,6 +655,54 @@ impl<'a> ExprRewriter for Simplifier<'a> {
_ => unreachable!(),
},
+ //
+ // Rules for Case
+ //
+
+ // CASE
+ // WHEN X THEN A
+ // WHEN Y THEN B
+ // ...
+ // ELSE Q
+ // END
+ //
+ // ---> (X AND A) OR (Y AND B AND NOT X) OR ... (NOT (X OR Y) AND Q)
+ //
+ // Note: the rationale for this rewrite is that the expr can then be further
+ // simplified using the existing rules for AND/OR
+ Case {
+ expr: None,
+ when_then_expr,
+ else_expr,
+ } if !when_then_expr.is_empty()
+ && when_then_expr.len() < 3 // The rewrite is O(n!) so limit to small number
+ && self.is_boolean_type(&when_then_expr[0].1) =>
+ {
+ // The disjunction of all the when predicates encountered so far
+ let mut filter_expr = lit(false);
+ // The disjunction of all the cases
+ let mut out_expr = lit(false);
+
+ for (when, then) in when_then_expr {
+ let case_expr = when
+ .as_ref()
+ .clone()
+ .and(filter_expr.clone().not())
+ .and(*then);
+
+ out_expr = out_expr.or(case_expr);
+ filter_expr = filter_expr.or(*when);
+ }
+
+ if let Some(else_expr) = else_expr {
+ let case_expr = filter_expr.not().and(*else_expr);
+ out_expr = out_expr.or(case_expr);
+ }
+
+ // Do a first pass at simplification
+ out_expr.rewrite(self)?
+ }
+
expr => {
// no additional rewrites possible
expr
@@ -1169,6 +1217,8 @@ mod tests {
.expect("expected to simplify")
.rewrite(&mut const_evaluator)
.expect("expected to const evaluate")
+ .rewrite(&mut rewriter)
+ .expect("expected to simplify")
}
fn expr_test_schema() -> DFSchemaRef {
@@ -1285,6 +1335,11 @@ mod tests {
#[test]
fn simplify_expr_case_when_then_else() {
+ // CASE WHERE c2 != false THEN "ok" == "not_ok" ELSE c2 == true
+ // -->
+ // CASE WHERE c2 THEN false ELSE c2
+ // -->
+ // false
assert_eq!(
simplify(Expr::Case {
expr: None,
@@ -1294,11 +1349,73 @@ mod tests {
)],
else_expr: Some(Box::new(col("c2").eq(lit(true)))),
}),
- Expr::Case {
+ col("c2").not().and(col("c2")) // #1716
+ );
+
+ // CASE WHERE c2 != false THEN "ok" == "ok" ELSE c2
+ // -->
+ // CASE WHERE c2 THEN true ELSE c2
+ // -->
+ // c2
+ assert_eq!(
+ simplify(Expr::Case {
expr: None,
- when_then_expr: vec![(Box::new(col("c2")), Box::new(lit(false)))],
+ when_then_expr: vec![(
+ Box::new(col("c2").not_eq(lit(false))),
+ Box::new(lit("ok").eq(lit("ok"))),
+ )],
+ else_expr: Some(Box::new(col("c2").eq(lit(true)))),
+ }),
+ col("c2").or(col("c2").not().and(col("c2"))) // #1716
+ );
+
+ // CASE WHERE ISNULL(c2) THEN true ELSE c2
+ // -->
+ // ISNULL(c2) OR c2
+ assert_eq!(
+ simplify(Expr::Case {
+ expr: None,
+ when_then_expr: vec![(
+ Box::new(col("c2").is_null()),
+ Box::new(lit(true)),
+ )],
else_expr: Some(Box::new(col("c2"))),
- }
+ }),
+ col("c2")
+ .is_null()
+ .or(col("c2").is_null().not().and(col("c2")))
+ );
+
+ // CASE WHERE c1 then true WHERE c2 then false ELSE true
+ // --> c1 OR (NOT(c1) AND c2 AND FALSE) OR (NOT(c1 OR c2) AND TRUE)
+ // --> c1 OR (NOT(c1 OR c2))
+ // --> NOT(c1) AND c2
+ assert_eq!(
+ simplify(Expr::Case {
+ expr: None,
+ when_then_expr: vec![
+ (Box::new(col("c1")), Box::new(lit(true)),),
+ (Box::new(col("c2")), Box::new(lit(false)),)
+ ],
+ else_expr: Some(Box::new(lit(true))),
+ }),
+ col("c1").or(col("c1").or(col("c2")).not())
+ );
+
+ // CASE WHERE c1 then true WHERE c2 then true ELSE false
+ // --> c1 OR (NOT(c1) AND c2 AND TRUE) OR (NOT(c1 OR c2) AND FALSE)
+ // --> c1 OR (NOT(c1) AND c2)
+ // --> c1 OR c2
+ assert_eq!(
+ simplify(Expr::Case {
+ expr: None,
+ when_then_expr: vec![
+ (Box::new(col("c1")), Box::new(lit(true)),),
+ (Box::new(col("c2")), Box::new(lit(false)),)
+ ],
+ else_expr: Some(Box::new(lit(true))),
+ }),
+ col("c1").or(col("c1").or(col("c2")).not())
);
}