You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ja...@apache.org on 2023/04/12 06:16:52 UTC

[arrow-datafusion] branch main updated: minor: Add `Expr::between` to clean up boilerplate (#5967)

This is an automated email from the ASF dual-hosted git repository.

jakevin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new f0d544f6e6 minor: Add `Expr::between` to clean up boilerplate (#5967)
f0d544f6e6 is described below

commit f0d544f6e659218cc86ff9ef70a91705d996c705
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Wed Apr 12 02:16:45 2023 -0400

    minor: Add `Expr::between` to clean up boilerplate (#5967)
    
    * Minor: Add Expr::between and clean up boilerplate
    
    * clean up some more
    
    * clippy
---
 datafusion/expr/src/expr.rs                        | 20 ++++++++++
 datafusion/optimizer/src/analyzer/type_coercion.rs | 43 +++++++---------------
 .../src/simplify_expressions/expr_simplifier.rs    | 28 ++------------
 .../src/simplify_expressions/simplify_exprs.rs     | 16 ++------
 4 files changed, 40 insertions(+), 67 deletions(-)

diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs
index 13973bd349..d89589a30a 100644
--- a/datafusion/expr/src/expr.rs
+++ b/datafusion/expr/src/expr.rs
@@ -775,6 +775,26 @@ impl Expr {
         Expr::IsNotUnknown(Box::new(self))
     }
 
+    /// return `self BETWEEN low AND high`
+    pub fn between(self, low: Expr, high: Expr) -> Expr {
+        Expr::Between(Between::new(
+            Box::new(self),
+            false,
+            Box::new(low),
+            Box::new(high),
+        ))
+    }
+
+    /// return `self NOT BETWEEN low AND high`
+    pub fn not_between(self, low: Expr, high: Expr) -> Expr {
+        Expr::Between(Between::new(
+            Box::new(self),
+            true,
+            Box::new(low),
+            Box::new(high),
+        ))
+    }
+
     pub fn try_into_col(&self) -> Result<Column> {
         match self {
             Expr::Column(it) => Ok(it.clone()),
diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs
index 11228ee8ec..659b9fe164 100644
--- a/datafusion/optimizer/src/analyzer/type_coercion.rs
+++ b/datafusion/optimizer/src/analyzer/type_coercion.rs
@@ -729,14 +729,13 @@ mod test {
     use arrow::datatypes::{DataType, TimeUnit};
 
     use datafusion_common::tree_node::TreeNode;
-    use datafusion_common::ScalarValue::Utf8;
     use datafusion_common::{DFField, DFSchema, DFSchemaRef, Result, ScalarValue};
     use datafusion_expr::expr::{self, Like};
     use datafusion_expr::{
         cast, col, concat, concat_ws, create_udaf, is_true,
-        AccumulatorFunctionImplementation, AggregateFunction, AggregateUDF, Between,
-        BinaryExpr, BuiltinScalarFunction, Case, Cast, ColumnarValue, ExprSchemable,
-        Filter, Operator, StateTypeFunction, Subquery,
+        AccumulatorFunctionImplementation, AggregateFunction, AggregateUDF, BinaryExpr,
+        BuiltinScalarFunction, Case, ColumnarValue, ExprSchemable, Filter, Operator,
+        StateTypeFunction, Subquery,
     };
     use datafusion_expr::{
         lit,
@@ -1013,20 +1012,12 @@ mod test {
 
     #[test]
     fn between_case() -> Result<()> {
-        let expr = Expr::Between(Between::new(
-            Box::new(col("a")),
-            false,
-            Box::new(Expr::Literal(Utf8(Some("2002-05-08".to_string())))),
+        let expr = col("a").between(
+            lit("2002-05-08"),
             // (cast('2002-05-08' as date) + interval '1 months')
-            Box::new(Expr::BinaryExpr(BinaryExpr {
-                left: Box::new(Expr::Cast(Cast {
-                    expr: Box::new(Expr::Literal(Utf8(Some("2002-05-08".to_string())))),
-                    data_type: DataType::Date32,
-                })),
-                op: Operator::Plus,
-                right: Box::new(Expr::Literal(ScalarValue::IntervalYearMonth(Some(1)))),
-            })),
-        ));
+            cast(lit("2002-05-08"), DataType::Date32)
+                + lit(ScalarValue::new_interval_ym(0, 1)),
+        );
         let empty = empty_with_type(DataType::Utf8);
         let plan = LogicalPlan::Filter(Filter::try_new(expr, empty)?);
         let expected =
@@ -1037,20 +1028,12 @@ mod test {
 
     #[test]
     fn between_infer_cheap_type() -> Result<()> {
-        let expr = Expr::Between(Between::new(
-            Box::new(col("a")),
-            false,
+        let expr = col("a").between(
             // (cast('2002-05-08' as date) + interval '1 months')
-            Box::new(Expr::BinaryExpr(BinaryExpr {
-                left: Box::new(Expr::Cast(Cast {
-                    expr: Box::new(Expr::Literal(Utf8(Some("2002-05-08".to_string())))),
-                    data_type: DataType::Date32,
-                })),
-                op: Operator::Plus,
-                right: Box::new(Expr::Literal(ScalarValue::IntervalYearMonth(Some(1)))),
-            })),
-            Box::new(Expr::Literal(Utf8(Some("2002-12-08".to_string())))),
-        ));
+            cast(lit("2002-05-08"), DataType::Date32)
+                + lit(ScalarValue::new_interval_ym(0, 1)),
+            lit("2002-12-08"),
+        );
         let empty = empty_with_type(DataType::Utf8);
         let plan = LogicalPlan::Filter(Filter::try_new(expr, empty)?);
         // TODO: we should cast col(a).
diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs
index 69f2fa3484..20bee905c4 100644
--- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs
+++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs
@@ -2787,12 +2787,7 @@ mod tests {
         // ( c1 BETWEEN Int32(0) AND Int32(10) ) OR Boolean(NULL)
         // it can be either NULL or  TRUE depending on the value of `c1 BETWEEN Int32(0) AND Int32(10)`
         // and should not be rewritten
-        let expr = Expr::Between(Between::new(
-            Box::new(col("c1")),
-            false,
-            Box::new(lit(0)),
-            Box::new(lit(10)),
-        ));
+        let expr = col("c1").between(lit(0), lit(10));
         let expr = expr.or(lit_bool_null());
         let result = simplify(expr);
 
@@ -2901,12 +2896,7 @@ mod tests {
         // c1 BETWEEN Int32(0) AND Int32(10) AND Boolean(NULL)
         // it can be either NULL or FALSE depending on the value of `c1 BETWEEN Int32(0) AND Int32(10)`
         // and the Boolean(NULL) should remain
-        let expr = Expr::Between(Between::new(
-            Box::new(col("c1")),
-            false,
-            Box::new(lit(0)),
-            Box::new(lit(10)),
-        ));
+        let expr = col("c1").between(lit(0), lit(10));
         let expr = expr.and(lit_bool_null());
         let result = simplify(expr);
 
@@ -2920,24 +2910,14 @@ mod tests {
     #[test]
     fn simplify_expr_between() {
         // c2 between 3 and 4 is c2 >= 3 and c2 <= 4
-        let expr = Expr::Between(Between::new(
-            Box::new(col("c2")),
-            false,
-            Box::new(lit(3)),
-            Box::new(lit(4)),
-        ));
+        let expr = col("c2").between(lit(3), lit(4));
         assert_eq!(
             simplify(expr),
             and(col("c2").gt_eq(lit(3)), col("c2").lt_eq(lit(4)))
         );
 
         // c2 not between 3 and 4 is c2 < 3 or c2 > 4
-        let expr = Expr::Between(Between::new(
-            Box::new(col("c2")),
-            true,
-            Box::new(lit(3)),
-            Box::new(lit(4)),
-        ));
+        let expr = col("c2").not_between(lit(3), lit(4));
         assert_eq!(
             simplify(expr),
             or(col("c2").lt(lit(3)), col("c2").gt(lit(4)))
diff --git a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs
index c5196e07ee..9591563f02 100644
--- a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs
+++ b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs
@@ -127,7 +127,7 @@ mod tests {
     use arrow::datatypes::{DataType, Field, Schema};
     use chrono::{DateTime, TimeZone, Utc};
     use datafusion_common::ScalarValue;
-    use datafusion_expr::{or, Between, BinaryExpr, Cast, Operator};
+    use datafusion_expr::{or, BinaryExpr, Cast, Operator};
 
     use crate::OptimizerContext;
     use datafusion_expr::logical_plan::table_scan;
@@ -672,12 +672,7 @@ mod tests {
     #[test]
     fn simplify_not_between() -> Result<()> {
         let table_scan = test_table_scan();
-        let qual = Expr::Between(Between::new(
-            Box::new(col("d")),
-            false,
-            Box::new(lit(1)),
-            Box::new(lit(10)),
-        ));
+        let qual = col("d").between(lit(1), lit(10));
 
         let plan = LogicalPlanBuilder::from(table_scan)
             .filter(qual.not())?
@@ -691,12 +686,7 @@ mod tests {
     #[test]
     fn simplify_not_not_between() -> Result<()> {
         let table_scan = test_table_scan();
-        let qual = Expr::Between(Between::new(
-            Box::new(col("d")),
-            true,
-            Box::new(lit(1)),
-            Box::new(lit(10)),
-        ));
+        let qual = col("d").not_between(lit(1), lit(10));
 
         let plan = LogicalPlanBuilder::from(table_scan)
             .filter(qual.not())?