You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ja...@apache.org on 2023/04/12 06:16:52 UTC
[arrow-datafusion] branch main updated: minor: Add `Expr::between` to clean up boilerplate (#5967)
This is an automated email from the ASF dual-hosted git repository.
jakevin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new f0d544f6e6 minor: Add `Expr::between` to clean up boilerplate (#5967)
f0d544f6e6 is described below
commit f0d544f6e659218cc86ff9ef70a91705d996c705
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Wed Apr 12 02:16:45 2023 -0400
minor: Add `Expr::between` to clean up boilerplate (#5967)
* Minor: Add Expr::between and clean up boilerplate
* clean up some more
* clippy
---
datafusion/expr/src/expr.rs | 20 ++++++++++
datafusion/optimizer/src/analyzer/type_coercion.rs | 43 +++++++---------------
.../src/simplify_expressions/expr_simplifier.rs | 28 ++------------
.../src/simplify_expressions/simplify_exprs.rs | 16 ++------
4 files changed, 40 insertions(+), 67 deletions(-)
diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs
index 13973bd349..d89589a30a 100644
--- a/datafusion/expr/src/expr.rs
+++ b/datafusion/expr/src/expr.rs
@@ -775,6 +775,26 @@ impl Expr {
Expr::IsNotUnknown(Box::new(self))
}
+ /// return `self BETWEEN low AND high`
+ pub fn between(self, low: Expr, high: Expr) -> Expr {
+ Expr::Between(Between::new(
+ Box::new(self),
+ false,
+ Box::new(low),
+ Box::new(high),
+ ))
+ }
+
+ /// return `self NOT BETWEEN low AND high`
+ pub fn not_between(self, low: Expr, high: Expr) -> Expr {
+ Expr::Between(Between::new(
+ Box::new(self),
+ true,
+ Box::new(low),
+ Box::new(high),
+ ))
+ }
+
pub fn try_into_col(&self) -> Result<Column> {
match self {
Expr::Column(it) => Ok(it.clone()),
diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs
index 11228ee8ec..659b9fe164 100644
--- a/datafusion/optimizer/src/analyzer/type_coercion.rs
+++ b/datafusion/optimizer/src/analyzer/type_coercion.rs
@@ -729,14 +729,13 @@ mod test {
use arrow::datatypes::{DataType, TimeUnit};
use datafusion_common::tree_node::TreeNode;
- use datafusion_common::ScalarValue::Utf8;
use datafusion_common::{DFField, DFSchema, DFSchemaRef, Result, ScalarValue};
use datafusion_expr::expr::{self, Like};
use datafusion_expr::{
cast, col, concat, concat_ws, create_udaf, is_true,
- AccumulatorFunctionImplementation, AggregateFunction, AggregateUDF, Between,
- BinaryExpr, BuiltinScalarFunction, Case, Cast, ColumnarValue, ExprSchemable,
- Filter, Operator, StateTypeFunction, Subquery,
+ AccumulatorFunctionImplementation, AggregateFunction, AggregateUDF, BinaryExpr,
+ BuiltinScalarFunction, Case, ColumnarValue, ExprSchemable, Filter, Operator,
+ StateTypeFunction, Subquery,
};
use datafusion_expr::{
lit,
@@ -1013,20 +1012,12 @@ mod test {
#[test]
fn between_case() -> Result<()> {
- let expr = Expr::Between(Between::new(
- Box::new(col("a")),
- false,
- Box::new(Expr::Literal(Utf8(Some("2002-05-08".to_string())))),
+ let expr = col("a").between(
+ lit("2002-05-08"),
// (cast('2002-05-08' as date) + interval '1 months')
- Box::new(Expr::BinaryExpr(BinaryExpr {
- left: Box::new(Expr::Cast(Cast {
- expr: Box::new(Expr::Literal(Utf8(Some("2002-05-08".to_string())))),
- data_type: DataType::Date32,
- })),
- op: Operator::Plus,
- right: Box::new(Expr::Literal(ScalarValue::IntervalYearMonth(Some(1)))),
- })),
- ));
+ cast(lit("2002-05-08"), DataType::Date32)
+ + lit(ScalarValue::new_interval_ym(0, 1)),
+ );
let empty = empty_with_type(DataType::Utf8);
let plan = LogicalPlan::Filter(Filter::try_new(expr, empty)?);
let expected =
@@ -1037,20 +1028,12 @@ mod test {
#[test]
fn between_infer_cheap_type() -> Result<()> {
- let expr = Expr::Between(Between::new(
- Box::new(col("a")),
- false,
+ let expr = col("a").between(
// (cast('2002-05-08' as date) + interval '1 months')
- Box::new(Expr::BinaryExpr(BinaryExpr {
- left: Box::new(Expr::Cast(Cast {
- expr: Box::new(Expr::Literal(Utf8(Some("2002-05-08".to_string())))),
- data_type: DataType::Date32,
- })),
- op: Operator::Plus,
- right: Box::new(Expr::Literal(ScalarValue::IntervalYearMonth(Some(1)))),
- })),
- Box::new(Expr::Literal(Utf8(Some("2002-12-08".to_string())))),
- ));
+ cast(lit("2002-05-08"), DataType::Date32)
+ + lit(ScalarValue::new_interval_ym(0, 1)),
+ lit("2002-12-08"),
+ );
let empty = empty_with_type(DataType::Utf8);
let plan = LogicalPlan::Filter(Filter::try_new(expr, empty)?);
// TODO: we should cast col(a).
diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs
index 69f2fa3484..20bee905c4 100644
--- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs
+++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs
@@ -2787,12 +2787,7 @@ mod tests {
// ( c1 BETWEEN Int32(0) AND Int32(10) ) OR Boolean(NULL)
// it can be either NULL or TRUE depending on the value of `c1 BETWEEN Int32(0) AND Int32(10)`
// and should not be rewritten
- let expr = Expr::Between(Between::new(
- Box::new(col("c1")),
- false,
- Box::new(lit(0)),
- Box::new(lit(10)),
- ));
+ let expr = col("c1").between(lit(0), lit(10));
let expr = expr.or(lit_bool_null());
let result = simplify(expr);
@@ -2901,12 +2896,7 @@ mod tests {
// c1 BETWEEN Int32(0) AND Int32(10) AND Boolean(NULL)
// it can be either NULL or FALSE depending on the value of `c1 BETWEEN Int32(0) AND Int32(10)`
// and the Boolean(NULL) should remain
- let expr = Expr::Between(Between::new(
- Box::new(col("c1")),
- false,
- Box::new(lit(0)),
- Box::new(lit(10)),
- ));
+ let expr = col("c1").between(lit(0), lit(10));
let expr = expr.and(lit_bool_null());
let result = simplify(expr);
@@ -2920,24 +2910,14 @@ mod tests {
#[test]
fn simplify_expr_between() {
// c2 between 3 and 4 is c2 >= 3 and c2 <= 4
- let expr = Expr::Between(Between::new(
- Box::new(col("c2")),
- false,
- Box::new(lit(3)),
- Box::new(lit(4)),
- ));
+ let expr = col("c2").between(lit(3), lit(4));
assert_eq!(
simplify(expr),
and(col("c2").gt_eq(lit(3)), col("c2").lt_eq(lit(4)))
);
// c2 not between 3 and 4 is c2 < 3 or c2 > 4
- let expr = Expr::Between(Between::new(
- Box::new(col("c2")),
- true,
- Box::new(lit(3)),
- Box::new(lit(4)),
- ));
+ let expr = col("c2").not_between(lit(3), lit(4));
assert_eq!(
simplify(expr),
or(col("c2").lt(lit(3)), col("c2").gt(lit(4)))
diff --git a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs
index c5196e07ee..9591563f02 100644
--- a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs
+++ b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs
@@ -127,7 +127,7 @@ mod tests {
use arrow::datatypes::{DataType, Field, Schema};
use chrono::{DateTime, TimeZone, Utc};
use datafusion_common::ScalarValue;
- use datafusion_expr::{or, Between, BinaryExpr, Cast, Operator};
+ use datafusion_expr::{or, BinaryExpr, Cast, Operator};
use crate::OptimizerContext;
use datafusion_expr::logical_plan::table_scan;
@@ -672,12 +672,7 @@ mod tests {
#[test]
fn simplify_not_between() -> Result<()> {
let table_scan = test_table_scan();
- let qual = Expr::Between(Between::new(
- Box::new(col("d")),
- false,
- Box::new(lit(1)),
- Box::new(lit(10)),
- ));
+ let qual = col("d").between(lit(1), lit(10));
let plan = LogicalPlanBuilder::from(table_scan)
.filter(qual.not())?
@@ -691,12 +686,7 @@ mod tests {
#[test]
fn simplify_not_not_between() -> Result<()> {
let table_scan = test_table_scan();
- let qual = Expr::Between(Between::new(
- Box::new(col("d")),
- true,
- Box::new(lit(1)),
- Box::new(lit(10)),
- ));
+ let qual = col("d").not_between(lit(1), lit(10));
let plan = LogicalPlanBuilder::from(table_scan)
.filter(qual.not())?