You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2023/05/30 14:12:04 UTC
[arrow-datafusion] branch main updated: Use `std::ops` traits for `Exprs` rather than custom function names (#6465)
This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 33e7dd26ae Use `std::ops` traits for `Exprs` rather than custom function names (#6465)
33e7dd26ae is described below
commit 33e7dd26aee03986dc78a898d6b9d7c7c55496f0
Author: Louis Gariépy <68...@users.noreply.github.com>
AuthorDate: Tue May 30 10:11:57 2023 -0400
Use `std::ops` traits for `Exprs` rather than custom function names (#6465)
* Move `Not` impl to `operator` module.
* Remove duplicate operator methods in favor of the corresponding trait.
---------
Co-authored-by: Louis Gariépy <lo...@gmail.com>
Co-authored-by: Andrew Lamb <an...@nerdnetworks.org>
---
datafusion/core/src/datasource/listing/helpers.rs | 2 +
datafusion/core/src/physical_optimizer/pruning.rs | 5 +-
.../file_format/parquet/row_groups.rs | 9 +--
datafusion/core/src/physical_plan/planner.rs | 1 +
datafusion/expr/src/expr.rs | 72 ----------------------
datafusion/expr/src/operator.rs | 43 +++++++++++++
.../src/simplify_expressions/expr_simplifier.rs | 16 +++--
.../src/simplify_expressions/simplify_exprs.rs | 2 +
8 files changed, 64 insertions(+), 86 deletions(-)
diff --git a/datafusion/core/src/datasource/listing/helpers.rs b/datafusion/core/src/datasource/listing/helpers.rs
index 7a0dd25351..efe1f7b59a 100644
--- a/datafusion/core/src/datasource/listing/helpers.rs
+++ b/datafusion/core/src/datasource/listing/helpers.rs
@@ -416,6 +416,8 @@ where
#[cfg(test)]
mod tests {
+ use std::ops::Not;
+
use futures::StreamExt;
use crate::logical_expr::{case, col, lit};
diff --git a/datafusion/core/src/physical_optimizer/pruning.rs b/datafusion/core/src/physical_optimizer/pruning.rs
index a5f5b635c5..bab9c24720 100644
--- a/datafusion/core/src/physical_optimizer/pruning.rs
+++ b/datafusion/core/src/physical_optimizer/pruning.rs
@@ -947,6 +947,7 @@ mod tests {
use datafusion_physical_expr::create_physical_expr;
use datafusion_physical_expr::execution_props::ExecutionProps;
use std::collections::HashMap;
+ use std::ops::{Not, Rem};
#[derive(Debug)]
/// Mock statistic provider for tests
@@ -1523,9 +1524,7 @@ mod tests {
Field::new("c2", DataType::Int32, false),
]);
// test OR operator joining supported c1 < 1 expression and unsupported c2 % 2 = 0 expression
- let expr = col("c1")
- .lt(lit(1))
- .or(col("c2").modulus(lit(2)).eq(lit(0)));
+ let expr = col("c1").lt(lit(1)).or(col("c2").rem(lit(2)).eq(lit(0)));
let expected_expr = "true";
let predicate_expr = test_build_predicate_expression(
&expr,
diff --git a/datafusion/core/src/physical_plan/file_format/parquet/row_groups.rs b/datafusion/core/src/physical_plan/file_format/parquet/row_groups.rs
index efdc168db3..0cbb5d9465 100644
--- a/datafusion/core/src/physical_plan/file_format/parquet/row_groups.rs
+++ b/datafusion/core/src/physical_plan/file_format/parquet/row_groups.rs
@@ -261,6 +261,7 @@ mod tests {
file::{metadata::RowGroupMetaData, statistics::Statistics as ParquetStatistics},
schema::types::SchemaDescPtr,
};
+ use std::ops::Rem;
use std::sync::Arc;
struct PrimitiveTypeField {
@@ -371,9 +372,7 @@ mod tests {
Field::new("c1", DataType::Int32, false),
Field::new("c2", DataType::Int32, false),
]));
- let expr = col("c1")
- .gt(lit(15))
- .and(col("c2").modulus(lit(2)).eq(lit(0)));
+ let expr = col("c1").gt(lit(15)).and(col("c2").rem(lit(2)).eq(lit(0)));
let expr = logical2physical(&expr, &schema);
let pruning_predicate = PruningPredicate::try_new(expr, schema.clone()).unwrap();
@@ -407,9 +406,7 @@ mod tests {
// if conditions in predicate are joined with OR and an unsupported expression is used
// this bypasses the entire predicate expression and no row groups are filtered out
- let expr = col("c1")
- .gt(lit(15))
- .or(col("c2").modulus(lit(2)).eq(lit(0)));
+ let expr = col("c1").gt(lit(15)).or(col("c2").rem(lit(2)).eq(lit(0)));
let expr = logical2physical(&expr, &schema);
let pruning_predicate = PruningPredicate::try_new(expr, schema).unwrap();
diff --git a/datafusion/core/src/physical_plan/planner.rs b/datafusion/core/src/physical_plan/planner.rs
index 7577f29e86..3532a6949a 100644
--- a/datafusion/core/src/physical_plan/planner.rs
+++ b/datafusion/core/src/physical_plan/planner.rs
@@ -1948,6 +1948,7 @@ mod tests {
use fmt::Debug;
use std::collections::HashMap;
use std::convert::TryFrom;
+ use std::ops::Not;
use std::{any::Any, fmt};
fn make_session_state() -> SessionState {
diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs
index 1f7ee74a02..86480f9a96 100644
--- a/datafusion/expr/src/expr.rs
+++ b/datafusion/expr/src/expr.rs
@@ -32,7 +32,6 @@ use std::collections::HashSet;
use std::fmt;
use std::fmt::{Display, Formatter, Write};
use std::hash::{BuildHasher, Hash, Hasher};
-use std::ops::Not;
use std::sync::Arc;
/// `Expr` is a central struct of DataFusion's query API, and
@@ -740,44 +739,6 @@ impl Expr {
binary_expr(self, Operator::Or, other)
}
- /// Return `self & other`
- pub fn bitwise_and(self, other: Expr) -> Expr {
- binary_expr(self, Operator::BitwiseAnd, other)
- }
-
- /// Return `self | other`
- pub fn bitwise_or(self, other: Expr) -> Expr {
- binary_expr(self, Operator::BitwiseOr, other)
- }
-
- /// Return `self ^ other`
- pub fn bitwise_xor(self, other: Expr) -> Expr {
- binary_expr(self, Operator::BitwiseXor, other)
- }
-
- /// Return `self >> other`
- pub fn bitwise_shift_right(self, other: Expr) -> Expr {
- binary_expr(self, Operator::BitwiseShiftRight, other)
- }
-
- /// Return `self << other`
- pub fn bitwise_shift_left(self, other: Expr) -> Expr {
- binary_expr(self, Operator::BitwiseShiftLeft, other)
- }
-
- /// Return `!self`
- // TODO(clippy): May be solved by https://github.com/apache/arrow-datafusion/issues/6437
- #[allow(clippy::should_implement_trait)]
- pub fn not(self) -> Expr {
- !self
- }
-
- /// Calculate the modulus of two expressions.
- /// Return `self % other`
- pub fn modulus(self, other: Expr) -> Expr {
- binary_expr(self, Operator::Modulo, other)
- }
-
/// Return `self LIKE other`
pub fn like(self, other: Expr) -> Expr {
Expr::Like(Like::new(false, Box::new(self), Box::new(other), None))
@@ -908,34 +869,6 @@ impl Expr {
}
}
-impl Not for Expr {
- type Output = Self;
-
- fn not(self) -> Self::Output {
- match self {
- Expr::Like(Like {
- negated,
- expr,
- pattern,
- escape_char,
- }) => Expr::Like(Like::new(!negated, expr, pattern, escape_char)),
- Expr::ILike(Like {
- negated,
- expr,
- pattern,
- escape_char,
- }) => Expr::ILike(Like::new(!negated, expr, pattern, escape_char)),
- Expr::SimilarTo(Like {
- negated,
- expr,
- pattern,
- escape_char,
- }) => Expr::SimilarTo(Like::new(!negated, expr, pattern, escape_char)),
- _ => Expr::Not(Box::new(self)),
- }
- }
-}
-
/// Format expressions for display as part of a logical plan. In many cases, this will produce
/// similar output to `Expr.name()` except that column names will be prefixed with '#'.
impl fmt::Display for Expr {
@@ -1547,11 +1480,6 @@ mod test {
Ok(())
}
- #[test]
- fn test_not() {
- assert_eq!(lit(1).not(), !lit(1));
- }
-
#[test]
fn test_partial_ord() {
// Test validates that partial ord is defined for Expr using hashes, not
diff --git a/datafusion/expr/src/operator.rs b/datafusion/expr/src/operator.rs
index d554c5cebb..686a681e36 100644
--- a/datafusion/expr/src/operator.rs
+++ b/datafusion/expr/src/operator.rs
@@ -19,8 +19,10 @@
use crate::expr_fn::binary_expr;
use crate::Expr;
+use crate::Like;
use std::fmt;
use std::ops;
+use std::ops::Not;
/// Operators applied to expressions
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Hash)]
@@ -349,52 +351,93 @@ impl ops::Neg for Expr {
}
}
+impl Not for Expr {
+ type Output = Self;
+
+ fn not(self) -> Self::Output {
+ match self {
+ Expr::Like(Like {
+ negated,
+ expr,
+ pattern,
+ escape_char,
+ }) => Expr::Like(Like::new(!negated, expr, pattern, escape_char)),
+ Expr::ILike(Like {
+ negated,
+ expr,
+ pattern,
+ escape_char,
+ }) => Expr::ILike(Like::new(!negated, expr, pattern, escape_char)),
+ Expr::SimilarTo(Like {
+ negated,
+ expr,
+ pattern,
+ escape_char,
+ }) => Expr::SimilarTo(Like::new(!negated, expr, pattern, escape_char)),
+ _ => Expr::Not(Box::new(self)),
+ }
+ }
+}
+
#[cfg(test)]
mod tests {
use crate::lit;
#[test]
fn test_operators() {
+ // Add
assert_eq!(
format!("{:?}", lit(1u32) + lit(2u32)),
"UInt32(1) + UInt32(2)"
);
+ // Sub
assert_eq!(
format!("{:?}", lit(1u32) - lit(2u32)),
"UInt32(1) - UInt32(2)"
);
+ // Mul
assert_eq!(
format!("{:?}", lit(1u32) * lit(2u32)),
"UInt32(1) * UInt32(2)"
);
+ // Div
assert_eq!(
format!("{:?}", lit(1u32) / lit(2u32)),
"UInt32(1) / UInt32(2)"
);
+ // Rem
assert_eq!(
format!("{:?}", lit(1u32) % lit(2u32)),
"UInt32(1) % UInt32(2)"
);
+ // BitAnd
assert_eq!(
format!("{:?}", lit(1u32) & lit(2u32)),
"UInt32(1) & UInt32(2)"
);
+ // BitOr
assert_eq!(
format!("{:?}", lit(1u32) | lit(2u32)),
"UInt32(1) | UInt32(2)"
);
+ // BitXor
assert_eq!(
format!("{:?}", lit(1u32) ^ lit(2u32)),
"UInt32(1) # UInt32(2)"
);
+ // Shl
assert_eq!(
format!("{:?}", lit(1u32) << lit(2u32)),
"UInt32(1) << UInt32(2)"
);
+ // Shr
assert_eq!(
format!("{:?}", lit(1u32) >> lit(2u32)),
"UInt32(1) >> UInt32(2)"
);
+ // Neg
assert_eq!(format!("{:?}", -lit(1u32)), "(- UInt32(1))");
+ // Not
+ assert_eq!(format!("{:?}", !lit(1u32)), "NOT UInt32(1)");
}
}
diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs
index 75f50aa3c5..a8d6876a23 100644
--- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs
+++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs
@@ -17,6 +17,8 @@
//! Expression simplification API
+use std::ops::Not;
+
use super::utils::*;
use crate::analyzer::type_coercion::TypeCoercionRewriter;
use crate::simplify_expressions::regex::simplify_regex_expr;
@@ -1176,7 +1178,11 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> {
#[cfg(test)]
mod tests {
- use std::{collections::HashMap, sync::Arc};
+ use std::{
+ collections::HashMap,
+ ops::{BitAnd, BitOr, BitXor},
+ sync::Arc,
+ };
use crate::simplify_expressions::{
utils::for_test::{cast_to_int64_expr, now_expr, to_timestamp_expr},
@@ -2044,7 +2050,7 @@ mod tests {
#[test]
fn test_simplify_simple_bitwise_and() {
// (c2 > 5) & (c2 > 5) -> (c2 > 5)
- let expr = (col("c2").gt(lit(5))).bitwise_and(col("c2").gt(lit(5)));
+ let expr = (col("c2").gt(lit(5))).bitand(col("c2").gt(lit(5)));
let expected = col("c2").gt(lit(5));
assert_eq!(simplify(expr), expected);
@@ -2053,7 +2059,7 @@ mod tests {
#[test]
fn test_simplify_simple_bitwise_or() {
// (c2 > 5) | (c2 > 5) -> (c2 > 5)
- let expr = (col("c2").gt(lit(5))).bitwise_or(col("c2").gt(lit(5)));
+ let expr = (col("c2").gt(lit(5))).bitor(col("c2").gt(lit(5)));
let expected = col("c2").gt(lit(5));
assert_eq!(simplify(expr), expected);
@@ -2062,13 +2068,13 @@ mod tests {
#[test]
fn test_simplify_simple_bitwise_xor() {
// c4 ^ c4 -> 0
- let expr = (col("c4")).bitwise_xor(col("c4"));
+ let expr = (col("c4")).bitxor(col("c4"));
let expected = lit(0u32);
assert_eq!(simplify(expr), expected);
// c3 ^ c3 -> 0
- let expr = col("c3").bitwise_xor(col("c3"));
+ let expr = col("c3").bitxor(col("c3"));
let expected = lit(0i64);
assert_eq!(simplify(expr), expected);
diff --git a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs
index 4285017813..6b0496a0cc 100644
--- a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs
+++ b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs
@@ -120,6 +120,8 @@ impl SimplifyExpressions {
#[cfg(test)]
mod tests {
+ use std::ops::Not;
+
use crate::simplify_expressions::utils::for_test::{
cast_to_int64_expr, now_expr, to_timestamp_expr,
};