You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2023/05/30 14:12:04 UTC

[arrow-datafusion] branch main updated: Use `std::ops` traits for `Exprs` rather than custom function names (#6465)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 33e7dd26ae Use `std::ops` traits for `Exprs` rather than custom function names (#6465)
33e7dd26ae is described below

commit 33e7dd26aee03986dc78a898d6b9d7c7c55496f0
Author: Louis Gariépy <68...@users.noreply.github.com>
AuthorDate: Tue May 30 10:11:57 2023 -0400

    Use `std::ops` traits for `Exprs` rather than custom function names (#6465)
    
    * Move `Not` impl to `operator` module.
    
    * Remove duplicate operator methods in favor of the corresponding trait.
    
    ---------
    
    Co-authored-by: Louis Gariépy <lo...@gmail.com>
    Co-authored-by: Andrew Lamb <an...@nerdnetworks.org>
---
 datafusion/core/src/datasource/listing/helpers.rs  |  2 +
 datafusion/core/src/physical_optimizer/pruning.rs  |  5 +-
 .../file_format/parquet/row_groups.rs              |  9 +--
 datafusion/core/src/physical_plan/planner.rs       |  1 +
 datafusion/expr/src/expr.rs                        | 72 ----------------------
 datafusion/expr/src/operator.rs                    | 43 +++++++++++++
 .../src/simplify_expressions/expr_simplifier.rs    | 16 +++--
 .../src/simplify_expressions/simplify_exprs.rs     |  2 +
 8 files changed, 64 insertions(+), 86 deletions(-)

diff --git a/datafusion/core/src/datasource/listing/helpers.rs b/datafusion/core/src/datasource/listing/helpers.rs
index 7a0dd25351..efe1f7b59a 100644
--- a/datafusion/core/src/datasource/listing/helpers.rs
+++ b/datafusion/core/src/datasource/listing/helpers.rs
@@ -416,6 +416,8 @@ where
 
 #[cfg(test)]
 mod tests {
+    use std::ops::Not;
+
     use futures::StreamExt;
 
     use crate::logical_expr::{case, col, lit};
diff --git a/datafusion/core/src/physical_optimizer/pruning.rs b/datafusion/core/src/physical_optimizer/pruning.rs
index a5f5b635c5..bab9c24720 100644
--- a/datafusion/core/src/physical_optimizer/pruning.rs
+++ b/datafusion/core/src/physical_optimizer/pruning.rs
@@ -947,6 +947,7 @@ mod tests {
     use datafusion_physical_expr::create_physical_expr;
     use datafusion_physical_expr::execution_props::ExecutionProps;
     use std::collections::HashMap;
+    use std::ops::{Not, Rem};
 
     #[derive(Debug)]
     /// Mock statistic provider for tests
@@ -1523,9 +1524,7 @@ mod tests {
             Field::new("c2", DataType::Int32, false),
         ]);
         // test OR operator joining supported c1 < 1 expression and unsupported c2 % 2 = 0 expression
-        let expr = col("c1")
-            .lt(lit(1))
-            .or(col("c2").modulus(lit(2)).eq(lit(0)));
+        let expr = col("c1").lt(lit(1)).or(col("c2").rem(lit(2)).eq(lit(0)));
         let expected_expr = "true";
         let predicate_expr = test_build_predicate_expression(
             &expr,
diff --git a/datafusion/core/src/physical_plan/file_format/parquet/row_groups.rs b/datafusion/core/src/physical_plan/file_format/parquet/row_groups.rs
index efdc168db3..0cbb5d9465 100644
--- a/datafusion/core/src/physical_plan/file_format/parquet/row_groups.rs
+++ b/datafusion/core/src/physical_plan/file_format/parquet/row_groups.rs
@@ -261,6 +261,7 @@ mod tests {
         file::{metadata::RowGroupMetaData, statistics::Statistics as ParquetStatistics},
         schema::types::SchemaDescPtr,
     };
+    use std::ops::Rem;
     use std::sync::Arc;
 
     struct PrimitiveTypeField {
@@ -371,9 +372,7 @@ mod tests {
             Field::new("c1", DataType::Int32, false),
             Field::new("c2", DataType::Int32, false),
         ]));
-        let expr = col("c1")
-            .gt(lit(15))
-            .and(col("c2").modulus(lit(2)).eq(lit(0)));
+        let expr = col("c1").gt(lit(15)).and(col("c2").rem(lit(2)).eq(lit(0)));
         let expr = logical2physical(&expr, &schema);
         let pruning_predicate = PruningPredicate::try_new(expr, schema.clone()).unwrap();
 
@@ -407,9 +406,7 @@ mod tests {
 
         // if conditions in predicate are joined with OR and an unsupported expression is used
         // this bypasses the entire predicate expression and no row groups are filtered out
-        let expr = col("c1")
-            .gt(lit(15))
-            .or(col("c2").modulus(lit(2)).eq(lit(0)));
+        let expr = col("c1").gt(lit(15)).or(col("c2").rem(lit(2)).eq(lit(0)));
         let expr = logical2physical(&expr, &schema);
         let pruning_predicate = PruningPredicate::try_new(expr, schema).unwrap();
 
diff --git a/datafusion/core/src/physical_plan/planner.rs b/datafusion/core/src/physical_plan/planner.rs
index 7577f29e86..3532a6949a 100644
--- a/datafusion/core/src/physical_plan/planner.rs
+++ b/datafusion/core/src/physical_plan/planner.rs
@@ -1948,6 +1948,7 @@ mod tests {
     use fmt::Debug;
     use std::collections::HashMap;
     use std::convert::TryFrom;
+    use std::ops::Not;
     use std::{any::Any, fmt};
 
     fn make_session_state() -> SessionState {
diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs
index 1f7ee74a02..86480f9a96 100644
--- a/datafusion/expr/src/expr.rs
+++ b/datafusion/expr/src/expr.rs
@@ -32,7 +32,6 @@ use std::collections::HashSet;
 use std::fmt;
 use std::fmt::{Display, Formatter, Write};
 use std::hash::{BuildHasher, Hash, Hasher};
-use std::ops::Not;
 use std::sync::Arc;
 
 /// `Expr` is a central struct of DataFusion's query API, and
@@ -740,44 +739,6 @@ impl Expr {
         binary_expr(self, Operator::Or, other)
     }
 
-    /// Return `self & other`
-    pub fn bitwise_and(self, other: Expr) -> Expr {
-        binary_expr(self, Operator::BitwiseAnd, other)
-    }
-
-    /// Return `self | other`
-    pub fn bitwise_or(self, other: Expr) -> Expr {
-        binary_expr(self, Operator::BitwiseOr, other)
-    }
-
-    /// Return `self ^ other`
-    pub fn bitwise_xor(self, other: Expr) -> Expr {
-        binary_expr(self, Operator::BitwiseXor, other)
-    }
-
-    /// Return `self >> other`
-    pub fn bitwise_shift_right(self, other: Expr) -> Expr {
-        binary_expr(self, Operator::BitwiseShiftRight, other)
-    }
-
-    /// Return `self << other`
-    pub fn bitwise_shift_left(self, other: Expr) -> Expr {
-        binary_expr(self, Operator::BitwiseShiftLeft, other)
-    }
-
-    /// Return `!self`
-    // TODO(clippy): May be solved by https://github.com/apache/arrow-datafusion/issues/6437
-    #[allow(clippy::should_implement_trait)]
-    pub fn not(self) -> Expr {
-        !self
-    }
-
-    /// Calculate the modulus of two expressions.
-    /// Return `self % other`
-    pub fn modulus(self, other: Expr) -> Expr {
-        binary_expr(self, Operator::Modulo, other)
-    }
-
     /// Return `self LIKE other`
     pub fn like(self, other: Expr) -> Expr {
         Expr::Like(Like::new(false, Box::new(self), Box::new(other), None))
@@ -908,34 +869,6 @@ impl Expr {
     }
 }
 
-impl Not for Expr {
-    type Output = Self;
-
-    fn not(self) -> Self::Output {
-        match self {
-            Expr::Like(Like {
-                negated,
-                expr,
-                pattern,
-                escape_char,
-            }) => Expr::Like(Like::new(!negated, expr, pattern, escape_char)),
-            Expr::ILike(Like {
-                negated,
-                expr,
-                pattern,
-                escape_char,
-            }) => Expr::ILike(Like::new(!negated, expr, pattern, escape_char)),
-            Expr::SimilarTo(Like {
-                negated,
-                expr,
-                pattern,
-                escape_char,
-            }) => Expr::SimilarTo(Like::new(!negated, expr, pattern, escape_char)),
-            _ => Expr::Not(Box::new(self)),
-        }
-    }
-}
-
 /// Format expressions for display as part of a logical plan. In many cases, this will produce
 /// similar output to `Expr.name()` except that column names will be prefixed with '#'.
 impl fmt::Display for Expr {
@@ -1547,11 +1480,6 @@ mod test {
         Ok(())
     }
 
-    #[test]
-    fn test_not() {
-        assert_eq!(lit(1).not(), !lit(1));
-    }
-
     #[test]
     fn test_partial_ord() {
         // Test validates that partial ord is defined for Expr using hashes, not
diff --git a/datafusion/expr/src/operator.rs b/datafusion/expr/src/operator.rs
index d554c5cebb..686a681e36 100644
--- a/datafusion/expr/src/operator.rs
+++ b/datafusion/expr/src/operator.rs
@@ -19,8 +19,10 @@
 
 use crate::expr_fn::binary_expr;
 use crate::Expr;
+use crate::Like;
 use std::fmt;
 use std::ops;
+use std::ops::Not;
 
 /// Operators applied to expressions
 #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Hash)]
@@ -349,52 +351,93 @@ impl ops::Neg for Expr {
     }
 }
 
+impl Not for Expr {
+    type Output = Self;
+
+    fn not(self) -> Self::Output {
+        match self {
+            Expr::Like(Like {
+                negated,
+                expr,
+                pattern,
+                escape_char,
+            }) => Expr::Like(Like::new(!negated, expr, pattern, escape_char)),
+            Expr::ILike(Like {
+                negated,
+                expr,
+                pattern,
+                escape_char,
+            }) => Expr::ILike(Like::new(!negated, expr, pattern, escape_char)),
+            Expr::SimilarTo(Like {
+                negated,
+                expr,
+                pattern,
+                escape_char,
+            }) => Expr::SimilarTo(Like::new(!negated, expr, pattern, escape_char)),
+            _ => Expr::Not(Box::new(self)),
+        }
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use crate::lit;
 
     #[test]
     fn test_operators() {
+        // Add
         assert_eq!(
             format!("{:?}", lit(1u32) + lit(2u32)),
             "UInt32(1) + UInt32(2)"
         );
+        // Sub
         assert_eq!(
             format!("{:?}", lit(1u32) - lit(2u32)),
             "UInt32(1) - UInt32(2)"
         );
+        // Mul
         assert_eq!(
             format!("{:?}", lit(1u32) * lit(2u32)),
             "UInt32(1) * UInt32(2)"
         );
+        // Div
         assert_eq!(
             format!("{:?}", lit(1u32) / lit(2u32)),
             "UInt32(1) / UInt32(2)"
         );
+        // Rem
         assert_eq!(
             format!("{:?}", lit(1u32) % lit(2u32)),
             "UInt32(1) % UInt32(2)"
         );
+        // BitAnd
         assert_eq!(
             format!("{:?}", lit(1u32) & lit(2u32)),
             "UInt32(1) & UInt32(2)"
         );
+        // BitOr
         assert_eq!(
             format!("{:?}", lit(1u32) | lit(2u32)),
             "UInt32(1) | UInt32(2)"
         );
+        // BitXor
         assert_eq!(
             format!("{:?}", lit(1u32) ^ lit(2u32)),
             "UInt32(1) # UInt32(2)"
         );
+        // Shl
         assert_eq!(
             format!("{:?}", lit(1u32) << lit(2u32)),
             "UInt32(1) << UInt32(2)"
         );
+        // Shr
         assert_eq!(
             format!("{:?}", lit(1u32) >> lit(2u32)),
             "UInt32(1) >> UInt32(2)"
         );
+        // Neg
         assert_eq!(format!("{:?}", -lit(1u32)), "(- UInt32(1))");
+        // Not
+        assert_eq!(format!("{:?}", !lit(1u32)), "NOT UInt32(1)");
     }
 }
diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs
index 75f50aa3c5..a8d6876a23 100644
--- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs
+++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs
@@ -17,6 +17,8 @@
 
 //! Expression simplification API
 
+use std::ops::Not;
+
 use super::utils::*;
 use crate::analyzer::type_coercion::TypeCoercionRewriter;
 use crate::simplify_expressions::regex::simplify_regex_expr;
@@ -1176,7 +1178,11 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> {
 
 #[cfg(test)]
 mod tests {
-    use std::{collections::HashMap, sync::Arc};
+    use std::{
+        collections::HashMap,
+        ops::{BitAnd, BitOr, BitXor},
+        sync::Arc,
+    };
 
     use crate::simplify_expressions::{
         utils::for_test::{cast_to_int64_expr, now_expr, to_timestamp_expr},
@@ -2044,7 +2050,7 @@ mod tests {
     #[test]
     fn test_simplify_simple_bitwise_and() {
         // (c2 > 5) & (c2 > 5) -> (c2 > 5)
-        let expr = (col("c2").gt(lit(5))).bitwise_and(col("c2").gt(lit(5)));
+        let expr = (col("c2").gt(lit(5))).bitand(col("c2").gt(lit(5)));
         let expected = col("c2").gt(lit(5));
 
         assert_eq!(simplify(expr), expected);
@@ -2053,7 +2059,7 @@ mod tests {
     #[test]
     fn test_simplify_simple_bitwise_or() {
         // (c2 > 5) | (c2 > 5) -> (c2 > 5)
-        let expr = (col("c2").gt(lit(5))).bitwise_or(col("c2").gt(lit(5)));
+        let expr = (col("c2").gt(lit(5))).bitor(col("c2").gt(lit(5)));
         let expected = col("c2").gt(lit(5));
 
         assert_eq!(simplify(expr), expected);
@@ -2062,13 +2068,13 @@ mod tests {
     #[test]
     fn test_simplify_simple_bitwise_xor() {
         // c4 ^ c4 -> 0
-        let expr = (col("c4")).bitwise_xor(col("c4"));
+        let expr = (col("c4")).bitxor(col("c4"));
         let expected = lit(0u32);
 
         assert_eq!(simplify(expr), expected);
 
         // c3 ^ c3 -> 0
-        let expr = col("c3").bitwise_xor(col("c3"));
+        let expr = col("c3").bitxor(col("c3"));
         let expected = lit(0i64);
 
         assert_eq!(simplify(expr), expected);
diff --git a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs
index 4285017813..6b0496a0cc 100644
--- a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs
+++ b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs
@@ -120,6 +120,8 @@ impl SimplifyExpressions {
 
 #[cfg(test)]
 mod tests {
+    use std::ops::Not;
+
     use crate::simplify_expressions::utils::for_test::{
         cast_to_int64_expr, now_expr, to_timestamp_expr,
     };