You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ag...@apache.org on 2022/04/21 20:32:42 UTC
[arrow-datafusion] branch master updated: Move case/when expressions to expr crate (#2311)
This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 0c51a4933 Move case/when expressions to expr crate (#2311)
0c51a4933 is described below
commit 0c51a493382f81feedb61a999d8e246b8d3bbe62
Author: Andy Grove <ag...@apache.org>
AuthorDate: Thu Apr 21 14:32:38 2022 -0600
Move case/when expressions to expr crate (#2311)
---
datafusion/core/src/logical_plan/builder.rs | 2 +-
datafusion/core/src/logical_plan/expr.rs | 161 +--------------------
datafusion/core/src/logical_plan/mod.rs | 3 +-
datafusion/expr/Cargo.toml | 4 +-
datafusion/expr/README.md | 2 +-
datafusion/expr/src/conditional_expressions.rs | 107 ++++++++++++++
datafusion/expr/src/expr_fn.rs | 13 +-
.../src/logical_plan => expr/src}/expr_schema.rs | 67 ++++++++-
datafusion/expr/src/lib.rs | 17 ++-
9 files changed, 203 insertions(+), 173 deletions(-)
diff --git a/datafusion/core/src/logical_plan/builder.rs b/datafusion/core/src/logical_plan/builder.rs
index e417b0dc1..6b08b5beb 100644
--- a/datafusion/core/src/logical_plan/builder.rs
+++ b/datafusion/core/src/logical_plan/builder.rs
@@ -23,7 +23,7 @@ use crate::datasource::{
MemTable, TableProvider,
};
use crate::error::{DataFusionError, Result};
-use crate::logical_plan::expr_schema::ExprSchemable;
+use crate::logical_expr::ExprSchemable;
use crate::logical_plan::plan::{
Aggregate, Analyze, EmptyRelation, Explain, Filter, Join, Projection, Sort,
SubqueryAlias, TableScan, ToStringifiedPlan, Union, Window,
diff --git a/datafusion/core/src/logical_plan/expr.rs b/datafusion/core/src/logical_plan/expr.rs
index 2c9113696..8935170cf 100644
--- a/datafusion/core/src/logical_plan/expr.rs
+++ b/datafusion/core/src/logical_plan/expr.rs
@@ -23,7 +23,6 @@ use crate::error::Result;
use crate::logical_plan::ExprSchemable;
use crate::logical_plan::{DFField, DFSchema};
use arrow::datatypes::DataType;
-use datafusion_common::DataFusionError;
pub use datafusion_common::{Column, ExprSchema};
pub use datafusion_expr::expr_fn::*;
use datafusion_expr::AccumulatorFunctionImplementation;
@@ -35,97 +34,8 @@ use datafusion_expr::{AggregateUDF, ScalarUDF};
use datafusion_expr::{
ReturnTypeFunction, ScalarFunctionImplementation, Signature, Volatility,
};
-use std::collections::HashSet;
use std::sync::Arc;
-/// Helper struct for building [Expr::Case]
-pub struct CaseBuilder {
- expr: Option<Box<Expr>>,
- when_expr: Vec<Expr>,
- then_expr: Vec<Expr>,
- else_expr: Option<Box<Expr>>,
-}
-
-impl CaseBuilder {
- pub fn when(&mut self, when: Expr, then: Expr) -> CaseBuilder {
- self.when_expr.push(when);
- self.then_expr.push(then);
- CaseBuilder {
- expr: self.expr.clone(),
- when_expr: self.when_expr.clone(),
- then_expr: self.then_expr.clone(),
- else_expr: self.else_expr.clone(),
- }
- }
- pub fn otherwise(&mut self, else_expr: Expr) -> Result<Expr> {
- self.else_expr = Some(Box::new(else_expr));
- self.build()
- }
-
- pub fn end(&self) -> Result<Expr> {
- self.build()
- }
-
- fn build(&self) -> Result<Expr> {
- // collect all "then" expressions
- let mut then_expr = self.then_expr.clone();
- if let Some(e) = &self.else_expr {
- then_expr.push(e.as_ref().to_owned());
- }
-
- let then_types: Vec<DataType> = then_expr
- .iter()
- .map(|e| match e {
- Expr::Literal(_) => e.get_type(&DFSchema::empty()),
- _ => Ok(DataType::Null),
- })
- .collect::<Result<Vec<_>>>()?;
-
- if then_types.contains(&DataType::Null) {
- // cannot verify types until execution type
- } else {
- let unique_types: HashSet<&DataType> = then_types.iter().collect();
- if unique_types.len() != 1 {
- return Err(DataFusionError::Plan(format!(
- "CASE expression 'then' values had multiple data types: {:?}",
- unique_types
- )));
- }
- }
-
- Ok(Expr::Case {
- expr: self.expr.clone(),
- when_then_expr: self
- .when_expr
- .iter()
- .zip(self.then_expr.iter())
- .map(|(w, t)| (Box::new(w.clone()), Box::new(t.clone())))
- .collect(),
- else_expr: self.else_expr.clone(),
- })
- }
-}
-
-/// Create a CASE WHEN statement with literal WHEN expressions for comparison to the base expression.
-pub fn case(expr: Expr) -> CaseBuilder {
- CaseBuilder {
- expr: Some(Box::new(expr)),
- when_expr: vec![],
- then_expr: vec![],
- else_expr: None,
- }
-}
-
-/// Create a CASE WHEN statement with boolean WHEN expressions and no base expression.
-pub fn when(when: Expr, then: Expr) -> CaseBuilder {
- CaseBuilder {
- expr: None,
- when_expr: vec![when],
- then_expr: vec![then],
- else_expr: None,
- }
-}
-
/// Combines an array of filter expressions into a single filter expression
/// consisting of the input filter expressions joined with logical AND.
/// Returns None if the filters array is empty.
@@ -248,26 +158,10 @@ pub fn call_fn(name: impl AsRef<str>, args: Vec<Expr>) -> Result<Expr> {
#[cfg(test)]
mod tests {
- use super::super::{col, lit, when};
+ use super::super::{col, lit};
use super::*;
use datafusion_expr::expr_fn::binary_expr;
- #[test]
- fn case_when_same_literal_then_types() -> Result<()> {
- let _ = when(col("state").eq(lit("CO")), lit(303))
- .when(col("state").eq(lit("NY")), lit(212))
- .end()?;
- Ok(())
- }
-
- #[test]
- fn case_when_different_literal_then_types() {
- let maybe_expr = when(col("state").eq(lit("CO")), lit(303))
- .when(col("state").eq(lit("NY")), lit("212"))
- .end();
- assert!(maybe_expr.is_err());
- }
-
#[test]
fn digest_function_definitions() {
if let Expr::ScalarFunction { fun, args } = digest(col("tableA.a"), lit("md5")) {
@@ -301,57 +195,4 @@ mod tests {
combine_filters(&[filter1.clone(), filter2.clone(), filter3.clone()]);
assert_eq!(result, Some(and(and(filter1, filter2), filter3)));
}
-
- #[test]
- fn expr_schema_nullability() {
- let expr = col("foo").eq(lit(1));
- assert!(!expr.nullable(&MockExprSchema::new()).unwrap());
- assert!(expr
- .nullable(&MockExprSchema::new().with_nullable(true))
- .unwrap());
- }
-
- #[test]
- fn expr_schema_data_type() {
- let expr = col("foo");
- assert_eq!(
- DataType::Utf8,
- expr.get_type(&MockExprSchema::new().with_data_type(DataType::Utf8))
- .unwrap()
- );
- }
-
- struct MockExprSchema {
- nullable: bool,
- data_type: DataType,
- }
-
- impl MockExprSchema {
- fn new() -> Self {
- Self {
- nullable: false,
- data_type: DataType::Null,
- }
- }
-
- fn with_nullable(mut self, nullable: bool) -> Self {
- self.nullable = nullable;
- self
- }
-
- fn with_data_type(mut self, data_type: DataType) -> Self {
- self.data_type = data_type;
- self
- }
- }
-
- impl ExprSchema for MockExprSchema {
- fn nullable(&self, _col: &Column) -> Result<bool> {
- Ok(self.nullable)
- }
-
- fn data_type(&self, _col: &Column) -> Result<&DataType> {
- Ok(&self.data_type)
- }
- }
}
diff --git a/datafusion/core/src/logical_plan/mod.rs b/datafusion/core/src/logical_plan/mod.rs
index 5ce0ff911..488a2b954 100644
--- a/datafusion/core/src/logical_plan/mod.rs
+++ b/datafusion/core/src/logical_plan/mod.rs
@@ -25,7 +25,6 @@ pub(crate) mod builder;
mod dfschema;
mod expr;
mod expr_rewriter;
-mod expr_schema;
mod expr_simplier;
mod expr_visitor;
mod operators;
@@ -38,6 +37,7 @@ pub use builder::{
pub use datafusion_expr::expr_fn::binary_expr;
pub use dfschema::{DFField, DFSchema, DFSchemaRef, ToDFSchema};
+pub use crate::logical_expr::ExprSchemable;
pub use expr::{
abs, acos, and, approx_distinct, approx_percentile_cont, array, ascii, asin, atan,
avg, bit_length, btrim, call_fn, case, ceil, character_length, chr, coalesce, col,
@@ -55,7 +55,6 @@ pub use expr_rewriter::{
normalize_col, normalize_cols, replace_col, rewrite_sort_cols_by_aggs,
unnormalize_col, unnormalize_cols, ExprRewritable, ExprRewriter, RewriteRecursion,
};
-pub use expr_schema::ExprSchemable;
pub use expr_simplier::{ExprSimplifiable, SimplifyInfo};
pub use expr_visitor::{ExprVisitable, ExpressionVisitor, Recursion};
pub use operators::Operator;
diff --git a/datafusion/expr/Cargo.toml b/datafusion/expr/Cargo.toml
index 7459490ef..4095d4ebc 100644
--- a/datafusion/expr/Cargo.toml
+++ b/datafusion/expr/Cargo.toml
@@ -17,14 +17,14 @@
[package]
name = "datafusion-expr"
-description = "Logical expression representation for DataFusion query engine"
+description = "Logical plan and expression representation for DataFusion query engine"
version = "7.0.0"
homepage = "https://github.com/apache/arrow-datafusion"
repository = "https://github.com/apache/arrow-datafusion"
readme = "../README.md"
authors = ["Apache Arrow <de...@arrow.apache.org>"]
license = "Apache-2.0"
-keywords = [ "arrow", "query", "sql" ]
+keywords = [ "datafusion", "logical", "plan", "expressions" ]
edition = "2021"
rust-version = "1.59"
diff --git a/datafusion/expr/README.md b/datafusion/expr/README.md
index 25ac79c22..6ce82347c 100644
--- a/datafusion/expr/README.md
+++ b/datafusion/expr/README.md
@@ -17,7 +17,7 @@
under the License.
-->
-# DataFusion Expr
+# DataFusion Logical Plan and Expressions
This is an internal module for fundamental expression types of [DataFusion][df].
diff --git a/datafusion/expr/src/conditional_expressions.rs b/datafusion/expr/src/conditional_expressions.rs
index bafa2c724..0c5104a4b 100644
--- a/datafusion/expr/src/conditional_expressions.rs
+++ b/datafusion/expr/src/conditional_expressions.rs
@@ -15,7 +15,11 @@
// specific language governing permissions and limitations
// under the License.
+///! Conditional expressions
+use crate::{expr_schema::ExprSchemable, Expr};
use arrow::datatypes::DataType;
+use datafusion_common::{DFSchema, DataFusionError, Result};
+use std::collections::HashSet;
/// Currently supported types by the coalesce function.
/// The order of these types correspond to the order on which coercion applies
@@ -35,3 +39,106 @@ pub static SUPPORTED_COALESCE_TYPES: &[DataType] = &[
DataType::Utf8,
DataType::LargeUtf8,
];
+
+/// Helper struct for building [Expr::Case]
+pub struct CaseBuilder {
+ expr: Option<Box<Expr>>,
+ when_expr: Vec<Expr>,
+ then_expr: Vec<Expr>,
+ else_expr: Option<Box<Expr>>,
+}
+
+impl CaseBuilder {
+ pub fn new(
+ expr: Option<Box<Expr>>,
+ when_expr: Vec<Expr>,
+ then_expr: Vec<Expr>,
+ else_expr: Option<Box<Expr>>,
+ ) -> Self {
+ Self {
+ expr,
+ when_expr,
+ then_expr,
+ else_expr,
+ }
+ }
+ pub fn when(&mut self, when: Expr, then: Expr) -> CaseBuilder {
+ self.when_expr.push(when);
+ self.then_expr.push(then);
+ CaseBuilder {
+ expr: self.expr.clone(),
+ when_expr: self.when_expr.clone(),
+ then_expr: self.then_expr.clone(),
+ else_expr: self.else_expr.clone(),
+ }
+ }
+ pub fn otherwise(&mut self, else_expr: Expr) -> Result<Expr> {
+ self.else_expr = Some(Box::new(else_expr));
+ self.build()
+ }
+
+ pub fn end(&self) -> Result<Expr> {
+ self.build()
+ }
+
+ fn build(&self) -> Result<Expr> {
+ // collect all "then" expressions
+ let mut then_expr = self.then_expr.clone();
+ if let Some(e) = &self.else_expr {
+ then_expr.push(e.as_ref().to_owned());
+ }
+
+ let then_types: Vec<DataType> = then_expr
+ .iter()
+ .map(|e| match e {
+ Expr::Literal(_) => e.get_type(&DFSchema::empty()),
+ _ => Ok(DataType::Null),
+ })
+ .collect::<Result<Vec<_>>>()?;
+
+ if then_types.contains(&DataType::Null) {
+ // cannot verify types until execution type
+ } else {
+ let unique_types: HashSet<&DataType> = then_types.iter().collect();
+ if unique_types.len() != 1 {
+ return Err(DataFusionError::Plan(format!(
+ "CASE expression 'then' values had multiple data types: {:?}",
+ unique_types
+ )));
+ }
+ }
+
+ Ok(Expr::Case {
+ expr: self.expr.clone(),
+ when_then_expr: self
+ .when_expr
+ .iter()
+ .zip(self.then_expr.iter())
+ .map(|(w, t)| (Box::new(w.clone()), Box::new(t.clone())))
+ .collect(),
+ else_expr: self.else_expr.clone(),
+ })
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::{col, lit, when};
+
+ #[test]
+ fn case_when_same_literal_then_types() -> Result<()> {
+ let _ = when(col("state").eq(lit("CO")), lit(303))
+ .when(col("state").eq(lit("NY")), lit(212))
+ .end()?;
+ Ok(())
+ }
+
+ #[test]
+ fn case_when_different_literal_then_types() {
+ let maybe_expr = when(col("state").eq(lit("CO")), lit(303))
+ .when(col("state").eq(lit("NY")), lit("212"))
+ .end();
+ assert!(maybe_expr.is_err());
+ }
+}
diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs
index d9cd6a552..a723f5306 100644
--- a/datafusion/expr/src/expr_fn.rs
+++ b/datafusion/expr/src/expr_fn.rs
@@ -15,8 +15,9 @@
// specific language governing permissions and limitations
// under the License.
-//! Expr fn module contains the functional definitions for expressions.
+//! Functions for creating logical expressions
+use crate::conditional_expressions::CaseBuilder;
use crate::{aggregate_function, built_in_function, lit, Expr, Operator};
/// Create a column expression based on a qualified or unqualified column name
@@ -306,6 +307,16 @@ pub fn coalesce(args: Vec<Expr>) -> Expr {
}
}
+/// Create a CASE WHEN statement with literal WHEN expressions for comparison to the base expression.
+pub fn case(expr: Expr) -> CaseBuilder {
+ CaseBuilder::new(Some(Box::new(expr)), vec![], vec![], None)
+}
+
+/// Create a CASE WHEN statement with boolean WHEN expressions and no base expression.
+pub fn when(when: Expr, then: Expr) -> CaseBuilder {
+ CaseBuilder::new(None, vec![when], vec![then], None)
+}
+
#[cfg(test)]
mod test {
use super::*;
diff --git a/datafusion/core/src/logical_plan/expr_schema.rs b/datafusion/expr/src/expr_schema.rs
similarity index 85%
rename from datafusion/core/src/logical_plan/expr_schema.rs
rename to datafusion/expr/src/expr_schema.rs
index e0cef2979..a216281b3 100644
--- a/datafusion/core/src/logical_plan/expr_schema.rs
+++ b/datafusion/expr/src/expr_schema.rs
@@ -16,12 +16,12 @@
// under the License.
use super::Expr;
-use crate::logical_expr::{aggregate_function, function, window_function};
+use crate::binary_rule::binary_operator_data_type;
+use crate::field_util::get_indexed_field;
+use crate::{aggregate_function, function, window_function};
use arrow::compute::can_cast_types;
use arrow::datatypes::DataType;
use datafusion_common::{DFField, DFSchema, DataFusionError, ExprSchema, Result};
-use datafusion_expr::binary_rule::binary_operator_data_type;
-use datafusion_expr::field_util::get_indexed_field;
/// trait to allow expr to typable with respect to a schema
pub trait ExprSchemable {
@@ -236,3 +236,64 @@ impl ExprSchemable for Expr {
}
}
}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::{col, lit};
+ use arrow::datatypes::DataType;
+ use datafusion_common::Column;
+
+ #[test]
+ fn expr_schema_nullability() {
+ let expr = col("foo").eq(lit(1));
+ assert!(!expr.nullable(&MockExprSchema::new()).unwrap());
+ assert!(expr
+ .nullable(&MockExprSchema::new().with_nullable(true))
+ .unwrap());
+ }
+
+ #[test]
+ fn expr_schema_data_type() {
+ let expr = col("foo");
+ assert_eq!(
+ DataType::Utf8,
+ expr.get_type(&MockExprSchema::new().with_data_type(DataType::Utf8))
+ .unwrap()
+ );
+ }
+
+ struct MockExprSchema {
+ nullable: bool,
+ data_type: DataType,
+ }
+
+ impl MockExprSchema {
+ fn new() -> Self {
+ Self {
+ nullable: false,
+ data_type: DataType::Null,
+ }
+ }
+
+ fn with_nullable(mut self, nullable: bool) -> Self {
+ self.nullable = nullable;
+ self
+ }
+
+ fn with_data_type(mut self, data_type: DataType) -> Self {
+ self.data_type = data_type;
+ self
+ }
+ }
+
+ impl ExprSchema for MockExprSchema {
+ fn nullable(&self, _col: &Column) -> Result<bool> {
+ Ok(self.nullable)
+ }
+
+ fn data_type(&self, _col: &Column) -> Result<&DataType> {
+ Ok(&self.data_type)
+ }
+ }
+}
diff --git a/datafusion/expr/src/lib.rs b/datafusion/expr/src/lib.rs
index 7586dbf60..b1e822077 100644
--- a/datafusion/expr/src/lib.rs
+++ b/datafusion/expr/src/lib.rs
@@ -24,6 +24,7 @@ mod columnar_value;
pub mod conditional_expressions;
pub mod expr;
pub mod expr_fn;
+pub mod expr_schema;
pub mod field_util;
pub mod function;
mod literal;
@@ -43,12 +44,24 @@ pub use aggregate_function::AggregateFunction;
pub use built_in_function::BuiltinScalarFunction;
pub use columnar_value::{ColumnarValue, NullColumnarValue};
pub use expr::Expr;
-pub use expr_fn::{col, sum};
+pub use expr_fn::{
+ abs, acos, and, approx_distinct, approx_percentile_cont, array, ascii, asin, atan,
+ avg, bit_length, btrim, case, ceil, character_length, chr, coalesce, col, concat,
+ concat_expr, concat_ws, concat_ws_expr, cos, count, count_distinct, date_part,
+ date_trunc, digest, exp, floor, in_list, initcap, left, length, ln, log10, log2,
+ lower, lpad, ltrim, max, md5, min, now, now_expr, nullif, octet_length, or, random,
+ regexp_match, regexp_replace, repeat, replace, reverse, right, round, rpad, rtrim,
+ sha224, sha256, sha384, sha512, signum, sin, split_part, sqrt, starts_with, strpos,
+ substr, sum, tan, to_hex, to_timestamp_micros, to_timestamp_millis,
+ to_timestamp_seconds, translate, trim, trunc, upper, when,
+};
+pub use expr_schema::ExprSchemable;
pub use function::{
AccumulatorFunctionImplementation, ReturnTypeFunction, ScalarFunctionImplementation,
StateTypeFunction,
};
pub use literal::{lit, lit_timestamp_nano, Literal, TimestampLiteral};
+pub use logical_plan::{LogicalPlan, PlanVisitor};
pub use nullif::SUPPORTED_NULLIF_TYPES;
pub use operator::Operator;
pub use signature::{Signature, TypeSignature, Volatility};
@@ -57,5 +70,3 @@ pub use udaf::AggregateUDF;
pub use udf::ScalarUDF;
pub use window_frame::{WindowFrame, WindowFrameBound, WindowFrameUnits};
pub use window_function::{BuiltInWindowFunction, WindowFunction};
-
-pub use logical_plan::{LogicalPlan, PlanVisitor};