You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2023/05/26 17:20:42 UTC
[arrow-datafusion] branch main updated: feat: eliminate useless join | convert inner to outer when condition is true (#6443)
This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new a9f0c7afe2 feat: eliminate useless join | convert inner to outer when condition is true (#6443)
a9f0c7afe2 is described below
commit a9f0c7afe2ea3bb14bfcceedfcd33cfe4b954035
Author: jakevin <ja...@gmail.com>
AuthorDate: Sat May 27 01:20:37 2023 +0800
feat: eliminate useless join | convert inner to outer when condition is true (#6443)
* minor
* feat: eliminate useless join | convert inner to outer when condition is true
---
datafusion/core/tests/simplification.rs | 2 +-
.../tests/sqllogictests/test_files/explain.slt | 2 +
.../core/tests/sqllogictests/test_files/join.slt | 21 ++++
datafusion/expr/src/field_util.rs | 7 +-
datafusion/expr/src/logical_plan/display.rs | 6 +-
datafusion/expr/src/type_coercion/binary.rs | 9 +-
datafusion/optimizer/src/eliminate_join.rs | 121 +++++++++++++++++++++
datafusion/optimizer/src/lib.rs | 1 +
datafusion/optimizer/src/optimizer.rs | 2 +
9 files changed, 152 insertions(+), 19 deletions(-)
diff --git a/datafusion/core/tests/simplification.rs b/datafusion/core/tests/simplification.rs
index 434a0ddbf1..b6d856b2d9 100644
--- a/datafusion/core/tests/simplification.rs
+++ b/datafusion/core/tests/simplification.rs
@@ -27,7 +27,7 @@ use datafusion_optimizer::simplify_expressions::{ExprSimplifier, SimplifyInfo};
/// about the expressions.
///
/// You can provide that information using DataFusion [DFSchema]
-/// objects or from some other implemention
+/// objects or from some other implementation
struct MyInfo {
/// The input schema
schema: DFSchema,
diff --git a/datafusion/core/tests/sqllogictests/test_files/explain.slt b/datafusion/core/tests/sqllogictests/test_files/explain.slt
index 7656dcea63..75002fecb1 100644
--- a/datafusion/core/tests/sqllogictests/test_files/explain.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/explain.slt
@@ -147,6 +147,7 @@ analyzed_logical_plan SAME TEXT AS ABOVE
logical_plan after simplify_expressions SAME TEXT AS ABOVE
logical_plan after unwrap_cast_in_comparison SAME TEXT AS ABOVE
logical_plan after replace_distinct_aggregate SAME TEXT AS ABOVE
+logical_plan after eliminate_join SAME TEXT AS ABOVE
logical_plan after decorrelate_predicate_subquery SAME TEXT AS ABOVE
logical_plan after scalar_subquery_to_join SAME TEXT AS ABOVE
logical_plan after extract_equijoin_predicate SAME TEXT AS ABOVE
@@ -175,6 +176,7 @@ logical_plan after push_down_limit SAME TEXT AS ABOVE
logical_plan after simplify_expressions SAME TEXT AS ABOVE
logical_plan after unwrap_cast_in_comparison SAME TEXT AS ABOVE
logical_plan after replace_distinct_aggregate SAME TEXT AS ABOVE
+logical_plan after eliminate_join SAME TEXT AS ABOVE
logical_plan after decorrelate_predicate_subquery SAME TEXT AS ABOVE
logical_plan after scalar_subquery_to_join SAME TEXT AS ABOVE
logical_plan after extract_equijoin_predicate SAME TEXT AS ABOVE
diff --git a/datafusion/core/tests/sqllogictests/test_files/join.slt b/datafusion/core/tests/sqllogictests/test_files/join.slt
index 2f49c5b97c..283ff57a98 100644
--- a/datafusion/core/tests/sqllogictests/test_files/join.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/join.slt
@@ -551,6 +551,27 @@ with t1 as (select 1 as col1, 'asd' as col2),
t2 as (select 1 as col3, 'sdf' as col4)
select col2, col4 from t1 full outer join t2 on col1 = col3
+# test eliminate join when condition is false
+query TT
+explain select * from t1 join t2 on false;
+----
+logical_plan EmptyRelation
+physical_plan EmptyExec: produce_one_row=false
+
+# test covert inner join to cross join when condition is true
+query TT
+explain select * from t1 inner join t2 on true;
+----
+logical_plan
+CrossJoin:
+--TableScan: t1 projection=[t1_id, t1_name, t1_int]
+--TableScan: t2 projection=[t2_id, t2_name, t2_int]
+physical_plan
+CrossJoinExec
+--CoalescePartitionsExec
+----MemoryExec: partitions=4, partition_sizes=[1, 0, 0, 0]
+--MemoryExec: partitions=4, partition_sizes=[1, 0, 0, 0]
+
statement ok
drop table IF EXISTS t1;
diff --git a/datafusion/expr/src/field_util.rs b/datafusion/expr/src/field_util.rs
index 8f5d6a6e34..feb96928c1 100644
--- a/datafusion/expr/src/field_util.rs
+++ b/datafusion/expr/src/field_util.rs
@@ -37,12 +37,7 @@ pub fn get_indexed_field(data_type: &DataType, key: &ScalarValue) -> Result<Fiel
))
} else {
let field = fields.iter().find(|f| f.name() == s);
- match field {
- None => Err(DataFusionError::Plan(format!(
- "Field {s} not found in struct"
- ))),
- Some(f) => Ok(f.as_ref().clone()),
- }
+ field.ok_or(DataFusionError::Plan(format!("Field {s} not found in struct"))).map(|f| f.as_ref().clone())
}
}
(DataType::Struct(_), _) => Err(DataFusionError::Plan(
diff --git a/datafusion/expr/src/logical_plan/display.rs b/datafusion/expr/src/logical_plan/display.rs
index 68f5674780..c82689b2cc 100644
--- a/datafusion/expr/src/logical_plan/display.rs
+++ b/datafusion/expr/src/logical_plan/display.rs
@@ -242,10 +242,8 @@ impl<'a, 'b> TreeNodeVisitor for GraphvizVisitor<'a, 'b> {
// always be non-empty as pre_visit always pushes
// So it should always be Ok(true)
let res = self.parent_ids.pop();
- match res {
- Some(_) => Ok(VisitRecursion::Continue),
- None => Err(DataFusionError::Internal("Fail to format".to_string())),
- }
+ res.ok_or(DataFusionError::Internal("Fail to format".to_string()))
+ .map(|_| VisitRecursion::Continue)
}
}
diff --git a/datafusion/expr/src/type_coercion/binary.rs b/datafusion/expr/src/type_coercion/binary.rs
index 962434d652..86c5c15f14 100644
--- a/datafusion/expr/src/type_coercion/binary.rs
+++ b/datafusion/expr/src/type_coercion/binary.rs
@@ -203,14 +203,7 @@ pub fn coerce_types(
};
// re-write the error message of failed coercions to include the operator's information
- match result {
- None => Err(DataFusionError::Plan(
- format!(
- "{lhs_type:?} {op} {rhs_type:?} can't be evaluated because there isn't a common type to coerce the types to"
- ),
- )),
- Some(t) => Ok(t)
- }
+ result.ok_or(DataFusionError::Plan(format!("{lhs_type:?} {op} {rhs_type:?} can't be evaluated because there isn't a common type to coerce the types to")))
}
/// Coercion rules for mathematics operators between decimal and non-decimal types.
diff --git a/datafusion/optimizer/src/eliminate_join.rs b/datafusion/optimizer/src/eliminate_join.rs
new file mode 100644
index 0000000000..00abcdcc68
--- /dev/null
+++ b/datafusion/optimizer/src/eliminate_join.rs
@@ -0,0 +1,121 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::optimizer::ApplyOrder;
+use crate::{OptimizerConfig, OptimizerRule};
+use datafusion_common::{Result, ScalarValue};
+use datafusion_expr::JoinType::Inner;
+use datafusion_expr::{
+ logical_plan::{EmptyRelation, LogicalPlan},
+ CrossJoin, Expr,
+};
+
+/// Eliminates joins when inner join condition is false.
+/// Replaces joins when inner join condition is true with a cross join.
+#[derive(Default)]
+pub struct EliminateJoin;
+
+impl EliminateJoin {
+ pub fn new() -> Self {
+ Self {}
+ }
+}
+
+impl OptimizerRule for EliminateJoin {
+ fn try_optimize(
+ &self,
+ plan: &LogicalPlan,
+ _config: &dyn OptimizerConfig,
+ ) -> Result<Option<LogicalPlan>> {
+ match plan {
+ LogicalPlan::Join(join) if join.join_type == Inner && join.on.is_empty() => {
+ match join.filter {
+ Some(Expr::Literal(ScalarValue::Boolean(Some(true)))) => {
+ Ok(Some(LogicalPlan::CrossJoin(CrossJoin {
+ left: join.left.clone(),
+ right: join.right.clone(),
+ schema: join.schema.clone(),
+ })))
+ }
+ Some(Expr::Literal(ScalarValue::Boolean(Some(false)))) => {
+ Ok(Some(LogicalPlan::EmptyRelation(EmptyRelation {
+ produce_one_row: false,
+ schema: join.schema.clone(),
+ })))
+ }
+ _ => Ok(None),
+ }
+ }
+ _ => Ok(None),
+ }
+ }
+
+ fn name(&self) -> &str {
+ "eliminate_join"
+ }
+
+ fn apply_order(&self) -> Option<ApplyOrder> {
+ Some(ApplyOrder::TopDown)
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use crate::eliminate_join::EliminateJoin;
+ use crate::test::*;
+ use datafusion_common::{Column, Result, ScalarValue};
+ use datafusion_expr::JoinType::Inner;
+ use datafusion_expr::{logical_plan::builder::LogicalPlanBuilder, Expr, LogicalPlan};
+ use std::sync::Arc;
+
+ fn assert_optimized_plan_equal(plan: &LogicalPlan, expected: &str) -> Result<()> {
+ assert_optimized_plan_eq(Arc::new(EliminateJoin::new()), plan, expected)
+ }
+
+ #[test]
+ fn join_on_false() -> Result<()> {
+ let plan = LogicalPlanBuilder::empty(false)
+ .join(
+ LogicalPlanBuilder::empty(false).build()?,
+ Inner,
+ (Vec::<Column>::new(), Vec::<Column>::new()),
+ Some(Expr::Literal(ScalarValue::Boolean(Some(false)))),
+ )?
+ .build()?;
+
+ let expected = "EmptyRelation";
+ assert_optimized_plan_equal(&plan, expected)
+ }
+
+ #[test]
+ fn join_on_true() -> Result<()> {
+ let plan = LogicalPlanBuilder::empty(false)
+ .join(
+ LogicalPlanBuilder::empty(false).build()?,
+ Inner,
+ (Vec::<Column>::new(), Vec::<Column>::new()),
+ Some(Expr::Literal(ScalarValue::Boolean(Some(true)))),
+ )?
+ .build()?;
+
+ let expected = "\
+ CrossJoin:\
+ \n EmptyRelation\
+ \n EmptyRelation";
+ assert_optimized_plan_equal(&plan, expected)
+ }
+}
diff --git a/datafusion/optimizer/src/lib.rs b/datafusion/optimizer/src/lib.rs
index 2af5edbd9f..b8217e0ac7 100644
--- a/datafusion/optimizer/src/lib.rs
+++ b/datafusion/optimizer/src/lib.rs
@@ -22,6 +22,7 @@ pub mod decorrelate_predicate_subquery;
pub mod eliminate_cross_join;
pub mod eliminate_duplicated_expr;
pub mod eliminate_filter;
+pub mod eliminate_join;
pub mod eliminate_limit;
pub mod eliminate_outer_join;
pub mod eliminate_project;
diff --git a/datafusion/optimizer/src/optimizer.rs b/datafusion/optimizer/src/optimizer.rs
index 284280a3a5..f2e6c340d7 100644
--- a/datafusion/optimizer/src/optimizer.rs
+++ b/datafusion/optimizer/src/optimizer.rs
@@ -22,6 +22,7 @@ use crate::decorrelate_predicate_subquery::DecorrelatePredicateSubquery;
use crate::eliminate_cross_join::EliminateCrossJoin;
use crate::eliminate_duplicated_expr::EliminateDuplicatedExpr;
use crate::eliminate_filter::EliminateFilter;
+use crate::eliminate_join::EliminateJoin;
use crate::eliminate_limit::EliminateLimit;
use crate::eliminate_outer_join::EliminateOuterJoin;
use crate::eliminate_project::EliminateProjection;
@@ -210,6 +211,7 @@ impl Optimizer {
Arc::new(SimplifyExpressions::new()),
Arc::new(UnwrapCastInComparison::new()),
Arc::new(ReplaceDistinctWithAggregate::new()),
+ Arc::new(EliminateJoin::new()),
Arc::new(DecorrelatePredicateSubquery::new()),
Arc::new(ScalarSubqueryToJoin::new()),
Arc::new(ExtractEquijoinPredicate::new()),