You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/11/12 09:56:45 UTC

[GitHub] [arrow-datafusion] jackwener commented on a diff in pull request #4185: Reimplement `Eliminate cross join`

jackwener commented on code in PR #4185:
URL: https://github.com/apache/arrow-datafusion/pull/4185#discussion_r1020741009


##########
datafusion/optimizer/src/eliminate_cross_join.rs:
##########
@@ -44,143 +44,243 @@ impl ReduceCrossJoin {
     }
 }
 
+/// Attempt to reorder joins to reduce cross joins to inner joins.
+/// for queries:
+/// 'select ... from a, b where a.x = b.y and b.xx = 100;'
+/// 'select ... from a, b where (a.x = b.y and b.xx = 100) or (a.x = b.y and b.xx = 200);'
+/// 'select ... from a, b, c where (a.x = b.y and b.xx = 100 and a.z = c.z)
+/// or (a.x = b.y and b.xx = 200 and a.z=c.z);'
+/// For above queries, the join predicate is available in filters and they are moved to
+/// join nodes appropriately
+/// This fix helps to improve the performance of TPCH Q19. issue#78
+///
 impl OptimizerRule for ReduceCrossJoin {
     fn optimize(
         &self,
         plan: &LogicalPlan,
         _optimizer_config: &mut OptimizerConfig,
     ) -> Result<LogicalPlan> {
-        let mut possible_join_keys: Vec<(Column, Column)> = vec![];
-        let mut all_join_keys = HashSet::new();
+        match plan {
+            LogicalPlan::Filter(filter) => {
+                let mut input = (**filter.input()).clone();
+
+                // optimize children.
+                input = self.optimize(&input, _optimizer_config)?;
+
+                let mut possible_join_keys: Vec<(Column, Column)> = vec![];
+                let mut all_inputs: Vec<LogicalPlan> = vec![];
+                match &input {
+                    LogicalPlan::Join(join) => {
+                        if join.join_type != JoinType::Inner {
+                            return utils::optimize_children(
+                                self,
+                                plan,
+                                _optimizer_config,
+                            );
+                        }
+                        collect_all_inputs_from_inner(
+                            join,
+                            &mut possible_join_keys,
+                            &mut all_inputs,
+                        );
+                    }
+                    LogicalPlan::CrossJoin(join) => {
+                        collect_all_inputs_from_cross(
+                            join,
+                            &mut possible_join_keys,
+                            &mut all_inputs,
+                        );
+                    }
+                    _ => {
+                        let new_exprs = plan.expressions();
+                        let new_inputs = [input];
+                        return from_plan(plan, &new_exprs, &new_inputs);
+                    }
+                }
+
+                let predicate = filter.predicate();
+                // join keys are handled locally
+                let mut all_join_keys: HashSet<(Column, Column)> = HashSet::new();
 
-        reduce_cross_join(self, plan, &mut possible_join_keys, &mut all_join_keys)
+                extract_possible_join_keys(predicate, &mut possible_join_keys);
+
+                let mut left = all_inputs.remove(0);
+                while all_inputs.len() > 0 {
+                    left = find_inner_join(
+                        &left,
+                        &mut all_inputs,
+                        &mut possible_join_keys,
+                        &mut all_join_keys,
+                    )?;
+                }
+
+                // TODO: how to handle a schema mismatch here still needs to be discussed.
+                // if plan.schema() != left.schema() {
+                //     left = LogicalPlan::Projection(Projection::new_from_schema(
+                //         Arc::new(plan.clone()),
+                //         plan.schema().clone(),
+                //     ));
+                // }

Review Comment:
   How to handle the `Schema` here still needs to be discussed.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org