You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2021/08/03 20:54:46 UTC

[GitHub] [arrow-datafusion] alamb commented on a change in pull request #797: Better join order resolution logic

alamb commented on a change in pull request #797:
URL: https://github.com/apache/arrow-datafusion/pull/797#discussion_r682092227



##########
File path: datafusion/src/logical_plan/builder.rs
##########
@@ -287,16 +287,125 @@ impl LogicalPlanBuilder {
                 .into_iter()
                 .zip(join_keys.1.into_iter())
                 .map(|(l, r)| {
-                    let mut swap = false;
                     let l = l.into();
-                    let left_key = l.clone().normalize(&self.plan).or_else(|_| {
-                        swap = true;
-                        l.normalize(right)
-                    });
-                    if swap {
-                        (r.into().normalize(&self.plan), left_key)
-                    } else {
-                        (left_key, r.into().normalize(right))
+                    let r = r.into();
+                    let lr = l.relation.clone();
+                    let rr = r.relation.clone();
+
+                    match (lr, rr) {
+                        (Some(lr), Some(rr)) => {
+                            let l_is_left =
+                                self.plan.all_schemas().iter().any(|schema| {
+                                    schema.fields().iter().any(|field| {
+                                        field.qualifier().unwrap() == &lr
+                                            && field.name() == &l.name
+                                    })
+                                });
+                            let l_is_right = right.all_schemas().iter().any(|schema| {
+                                schema.fields().iter().any(|field| {
+                                    field.qualifier().unwrap() == &lr
+                                        && field.name() == &l.name
+                                })
+                            });
+                            let r_is_left =
+                                self.plan.all_schemas().iter().any(|schema| {
+                                    schema.fields().iter().any(|field| {
+                                        field.qualifier().unwrap() == &rr
+                                            && field.name() == &r.name
+                                    })
+                                });
+                            let r_is_right = right.all_schemas().iter().any(|schema| {
+                                schema.fields().iter().any(|field| {
+                                    field.qualifier().unwrap() == &rr
+                                        && field.name() == &r.name
+                                })
+                            });
+                            match (l_is_left, l_is_right, r_is_left, r_is_right) {
+                                (true, _, _, true) => (Ok(l), Ok(r)),
+                                (_, true, true, _) => (Ok(r), Ok(l)),
+                                (_, _, _, _) => (
+                                    Err(DataFusionError::Plan(format!(

Review comment:
       couldn't this error also happen if the column was found in both the left and right input?

##########
File path: datafusion/tests/sql.rs
##########
@@ -1730,6 +1730,17 @@ async fn equijoin() -> Result<()> {
         let actual = execute(&mut ctx, sql).await;
         assert_eq!(expected, actual);
     }
+
+    let mut ctx = create_join_context_qualified()?;
+    let equivalent_sql = [
+        "SELECT t1.a, t2.b FROM t1 INNER JOIN t2 ON t1.a = t2.a ORDER BY t1.a",
+        "SELECT t1.a, t2.b FROM t1 INNER JOIN t2 ON t2.a = t1.a ORDER BY t1.a",

Review comment:
       What about something like
   
   ```rust
           "SELECT t1.a, t2.b FROM t1 INNER JOIN t2 ON a = t1.a ORDER BY t1.a",
   ```
   
   ? I would expect it to given an error about ambiguous relation name

##########
File path: datafusion/src/logical_plan/builder.rs
##########
@@ -287,16 +287,125 @@ impl LogicalPlanBuilder {
                 .into_iter()
                 .zip(join_keys.1.into_iter())
                 .map(|(l, r)| {
-                    let mut swap = false;
                     let l = l.into();
-                    let left_key = l.clone().normalize(&self.plan).or_else(|_| {
-                        swap = true;
-                        l.normalize(right)
-                    });
-                    if swap {
-                        (r.into().normalize(&self.plan), left_key)
-                    } else {
-                        (left_key, r.into().normalize(right))
+                    let r = r.into();
+                    let lr = l.relation.clone();
+                    let rr = r.relation.clone();
+
+                    match (lr, rr) {
+                        (Some(lr), Some(rr)) => {
+                            let l_is_left =
+                                self.plan.all_schemas().iter().any(|schema| {
+                                    schema.fields().iter().any(|field| {
+                                        field.qualifier().unwrap() == &lr

Review comment:
       How are we ensuring that `field.qualifier()` is not `None` (aka how do we know that `unwrap()` won't panic?
   
   Maybe this would be safer if expressed something like
   
   ```rust
   field.qualifier().map(|q| q == &lr).unwrap_or(false)
   ```
   
   ?

##########
File path: datafusion/src/logical_plan/builder.rs
##########
@@ -287,16 +287,125 @@ impl LogicalPlanBuilder {
                 .into_iter()
                 .zip(join_keys.1.into_iter())
                 .map(|(l, r)| {
-                    let mut swap = false;
                     let l = l.into();
-                    let left_key = l.clone().normalize(&self.plan).or_else(|_| {
-                        swap = true;
-                        l.normalize(right)
-                    });
-                    if swap {
-                        (r.into().normalize(&self.plan), left_key)
-                    } else {
-                        (left_key, r.into().normalize(right))
+                    let r = r.into();
+                    let lr = l.relation.clone();
+                    let rr = r.relation.clone();
+
+                    match (lr, rr) {
+                        (Some(lr), Some(rr)) => {
+                            let l_is_left =
+                                self.plan.all_schemas().iter().any(|schema| {
+                                    schema.fields().iter().any(|field| {

Review comment:
       It would be cool if we could avoid the repetition here




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org