You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2023/05/24 18:58:32 UTC

[arrow-datafusion] branch main updated: Fix UNION ALL aliasing with more complex queries (#6417)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new a50fb46328 Fix UNION ALL aliasing with more complex queries (#6417)
a50fb46328 is described below

commit a50fb46328d83bef560912d05ac19fe73159a2b0
Author: comphead <co...@users.noreply.github.com>
AuthorDate: Wed May 24 11:58:27 2023 -0700

    Fix UNION ALL aliasing with more complex queries (#6417)
    
    * Fix UNION ALL aliasing with more complex queries
    
    * remove unalias for column
    
    * fmt
---
 .../core/tests/sqllogictests/test_files/union.slt  | 35 +++++++++++++++++++++-
 datafusion/expr/src/logical_plan/builder.rs        | 16 +++++-----
 datafusion/sql/tests/integration_test.rs           |  4 +--
 3 files changed, 45 insertions(+), 10 deletions(-)

diff --git a/datafusion/core/tests/sqllogictests/test_files/union.slt b/datafusion/core/tests/sqllogictests/test_files/union.slt
index a031d7ef9e..5779d5153e 100644
--- a/datafusion/core/tests/sqllogictests/test_files/union.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/union.slt
@@ -447,7 +447,7 @@ drop table t1
 statement ok
 drop table t2
 
-# test UNION ALL aliases correctly with all aliased
+# test UNION ALL aliases correctly with aliased columns
 query TT
 explain select 1 a group by a union all select 2 b union all select 3 c
 ----
@@ -473,3 +473,36 @@ UnionExec
 ----EmptyExec: produce_one_row=true
 --ProjectionExec: expr=[3 as a]
 ----EmptyExec: produce_one_row=true
+
+# test UNION ALL aliases correctly with aliased subquery
+query TT
+explain select count(*) count, n from (select 5 as n) a group by n
+union all
+select x, y from (select 1 as x , max(10) as y) b
+----
+logical_plan
+Union
+--Projection: COUNT(UInt8(1)) AS count, a.n
+----Aggregate: groupBy=[[a.n]], aggr=[[COUNT(UInt8(1))]]
+------SubqueryAlias: a
+--------Projection: Int64(5) AS n
+----------EmptyRelation
+--Projection: b.x AS count, b.y AS n
+----SubqueryAlias: b
+------Projection: Int64(1) AS x, MAX(Int64(10)) AS y
+--------Aggregate: groupBy=[[]], aggr=[[MAX(Int64(10))]]
+----------EmptyRelation
+physical_plan
+UnionExec
+--ProjectionExec: expr=[COUNT(UInt8(1))@1 as count, n@0 as n]
+----AggregateExec: mode=FinalPartitioned, gby=[n@0 as n], aggr=[COUNT(UInt8(1))]
+------CoalesceBatchesExec: target_batch_size=8192
+--------RepartitionExec: partitioning=Hash([Column { name: "n", index: 0 }], 4), input_partitions=4
+----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+------------AggregateExec: mode=Partial, gby=[n@0 as n], aggr=[COUNT(UInt8(1))]
+--------------ProjectionExec: expr=[5 as n]
+----------------EmptyExec: produce_one_row=true
+--ProjectionExec: expr=[x@0 as count, y@1 as n]
+----ProjectionExec: expr=[1 as x, MAX(Int64(10))@0 as y]
+------AggregateExec: mode=Single, gby=[], aggr=[MAX(Int64(10))]
+--------EmptyExec: produce_one_row=true
diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs
index b3076778be..741ed3a24b 100644
--- a/datafusion/expr/src/logical_plan/builder.rs
+++ b/datafusion/expr/src/logical_plan/builder.rs
@@ -1133,16 +1133,18 @@ pub fn project_with_column_index(
         .into_iter()
         .enumerate()
         .map(|(i, e)| match e {
-            alias @ Expr::Alias { .. }
-                if &alias.display_name().unwrap() != schema.field(i).name() =>
-            {
-                alias.unalias().alias(schema.field(i).name())
+            Expr::Alias(_, ref name) if name != schema.field(i).name() => {
+                e.unalias().alias(schema.field(i).name())
             }
-            ignore_alias @ Expr::Alias { .. } => ignore_alias,
-            ignore_col @ Expr::Column { .. } => ignore_col,
-            expr => expr.alias(schema.field(i).name()),
+            Expr::Column(Column {
+                relation: _,
+                ref name,
+            }) if name != schema.field(i).name() => e.alias(schema.field(i).name()),
+            Expr::Alias { .. } | Expr::Column { .. } => e,
+            _ => e.alias(schema.field(i).name()),
         })
         .collect::<Vec<_>>();
+
     Ok(LogicalPlan::Projection(Projection::try_new_with_schema(
         alias_expr, input, schema,
     )?))
diff --git a/datafusion/sql/tests/integration_test.rs b/datafusion/sql/tests/integration_test.rs
index e1acfcd7c5..452761454a 100644
--- a/datafusion/sql/tests/integration_test.rs
+++ b/datafusion/sql/tests/integration_test.rs
@@ -1964,7 +1964,7 @@ fn union_with_different_column_names() {
     let expected = "Union\
             \n  Projection: orders.order_id\
             \n    TableScan: orders\
-            \n  Projection: orders.customer_id\
+            \n  Projection: orders.customer_id AS order_id\
             \n    TableScan: orders";
     quick_test(sql, expected);
 }
@@ -2064,7 +2064,7 @@ fn union_with_binary_expr_and_cast() {
         \n      SubqueryAlias: x\
         \n        Projection: Int64(1) AS a\
         \n          EmptyRelation\
-        \n  Projection: Float64(2.1) + x.a\
+        \n  Projection: Float64(2.1) + x.a AS Float64(0) + x.a\
         \n    Aggregate: groupBy=[[Float64(2.1) + x.a]], aggr=[[]]\
         \n      SubqueryAlias: x\
         \n        Projection: Int64(1) AS a\