You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ho...@apache.org on 2021/08/31 20:15:32 UTC
[arrow-datafusion] branch master updated: Change compound column field name rules (#952)
This is an automated email from the ASF dual-hosted git repository.
houqp pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 7932cb9 Change compound column field name rules (#952)
7932cb9 is described below
commit 7932cb9373192ce2754b39c1f82f22c8a56b7266
Author: Ruihang Xia <wa...@gmail.com>
AuthorDate: Wed Sep 1 04:15:25 2021 +0800
Change compound column field name rules (#952)
* change physical name semantic
* replace expect output in context.rs
* replace expect output in sql & dataframe_impl
* add spec entry
* replace expect output in lib doc & planner
---
datafusion/src/execution/context.rs | 418 +++++++++++------------
datafusion/src/execution/dataframe_impl.rs | 18 +-
datafusion/src/lib.rs | 20 +-
datafusion/src/physical_plan/planner.rs | 75 ++--
datafusion/tests/sql.rs | 252 +++++++-------
docs/specification/output-field-name-semantic.md | 24 +-
6 files changed, 412 insertions(+), 395 deletions(-)
diff --git a/datafusion/src/execution/context.rs b/datafusion/src/execution/context.rs
index 2e6a7a4..da6de04 100644
--- a/datafusion/src/execution/context.rs
+++ b/datafusion/src/execution/context.rs
@@ -1493,15 +1493,15 @@ mod tests {
assert_eq!(results.len(), 1);
let expected = vec![
- "+----+----+---------+-----------+---------+---------+---------+",
- "| c1 | c2 | SUM(c2) | COUNT(c2) | MAX(c2) | MIN(c2) | AVG(c2) |",
- "+----+----+---------+-----------+---------+---------+---------+",
- "| 0 | 1 | 220 | 40 | 10 | 1 | 5.5 |",
- "| 0 | 2 | 220 | 40 | 10 | 1 | 5.5 |",
- "| 0 | 3 | 220 | 40 | 10 | 1 | 5.5 |",
- "| 0 | 4 | 220 | 40 | 10 | 1 | 5.5 |",
- "| 0 | 5 | 220 | 40 | 10 | 1 | 5.5 |",
- "+----+----+---------+-----------+---------+---------+---------+",
+ "+----+----+--------------+----------------+--------------+--------------+--------------+",
+ "| c1 | c2 | SUM(test.c2) | COUNT(test.c2) | MAX(test.c2) | MIN(test.c2) | AVG(test.c2) |",
+ "+----+----+--------------+----------------+--------------+--------------+--------------+",
+ "| 0 | 1 | 220 | 40 | 10 | 1 | 5.5 |",
+ "| 0 | 2 | 220 | 40 | 10 | 1 | 5.5 |",
+ "| 0 | 3 | 220 | 40 | 10 | 1 | 5.5 |",
+ "| 0 | 4 | 220 | 40 | 10 | 1 | 5.5 |",
+ "| 0 | 5 | 220 | 40 | 10 | 1 | 5.5 |",
+ "+----+----+--------------+----------------+--------------+--------------+--------------+",
];
// window function shall respect ordering
@@ -1536,15 +1536,15 @@ mod tests {
assert_eq!(results.len(), 1);
let expected = vec![
- "+----+----+--------------+-----------------+----------------+------------------------+---------+-----------+---------+---------+---------+",
- "| c1 | c2 | ROW_NUMBER() | FIRST_VALUE(c2) | LAST_VALUE(c2) | NTH_VALUE(c2,Int64(2)) | SUM(c2) | COUNT(c2) | MAX(c2) | MIN(c2) | AVG(c2) |",
- "+----+----+--------------+-----------------+----------------+------------------------+---------+-----------+---------+---------+---------+",
- "| 0 | 1 | 1 | 1 | 1 | | 1 | 1 | 1 | 1 | 1 |",
- "| 0 | 2 | 2 | 1 | 2 | 2 | 3 | 2 | 2 | 1 | 1.5 |",
- "| 0 | 3 | 3 | 1 | 3 | 2 | 6 | 3 | 3 | 1 | 2 |",
- "| 0 | 4 | 4 | 1 | 4 | 2 | 10 | 4 | 4 | 1 | 2.5 |",
- "| 0 | 5 | 5 | 1 | 5 | 2 | 15 | 5 | 5 | 1 | 3 |",
- "+----+----+--------------+-----------------+----------------+------------------------+---------+-----------+---------+---------+---------+",
+ "+----+----+--------------+----------------------+---------------------+-----------------------------+--------------+----------------+--------------+--------------+--------------+",
+ "| c1 | c2 | ROW_NUMBER() | FIRST_VALUE(test.c2) | LAST_VALUE(test.c2) | NTH_VALUE(test.c2,Int64(2)) | SUM(test.c2) | COUNT(test.c2) | MAX(test.c2) | MIN(test.c2) | AVG(test.c2) |",
+ "+----+----+--------------+----------------------+---------------------+-----------------------------+--------------+----------------+--------------+--------------+--------------+",
+ "| 0 | 1 | 1 | 1 | 1 | | 1 | 1 | 1 | 1 | 1 |",
+ "| 0 | 2 | 2 | 1 | 2 | 2 | 3 | 2 | 2 | 1 | 1.5 |",
+ "| 0 | 3 | 3 | 1 | 3 | 2 | 6 | 3 | 3 | 1 | 2 |",
+ "| 0 | 4 | 4 | 1 | 4 | 2 | 10 | 4 | 4 | 1 | 2.5 |",
+ "| 0 | 5 | 5 | 1 | 5 | 2 | 15 | 5 | 5 | 1 | 3 |",
+ "+----+----+--------------+----------------------+---------------------+-----------------------------+--------------+----------------+--------------+--------------+--------------+",
];
// window function shall respect ordering
@@ -1571,15 +1571,15 @@ mod tests {
.await?;
let expected = vec![
- "+----+----+---------+-----------+---------+---------+---------+",
- "| c1 | c2 | SUM(c2) | COUNT(c2) | MAX(c2) | MIN(c2) | AVG(c2) |",
- "+----+----+---------+-----------+---------+---------+---------+",
- "| 0 | 1 | 4 | 4 | 1 | 1 | 1 |",
- "| 0 | 2 | 8 | 4 | 2 | 2 | 2 |",
- "| 0 | 3 | 12 | 4 | 3 | 3 | 3 |",
- "| 0 | 4 | 16 | 4 | 4 | 4 | 4 |",
- "| 0 | 5 | 20 | 4 | 5 | 5 | 5 |",
- "+----+----+---------+-----------+---------+---------+---------+",
+ "+----+----+--------------+----------------+--------------+--------------+--------------+",
+ "| c1 | c2 | SUM(test.c2) | COUNT(test.c2) | MAX(test.c2) | MIN(test.c2) | AVG(test.c2) |",
+ "+----+----+--------------+----------------+--------------+--------------+--------------+",
+ "| 0 | 1 | 4 | 4 | 1 | 1 | 1 |",
+ "| 0 | 2 | 8 | 4 | 2 | 2 | 2 |",
+ "| 0 | 3 | 12 | 4 | 3 | 3 | 3 |",
+ "| 0 | 4 | 16 | 4 | 4 | 4 | 4 |",
+ "| 0 | 5 | 20 | 4 | 5 | 5 | 5 |",
+ "+----+----+--------------+----------------+--------------+--------------+--------------+",
];
// window function shall respect ordering
@@ -1610,15 +1610,15 @@ mod tests {
.await?;
let expected = vec![
- "+----+----+--------------+-------------------------+------------------------+--------------------------------+---------+-----------+---------+---------+---------+",
- "| c1 | c2 | ROW_NUMBER() | FIRST_VALUE(c2 Plus c1) | LAST_VALUE(c2 Plus c1) | NTH_VALUE(c2 Plus c1,Int64(1)) | SUM(c2) | COUNT(c2) | MAX(c2) | MIN(c2) | AVG(c2) |",
- "+----+----+--------------+-------------------------+------------------------+--------------------------------+---------+-----------+---------+---------+---------+",
- "| 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |",
- "| 0 | 2 | 1 | 2 | 2 | 2 | 2 | 1 | 2 | 2 | 2 |",
- "| 0 | 3 | 1 | 3 | 3 | 3 | 3 | 1 | 3 | 3 | 3 |",
- "| 0 | 4 | 1 | 4 | 4 | 4 | 4 | 1 | 4 | 4 | 4 |",
- "| 0 | 5 | 1 | 5 | 5 | 5 | 5 | 1 | 5 | 5 | 5 |",
- "+----+----+--------------+-------------------------+------------------------+--------------------------------+---------+-----------+---------+---------+---------+",
+ "+----+----+--------------+-----------------------------------+----------------------------------+------------------------------------------+--------------+----------------+--------------+--------------+--------------+",
+ "| c1 | c2 | ROW_NUMBER() | FIRST_VALUE(test.c2 Plus test.c1) | LAST_VALUE(test.c2 Plus test.c1) | NTH_VALUE(test.c2 Plus test.c1,Int64(1)) | SUM(test.c2) | COUNT(test.c2) | MAX(test.c2) | MIN(test.c2) | AVG(test.c2) |",
+ "+----+----+--------------+-----------------------------------+----------------------------------+------------------------------------------+--------------+----------------+--------------+--------------+--------------+",
+ "| 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |",
+ "| 0 | 2 | 1 | 2 | 2 | 2 | 2 | 1 | 2 | 2 | 2 |",
+ "| 0 | 3 | 1 | 3 | 3 | 3 | 3 | 1 | 3 | 3 | 3 |",
+ "| 0 | 4 | 1 | 4 | 4 | 4 | 4 | 1 | 4 | 4 | 4 |",
+ "| 0 | 5 | 1 | 5 | 5 | 5 | 5 | 1 | 5 | 5 | 5 |",
+ "+----+----+--------------+-----------------------------------+----------------------------------+------------------------------------------+--------------+----------------+--------------+--------------+--------------+",
];
// window function shall respect ordering
@@ -1632,11 +1632,11 @@ mod tests {
assert_eq!(results.len(), 1);
let expected = vec![
- "+---------+---------+",
- "| SUM(c1) | SUM(c2) |",
- "+---------+---------+",
- "| 60 | 220 |",
- "+---------+---------+",
+ "+--------------+--------------+",
+ "| SUM(test.c1) | SUM(test.c2) |",
+ "+--------------+--------------+",
+ "| 60 | 220 |",
+ "+--------------+--------------+",
];
assert_batches_sorted_eq!(expected, &results);
@@ -1653,11 +1653,11 @@ mod tests {
assert_eq!(results.len(), 1);
let expected = vec![
- "+---------+---------+",
- "| SUM(c1) | SUM(c2) |",
- "+---------+---------+",
- "| | |",
- "+---------+---------+",
+ "+--------------+--------------+",
+ "| SUM(test.c1) | SUM(test.c2) |",
+ "+--------------+--------------+",
+ "| | |",
+ "+--------------+--------------+",
];
assert_batches_sorted_eq!(expected, &results);
@@ -1670,11 +1670,11 @@ mod tests {
assert_eq!(results.len(), 1);
let expected = vec![
- "+---------+---------+",
- "| AVG(c1) | AVG(c2) |",
- "+---------+---------+",
- "| 1.5 | 5.5 |",
- "+---------+---------+",
+ "+--------------+--------------+",
+ "| AVG(test.c1) | AVG(test.c2) |",
+ "+--------------+--------------+",
+ "| 1.5 | 5.5 |",
+ "+--------------+--------------+",
];
assert_batches_sorted_eq!(expected, &results);
@@ -1687,11 +1687,11 @@ mod tests {
assert_eq!(results.len(), 1);
let expected = vec![
- "+---------+---------+",
- "| MAX(c1) | MAX(c2) |",
- "+---------+---------+",
- "| 3 | 10 |",
- "+---------+---------+",
+ "+--------------+--------------+",
+ "| MAX(test.c1) | MAX(test.c2) |",
+ "+--------------+--------------+",
+ "| 3 | 10 |",
+ "+--------------+--------------+",
];
assert_batches_sorted_eq!(expected, &results);
@@ -1704,11 +1704,11 @@ mod tests {
assert_eq!(results.len(), 1);
let expected = vec![
- "+---------+---------+",
- "| MIN(c1) | MIN(c2) |",
- "+---------+---------+",
- "| 0 | 1 |",
- "+---------+---------+",
+ "+--------------+--------------+",
+ "| MIN(test.c1) | MIN(test.c2) |",
+ "+--------------+--------------+",
+ "| 0 | 1 |",
+ "+--------------+--------------+",
];
assert_batches_sorted_eq!(expected, &results);
@@ -1720,14 +1720,14 @@ mod tests {
let results = execute("SELECT c1, SUM(c2) FROM test GROUP BY c1", 4).await?;
let expected = vec![
- "+----+---------+",
- "| c1 | SUM(c2) |",
- "+----+---------+",
- "| 0 | 55 |",
- "| 1 | 55 |",
- "| 2 | 55 |",
- "| 3 | 55 |",
- "+----+---------+",
+ "+----+--------------+",
+ "| c1 | SUM(test.c2) |",
+ "+----+--------------+",
+ "| 0 | 55 |",
+ "| 1 | 55 |",
+ "| 2 | 55 |",
+ "| 3 | 55 |",
+ "+----+--------------+",
];
assert_batches_sorted_eq!(expected, &results);
@@ -1739,14 +1739,14 @@ mod tests {
let results = execute("SELECT c1, AVG(c2) FROM test GROUP BY c1", 4).await?;
let expected = vec![
- "+----+---------+",
- "| c1 | AVG(c2) |",
- "+----+---------+",
- "| 0 | 5.5 |",
- "| 1 | 5.5 |",
- "| 2 | 5.5 |",
- "| 3 | 5.5 |",
- "+----+---------+",
+ "+----+--------------+",
+ "| c1 | AVG(test.c2) |",
+ "+----+--------------+",
+ "| 0 | 5.5 |",
+ "| 1 | 5.5 |",
+ "| 2 | 5.5 |",
+ "| 3 | 5.5 |",
+ "+----+--------------+",
];
assert_batches_sorted_eq!(expected, &results);
@@ -1790,14 +1790,14 @@ mod tests {
let results = execute("SELECT c1, MAX(c2) FROM test GROUP BY c1", 4).await?;
let expected = vec![
- "+----+---------+",
- "| c1 | MAX(c2) |",
- "+----+---------+",
- "| 0 | 10 |",
- "| 1 | 10 |",
- "| 2 | 10 |",
- "| 3 | 10 |",
- "+----+---------+",
+ "+----+--------------+",
+ "| c1 | MAX(test.c2) |",
+ "+----+--------------+",
+ "| 0 | 10 |",
+ "| 1 | 10 |",
+ "| 2 | 10 |",
+ "| 3 | 10 |",
+ "+----+--------------+",
];
assert_batches_sorted_eq!(expected, &results);
@@ -1809,14 +1809,14 @@ mod tests {
let results = execute("SELECT c1, MIN(c2) FROM test GROUP BY c1", 4).await?;
let expected = vec![
- "+----+---------+",
- "| c1 | MIN(c2) |",
- "+----+---------+",
- "| 0 | 1 |",
- "| 1 | 1 |",
- "| 2 | 1 |",
- "| 3 | 1 |",
- "+----+---------+",
+ "+----+--------------+",
+ "| c1 | MIN(test.c2) |",
+ "+----+--------------+",
+ "| 0 | 1 |",
+ "| 1 | 1 |",
+ "| 2 | 1 |",
+ "| 3 | 1 |",
+ "+----+--------------+",
];
assert_batches_sorted_eq!(expected, &results);
@@ -1857,11 +1857,11 @@ mod tests {
.unwrap();
let expected = vec![
- "+--------------+---------------+---------------+-------------+",
- "| COUNT(nanos) | COUNT(micros) | COUNT(millis) | COUNT(secs) |",
- "+--------------+---------------+---------------+-------------+",
- "| 3 | 3 | 3 | 3 |",
- "+--------------+---------------+---------------+-------------+",
+ "+----------------+-----------------+-----------------+---------------+",
+ "| COUNT(t.nanos) | COUNT(t.micros) | COUNT(t.millis) | COUNT(t.secs) |",
+ "+----------------+-----------------+-----------------+---------------+",
+ "| 3 | 3 | 3 | 3 |",
+ "+----------------+-----------------+-----------------+---------------+",
];
assert_batches_sorted_eq!(expected, &results);
@@ -1884,7 +1884,7 @@ mod tests {
let expected = vec![
"+----------------------------+----------------------------+-------------------------+---------------------+",
- "| MIN(nanos) | MIN(micros) | MIN(millis) | MIN(secs) |",
+ "| MIN(t.nanos) | MIN(t.micros) | MIN(t.millis) | MIN(t.secs) |",
"+----------------------------+----------------------------+-------------------------+---------------------+",
"| 2011-12-13 11:13:10.123450 | 2011-12-13 11:13:10.123450 | 2011-12-13 11:13:10.123 | 2011-12-13 11:13:10 |",
"+----------------------------+----------------------------+-------------------------+---------------------+",
@@ -1910,7 +1910,7 @@ mod tests {
let expected = vec![
"+-------------------------+-------------------------+-------------------------+---------------------+",
- "| MAX(nanos) | MAX(micros) | MAX(millis) | MAX(secs) |",
+ "| MAX(t.nanos) | MAX(t.micros) | MAX(t.millis) | MAX(t.secs) |",
"+-------------------------+-------------------------+-------------------------+---------------------+",
"| 2021-01-01 05:11:10.432 | 2021-01-01 05:11:10.432 | 2021-01-01 05:11:10.432 | 2021-01-01 05:11:10 |",
"+-------------------------+-------------------------+-------------------------+---------------------+",
@@ -1961,11 +1961,11 @@ mod tests {
assert_eq!(results.len(), 1);
let expected = vec![
- "+-----------+-----------+",
- "| COUNT(c1) | COUNT(c2) |",
- "+-----------+-----------+",
- "| 10 | 10 |",
- "+-----------+-----------+",
+ "+----------------+----------------+",
+ "| COUNT(test.c1) | COUNT(test.c2) |",
+ "+----------------+----------------+",
+ "| 10 | 10 |",
+ "+----------------+----------------+",
];
assert_batches_sorted_eq!(expected, &results);
Ok(())
@@ -1977,11 +1977,11 @@ mod tests {
assert_eq!(results.len(), 1);
let expected = vec![
- "+-----------+-----------+",
- "| COUNT(c1) | COUNT(c2) |",
- "+-----------+-----------+",
- "| 40 | 40 |",
- "+-----------+-----------+",
+ "+----------------+----------------+",
+ "| COUNT(test.c1) | COUNT(test.c2) |",
+ "+----------------+----------------+",
+ "| 40 | 40 |",
+ "+----------------+----------------+",
];
assert_batches_sorted_eq!(expected, &results);
Ok(())
@@ -1992,14 +1992,14 @@ mod tests {
let results = execute("SELECT c1, COUNT(c2) FROM test GROUP BY c1", 4).await?;
let expected = vec![
- "+----+-----------+",
- "| c1 | COUNT(c2) |",
- "+----+-----------+",
- "| 0 | 10 |",
- "| 1 | 10 |",
- "| 2 | 10 |",
- "| 3 | 10 |",
- "+----+-----------+",
+ "+----+----------------+",
+ "| c1 | COUNT(test.c2) |",
+ "+----+----------------+",
+ "| 0 | 10 |",
+ "| 1 | 10 |",
+ "| 2 | 10 |",
+ "| 3 | 10 |",
+ "+----+----------------+",
];
assert_batches_sorted_eq!(expected, &results);
Ok(())
@@ -2043,12 +2043,12 @@ mod tests {
).await?;
let expected = vec![
- "+---------------------+---------+",
- "| week | SUM(c2) |",
- "+---------------------+---------+",
- "| 2020-12-07 00:00:00 | 24 |",
- "| 2020-12-14 00:00:00 | 156 |",
- "+---------------------+---------+",
+ "+---------------------+--------------+",
+ "| week | SUM(test.c2) |",
+ "+---------------------+--------------+",
+ "| 2020-12-07 00:00:00 | 24 |",
+ "| 2020-12-14 00:00:00 | 156 |",
+ "+---------------------+--------------+",
];
assert_batches_sorted_eq!(expected, &results);
@@ -2094,13 +2094,13 @@ mod tests {
.expect("ran plan correctly");
let expected = vec![
- "+-----+------------+",
- "| str | COUNT(val) |",
- "+-----+------------+",
- "| A | 4 |",
- "| B | 1 |",
- "| C | 1 |",
- "+-----+------------+",
+ "+-----+--------------+",
+ "| str | COUNT(t.val) |",
+ "+-----+--------------+",
+ "| A | 4 |",
+ "| B | 1 |",
+ "| C | 1 |",
+ "+-----+--------------+",
];
assert_batches_sorted_eq!(expected, &results);
}
@@ -2145,13 +2145,13 @@ mod tests {
.expect("ran plan correctly");
let expected = vec![
- "+------+------------+",
- "| dict | COUNT(val) |",
- "+------+------------+",
- "| A | 4 |",
- "| B | 1 |",
- "| C | 1 |",
- "+------+------------+",
+ "+------+--------------+",
+ "| dict | COUNT(t.val) |",
+ "+------+--------------+",
+ "| A | 4 |",
+ "| B | 1 |",
+ "| C | 1 |",
+ "+------+--------------+",
];
assert_batches_sorted_eq!(expected, &results);
@@ -2162,13 +2162,13 @@ mod tests {
.expect("ran plan correctly");
let expected = vec![
- "+-----+-------------+",
- "| val | COUNT(dict) |",
- "+-----+-------------+",
- "| 1 | 3 |",
- "| 2 | 2 |",
- "| 4 | 1 |",
- "+-----+-------------+",
+ "+-----+---------------+",
+ "| val | COUNT(t.dict) |",
+ "+-----+---------------+",
+ "| 1 | 3 |",
+ "| 2 | 2 |",
+ "| 4 | 1 |",
+ "+-----+---------------+",
];
assert_batches_sorted_eq!(expected, &results);
@@ -2181,13 +2181,13 @@ mod tests {
.expect("ran plan correctly");
let expected = vec![
- "+-----+----------------------+",
- "| val | COUNT(DISTINCT dict) |",
- "+-----+----------------------+",
- "| 1 | 2 |",
- "| 2 | 2 |",
- "| 4 | 1 |",
- "+-----+----------------------+",
+ "+-----+------------------------+",
+ "| val | COUNT(DISTINCT t.dict) |",
+ "+-----+------------------------+",
+ "| 1 | 2 |",
+ "| 2 | 2 |",
+ "| 4 | 1 |",
+ "+-----+------------------------+",
];
assert_batches_sorted_eq!(expected, &results);
}
@@ -2286,13 +2286,13 @@ mod tests {
let results = run_count_distinct_integers_aggregated_scenario(partitions).await?;
let expected = vec![
- "+---------+-----------------+------------------------+-------------------------+-------------------------+-------------------------+-------------------------+--------------------------+--------------------------+--------------------------+",
- "| c_group | COUNT(c_uint64) | COUNT(DISTINCT c_int8) | COUNT(DISTINCT c_int16) | COUNT(DISTINCT c_int32) | COUNT(DISTINCT c_int64) | COUNT(DISTINCT c_uint8) | COUNT(DISTINCT c_uint16) | COUNT(DISTINCT c_uint32) | COUNT(DISTINCT c_uint64) |",
- "+---------+-----------------+------------------------+-------------------------+-------------------------+-------------------------+-------------------------+--------------------------+--------------------------+--------------------------+",
- "| a | 3 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 |",
- "| b | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |",
- "| c | 3 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 |",
- "+---------+-----------------+------------------------+-------------------------+-------------------------+-------------------------+-------------------------+--------------------------+--------------------------+--------------------------+",
+ "+---------+----------------------+-----------------------------+------------------------------+------------------------------+------------------------------+------------------------------+-------------------------------+-------------------------------+-------------------------------+",
+ "| c_group | COUNT(test.c_uint64) | COUNT(DISTINCT test.c_int8) | COUNT(DISTINCT test.c_int16) | COUNT(DISTINCT test.c_int32) | COUNT(DISTINCT test.c_int64) | COUNT(DISTINCT test.c_uint8) | COUNT(DISTINCT test.c_uint16) | COUNT(DISTINCT test.c_uint32) | COUNT(DISTINCT test.c_uint64) |",
+ "+---------+----------------------+-----------------------------+------------------------------+------------------------------+------------------------------+------------------------------+-------------------------------+-------------------------------+-------------------------------+",
+ "| a | 3 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 |",
+ "| b | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |",
+ "| c | 3 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 |",
+ "+---------+----------------------+-----------------------------+------------------------------+------------------------------+------------------------------+------------------------------+-------------------------------+-------------------------------+-------------------------------+",
];
assert_batches_sorted_eq!(expected, &results);
@@ -2312,13 +2312,13 @@ mod tests {
let results = run_count_distinct_integers_aggregated_scenario(partitions).await?;
let expected = vec![
- "+---------+-----------------+------------------------+-------------------------+-------------------------+-------------------------+-------------------------+--------------------------+--------------------------+--------------------------+",
- "| c_group | COUNT(c_uint64) | COUNT(DISTINCT c_int8) | COUNT(DISTINCT c_int16) | COUNT(DISTINCT c_int32) | COUNT(DISTINCT c_int64) | COUNT(DISTINCT c_uint8) | COUNT(DISTINCT c_uint16) | COUNT(DISTINCT c_uint32) | COUNT(DISTINCT c_uint64) |",
- "+---------+-----------------+------------------------+-------------------------+-------------------------+-------------------------+-------------------------+--------------------------+--------------------------+--------------------------+",
- "| a | 5 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 |",
- "| b | 5 | 4 | 4 | 4 | 4 | 4 | 4 | 4 | 4 |",
- "| c | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |",
- "+---------+-----------------+------------------------+-------------------------+-------------------------+-------------------------+-------------------------+--------------------------+--------------------------+--------------------------+",
+ "+---------+----------------------+-----------------------------+------------------------------+------------------------------+------------------------------+------------------------------+-------------------------------+-------------------------------+-------------------------------+",
+ "| c_group | COUNT(test.c_uint64) | COUNT(DISTINCT test.c_int8) | COUNT(DISTINCT test.c_int16) | COUNT(DISTINCT test.c_int32) | COUNT(DISTINCT test.c_int64) | COUNT(DISTINCT test.c_uint8) | COUNT(DISTINCT test.c_uint16) | COUNT(DISTINCT test.c_uint32) | COUNT(DISTINCT test.c_uint64) |",
+ "+---------+----------------------+-----------------------------+------------------------------+------------------------------+------------------------------+------------------------------+-------------------------------+-------------------------------+-------------------------------+",
+ "| a | 5 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 |",
+ "| b | 5 | 4 | 4 | 4 | 4 | 4 | 4 | 4 | 4 |",
+ "| c | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |",
+ "+---------+----------------------+-----------------------------+------------------------------+------------------------------+------------------------------+------------------------------+-------------------------------+-------------------------------+-------------------------------+",
];
assert_batches_sorted_eq!(expected, &results);
@@ -2433,11 +2433,11 @@ mod tests {
.unwrap();
let expected = vec![
- "+---------+",
- "| sqrt(i) |",
- "+---------+",
- "| 1 |",
- "+---------+",
+ "+-----------+",
+ "| sqrt(t.i) |",
+ "+-----------+",
+ "| 1 |",
+ "+-----------+",
];
let results = plan_and_collect(&mut ctx, "SELECT sqrt(i) FROM t")
@@ -2521,11 +2521,11 @@ mod tests {
let provider = MemTable::try_new(schema, vec![vec![batch]]).unwrap();
ctx.register_table("t", Arc::new(provider)).unwrap();
let expected = vec![
- "+---------+",
- "| sqrt(v) |",
- "+---------+",
- "| 1 |",
- "+---------+",
+ "+-----------+",
+ "| sqrt(t.v) |",
+ "+-----------+",
+ "| 1 |",
+ "+-----------+",
];
let results = plan_and_collect(&mut ctx, "SELECT sqrt(v) FROM t")
.await
@@ -2564,11 +2564,11 @@ mod tests {
let result = plan_and_collect(&mut ctx, "SELECT \"MY_FUNC\"(i) FROM t").await?;
let expected = vec![
- "+------------+",
- "| MY_FUNC(i) |",
- "+------------+",
- "| 1 |",
- "+------------+",
+ "+--------------+",
+ "| MY_FUNC(t.i) |",
+ "+--------------+",
+ "| 1 |",
+ "+--------------+",
];
assert_batches_eq!(expected, &result);
@@ -2582,11 +2582,11 @@ mod tests {
.unwrap();
let expected = vec![
- "+--------+",
- "| MAX(i) |",
- "+--------+",
- "| 1 |",
- "+--------+",
+ "+----------+",
+ "| MAX(t.i) |",
+ "+----------+",
+ "| 1 |",
+ "+----------+",
];
let results = plan_and_collect(&mut ctx, "SELECT max(i) FROM t")
@@ -2645,11 +2645,11 @@ mod tests {
let result = plan_and_collect(&mut ctx, "SELECT \"MY_AVG\"(i) FROM t").await?;
let expected = vec![
- "+-----------+",
- "| MY_AVG(i) |",
- "+-----------+",
- "| 1 |",
- "+-----------+",
+ "+-------------+",
+ "| MY_AVG(t.i) |",
+ "+-------------+",
+ "| 1 |",
+ "+-------------+",
];
assert_batches_eq!(expected, &result);
@@ -2745,11 +2745,11 @@ mod tests {
assert_eq!(results.len(), 1);
let expected = vec![
- "+---------+---------+-----------------+",
- "| SUM(c1) | SUM(c2) | COUNT(UInt8(1)) |",
- "+---------+---------+-----------------+",
- "| 10 | 110 | 20 |",
- "+---------+---------+-----------------+",
+ "+--------------+--------------+-----------------+",
+ "| SUM(test.c1) | SUM(test.c2) | COUNT(UInt8(1)) |",
+ "+--------------+--------------+-----------------+",
+ "| 10 | 110 | 20 |",
+ "+--------------+--------------+-----------------+",
];
assert_batches_eq!(expected, &results);
@@ -2864,14 +2864,14 @@ mod tests {
let result = collect(plan).await?;
let expected = vec![
- "+-----+-----+-------------+",
- "| a | b | my_add(a,b) |",
- "+-----+-----+-------------+",
- "| 1 | 2 | 3 |",
- "| 10 | 12 | 22 |",
- "| 10 | 12 | 22 |",
- "| 100 | 120 | 220 |",
- "+-----+-----+-------------+",
+ "+-----+-----+-----------------+",
+ "| a | b | my_add(t.a,t.b) |",
+ "+-----+-----+-----------------+",
+ "| 1 | 2 | 3 |",
+ "| 10 | 12 | 22 |",
+ "| 10 | 12 | 22 |",
+ "| 100 | 120 | 220 |",
+ "+-----+-----+-----------------+",
];
assert_batches_eq!(expected, &result);
@@ -2974,11 +2974,11 @@ mod tests {
let result = plan_and_collect(&mut ctx, "SELECT MY_AVG(a) FROM t").await?;
let expected = vec![
- "+-----------+",
- "| my_avg(a) |",
- "+-----------+",
- "| 3 |",
- "+-----------+",
+ "+-------------+",
+ "| my_avg(t.a) |",
+ "+-------------+",
+ "| 3 |",
+ "+-------------+",
];
assert_batches_eq!(expected, &result);
diff --git a/datafusion/src/execution/dataframe_impl.rs b/datafusion/src/execution/dataframe_impl.rs
index 5e1a4f4..724a3f8 100644
--- a/datafusion/src/execution/dataframe_impl.rs
+++ b/datafusion/src/execution/dataframe_impl.rs
@@ -277,15 +277,15 @@ mod tests {
assert_batches_sorted_eq!(
vec![
- "+----+----------------------+--------------------+---------------------+--------------------+------------+---------------------+",
- "| c1 | MIN(c12) | MAX(c12) | AVG(c12) | SUM(c12) | COUNT(c12) | COUNT(DISTINCT c12) |",
- "+----+----------------------+--------------------+---------------------+--------------------+------------+---------------------+",
- "| a | 0.02182578039211991 | 0.9800193410444061 | 0.48754517466109415 | 10.238448667882977 | 21 | 21 |",
- "| b | 0.04893135681998029 | 0.9185813970744787 | 0.41040709263815384 | 7.797734760124923 | 19 | 19 |",
- "| c | 0.0494924465469434 | 0.991517828651004 | 0.6600456536439784 | 13.860958726523545 | 21 | 21 |",
- "| d | 0.061029375346466685 | 0.9748360509016578 | 0.48855379387549824 | 8.793968289758968 | 18 | 18 |",
- "| e | 0.01479305307777301 | 0.9965400387585364 | 0.48600669271341534 | 10.206140546981722 | 21 | 21 |",
- "+----+----------------------+--------------------+---------------------+--------------------+------------+---------------------+",
+ "+----+-----------------------------+-----------------------------+-----------------------------+-----------------------------+-------------------------------+----------------------------------------+",
+ "| c1 | MIN(aggregate_test_100.c12) | MAX(aggregate_test_100.c12) | AVG(aggregate_test_100.c12) | SUM(aggregate_test_100.c12) | COUNT(aggregate_test_100.c12) | COUNT(DISTINCT aggregate_test_100.c12) |",
+ "+----+-----------------------------+-----------------------------+-----------------------------+-----------------------------+-------------------------------+----------------------------------------+",
+ "| a | 0.02182578039211991 | 0.9800193410444061 | 0.48754517466109415 | 10.238448667882977 | 21 | 21 |",
+ "| b | 0.04893135681998029 | 0.9185813970744787 | 0.41040709263815384 | 7.797734760124923 | 19 | 19 |",
+ "| c | 0.0494924465469434 | 0.991517828651004 | 0.6600456536439784 | 13.860958726523545 | 21 | 21 |",
+ "| d | 0.061029375346466685 | 0.9748360509016578 | 0.48855379387549824 | 8.793968289758968 | 18 | 18 |",
+ "| e | 0.01479305307777301 | 0.9965400387585364 | 0.48600669271341534 | 10.206140546981722 | 21 | 21 |",
+ "+----+-----------------------------+-----------------------------+-----------------------------+-----------------------------+-------------------------------+----------------------------------------+",
],
&df
);
diff --git a/datafusion/src/lib.rs b/datafusion/src/lib.rs
index 93ff7e7..eac9b5f 100644
--- a/datafusion/src/lib.rs
+++ b/datafusion/src/lib.rs
@@ -60,11 +60,11 @@
//! let pretty_results = arrow::util::pretty::pretty_format_batches(&results)?;
//!
//! let expected = vec![
-//! "+---+--------+",
-//! "| a | MIN(b) |",
-//! "+---+--------+",
-//! "| 1 | 2 |",
-//! "+---+--------+"
+//! "+---+--------------------------+",
+//! "| a | MIN(tests/example.csv.b) |",
+//! "+---+--------------------------+",
+//! "| 1 | 2 |",
+//! "+---+--------------------------+"
//! ];
//!
//! assert_eq!(pretty_results.trim().lines().collect::<Vec<_>>(), expected);
@@ -95,11 +95,11 @@
//! let pretty_results = arrow::util::pretty::pretty_format_batches(&results)?;
//!
//! let expected = vec![
-//! "+---+--------+",
-//! "| a | MIN(b) |",
-//! "+---+--------+",
-//! "| 1 | 2 |",
-//! "+---+--------+"
+//! "+---+----------------+",
+//! "| a | MIN(example.b) |",
+//! "+---+----------------+",
+//! "| 1 | 2 |",
+//! "+---+----------------+"
//! ];
//!
//! assert_eq!(pretty_results.trim().lines().collect::<Vec<_>>(), expected);
diff --git a/datafusion/src/physical_plan/planner.rs b/datafusion/src/physical_plan/planner.rs
index 1cc3625..d499174 100644
--- a/datafusion/src/physical_plan/planner.rs
+++ b/datafusion/src/physical_plan/planner.rs
@@ -62,11 +62,10 @@ fn create_function_physical_name(
fun: &str,
distinct: bool,
args: &[Expr],
- input_schema: &DFSchema,
) -> Result<String> {
let names: Vec<String> = args
.iter()
- .map(|e| physical_name(e, input_schema))
+ .map(|e| create_physical_name(e, false))
.collect::<Result<_>>()?;
let distinct_str = match distinct {
@@ -76,15 +75,25 @@ fn create_function_physical_name(
Ok(format!("{}({}{})", fun, distinct_str, names.join(",")))
}
-fn physical_name(e: &Expr, input_schema: &DFSchema) -> Result<String> {
+fn physical_name(e: &Expr) -> Result<String> {
+ create_physical_name(e, true)
+}
+
+fn create_physical_name(e: &Expr, is_first_expr: bool) -> Result<String> {
match e {
- Expr::Column(c) => Ok(c.name.clone()),
+ Expr::Column(c) => {
+ if is_first_expr {
+ Ok(c.name.clone())
+ } else {
+ Ok(c.flat_name())
+ }
+ }
Expr::Alias(_, name) => Ok(name.clone()),
Expr::ScalarVariable(variable_names) => Ok(variable_names.join(".")),
Expr::Literal(value) => Ok(format!("{:?}", value)),
Expr::BinaryExpr { left, op, right } => {
- let left = physical_name(left, input_schema)?;
- let right = physical_name(right, input_schema)?;
+ let left = create_physical_name(left, false)?;
+ let right = create_physical_name(right, false)?;
Ok(format!("{} {:?} {}", left, op, right))
}
Expr::Case {
@@ -106,50 +115,48 @@ fn physical_name(e: &Expr, input_schema: &DFSchema) -> Result<String> {
Ok(name)
}
Expr::Cast { expr, data_type } => {
- let expr = physical_name(expr, input_schema)?;
+ let expr = create_physical_name(expr, false)?;
Ok(format!("CAST({} AS {:?})", expr, data_type))
}
Expr::TryCast { expr, data_type } => {
- let expr = physical_name(expr, input_schema)?;
+ let expr = create_physical_name(expr, false)?;
Ok(format!("TRY_CAST({} AS {:?})", expr, data_type))
}
Expr::Not(expr) => {
- let expr = physical_name(expr, input_schema)?;
+ let expr = create_physical_name(expr, false)?;
Ok(format!("NOT {}", expr))
}
Expr::Negative(expr) => {
- let expr = physical_name(expr, input_schema)?;
+ let expr = create_physical_name(expr, false)?;
Ok(format!("(- {})", expr))
}
Expr::IsNull(expr) => {
- let expr = physical_name(expr, input_schema)?;
+ let expr = create_physical_name(expr, false)?;
Ok(format!("{} IS NULL", expr))
}
Expr::IsNotNull(expr) => {
- let expr = physical_name(expr, input_schema)?;
+ let expr = create_physical_name(expr, false)?;
Ok(format!("{} IS NOT NULL", expr))
}
Expr::ScalarFunction { fun, args, .. } => {
- create_function_physical_name(&fun.to_string(), false, args, input_schema)
+ create_function_physical_name(&fun.to_string(), false, args)
}
Expr::ScalarUDF { fun, args, .. } => {
- create_function_physical_name(&fun.name, false, args, input_schema)
+ create_function_physical_name(&fun.name, false, args)
}
Expr::WindowFunction { fun, args, .. } => {
- create_function_physical_name(&fun.to_string(), false, args, input_schema)
+ create_function_physical_name(&fun.to_string(), false, args)
}
Expr::AggregateFunction {
fun,
distinct,
args,
..
- } => {
- create_function_physical_name(&fun.to_string(), *distinct, args, input_schema)
- }
+ } => create_function_physical_name(&fun.to_string(), *distinct, args),
Expr::AggregateUDF { fun, args } => {
let mut names = Vec::with_capacity(args.len());
for e in args {
- names.push(physical_name(e, input_schema)?);
+ names.push(create_physical_name(e, false)?);
}
Ok(format!("{}({})", fun.name, names.join(",")))
}
@@ -158,8 +165,8 @@ fn physical_name(e: &Expr, input_schema: &DFSchema) -> Result<String> {
list,
negated,
} => {
- let expr = physical_name(expr, input_schema)?;
- let list = list.iter().map(|expr| physical_name(expr, input_schema));
+ let expr = create_physical_name(expr, false)?;
+ let list = list.iter().map(|expr| create_physical_name(expr, false));
if *negated {
Ok(format!("{} NOT IN ({:?})", expr, list))
} else {
@@ -444,7 +451,7 @@ impl DefaultPhysicalPlanner {
&physical_input_schema,
ctx_state,
),
- physical_name(e, logical_input_schema),
+ physical_name(e),
))
})
.collect::<Result<Vec<_>>>()?;
@@ -545,10 +552,10 @@ impl DefaultPhysicalPlanner {
}
// logical column is not a derived column, safe to pass along to
// physical_name
- Err(_) => physical_name(e, input_schema),
+ Err(_) => physical_name(e),
}
} else {
- physical_name(e, input_schema)
+ physical_name(e)
};
tuple_err((
@@ -1192,7 +1199,7 @@ impl DefaultPhysicalPlanner {
// unpack aliased logical expressions, e.g. "sum(col) over () as total"
let (name, e) = match e {
Expr::Alias(sub_expr, alias) => (alias.clone(), sub_expr.as_ref()),
- _ => (physical_name(e, logical_input_schema)?, e),
+ _ => (physical_name(e)?, e),
};
self.create_window_expr_with_name(
e,
@@ -1271,7 +1278,7 @@ impl DefaultPhysicalPlanner {
// unpack aliased logical expressions, e.g. "sum(col) as total"
let (name, e) = match e {
Expr::Alias(sub_expr, alias) => (alias.clone(), sub_expr.as_ref()),
- _ => (physical_name(e, logical_input_schema)?, e),
+ _ => (physical_name(e)?, e),
};
self.create_aggregate_expr_with_name(
@@ -1629,16 +1636,24 @@ mod tests {
let path = format!("{}/csv/aggregate_test_100.csv", testdata);
let options = CsvReadOptions::new().schema_infer_max_records(100);
- let logical_plan = LogicalPlanBuilder::scan_csv(path, options, None)?
- .aggregate(vec![col("c1")], vec![sum(col("c2"))])?
- .build()?;
+ let logical_plan = LogicalPlanBuilder::scan_csv_with_name(
+ path,
+ options,
+ None,
+ "aggregate_test_100",
+ )?
+ .aggregate(vec![col("c1")], vec![sum(col("c2"))])?
+ .build()?;
let execution_plan = plan(&logical_plan)?;
let final_hash_agg = execution_plan
.as_any()
.downcast_ref::<HashAggregateExec>()
.expect("hash aggregate");
- assert_eq!("SUM(c2)", final_hash_agg.schema().field(1).name());
+ assert_eq!(
+ "SUM(aggregate_test_100.c2)",
+ final_hash_agg.schema().field(1).name()
+ );
// we need access to the input to the partial aggregate so that other projects can
// implement serde
assert_eq!("c2", final_hash_agg.input_schema().field(1).name());
diff --git a/datafusion/tests/sql.rs b/datafusion/tests/sql.rs
index 03c5cbb..40cd38f 100644
--- a/datafusion/tests/sql.rs
+++ b/datafusion/tests/sql.rs
@@ -166,18 +166,18 @@ async fn parquet_query() {
let sql = "SELECT id, CAST(string_col AS varchar) FROM alltypes_plain";
let actual = execute_to_batches(&mut ctx, sql).await;
let expected = vec![
- "+----+--------------------------+",
- "| id | CAST(string_col AS Utf8) |",
- "+----+--------------------------+",
- "| 4 | 0 |",
- "| 5 | 1 |",
- "| 6 | 0 |",
- "| 7 | 1 |",
- "| 2 | 0 |",
- "| 3 | 1 |",
- "| 0 | 0 |",
- "| 1 | 1 |",
- "+----+--------------------------+",
+ "+----+-----------------------------------------+",
+ "| id | CAST(alltypes_plain.string_col AS Utf8) |",
+ "+----+-----------------------------------------+",
+ "| 4 | 0 |",
+ "| 5 | 1 |",
+ "| 6 | 0 |",
+ "| 7 | 1 |",
+ "| 2 | 0 |",
+ "| 3 | 1 |",
+ "| 0 | 0 |",
+ "| 1 | 1 |",
+ "+----+-----------------------------------------+",
];
assert_batches_eq!(expected, &actual);
@@ -338,11 +338,11 @@ async fn csv_count_star() -> Result<()> {
let sql = "SELECT COUNT(*), COUNT(1) AS c, COUNT(c1) FROM aggregate_test_100";
let actual = execute_to_batches(&mut ctx, sql).await;
let expected = vec![
- "+-----------------+-----+-----------+",
- "| COUNT(UInt8(1)) | c | COUNT(c1) |",
- "+-----------------+-----+-----------+",
- "| 100 | 100 | 100 |",
- "+-----------------+-----+-----------+",
+ "+-----------------+-----+------------------------------+",
+ "| COUNT(UInt8(1)) | c | COUNT(aggregate_test_100.c1) |",
+ "+-----------------+-----+------------------------------+",
+ "| 100 | 100 | 100 |",
+ "+-----------------+-----+------------------------------+",
];
assert_batches_eq!(expected, &actual);
Ok(())
@@ -442,15 +442,15 @@ async fn csv_query_group_by_int_min_max() -> Result<()> {
let sql = "SELECT c2, MIN(c12), MAX(c12) FROM aggregate_test_100 GROUP BY c2";
let actual = execute_to_batches(&mut ctx, sql).await;
let expected = vec![
- "+----+----------------------+--------------------+",
- "| c2 | MIN(c12) | MAX(c12) |",
- "+----+----------------------+--------------------+",
- "| 1 | 0.05636955101974106 | 0.9965400387585364 |",
- "| 2 | 0.16301110515739792 | 0.991517828651004 |",
- "| 3 | 0.047343434291126085 | 0.9293883502480845 |",
- "| 4 | 0.02182578039211991 | 0.9237877978193884 |",
- "| 5 | 0.01479305307777301 | 0.9723580396501548 |",
- "+----+----------------------+--------------------+",
+ "+----+-----------------------------+-----------------------------+",
+ "| c2 | MIN(aggregate_test_100.c12) | MAX(aggregate_test_100.c12) |",
+ "+----+-----------------------------+-----------------------------+",
+ "| 1 | 0.05636955101974106 | 0.9965400387585364 |",
+ "| 2 | 0.16301110515739792 | 0.991517828651004 |",
+ "| 3 | 0.047343434291126085 | 0.9293883502480845 |",
+ "| 4 | 0.02182578039211991 | 0.9237877978193884 |",
+ "| 5 | 0.01479305307777301 | 0.9723580396501548 |",
+ "+----+-----------------------------+-----------------------------+",
];
assert_batches_sorted_eq!(expected, &actual);
Ok(())
@@ -666,35 +666,35 @@ async fn csv_query_group_by_two_columns() -> Result<()> {
let sql = "SELECT c1, c2, MIN(c3) FROM aggregate_test_100 GROUP BY c1, c2";
let actual = execute_to_batches(&mut ctx, sql).await;
let expected = vec![
- "+----+----+---------+",
- "| c1 | c2 | MIN(c3) |",
- "+----+----+---------+",
- "| a | 1 | -85 |",
- "| a | 2 | -48 |",
- "| a | 3 | -72 |",
- "| a | 4 | -101 |",
- "| a | 5 | -101 |",
- "| b | 1 | 12 |",
- "| b | 2 | -60 |",
- "| b | 3 | -101 |",
- "| b | 4 | -117 |",
- "| b | 5 | -82 |",
- "| c | 1 | -24 |",
- "| c | 2 | -117 |",
- "| c | 3 | -2 |",
- "| c | 4 | -90 |",
- "| c | 5 | -94 |",
- "| d | 1 | -99 |",
- "| d | 2 | 93 |",
- "| d | 3 | -76 |",
- "| d | 4 | 5 |",
- "| d | 5 | -59 |",
- "| e | 1 | 36 |",
- "| e | 2 | -61 |",
- "| e | 3 | -95 |",
- "| e | 4 | -56 |",
- "| e | 5 | -86 |",
- "+----+----+---------+",
+ "+----+----+----------------------------+",
+ "| c1 | c2 | MIN(aggregate_test_100.c3) |",
+ "+----+----+----------------------------+",
+ "| a | 1 | -85 |",
+ "| a | 2 | -48 |",
+ "| a | 3 | -72 |",
+ "| a | 4 | -101 |",
+ "| a | 5 | -101 |",
+ "| b | 1 | 12 |",
+ "| b | 2 | -60 |",
+ "| b | 3 | -101 |",
+ "| b | 4 | -117 |",
+ "| b | 5 | -82 |",
+ "| c | 1 | -24 |",
+ "| c | 2 | -117 |",
+ "| c | 3 | -2 |",
+ "| c | 4 | -90 |",
+ "| c | 5 | -94 |",
+ "| d | 1 | -99 |",
+ "| d | 2 | 93 |",
+ "| d | 3 | -76 |",
+ "| d | 4 | 5 |",
+ "| d | 5 | -59 |",
+ "| e | 1 | 36 |",
+ "| e | 2 | -61 |",
+ "| e | 3 | -95 |",
+ "| e | 4 | -56 |",
+ "| e | 5 | -86 |",
+ "+----+----+----------------------------+",
];
assert_batches_sorted_eq!(expected, &actual);
Ok(())
@@ -890,15 +890,15 @@ async fn csv_query_group_by_avg() -> Result<()> {
let sql = "SELECT c1, avg(c12) FROM aggregate_test_100 GROUP BY c1";
let actual = execute_to_batches(&mut ctx, sql).await;
let expected = vec![
- "+----+---------------------+",
- "| c1 | AVG(c12) |",
- "+----+---------------------+",
- "| a | 0.48754517466109415 |",
- "| b | 0.41040709263815384 |",
- "| c | 0.6600456536439784 |",
- "| d | 0.48855379387549824 |",
- "| e | 0.48600669271341534 |",
- "+----+---------------------+",
+ "+----+-----------------------------+",
+ "| c1 | AVG(aggregate_test_100.c12) |",
+ "+----+-----------------------------+",
+ "| a | 0.48754517466109415 |",
+ "| b | 0.41040709263815384 |",
+ "| c | 0.6600456536439784 |",
+ "| d | 0.48855379387549824 |",
+ "| e | 0.48600669271341534 |",
+ "+----+-----------------------------+",
];
assert_batches_sorted_eq!(expected, &actual);
Ok(())
@@ -911,15 +911,15 @@ async fn csv_query_group_by_avg_with_projection() -> Result<()> {
let sql = "SELECT avg(c12), c1 FROM aggregate_test_100 GROUP BY c1";
let actual = execute_to_batches(&mut ctx, sql).await;
let expected = vec![
- "+---------------------+----+",
- "| AVG(c12) | c1 |",
- "+---------------------+----+",
- "| 0.41040709263815384 | b |",
- "| 0.48600669271341534 | e |",
- "| 0.48754517466109415 | a |",
- "| 0.48855379387549824 | d |",
- "| 0.6600456536439784 | c |",
- "+---------------------+----+",
+ "+-----------------------------+----+",
+ "| AVG(aggregate_test_100.c12) | c1 |",
+ "+-----------------------------+----+",
+ "| 0.41040709263815384 | b |",
+ "| 0.48600669271341534 | e |",
+ "| 0.48754517466109415 | a |",
+ "| 0.48855379387549824 | d |",
+ "| 0.6600456536439784 | c |",
+ "+-----------------------------+----+",
];
assert_batches_sorted_eq!(expected, &actual);
Ok(())
@@ -975,11 +975,11 @@ async fn csv_query_count() -> Result<()> {
let sql = "SELECT count(c12) FROM aggregate_test_100";
let actual = execute_to_batches(&mut ctx, sql).await;
let expected = vec![
- "+------------+",
- "| COUNT(c12) |",
- "+------------+",
- "| 100 |",
- "+------------+",
+ "+-------------------------------+",
+ "| COUNT(aggregate_test_100.c12) |",
+ "+-------------------------------+",
+ "| 100 |",
+ "+-------------------------------+",
];
assert_batches_eq!(expected, &actual);
Ok(())
@@ -1002,15 +1002,15 @@ async fn csv_query_window_with_empty_over() -> Result<()> {
limit 5";
let actual = execute_to_batches(&mut ctx, sql).await;
let expected = vec![
- "+-----------+-----------+------------+-------------+-----------------+----------------+------------------------+",
- "| c9 | COUNT(c5) | MAX(c5) | MIN(c5) | FIRST_VALUE(c5) | LAST_VALUE(c5) | NTH_VALUE(c5,Int64(2)) |",
- "+-----------+-----------+------------+-------------+-----------------+----------------+------------------------+",
- "| 28774375 | 100 | 2143473091 | -2141999138 | 2033001162 | 61035129 | 706441268 |",
- "| 63044568 | 100 | 2143473091 | -2141999138 | 2033001162 | 61035129 | 706441268 |",
- "| 141047417 | 100 | 2143473091 | -2141999138 | 2033001162 | 61035129 | 706441268 |",
- "| 141680161 | 100 | 2143473091 | -2141999138 | 2033001162 | 61035129 | 706441268 |",
- "| 145294611 | 100 | 2143473091 | -2141999138 | 2033001162 | 61035129 | 706441268 |",
- "+-----------+-----------+------------+-------------+-----------------+----------------+------------------------+",
+ "+-----------+------------------------------+----------------------------+----------------------------+------------------------------------+-----------------------------------+-------------------------------------------+",
+ "| c9 | COUNT(aggregate_test_100.c5) | MAX(aggregate_test_100.c5) | MIN(aggregate_test_100.c5) | FIRST_VALUE(aggregate_test_100.c5) | LAST_VALUE(aggregate_test_100.c5) | NTH_VALUE(aggregate_test_100.c5,Int64(2)) |",
+ "+-----------+------------------------------+----------------------------+----------------------------+------------------------------------+-----------------------------------+-------------------------------------------+",
+ "| 28774375 | 100 | 2143473091 | -2141999138 | 2033001162 | 61035129 | 706441268 |",
+ "| 63044568 | 100 | 2143473091 | -2141999138 | 2033001162 | 61035129 | 706441268 |",
+ "| 141047417 | 100 | 2143473091 | -2141999138 | 2033001162 | 61035129 | 706441268 |",
+ "| 141680161 | 100 | 2143473091 | -2141999138 | 2033001162 | 61035129 | 706441268 |",
+ "| 145294611 | 100 | 2143473091 | -2141999138 | 2033001162 | 61035129 | 706441268 |",
+ "+-----------+------------------------------+----------------------------+----------------------------+------------------------------------+-----------------------------------+-------------------------------------------+",
];
assert_batches_eq!(expected, &actual);
Ok(())
@@ -1035,15 +1035,15 @@ async fn csv_query_window_with_partition_by() -> Result<()> {
limit 5";
let actual = execute_to_batches(&mut ctx, sql).await;
let expected = vec![
- "+-----------+------------------------+------------------------+--------------------------+------------------------+------------------------+--------------------------------+-------------------------------+---------------------------------------+",
- "| c9 | SUM(CAST(c4 AS Int32)) | AVG(CAST(c4 AS Int32)) | COUNT(CAST(c4 AS Int32)) | MAX(CAST(c4 AS Int32)) | MIN(CAST(c4 AS Int32)) | FIRST_VALUE(CAST(c4 AS Int32)) | LAST_VALUE(CAST(c4 AS Int32)) | NTH_VALUE(CAST(c4 AS Int32),Int64(2)) |",
- "+-----------+------------------------+------------------------+--------------------------+------------------------+------------------------+--------------------------------+-------------------------------+---------------------------------------+",
- "| 28774375 | -16110 | -16110 | 1 | -16110 | -16110 | -16110 | -16110 | |",
- "| 63044568 | 3917 | 3917 | 1 | 3917 | 3917 | 3917 | 3917 | |",
- "| 141047417 | -38455 | -19227.5 | 2 | -16974 | -21481 | -16974 | -21481 | |",
- "| 141680161 | -1114 | -1114 | 1 | -1114 | -1114 | -1114 | -1114 | |",
- "| 145294611 | 15673 | 15673 | 1 | 15673 | 15673 | 15673 | 15673 | |",
- "+-----------+------------------------+------------------------+--------------------------+------------------------+------------------------+--------------------------------+-------------------------------+---------------------------------------+",
+ "+-----------+-------------------------------------------+-------------------------------------------+---------------------------------------------+-------------------------------------------+-------------------------------------------+---------------------------------------------------+--------------------------------------------------+----------------------------------------------------------+",
+ "| c9 | SUM(CAST(aggregate_test_100.c4 AS Int32)) | AVG(CAST(aggregate_test_100.c4 AS Int32)) | COUNT(CAST(aggregate_test_100.c4 AS Int32)) | MAX(CAST(aggregate_test_100.c4 AS Int32)) | MIN(CAST(aggregate_test_100.c4 AS Int32)) | FIRST_VALUE(CAST(aggregate_test_100.c4 AS Int32)) | LAST_VALUE(CAST(aggregate_test_100.c4 AS Int32)) | NTH_VALUE(CAST(aggregate_test_100.c4 AS Int32),Int64(2)) |",
+ "+-----------+-------------------------------------------+-------------------------------------------+---------------------------------------------+-------------------------------------------+-------------------------------------------+---------------------------------------------------+--------------------------------------------------+----------------------------------------------------------+",
+ "| 28774375 | -16110 | -16110 | 1 | -16110 | -16110 | -16110 | -16110 | |",
+ "| 63044568 | 3917 | 3917 | 1 | 3917 | 3917 | 3917 | 3917 | |",
+ "| 141047417 | -38455 | -19227.5 | 2 | -16974 | -21481 | -16974 | -21481 | |",
+ "| 141680161 | -1114 | -1114 | 1 | -1114 | -1114 | -1114 | -1114 | |",
+ "| 145294611 | 15673 | 15673 | 1 | 15673 | 15673 | 15673 | 15673 | |",
+ "+-----------+-------------------------------------------+-------------------------------------------+---------------------------------------------+-------------------------------------------+-------------------------------------------+---------------------------------------------------+--------------------------------------------------+----------------------------------------------------------+",
];
assert_batches_eq!(expected, &actual);
Ok(())
@@ -1068,15 +1068,15 @@ async fn csv_query_window_with_order_by() -> Result<()> {
limit 5";
let actual = execute_to_batches(&mut ctx, sql).await;
let expected = vec![
- "+-----------+-------------+--------------------+-----------+-----------+-------------+-----------------+----------------+------------------------+",
- "| c9 | SUM(c5) | AVG(c5) | COUNT(c5) | MAX(c5) | MIN(c5) | FIRST_VALUE(c5) | LAST_VALUE(c5) | NTH_VALUE(c5,Int64(2)) |",
- "+-----------+-------------+--------------------+-----------+-----------+-------------+-----------------+----------------+------------------------+",
- "| 28774375 | 61035129 | 61035129 | 1 | 61035129 | 61035129 | 61035129 | 61035129 | |",
- "| 63044568 | -47938237 | -23969118.5 | 2 | 61035129 | -108973366 | 61035129 | -108973366 | -108973366 |",
- "| 141047417 | 575165281 | 191721760.33333334 | 3 | 623103518 | -108973366 | 61035129 | 623103518 | -108973366 |",
- "| 141680161 | -1352462829 | -338115707.25 | 4 | 623103518 | -1927628110 | 61035129 | -1927628110 | -108973366 |",
- "| 145294611 | -3251637940 | -650327588 | 5 | 623103518 | -1927628110 | 61035129 | -1899175111 | -108973366 |",
- "+-----------+-------------+--------------------+-----------+-----------+-------------+-----------------+----------------+------------------------+",
+ "+-----------+----------------------------+----------------------------+------------------------------+----------------------------+----------------------------+------------------------------------+-----------------------------------+-------------------------------------------+",
+ "| c9 | SUM(aggregate_test_100.c5) | AVG(aggregate_test_100.c5) | COUNT(aggregate_test_100.c5) | MAX(aggregate_test_100.c5) | MIN(aggregate_test_100.c5) | FIRST_VALUE(aggregate_test_100.c5) | LAST_VALUE(aggregate_test_100.c5) | NTH_VALUE(aggregate_test_100.c5,Int64(2)) |",
+ "+-----------+----------------------------+----------------------------+------------------------------+----------------------------+----------------------------+------------------------------------+-----------------------------------+-------------------------------------------+",
+ "| 28774375 | 61035129 | 61035129 | 1 | 61035129 | 61035129 | 61035129 | 61035129 | |",
+ "| 63044568 | -47938237 | -23969118.5 | 2 | 61035129 | -108973366 | 61035129 | -108973366 | -108973366 |",
+ "| 141047417 | 575165281 | 191721760.33333334 | 3 | 623103518 | -108973366 | 61035129 | 623103518 | -108973366 |",
+ "| 141680161 | -1352462829 | -338115707.25 | 4 | 623103518 | -1927628110 | 61035129 | -1927628110 | -108973366 |",
+ "| 145294611 | -3251637940 | -650327588 | 5 | 623103518 | -1927628110 | 61035129 | -1899175111 | -108973366 |",
+ "+-----------+----------------------------+----------------------------+------------------------------+----------------------------+----------------------------+------------------------------------+-----------------------------------+-------------------------------------------+",
];
assert_batches_eq!(expected, &actual);
Ok(())
@@ -1089,15 +1089,15 @@ async fn csv_query_group_by_int_count() -> Result<()> {
let sql = "SELECT c1, count(c12) FROM aggregate_test_100 GROUP BY c1";
let actual = execute_to_batches(&mut ctx, sql).await;
let expected = vec![
- "+----+------------+",
- "| c1 | COUNT(c12) |",
- "+----+------------+",
- "| a | 21 |",
- "| b | 19 |",
- "| c | 21 |",
- "| d | 18 |",
- "| e | 21 |",
- "+----+------------+",
+ "+----+-------------------------------+",
+ "| c1 | COUNT(aggregate_test_100.c12) |",
+ "+----+-------------------------------+",
+ "| a | 21 |",
+ "| b | 19 |",
+ "| c | 21 |",
+ "| d | 18 |",
+ "| e | 21 |",
+ "+----+-------------------------------+",
];
assert_batches_sorted_eq!(expected, &actual);
Ok(())
@@ -1131,15 +1131,15 @@ async fn csv_query_group_by_string_min_max() -> Result<()> {
let sql = "SELECT c1, MIN(c12), MAX(c12) FROM aggregate_test_100 GROUP BY c1";
let actual = execute_to_batches(&mut ctx, sql).await;
let expected = vec![
- "+----+----------------------+--------------------+",
- "| c1 | MIN(c12) | MAX(c12) |",
- "+----+----------------------+--------------------+",
- "| a | 0.02182578039211991 | 0.9800193410444061 |",
- "| b | 0.04893135681998029 | 0.9185813970744787 |",
- "| c | 0.0494924465469434 | 0.991517828651004 |",
- "| d | 0.061029375346466685 | 0.9748360509016578 |",
- "| e | 0.01479305307777301 | 0.9965400387585364 |",
- "+----+----------------------+--------------------+",
+ "+----+-----------------------------+-----------------------------+",
+ "| c1 | MIN(aggregate_test_100.c12) | MAX(aggregate_test_100.c12) |",
+ "+----+-----------------------------+-----------------------------+",
+ "| a | 0.02182578039211991 | 0.9800193410444061 |",
+ "| b | 0.04893135681998029 | 0.9185813970744787 |",
+ "| c | 0.0494924465469434 | 0.991517828651004 |",
+ "| d | 0.061029375346466685 | 0.9748360509016578 |",
+ "| e | 0.01479305307777301 | 0.9965400387585364 |",
+ "+----+-----------------------------+-----------------------------+",
];
assert_batches_sorted_eq!(expected, &actual);
Ok(())
@@ -4304,11 +4304,11 @@ async fn test_physical_plan_display_indent() {
"GlobalLimitExec: limit=10",
" SortExec: [the_min@2 DESC]",
" CoalescePartitionsExec",
- " ProjectionExec: expr=[c1@0 as c1, MAX(aggregate_test_100.c12)@1 as MAX(c12), MIN(aggregate_test_100.c12)@2 as the_min]",
- " HashAggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[MAX(c12), MIN(c12)]",
+ " ProjectionExec: expr=[c1@0 as c1, MAX(aggregate_test_100.c12)@1 as MAX(aggregate_test_100.c12), MIN(aggregate_test_100.c12)@2 as the_min]",
+ " HashAggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[MAX(aggregate_test_100.c12), MIN(aggregate_test_100.c12)]",
" CoalesceBatchesExec: target_batch_size=4096",
" RepartitionExec: partitioning=Hash([Column { name: \"c1\", index: 0 }], 3)",
- " HashAggregateExec: mode=Partial, gby=[c1@0 as c1], aggr=[MAX(c12), MIN(c12)]",
+ " HashAggregateExec: mode=Partial, gby=[c1@0 as c1], aggr=[MAX(aggregate_test_100.c12), MIN(aggregate_test_100.c12)]",
" CoalesceBatchesExec: target_batch_size=4096",
" FilterExec: c12@1 < CAST(10 AS Float64)",
" RepartitionExec: partitioning=RoundRobinBatch(3)",
diff --git a/docs/specification/output-field-name-semantic.md b/docs/specification/output-field-name-semantic.md
index 0407a17..bc0813a 100644
--- a/docs/specification/output-field-name-semantic.md
+++ b/docs/specification/output-field-name-semantic.md
@@ -25,10 +25,12 @@ Datafusion queries planned from both SQL queries and Dataframe APIs.
## Field name rules
-- All field names MUST not contain relation/table qualifier.
+- All bare column field names MUST NOT contain a relation/table qualifier.
- Both `SELECT t1.id`, `SELECT id` and `df.select_columns(&["id"])` SHOULD result in field name: `id`
+- All compound column field names MUST contain a relation/table qualifier.
+ - `SELECT foo + bar` SHOULD result in field name: `table.foo PLUS table.bar`
- Function names MUST be converted to lowercase.
- - `SELECT AVG(c1)` SHOULD result in field name: `avg(c1)`
+ - `SELECT AVG(c1)` SHOULD result in field name: `avg(table.c1)`
- Literal string MUST not be wrapped with quotes or double quotes.
- `SELECT 'foo'` SHOULD result in field name: `foo`
- Operator expressions MUST be wrapped with parentheses.
@@ -36,7 +38,7 @@ Datafusion queries planned from both SQL queries and Dataframe APIs.
- Operator and operand MUST be separated by spaces.
- `SELECT 1+2` SHOULD result in field name: `(1 + 2)`
- Function arguments MUST be separated by a comma `,` and a space.
- - `SELECT f(c1,c2)` and `df.select(vec![f.udf("f")?.call(vec![col("c1"), col("c2")])])` SHOULD result in field name: `f(c1, c2)`
+ - `SELECT f(c1,c2)` and `df.select(vec![f.udf("f")?.call(vec![col("c1"), col("c2")])])` SHOULD result in field name: `f(table.c1, table.c2)`
## Appendices
@@ -95,10 +97,10 @@ SELECT ABS(t1.id), abs(-id) FROM t1;
Datafusion Arrow record batches output:
-| abs(id) | abs((- id)) |
-| ------- | ----------- |
-| 1 | 1 |
-| 2 | 2 |
+| abs(t1.id) | abs((- t1.id)) |
+| ---------- | -------------- |
+| 1 | 1 |
+| 2 | 2 |
Spark output:
@@ -138,10 +140,10 @@ SELECT t1.id + ABS(id), ABS(id * t1.id) FROM t1;
Datafusion Arrow record batches output:
-| id + abs(id) | abs(id \* id) |
-| ------------ | ------------- |
-| 2 | 1 |
-| 4 | 4 |
+| t1.id + abs(t1.id) | abs(t1.id \* t1.id) |
+| ------------------ | ------------------- |
+| 2 | 1 |
+| 4 | 4 |
Spark output: