You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2021/11/09 12:22:12 UTC
[arrow-datafusion] branch master updated: Third batch of updating
sql tests to use assert_batches_eq (#1241)
This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new e852b42 Third batch of updating sql tests to use assert_batches_eq (#1241)
e852b42 is described below
commit e852b429ca21853240c195fcb932c5d20bc7d07d
Author: Matthew Turner <ma...@outlook.com>
AuthorDate: Tue Nov 9 07:22:06 2021 -0500
Third batch of updating sql tests to use assert_batches_eq (#1241)
* Update tests
* More tests
* Fixup test / cleanups
Co-authored-by: Andrew Lamb <an...@nerdnetworks.org>
---
datafusion/tests/sql.rs | 277 +++++++++++++++++++++++++++++-------------------
1 file changed, 166 insertions(+), 111 deletions(-)
diff --git a/datafusion/tests/sql.rs b/datafusion/tests/sql.rs
index 016781c..2924482 100644
--- a/datafusion/tests/sql.rs
+++ b/datafusion/tests/sql.rs
@@ -2296,8 +2296,6 @@ async fn left_join_using() -> Result<()> {
Ok(())
}
-// --- End Test Porting ---
-
#[tokio::test]
async fn equijoin_implicit_syntax() -> Result<()> {
let mut ctx = create_join_context("t1_id", "t2_id")?;
@@ -2306,13 +2304,17 @@ async fn equijoin_implicit_syntax() -> Result<()> {
"SELECT t1_id, t1_name, t2_name FROM t1, t2 WHERE t2_id = t1_id ORDER BY t1_id",
];
let expected = vec![
- vec!["11", "a", "z"],
- vec!["22", "b", "y"],
- vec!["44", "d", "x"],
+ "+-------+---------+---------+",
+ "| t1_id | t1_name | t2_name |",
+ "+-------+---------+---------+",
+ "| 11 | a | z |",
+ "| 22 | b | y |",
+ "| 44 | d | x |",
+ "+-------+---------+---------+",
];
for sql in equivalent_sql.iter() {
- let actual = execute(&mut ctx, sql).await;
- assert_eq!(expected, actual);
+ let actual = execute_to_batches(&mut ctx, sql).await;
+ assert_batches_eq!(expected, &actual);
}
Ok(())
}
@@ -2326,13 +2328,17 @@ async fn equijoin_implicit_syntax_with_filter() -> Result<()> {
AND t1_id = t2_id \
AND t2_id < 99 \
ORDER BY t1_id";
- let actual = execute(&mut ctx, sql).await;
+ let actual = execute_to_batches(&mut ctx, sql).await;
let expected = vec![
- vec!["11", "a", "z"],
- vec!["22", "b", "y"],
- vec!["44", "d", "x"],
+ "+-------+---------+---------+",
+ "| t1_id | t1_name | t2_name |",
+ "+-------+---------+---------+",
+ "| 11 | a | z |",
+ "| 22 | b | y |",
+ "| 44 | d | x |",
+ "+-------+---------+---------+",
];
- assert_eq!(expected, actual);
+ assert_batches_eq!(expected, &actual);
Ok(())
}
@@ -2341,13 +2347,17 @@ async fn equijoin_implicit_syntax_reversed() -> Result<()> {
let mut ctx = create_join_context("t1_id", "t2_id")?;
let sql =
"SELECT t1_id, t1_name, t2_name FROM t1, t2 WHERE t2_id = t1_id ORDER BY t1_id";
- let actual = execute(&mut ctx, sql).await;
+ let actual = execute_to_batches(&mut ctx, sql).await;
let expected = vec![
- vec!["11", "a", "z"],
- vec!["22", "b", "y"],
- vec!["44", "d", "x"],
+ "+-------+---------+---------+",
+ "| t1_id | t1_name | t2_name |",
+ "+-------+---------+---------+",
+ "| 11 | a | z |",
+ "| 22 | b | y |",
+ "| 44 | d | x |",
+ "+-------+---------+---------+",
];
- assert_eq!(expected, actual);
+ assert_batches_eq!(expected, &actual);
Ok(())
}
@@ -2366,31 +2376,35 @@ async fn cross_join() {
assert_eq!(4 * 4, actual.len());
let sql = "SELECT t1_id, t1_name, t2_name FROM t1 CROSS JOIN t2";
- let actual = execute(&mut ctx, sql).await;
+ let actual = execute(&mut ctx, sql).await;
assert_eq!(4 * 4, actual.len());
- assert_eq!(
- actual,
- [
- ["11", "a", "z"],
- ["11", "a", "y"],
- ["11", "a", "x"],
- ["11", "a", "w"],
- ["22", "b", "z"],
- ["22", "b", "y"],
- ["22", "b", "x"],
- ["22", "b", "w"],
- ["33", "c", "z"],
- ["33", "c", "y"],
- ["33", "c", "x"],
- ["33", "c", "w"],
- ["44", "d", "z"],
- ["44", "d", "y"],
- ["44", "d", "x"],
- ["44", "d", "w"]
- ]
- );
+ let actual = execute_to_batches(&mut ctx, sql).await;
+ let expected = vec![
+ "+-------+---------+---------+",
+ "| t1_id | t1_name | t2_name |",
+ "+-------+---------+---------+",
+ "| 11 | a | z |",
+ "| 11 | a | y |",
+ "| 11 | a | x |",
+ "| 11 | a | w |",
+ "| 22 | b | z |",
+ "| 22 | b | y |",
+ "| 22 | b | x |",
+ "| 22 | b | w |",
+ "| 33 | c | z |",
+ "| 33 | c | y |",
+ "| 33 | c | x |",
+ "| 33 | c | w |",
+ "| 44 | d | z |",
+ "| 44 | d | y |",
+ "| 44 | d | x |",
+ "| 44 | d | w |",
+ "+-------+---------+---------+",
+ ];
+
+ assert_batches_eq!(expected, &actual);
// Two partitions (from UNION) on the left
let sql = "SELECT * FROM (SELECT t1_id, t1_name FROM t1 UNION ALL SELECT t1_id, t1_name FROM t1) AS t1 CROSS JOIN t2";
@@ -2413,33 +2427,34 @@ async fn cross_join_unbalanced() {
// the order of the values is not determinisitic, so we need to sort to check the values
let sql =
"SELECT t1_id, t1_name, t2_name FROM t1 CROSS JOIN t2 ORDER BY t1_id, t1_name";
- let actual = execute(&mut ctx, sql).await;
-
- assert_eq!(
- actual,
- [
- ["11", "a", "z"],
- ["11", "a", "y"],
- ["11", "a", "x"],
- ["11", "a", "w"],
- ["22", "b", "z"],
- ["22", "b", "y"],
- ["22", "b", "x"],
- ["22", "b", "w"],
- ["33", "c", "z"],
- ["33", "c", "y"],
- ["33", "c", "x"],
- ["33", "c", "w"],
- ["44", "d", "z"],
- ["44", "d", "y"],
- ["44", "d", "x"],
- ["44", "d", "w"],
- ["77", "e", "z"],
- ["77", "e", "y"],
- ["77", "e", "x"],
- ["77", "e", "w"]
- ]
- );
+ let actual = execute_to_batches(&mut ctx, sql).await;
+ let expected = vec![
+ "+-------+---------+---------+",
+ "| t1_id | t1_name | t2_name |",
+ "+-------+---------+---------+",
+ "| 11 | a | z |",
+ "| 11 | a | y |",
+ "| 11 | a | x |",
+ "| 11 | a | w |",
+ "| 22 | b | z |",
+ "| 22 | b | y |",
+ "| 22 | b | x |",
+ "| 22 | b | w |",
+ "| 33 | c | z |",
+ "| 33 | c | y |",
+ "| 33 | c | x |",
+ "| 33 | c | w |",
+ "| 44 | d | z |",
+ "| 44 | d | y |",
+ "| 44 | d | x |",
+ "| 44 | d | w |",
+ "| 77 | e | z |",
+ "| 77 | e | y |",
+ "| 77 | e | x |",
+ "| 77 | e | w |",
+ "+-------+---------+---------+",
+ ];
+ assert_batches_eq!(expected, &actual);
}
#[tokio::test]
@@ -2710,6 +2725,9 @@ async fn csv_explain() {
let sql = "EXPLAIN SELECT c1 FROM aggregate_test_100 where c2 > 10";
let actual = execute(&mut ctx, sql).await;
let actual = normalize_vec_for_explain(actual);
+
+ // Note can't use `assert_batches_eq` as the plan needs to be
+ // normalized for filenames and number of cores
let expected = vec![
vec![
"logical_plan",
@@ -2742,7 +2760,6 @@ async fn csv_explain_analyze() {
let sql = "EXPLAIN ANALYZE SELECT count(*), c1 FROM aggregate_test_100 group by c1";
let actual = execute_to_batches(&mut ctx, sql).await;
let formatted = arrow::util::pretty::pretty_format_batches(&actual).unwrap();
- let formatted = normalize_for_explain(&formatted);
// Only test basic plumbing and try to avoid having to change too
// many things. explain_analyze_baseline_metrics covers the values
@@ -2763,7 +2780,6 @@ async fn csv_explain_analyze_verbose() {
"EXPLAIN ANALYZE VERBOSE SELECT count(*), c1 FROM aggregate_test_100 group by c1";
let actual = execute_to_batches(&mut ctx, sql).await;
let formatted = arrow::util::pretty::pretty_format_batches(&actual).unwrap();
- let formatted = normalize_for_explain(&formatted);
let verbose_needle = "Output Rows";
assert_contains!(formatted, verbose_needle);
@@ -2813,7 +2829,6 @@ async fn explain_analyze_baseline_metrics() {
let results = collect(physical_plan.clone()).await.unwrap();
let formatted = arrow::util::pretty::pretty_format_batches(&results).unwrap();
println!("Query Output:\n\n{}", formatted);
- let formatted = normalize_for_explain(&formatted);
assert_metrics!(
&formatted,
@@ -3541,9 +3556,17 @@ async fn query_not() -> Result<()> {
let mut ctx = ExecutionContext::new();
ctx.register_table("test", Arc::new(table))?;
let sql = "SELECT NOT c1 FROM test";
- let actual = execute(&mut ctx, sql).await;
- let expected = vec![vec!["true"], vec!["NULL"], vec!["false"]];
- assert_eq!(expected, actual);
+ let actual = execute_to_batches(&mut ctx, sql).await;
+ let expected = vec![
+ "+-------------+",
+ "| NOT test.c1 |",
+ "+-------------+",
+ "| true |",
+ "| |",
+ "| false |",
+ "+-------------+",
+ ];
+ assert_batches_eq!(expected, &actual);
Ok(())
}
@@ -3567,17 +3590,22 @@ async fn query_concat() -> Result<()> {
let mut ctx = ExecutionContext::new();
ctx.register_table("test", Arc::new(table))?;
let sql = "SELECT concat(c1, '-hi-', cast(c2 as varchar)) FROM test";
- let actual = execute(&mut ctx, sql).await;
+ let actual = execute_to_batches(&mut ctx, sql).await;
let expected = vec![
- vec!["-hi-0"],
- vec!["a-hi-1"],
- vec!["aa-hi-"],
- vec!["aaa-hi-3"],
+ "+----------------------------------------------------+",
+ "| concat(test.c1,Utf8(\"-hi-\"),CAST(test.c2 AS Utf8)) |",
+ "+----------------------------------------------------+",
+ "| -hi-0 |",
+ "| a-hi-1 |",
+ "| aa-hi- |",
+ "| aaa-hi-3 |",
+ "+----------------------------------------------------+",
];
- assert_eq!(expected, actual);
+ assert_batches_eq!(expected, &actual);
Ok(())
}
+// Revisit
#[tokio::test]
async fn query_array() -> Result<()> {
let schema = Arc::new(Schema::new(vec![
@@ -3626,27 +3654,24 @@ async fn query_where_neg_num() -> Result<()> {
// Negative numbers do not parse correctly as of Arrow 2.0.0
let sql = "select c7, c8 from aggregate_test_100 where c7 >= -2 and c7 < 10";
- let actual = execute(&mut ctx, sql).await;
+ let actual = execute_to_batches(&mut ctx, sql).await;
let expected = vec![
- vec!["7", "45465"],
- vec!["5", "40622"],
- vec!["0", "61069"],
- vec!["2", "20120"],
- vec!["4", "39363"],
+ "+----+-------+",
+ "| c7 | c8 |",
+ "+----+-------+",
+ "| 7 | 45465 |",
+ "| 5 | 40622 |",
+ "| 0 | 61069 |",
+ "| 2 | 20120 |",
+ "| 4 | 39363 |",
+ "+----+-------+",
];
- assert_eq!(expected, actual);
+ assert_batches_eq!(expected, &actual);
// Also check floating point neg numbers
let sql = "select c7, c8 from aggregate_test_100 where c7 >= -2.9 and c7 < 10";
- let actual = execute(&mut ctx, sql).await;
- let expected = vec![
- vec!["7", "45465"],
- vec!["5", "40622"],
- vec!["0", "61069"],
- vec!["2", "20120"],
- vec!["4", "39363"],
- ];
- assert_eq!(expected, actual);
+ let actual = execute_to_batches(&mut ctx, sql).await;
+ assert_batches_eq!(expected, &actual);
Ok(())
}
@@ -3656,10 +3681,15 @@ async fn like() -> Result<()> {
register_aggregate_csv_by_sql(&mut ctx).await;
let sql = "SELECT COUNT(c1) FROM aggregate_test_100 WHERE c13 LIKE '%FB%'";
// check that the physical and logical schemas are equal
- let actual = execute(&mut ctx, sql).await;
-
- let expected = vec![vec!["1"]];
- assert_eq!(expected, actual);
+ let actual = execute_to_batches(&mut ctx, sql).await;
+ let expected = vec![
+ "+------------------------------+",
+ "| COUNT(aggregate_test_100.c1) |",
+ "+------------------------------+",
+ "| 1 |",
+ "+------------------------------+",
+ ];
+ assert_batches_eq!(expected, &actual);
Ok(())
}
@@ -3712,10 +3742,16 @@ async fn to_timestamp() -> Result<()> {
ctx.register_table("ts_data", make_timestamp_nano_table()?)?;
let sql = "SELECT COUNT(*) FROM ts_data where ts > to_timestamp('2020-09-08T12:00:00+00:00')";
- let actual = execute(&mut ctx, sql).await;
+ let actual = execute_to_batches(&mut ctx, sql).await;
- let expected = vec![vec!["2"]];
- assert_eq!(expected, actual);
+ let expected = vec![
+ "+-----------------+",
+ "| COUNT(UInt8(1)) |",
+ "+-----------------+",
+ "| 2 |",
+ "+-----------------+",
+ ];
+ assert_batches_eq!(expected, &actual);
Ok(())
}
@@ -3728,10 +3764,15 @@ async fn to_timestamp_millis() -> Result<()> {
)?;
let sql = "SELECT COUNT(*) FROM ts_data where ts > to_timestamp_millis('2020-09-08T12:00:00+00:00')";
- let actual = execute(&mut ctx, sql).await;
-
- let expected = vec![vec!["2"]];
- assert_eq!(expected, actual);
+ let actual = execute_to_batches(&mut ctx, sql).await;
+ let expected = vec![
+ "+-----------------+",
+ "| COUNT(UInt8(1)) |",
+ "+-----------------+",
+ "| 2 |",
+ "+-----------------+",
+ ];
+ assert_batches_eq!(expected, &actual);
Ok(())
}
@@ -3744,10 +3785,16 @@ async fn to_timestamp_micros() -> Result<()> {
)?;
let sql = "SELECT COUNT(*) FROM ts_data where ts > to_timestamp_micros('2020-09-08T12:00:00+00:00')";
- let actual = execute(&mut ctx, sql).await;
+ let actual = execute_to_batches(&mut ctx, sql).await;
- let expected = vec![vec!["2"]];
- assert_eq!(expected, actual);
+ let expected = vec![
+ "+-----------------+",
+ "| COUNT(UInt8(1)) |",
+ "+-----------------+",
+ "| 2 |",
+ "+-----------------+",
+ ];
+ assert_batches_eq!(expected, &actual);
Ok(())
}
@@ -3757,13 +3804,21 @@ async fn to_timestamp_seconds() -> Result<()> {
ctx.register_table("ts_data", make_timestamp_table::<TimestampSecondType>()?)?;
let sql = "SELECT COUNT(*) FROM ts_data where ts > to_timestamp_seconds('2020-09-08T12:00:00+00:00')";
- let actual = execute(&mut ctx, sql).await;
+ let actual = execute_to_batches(&mut ctx, sql).await;
- let expected = vec![vec!["2"]];
- assert_eq!(expected, actual);
+ let expected = vec![
+ "+-----------------+",
+ "| COUNT(UInt8(1)) |",
+ "+-----------------+",
+ "| 2 |",
+ "+-----------------+",
+ ];
+ assert_batches_eq!(expected, &actual);
Ok(())
}
+// --- End Test Porting ---
+
#[tokio::test]
async fn count_distinct_timestamps() -> Result<()> {
let mut ctx = ExecutionContext::new();