You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2023/01/31 01:03:42 UTC

[arrow-datafusion] branch master updated: [sqllogictest] Apply rowsort when there is no explicit order by (#5110)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new 71353bb9a [sqllogictest] Apply rowsort when there is no explicit order by (#5110)
71353bb9a is described below

commit 71353bb9ad99a0688a9ae36a5cda77a5ab6af00b
Author: Yevhenii Melnyk <me...@gmail.com>
AuthorDate: Tue Jan 31 02:03:36 2023 +0100

    [sqllogictest] Apply rowsort when there is no explicit order by (#5110)
    
    * [sqllogictest] Apply `rowsort` when there is no explicit `order by`
    
    * Update readme
    
    * Apply prettier
    
    * `nosort` is the default sorting mode
---
 datafusion/core/tests/sqllogictests/README.md      |  20 ++-
 .../tests/sqllogictests/test_files/decimal.slt     | 166 ++++++++++-----------
 .../tests/sqllogictests/test_files/describe.slt    |   4 +-
 .../test_files/information_schema.slt              |  14 +-
 .../core/tests/sqllogictests/test_files/nullif.slt |  31 ++--
 .../test_files/pg_compat/pg_compat_simple.slt      |   2 +-
 .../core/tests/sqllogictests/test_files/select.slt |   4 +-
 .../tests/sqllogictests/test_files/strings.slt     |  18 +--
 8 files changed, 137 insertions(+), 122 deletions(-)

diff --git a/datafusion/core/tests/sqllogictests/README.md b/datafusion/core/tests/sqllogictests/README.md
index e3cfbde2e..afd6f7d3c 100644
--- a/datafusion/core/tests/sqllogictests/README.md
+++ b/datafusion/core/tests/sqllogictests/README.md
@@ -97,17 +97,27 @@ Query records follow the format:
 
 ```sql
 # <test_name>
-query <type_string> <sort_mode> <label>
+query <type_string> <sort_mode>
 <sql_query>
 ----
 <expected_result>
 ```
 
 - `test_name`: Uniquely identify the test name (arrow-datafusion only)
-- `type_string`: A short string that specifies the number of result columns and the expected datatype of each result column. There is one character in the <type_string> for each result column. The characters codes are "T" for a text result, "I" for an integer result, and "R" for a floating-point result.
-- (Optional) `label`: sqllogictest stores a hash of the results of this query under the given label. If the label is reused, then sqllogictest verifies that the results are the same. This can be used to verify that two or more queries in the same test script that are logically equivalent always generate the same output.
-- `expected_result`: In the results section, integer values are rendered as if by printf("%d"). Floating point values are rendered as if by printf("%.3f"). NULL values are rendered as "NULL". Empty strings are rendered as "(empty)". Within non-empty strings, all control characters and unprintable characters are rendered as "@".
-- `sort_mode`: If included, it must be one of "nosort", "rowsort", or "valuesort". The default is "nosort". In nosort mode, the results appear in exactly the order in which they were received from the database engine. The nosort mode should only be used on queries that have an ORDER BY clause or which only have a single row of result, since otherwise the order of results is undefined and might vary from one database engine to another. The "rowsort" mode gathers all output from the databa [...]
+- `type_string`: A short string that specifies the number of result columns and the expected datatype of each result column. There is one character in the <type_string> for each result column. The characters codes are:
+  - "T" for a text result,
+  - "I" for an integer result,
+  - "R" for a floating-point result,
+  - "?" for any other type.
+- `expected_result`: In the results section, some values are converted according to some rules:
+  - floating point values are rounded to the scale of "12",
+  - NULL values are rendered as `NULL`,
+  - empty strings are rendered as `(empty)`,
+  - boolean values are rendered as `true`/`false`,
+  - this list can be not exhaustive, check the `datafusion/core/tests/sqllogictests/src/engines/conversion.rs` for details.
+- `sort_mode`: If included, it must be one of `nosort` (**default**), `rowsort`, or `valuesort`. In `nosort` mode, the results appear in exactly the order in which they were received from the database engine. The `nosort` mode should only be used on queries that have an `ORDER BY` clause or which only have a single row of result, since otherwise the order of results is undefined and might vary from one database engine to another. The `rowsort` mode gathers all output from the database en [...]
+
+> :warning: It is encouraged to either apply `order by`, or use `rowsort` for queries without explicit `order by` clauses.
 
 ##### Example
 
diff --git a/datafusion/core/tests/sqllogictests/test_files/decimal.slt b/datafusion/core/tests/sqllogictests/test_files/decimal.slt
index 7b9eee872..10c11f574 100644
--- a/datafusion/core/tests/sqllogictests/test_files/decimal.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/decimal.slt
@@ -54,7 +54,7 @@ select arrow_typeof(c1), arrow_typeof(c5) from decimal_simple where c1 > c5 limi
 Decimal128(10, 6) Decimal128(12, 7)
 
 
-query R
+query R rowsort
 SELECT c1 from decimal_simple;
 ----
 0.00001
@@ -74,7 +74,7 @@ SELECT c1 from decimal_simple;
 0.00005
 
 
-query R
+query R rowsort
 select c1 from decimal_simple where c1 > 0.000030;
 ----
 0.00004
@@ -88,7 +88,7 @@ select c1 from decimal_simple where c1 > 0.000030;
 0.00005
 
 
-query RRI?R
+query RRI?R rowsort
 select * from decimal_simple where c1 > c5;
 ----
 0.00002 0.000000000002 3 false 0.000019
@@ -124,29 +124,29 @@ select arrow_typeof(avg(c1)), avg(c1) from decimal_simple;
 Decimal128(14, 10) 0.0000366666
 
 
-query RRI?R
+query RRI?R rowsort
 select * from decimal_simple where c1=CAST(0.00002 as Decimal(10,8));
 ----
 0.00002 0.000000000002 2 true 0.000025
 0.00002 0.000000000002 3 false 0.000019
 
 
-query RI
+query RI rowsort
 select c2,c3 from decimal_simple where c1!=0.00002;
 ----
 0.000000000001 1
 0.000000000003 4
 0.000000000003 5
 0.000000000003 5
-0.000000000004 5
 0.000000000004 12
 0.000000000004 14
+0.000000000004 5
 0.000000000004 8
-0.000000000005 9
+0.000000000005 1
+0.000000000005 100
 0.000000000005 4
 0.000000000005 8
-0.000000000005 100
-0.000000000005 1
+0.000000000005 9
 
 
 query RRI?R
@@ -155,7 +155,7 @@ select * from decimal_simple where 0.00002 > c1;
 0.00001 0.000000000001 1 true 0.000014
 
 
-query RRI?R
+query RRI?R rowsort
 select * from decimal_simple where c1 <= 0.00002;
 ----
 0.00001 0.000000000001 1 true 0.000014
@@ -163,24 +163,24 @@ select * from decimal_simple where c1 <= 0.00002;
 0.00002 0.000000000002 3 false 0.000019
 
 
-query RRI?R
+query RRI?R rowsort
 select * from decimal_simple where c1 > 0.00002;
 ----
 0.00003 0.000000000003 4 true 0.000032
 0.00003 0.000000000003 5 false 0.000035
 0.00003 0.000000000003 5 true 0.000011
-0.00004 0.000000000004 5 true 0.000044
 0.00004 0.000000000004 12 false 0.00004
 0.00004 0.000000000004 14 true 0.00004
+0.00004 0.000000000004 5 true 0.000044
 0.00004 0.000000000004 8 false 0.000044
-0.00005 0.000000000005 9 true 0.000052
+0.00005 0.000000000005 1 false 0.0001
+0.00005 0.000000000005 100 true 0.000068
 0.00005 0.000000000005 4 true 0.000078
 0.00005 0.000000000005 8 false 0.000033
-0.00005 0.000000000005 100 true 0.000068
-0.00005 0.000000000005 1 false 0.0001
+0.00005 0.000000000005 9 true 0.000052
 
 
-query RRI?R
+query RRI?R rowsort
 select * from decimal_simple where c1 >= 0.00002;
 ----
 0.00002 0.000000000002 2 true 0.000025
@@ -188,15 +188,15 @@ select * from decimal_simple where c1 >= 0.00002;
 0.00003 0.000000000003 4 true 0.000032
 0.00003 0.000000000003 5 false 0.000035
 0.00003 0.000000000003 5 true 0.000011
-0.00004 0.000000000004 5 true 0.000044
 0.00004 0.000000000004 12 false 0.00004
 0.00004 0.000000000004 14 true 0.00004
+0.00004 0.000000000004 5 true 0.000044
 0.00004 0.000000000004 8 false 0.000044
-0.00005 0.000000000005 9 true 0.000052
+0.00005 0.000000000005 1 false 0.0001
+0.00005 0.000000000005 100 true 0.000068
 0.00005 0.000000000005 4 true 0.000078
 0.00005 0.000000000005 8 false 0.000033
-0.00005 0.000000000005 100 true 0.000068
-0.00005 0.000000000005 1 false 0.0001
+0.00005 0.000000000005 9 true 0.000052
 
 
 query ?
@@ -205,7 +205,7 @@ select arrow_typeof(c1+1) from decimal_simple limit 1;
 Decimal128(27, 6)
 
 
-query R
+query R rowsort
 select c1+1 from decimal_simple;
 ----
 1.00001
@@ -232,23 +232,23 @@ select arrow_typeof(c1+c5) from decimal_simple limit 1;
 Decimal128(13, 7)
 
 
-query R
+query R rowsort
 select c1+c5 from decimal_simple;
 ----
 0.000024
-0.000045
 0.000039
+0.000041
+0.000045
 0.000062
 0.000065
-0.000041
-0.000084
 0.00008
 0.00008
+0.000083
+0.000084
 0.000084
 0.000102
-0.000128
-0.000083
 0.000118
+0.000128
 0.00015
 
 
@@ -258,24 +258,24 @@ select arrow_typeof(c1-1) from decimal_simple limit 1;
 Decimal128(27, 6)
 
 
-query R
+query R rowsort
 select c1-1 from decimal_simple;
 ----
--0.99999
--0.99998
--0.99998
--0.99997
--0.99997
--0.99997
--0.99996
--0.99996
--0.99996
--0.99996
 -0.99995
 -0.99995
 -0.99995
 -0.99995
 -0.99995
+-0.99996
+-0.99996
+-0.99996
+-0.99996
+-0.99997
+-0.99997
+-0.99997
+-0.99998
+-0.99998
+-0.99999
 
 
 query ?
@@ -284,24 +284,24 @@ select arrow_typeof(c1-c5) from decimal_simple limit 1;
 Decimal128(13, 7)
 
 
-query R
+query R rowsort
 select c1-c5 from decimal_simple;
 ----
+-0.000002
+-0.000002
+-0.000004
+-0.000004
 -0.000004
 -0.000005
-0.000001
--0.000002
 -0.000005
-0.000019
--0.000004
+-0.000018
+-0.000028
+-0.00005
 0
 0
--0.000004
--0.000002
--0.000028
+0.000001
 0.000017
--0.000018
--0.00005
+0.000019
 
 
 query ?
@@ -310,7 +310,7 @@ select arrow_typeof(c1*20) from decimal_simple limit 1;
 Decimal128(31, 6)
 
 
-query R
+query R rowsort
 select c1*20 from decimal_simple;
 ----
 0.0002
@@ -336,23 +336,23 @@ select arrow_typeof(c1*c5) from decimal_simple limit 1;
 Decimal128(23, 13)
 
 
-query R
+query R rowsort
 select c1*c5 from decimal_simple;
 ----
 0.00000000014
-0.0000000005
+0.00000000033
 0.00000000038
+0.0000000005
 0.00000000096
 0.00000000105
-0.00000000033
-0.00000000176
 0.0000000016
 0.0000000016
+0.00000000165
+0.00000000176
 0.00000000176
 0.0000000026
-0.0000000039
-0.00000000165
 0.0000000034
+0.0000000039
 0.000000005
 
 
@@ -362,7 +362,7 @@ select arrow_typeof(c1/cast(0.00001 as decimal(5,5))) from decimal_simple limit
 Decimal128(21, 12)
 
 
-query R
+query R rowsort
 select c1/cast(0.00001 as decimal(5,5)) from decimal_simple;
 ----
 1
@@ -388,24 +388,24 @@ select arrow_typeof(c1/c5) from decimal_simple limit 1;
 Decimal128(30, 19)
 
 
-query R
+query R rowsort
 select c1/c5 from decimal_simple;
 ----
+0.5
+0.641025641026
 0.714285714286
+0.735294117647
 0.8
-1.052631578947
-0.9375
 0.857142857143
-2.727272727273
 0.909090909091
-1
-1
 0.909090909091
+0.9375
 0.961538461538
-0.641025641026
+1
+1
+1.052631578947
 1.515151515152
-0.735294117647
-0.5
+2.727272727273
 
 
 query ?
@@ -414,24 +414,24 @@ select arrow_typeof(c5%cast(0.00001 as decimal(5,5))) from decimal_simple limit
 Decimal128(7, 7)
 
 
-query R
+query R rowsort
 select c5%cast(0.00001 as decimal(5,5)) from decimal_simple;
 ----
-0.000004
-0.000005
-0.000009
-0.000002
-0.000005
-0.000001
-0.000004
 0
 0
-0.000004
+0
+0.000001
+0.000002
 0.000002
-0.000008
 0.000003
+0.000004
+0.000004
+0.000004
+0.000005
+0.000005
 0.000008
-0
+0.000008
+0.000009
 
 
 query ?
@@ -440,22 +440,22 @@ select arrow_typeof(c1%c5) from decimal_simple limit 1;
 Decimal128(11, 7)
 
 
-query R
+query R rowsort
 select c1%c5 from decimal_simple;
 ----
+0
+0
+0.000001
+0.000008
 0.00001
+0.000017
 0.00002
-0.000001
 0.00003
 0.00003
-0.000008
 0.00004
-0
-0
 0.00004
 0.00005
 0.00005
-0.000017
 0.00005
 0.00005
 
@@ -466,7 +466,7 @@ select arrow_typeof(abs(c1)) from decimal_simple limit 1;
 Float64
 
 
-query R
+query R rowsort
 SELECT abs(c1) from decimal_simple;
 ----
 0.00001
diff --git a/datafusion/core/tests/sqllogictests/test_files/describe.slt b/datafusion/core/tests/sqllogictests/test_files/describe.slt
index bb5fdeb59..2b7f118a4 100644
--- a/datafusion/core/tests/sqllogictests/test_files/describe.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/describe.slt
@@ -26,7 +26,7 @@ set datafusion.catalog.information_schema = true
 statement ok
 CREATE external table aggregate_simple(c1 real, c2 double, c3 boolean) STORED as CSV WITH HEADER ROW LOCATION 'tests/data/aggregate_simple.csv';
 
-query C1
+query C1 rowsort
 DESCRIBE aggregate_simple;
 ----
 c1 Float32 NO
@@ -46,7 +46,7 @@ set datafusion.catalog.information_schema = false
 statement ok
 CREATE external table aggregate_simple(c1 real, c2 double, c3 boolean) STORED as CSV WITH HEADER ROW LOCATION 'tests/data/aggregate_simple.csv';
 
-query C2
+query C2 rowsort
 DESCRIBE aggregate_simple;
 ----
 c1 Float32 NO
diff --git a/datafusion/core/tests/sqllogictests/test_files/information_schema.slt b/datafusion/core/tests/sqllogictests/test_files/information_schema.slt
index bd796ed71..99b096de1 100644
--- a/datafusion/core/tests/sqllogictests/test_files/information_schema.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/information_schema.slt
@@ -102,7 +102,7 @@ statement ok
 SET datafusion.execution.target_partitions=7
 
 # show all variables
-query R
+query R rowsort
 SHOW ALL
 ----
 datafusion.catalog.create_default_catalog_and_schema true
@@ -165,7 +165,7 @@ datafusion.execution.time_zone +00:00
 statement ok
 CREATE OR REPLACE TABLE some_table AS VALUES (1,2),(3,4);
 
-query R
+query R rowsort
 DESCRIBE some_table
 ----
 column1 Int64 YES
@@ -179,7 +179,7 @@ DROP TABLE public.some_table;
 statement ok
 CREATE OR REPLACE TABLE public.some_table AS VALUES (1,2),(3,4);
 
-query R
+query R rowsort
 DESCRIBE public.some_table
 ----
 column1 Int64 YES
@@ -193,7 +193,7 @@ DROP TABLE public.some_table;
 statement ok
 CREATE OR REPLACE TABLE datafusion.public.some_table AS VALUES (1,2),(3,4);
 
-query R
+query R rowsort
 DESCRIBE datafusion.public.some_table
 ----
 column1 Int64 YES
@@ -209,13 +209,13 @@ describe table;
 
 
 # information_schema_show_tables
-query CCCC
+query CCCC rowsort
 SHOW TABLES
 ----
-datafusion information_schema tables VIEW
-datafusion information_schema views VIEW
 datafusion information_schema columns VIEW
 datafusion information_schema df_settings VIEW
+datafusion information_schema tables VIEW
+datafusion information_schema views VIEW
 
 
 # information_schema_show_tables_no_information_schema
diff --git a/datafusion/core/tests/sqllogictests/test_files/nullif.slt b/datafusion/core/tests/sqllogictests/test_files/nullif.slt
index d6dd92d33..9db3b5c01 100644
--- a/datafusion/core/tests/sqllogictests/test_files/nullif.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/nullif.slt
@@ -31,55 +31,60 @@ CREATE TABLE test(
 ;
 
 # Arrays tests
-query T
+query T rowsort
 SELECT NULLIF(int_field, 2) FROM test;
 ----
 1
-NULL
 3
-NULL
 4
 NULL
+NULL
+NULL
 
-query T
+
+query T rowsort
 SELECT NULLIF(bool_field, false) FROM test;
 ----
-true
 NULL
 NULL
 NULL
 NULL
 true
+true
 
-query T
+
+query T rowsort
 SELECT NULLIF(text_field, 'zxc') FROM test;
 ----
-abc
-def
-ghij
 NULL
 NULL
 NULL
+abc
+def
+ghij
 
-query T
+
+query T rowsort
 SELECT NULLIF(int_field, more_ints) FROM test;
 ----
 1
+4
 NULL
 NULL
 NULL
-4
 NULL
 
-query T
+
+query T rowsort
 SELECT NULLIF(3, int_field) FROM test;
 ----
 3
 3
-NULL
 3
 3
 3
+NULL
+
 
 # Scalar values tests
 query T
diff --git a/datafusion/core/tests/sqllogictests/test_files/pg_compat/pg_compat_simple.slt b/datafusion/core/tests/sqllogictests/test_files/pg_compat/pg_compat_simple.slt
index 68194fcec..344fe09d8 100644
--- a/datafusion/core/tests/sqllogictests/test_files/pg_compat/pg_compat_simple.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/pg_compat/pg_compat_simple.slt
@@ -316,7 +316,7 @@ ORDER BY c5 ASC, c4 DESC, c10;
 2143473091 -14704 4602675983996931623
 
 
-query IRII
+query IRII rowsort
 SELECT * FROM
 (VALUES (1,2.0,-3,1+1),(10,20.0,-30,2+2))
 AS tbl(int_col, float_col, negative_col, summation);
diff --git a/datafusion/core/tests/sqllogictests/test_files/select.slt b/datafusion/core/tests/sqllogictests/test_files/select.slt
index 2d8209fb7..1b5ede066 100644
--- a/datafusion/core/tests/sqllogictests/test_files/select.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/select.slt
@@ -44,14 +44,14 @@ VALUES (2+1,2-1,2>1)
 3    1    true
 
 # multiple rows values
-query I
+query I rowsort
 VALUES (1),(2)
 ----
 1
 2
 
 # multiple rows and columns from VALUES
-query IC
+query IC rowsort
 VALUES (1,'a'),(2,'b')
 ----
 1   a
diff --git a/datafusion/core/tests/sqllogictests/test_files/strings.slt b/datafusion/core/tests/sqllogictests/test_files/strings.slt
index 464aef6ab..27ed0e2d0 100644
--- a/datafusion/core/tests/sqllogictests/test_files/strings.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/strings.slt
@@ -32,22 +32,22 @@ CREATE TABLE test(
 ;
 
 # LIKE
-query T
+query T rowsort
 SELECT s FROM test WHERE s LIKE 'p1%';
 ----
 p1
 p1e1
 p1m1e1
 
-query T
+query T rowsort
 SELECT s FROM test WHERE s LIKE '%m1%';
 ----
-p1m1e1
 P1m1e1
+p1m1e1
 p2m1e1
 
 # NOT LIKE
-query T
+query T rowsort
 SELECT s FROM test WHERE s NOT LIKE 'p1%';
 ----
 P1
@@ -60,18 +60,18 @@ p2m1e1
 
 
 # ILIKE
-query T
+query T rowsort
 SELECT s FROM test WHERE s ILIKE 'p1%';
 ----
-p1
-p1e1
-p1m1e1
 P1
 P1e1
 P1m1e1
+p1
+p1e1
+p1m1e1
 
 # NOT ILIKE
-query T
+query T rowsort
 SELECT s FROM test WHERE s NOT ILIKE 'p1%';
 ----
 e1