You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2019/07/15 04:21:35 UTC

[spark] branch master updated: [SPARK-28387][SQL][TEST] Port select_having.sql

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new e238ebe  [SPARK-28387][SQL][TEST] Port select_having.sql
e238ebe is described below

commit e238ebe9b0355e871cebb8f939f1bcc879068e8d
Author: Yuming Wang <yu...@ebay.com>
AuthorDate: Sun Jul 14 21:21:09 2019 -0700

    [SPARK-28387][SQL][TEST] Port select_having.sql
    
    ## What changes were proposed in this pull request?
    
    This PR is to port select.sql from PostgreSQL regression tests. https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/select_having.sql
    
    The expected results can be found in the link: https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/expected/select_having.out
    
    When porting the test cases, found one bug:
    [SPARK-28386](https://issues.apache.org/jira/browse/SPARK-28386): Cannot resolve ORDER BY columns with GROUP BY and HAVING
    
    ## How was this patch tested?
    
    N/A
    
    Closes #25151 from wangyum/SPARK-28387.
    
    Authored-by: Yuming Wang <yu...@ebay.com>
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
 .../sql-tests/inputs/pgSQL/select_having.sql       |  55 ++++++
 .../sql-tests/results/pgSQL/select_having.sql.out  | 187 +++++++++++++++++++++
 2 files changed, 242 insertions(+)

diff --git a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/select_having.sql b/sql/core/src/test/resources/sql-tests/inputs/pgSQL/select_having.sql
new file mode 100644
index 0000000..2edde8d
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/pgSQL/select_having.sql
@@ -0,0 +1,55 @@
+--
+-- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
+--
+--
+-- SELECT_HAVING
+-- https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/select_having.sql
+--
+
+-- load test data
+CREATE TABLE test_having (a int, b int, c string, d string) USING parquet;
+INSERT INTO test_having VALUES (0, 1, 'XXXX', 'A');
+INSERT INTO test_having VALUES (1, 2, 'AAAA', 'b');
+INSERT INTO test_having VALUES (2, 2, 'AAAA', 'c');
+INSERT INTO test_having VALUES (3, 3, 'BBBB', 'D');
+INSERT INTO test_having VALUES (4, 3, 'BBBB', 'e');
+INSERT INTO test_having VALUES (5, 3, 'bbbb', 'F');
+INSERT INTO test_having VALUES (6, 4, 'cccc', 'g');
+INSERT INTO test_having VALUES (7, 4, 'cccc', 'h');
+INSERT INTO test_having VALUES (8, 4, 'CCCC', 'I');
+INSERT INTO test_having VALUES (9, 4, 'CCCC', 'j');
+
+SELECT b, c FROM test_having
+	GROUP BY b, c HAVING count(*) = 1 ORDER BY b, c;
+
+-- HAVING is effectively equivalent to WHERE in this case
+SELECT b, c FROM test_having
+	GROUP BY b, c HAVING b = 3 ORDER BY b, c;
+
+-- [SPARK-28386] Cannot resolve ORDER BY columns with GROUP BY and HAVING
+-- SELECT lower(c), count(c) FROM test_having
+-- 	GROUP BY lower(c) HAVING count(*) > 2 OR min(a) = max(a)
+-- 	ORDER BY lower(c);
+
+SELECT c, max(a) FROM test_having
+	GROUP BY c HAVING count(*) > 2 OR min(a) = max(a)
+	ORDER BY c;
+
+-- test degenerate cases involving HAVING without GROUP BY
+-- Per SQL spec, these should generate 0 or 1 row, even without aggregates
+
+SELECT min(a), max(a) FROM test_having HAVING min(a) = max(a);
+SELECT min(a), max(a) FROM test_having HAVING min(a) < max(a);
+
+-- errors: ungrouped column references
+SELECT a FROM test_having HAVING min(a) < max(a);
+SELECT 1 AS one FROM test_having HAVING a > 1;
+
+-- the really degenerate case: need not scan table at all
+SELECT 1 AS one FROM test_having HAVING 1 > 2;
+SELECT 1 AS one FROM test_having HAVING 1 < 2;
+
+-- and just to prove that we aren't scanning the table:
+SELECT 1 AS one FROM test_having WHERE 1/a = 1 HAVING 1 < 2;
+
+DROP TABLE test_having;
diff --git a/sql/core/src/test/resources/sql-tests/results/pgSQL/select_having.sql.out b/sql/core/src/test/resources/sql-tests/results/pgSQL/select_having.sql.out
new file mode 100644
index 0000000..02536eb
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/pgSQL/select_having.sql.out
@@ -0,0 +1,187 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 22
+
+
+-- !query 0
+CREATE TABLE test_having (a int, b int, c string, d string) USING parquet
+-- !query 0 schema
+struct<>
+-- !query 0 output
+
+
+
+-- !query 1
+INSERT INTO test_having VALUES (0, 1, 'XXXX', 'A')
+-- !query 1 schema
+struct<>
+-- !query 1 output
+
+
+
+-- !query 2
+INSERT INTO test_having VALUES (1, 2, 'AAAA', 'b')
+-- !query 2 schema
+struct<>
+-- !query 2 output
+
+
+
+-- !query 3
+INSERT INTO test_having VALUES (2, 2, 'AAAA', 'c')
+-- !query 3 schema
+struct<>
+-- !query 3 output
+
+
+
+-- !query 4
+INSERT INTO test_having VALUES (3, 3, 'BBBB', 'D')
+-- !query 4 schema
+struct<>
+-- !query 4 output
+
+
+
+-- !query 5
+INSERT INTO test_having VALUES (4, 3, 'BBBB', 'e')
+-- !query 5 schema
+struct<>
+-- !query 5 output
+
+
+
+-- !query 6
+INSERT INTO test_having VALUES (5, 3, 'bbbb', 'F')
+-- !query 6 schema
+struct<>
+-- !query 6 output
+
+
+
+-- !query 7
+INSERT INTO test_having VALUES (6, 4, 'cccc', 'g')
+-- !query 7 schema
+struct<>
+-- !query 7 output
+
+
+
+-- !query 8
+INSERT INTO test_having VALUES (7, 4, 'cccc', 'h')
+-- !query 8 schema
+struct<>
+-- !query 8 output
+
+
+
+-- !query 9
+INSERT INTO test_having VALUES (8, 4, 'CCCC', 'I')
+-- !query 9 schema
+struct<>
+-- !query 9 output
+
+
+
+-- !query 10
+INSERT INTO test_having VALUES (9, 4, 'CCCC', 'j')
+-- !query 10 schema
+struct<>
+-- !query 10 output
+
+
+
+-- !query 11
+SELECT b, c FROM test_having
+	GROUP BY b, c HAVING count(*) = 1 ORDER BY b, c
+-- !query 11 schema
+struct<b:int,c:string>
+-- !query 11 output
+1	XXXX
+3	bbbb
+
+
+-- !query 12
+SELECT b, c FROM test_having
+	GROUP BY b, c HAVING b = 3 ORDER BY b, c
+-- !query 12 schema
+struct<b:int,c:string>
+-- !query 12 output
+3	BBBB
+3	bbbb
+
+
+-- !query 13
+SELECT c, max(a) FROM test_having
+	GROUP BY c HAVING count(*) > 2 OR min(a) = max(a)
+	ORDER BY c
+-- !query 13 schema
+struct<c:string,max(a):int>
+-- !query 13 output
+XXXX	0
+bbbb	5
+
+
+-- !query 14
+SELECT min(a), max(a) FROM test_having HAVING min(a) = max(a)
+-- !query 14 schema
+struct<min(a):int,max(a):int>
+-- !query 14 output
+
+
+
+-- !query 15
+SELECT min(a), max(a) FROM test_having HAVING min(a) < max(a)
+-- !query 15 schema
+struct<min(a):int,max(a):int>
+-- !query 15 output
+0	9
+
+
+-- !query 16
+SELECT a FROM test_having HAVING min(a) < max(a)
+-- !query 16 schema
+struct<>
+-- !query 16 output
+org.apache.spark.sql.AnalysisException
+grouping expressions sequence is empty, and 'default.test_having.`a`' is not an aggregate function. Wrap '(min(default.test_having.`a`) AS `min(a#x)`, max(default.test_having.`a`) AS `max(a#x)`)' in windowing function(s) or wrap 'default.test_having.`a`' in first() (or first_value) if you don't care which value you get.;
+
+
+-- !query 17
+SELECT 1 AS one FROM test_having HAVING a > 1
+-- !query 17 schema
+struct<>
+-- !query 17 output
+org.apache.spark.sql.AnalysisException
+cannot resolve '`a`' given input columns: [one]; line 1 pos 40
+
+
+-- !query 18
+SELECT 1 AS one FROM test_having HAVING 1 > 2
+-- !query 18 schema
+struct<one:int>
+-- !query 18 output
+
+
+
+-- !query 19
+SELECT 1 AS one FROM test_having HAVING 1 < 2
+-- !query 19 schema
+struct<one:int>
+-- !query 19 output
+1
+
+
+-- !query 20
+SELECT 1 AS one FROM test_having WHERE 1/a = 1 HAVING 1 < 2
+-- !query 20 schema
+struct<one:int>
+-- !query 20 output
+1
+
+
+-- !query 21
+DROP TABLE test_having
+-- !query 21 schema
+struct<>
+-- !query 21 output
+


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org