You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by li...@apache.org on 2017/01/06 03:00:46 UTC
spark git commit: [SPARK-18871][SQL] New test cases for IN/NOT IN
subquery
Repository: spark
Updated Branches:
refs/heads/master dfc4c935b -> bcc510b02
[SPARK-18871][SQL] New test cases for IN/NOT IN subquery
## What changes were proposed in this pull request?
This PR extends the existing IN/NOT IN subquery test cases coverage, adds more test cases to the IN subquery test suite.
Based on the discussion, we will create `subquery/in-subquery` sub structure under `sql/core/src/test/resources/sql-tests/inputs` directory.
This is the high level grouping for IN subquery:
`subquery/in-subquery/`
`subquery/in-subquery/simple-in.sql`
`subquery/in-subquery/in-group-by.sql (in parent side, subquery, and both)`
`subquery/in-subquery/not-in-group-by.sql`
`subquery/in-subquery/in-order-by.sql`
`subquery/in-subquery/in-limit.sql`
`subquery/in-subquery/in-having.sql`
`subquery/in-subquery/in-joins.sql`
`subquery/in-subquery/not-in-joins.sql`
`subquery/in-subquery/in-set-operations.sql`
`subquery/in-subquery/in-with-cte.sql`
`subquery/in-subquery/not-in-with-cte.sql`
subquery/in-subquery/in-multiple-columns.sql`
We will deliver it through multiple prs, this is the first pr for the IN subquery, it has
`subquery/in-subquery/simple-in.sql`
`subquery/in-subquery/in-group-by.sql (in parent side, subquery, and both)`
These are the results from running on DB2.
[Modified test file of in-group-by.sql used to run on DB2](https://github.com/apache/spark/files/683367/in-group-by.sql.db2.txt)
[Output of the run result on DB2](https://github.com/apache/spark/files/683362/in-group-by.sql.db2.out.txt)
[Modified test file of simple-in.sql used to run on DB2](https://github.com/apache/spark/files/683378/simple-in.sql.db2.txt)
[Output of the run result on DB2](https://github.com/apache/spark/files/683379/simple-in.sql.db2.out.txt)
## How was this patch tested?
This patch is adding tests.
Author: Kevin Yu <qy...@us.ibm.com>
Closes #16337 from kevinyu98/spark-18871.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bcc510b0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bcc510b0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bcc510b0
Branch: refs/heads/master
Commit: bcc510b021391035abe6d07c5b82bb0f0be31167
Parents: dfc4c93
Author: Kevin Yu <qy...@us.ibm.com>
Authored: Thu Jan 5 19:00:39 2017 -0800
Committer: gatorsmile <ga...@gmail.com>
Committed: Thu Jan 5 19:00:39 2017 -0800
----------------------------------------------------------------------
.../inputs/subquery/in-subquery/in-group-by.sql | 239 +++++++++++++
.../inputs/subquery/in-subquery/simple-in.sql | 112 ++++++
.../subquery/in-subquery/in-group-by.sql.out | 357 +++++++++++++++++++
.../subquery/in-subquery/simple-in.sql.out | 176 +++++++++
4 files changed, 884 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/bcc510b0/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-group-by.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-group-by.sql
new file mode 100644
index 0000000..b1d96b3
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-group-by.sql
@@ -0,0 +1,239 @@
+-- A test suite for GROUP BY in parent side, subquery, and both predicate subquery
+-- It includes correlated cases.
+
+create temporary view t1 as select * from values
+ ("t1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'),
+ ("t1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'),
+ ("t1a", 16S, 12, 21L, float(15.0), 20D, 20E2, timestamp '2014-06-04 01:02:00.001', date '2014-06-04'),
+ ("t1a", 16S, 12, 10L, float(15.0), 20D, 20E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'),
+ ("t1c", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:02:00.001', date '2014-05-05'),
+ ("t1d", null, 16, 22L, float(17.0), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', null),
+ ("t1d", null, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-07-04 01:02:00.001', null),
+ ("t1e", 10S, null, 25L, float(17.0), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-04'),
+ ("t1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-09-04 01:02:00.001', date '2014-09-04'),
+ ("t1d", 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'),
+ ("t1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'),
+ ("t1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04')
+ as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i);
+
+create temporary view t2 as select * from values
+ ("t2a", 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'),
+ ("t1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'),
+ ("t1b", 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'),
+ ("t1c", 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'),
+ ("t1b", null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04 01:01:00.000', null),
+ ("t2e", 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'),
+ ("t1f", 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'),
+ ("t1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'),
+ ("t1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'),
+ ("t1c", 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'),
+ ("t1e", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'),
+ ("t1f", 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'),
+ ("t1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null)
+ as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i);
+
+create temporary view t3 as select * from values
+ ("t3a", 6S, 12, 110L, float(15), 20D, 20E2, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'),
+ ("t3a", 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
+ ("t1b", 10S, 12, 219L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
+ ("t1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
+ ("t1b", 8S, 16, 319L, float(17), 25D, 26E2, timestamp '2014-06-04 01:02:00.000', date '2014-06-04'),
+ ("t1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:02:00.000', date '2014-07-04'),
+ ("t3c", 17S, 16, 519L, float(17), 25D, 26E2, timestamp '2014-08-04 01:02:00.000', date '2014-08-04'),
+ ("t3c", 17S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:02:00.000', date '2014-09-05'),
+ ("t1b", null, 16, 419L, float(17), 25D, 26E2, timestamp '2014-10-04 01:02:00.000', null),
+ ("t1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-11-04 01:02:00.000', null),
+ ("t3b", 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
+ ("t3b", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04')
+ as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i);
+
+-- correlated IN subquery
+-- GROUP BY in parent side
+-- TC 01.01
+SELECT t1a,
+ Avg(t1b)
+FROM t1
+WHERE t1a IN (SELECT t2a
+ FROM t2)
+GROUP BY t1a;
+
+-- TC 01.02
+SELECT t1a,
+ Max(t1b)
+FROM t1
+WHERE t1b IN (SELECT t2b
+ FROM t2
+ WHERE t1a = t2a)
+GROUP BY t1a,
+ t1d;
+
+-- TC 01.03
+SELECT t1a,
+ t1b
+FROM t1
+WHERE t1c IN (SELECT t2c
+ FROM t2
+ WHERE t1a = t2a)
+GROUP BY t1a,
+ t1b;
+
+-- TC 01.04
+SELECT t1a,
+ Sum(DISTINCT( t1b ))
+FROM t1
+WHERE t1c IN (SELECT t2c
+ FROM t2
+ WHERE t1a = t2a)
+ OR t1c IN (SELECT t3c
+ FROM t3
+ WHERE t1a = t3a)
+GROUP BY t1a,
+ t1c;
+
+-- TC 01.05
+SELECT t1a,
+ Sum(DISTINCT( t1b ))
+FROM t1
+WHERE t1c IN (SELECT t2c
+ FROM t2
+ WHERE t1a = t2a)
+ AND t1c IN (SELECT t3c
+ FROM t3
+ WHERE t1a = t3a)
+GROUP BY t1a,
+ t1c;
+
+-- TC 01.06
+SELECT t1a,
+ Count(DISTINCT( t1b ))
+FROM t1
+WHERE t1c IN (SELECT t2c
+ FROM t2
+ WHERE t1a = t2a)
+GROUP BY t1a,
+ t1c
+HAVING t1a = "t1b";
+
+-- GROUP BY in subquery
+-- TC 01.07
+SELECT *
+FROM t1
+WHERE t1b IN (SELECT Max(t2b)
+ FROM t2
+ GROUP BY t2a);
+
+-- TC 01.08
+SELECT *
+FROM (SELECT t2a,
+ t2b
+ FROM t2
+ WHERE t2a IN (SELECT t1a
+ FROM t1
+ WHERE t1b = t2b)
+ GROUP BY t2a,
+ t2b) t2;
+
+-- TC 01.09
+SELECT Count(DISTINCT( * ))
+FROM t1
+WHERE t1b IN (SELECT Min(t2b)
+ FROM t2
+ WHERE t1a = t2a
+ AND t1c = t2c
+ GROUP BY t2a);
+
+-- TC 01.10
+SELECT t1a,
+ t1b
+FROM t1
+WHERE t1c IN (SELECT Max(t2c)
+ FROM t2
+ WHERE t1a = t2a
+ GROUP BY t2a,
+ t2c
+ HAVING t2c > 8);
+
+-- TC 01.11
+SELECT t1a,
+ t1b
+FROM t1
+WHERE t1c IN (SELECT t2c
+ FROM t2
+ WHERE t2a IN (SELECT Min(t3a)
+ FROM t3
+ WHERE t3a = t2a
+ GROUP BY t3b)
+ GROUP BY t2c);
+
+-- GROUP BY in both
+-- TC 01.12
+SELECT t1a,
+ Min(t1b)
+FROM t1
+WHERE t1c IN (SELECT Min(t2c)
+ FROM t2
+ WHERE t2b = t1b
+ GROUP BY t2a)
+GROUP BY t1a;
+
+-- TC 01.13
+SELECT t1a,
+ Min(t1b)
+FROM t1
+WHERE t1c IN (SELECT Min(t2c)
+ FROM t2
+ WHERE t2b IN (SELECT Min(t3b)
+ FROM t3
+ WHERE t2a = t3a
+ GROUP BY t3a)
+ GROUP BY t2c)
+GROUP BY t1a,
+ t1d;
+
+-- TC 01.14
+SELECT t1a,
+ Min(t1b)
+FROM t1
+WHERE t1c IN (SELECT Min(t2c)
+ FROM t2
+ WHERE t2b = t1b
+ GROUP BY t2a)
+ AND t1d IN (SELECT t3d
+ FROM t3
+ WHERE t1c = t3c
+ GROUP BY t3d)
+GROUP BY t1a;
+
+-- TC 01.15
+SELECT t1a,
+ Min(t1b)
+FROM t1
+WHERE t1c IN (SELECT Min(t2c)
+ FROM t2
+ WHERE t2b = t1b
+ GROUP BY t2a)
+ OR t1d IN (SELECT t3d
+ FROM t3
+ WHERE t1c = t3c
+ GROUP BY t3d)
+GROUP BY t1a;
+
+-- TC 01.16
+SELECT t1a,
+ Min(t1b)
+FROM t1
+WHERE t1c IN (SELECT Min(t2c)
+ FROM t2
+ WHERE t2b = t1b
+ GROUP BY t2a
+ HAVING t2a > t1a)
+ OR t1d IN (SELECT t3d
+ FROM t3
+ WHERE t1c = t3c
+ GROUP BY t3d
+ HAVING t3d = t1d)
+GROUP BY t1a
+HAVING Min(t1b) IS NOT NULL;
+
+
+
http://git-wip-us.apache.org/repos/asf/spark/blob/bcc510b0/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/simple-in.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/simple-in.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/simple-in.sql
new file mode 100644
index 0000000..20370b0
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/simple-in.sql
@@ -0,0 +1,112 @@
+-- A test suite for simple IN predicate subquery
+-- It includes correlated cases.
+
+create temporary view t1 as select * from values
+ ("t1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'),
+ ("t1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'),
+ ("t1a", 16S, 12, 21L, float(15.0), 20D, 20E2, timestamp '2014-06-04 01:02:00.001', date '2014-06-04'),
+ ("t1a", 16S, 12, 10L, float(15.0), 20D, 20E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'),
+ ("t1c", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:02:00.001', date '2014-05-05'),
+ ("t1d", null, 16, 22L, float(17.0), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', null),
+ ("t1d", null, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-07-04 01:02:00.001', null),
+ ("t1e", 10S, null, 25L, float(17.0), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-04'),
+ ("t1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-09-04 01:02:00.001', date '2014-09-04'),
+ ("t1d", 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'),
+ ("t1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'),
+ ("t1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04')
+ as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i);
+
+create temporary view t2 as select * from values
+ ("t2a", 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'),
+ ("t1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'),
+ ("t1b", 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'),
+ ("t1c", 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'),
+ ("t1b", null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04 01:01:00.000', null),
+ ("t2e", 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'),
+ ("t1f", 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'),
+ ("t1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'),
+ ("t1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'),
+ ("t1c", 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'),
+ ("t1e", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'),
+ ("t1f", 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'),
+ ("t1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null)
+ as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i);
+
+create temporary view t3 as select * from values
+ ("t3a", 6S, 12, 110L, float(15), 20D, 20E2, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'),
+ ("t3a", 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
+ ("t1b", 10S, 12, 219L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
+ ("t1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
+ ("t1b", 8S, 16, 319L, float(17), 25D, 26E2, timestamp '2014-06-04 01:02:00.000', date '2014-06-04'),
+ ("t1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:02:00.000', date '2014-07-04'),
+ ("t3c", 17S, 16, 519L, float(17), 25D, 26E2, timestamp '2014-08-04 01:02:00.000', date '2014-08-04'),
+ ("t3c", 17S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:02:00.000', date '2014-09-05'),
+ ("t1b", null, 16, 419L, float(17), 25D, 26E2, timestamp '2014-10-04 01:02:00.000', null),
+ ("t1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-11-04 01:02:00.000', null),
+ ("t3b", 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
+ ("t3b", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04')
+ as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i);
+
+-- correlated IN subquery
+-- simple select
+-- TC 01.01
+SELECT *
+FROM t1
+WHERE t1a IN (SELECT t2a
+ FROM t2);
+
+-- TC 01.02
+SELECT *
+FROM t1
+WHERE t1b IN (SELECT t2b
+ FROM t2
+ WHERE t1a = t2a);
+
+-- TC 01.03
+SELECT t1a,
+ t1b
+FROM t1
+WHERE t1c IN (SELECT t2b
+ FROM t2
+ WHERE t1a != t2a);
+
+-- TC 01.04
+SELECT t1a,
+ t1b
+FROM t1
+WHERE t1c IN (SELECT t2b
+ FROM t2
+ WHERE t1a = t2a
+ OR t1b > t2b);
+
+-- TC 01.05
+SELECT t1a,
+ t1b
+FROM t1
+WHERE t1c IN (SELECT t2b
+ FROM t2
+ WHERE t2i IN (SELECT t3i
+ FROM t3
+ WHERE t2c = t3c));
+
+-- TC 01.06
+SELECT t1a,
+ t1b
+FROM t1
+WHERE t1c IN (SELECT t2b
+ FROM t2
+ WHERE t2a IN (SELECT t3a
+ FROM t3
+ WHERE t2c = t3c
+ AND t2b IS NOT NULL));
+
+-- simple select for NOT IN
+-- TC 01.07
+SELECT DISTINCT( t1a ),
+ t1b,
+ t1h
+FROM t1
+WHERE t1a NOT IN (SELECT t2a
+ FROM t2);
+
+
http://git-wip-us.apache.org/repos/asf/spark/blob/bcc510b0/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-group-by.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-group-by.sql.out
new file mode 100644
index 0000000..a159aa8
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-group-by.sql.out
@@ -0,0 +1,357 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 19
+
+
+-- !query 0
+create temporary view t1 as select * from values
+ ("t1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'),
+ ("t1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'),
+ ("t1a", 16S, 12, 21L, float(15.0), 20D, 20E2, timestamp '2014-06-04 01:02:00.001', date '2014-06-04'),
+ ("t1a", 16S, 12, 10L, float(15.0), 20D, 20E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'),
+ ("t1c", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:02:00.001', date '2014-05-05'),
+ ("t1d", null, 16, 22L, float(17.0), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', null),
+ ("t1d", null, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-07-04 01:02:00.001', null),
+ ("t1e", 10S, null, 25L, float(17.0), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-04'),
+ ("t1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-09-04 01:02:00.001', date '2014-09-04'),
+ ("t1d", 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'),
+ ("t1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'),
+ ("t1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04')
+ as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i)
+-- !query 0 schema
+struct<>
+-- !query 0 output
+
+
+
+-- !query 1
+create temporary view t2 as select * from values
+ ("t2a", 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'),
+ ("t1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'),
+ ("t1b", 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'),
+ ("t1c", 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'),
+ ("t1b", null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04 01:01:00.000', null),
+ ("t2e", 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'),
+ ("t1f", 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'),
+ ("t1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'),
+ ("t1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'),
+ ("t1c", 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'),
+ ("t1e", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'),
+ ("t1f", 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'),
+ ("t1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null)
+ as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i)
+-- !query 1 schema
+struct<>
+-- !query 1 output
+
+
+
+-- !query 2
+create temporary view t3 as select * from values
+ ("t3a", 6S, 12, 110L, float(15), 20D, 20E2, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'),
+ ("t3a", 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
+ ("t1b", 10S, 12, 219L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
+ ("t1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
+ ("t1b", 8S, 16, 319L, float(17), 25D, 26E2, timestamp '2014-06-04 01:02:00.000', date '2014-06-04'),
+ ("t1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:02:00.000', date '2014-07-04'),
+ ("t3c", 17S, 16, 519L, float(17), 25D, 26E2, timestamp '2014-08-04 01:02:00.000', date '2014-08-04'),
+ ("t3c", 17S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:02:00.000', date '2014-09-05'),
+ ("t1b", null, 16, 419L, float(17), 25D, 26E2, timestamp '2014-10-04 01:02:00.000', null),
+ ("t1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-11-04 01:02:00.000', null),
+ ("t3b", 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
+ ("t3b", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04')
+ as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i)
+-- !query 2 schema
+struct<>
+-- !query 2 output
+
+
+
+-- !query 3
+SELECT t1a,
+ Avg(t1b)
+FROM t1
+WHERE t1a IN (SELECT t2a
+ FROM t2)
+GROUP BY t1a
+-- !query 3 schema
+struct<t1a:string,avg(t1b):double>
+-- !query 3 output
+t1b 8.0
+t1c 8.0
+t1e 10.0
+
+
+-- !query 4
+SELECT t1a,
+ Max(t1b)
+FROM t1
+WHERE t1b IN (SELECT t2b
+ FROM t2
+ WHERE t1a = t2a)
+GROUP BY t1a,
+ t1d
+-- !query 4 schema
+struct<t1a:string,max(t1b):smallint>
+-- !query 4 output
+t1b 8
+
+
+-- !query 5
+SELECT t1a,
+ t1b
+FROM t1
+WHERE t1c IN (SELECT t2c
+ FROM t2
+ WHERE t1a = t2a)
+GROUP BY t1a,
+ t1b
+-- !query 5 schema
+struct<t1a:string,t1b:smallint>
+-- !query 5 output
+t1b 8
+t1c 8
+
+
+-- !query 6
+SELECT t1a,
+ Sum(DISTINCT( t1b ))
+FROM t1
+WHERE t1c IN (SELECT t2c
+ FROM t2
+ WHERE t1a = t2a)
+ OR t1c IN (SELECT t3c
+ FROM t3
+ WHERE t1a = t3a)
+GROUP BY t1a,
+ t1c
+-- !query 6 schema
+struct<t1a:string,sum(DISTINCT t1b):bigint>
+-- !query 6 output
+t1b 8
+t1c 8
+
+
+-- !query 7
+SELECT t1a,
+ Sum(DISTINCT( t1b ))
+FROM t1
+WHERE t1c IN (SELECT t2c
+ FROM t2
+ WHERE t1a = t2a)
+ AND t1c IN (SELECT t3c
+ FROM t3
+ WHERE t1a = t3a)
+GROUP BY t1a,
+ t1c
+-- !query 7 schema
+struct<t1a:string,sum(DISTINCT t1b):bigint>
+-- !query 7 output
+t1b 8
+
+
+-- !query 8
+SELECT t1a,
+ Count(DISTINCT( t1b ))
+FROM t1
+WHERE t1c IN (SELECT t2c
+ FROM t2
+ WHERE t1a = t2a)
+GROUP BY t1a,
+ t1c
+HAVING t1a = "t1b"
+-- !query 8 schema
+struct<t1a:string,count(DISTINCT t1b):bigint>
+-- !query 8 output
+t1b 1
+
+
+-- !query 9
+SELECT *
+FROM t1
+WHERE t1b IN (SELECT Max(t2b)
+ FROM t2
+ GROUP BY t2a)
+-- !query 9 schema
+struct<t1a:string,t1b:smallint,t1c:int,t1d:bigint,t1e:float,t1f:double,t1g:decimal(2,-2),t1h:timestamp,t1i:date>
+-- !query 9 output
+t1a 6 8 10 15.0 20.0 2000 2014-04-04 01:00:00 2014-04-04
+t1a 6 8 10 15.0 20.0 2000 2014-04-04 01:02:00.001 2014-04-04
+t1b 8 16 19 17.0 25.0 2600 2014-05-04 01:01:00 2014-05-04
+t1c 8 16 19 17.0 25.0 2600 2014-05-04 01:02:00.001 2014-05-05
+t1d 10 NULL 12 17.0 25.0 2600 2015-05-04 01:01:00 2015-05-04
+t1e 10 NULL 19 17.0 25.0 2600 2014-05-04 01:01:00 2014-05-04
+t1e 10 NULL 19 17.0 25.0 2600 2014-09-04 01:02:00.001 2014-09-04
+t1e 10 NULL 25 17.0 25.0 2600 2014-08-04 01:01:00 2014-08-04
+
+
+-- !query 10
+SELECT *
+FROM (SELECT t2a,
+ t2b
+ FROM t2
+ WHERE t2a IN (SELECT t1a
+ FROM t1
+ WHERE t1b = t2b)
+ GROUP BY t2a,
+ t2b) t2
+-- !query 10 schema
+struct<t2a:string,t2b:smallint>
+-- !query 10 output
+t1b 8
+
+
+-- !query 11
+SELECT Count(DISTINCT( * ))
+FROM t1
+WHERE t1b IN (SELECT Min(t2b)
+ FROM t2
+ WHERE t1a = t2a
+ AND t1c = t2c
+ GROUP BY t2a)
+-- !query 11 schema
+struct<count(DISTINCT t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i):bigint>
+-- !query 11 output
+1
+
+
+-- !query 12
+SELECT t1a,
+ t1b
+FROM t1
+WHERE t1c IN (SELECT Max(t2c)
+ FROM t2
+ WHERE t1a = t2a
+ GROUP BY t2a,
+ t2c
+ HAVING t2c > 8)
+-- !query 12 schema
+struct<t1a:string,t1b:smallint>
+-- !query 12 output
+t1b 8
+t1c 8
+
+
+-- !query 13
+SELECT t1a,
+ t1b
+FROM t1
+WHERE t1c IN (SELECT t2c
+ FROM t2
+ WHERE t2a IN (SELECT Min(t3a)
+ FROM t3
+ WHERE t3a = t2a
+ GROUP BY t3b)
+ GROUP BY t2c)
+-- !query 13 schema
+struct<t1a:string,t1b:smallint>
+-- !query 13 output
+t1a 16
+t1a 16
+t1b 8
+t1c 8
+t1d NULL
+t1d NULL
+
+
+-- !query 14
+SELECT t1a,
+ Min(t1b)
+FROM t1
+WHERE t1c IN (SELECT Min(t2c)
+ FROM t2
+ WHERE t2b = t1b
+ GROUP BY t2a)
+GROUP BY t1a
+-- !query 14 schema
+struct<t1a:string,min(t1b):smallint>
+-- !query 14 output
+t1b 8
+t1c 8
+
+
+-- !query 15
+SELECT t1a,
+ Min(t1b)
+FROM t1
+WHERE t1c IN (SELECT Min(t2c)
+ FROM t2
+ WHERE t2b IN (SELECT Min(t3b)
+ FROM t3
+ WHERE t2a = t3a
+ GROUP BY t3a)
+ GROUP BY t2c)
+GROUP BY t1a,
+ t1d
+-- !query 15 schema
+struct<t1a:string,min(t1b):smallint>
+-- !query 15 output
+t1b 8
+t1c 8
+t1d NULL
+t1d NULL
+
+
+-- !query 16
+SELECT t1a,
+ Min(t1b)
+FROM t1
+WHERE t1c IN (SELECT Min(t2c)
+ FROM t2
+ WHERE t2b = t1b
+ GROUP BY t2a)
+ AND t1d IN (SELECT t3d
+ FROM t3
+ WHERE t1c = t3c
+ GROUP BY t3d)
+GROUP BY t1a
+-- !query 16 schema
+struct<t1a:string,min(t1b):smallint>
+-- !query 16 output
+t1b 8
+t1c 8
+
+
+-- !query 17
+SELECT t1a,
+ Min(t1b)
+FROM t1
+WHERE t1c IN (SELECT Min(t2c)
+ FROM t2
+ WHERE t2b = t1b
+ GROUP BY t2a)
+ OR t1d IN (SELECT t3d
+ FROM t3
+ WHERE t1c = t3c
+ GROUP BY t3d)
+GROUP BY t1a
+-- !query 17 schema
+struct<t1a:string,min(t1b):smallint>
+-- !query 17 output
+t1a 16
+t1b 8
+t1c 8
+t1d NULL
+
+
+-- !query 18
+SELECT t1a,
+ Min(t1b)
+FROM t1
+WHERE t1c IN (SELECT Min(t2c)
+ FROM t2
+ WHERE t2b = t1b
+ GROUP BY t2a
+ HAVING t2a > t1a)
+ OR t1d IN (SELECT t3d
+ FROM t3
+ WHERE t1c = t3c
+ GROUP BY t3d
+ HAVING t3d = t1d)
+GROUP BY t1a
+HAVING Min(t1b) IS NOT NULL
+-- !query 18 schema
+struct<t1a:string,min(t1b):smallint>
+-- !query 18 output
+t1a 16
+t1b 8
+t1c 8
http://git-wip-us.apache.org/repos/asf/spark/blob/bcc510b0/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/simple-in.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/simple-in.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/simple-in.sql.out
new file mode 100644
index 0000000..66493d7
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/simple-in.sql.out
@@ -0,0 +1,176 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 10
+
+
+-- !query 0
+create temporary view t1 as select * from values
+ ("t1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'),
+ ("t1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'),
+ ("t1a", 16S, 12, 21L, float(15.0), 20D, 20E2, timestamp '2014-06-04 01:02:00.001', date '2014-06-04'),
+ ("t1a", 16S, 12, 10L, float(15.0), 20D, 20E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'),
+ ("t1c", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:02:00.001', date '2014-05-05'),
+ ("t1d", null, 16, 22L, float(17.0), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', null),
+ ("t1d", null, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-07-04 01:02:00.001', null),
+ ("t1e", 10S, null, 25L, float(17.0), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-04'),
+ ("t1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-09-04 01:02:00.001', date '2014-09-04'),
+ ("t1d", 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'),
+ ("t1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'),
+ ("t1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04')
+ as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i)
+-- !query 0 schema
+struct<>
+-- !query 0 output
+
+
+
+-- !query 1
+create temporary view t2 as select * from values
+ ("t2a", 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'),
+ ("t1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'),
+ ("t1b", 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'),
+ ("t1c", 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'),
+ ("t1b", null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04 01:01:00.000', null),
+ ("t2e", 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'),
+ ("t1f", 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'),
+ ("t1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'),
+ ("t1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'),
+ ("t1c", 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'),
+ ("t1e", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'),
+ ("t1f", 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'),
+ ("t1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null)
+ as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i)
+-- !query 1 schema
+struct<>
+-- !query 1 output
+
+
+
+-- !query 2
+create temporary view t3 as select * from values
+ ("t3a", 6S, 12, 110L, float(15), 20D, 20E2, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'),
+ ("t3a", 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
+ ("t1b", 10S, 12, 219L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
+ ("t1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
+ ("t1b", 8S, 16, 319L, float(17), 25D, 26E2, timestamp '2014-06-04 01:02:00.000', date '2014-06-04'),
+ ("t1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:02:00.000', date '2014-07-04'),
+ ("t3c", 17S, 16, 519L, float(17), 25D, 26E2, timestamp '2014-08-04 01:02:00.000', date '2014-08-04'),
+ ("t3c", 17S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:02:00.000', date '2014-09-05'),
+ ("t1b", null, 16, 419L, float(17), 25D, 26E2, timestamp '2014-10-04 01:02:00.000', null),
+ ("t1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-11-04 01:02:00.000', null),
+ ("t3b", 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
+ ("t3b", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04')
+ as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i)
+-- !query 2 schema
+struct<>
+-- !query 2 output
+
+
+
+-- !query 3
+SELECT *
+FROM t1
+WHERE t1a IN (SELECT t2a
+ FROM t2)
+-- !query 3 schema
+struct<t1a:string,t1b:smallint,t1c:int,t1d:bigint,t1e:float,t1f:double,t1g:decimal(2,-2),t1h:timestamp,t1i:date>
+-- !query 3 output
+t1b 8 16 19 17.0 25.0 2600 2014-05-04 01:01:00 2014-05-04
+t1c 8 16 19 17.0 25.0 2600 2014-05-04 01:02:00.001 2014-05-05
+t1e 10 NULL 19 17.0 25.0 2600 2014-05-04 01:01:00 2014-05-04
+t1e 10 NULL 19 17.0 25.0 2600 2014-09-04 01:02:00.001 2014-09-04
+t1e 10 NULL 25 17.0 25.0 2600 2014-08-04 01:01:00 2014-08-04
+
+
+-- !query 4
+SELECT *
+FROM t1
+WHERE t1b IN (SELECT t2b
+ FROM t2
+ WHERE t1a = t2a)
+-- !query 4 schema
+struct<t1a:string,t1b:smallint,t1c:int,t1d:bigint,t1e:float,t1f:double,t1g:decimal(2,-2),t1h:timestamp,t1i:date>
+-- !query 4 output
+t1b 8 16 19 17.0 25.0 2600 2014-05-04 01:01:00 2014-05-04
+
+
+-- !query 5
+SELECT t1a,
+ t1b
+FROM t1
+WHERE t1c IN (SELECT t2b
+ FROM t2
+ WHERE t1a != t2a)
+-- !query 5 schema
+struct<t1a:string,t1b:smallint>
+-- !query 5 output
+t1a 16
+t1a 16
+t1a 6
+t1a 6
+
+
+-- !query 6
+SELECT t1a,
+ t1b
+FROM t1
+WHERE t1c IN (SELECT t2b
+ FROM t2
+ WHERE t1a = t2a
+ OR t1b > t2b)
+-- !query 6 schema
+struct<t1a:string,t1b:smallint>
+-- !query 6 output
+t1a 16
+t1a 16
+
+
+-- !query 7
+SELECT t1a,
+ t1b
+FROM t1
+WHERE t1c IN (SELECT t2b
+ FROM t2
+ WHERE t2i IN (SELECT t3i
+ FROM t3
+ WHERE t2c = t3c))
+-- !query 7 schema
+struct<t1a:string,t1b:smallint>
+-- !query 7 output
+t1a 6
+t1a 6
+
+
+-- !query 8
+SELECT t1a,
+ t1b
+FROM t1
+WHERE t1c IN (SELECT t2b
+ FROM t2
+ WHERE t2a IN (SELECT t3a
+ FROM t3
+ WHERE t2c = t3c
+ AND t2b IS NOT NULL))
+-- !query 8 schema
+struct<t1a:string,t1b:smallint>
+-- !query 8 output
+t1a 6
+t1a 6
+
+
+-- !query 9
+SELECT DISTINCT( t1a ),
+ t1b,
+ t1h
+FROM t1
+WHERE t1a NOT IN (SELECT t2a
+ FROM t2)
+-- !query 9 schema
+struct<t1a:string,t1b:smallint,t1h:timestamp>
+-- !query 9 output
+t1a 16 2014-06-04 01:02:00.001
+t1a 16 2014-07-04 01:01:00
+t1a 6 2014-04-04 01:00:00
+t1a 6 2014-04-04 01:02:00.001
+t1d 10 2015-05-04 01:01:00
+t1d NULL 2014-06-04 01:01:00
+t1d NULL 2014-07-04 01:02:00.001
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org