You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2019/07/30 03:19:23 UTC

[GitHub] [spark] maropu commented on a change in pull request #25148: [SPARK-28326][SQL][TEST] Port join.sql

maropu commented on a change in pull request #25148: [SPARK-28326][SQL][TEST] Port join.sql
URL: https://github.com/apache/spark/pull/25148#discussion_r308518298
 
 

 ##########
 File path: sql/core/src/test/resources/sql-tests/inputs/pgSQL/join.sql
 ##########
 @@ -0,0 +1,2079 @@
+--
+-- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
+--
+--
+-- JOIN
+-- Test JOIN clauses
+-- https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/join.sql
+--
+CREATE OR REPLACE TEMPORARY VIEW INT4_TBL AS SELECT * FROM
+  (VALUES (0), (123456), (-123456), (2147483647), (-2147483647))
+  AS v(f1);
+CREATE OR REPLACE TEMPORARY VIEW INT8_TBL AS SELECT * FROM
+  (VALUES
+    (123, 456),
+    (123, 4567890123456789),
+    (4567890123456789, 123),
+    (4567890123456789, 4567890123456789),
+    (4567890123456789, -4567890123456789))
+  AS v(q1, q2);
+CREATE OR REPLACE TEMPORARY VIEW FLOAT8_TBL AS SELECT * FROM
+  (VALUES (0.0), (1004.30), (-34.84),
+    (cast('1.2345678901234e+200' as double)), (cast('1.2345678901234e-200' as double)))
+  AS v(f1);
+CREATE OR REPLACE TEMPORARY VIEW TEXT_TBL AS SELECT * FROM
+  (VALUES ('doh!'), ('hi de ho neighbor'))
+  AS v(f1);
+CREATE OR REPLACE TEMPORARY VIEW tenk2 AS SELECT * FROM tenk1;
+
+-- Join fixtures used by most of the tests below.
+-- J1_TBL(i, j, t) and J2_TBL(i, k) both contain rows with NULL in the join
+-- column i, and J2_TBL contains the row (5, -5) twice.
+CREATE TABLE J1_TBL (
+  i integer,
+  j integer,
+  t string
+) USING parquet;
+
+CREATE TABLE J2_TBL (
+  i integer,
+  k integer
+) USING parquet;
+
+
+INSERT INTO J1_TBL VALUES
+  (1, 4, 'one'),
+  (2, 3, 'two'),
+  (3, 2, 'three'),
+  (4, 1, 'four'),
+  (5, 0, 'five'),
+  (6, 6, 'six'),
+  (7, 7, 'seven'),
+  (8, 8, 'eight'),
+  (0, NULL, 'zero'),
+  (NULL, NULL, 'null'),
+  (NULL, 0, 'zero');
+
+INSERT INTO J2_TBL VALUES
+  (1, -1),
+  (2, 2),
+  (3, -3),
+  (2, 4),
+  (5, -5),
+  (5, -5),
+  (0, NULL),
+  (NULL, NULL),
+  (NULL, 0);
+
+-- [SPARK-20856] The onerow table is not needed because it is only used by test statements with nested joins
+-- useful in some tests below
+-- create temp table onerow();
+-- insert into onerow default values;
+-- analyze onerow;
+
+
+--
+-- CORRELATION NAMES
+-- Make sure that table/column aliases are supported
+-- before diving into more complex join syntax.
+--
+
+SELECT '' AS `xxx`, *
+  FROM J1_TBL AS tx;
+
+SELECT '' AS `xxx`, *
+  FROM J1_TBL tx;
+
+SELECT '' AS `xxx`, *
+  FROM J1_TBL AS t1 (a, b, c);
+
+SELECT '' AS `xxx`, *
+  FROM J1_TBL t1 (a, b, c);
+
+SELECT '' AS `xxx`, *
+  FROM J1_TBL t1 (a, b, c), J2_TBL t2 (d, e);
+
+-- [SPARK-28377] Fully support correlation names in the FROM clause
+-- SELECT '' AS `xxx`, t1.a, t2.e
+--   FROM J1_TBL t1 (a, b, c), J2_TBL t2 (d, e)
+--   WHERE t1.a = t2.d;
+
+
+--
+-- CROSS JOIN
+-- Qualifications are not allowed on cross joins,
+-- which degenerate into a standard unqualified inner join.
+--
+
+SELECT '' AS `xxx`, *
+  FROM J1_TBL CROSS JOIN J2_TBL;
+
+-- ambiguous column
+SELECT '' AS `xxx`, i, k, t
+  FROM J1_TBL CROSS JOIN J2_TBL;
+
+-- resolve previous ambiguity by specifying the table name
+SELECT '' AS `xxx`, t1.i, k, t
+  FROM J1_TBL t1 CROSS JOIN J2_TBL t2;
+
+SELECT '' AS `xxx`, ii, tt, kk
+  FROM (J1_TBL CROSS JOIN J2_TBL)
+    AS tx (ii, jj, tt, ii2, kk);
+
+-- [SPARK-28377] Fully support correlation names in the FROM clause
+-- SELECT '' AS `xxx`, tx.ii, tx.jj, tx.kk
+--   FROM (J1_TBL t1 (a, b, c) CROSS JOIN J2_TBL t2 (d, e))
+--     AS tx (ii, jj, tt, ii2, kk);
+
+SELECT '' AS `xxx`, *
+  FROM J1_TBL CROSS JOIN J2_TBL a CROSS JOIN J2_TBL b;
+
+
+--
+--
+-- Inner joins (equi-joins)
+--
+--
+
+--
+-- Inner joins (equi-joins) with USING clause
+-- The USING syntax changes the shape of the resulting table
+-- by including a column in the USING clause only once in the result.
+--
+
+-- Inner equi-join on specified column
+SELECT '' AS `xxx`, *
+  FROM J1_TBL INNER JOIN J2_TBL USING (i);
+
+-- Same as above, slightly different syntax
+SELECT '' AS `xxx`, *
+  FROM J1_TBL JOIN J2_TBL USING (i);
+
+SELECT '' AS `xxx`, *
+  FROM J1_TBL t1 (a, b, c) JOIN J2_TBL t2 (a, d) USING (a)
+  ORDER BY a, d;
+
+-- [SPARK-28377] Fully support correlation names in the FROM clause
+-- SELECT '' AS `xxx`, *
+--   FROM J1_TBL t1 (a, b, c) JOIN J2_TBL t2 (a, b) USING (b)
+--   ORDER BY b, t1.a;
+
+
+--
+-- NATURAL JOIN
+-- Inner equi-join on all columns with the same name
+--
+
+SELECT '' AS `xxx`, *
+  FROM J1_TBL NATURAL JOIN J2_TBL;
+
+SELECT '' AS `xxx`, *
+  FROM J1_TBL t1 (a, b, c) NATURAL JOIN J2_TBL t2 (a, d);
+
+SELECT '' AS `xxx`, *
+  FROM J1_TBL t1 (a, b, c) NATURAL JOIN J2_TBL t2 (d, a);
+
+-- [SPARK-28377] Fully support correlation names in the FROM clause
+-- mismatch number of columns
+-- currently, Postgres will fill in with underlying names
+-- SELECT '' AS `xxx`, *
+--   FROM J1_TBL t1 (a, b) NATURAL JOIN J2_TBL t2 (a);
+
+
+--
+-- Inner joins (equi-joins)
+--
+
+SELECT '' AS `xxx`, *
+  FROM J1_TBL JOIN J2_TBL ON (J1_TBL.i = J2_TBL.i);
+
+SELECT '' AS `xxx`, *
+  FROM J1_TBL JOIN J2_TBL ON (J1_TBL.i = J2_TBL.k);
+
+
+--
+-- Non-equi-joins
+--
+
+SELECT '' AS `xxx`, *
+  FROM J1_TBL JOIN J2_TBL ON (J1_TBL.i <= J2_TBL.k);
+
+
+--
+-- Outer joins
+-- Note that OUTER is a noise word
+--
+
+SELECT '' AS `xxx`, *
+  FROM J1_TBL LEFT OUTER JOIN J2_TBL USING (i)
+  ORDER BY i, k, t;
+
+SELECT '' AS `xxx`, *
+  FROM J1_TBL LEFT JOIN J2_TBL USING (i)
+  ORDER BY i, k, t;
+
+SELECT '' AS `xxx`, *
+  FROM J1_TBL RIGHT OUTER JOIN J2_TBL USING (i);
+
+SELECT '' AS `xxx`, *
+  FROM J1_TBL RIGHT JOIN J2_TBL USING (i);
+
+SELECT '' AS `xxx`, *
+  FROM J1_TBL FULL OUTER JOIN J2_TBL USING (i)
+  ORDER BY i, k, t;
+
+SELECT '' AS `xxx`, *
+  FROM J1_TBL FULL JOIN J2_TBL USING (i)
+  ORDER BY i, k, t;
+
+SELECT '' AS `xxx`, *
+  FROM J1_TBL LEFT JOIN J2_TBL USING (i) WHERE (k = 1);
+
+SELECT '' AS `xxx`, *
+  FROM J1_TBL LEFT JOIN J2_TBL USING (i) WHERE (i = 1);
+
+--
+-- semijoin selectivity for <>
+--
+-- explain (costs off)
+-- select * from int4_tbl i4, tenk1 a
+-- where exists(select * from tenk1 b
+--              where a.twothousand = b.twothousand and a.fivethous <> b.fivethous)
+--       and i4.f1 = a.tenthous;
+
+
+--
+-- More complicated constructs
+--
+
+--
+-- Multiway full join
+--
+
+-- Three small tables sharing a "name" column; only 'bb' appears in all three,
+-- so the chained FULL JOIN below must produce NULL-extended rows.
+CREATE TABLE t1 (name STRING, n INTEGER) USING parquet;
+CREATE TABLE t2 (name STRING, n INTEGER) USING parquet;
+CREATE TABLE t3 (name STRING, n INTEGER) USING parquet;
+
+INSERT INTO t1 VALUES ('bb', 11);
+INSERT INTO t2 VALUES ('bb', 12), ('cc', 22), ('ee', 42);
+INSERT INTO t3 VALUES ('bb', 13), ('cc', 23), ('dd', 33);
+
+SELECT *
+FROM t1
+  FULL JOIN t2 USING (name)
+  FULL JOIN t3 USING (name);
+
+--
+-- Test interactions of join syntax and subqueries
+--
+
+-- Basic cases (we expect planner to pull up the subquery here)
+SELECT * FROM
+(SELECT * FROM t2) as s2
+INNER JOIN
+(SELECT * FROM t3) s3
+USING (name);
+
+SELECT * FROM
+(SELECT * FROM t2) as s2
+LEFT JOIN
+(SELECT * FROM t3) s3
+USING (name);
+
+SELECT * FROM
+(SELECT * FROM t2) as s2
+FULL JOIN
+(SELECT * FROM t3) s3
+USING (name);
+
+-- Cases with non-nullable expressions in subquery results;
+-- make sure these go to null as expected
+SELECT * FROM
+(SELECT name, n as s2_n, 2 as s2_2 FROM t2) as s2
+NATURAL INNER JOIN
+(SELECT name, n as s3_n, 3 as s3_2 FROM t3) s3;
+
+SELECT * FROM
+(SELECT name, n as s2_n, 2 as s2_2 FROM t2) as s2
+NATURAL LEFT JOIN
+(SELECT name, n as s3_n, 3 as s3_2 FROM t3) s3;
+
+SELECT * FROM
+(SELECT name, n as s2_n, 2 as s2_2 FROM t2) as s2
+NATURAL FULL JOIN
+(SELECT name, n as s3_n, 3 as s3_2 FROM t3) s3;
+
+SELECT * FROM
+(SELECT name, n as s1_n, 1 as s1_1 FROM t1) as s1
+NATURAL INNER JOIN
+(SELECT name, n as s2_n, 2 as s2_2 FROM t2) as s2
+NATURAL INNER JOIN
+(SELECT name, n as s3_n, 3 as s3_2 FROM t3) s3;
+
+SELECT * FROM
+(SELECT name, n as s1_n, 1 as s1_1 FROM t1) as s1
+NATURAL FULL JOIN
+(SELECT name, n as s2_n, 2 as s2_2 FROM t2) as s2
+NATURAL FULL JOIN
+(SELECT name, n as s3_n, 3 as s3_2 FROM t3) s3;
+
+SELECT * FROM
+(SELECT name, n as s1_n FROM t1) as s1
+NATURAL FULL JOIN
+  (SELECT * FROM
+    (SELECT name, n as s2_n FROM t2) as s2
+    NATURAL FULL JOIN
+    (SELECT name, n as s3_n FROM t3) as s3
+  ) ss2;
+
+SELECT * FROM
+(SELECT name, n as s1_n FROM t1) as s1
+NATURAL FULL JOIN
+  (SELECT * FROM
+    (SELECT name, n as s2_n, 2 as s2_2 FROM t2) as s2
+    NATURAL FULL JOIN
+    (SELECT name, n as s3_n FROM t3) as s3
+  ) ss2;
+
+-- Constants as join keys can also be problematic
+SELECT * FROM
+  (SELECT name, n as s1_n FROM t1) as s1
+FULL JOIN
+  (SELECT name, 2 as s2_n FROM t2) as s2
+ON (s1_n = s2_n);
+
+
+-- Test for propagation of nullability constraints into sub-joins
+
+create or replace temporary view x as select * from
+  (values (1,11), (2,22), (3,null), (4,44), (5,null))
+  as v(x1, x2);
+
+create or replace temporary view y as select * from
+  (values (1,111), (2,222), (3,333), (4,null))
+  as v(y1, y2);
+
+select * from x;
+select * from y;
+
+select * from x left join y on (x1 = y1 and x2 is not null);
+select * from x left join y on (x1 = y1 and y2 is not null);
+
+select * from (x left join y on (x1 = y1)) left join x xx(xx1,xx2)
+on (x1 = xx1);
+select * from (x left join y on (x1 = y1)) left join x xx(xx1,xx2)
+on (x1 = xx1 and x2 is not null);
+select * from (x left join y on (x1 = y1)) left join x xx(xx1,xx2)
+on (x1 = xx1 and y2 is not null);
+select * from (x left join y on (x1 = y1)) left join x xx(xx1,xx2)
+on (x1 = xx1 and xx2 is not null);
+-- these should NOT give the same answers as above
+select * from (x left join y on (x1 = y1)) left join x xx(xx1,xx2)
+on (x1 = xx1) where (x2 is not null);
+select * from (x left join y on (x1 = y1)) left join x xx(xx1,xx2)
+on (x1 = xx1) where (y2 is not null);
+select * from (x left join y on (x1 = y1)) left join x xx(xx1,xx2)
+on (x1 = xx1) where (xx2 is not null);
+
+--
+-- regression test: check for bug with propagation of implied equality
+-- to outside an IN
+--
+select count(*) from tenk1 a where unique1 in
+  (select unique1 from tenk1 b join tenk1 c using (unique1)
+   where b.unique2 = 42);
+
+--
+-- regression test: check for failure to generate a plan with multiple
+-- degenerate IN clauses
+--
+select count(*) from tenk1 x where
+  x.unique1 in (select a.f1 from int4_tbl a,float8_tbl b where a.f1=b.f1) and
+  x.unique1 = 0 and
+  x.unique1 in (select aa.f1 from int4_tbl aa,float8_tbl bb where aa.f1=bb.f1);
+
+-- try that with GEQO too
+-- begin;
+-- set geqo = on;
+-- set geqo_threshold = 2;
+select count(*) from tenk1 x where
+  x.unique1 in (select a.f1 from int4_tbl a,float8_tbl b where a.f1=b.f1) and
+  x.unique1 = 0 and
+  x.unique1 in (select aa.f1 from int4_tbl aa,float8_tbl bb where aa.f1=bb.f1);
+-- rollback;
+
+-- Skipped: this test relies on table b inheriting from table a, and table inheritance is not supported here (see inherits.sql)
+--
+-- regression test: be sure we cope with proven-dummy append rels
+--
+-- explain (costs off)
+-- select aa, bb, unique1, unique1
+--   from tenk1 right join b on aa = unique1
+--   where bb < bb and bb is null;
+
+-- select aa, bb, unique1, unique1
+--   from tenk1 right join b on aa = unique1
+--   where bb < bb and bb is null;
+
+--
+-- regression test: check handling of empty-FROM subquery underneath outer join
+--
+-- explain (costs off)
+-- select * from int8_tbl i1 left join (int8_tbl i2 join
+--   (select 123 as x) ss on i2.q1 = x) on i1.q2 = i2.q2
+-- order by 1, 2;
+
+select * from int8_tbl i1 left join (int8_tbl i2 join
+  (select 123 as x) ss on i2.q1 = x) on i1.q2 = i2.q2
+order by 1, 2;
+
+--
+-- regression test: check a case where join_clause_is_movable_into() gives
+-- an imprecise result, causing an assertion failure
+--
+select count(*)
+from
+  (select t3.tenthous as x1, coalesce(t1.stringu1, t2.stringu1) as x2
+   from tenk1 t1
+   left join tenk1 t2 on t1.unique1 = t2.unique1
+   join tenk1 t3 on t1.unique2 = t3.unique2) ss,
+  tenk1 t4,
+  tenk1 t5
+where t4.thousand = t5.unique1 and ss.x1 = t4.tenthous and ss.x2 = t5.stringu1;
+
+--
+-- regression test: check a case where we formerly missed including an EC
+-- enforcement clause because it was expected to be handled at scan level
+--
+-- explain (costs off)
+-- select a.f1, b.f1, t.thousand, t.tenthous from
+--   tenk1 t,
+--   (select sum(f1)+1 as f1 from int4_tbl i4a) a,
+--   (select sum(f1) as f1 from int4_tbl i4b) b
+-- where b.f1 = t.thousand and a.f1 = b.f1 and (a.f1+b.f1+999) = t.tenthous;
+
+select a.f1, b.f1, t.thousand, t.tenthous from
+  tenk1 t,
+  (select sum(f1)+1 as f1 from int4_tbl i4a) a,
+  (select sum(f1) as f1 from int4_tbl i4b) b
+where b.f1 = t.thousand and a.f1 = b.f1 and (a.f1+b.f1+999) = t.tenthous;
+
+--
+-- check a case where we formerly got confused by conflicting sort orders
+-- in redundant merge join path keys
+--
+-- explain (costs off)
+-- select * from
+--   j1_tbl full join
+--   (select * from j2_tbl order by j2_tbl.i desc, j2_tbl.k asc) j2_tbl
+--   on j1_tbl.i = j2_tbl.i and j1_tbl.i = j2_tbl.k;
+
+select * from
+  j1_tbl full join
+  (select * from j2_tbl order by j2_tbl.i desc, j2_tbl.k asc) j2_tbl
+  on j1_tbl.i = j2_tbl.i and j1_tbl.i = j2_tbl.k;
+
+--
+-- a different check for handling of redundant sort keys in merge joins
+--
+-- explain (costs off)
+-- select count(*) from
+--   (select * from tenk1 x order by x.thousand, x.twothousand, x.fivethous) x
+--   left join
+--   (select * from tenk1 y order by y.unique2) y
+--   on x.thousand = y.unique2 and x.twothousand = y.hundred and x.fivethous = y.unique2;
+
+select count(*) from
+  (select * from tenk1 x order by x.thousand, x.twothousand, x.fivethous) x
+  left join
+  (select * from tenk1 y order by y.unique2) y
+  on x.thousand = y.unique2 and x.twothousand = y.hundred and x.fivethous = y.unique2;
+
+
+--
+-- Clean up
+--
+
+DROP TABLE t1;
+DROP TABLE t2;
+DROP TABLE t3;
+
+DROP TABLE J1_TBL;
+DROP TABLE J2_TBL;
+
+-- Both DELETE and UPDATE allow the specification of additional tables
+-- to "join" against to determine which rows should be modified.
+
+-- CREATE TEMP TABLE t1 (a int, b int);
+-- CREATE TEMP TABLE t2 (a int, b int);
+-- CREATE TEMP TABLE t3 (x int, y int);
+
+-- INSERT INTO t1 VALUES (5, 10);
+-- INSERT INTO t1 VALUES (15, 20);
+-- INSERT INTO t1 VALUES (100, 100);
+-- INSERT INTO t1 VALUES (200, 1000);
+-- INSERT INTO t2 VALUES (200, 2000);
+-- INSERT INTO t3 VALUES (5, 20);
+-- INSERT INTO t3 VALUES (6, 7);
+-- INSERT INTO t3 VALUES (7, 8);
+-- INSERT INTO t3 VALUES (500, 100);
+
+-- DELETE FROM t3 USING t1 table1 WHERE t3.x = table1.a;
+-- SELECT * FROM t3;
+-- DELETE FROM t3 USING t1 JOIN t2 USING (a) WHERE t3.x > t1.a;
+-- SELECT * FROM t3;
+-- DELETE FROM t3 USING t3 t3_other WHERE t3.x = t3_other.x AND t3.y = t3_other.y;
+-- SELECT * FROM t3;
+
+-- Test join against inheritance tree
+
+-- create temp table t2a () inherits (t2);
+
+-- insert into t2a values (200, 2001);
+
+-- select * from t1 left join t2 on (t1.a = t2.a);
+
+-- Test matching of column name with wrong alias
+
+-- select t1.x from t1 join t3 on (t1.a = t3.x);
+
+--
+-- regression test for 8.1 merge right join bug
+--
+
+create or replace temporary view tt1 as select * from
+  (values (1, 11), (2, NULL))
+  as v(tt1_id, joincol);
+
+create or replace temporary view tt2 as select * from
+  (values (21, 11), (22, 11))
+  as v(tt2_id, joincol);
+
+-- set enable_hashjoin to off;
+-- set enable_nestloop to off;
+
+-- these should give the same results
+
+select tt1.*, tt2.* from tt1 left join tt2 on tt1.joincol = tt2.joincol;
+
+select tt1.*, tt2.* from tt2 right join tt1 on tt1.joincol = tt2.joincol;
+
+-- reset enable_hashjoin;
+-- reset enable_nestloop;
+
+--
+-- regression test for bug #13908 (hash join with skew tuples & nbatch increase)
+--
+
+-- set work_mem to '64kB';
+-- set enable_mergejoin to off;
+
+-- explain (costs off)
+-- select count(*) from tenk1 a, tenk1 b
+--   where a.hundred = b.thousand and (b.fivethous % 10) < 10;
+select count(*) from tenk1 a, tenk1 b
+  where a.hundred = b.thousand and (b.fivethous % 10) < 10;
+
+-- reset work_mem;
+-- reset enable_mergejoin;
+
+--
+-- regression test for 8.2 bug with improper re-ordering of left joins
+--
+
+-- tt3 holds 10000 rows (f1 = 1..10000) with a 500-character filler string.
+DROP TABLE IF EXISTS tt3;
+CREATE TABLE tt3(f1 int, f2 string) USING parquet;
+INSERT INTO tt3 SELECT x.id, repeat('xyzzy', 100) FROM range(1,10001) x;
+-- The next two statements come from the PostgreSQL original and are not run here.
+-- create index tt3i on tt3(f1);
+-- analyze tt3;
+
+-- tt4 holds three probe keys: 0 has no match in tt3, 1 and 9999 do.
+DROP TABLE IF EXISTS tt4;
+CREATE TABLE tt4(f1 int) USING parquet;
+INSERT INTO tt4 VALUES (0),(1),(9999);
+-- analyze tt4;
+
+-- The inner anti-join (b LEFT JOIN c ... WHERE c.f1 IS NULL) yields no rows,
+-- so every row of tt4 survives the outer "d.f1 IS NULL" filter.
+SELECT a.f1
+FROM tt4 AS a
+LEFT JOIN (
+  SELECT b.f1
+  FROM tt3 AS b
+  LEFT JOIN tt3 AS c ON (b.f1 = c.f1)
+  WHERE c.f1 IS NULL
+) AS d ON (a.f1 = d.f1)
+WHERE d.f1 IS NULL;
+
+--
+-- regression test for proper handling of outer joins within antijoins
+--
+
+-- create temp table tt4x(c1 int, c2 int, c3 int);
+
+-- explain (costs off)
+-- select * from tt4x t1
+-- where not exists (
+--   select 1 from tt4x t2
+--     left join tt4x t3 on t2.c3 = t3.c1
+--     left join ( select t5.c1 as c1
+--                 from tt4x t4 left join tt4x t5 on t4.c2 = t5.c1
+--               ) a1 on t3.c2 = a1.c1
+--   where t1.c1 = t2.c2
+-- );
+
+--
+-- regression test for problems of the sort depicted in bug #3494
+--
+
+create or replace temporary view tt5 as select * from
+  (values (1, 10), (1, 11))
+  as v(f1, f2);
+create or replace temporary view tt6 as select * from
+  (values (1, 9), (1, 2), (2, 9))
+  as v(f1, f2);
+
+select * from tt5,tt6 where tt5.f1 = tt6.f1 and tt5.f1 = tt5.f2 - tt6.f2;
+
+--
+-- regression test for problems of the sort depicted in bug #3588
+--
+
+create or replace temporary view xx as select * from
+  (values (1), (2), (3))
+  as v(pkxx);
+create or replace temporary view yy as select * from
+  (values (101, 1), (201, 2), (301, NULL))
+  as v(pkyy, pkxx);
+
+select yy.pkyy as yy_pkyy, yy.pkxx as yy_pkxx, yya.pkyy as yya_pkyy,
+       xxa.pkxx as xxa_pkxx, xxb.pkxx as xxb_pkxx
+from yy
+     left join (SELECT * FROM yy where pkyy = 101) as yya ON yy.pkyy = yya.pkyy
+     left join xx xxa on yya.pkxx = xxa.pkxx
+     left join xx xxb on coalesce (xxa.pkxx, 1) = xxb.pkxx;
+
+--
+-- regression test for improper pushing of constants across outer-join clauses
+-- (as seen in early 8.2.x releases)
+--
+
+create or replace temporary view zt1 as select * from
+  (values (53))
+  as v(f1);
+create or replace temporary view zt2 as select * from
+  (values (53))
+  as v(f2);
+create or replace temporary view zt3(f3 int) using parquet;
+
+select * from
+  zt2 left join zt3 on (f2 = f3)
+      left join zt1 on (f3 = f1)
+where f2 = 53;
+
+create temp view zv1 as select *,'dummy' AS junk from zt1;
+
+select * from
+  zt2 left join zt3 on (f2 = f3)
+      left join zv1 on (f3 = f1)
+where f2 = 53;
+
+--
+-- regression test for improper extraction of OR indexqual conditions
+-- (as seen in early 8.3.x releases)
+--
+
+select a.unique2, a.ten, b.tenthous, b.unique2, b.hundred
+from tenk1 a left join tenk1 b on a.unique2 = b.tenthous
+where a.unique1 = 42 and
+      ((b.unique2 is null and a.ten = 2) or b.hundred = 3);
+
+--
+-- test proper positioning of one-time quals in EXISTS (8.4devel bug)
+--
+-- prepare foo(bool) as
+--   select count(*) from tenk1 a left join tenk1 b
+--     on (a.unique2 = b.unique1 and exists
+--         (select 1 from tenk1 c where c.thousand = b.unique2 and $1));
+-- execute foo(true);
+-- execute foo(false);
+
+--
+-- test for sane behavior with noncanonical merge clauses, per bug #4926
+--
+
+-- begin;
+
+-- set enable_mergejoin = 1;
+-- set enable_hashjoin = 0;
+-- set enable_nestloop = 0;
+
+create or replace temporary view a (i integer) using parquet;
+create or replace temporary view b (x integer, y integer) using parquet;
+
+select * from a left join b on i = x and i = y and x = i;
+
+-- rollback;
+
+--
+-- test handling of merge clauses using record_ops
+--
+-- begin;
+
+-- create type mycomptype as (id int, v bigint);
+
+-- create temp table tidv (idv mycomptype);
+-- create index on tidv (idv);
+
+-- explain (costs off)
+-- select a.idv, b.idv from tidv a, tidv b where a.idv = b.idv;
+
+-- set enable_mergejoin = 0;
+
+-- explain (costs off)
+-- select a.idv, b.idv from tidv a, tidv b where a.idv = b.idv;
+
+-- rollback;
+
+--
+-- test NULL behavior of whole-row Vars, per bug #5025
+--
+select t1.q2, count(t2.*)
+from int8_tbl t1 left join int8_tbl t2 on (t1.q2 = t2.q1)
+group by t1.q2 order by 1;
+
+select t1.q2, count(t2.*)
+from int8_tbl t1 left join (select * from int8_tbl) t2 on (t1.q2 = t2.q1)
+group by t1.q2 order by 1;
+
+-- [SPARK-28330] Enhance query limit
+-- select t1.q2, count(t2.*)
+-- from int8_tbl t1 left join (select * from int8_tbl offset 0) t2 on (t1.q2 = t2.q1)
+-- group by t1.q2 order by 1;
+
+select t1.q2, count(t2.*)
+from int8_tbl t1 left join
+  (select q1, case when q2=1 then 1 else q2 end as q2 from int8_tbl) t2
+  on (t1.q2 = t2.q1)
+group by t1.q2 order by 1;
+
+--
+-- test incorrect failure to NULL pulled-up subexpressions
+--
+-- begin;
+create or replace temporary view a as select * from
+  (values ('p'), ('q'))
+  as v(code);
+create or replace temporary view b as select * from
+  (values ('p', 1), ('p', 2))
+  as v(a, num);
+create or replace temporary view c as select * from
+  (values ('A', 'p'), ('B', 'q'), ('C', null))
+  as v(name, a);
+
+select c.name, ss.code, ss.b_cnt, ss.const
+from c left join
+  (select a.code, coalesce(b_grp.cnt, 0) as b_cnt, -1 as const
+   from a left join
+     (select count(1) as cnt, b.a from b group by b.a) as b_grp
+     on a.code = b_grp.a
+  ) as ss
+  on (c.a = ss.code)
+order by c.name;
+
+-- rollback;
+
+--
+-- test incorrect handling of placeholders that only appear in targetlists,
+-- per bug #6154
+--
+SELECT * FROM
+( SELECT 1 as key1 ) sub1
+LEFT JOIN
+( SELECT sub3.key3, sub4.value2, COALESCE(sub4.value2, 66) as value3 FROM
+    ( SELECT 1 as key3 ) sub3
+    LEFT JOIN
+    ( SELECT sub5.key5, COALESCE(sub6.value1, 1) as value2 FROM
+        ( SELECT 1 as key5 ) sub5
+        LEFT JOIN
+        ( SELECT 2 as key6, 42 as value1 ) sub6
+        ON sub5.key5 = sub6.key6
+    ) sub4
+    ON sub4.key5 = sub3.key3
+) sub2
+ON sub1.key1 = sub2.key3;
+
+-- test the path using join aliases, too
+SELECT * FROM
+( SELECT 1 as key1 ) sub1
+LEFT JOIN
+( SELECT sub3.key3, value2, COALESCE(value2, 66) as value3 FROM
+    ( SELECT 1 as key3 ) sub3
+    LEFT JOIN
+    ( SELECT sub5.key5, COALESCE(sub6.value1, 1) as value2 FROM
+        ( SELECT 1 as key5 ) sub5
+        LEFT JOIN
+        ( SELECT 2 as key6, 42 as value1 ) sub6
+        ON sub5.key5 = sub6.key6
+    ) sub4
+    ON sub4.key5 = sub3.key3
+) sub2
+ON sub1.key1 = sub2.key3;
+
+--
+-- test case where a PlaceHolderVar is used as a nestloop parameter
+--
+
+-- EXPLAIN (COSTS OFF)
+-- SELECT qq, unique1
+--   FROM
+--   ( SELECT COALESCE(q1, 0) AS qq FROM int8_tbl a ) AS ss1
+--   FULL OUTER JOIN
+--   ( SELECT COALESCE(q2, -1) AS qq FROM int8_tbl b ) AS ss2
+--   USING (qq)
+--   INNER JOIN tenk1 c ON qq = unique2;
+
+SELECT qq, unique1
+  FROM
+  ( SELECT COALESCE(q1, 0) AS qq FROM int8_tbl a ) AS ss1
+  FULL OUTER JOIN
+  ( SELECT COALESCE(q2, -1) AS qq FROM int8_tbl b ) AS ss2
+  USING (qq)
+  INNER JOIN tenk1 c ON qq = unique2;
+
+--
+-- nested nestloops can require nested PlaceHolderVars
+--
+
+create or replace temporary view nt1 as select * from
+  (values(1,true,true), (2,true,false), (3,false,false))
+  as v(id, a1, a2);
+create or replace temporary view nt2 as select * from
+  (values(1,1,true,true), (2,2,true,false), (3,3,false,false))
+  as v(id, nt1_id, b1, b2);
+create or replace temporary view nt3 as select * from
+  (values(1,1,true), (2,2,false), (3,3,true))
+  as v(id, nt2_id, c1);
+-- explain (costs off)
+-- select nt3.id
+-- from nt3 as nt3
+--   left join
+--     (select nt2.*, (nt2.b1 and ss1.a3) AS b3
+--      from nt2 as nt2
+--        left join
+--          (select nt1.*, (nt1.id is not null) as a3 from nt1) as ss1
+--          on ss1.id = nt2.nt1_id
+--     ) as ss2
+--     on ss2.id = nt3.nt2_id
+-- where nt3.id = 1 and ss2.b3;
+
+select nt3.id
+from nt3 as nt3
+  left join
+    (select nt2.*, (nt2.b1 and ss1.a3) AS b3
+     from nt2 as nt2
+       left join
+         (select nt1.*, (nt1.id is not null) as a3 from nt1) as ss1
+         on ss1.id = nt2.nt1_id
+    ) as ss2
+    on ss2.id = nt3.nt2_id
+where nt3.id = 1 and ss2.b3;
+
+-- [SPARK-28379] Correlated scalar subqueries must be aggregated
+--
+-- test case where a PlaceHolderVar is propagated into a subquery
+--
+
+-- explain (costs off)
+-- select * from
+--   int8_tbl t1 left join
+--   (select q1 as x, 42 as y from int8_tbl t2) ss
+--   on t1.q2 = ss.x
+-- where
+--   1 = (select 1 from int8_tbl t3 where ss.y is not null limit 1)
+-- order by 1,2;
+
+-- select * from
+--   int8_tbl t1 left join
+--   (select q1 as x, 42 as y from int8_tbl t2) ss
+--   on t1.q2 = ss.x
+-- where
+--   1 = (select 1 from int8_tbl t3 where ss.y is not null limit 1)
+-- order by 1,2;
+
+--
+-- test the corner cases FULL JOIN ON TRUE and FULL JOIN ON FALSE
+--
+select * from int4_tbl a full join int4_tbl b on true;
+select * from int4_tbl a full join int4_tbl b on false;
+
+--
+-- test for ability to use a cartesian join when necessary
+--
+
+-- explain (costs off)
+-- select * from
+--   tenk1 join int4_tbl on f1 = twothousand,
+--   int4(sin(1)) q1,
+--   int4(sin(0)) q2
+-- where q1 = thousand or q2 = thousand;
+
+-- explain (costs off)
+-- select * from
+--   tenk1 join int4_tbl on f1 = twothousand,
+--   int4(sin(1)) q1,
+--   int4(sin(0)) q2
+-- where thousand = (q1 + q2);
+
+--
+-- test ability to generate a suitable plan for a star-schema query
+--
+
+-- explain (costs off)
+-- select * from
+--   tenk1, int8_tbl a, int8_tbl b
+-- where thousand = a.q1 and tenthous = b.q1 and a.q2 = 1 and b.q2 = 2;
+
+--
+-- test a corner case in which we shouldn't apply the star-schema optimization
+--
+
+-- explain (costs off)
+-- select t1.unique2, t1.stringu1, t2.unique1, t2.stringu2 from
+--   tenk1 t1
+--   inner join int4_tbl i1
+--     left join (select v1.x2, v2.y1, 11 AS d1
+--                from (select 1,0 from onerow) v1(x1,x2)
+--                left join (select 3,1 from onerow) v2(y1,y2)
+--                on v1.x1 = v2.y2) subq1
+--     on (i1.f1 = subq1.x2)
+--   on (t1.unique2 = subq1.d1)
+--   left join tenk1 t2
+--   on (subq1.y1 = t2.unique1)
+-- where t1.unique2 < 42 and t1.stringu1 > t2.stringu2;
+
+-- [SPARK-20856] support statement using nested joins
+-- select t1.unique2, t1.stringu1, t2.unique1, t2.stringu2 from
+--   tenk1 t1
+--   inner join int4_tbl i1
+--     left join (select v1.x2, v2.y1, 11 AS d1
+--                from (select 1,0 from onerow) v1(x1,x2)
+--                left join (select 3,1 from onerow) v2(y1,y2)
+--                on v1.x1 = v2.y2) subq1
+--     on (i1.f1 = subq1.x2)
+--   on (t1.unique2 = subq1.d1)
+--   left join tenk1 t2
+--   on (subq1.y1 = t2.unique1)
+-- where t1.unique2 < 42 and t1.stringu1 > t2.stringu2;
+
+-- variant that isn't quite a star-schema case
+
+-- Spark SQL does not support the information_schema.cardinal_number type
+-- select ss1.d1 from
+--   tenk1 as t1
+--   inner join tenk1 as t2
+--   on t1.tenthous = t2.ten
+--   inner join
+--     int8_tbl as i8
+--     left join int4_tbl as i4
+--       inner join (select 64::information_schema.cardinal_number as d1
+--                   from tenk1 t3,
+--                        lateral (select abs(t3.unique1) + random()) ss0(x)
+--                   where t3.fivethous < 0) as ss1
+--       on i4.f1 = ss1.d1
+--     on i8.q1 = i4.f1
+--   on t1.tenthous = ss1.d1
+-- where t1.unique1 < i4.f1;
+
+-- this variant is foldable by the remove-useless-RESULT-RTEs code
+
+-- explain (costs off)
+-- select t1.unique2, t1.stringu1, t2.unique1, t2.stringu2 from
+--   tenk1 t1
+--   inner join int4_tbl i1
+--     left join (select v1.x2, v2.y1, 11 AS d1
+--                from (values(1,0)) v1(x1,x2)
+--                left join (values(3,1)) v2(y1,y2)
+--                on v1.x1 = v2.y2) subq1
+--     on (i1.f1 = subq1.x2)
+--   on (t1.unique2 = subq1.d1)
+--   left join tenk1 t2
+--   on (subq1.y1 = t2.unique1)
+-- where t1.unique2 < 42 and t1.stringu1 > t2.stringu2;
+
+-- [SPARK-20856] support statement using nested joins
+-- select t1.unique2, t1.stringu1, t2.unique1, t2.stringu2 from
+--   tenk1 t1
+--   inner join int4_tbl i1
+--     left join (select v1.x2, v2.y1, 11 AS d1
+--                from (values(1,0)) v1(x1,x2)
+--                left join (values(3,1)) v2(y1,y2)
+--                on v1.x1 = v2.y2) subq1
+--     on (i1.f1 = subq1.x2)
+--   on (t1.unique2 = subq1.d1)
+--   left join tenk1 t2
+--   on (subq1.y1 = t2.unique1)
+-- where t1.unique2 < 42 and t1.stringu1 > t2.stringu2;
+
+--
+-- test extraction of restriction OR clauses from join OR clause
+-- (we used to only do this for indexable clauses)
+--
+
+-- explain (costs off)
+-- select * from tenk1 a join tenk1 b on
+--   (a.unique1 = 1 and b.unique1 = 2) or (a.unique2 = 3 and b.hundred = 4);
+-- explain (costs off)
+-- select * from tenk1 a join tenk1 b on
+--   (a.unique1 = 1 and b.unique1 = 2) or (a.unique2 = 3 and b.ten = 4);
+-- explain (costs off)
+-- select * from tenk1 a join tenk1 b on
+--   (a.unique1 = 1 and b.unique1 = 2) or
+--   ((a.unique2 = 3 or a.unique2 = 7) and b.hundred = 4);
+
+--
+-- test placement of movable quals in a parameterized join tree
+--
+
+-- explain (costs off)
+-- select * from tenk1 t1 left join
+--   (tenk1 t2 join tenk1 t3 on t2.thousand = t3.unique2)
+--   on t1.hundred = t2.hundred and t1.ten = t3.ten
+-- where t1.unique1 = 1;
+
+-- explain (costs off)
+-- select * from tenk1 t1 left join
+--   (tenk1 t2 join tenk1 t3 on t2.thousand = t3.unique2)
+--   on t1.hundred = t2.hundred and t1.ten + t2.ten = t3.ten
+-- where t1.unique1 = 1;
+
+-- explain (costs off)
+-- select count(*) from
+--   tenk1 a join tenk1 b on a.unique1 = b.unique2
+--   left join tenk1 c on a.unique2 = b.unique1 and c.thousand = a.thousand
+--   join int4_tbl on b.thousand = f1;
+
+select count(*) from
+  tenk1 a join tenk1 b on a.unique1 = b.unique2
+  left join tenk1 c on a.unique2 = b.unique1 and c.thousand = a.thousand
+  join int4_tbl on b.thousand = f1;
+
+-- explain (costs off)
+-- select b.unique1 from
+--   tenk1 a join tenk1 b on a.unique1 = b.unique2
+--   left join tenk1 c on b.unique1 = 42 and c.thousand = a.thousand
+--   join int4_tbl i1 on b.thousand = f1
+--   right join int4_tbl i2 on i2.f1 = b.tenthous
+--   order by 1;
+
+select b.unique1 from
+  tenk1 a join tenk1 b on a.unique1 = b.unique2
+  left join tenk1 c on b.unique1 = 42 and c.thousand = a.thousand
+  join int4_tbl i1 on b.thousand = f1
+  right join int4_tbl i2 on i2.f1 = b.tenthous
+  order by 1;
+
+-- explain (costs off)
+-- select * from
+-- (
+--   select unique1, q1, coalesce(unique1, -1) + q1 as fault
+--   from int8_tbl left join tenk1 on (q2 = unique2)
+-- ) ss
+-- where fault = 122
+-- order by fault;
+
+select * from
+(
+  select unique1, q1, coalesce(unique1, -1) + q1 as fault
+  from int8_tbl left join tenk1 on (q2 = unique2)
+) ss
+where fault = 122
+order by fault;
+
+-- explain (costs off)
+-- select * from
+-- (values (1, array[10,20]), (2, array[20,30])) as v1(v1x,v1ys)
+-- left join (values (1, 10), (2, 20)) as v2(v2x,v2y) on v2x = v1x
+-- left join unnest(v1ys) as u1(u1y) on u1y = v2y;
+
+-- [SPARK-28382] Array Functions: unnest
+-- select * from
+-- (values (1, array(10,20)), (2, array(20,30))) as v1(v1x,v1ys)
+-- left join (values (1, 10), (2, 20)) as v2(v2x,v2y) on v2x = v1x
+-- left join unnest(v1ys) as u1(u1y) on u1y = v2y;
+
+--
+-- test handling of potential equivalence clauses above outer joins
+--
+
+-- explain (costs off)
+-- select q1, unique2, thousand, hundred
+--   from int8_tbl a left join tenk1 b on q1 = unique2
+--   where coalesce(thousand,123) = q1 and q1 = coalesce(hundred,123);
+
+select q1, unique2, thousand, hundred
+  from int8_tbl a left join tenk1 b on q1 = unique2
+  where coalesce(thousand,123) = q1 and q1 = coalesce(hundred,123);
+
+-- explain (costs off)
+-- select f1, unique2, case when unique2 is null then f1 else 0 end
+--   from int4_tbl a left join tenk1 b on f1 = unique2
+--   where (case when unique2 is null then f1 else 0 end) = 0;
+
+select f1, unique2, case when unique2 is null then f1 else 0 end
+  from int4_tbl a left join tenk1 b on f1 = unique2
+  where (case when unique2 is null then f1 else 0 end) = 0;
+
+--
+-- another case with equivalence clauses above outer joins (bug #8591)
+--
+
+-- explain (costs off)
+-- select a.unique1, b.unique1, c.unique1, coalesce(b.twothousand, a.twothousand)
+--   from tenk1 a left join tenk1 b on b.thousand = a.unique1                        left join tenk1 c on c.unique2 = coalesce(b.twothousand, a.twothousand)
+--   where a.unique2 < 10 and coalesce(b.twothousand, a.twothousand) = 44;
+
+select a.unique1, b.unique1, c.unique1, coalesce(b.twothousand, a.twothousand)
+  from tenk1 a left join tenk1 b on b.thousand = a.unique1                        left join tenk1 c on c.unique2 = coalesce(b.twothousand, a.twothousand)
+  where a.unique2 < 10 and coalesce(b.twothousand, a.twothousand) = 44;
+
+--
+-- check handling of join aliases when flattening multiple levels of subquery
+--
+
+-- explain (verbose, costs off)
+-- select foo1.join_key as foo1_id, foo3.join_key AS foo3_id, bug_field from
+--   (values (0),(1)) foo1(join_key)
+-- left join
+--   (select join_key, bug_field from
+--     (select ss1.join_key, ss1.bug_field from
+--       (select f1 as join_key, 666 as bug_field from int4_tbl i1) ss1
+--     ) foo2
+--    left join
+--     (select unique2 as join_key from tenk1 i2) ss2
+--    using (join_key)
+--   ) foo3
+-- using (join_key);
+
+
+-- [SPARK-28377] Fully support correlation names in the FROM clause
+-- select foo1.join_key as foo1_id, foo3.join_key AS foo3_id, bug_field from
+--   (values (0),(1)) foo1(join_key)
+-- left join
+--   (select join_key, bug_field from
+--     (select ss1.join_key, ss1.bug_field from
+--       (select f1 as join_key, 666 as bug_field from int4_tbl i1) ss1
+--     ) foo2
+--    left join
+--     (select unique2 as join_key from tenk1 i2) ss2
+--    using (join_key)
+--   ) foo3
+-- using (join_key);
+
+-- [SPARK-20856] Support statement using nested joins
+--
+-- test successful handling of nested outer joins with degenerate join quals
+--
+
+-- explain (verbose, costs off)
+-- select t1.* from
+--   text_tbl t1
+--   left join (select *, '***'::text as d1 from int8_tbl i8b1) b1
+--     left join int8_tbl i8
+--       left join (select *, null::int as d2 from int8_tbl i8b2) b2
+--       on (i8.q1 = b2.q1)
+--     on (b2.d2 = b1.q2)
+--   on (t1.f1 = b1.d1)
+--   left join int4_tbl i4
+--   on (i8.q2 = i4.f1);
+
+-- select t1.* from
 
 Review comment:
  Should we skip this case for correlation names, too?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org