You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by gatorsmile <gi...@git.apache.org> on 2017/01/01 18:53:00 UTC

[GitHub] spark pull request #16337: [SPARK-18871][SQL] New test cases for IN/NOT IN s...

Github user gatorsmile commented on a diff in the pull request:

    https://github.com/apache/spark/pull/16337#discussion_r94287611
  
    --- Diff: sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/simple-in.sql.out ---
    @@ -0,0 +1,213 @@
    +-- Automatically generated by SQLQueryTestSuite
    +-- Number of queries: 12
    +
    +
    +-- !query 0
    +create temporary view t1 as select * from values
    +  ("t1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'),
    +  ("t1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'),
    +  ("t1a", 16S, 12, 21L, float(15.0), 20D, 20E2, timestamp '2014-06-04 01:02:00.001', date '2014-06-04'),
    +  ("t1a", 16S, 12, 10L, float(15.0), 20D, 20E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'),
    +  ("t1c", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:02:00.001', date '2014-05-05'),
    +  ("t1d", null, 16, 22L, float(17.0), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', null),
    +  ("t1d", null, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-07-04 01:02:00.001', null),
    +  ("t1e", 10S, null, 25L, float(17.0), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-04'),
    +  ("t1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-09-04 01:02:00.001', date '2014-09-04'),
    +  ("t1d", 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'),
    +  ("t1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'),
    +  ("t1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04')
    +  as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i)
    +-- !query 0 schema
    +struct<>
    +-- !query 0 output
    +
    +
    +
    +-- !query 1
    +create temporary view t2 as select * from values
    +  ("t2a", 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'),
    +  ("t1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'),
    +  ("t1b", 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'),
    +  ("t1c", 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'),
    +  ("t1b", null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04 01:01:00.000', null),
    +  ("t2e", 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'),
    +  ("t1f", 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'),
    +  ("t1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'),
    +  ("t1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'),
    +  ("t1c", 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'),
    +  ("t1e", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'),
    +  ("t1f", 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'),
    +  ("t1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null)
    +  as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i)
    +-- !query 1 schema
    +struct<>
    +-- !query 1 output
    +
    +
    +
    +-- !query 2
    +create temporary view t3 as select * from values
    +  ("t3a", 6S, 12, 110L, float(15), 20D, 20E2, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'),
    +  ("t3a", 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
    +  ("t1b", 10S, 12, 219L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
    +  ("t1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
    +  ("t1b", 8S, 16, 319L, float(17), 25D, 26E2, timestamp '2014-06-04 01:02:00.000', date '2014-06-04'),
    +  ("t1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:02:00.000', date '2014-07-04'),
    +  ("t3c", 17S, 16, 519L, float(17), 25D, 26E2, timestamp '2014-08-04 01:02:00.000', date '2014-08-04'),
    +  ("t3c", 17S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:02:00.000', date '2014-09-05'),
    +  ("t1b", null, 16, 419L, float(17), 25D, 26E2, timestamp '2014-10-04 01:02:00.000', null),
    +  ("t1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-11-04 01:02:00.000', null),
    +  ("t3b", 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
    +  ("t3b", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04')
    +  as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i)
    +-- !query 2 schema
    +struct<>
    +-- !query 2 output
    +
    +
    +
    +-- !query 3
    +SELECT *
    +FROM   t1
    +WHERE  t1a IN (SELECT t2a
    +               FROM   t2)
    +-- !query 3 schema
    +struct<t1a:string,t1b:smallint,t1c:int,t1d:bigint,t1e:float,t1f:double,t1g:decimal(2,-2),t1h:timestamp,t1i:date>
    +-- !query 3 output
    +t1b	8	16	19	17.0	25.0	2600	2014-05-04 01:01:00	2014-05-04
    +t1c	8	16	19	17.0	25.0	2600	2014-05-04 01:02:00.001	2014-05-05
    +t1e	10	NULL	19	17.0	25.0	2600	2014-05-04 01:01:00	2014-05-04
    +t1e	10	NULL	19	17.0	25.0	2600	2014-09-04 01:02:00.001	2014-09-04
    +t1e	10	NULL	25	17.0	25.0	2600	2014-08-04 01:01:00	2014-08-04
    +
    +
    +-- !query 4
    +SELECT *
    +FROM   t1
    +WHERE  t1b IN (SELECT t2b
    +               FROM   t2
    +               WHERE  t1a = t2a)
    +-- !query 4 schema
    +struct<t1a:string,t1b:smallint,t1c:int,t1d:bigint,t1e:float,t1f:double,t1g:decimal(2,-2),t1h:timestamp,t1i:date>
    +-- !query 4 output
    +t1b	8	16	19	17.0	25.0	2600	2014-05-04 01:01:00	2014-05-04
    +
    +
    +-- !query 5
    +SELECT t1a,
    +       t1b
    +FROM   t1
    +WHERE  t1c IN (SELECT t2b
    +               FROM   t2
    +               WHERE  t1a != t2a)
    +-- !query 5 schema
    +struct<t1a:string,t1b:smallint>
    +-- !query 5 output
    +t1a	16
    +t1a	16
    +t1a	6
    +t1a	6
    +
    +
    +-- !query 6
    +SELECT t1a,
    +       t1b
    +FROM   t1
    +WHERE  t1c IN (SELECT t2b
    +               FROM   t2
    +               WHERE  t1a = t2a
    +                       OR t1b > t2b)
    +-- !query 6 schema
    +struct<t1a:string,t1b:smallint>
    +-- !query 6 output
    +t1a	16
    +t1a	16
    +
    +
    +-- !query 7
    +SELECT t1a,
    +       t1b
    +FROM   t1
    +WHERE  t1c IN (SELECT t2b
    +               FROM   t2
    +               WHERE  t2i IN (SELECT t3i
    +                              FROM   t3
    +                              WHERE  t2c = t3c))
    +-- !query 7 schema
    +struct<t1a:string,t1b:smallint>
    +-- !query 7 output
    +t1a	6
    +t1a	6
    +
    +
    +-- !query 8
    +SELECT t1a,
    +       t1b
    +FROM   t1
    +WHERE  t1c IN (SELECT t2b
    +               FROM   t2
    +               WHERE  t2a IN (SELECT t3a
    +                              FROM   t3
    +                              WHERE  t2c = t3c
    +                                     AND t2b IS NOT NULL))
    +-- !query 8 schema
    +struct<t1a:string,t1b:smallint>
    +-- !query 8 output
    +t1a	6
    +t1a	6
    +
    +
    +-- !query 9
    +SELECT DISTINCT( t1a ),
    +               t1b,
    +               t1h
    +FROM   t1
    +WHERE  t1a NOT IN (SELECT t2a
    +                   FROM   t2)
    +-- !query 9 schema
    +struct<t1a:string,t1b:smallint,t1h:timestamp>
    +-- !query 9 output
    +t1a	16	2014-06-04 01:02:00.001
    +t1a	16	2014-07-04 01:01:00
    +t1a	6	2014-04-04 01:00:00
    +t1a	6	2014-04-04 01:02:00.001
    +t1d	10	2015-05-04 01:01:00
    +t1d	NULL	2014-06-04 01:01:00
    +t1d	NULL	2014-07-04 01:02:00.001
    +
    +
    +-- !query 10
    +SELECT DISTINCT( t1a ),
    +               t1b
    +FROM   t1
    +WHERE  t1b NOT IN (SELECT t2b
    +                   FROM   t2
    +                   WHERE  t1a < t2a
    +                          AND t2b > 8)
    +-- !query 10 schema
    +struct<t1a:string,t1b:smallint>
    +-- !query 10 output
    +t1a	16
    +t1a	6
    +t1b	8
    +t1c	8
    +t1d	10
    +t1e	10
    +
    +
    +-- !query 11
    +SELECT t1a,
    +       t1b
    +FROM   t1
    +WHERE  t1h NOT IN (SELECT t2h
    +                   FROM   t2
    +                   WHERE  t2a = t1a)
    +       AND t1b NOT IN ((SELECT Min(t3b)
    +                        FROM   t3
    +                        WHERE  t3d = t1d))
    +-- !query 11 schema
    +struct<t1a:string,t1b:smallint>
    +-- !query 11 output
    +t1a	16
    +t1e	10
    +t1e	10
    --- End diff --
    
    The result do not match with the one from DB2, right? 
    ```
    select t1a, t1b from t1 where t1h not in (select t2h from t2 where t2a = t1a) and t1b not in ( (select min(t3b) from t3 where t3d = t1d))
    
    T1A                            T1B   
    ------------------------------ ------
    t1a                                16
    t1e                                10
    
      2 record(s) selected.
    ```


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org