You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2019/07/18 15:51:04 UTC

[spark] branch master updated: [SPARK-28138][SQL][TEST] Port timestamp.sql

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 8acc22c  [SPARK-28138][SQL][TEST] Port timestamp.sql
8acc22c is described below

commit 8acc22ca64e9f0f5d621b8bbf5e70ca4e4c55927
Author: Yuming Wang <yu...@ebay.com>
AuthorDate: Thu Jul 18 08:50:31 2019 -0700

    [SPARK-28138][SQL][TEST] Port timestamp.sql
    
    ## What changes were proposed in this pull request?
    
    This PR is to port timestamp.sql from PostgreSQL regression tests. https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/timestamp.sql
    
    The expected results can be found in the link: https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/expected/timestamp.out
    
    When porting the test cases, found six PostgreSQL specific features that do not exist in Spark SQL:
    [SPARK-28141](https://issues.apache.org/jira/browse/SPARK-28141): Timestamp type can not accept special values
    [SPARK-28259](https://issues.apache.org/jira/browse/SPARK-28259): Date/Time Output Styles and Date Order Conventions
    [SPARK-28425](https://issues.apache.org/jira/browse/SPARK-28425): Add more Date/Time Operators
    [SPARK-28420](https://issues.apache.org/jira/browse/SPARK-28420): Date/Time Functions: date_part
    [SPARK-28137](https://issues.apache.org/jira/browse/SPARK-28137): Data Type Formatting Functions
    [SPARK-28432](https://issues.apache.org/jira/browse/SPARK-28432): Date/Time Functions: make_date/make_timestamp
    
    Also, found one inconsistent behavior:
    [SPARK-27923](https://issues.apache.org/jira/browse/SPARK-27923): Spark SQL insert bad inputs to NULL
    
    ## How was this patch tested?
    
    N/A
    
    Closes #25181 from wangyum/SPARK-28138.
    
    Authored-by: Yuming Wang <yu...@ebay.com>
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
 .../resources/sql-tests/inputs/pgSQL/timestamp.sql | 247 +++++++++++++++++++++
 .../sql-tests/results/pgSQL/timestamp.sql.out      | 130 +++++++++++
 2 files changed, 377 insertions(+)

diff --git a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/timestamp.sql b/sql/core/src/test/resources/sql-tests/inputs/pgSQL/timestamp.sql
new file mode 100644
index 0000000..02af15a
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/pgSQL/timestamp.sql
@@ -0,0 +1,247 @@
+--
+-- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
+--
+--
+-- TIMESTAMP
+-- https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/timestamp.sql
+
+CREATE TABLE TIMESTAMP_TBL (d1 timestamp) USING parquet;
+
+-- [SPARK-28141] Timestamp type can not accept special values
+-- Test shorthand input values
+-- We can't just "select" the results since they aren't constants; test for
+-- equality instead.  We can do that by running the test inside a transaction
+-- block, within which the value of 'now' shouldn't change.  We also check
+-- that 'now' *does* change over a reasonable interval such as 100 msec.
+-- NOTE: it is possible for this part of the test to fail if the transaction
+-- block is entered exactly at local midnight; then 'now' and 'today' have
+-- the same values and the counts will come out different.
+
+-- INSERT INTO TIMESTAMP_TBL VALUES ('now');
+-- SELECT pg_sleep(0.1);
+
+-- BEGIN;
+
+-- INSERT INTO TIMESTAMP_TBL VALUES ('now');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('today');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('yesterday');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('tomorrow');
+-- time zone should be ignored by this data type
+-- INSERT INTO TIMESTAMP_TBL VALUES ('tomorrow EST');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('tomorrow zulu');
+
+-- SELECT count(*) AS One FROM TIMESTAMP_TBL WHERE d1 = timestamp 'today';
+-- SELECT count(*) AS Three FROM TIMESTAMP_TBL WHERE d1 = timestamp 'tomorrow';
+-- SELECT count(*) AS One FROM TIMESTAMP_TBL WHERE d1 = timestamp 'yesterday';
+-- SELECT count(*) AS One FROM TIMESTAMP_TBL WHERE d1 = timestamp(2) 'now';
+
+-- COMMIT;
+
+-- DELETE FROM TIMESTAMP_TBL;
+
+-- verify uniform transaction time within transaction block
+-- BEGIN;
+-- INSERT INTO TIMESTAMP_TBL VALUES ('now');
+-- SELECT pg_sleep(0.1);
+-- INSERT INTO TIMESTAMP_TBL VALUES ('now');
+-- SELECT pg_sleep(0.1);
+-- SELECT count(*) AS two FROM TIMESTAMP_TBL WHERE d1 = timestamp(2) 'now';
+-- COMMIT;
+
+-- TRUNCATE TIMESTAMP_TBL;
+
+-- Special values
+-- INSERT INTO TIMESTAMP_TBL VALUES ('-infinity');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('infinity');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('epoch');
+-- [SPARK-27923] Spark SQL insert these obsolete special values to NULL
+-- Obsolete special values
+-- INSERT INTO TIMESTAMP_TBL VALUES ('invalid');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('undefined');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('current');
+
+-- [SPARK-28259] Date/Time Output Styles and Date Order Conventions
+-- Postgres v6.0 standard output format
+-- INSERT INTO TIMESTAMP_TBL VALUES ('Mon Feb 10 17:32:01 1997 PST');
+
+-- Variations on Postgres v6.1 standard output format
+-- INSERT INTO TIMESTAMP_TBL VALUES ('Mon Feb 10 17:32:01.000001 1997 PST');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('Mon Feb 10 17:32:01.999999 1997 PST');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('Mon Feb 10 17:32:01.4 1997 PST');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('Mon Feb 10 17:32:01.5 1997 PST');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('Mon Feb 10 17:32:01.6 1997 PST');
+
+-- ISO 8601 format
+INSERT INTO TIMESTAMP_TBL VALUES ('1997-01-02');
+INSERT INTO TIMESTAMP_TBL VALUES ('1997-01-02 03:04:05');
+INSERT INTO TIMESTAMP_TBL VALUES ('1997-02-10 17:32:01-08');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('1997-02-10 17:32:01-0800');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('1997-02-10 17:32:01 -08:00');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('19970210 173201 -0800');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('1997-06-10 17:32:01 -07:00');
+INSERT INTO TIMESTAMP_TBL VALUES ('2001-09-22T18:19:20');
+
+-- POSIX format (note that the timezone abbrev is just decoration here)
+-- INSERT INTO TIMESTAMP_TBL VALUES ('2000-03-15 08:14:01 GMT+8');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('2000-03-15 13:14:02 GMT-1');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('2000-03-15 12:14:03 GMT-2');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('2000-03-15 03:14:04 PST+8');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('2000-03-15 02:14:05 MST+7:00');
+
+-- Variations for acceptable input formats
+-- INSERT INTO TIMESTAMP_TBL VALUES ('Feb 10 17:32:01 1997 -0800');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('Feb 10 17:32:01 1997');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('Feb 10 5:32PM 1997');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('1997/02/10 17:32:01-0800');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('1997-02-10 17:32:01 PST');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('Feb-10-1997 17:32:01 PST');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('02-10-1997 17:32:01 PST');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('19970210 173201 PST');
+-- set datestyle to ymd;
+-- INSERT INTO TIMESTAMP_TBL VALUES ('97FEB10 5:32:01PM UTC');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('97/02/10 17:32:01 UTC');
+-- reset datestyle;
+-- INSERT INTO TIMESTAMP_TBL VALUES ('1997.041 17:32:01 UTC');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('19970210 173201 America/New_York');
+-- this fails (even though TZ is a no-op, we still look it up)
+-- INSERT INTO TIMESTAMP_TBL VALUES ('19970710 173201 America/Does_not_exist');
+
+-- Check date conversion and date arithmetic
+-- INSERT INTO TIMESTAMP_TBL VALUES ('1997-06-10 18:32:01 PDT');
+
+-- INSERT INTO TIMESTAMP_TBL VALUES ('Feb 10 17:32:01 1997');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('Feb 11 17:32:01 1997');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('Feb 12 17:32:01 1997');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('Feb 13 17:32:01 1997');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('Feb 14 17:32:01 1997');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('Feb 15 17:32:01 1997');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('Feb 16 17:32:01 1997');
+
+-- INSERT INTO TIMESTAMP_TBL VALUES ('Feb 16 17:32:01 0097 BC');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('Feb 16 17:32:01 0097');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('Feb 16 17:32:01 0597');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('Feb 16 17:32:01 1097');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('Feb 16 17:32:01 1697');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('Feb 16 17:32:01 1797');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('Feb 16 17:32:01 1897');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('Feb 16 17:32:01 1997');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('Feb 16 17:32:01 2097');
+
+-- INSERT INTO TIMESTAMP_TBL VALUES ('Feb 28 17:32:01 1996');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('Feb 29 17:32:01 1996');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('Mar 01 17:32:01 1996');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('Dec 30 17:32:01 1996');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('Dec 31 17:32:01 1996');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('Jan 01 17:32:01 1997');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('Feb 28 17:32:01 1997');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('Feb 29 17:32:01 1997');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('Mar 01 17:32:01 1997');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('Dec 30 17:32:01 1997');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('Dec 31 17:32:01 1997');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('Dec 31 17:32:01 1999');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('Jan 01 17:32:01 2000');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('Dec 31 17:32:01 2000');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('Jan 01 17:32:01 2001');
+
+-- Currently unsupported syntax and ranges
+-- INSERT INTO TIMESTAMP_TBL VALUES ('Feb 16 17:32:01 -0097');
+-- INSERT INTO TIMESTAMP_TBL VALUES ('Feb 16 17:32:01 5097 BC');
+
+SELECT '' AS `64`, d1 FROM TIMESTAMP_TBL;
+
+-- [SPARK-28253] Date/Timestamp type have different low value and high value with Spark
+-- Check behavior at the lower boundary of the timestamp range
+-- SELECT '4714-11-24 00:00:00 BC'::timestamp;
+-- SELECT '4714-11-23 23:59:59 BC'::timestamp;  -- out of range
+-- The upper boundary differs between integer and float timestamps, so no check
+
+-- Demonstrate functions and operators
+SELECT '' AS `48`, d1 FROM TIMESTAMP_TBL
+   WHERE d1 > timestamp '1997-01-02';
+
+SELECT '' AS `15`, d1 FROM TIMESTAMP_TBL
+   WHERE d1 < timestamp '1997-01-02';
+
+SELECT '' AS one, d1 FROM TIMESTAMP_TBL
+   WHERE d1 = timestamp '1997-01-02';
+
+SELECT '' AS `63`, d1 FROM TIMESTAMP_TBL
+   WHERE d1 != timestamp '1997-01-02';
+
+SELECT '' AS `16`, d1 FROM TIMESTAMP_TBL
+   WHERE d1 <= timestamp '1997-01-02';
+
+SELECT '' AS `49`, d1 FROM TIMESTAMP_TBL
+   WHERE d1 >= timestamp '1997-01-02';
+
+-- [SPARK-28425] Add more Date/Time Operators
+-- SELECT '' AS `54`, d1 - timestamp '1997-01-02' AS diff
+--    FROM TIMESTAMP_TBL WHERE d1 BETWEEN '1902-01-01' AND '2038-01-01';
+
+SELECT '' AS date_trunc_week, date_trunc( 'week', timestamp '2004-02-29 15:44:17.71393' ) AS week_trunc;
+
+-- [SPARK-28425] Add more Date/Time Operators
+-- Test casting within a BETWEEN qualifier
+-- SELECT '' AS `54`, d1 - timestamp '1997-01-02' AS diff
+--   FROM TIMESTAMP_TBL
+--   WHERE d1 BETWEEN timestamp '1902-01-01'
+--    AND timestamp '2038-01-01';
+
+-- [SPARK-28420] Date/Time Functions: date_part
+-- SELECT '' AS "54", d1 as "timestamp",
+--    date_part( 'year', d1) AS year, date_part( 'month', d1) AS month,
+--    date_part( 'day', d1) AS day, date_part( 'hour', d1) AS hour,
+--    date_part( 'minute', d1) AS minute, date_part( 'second', d1) AS second
+--    FROM TIMESTAMP_TBL WHERE d1 BETWEEN '1902-01-01' AND '2038-01-01';
+
+-- SELECT '' AS "54", d1 as "timestamp",
+--    date_part( 'quarter', d1) AS quarter, date_part( 'msec', d1) AS msec,
+--    date_part( 'usec', d1) AS usec
+--    FROM TIMESTAMP_TBL WHERE d1 BETWEEN '1902-01-01' AND '2038-01-01';
+
+-- SELECT '' AS "54", d1 as "timestamp",
+--    date_part( 'isoyear', d1) AS isoyear, date_part( 'week', d1) AS week,
+--    date_part( 'dow', d1) AS dow
+--    FROM TIMESTAMP_TBL WHERE d1 BETWEEN '1902-01-01' AND '2038-01-01';
+
+-- [SPARK-28137] Data Type Formatting Functions
+-- TO_CHAR()
+-- SELECT '' AS to_char_1, to_char(d1, 'DAY Day day DY Dy dy MONTH Month month RM MON Mon mon')
+--    FROM TIMESTAMP_TBL;
+
+-- SELECT '' AS to_char_2, to_char(d1, 'FMDAY FMDay FMday FMMONTH FMMonth FMmonth FMRM')
+--    FROM TIMESTAMP_TBL;
+
+-- SELECT '' AS to_char_3, to_char(d1, 'Y,YYY YYYY YYY YY Y CC Q MM WW DDD DD D J')
+--    FROM TIMESTAMP_TBL;
+
+-- SELECT '' AS to_char_4, to_char(d1, 'FMY,YYY FMYYYY FMYYY FMYY FMY FMCC FMQ FMMM FMWW FMDDD FMDD FMD FMJ')
+--    FROM TIMESTAMP_TBL;
+
+-- SELECT '' AS to_char_5, to_char(d1, 'HH HH12 HH24 MI SS SSSS')
+--    FROM TIMESTAMP_TBL;
+
+-- SELECT '' AS to_char_6, to_char(d1, E'"HH:MI:SS is" HH:MI:SS "\\"text between quote marks\\""')
+--    FROM TIMESTAMP_TBL;
+
+-- SELECT '' AS to_char_7, to_char(d1, 'HH24--text--MI--text--SS')
+--    FROM TIMESTAMP_TBL;
+
+-- SELECT '' AS to_char_8, to_char(d1, 'YYYYTH YYYYth Jth')
+--    FROM TIMESTAMP_TBL;
+
+-- SELECT '' AS to_char_9, to_char(d1, 'YYYY A.D. YYYY a.d. YYYY bc HH:MI:SS P.M. HH:MI:SS p.m. HH:MI:SS pm')
+--    FROM TIMESTAMP_TBL;
+
+-- SELECT '' AS to_char_10, to_char(d1, 'IYYY IYY IY I IW IDDD ID')
+--    FROM TIMESTAMP_TBL;
+
+-- SELECT '' AS to_char_11, to_char(d1, 'FMIYYY FMIYY FMIY FMI FMIW FMIDDD FMID')
+--    FROM TIMESTAMP_TBL;
+
+
+-- [SPARK-28432] Missing Date/Time Functions: make_timestamp
+-- timestamp numeric fields constructor
+-- SELECT make_timestamp(2014,12,28,6,30,45.887);
+
+DROP TABLE TIMESTAMP_TBL;
diff --git a/sql/core/src/test/resources/sql-tests/results/pgSQL/timestamp.sql.out b/sql/core/src/test/resources/sql-tests/results/pgSQL/timestamp.sql.out
new file mode 100644
index 0000000..200fecc
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/pgSQL/timestamp.sql.out
@@ -0,0 +1,130 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 14
+
+
+-- !query 0
+CREATE TABLE TIMESTAMP_TBL (d1 timestamp) USING parquet
+-- !query 0 schema
+struct<>
+-- !query 0 output
+
+
+
+-- !query 1
+INSERT INTO TIMESTAMP_TBL VALUES ('1997-01-02')
+-- !query 1 schema
+struct<>
+-- !query 1 output
+
+
+
+-- !query 2
+INSERT INTO TIMESTAMP_TBL VALUES ('1997-01-02 03:04:05')
+-- !query 2 schema
+struct<>
+-- !query 2 output
+
+
+
+-- !query 3
+INSERT INTO TIMESTAMP_TBL VALUES ('1997-02-10 17:32:01-08')
+-- !query 3 schema
+struct<>
+-- !query 3 output
+
+
+
+-- !query 4
+INSERT INTO TIMESTAMP_TBL VALUES ('2001-09-22T18:19:20')
+-- !query 4 schema
+struct<>
+-- !query 4 output
+
+
+
+-- !query 5
+SELECT '' AS `64`, d1 FROM TIMESTAMP_TBL
+-- !query 5 schema
+struct<64:string,d1:timestamp>
+-- !query 5 output
+1997-01-02 00:00:00
+	1997-01-02 03:04:05
+	1997-02-10 17:32:01
+	2001-09-22 18:19:20
+
+
+-- !query 6
+SELECT '' AS `48`, d1 FROM TIMESTAMP_TBL
+   WHERE d1 > timestamp '1997-01-02'
+-- !query 6 schema
+struct<48:string,d1:timestamp>
+-- !query 6 output
+1997-01-02 03:04:05
+	1997-02-10 17:32:01
+	2001-09-22 18:19:20
+
+
+-- !query 7
+SELECT '' AS `15`, d1 FROM TIMESTAMP_TBL
+   WHERE d1 < timestamp '1997-01-02'
+-- !query 7 schema
+struct<15:string,d1:timestamp>
+-- !query 7 output
+
+
+
+-- !query 8
+SELECT '' AS one, d1 FROM TIMESTAMP_TBL
+   WHERE d1 = timestamp '1997-01-02'
+-- !query 8 schema
+struct<one:string,d1:timestamp>
+-- !query 8 output
+1997-01-02 00:00:00
+
+
+-- !query 9
+SELECT '' AS `63`, d1 FROM TIMESTAMP_TBL
+   WHERE d1 != timestamp '1997-01-02'
+-- !query 9 schema
+struct<63:string,d1:timestamp>
+-- !query 9 output
+1997-01-02 03:04:05
+	1997-02-10 17:32:01
+	2001-09-22 18:19:20
+
+
+-- !query 10
+SELECT '' AS `16`, d1 FROM TIMESTAMP_TBL
+   WHERE d1 <= timestamp '1997-01-02'
+-- !query 10 schema
+struct<16:string,d1:timestamp>
+-- !query 10 output
+1997-01-02 00:00:00
+
+
+-- !query 11
+SELECT '' AS `49`, d1 FROM TIMESTAMP_TBL
+   WHERE d1 >= timestamp '1997-01-02'
+-- !query 11 schema
+struct<49:string,d1:timestamp>
+-- !query 11 output
+1997-01-02 00:00:00
+	1997-01-02 03:04:05
+	1997-02-10 17:32:01
+	2001-09-22 18:19:20
+
+
+-- !query 12
+SELECT '' AS date_trunc_week, date_trunc( 'week', timestamp '2004-02-29 15:44:17.71393' ) AS week_trunc
+-- !query 12 schema
+struct<date_trunc_week:string,week_trunc:timestamp>
+-- !query 12 output
+2004-02-23 00:00:00
+
+
+-- !query 13
+DROP TABLE TIMESTAMP_TBL
+-- !query 13 schema
+struct<>
+-- !query 13 output
+


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org