You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2020/03/27 17:22:44 UTC

[impala] 02/02: IMPALA-9555: [Hive3] Fix test failure introduced by HIVE-22589

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 2cd7a2b77acfa04094e91efbc6803d11fabcc0e9
Author: Attila Jeges <at...@cloudera.com>
AuthorDate: Thu Mar 26 18:28:13 2020 +0100

    IMPALA-9555: [Hive3] Fix test failure introduced by HIVE-22589
    
    With HIVE-22589, Hive3 switched back to using the Julian calendar for
    historical dates by default, which caused an Impala test failure
    involving Avro DATE values.
    
    Change-Id: I51dd933867ea7877235e7f6e1f2b56711dca107e
    Reviewed-on: http://gerrit.cloudera.org:8080/15564
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../queries/QueryTest/avro_date.test               | 51 +++++++++++++---------
 tests/query_test/test_date_queries.py              |  3 +-
 2 files changed, 33 insertions(+), 21 deletions(-)

diff --git a/testdata/workloads/functional-query/queries/QueryTest/avro_date.test b/testdata/workloads/functional-query/queries/QueryTest/avro_date.test
index 317e58c..248872a 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/avro_date.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/avro_date.test
@@ -33,32 +33,30 @@ Avro file '__HDFS_FILENAME__' is corrupt: out of range date value -719164 at off
 ---- HIVE_MAJOR_VERSION
 3
 ---- QUERY
+# Avro table was created by Hive3. Since HIVE-22589, Hive3 also uses the Julian calendar
+# for dates before 1582-10-15 by default, so we expect the same results as above.
 select id_col, date_part, date_col from date_tbl;
 ---- RESULTS
-0,0001-01-01,0001-01-01
-1,0001-01-01,0001-12-31
-2,0001-01-01,0002-01-01
-3,0001-01-01,1399-12-31
-4,0001-01-01,2017-11-28
-5,0001-01-01,9999-12-31
-6,0001-01-01,NULL
 10,1399-06-27,2017-11-28
 11,1399-06-27,NULL
 12,1399-06-27,2018-12-31
-20,2017-11-27,0001-06-21
-21,2017-11-27,0001-06-22
-22,2017-11-27,0001-06-23
-23,2017-11-27,0001-06-24
-24,2017-11-27,0001-06-25
-25,2017-11-27,0001-06-26
-26,2017-11-27,0001-06-27
-27,2017-11-27,0001-06-28
-28,2017-11-27,0001-06-29
+20,2017-11-27,0001-06-19
+21,2017-11-27,0001-06-20
+22,2017-11-27,0001-06-21
+23,2017-11-27,0001-06-22
+24,2017-11-27,0001-06-23
+25,2017-11-27,0001-06-24
+26,2017-11-27,0001-06-25
+27,2017-11-27,0001-06-26
+28,2017-11-27,0001-06-27
 29,2017-11-27,2017-11-28
 30,9999-12-31,9999-12-01
 31,9999-12-31,9999-12-31
 ---- TYPES
 INT,DATE,DATE
+---- ERRORS
+Problem parsing file __HDFS_FILENAME__ at 309
+Avro file '__HDFS_FILENAME__' is corrupt: out of range date value -719164 at offset 309. The valid date range is -719162..2932896 (0001-01-01..9999-12-31).
 ====
 ---- HIVE_MAJOR_VERSION
 2
@@ -80,14 +78,18 @@ Avro file '__HDFS_FILENAME__' is corrupt: out of range date value -719164 at off
 ---- HIVE_MAJOR_VERSION
 3
 ---- QUERY
+# Avro table was created by Hive3. Since HIVE-22589, Hive3 also uses the Julian calendar
+# for dates before 1582-10-15 by default, so we expect the same results as above.
 select date_part, count(date_col) from date_tbl group by date_part;
 ---- RESULTS
 2017-11-27,10
 1399-06-27,2
 9999-12-31,2
-0001-01-01,6
 ---- TYPES
 DATE, BIGINT
+---- ERRORS
+Problem parsing file __HDFS_FILENAME__ at 309
+Avro file '__HDFS_FILENAME__' is corrupt: out of range date value -719164 at offset 309. The valid date range is -719162..2932896 (0001-01-01..9999-12-31).
 ====
 ---- HIVE_MAJOR_VERSION
 2
@@ -107,11 +109,16 @@ Avro file '__HDFS_FILENAME__' is corrupt: out of range date value -719164 at off
 ---- HIVE_MAJOR_VERSION
 3
 ---- QUERY
+# Avro table was created by Hive3. Since HIVE-22589, Hive3 also uses the Julian calendar
+# for dates before 1582-10-15 by default, so we expect the same results as above.
 select min(date_part), max(date_part), min(date_col), max(date_col) from date_tbl;
 ---- RESULTS
-0001-01-01,9999-12-31,0001-01-01,9999-12-31
+1399-06-27,9999-12-31,0001-06-19,9999-12-31
 ---- TYPES
 DATE, DATE, DATE, DATE
+---- ERRORS
+Problem parsing file __HDFS_FILENAME__ at 309
+Avro file '__HDFS_FILENAME__' is corrupt: out of range date value -719164 at offset 309. The valid date range is -719162..2932896 (0001-01-01..9999-12-31).
 ====
 ---- HIVE_MAJOR_VERSION
 2
@@ -133,14 +140,18 @@ Avro file '__HDFS_FILENAME__' is corrupt: out of range date value -719164 at off
 ---- HIVE_MAJOR_VERSION
 3
 ---- QUERY
+# Avro table was created by Hive3. Since HIVE-22589, Hive3 also uses the Julian calendar
+# for dates before 1582-10-15 by default, so we expect the same results as above.
 select date_part, min(date_col), max(date_col) from date_tbl group by date_part;
 ---- RESULTS
-2017-11-27,0001-06-21,2017-11-28
+2017-11-27,0001-06-19,2017-11-28
 1399-06-27,2017-11-28,2018-12-31
 9999-12-31,9999-12-01,9999-12-31
-0001-01-01,0001-01-01,9999-12-31
 ---- TYPES
 DATE, DATE, DATE
+---- ERRORS
+Problem parsing file __HDFS_FILENAME__ at 309
+Avro file '__HDFS_FILENAME__' is corrupt: out of range date value -719164 at offset 309. The valid date range is -719162..2932896 (0001-01-01..9999-12-31).
 ====
 ---- QUERY
 select date_part, count(*) from date_tbl group by date_part;
diff --git a/tests/query_test/test_date_queries.py b/tests/query_test/test_date_queries.py
index ae4c169..46ca0fa 100644
--- a/tests/query_test/test_date_queries.py
+++ b/tests/query_test/test_date_queries.py
@@ -56,7 +56,8 @@ class TestDateQueries(ImpalaTestSuite):
       #  - Hive2 uses Julian Calendar for writing dates before 1582-10-15, whereas Impala
       #    uses proleptic Gregorian Calendar. This affects the results Impala gets when
       #    querying avro tables written by Hive2.
-      #  - Hive3 on the other hand uses proleptic Gregorian Calendar to write dates.
+      #  - Since HIVE-22589, Hive3 also uses Julian Calendar for dates before 1582-10-15
+      #    by default.
       self.run_test_case('QueryTest/avro_date', vector)
     else:
       self.run_test_case('QueryTest/date', vector)