You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2020/04/01 03:05:05 UTC

[impala] 01/02: IMPALA-9584: remove flaky avg(TIMESTAMP) aggregates from test_analytic_fns

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit a08cd7f49bb9c69b05fabe9ccd18577cfd300b4e
Author: Csaba Ringhofer <cs...@cloudera.com>
AuthorDate: Tue Mar 31 19:23:01 2020 +0200

    IMPALA-9584: remove flaky avg(TIMESTAMP) aggregates from test_analytic_fns
    
    AVG(TIMESTAMP) is not deterministic, because it uses a double to sum
    the timestamps, and adding doubles in a different order can lead to
    different results. This does not cause problems for DOUBLE columns,
    because the test framework does not require an exact match if the
    result is a double. As AVG is the only function for TIMESTAMP with this
    problem, reducing the precision of all timestamp checks seemed like
    overkill.
    
    As a short-term solution, I removed the problematic aggregates from the
    tests.
    
    Testing:
    - ran only the related tests
    
    Change-Id: I10e0027a64a4e430b7db3ed7c8d0cc8cdcb202e0
    Reviewed-on: http://gerrit.cloudera.org:8080/15621
    Reviewed-by: Tim Armstrong <ta...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../queries/QueryTest/analytic-fns.test            | 99 ++++++++--------------
 1 file changed, 35 insertions(+), 64 deletions(-)

diff --git a/testdata/workloads/functional-query/queries/QueryTest/analytic-fns.test b/testdata/workloads/functional-query/queries/QueryTest/analytic-fns.test
index 72d8288..e558f7f 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/analytic-fns.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/analytic-fns.test
@@ -1,53 +1,34 @@
 ====
 ---- QUERY
+# The test used to contain avg(timestamp_col) but it had to be removed due to IMPALA-9584.
 select year, month,
 count(int_col) over (partition by year, month),
 avg(int_col) over (partition by year, month),
-avg(timestamp_col) over (partition by year, month),
 min(string_col) over (partition by year, month),
 max(string_col) over (partition by year, month)
 from alltypessmall
 where id % 4 = 0 and month != 1;
 ---- RESULTS: VERIFY_IS_EQUAL_SORTED
-2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666746,'1','9'
-2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666746,'1','9'
-2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666746,'1','9'
-2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666746,'1','9'
-2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666746,'1','9'
-2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666746,'1','9'
-2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000143,'0','8'
-2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000143,'0','8'
-2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000143,'0','8'
-2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000143,'0','8'
-2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000143,'0','8'
-2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000143,'0','8'
-2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666746,'1','9'
-2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666746,'1','9'
-2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666746,'1','9'
-2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666746,'1','9'
-2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666746,'1','9'
-2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666746,'1','9'
----- DBAPI_RESULTS: VERIFY_IS_EQUAL_SORTED
-2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666,'1','9'
-2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666,'1','9'
-2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666,'1','9'
-2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666,'1','9'
-2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666,'1','9'
-2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666,'1','9'
-2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000,'0','8'
-2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000,'0','8'
-2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000,'0','8'
-2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000,'0','8'
-2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000,'0','8'
-2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000,'0','8'
-2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666,'1','9'
-2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666,'1','9'
-2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666,'1','9'
-2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666,'1','9'
-2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666,'1','9'
-2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666,'1','9'
----- TYPES
-INT, INT, BIGINT, DOUBLE, TIMESTAMP, STRING, STRING
+2009,2,6,4.666666666666667,'1','9'
+2009,2,6,4.666666666666667,'1','9'
+2009,2,6,4.666666666666667,'1','9'
+2009,2,6,4.666666666666667,'1','9'
+2009,2,6,4.666666666666667,'1','9'
+2009,2,6,4.666666666666667,'1','9'
+2009,3,6,3.666666666666667,'0','8'
+2009,3,6,3.666666666666667,'0','8'
+2009,3,6,3.666666666666667,'0','8'
+2009,3,6,3.666666666666667,'0','8'
+2009,3,6,3.666666666666667,'0','8'
+2009,3,6,3.666666666666667,'0','8'
+2009,4,6,4.333333333333333,'1','9'
+2009,4,6,4.333333333333333,'1','9'
+2009,4,6,4.333333333333333,'1','9'
+2009,4,6,4.333333333333333,'1','9'
+2009,4,6,4.333333333333333,'1','9'
+2009,4,6,4.333333333333333,'1','9'
+---- TYPES
+INT, INT, BIGINT, DOUBLE, STRING, STRING
 ====
 ---- QUERY
 select date_part,
@@ -625,33 +606,24 @@ INT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIG
 ====
 ---- QUERY
 # Test sum() and avg() removing values
+# The test used to contain avg(timestamp_col) but it had to be removed due to IMPALA-9584.
 select id,
 sum(int_col) over (order by id rows between 1 preceding and 1 following),
 sum(double_col) over (order by id rows between 3 preceding and 2 preceding),
 avg(int_col) over (order by id rows between 1 preceding and 1 following),
-avg(double_col) over (order by id rows between 3 preceding and 2 preceding),
-avg(timestamp_col) over (order by id rows between 2 following and 3 following)
+avg(double_col) over (order by id rows between 3 preceding and 2 preceding)
 from alltypes where id < 8
 ---- RESULTS: VERIFY_IS_EQUAL_SORTED
-0,1,NULL,0.5,NULL,2009-01-01 00:02:30.199999809
-1,3,NULL,1,NULL,2009-01-01 00:03:30.449999809
-2,6,0,2,0,2009-01-01 00:04:30.349999904
-3,9,10.1,3,5.05,2009-01-01 00:05:30.124999999
-4,12,30.3,4,15.15,2009-01-01 00:06:30.180000066
-5,15,50.49999999999999,5,25.25,2009-01-01 00:07:00.210000038
-6,18,70.69999999999999,6,35.34999999999999,NULL
-7,13,90.89999999999999,6.5,45.45,NULL
----- DBAPI_RESULTS: VERIFY_IS_EQUAL_SORTED
-0,1,NULL,0.5,NULL,2009-01-01 00:02:30.199999
-1,3,NULL,1,NULL,2009-01-01 00:03:30.449999
-2,6,0,2,0,2009-01-01 00:04:30.349999
-3,9,10.1,3,5.05,2009-01-01 00:05:30.124999
-4,12,30.3,4,15.15,2009-01-01 00:06:30.180000
-5,15,50.49999999999999,5,25.25,2009-01-01 00:07:00.210000
-6,18,70.69999999999999,6,35.34999999999999,NULL
-7,13,90.89999999999999,6.5,45.45,NULL
+0,1,NULL,0.5,NULL
+1,3,NULL,1,NULL
+2,6,0,2,0
+3,9,10.1,3,5.05
+4,12,30.3,4,15.15
+5,15,50.49999999999999,5,25.25
+6,18,70.69999999999999,6,35.34999999999999
+7,13,90.89999999999999,6.5,45.45
 ---- TYPES
-INT, BIGINT, DOUBLE, DOUBLE, DOUBLE, TIMESTAMP
+INT, BIGINT, DOUBLE, DOUBLE, DOUBLE
 ====
 ---- QUERY
 # More testing of start bounds. This exposed a bug in removing
@@ -1612,6 +1584,9 @@ INT, BIGINT
 # repro was crafted to hit the crash by playing with the window and row sizes
 # so that memory allocated within the analytic node is just large enough (>8mb)
 # to be transfered to the output row batch immediately before eos.
+#
+# Results are not checked because avg(TIMESTAMP) is not deterministic; see IMPALA-9584.
+# The test should still be enough to catch a crash.
 select max(t3.c1), max(t3.c2)
 from (
   select
@@ -1621,10 +1596,6 @@ from (
     over (order by t1.id, t2.id rows between 5000 following and 50000 following) c2
   from alltypesagg t1 join alltypesagg t2 where t1.int_col = t2.int_col
 ) t3
----- RESULTS
-2010-01-10 18:02:05.234931468,2010-01-10 18:02:05.215156078
----- DBAPI_RESULTS
-2010-01-10 18:02:05.234931,2010-01-10 18:02:05.215156
 ---- TYPES
 TIMESTAMP, TIMESTAMP
 ====