You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by cs...@apache.org on 2021/05/14 16:53:15 UTC

[impala] 02/02: IMPALA-5121: Fix AVG() on timestamp col with use_local_tz_for_unix_timestamp_conversions

This is an automated email from the ASF dual-hosted git repository.

csringhofer pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 4697db021411e95165cfd199e1aa3fc848dbfc8b
Author: Csaba Ringhofer <cs...@cloudera.com>
AuthorDate: Sat May 8 08:15:30 2021 +0200

    IMPALA-5121: Fix AVG() on timestamp col with use_local_tz_for_unix_timestamp_conversions
    
    AVG used to contain a back and forth timezone conversion if
    use_local_tz_for_unix_timestamp_conversions is true. This could
    affect the results if there were values from different DST rules.
    
    Note that AVG on timestamps has other issues besides this, see
    IMPALA-7472 for details.
    
    Testing:
    - added a regression test
    
    Change-Id: I999099de8e07269b96b75d473f5753be4479cecd
    Reviewed-on: http://gerrit.cloudera.org:8080/17412
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/exprs/aggregate-functions-ir.cc                       |  9 +++------
 .../queries/QueryTest/utc-timestamp-functions.test           | 12 ++++++++++++
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/be/src/exprs/aggregate-functions-ir.cc b/be/src/exprs/aggregate-functions-ir.cc
index 9048df8..a5833ae 100644
--- a/be/src/exprs/aggregate-functions-ir.cc
+++ b/be/src/exprs/aggregate-functions-ir.cc
@@ -354,8 +354,7 @@ void AggregateFunctions::TimestampAvgUpdate(FunctionContext* ctx,
   AvgState* avg = reinterpret_cast<AvgState*>(dst->ptr);
   const TimestampValue& tm_src = TimestampValue::FromTimestampVal(src);
   double val;
-  const Timezone* tz = ctx->impl()->state()->time_zone_for_unix_time_conversions();
-  if (tm_src.ToSubsecondUnixTime(tz, &val)) {
+  if (tm_src.ToSubsecondUnixTime(UTCPTR, &val)) {
     avg->sum += val;
     ++avg->count;
   }
@@ -369,8 +368,7 @@ void AggregateFunctions::TimestampAvgRemove(FunctionContext* ctx,
   AvgState* avg = reinterpret_cast<AvgState*>(dst->ptr);
   const TimestampValue& tm_src = TimestampValue::FromTimestampVal(src);
   double val;
-  const Timezone* tz = ctx->impl()->state()->time_zone_for_unix_time_conversions();
-  if (tm_src.ToSubsecondUnixTime(tz, &val)) {
+  if (tm_src.ToSubsecondUnixTime(UTCPTR, &val)) {
     avg->sum -= val;
     --avg->count;
     DCHECK_GE(avg->count, 0);
@@ -381,9 +379,8 @@ TimestampVal AggregateFunctions::TimestampAvgGetValue(FunctionContext* ctx,
     const StringVal& src) {
   AvgState* val_struct = reinterpret_cast<AvgState*>(src.ptr);
   if (val_struct->count == 0) return TimestampVal::null();
-  const Timezone* tz = ctx->impl()->state()->time_zone_for_unix_time_conversions();
   const TimestampValue& tv = TimestampValue::FromSubsecondUnixTime(
-      val_struct->sum / val_struct->count, tz);
+      val_struct->sum / val_struct->count, UTCPTR);
   if (tv.HasDate()) {
     TimestampVal result;
     tv.ToTimestampVal(&result);
diff --git a/testdata/workloads/functional-query/queries/QueryTest/utc-timestamp-functions.test b/testdata/workloads/functional-query/queries/QueryTest/utc-timestamp-functions.test
index a74ef68..1e60a56 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/utc-timestamp-functions.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/utc-timestamp-functions.test
@@ -100,3 +100,15 @@ TIMESTAMP
 ---- RESULTS
 1969-12-31 16:00:00
 ====
+---- QUERY
+# Regression test for IMPALA-5121. AVG used to contain a back and forth timezone
+# conversion (if use_local_tz_for_unix_timestamp_conversions is true) that could affect
+# the results if there were values from different DST rules.
+SET timezone=CET;
+SET use_local_tz_for_unix_timestamp_conversions=1;
+select avg(timestamp_col) from functional.alltypestiny;
+---- TYPES
+TIMESTAMP
+---- RESULTS
+2009-02-15 00:00:30
+====
\ No newline at end of file